Files
contexta_be/tests/test_rag_cache.py
2026-04-26 18:52:00 +00:00

95 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for RAG response caching integration."""
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from app.services import cache as response_cache
@pytest.fixture(autouse=True)
def clear_cache():
    """Empty the response cache's ``_store`` and ``_index`` around every test.

    The fixture is ``autouse``, so it wraps each test in this module without
    being requested: both containers are cleared before the test body runs
    and cleared again once it has finished.
    """
    def _wipe():
        # Store first, then index.
        response_cache._store.clear()
        response_cache._index.clear()

    _wipe()  # setup: start from an empty cache
    yield
    _wipe()  # teardown: leave nothing behind for the next test
@pytest.fixture
def rag():
    """Provide a newly constructed ``RAGEngine`` to the requesting test."""
    # The import lives inside the fixture body, so this module does not import
    # ``app.services.rag`` until a test actually requests the fixture.
    from app.services.rag import RAGEngine

    engine = RAGEngine()
    return engine
@pytest.fixture
def chatbot_config():
    """Return the chatbot settings dict the tests pass to ``process_query``."""
    settings = dict(
        model="accounts/fireworks/models/llama-v3p3-70b-instruct",
        max_tokens=500,
        temperature=0.7,
        company_name="Test Corp",
        system_prompt="",
    )
    return settings
@pytest.fixture
def good_search_result():
    """Return a one-element list of search hits used as the patched ``search`` result.

    The single hit carries a ``payload`` (text, file name, page number) and a
    ``score`` of 0.82.
    """
    return [{
        # The opening-hours ranges use plain ASCII hyphens so the sample text
        # stays readable and contains no look-alike Unicode dash characters.
        "payload": {"text": "We open 9am-6pm Mon-Fri.", "file_name": "faq.pdf", "page_number": 1},
        "score": 0.82,
    }]
class TestRAGCaching:
    """Check how ``rag.process_query`` interacts with the response cache.

    Every test patches the engine's embedding, vector-search and LLM services,
    then counts calls to the LLM mock to tell a cache hit (no new call) from a
    fresh generation (one more call).

    NOTE(review): the tests are coroutines with no explicit asyncio marker --
    presumably the project runs pytest-asyncio in auto mode; confirm.
    """

    async def test_second_identical_query_uses_cache(self, rag, chatbot_config, good_search_result):
        """Repeating a query on the same collection must call the LLM only once."""
        llm_mock = AsyncMock(return_value={"content": "9am to 6pm", "tokens_used": 20, "model": "m"})
        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            await rag.process_query("What are your hours?", "col-1", chatbot_config)
            await rag.process_query("What are your hours?", "col-1", chatbot_config)
        # LLM should only be called once; second call hits cache
        assert llm_mock.call_count == 1

    async def test_cache_not_used_when_conversation_history_present(self, rag, chatbot_config, good_search_result):
        """Queries that carry conversation history must reach the LLM every time."""
        llm_mock = AsyncMock(return_value={"content": "Yes!", "tokens_used": 10, "model": "m"})
        history = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]
        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)
            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)
        # Both calls go to LLM because history makes them stateful
        assert llm_mock.call_count == 2

    async def test_different_collections_cached_separately(self, rag, chatbot_config, good_search_result):
        """The same question asked against two collections must not share a cache entry."""
        llm_mock = AsyncMock(return_value={"content": "Answer", "tokens_used": 10, "model": "m"})
        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            await rag.process_query("Same question", "col-A", chatbot_config)
            await rag.process_query("Same question", "col-B", chatbot_config)
        # Different collections -> two LLM calls, not one
        assert llm_mock.call_count == 2

    async def test_confidence_score_returned_from_cache(self, rag, chatbot_config, good_search_result):
        """A cached reply must carry the same confidence score and text as the original."""
        llm_mock = AsyncMock(return_value={"content": "Cached answer", "tokens_used": 10, "model": "m"})
        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            first = await rag.process_query("hours?", "col-1", chatbot_config)
            second = await rag.process_query("hours?", "col-1", chatbot_config)
        # Prove the second reply really came from the cache: the mock returns
        # the same content on every call, so without this check the assertions
        # below would also pass if the cache were bypassed entirely.
        assert llm_mock.call_count == 1
        assert first["confidence_score"] == second["confidence_score"]
        assert second["response"] == "Cached answer"