mirror of
http://88.130.71.182:3000/BlitTech/contexta_be.git
synced 2026-06-13 08:30:07 +00:00
95 lines
3.9 KiB
Python
95 lines
3.9 KiB
Python
"""Tests for RAG response caching integration."""
|
||
import pytest
|
||
from unittest.mock import AsyncMock, patch, MagicMock
|
||
from app.services import cache as response_cache
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def clear_cache():
    """Empty the response cache's ``_store`` and ``_index`` around each test.

    The fixture is autouse, so it applies to every test in this module. The
    containers are cleared once before the test body runs and once more
    after it finishes.
    """

    def _wipe():
        # Both private containers are reset together, store first.
        response_cache._store.clear()
        response_cache._index.clear()

    _wipe()
    yield
    _wipe()
|
||
|
||
|
||
@pytest.fixture
def rag():
    """Provide a freshly constructed ``RAGEngine`` for a test."""
    # The import happens at fixture call time rather than module import time,
    # so ``app.services.rag`` is only loaded when a test requests this fixture.
    from app.services.rag import RAGEngine

    engine = RAGEngine()
    return engine
|
||
|
||
|
||
@pytest.fixture
def chatbot_config():
    """Provide the chatbot configuration dict passed to ``process_query``."""
    config = {
        "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
        "max_tokens": 500,
        "temperature": 0.7,
        "company_name": "Test Corp",
        "system_prompt": "",
    }
    return config
|
||
|
||
|
||
@pytest.fixture
def good_search_result():
    """Provide a one-hit search result used as the vector search stub value."""
    payload = {
        "text": "We open 9am–6pm Mon–Fri.",
        "file_name": "faq.pdf",
        "page_number": 1,
    }
    hit = {"payload": payload, "score": 0.82}
    return [hit]
|
||
|
||
|
||
class TestRAGCaching:
    """Response-cache behaviour of ``process_query`` on the ``rag`` engine.

    Every test stubs the engine's embedding, vector-search and LLM services
    and uses the LLM mock's call count to tell whether a query reached the
    LLM or was answered from the cache.

    NOTE(review): these coroutine tests carry no explicit asyncio marker;
    presumably the project's pytest configuration enables an async test
    mode -- confirm.
    """

    async def test_second_identical_query_uses_cache(self, rag, chatbot_config, good_search_result):
        """Repeating the same query on the same collection calls the LLM once."""
        llm_mock = AsyncMock(return_value={"content": "9am to 6pm", "tokens_used": 20, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            await rag.process_query("What are your hours?", "col-1", chatbot_config)
            await rag.process_query("What are your hours?", "col-1", chatbot_config)

        # LLM should only be called once; second call hits cache
        assert llm_mock.call_count == 1

    async def test_cache_not_used_when_conversation_history_present(self, rag, chatbot_config, good_search_result):
        """Queries that carry conversation history always go to the LLM."""
        llm_mock = AsyncMock(return_value={"content": "Yes!", "tokens_used": 10, "model": "m"})

        history = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)
            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)

        # Both calls go to LLM because history makes them stateful
        assert llm_mock.call_count == 2

    async def test_different_collections_cached_separately(self, rag, chatbot_config, good_search_result):
        """The same question asked of two collections is not shared via cache."""
        llm_mock = AsyncMock(return_value={"content": "Answer", "tokens_used": 10, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            await rag.process_query("Same question", "col-A", chatbot_config)
            await rag.process_query("Same question", "col-B", chatbot_config)

        # Different collections → two LLM calls, not one
        assert llm_mock.call_count == 2

    async def test_confidence_score_returned_from_cache(self, rag, chatbot_config, good_search_result):
        """A cached response keeps the original confidence score and text."""
        llm_mock = AsyncMock(return_value={"content": "Cached answer", "tokens_used": 10, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            first = await rag.process_query("hours?", "col-1", chatbot_config)
            second = await rag.process_query("hours?", "col-1", chatbot_config)

        # Without this check the test would also pass if the second call
        # bypassed the cache, since the LLM mock returns the same content.
        assert llm_mock.call_count == 1
        assert first["confidence_score"] == second["confidence_score"]
        assert second["response"] == "Cached answer"
|