mirror of
http://88.130.71.182:3000/BlitTech/contexta_be.git
synced 2026-06-13 08:30:07 +00:00
Fixed the RAG issue in the test pipeline
This commit is contained in:
94
tests/test_rag_cache.py
Normal file
94
tests/test_rag_cache.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Tests for RAG response caching integration."""
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
from app.services import cache as response_cache
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clear_cache():
    """Empty the response cache's ``_store`` and ``_index`` around every test.

    The fixture is ``autouse``, so each test in this module starts and ends
    with both containers cleared and no entries leak between tests.
    """

    def _reset():
        # Same order as before: the store first, then its index.
        for container in (response_cache._store, response_cache._index):
            container.clear()

    _reset()
    yield
    _reset()
|
||||
|
||||
|
||||
@pytest.fixture
def rag():
    """Return a freshly constructed ``RAGEngine`` for the requesting test."""
    # Imported lazily, inside the fixture, exactly as the original did.
    from app.services.rag import RAGEngine

    engine = RAGEngine()
    return engine
|
||||
|
||||
|
||||
@pytest.fixture
def chatbot_config():
    """Return the chatbot configuration dict passed to ``process_query``."""
    generation = {
        "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
        "max_tokens": 500,
        "temperature": 0.7,
    }
    persona = {
        "company_name": "Test Corp",
        "system_prompt": "",
    }
    # Merge order keeps the original key order of the resulting dict.
    return {**generation, **persona}
|
||||
|
||||
|
||||
@pytest.fixture
def good_search_result():
    """Return a one-element search result list used to stub ``vector_svc.search``."""
    payload = {
        "text": "We open 9am–6pm Mon–Fri.",
        "file_name": "faq.pdf",
        "page_number": 1,
    }
    hit = {"payload": payload, "score": 0.82}
    return [hit]
|
||||
|
||||
|
||||
class TestRAGCaching:
    """Check how ``RAGEngine.process_query`` interacts with the response cache.

    Each test replaces ``embedding_svc.embed_text``, ``vector_svc.search`` and
    ``llm_svc.generate`` on the engine with mocks. The call count of the
    ``generate`` mock is then used to tell whether an answer came from the
    LLM or from the cache.

    NOTE(review): the tests are coroutines without an explicit asyncio marker;
    presumably the project runs pytest-asyncio in auto mode. Confirm against
    the pytest configuration.
    """

    async def test_second_identical_query_uses_cache(self, rag, chatbot_config, good_search_result):
        """Repeating the same query must not trigger a second LLM call."""
        llm_mock = AsyncMock(return_value={"content": "9am to 6pm", "tokens_used": 20, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            await rag.process_query("What are your hours?", "col-1", chatbot_config)
            await rag.process_query("What are your hours?", "col-1", chatbot_config)

        # LLM should only be called once; second call hits cache
        assert llm_mock.call_count == 1

    async def test_cache_not_used_when_conversation_history_present(self, rag, chatbot_config, good_search_result):
        """Queries that carry ``conversation_history`` must always reach the LLM."""
        llm_mock = AsyncMock(return_value={"content": "Yes!", "tokens_used": 10, "model": "m"})

        history = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)
            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)

        # Both calls go to LLM because history makes them stateful
        assert llm_mock.call_count == 2

    async def test_different_collections_cached_separately(self, rag, chatbot_config, good_search_result):
        """The same question asked against ``col-A`` and ``col-B`` must not share a cache entry."""
        llm_mock = AsyncMock(return_value={"content": "Answer", "tokens_used": 10, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            await rag.process_query("Same question", "col-A", chatbot_config)
            await rag.process_query("Same question", "col-B", chatbot_config)

        # Different collections → two LLM calls, not one
        assert llm_mock.call_count == 2

    async def test_confidence_score_returned_from_cache(self, rag, chatbot_config, good_search_result):
        """A cached result must keep the original confidence score and response text."""
        llm_mock = AsyncMock(return_value={"content": "Cached answer", "tokens_used": 10, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):

            first = await rag.process_query("hours?", "col-1", chatbot_config)
            second = await rag.process_query("hours?", "col-1", chatbot_config)

        # Without this check the test would pass even if the cache were
        # bypassed, because the mocked LLM returns the same payload each time.
        assert llm_mock.call_count == 1

        assert first["confidence_score"] == second["confidence_score"]
        assert second["response"] == "Cached answer"
|
||||
Reference in New Issue
Block a user