Fixed the RAG issue in the test pipeline

This commit is contained in:
belviskhoremk
2026-04-26 18:52:00 +00:00
parent 97a501097d
commit 78023ae9c5
7 changed files with 544 additions and 0 deletions

94
tests/test_rag_cache.py Normal file
View File

@@ -0,0 +1,94 @@
"""Tests for RAG response caching integration."""
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from app.services import cache as response_cache
@pytest.fixture(autouse=True)
def clear_cache():
    """Empty the response cache's ``_store`` and ``_index`` around each test.

    Declared ``autouse`` so every test in this module gets it without asking.
    Both containers are cleared before the test body runs and again after it.
    """

    def _wipe():
        # Looked up on the module at call time, exactly when clearing happens.
        response_cache._store.clear()
        response_cache._index.clear()

    _wipe()
    yield
    _wipe()
@pytest.fixture
def rag():
    """Provide a newly constructed ``RAGEngine`` instance."""
    # The import happens when the fixture runs, not when this module is loaded.
    from app.services.rag import RAGEngine

    engine = RAGEngine()
    return engine
@pytest.fixture
def chatbot_config():
    """Return the chatbot configuration dict passed to ``process_query``."""
    config = {
        "model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
        "max_tokens": 500,
        "temperature": 0.7,
        "company_name": "Test Corp",
        "system_prompt": "",
    }
    return config
@pytest.fixture
def good_search_result():
    """Return a one-element search result list with a score of 0.82.

    The tests use it as the patched return value of ``vector_svc.search``.
    """
    # NOTE(review): the text reads as if dashes were lost ("9am-6pm Mon-Fri");
    # kept verbatim here -- confirm against the original file.
    payload = {"text": "We open 9am6pm MonFri.", "file_name": "faq.pdf", "page_number": 1}
    hit = {"payload": payload, "score": 0.82}
    return [hit]
class TestRAGCaching:
    """Check when ``RAGEngine.process_query`` reuses a cached response.

    Every test patches ``embedding_svc.embed_text``, ``vector_svc.search`` and
    ``llm_svc.generate`` on the engine, so the call count recorded by the
    ``generate`` mock shows whether a query reached the LLM or not.

    NOTE(review): the tests are ``async def`` without an explicit marker, so
    they presumably rely on an async pytest plugin configured elsewhere (for
    example pytest-asyncio in auto mode) -- confirm.
    """

    async def test_second_identical_query_uses_cache(
        self, rag, chatbot_config, good_search_result
    ):
        """The same query on the same collection reaches the LLM only once."""
        llm_mock = AsyncMock(return_value={"content": "9am to 6pm", "tokens_used": 20, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            await rag.process_query("What are your hours?", "col-1", chatbot_config)
            await rag.process_query("What are your hours?", "col-1", chatbot_config)

        # LLM should only be called once; second call hits cache
        assert llm_mock.call_count == 1

    async def test_cache_not_used_when_conversation_history_present(
        self, rag, chatbot_config, good_search_result
    ):
        """Queries that carry conversation history reach the LLM every time."""
        llm_mock = AsyncMock(return_value={"content": "Yes!", "tokens_used": 10, "model": "m"})
        history = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)
            await rag.process_query("Follow-up question", "col-1", chatbot_config, conversation_history=history)

        # Both calls go to LLM because history makes them stateful
        assert llm_mock.call_count == 2

    async def test_different_collections_cached_separately(
        self, rag, chatbot_config, good_search_result
    ):
        """The same question asked of two collections reaches the LLM twice."""
        llm_mock = AsyncMock(return_value={"content": "Answer", "tokens_used": 10, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            await rag.process_query("Same question", "col-A", chatbot_config)
            await rag.process_query("Same question", "col-B", chatbot_config)

        # Different collections → two LLM calls, not one
        assert llm_mock.call_count == 2

    async def test_confidence_score_returned_from_cache(
        self, rag, chatbot_config, good_search_result
    ):
        """A repeated query returns the same confidence score and response text."""
        llm_mock = AsyncMock(return_value={"content": "Cached answer", "tokens_used": 10, "model": "m"})

        with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \
                patch.object(rag.vector_svc, "search", return_value=good_search_result), \
                patch.object(rag.llm_svc, "generate", llm_mock):
            first = await rag.process_query("hours?", "col-1", chatbot_config)
            second = await rag.process_query("hours?", "col-1", chatbot_config)

        # Pin that the second response did not come from the LLM. Without this
        # check the assertions below would presumably also hold with caching
        # disabled, since both calls would get the same mocked LLM output.
        assert llm_mock.call_count == 1
        assert first["confidence_score"] == second["confidence_score"]
        assert second["response"] == "Cached answer"