Fixed the RAG issue in the test pipeline

2026-06-13 08:30:07 +00:00 · 2026-04-26 18:52:00 +00:00
parent 97a501097d
commit 78023ae9c5
7 changed files with 544 additions and 0 deletions
--- a/tests/test_rag_cache.py
+++ b/tests/test_rag_cache.py
@@ -0,0 +1,94 @@
+"""Tests for RAG response caching integration."""
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+from app.services import cache as response_cache
+
+
+@pytest.fixture(autouse=True)
+def clear_cache():
+    """Empty the response cache's ``_store`` and ``_index`` around every test."""
+
+    def _wipe():
+        response_cache._store.clear()
+        response_cache._index.clear()
+
+    _wipe()  # setup: nothing is cached when the test starts
+    yield
+    _wipe()  # teardown: nothing cached is left behind afterwards
+
+
+@pytest.fixture
+def rag():
+    """Build a new ``RAGEngine``; the import runs only when the fixture is requested."""
+    from app.services.rag import RAGEngine
+
+    engine = RAGEngine()
+    return engine
+
+
+@pytest.fixture
+def chatbot_config():
+    """Return the chatbot configuration dict that the tests hand to the RAG engine."""
+    model_id = "accounts/fireworks/models/llama-v3p3-70b-instruct"
+    return {
+        "model": model_id,
+        "max_tokens": 500, "temperature": 0.7,
+        "company_name": "Test Corp", "system_prompt": "",
+    }
+
+
+@pytest.fixture
+def good_search_result():
+    """Return a one-item search result: a 0.82-score hit carrying text, file name and page."""
+    payload = {"text": "We open 9am–6pm Mon–Fri.", "file_name": "faq.pdf", "page_number": 1}
+    hit = {"payload": payload, "score": 0.82}
+    return [hit]
+
+
+class TestRAGCaching:
+    """Drive ``process_query`` with mocked services and count how often the LLM is called."""
+
+    async def test_second_identical_query_uses_cache(self, rag, chatbot_config, good_search_result):
+        """The same query on the same collection, asked twice, should hit the LLM once."""
+        fake_llm = AsyncMock(return_value={"content": "9am to 6pm", "tokens_used": 20, "model": "m"})
+        fake_vector = [0.1] * 1536
+        with patch.object(rag.embedding_svc, "embed_text", return_value=fake_vector), \
+             patch.object(rag.vector_svc, "search", return_value=good_search_result), \
+             patch.object(rag.llm_svc, "generate", fake_llm):
+            for _ in range(2):
+                await rag.process_query("What are your hours?", "col-1", chatbot_config)
+
+        assert fake_llm.call_count == 1
+
+    async def test_cache_not_used_when_conversation_history_present(self, rag, chatbot_config, good_search_result):
+        """History makes a query stateful, so both identical calls should reach the LLM."""
+        fake_llm = AsyncMock(return_value={"content": "Yes!", "tokens_used": 10, "model": "m"})
+        prior_turns = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]
+        fake_vector = [0.1] * 1536
+        with patch.object(rag.embedding_svc, "embed_text", return_value=fake_vector), \
+             patch.object(rag.vector_svc, "search", return_value=good_search_result), \
+             patch.object(rag.llm_svc, "generate", fake_llm):
+            for _ in range(2):
+                await rag.process_query(
+                    "Follow-up question", "col-1", chatbot_config, conversation_history=prior_turns
+                )
+
+        assert fake_llm.call_count == 2
+
+    async def test_different_collections_cached_separately(self, rag, chatbot_config, good_search_result):
+        """One question asked of two collections should produce two LLM calls, not one."""
+        fake_llm = AsyncMock(return_value={"content": "Answer", "tokens_used": 10, "model": "m"})
+        fake_vector = [0.1] * 1536
+        with patch.object(rag.embedding_svc, "embed_text", return_value=fake_vector), \
+             patch.object(rag.vector_svc, "search", return_value=good_search_result), \
+             patch.object(rag.llm_svc, "generate", fake_llm):
+            for collection in ("col-A", "col-B"):
+                await rag.process_query("Same question", collection, chatbot_config)
+
+        assert fake_llm.call_count == 2
+
+    async def test_confidence_score_returned_from_cache(self, rag, chatbot_config, good_search_result):
+        """A repeated query should return the same confidence score and the LLM's answer text."""
+        fake_llm = AsyncMock(return_value={"content": "Cached answer", "tokens_used": 10, "model": "m"})
+        fake_vector = [0.1] * 1536
+        with patch.object(rag.embedding_svc, "embed_text", return_value=fake_vector), \
+             patch.object(rag.vector_svc, "search", return_value=good_search_result), \
+             patch.object(rag.llm_svc, "generate", fake_llm):
+            initial = await rag.process_query("hours?", "col-1", chatbot_config)
+            repeat = await rag.process_query("hours?", "col-1", chatbot_config)
+
+        assert initial["confidence_score"] == repeat["confidence_score"]
+        assert repeat["response"] == "Cached answer"