"""Tests for RAG pipeline.""" import pytest from unittest.mock import MagicMock, AsyncMock, patch class TestRAGEngine: @pytest.fixture def rag(self): from app.services.rag import RAGEngine engine = RAGEngine() return engine @pytest.fixture def chatbot_config(self): return { "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", "max_tokens": 500, "temperature": 0.7, "company_name": "Test Corp", "system_prompt": "You are helpful.", } async def test_returns_response_when_documents_found(self, rag, chatbot_config): with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \ patch.object(rag.vector_svc, "search", return_value=[{ "payload": {"text": "Test content", "file_name": "test.pdf", "page_number": 1}, "score": 0.8, }]), \ patch.object(rag.llm_svc, "generate", new_callable=AsyncMock, return_value={ "content": "Test response", "tokens_used": 100, "model": "test-model", }): result = await rag.process_query( query="What is the test?", collection_name="test-collection", chatbot_config=chatbot_config, language="en", ) assert result["response"] == "Test response" assert len(result["sources"]) == 1 assert result["sources"][0].score == 0.8 async def test_returns_graceful_message_on_embedding_failure(self, rag, chatbot_config): with patch.object(rag.embedding_svc, "embed_text", side_effect=Exception("Embedding failed")): result = await rag.process_query( query="Test query", collection_name="test-collection", chatbot_config=chatbot_config, ) assert "trouble" in result["response"].lower() assert result["sources"] == [] async def test_language_instruction_injected_for_french(self, rag, chatbot_config): injected_prompt = None async def capture_generate(messages, **kwargs): nonlocal injected_prompt injected_prompt = messages[0]["content"] return {"content": "Bonjour", "tokens_used": 10, "model": "test"} with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \ patch.object(rag.vector_svc, "search", return_value=[]), \ patch.object(rag.llm_svc, "generate", side_effect=capture_generate): await rag.process_query( query="Bonjour", collection_name="test", chatbot_config=chatbot_config, language="fr", ) assert injected_prompt is not None assert "French" in injected_prompt async def test_no_language_instruction_for_english(self, rag, chatbot_config): injected_prompt = None async def capture_generate(messages, **kwargs): nonlocal injected_prompt injected_prompt = messages[0]["content"] return {"content": "Hello", "tokens_used": 10, "model": "test"} with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \ patch.object(rag.vector_svc, "search", return_value=[]), \ patch.object(rag.llm_svc, "generate", side_effect=capture_generate): await rag.process_query( query="Hello", collection_name="test", chatbot_config=chatbot_config, language="en", ) assert injected_prompt is not None # English should NOT inject a language instruction assert "Respond in English" not in injected_prompt async def test_empty_result_when_no_documents(self, rag, chatbot_config): with patch.object(rag.embedding_svc, "embed_text", return_value=[0.1] * 1536), \ patch.object(rag.vector_svc, "search", return_value=[]), \ patch.object(rag.llm_svc, "generate", new_callable=AsyncMock, return_value={ "content": "I don't have info on that.", "tokens_used": 20, "model": "test", }): result = await rag.process_query( query="What is X?", collection_name="empty-collection", chatbot_config=chatbot_config, ) assert result["sources"] == [] assert result["response"] == "I don't have info on that."