Fixed the RAG issue in the test pipeline

2026-06-12 23:23:21 +00:00 · 2026-04-26 21:43:19 +00:00
parent 78023ae9c5
commit 260a9c6353
9 changed files with 262 additions and 78 deletions
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@@ -1,3 +1,4 @@
+import asyncio
 import time
 from collections import defaultdict

@@ -311,8 +312,7 @@ async def test_chat(
    company_data = chatbot.get("companies", {}) or {}
    chatbot_config = {**chatbot, "company_name": company_data.get("name", "")}

-    results = []
-    for question in body.questions:
+    async def _run_one(question: str) -> TestChatResult:
        try:
            result = await rag_engine.process_query(
                query=question,
@@ -322,22 +322,24 @@ async def test_chat(
                language="auto",
                bypass_cache=True,
            )
-            results.append(TestChatResult(
+            return TestChatResult(
                question=question,
                response=result["response"],
                confidence_score=result.get("confidence_score", 0.0),
                sources=result.get("sources", []),
                model_used=result.get("model", ""),
-            ))
+            )
        except Exception as e:
-            results.append(TestChatResult(
+            return TestChatResult(
                question=question,
                response=f"Error: {e}",
                confidence_score=0.0,
                sources=[],
                model_used="",
-            ))
-    return results
+            )
+
+    results = await asyncio.gather(*[_run_one(q) for q in body.questions])
+    return list(results)


 # ── OLD analytics endpoint REMOVED ───────────────────────────────────────────
--- a/app/routers/documents.py
+++ b/app/routers/documents.py
@@ -94,7 +94,7 @@ async def upload_document(
        file_bytes=file_bytes,
        file_name=file.filename,
        doc_id=doc_id,
-        chatbot=chatbot,
+        chatbot_id=chatbot_id,
        supabase=supabase,
    )

@@ -105,16 +105,28 @@ async def _process_document_bg(
    file_bytes: bytes,
    file_name: str,
    doc_id: str,
-    chatbot: dict,
+    chatbot_id: str,
    supabase,
 ):
    """Background task to process and embed a document"""
    try:
+        # Re-fetch chatbot to guarantee we use the canonical collection and company_id,
+        # not a snapshot that could have been captured before an update.
+        chatbot_row = supabase.table("chatbots").select("company_id, qdrant_collection_name").eq("id", chatbot_id).execute()
+        if not chatbot_row.data:
+            logger.error(f"Chatbot {chatbot_id} not found during document processing")
+            supabase.table("documents").update({
+                "status": "failed",
+                "error_message": "Chatbot not found"
+            }).eq("id", doc_id).execute()
+            return
+
+        chatbot = chatbot_row.data[0]
        company_id = chatbot.get("company_id", "")
        collection_name = chatbot.get("qdrant_collection_name")

        if not collection_name:
-            logger.error(f"No Qdrant collection for chatbot {chatbot['id']}")
+            logger.error(f"No Qdrant collection for chatbot {chatbot_id}")
            supabase.table("documents").update({
                "status": "failed",
                "error_message": "Vector store not configured"
@@ -168,7 +180,7 @@ async def _process_document_bg(
        }).eq("id", doc_id).execute()

        response_cache.invalidate(collection_name)
-        logger.info(f"Document {doc_id} processed: {len(chunks)} chunks")
+        logger.info(f"Document {doc_id} processed: {len(chunks)} chunks → collection='{collection_name}' company='{company_id}'")

    except Exception as e:
        logger.error(f"Document processing error for {doc_id}: {e}")
@@ -274,7 +286,7 @@ async def retry_document_processing(
        file_bytes=file_bytes,
        file_name=document["file_name"],
        doc_id=document_id,
-        chatbot=chatbot,
+        chatbot_id=chatbot_id,
        supabase=supabase,
    )

@@ -333,7 +345,7 @@ async def add_url_source(
        _process_url_source,
        source_id=source_id,
        url=data.url,
-        chatbot=chatbot,
+        chatbot_id=chatbot_id,
        supabase=supabase,
    )

@@ -394,12 +406,12 @@ async def refresh_url_source(
        "chunk_count": 0,
    }).eq("id", source_id).returning("representation").execute()

-    background_tasks.add_task(_process_url_source, source_id, src["url"], chatbot, supabase)
+    background_tasks.add_task(_process_url_source, source_id, src["url"], chatbot_id, supabase)

    return UrlSourceResponse(**{**src, "status": "pending", "chunk_count": 0})


-async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase):
+async def _process_url_source(source_id: str, url: str, chatbot_id: str, supabase):
    """Background task to scrape a URL and add its content to the vector store."""
    from app.services.web_scraper import scrape_url
    from app.services.document_processor import chunk_text
@@ -407,6 +419,18 @@ async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase)
    from app.services.vector_store import vector_store

    try:
+        # Re-fetch chatbot to guarantee we use the canonical collection and company_id.
+        chatbot_row = supabase.table("chatbots").select("company_id, qdrant_collection_name").eq("id", chatbot_id).execute()
+        if not chatbot_row.data:
+            logger.error(f"Chatbot {chatbot_id} not found during URL source processing")
+            supabase.table("url_sources").update({
+                "status": "failed",
+                "error_message": "Chatbot not found",
+            }).eq("id", source_id).execute()
+            return
+
+        chatbot = chatbot_row.data[0]
+
        # Update status to processing
        supabase.table("url_sources").update({"status": "processing"}).eq("id", source_id).execute()

@@ -480,7 +504,8 @@ async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase)
        }).eq("id", source_id).execute()

        response_cache.invalidate(collection_name)
-        logger.info(f"URL source {source_id} processed: {len(chunks)} chunks from {url}")
+        logger.info(f"URL source {source_id} processed: {len(chunks)} chunks from {url} → collection='{collection_name}' company='{chatbot.get('company_id', '')}'")
+

    except Exception as e:
        logger.error(f"URL source processing error {source_id}: {e}")