mirror of
http://88.130.71.182:3000/BlitTech/contexta_be.git
synced 2026-06-12 23:23:21 +00:00
Fixed the RAG issue in the test pipeline
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
@@ -311,8 +312,7 @@ async def test_chat(
|
||||
company_data = chatbot.get("companies", {}) or {}
|
||||
chatbot_config = {**chatbot, "company_name": company_data.get("name", "")}
|
||||
|
||||
results = []
|
||||
for question in body.questions:
|
||||
async def _run_one(question: str) -> TestChatResult:
|
||||
try:
|
||||
result = await rag_engine.process_query(
|
||||
query=question,
|
||||
@@ -322,22 +322,24 @@ async def test_chat(
|
||||
language="auto",
|
||||
bypass_cache=True,
|
||||
)
|
||||
results.append(TestChatResult(
|
||||
return TestChatResult(
|
||||
question=question,
|
||||
response=result["response"],
|
||||
confidence_score=result.get("confidence_score", 0.0),
|
||||
sources=result.get("sources", []),
|
||||
model_used=result.get("model", ""),
|
||||
))
|
||||
)
|
||||
except Exception as e:
|
||||
results.append(TestChatResult(
|
||||
return TestChatResult(
|
||||
question=question,
|
||||
response=f"Error: {e}",
|
||||
confidence_score=0.0,
|
||||
sources=[],
|
||||
model_used="",
|
||||
))
|
||||
return results
|
||||
)
|
||||
|
||||
results = await asyncio.gather(*[_run_one(q) for q in body.questions])
|
||||
return list(results)
|
||||
|
||||
|
||||
# ── OLD analytics endpoint REMOVED ───────────────────────────────────────────
|
||||
|
||||
@@ -94,7 +94,7 @@ async def upload_document(
|
||||
file_bytes=file_bytes,
|
||||
file_name=file.filename,
|
||||
doc_id=doc_id,
|
||||
chatbot=chatbot,
|
||||
chatbot_id=chatbot_id,
|
||||
supabase=supabase,
|
||||
)
|
||||
|
||||
@@ -105,16 +105,28 @@ async def _process_document_bg(
|
||||
file_bytes: bytes,
|
||||
file_name: str,
|
||||
doc_id: str,
|
||||
chatbot: dict,
|
||||
chatbot_id: str,
|
||||
supabase,
|
||||
):
|
||||
"""Background task to process and embed a document"""
|
||||
try:
|
||||
# Re-fetch chatbot to guarantee we use the canonical collection and company_id,
|
||||
# not a snapshot that could have been captured before an update.
|
||||
chatbot_row = supabase.table("chatbots").select("company_id, qdrant_collection_name").eq("id", chatbot_id).execute()
|
||||
if not chatbot_row.data:
|
||||
logger.error(f"Chatbot {chatbot_id} not found during document processing")
|
||||
supabase.table("documents").update({
|
||||
"status": "failed",
|
||||
"error_message": "Chatbot not found"
|
||||
}).eq("id", doc_id).execute()
|
||||
return
|
||||
|
||||
chatbot = chatbot_row.data[0]
|
||||
company_id = chatbot.get("company_id", "")
|
||||
collection_name = chatbot.get("qdrant_collection_name")
|
||||
|
||||
if not collection_name:
|
||||
logger.error(f"No Qdrant collection for chatbot {chatbot['id']}")
|
||||
logger.error(f"No Qdrant collection for chatbot {chatbot_id}")
|
||||
supabase.table("documents").update({
|
||||
"status": "failed",
|
||||
"error_message": "Vector store not configured"
|
||||
@@ -168,7 +180,7 @@ async def _process_document_bg(
|
||||
}).eq("id", doc_id).execute()
|
||||
|
||||
response_cache.invalidate(collection_name)
|
||||
logger.info(f"Document {doc_id} processed: {len(chunks)} chunks")
|
||||
logger.info(f"Document {doc_id} processed: {len(chunks)} chunks → collection='{collection_name}' company='{company_id}'")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Document processing error for {doc_id}: {e}")
|
||||
@@ -274,7 +286,7 @@ async def retry_document_processing(
|
||||
file_bytes=file_bytes,
|
||||
file_name=document["file_name"],
|
||||
doc_id=document_id,
|
||||
chatbot=chatbot,
|
||||
chatbot_id=chatbot_id,
|
||||
supabase=supabase,
|
||||
)
|
||||
|
||||
@@ -333,7 +345,7 @@ async def add_url_source(
|
||||
_process_url_source,
|
||||
source_id=source_id,
|
||||
url=data.url,
|
||||
chatbot=chatbot,
|
||||
chatbot_id=chatbot_id,
|
||||
supabase=supabase,
|
||||
)
|
||||
|
||||
@@ -394,12 +406,12 @@ async def refresh_url_source(
|
||||
"chunk_count": 0,
|
||||
}).eq("id", source_id).returning("representation").execute()
|
||||
|
||||
background_tasks.add_task(_process_url_source, source_id, src["url"], chatbot, supabase)
|
||||
background_tasks.add_task(_process_url_source, source_id, src["url"], chatbot_id, supabase)
|
||||
|
||||
return UrlSourceResponse(**{**src, "status": "pending", "chunk_count": 0})
|
||||
|
||||
|
||||
async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase):
|
||||
async def _process_url_source(source_id: str, url: str, chatbot_id: str, supabase):
|
||||
"""Background task to scrape a URL and add its content to the vector store."""
|
||||
from app.services.web_scraper import scrape_url
|
||||
from app.services.document_processor import chunk_text
|
||||
@@ -407,6 +419,18 @@ async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase)
|
||||
from app.services.vector_store import vector_store
|
||||
|
||||
try:
|
||||
# Re-fetch chatbot to guarantee we use the canonical collection and company_id.
|
||||
chatbot_row = supabase.table("chatbots").select("company_id, qdrant_collection_name").eq("id", chatbot_id).execute()
|
||||
if not chatbot_row.data:
|
||||
logger.error(f"Chatbot {chatbot_id} not found during URL source processing")
|
||||
supabase.table("url_sources").update({
|
||||
"status": "failed",
|
||||
"error_message": "Chatbot not found",
|
||||
}).eq("id", source_id).execute()
|
||||
return
|
||||
|
||||
chatbot = chatbot_row.data[0]
|
||||
|
||||
# Update status to processing
|
||||
supabase.table("url_sources").update({"status": "processing"}).eq("id", source_id).execute()
|
||||
|
||||
@@ -480,7 +504,8 @@ async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase)
|
||||
}).eq("id", source_id).execute()
|
||||
|
||||
response_cache.invalidate(collection_name)
|
||||
logger.info(f"URL source {source_id} processed: {len(chunks)} chunks from {url}")
|
||||
logger.info(f"URL source {source_id} processed: {len(chunks)} chunks from {url} → collection='{collection_name}' company='{chatbot.get('company_id', '')}'")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"URL source processing error {source_id}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user