Fixed the RAG issue in the test pipeline

This commit is contained in:
belviskhoremk
2026-04-26 18:51:48 +00:00
parent 205d9d7901
commit 97a501097d
14 changed files with 249 additions and 57 deletions

View File

@@ -6,6 +6,7 @@ from app.services.document_processor import process_document
from app.services.embeddings import embedding_service
from app.services.vector_store import vector_store
from app.services.storage import delete_from_storage, extract_storage_path
from app.services import cache as response_cache
from app.config import settings
from typing import List
import uuid
@@ -166,6 +167,7 @@ async def _process_document_bg(
"chunk_count": len(chunks),
}).eq("id", doc_id).execute()
response_cache.invalidate(collection_name)
logger.info(f"Document {doc_id} processed: {len(chunks)} chunks")
except Exception as e:
@@ -211,6 +213,8 @@ async def delete_document(chatbot_id: str, document_id: str, user=Depends(get_cu
delete_from_storage(supabase, "documents", doc.data[0]["file_url"])
supabase.table("documents").delete().eq("id", document_id).execute()
if collection_name:
response_cache.invalidate(collection_name)
return SuccessResponse(success=True, message="Document deleted")
@@ -259,6 +263,11 @@ async def retry_document_processing(
"chunk_count": 0,
}).eq("id", document_id).execute()
# Clear stale cache before re-processing so tests see fresh results
collection_name = chatbot.get("qdrant_collection_name")
if collection_name:
response_cache.invalidate(collection_name)
# Re-enqueue background processing
background_tasks.add_task(
_process_document_bg,
@@ -340,10 +349,56 @@ async def delete_url_source(chatbot_id: str, source_id: str, user=Depends(get_cu
if not source.data:
raise HTTPException(status_code=404, detail="URL source not found")
chatbot = _get_user_chatbot(chatbot_id, user.id, supabase)
collection_name = chatbot.get("qdrant_collection_name")
if collection_name:
try:
vector_store.delete_by_document_id(collection_name, source_id)
except Exception:
pass
response_cache.invalidate(collection_name)
supabase.table("url_sources").delete().eq("id", source_id).execute()
return SuccessResponse(success=True, message="URL source deleted")
@url_router.post("/{source_id}/refresh", response_model=UrlSourceResponse)
async def refresh_url_source(
    chatbot_id: str,
    source_id: str,
    background_tasks: BackgroundTasks,
    user=Depends(get_current_user),
):
    """Re-scrape a URL source and rebuild its vectors.

    Looks up the ``url_sources`` row matching ``source_id`` and
    ``chatbot_id``, drops the vectors stored under that source id, clears
    the response cache for the chatbot's collection, resets the row to
    ``pending`` and schedules ``_process_url_source`` as a background task.

    Args:
        chatbot_id: Id of the chatbot that owns the URL source.
        source_id: Id of the ``url_sources`` row to refresh.
        background_tasks: FastAPI task queue used to schedule the re-scrape.
        user: Authenticated user resolved by ``get_current_user``.

    Returns:
        ``UrlSourceResponse`` built from the stored row with the reset
        fields (``status``, ``error_message``, ``chunk_count``) applied.

    Raises:
        HTTPException: 404 when no row matches ``source_id`` and
            ``chatbot_id``.
    """
    supabase = get_supabase()
    chatbot = _get_user_chatbot(chatbot_id, user.id, supabase)

    source = (
        supabase.table("url_sources")
        .select("*")
        .eq("id", source_id)
        .eq("chatbot_id", chatbot_id)
        .execute()
    )
    if not source.data:
        raise HTTPException(status_code=404, detail="URL source not found")
    src = source.data[0]

    collection_name = chatbot.get("qdrant_collection_name")

    # Drop existing vectors for this source. Deletion is best-effort: a
    # failure is logged but must not block the refresh itself.
    if collection_name:
        try:
            vector_store.delete_by_document_id(collection_name, source_id)
        except Exception as e:
            logger.warning(f"Could not delete old vectors for url source {source_id}: {e}")
        # Cached answers may have been built from the vectors just removed.
        response_cache.invalidate(collection_name)

    # Reset to pending and reprocess. The same mapping is written to the
    # database and overlaid on the response so the two cannot drift apart
    # (previously the response kept the stale error_message).
    reset_fields = {
        "status": "pending",
        "error_message": None,
        "chunk_count": 0,
    }
    # Same call shape as the other update statements in this module; the
    # result is not needed because the response is built from `src`.
    supabase.table("url_sources").update(reset_fields).eq("id", source_id).execute()

    background_tasks.add_task(_process_url_source, source_id, src["url"], chatbot, supabase)
    return UrlSourceResponse(**{**src, **reset_fields})
async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase):
"""Background task to scrape a URL and add its content to the vector store."""
from app.services.web_scraper import scrape_url
@@ -424,6 +479,7 @@ async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase)
"chunk_count": len(chunks),
}).eq("id", source_id).execute()
response_cache.invalidate(collection_name)
logger.info(f"URL source {source_id} processed: {len(chunks)} chunks from {url}")
except Exception as e: