mirror of
http://88.130.71.182:3000/BlitTech/contexta_be.git
synced 2026-06-12 23:23:21 +00:00
Fixed the RAG issue in the test pipeline
This commit is contained in:
@@ -6,6 +6,7 @@ from app.services.document_processor import process_document
|
||||
from app.services.embeddings import embedding_service
|
||||
from app.services.vector_store import vector_store
|
||||
from app.services.storage import delete_from_storage, extract_storage_path
|
||||
from app.services import cache as response_cache
|
||||
from app.config import settings
|
||||
from typing import List
|
||||
import uuid
|
||||
@@ -166,6 +167,7 @@ async def _process_document_bg(
|
||||
"chunk_count": len(chunks),
|
||||
}).eq("id", doc_id).execute()
|
||||
|
||||
response_cache.invalidate(collection_name)
|
||||
logger.info(f"Document {doc_id} processed: {len(chunks)} chunks")
|
||||
|
||||
except Exception as e:
|
||||
@@ -211,6 +213,8 @@ async def delete_document(chatbot_id: str, document_id: str, user=Depends(get_cu
|
||||
delete_from_storage(supabase, "documents", doc.data[0]["file_url"])
|
||||
|
||||
supabase.table("documents").delete().eq("id", document_id).execute()
|
||||
if collection_name:
|
||||
response_cache.invalidate(collection_name)
|
||||
return SuccessResponse(success=True, message="Document deleted")
|
||||
|
||||
|
||||
@@ -259,6 +263,11 @@ async def retry_document_processing(
|
||||
"chunk_count": 0,
|
||||
}).eq("id", document_id).execute()
|
||||
|
||||
# Clear stale cache before re-processing so tests see fresh results
|
||||
collection_name = chatbot.get("qdrant_collection_name")
|
||||
if collection_name:
|
||||
response_cache.invalidate(collection_name)
|
||||
|
||||
# Re-enqueue background processing
|
||||
background_tasks.add_task(
|
||||
_process_document_bg,
|
||||
@@ -340,10 +349,56 @@ async def delete_url_source(chatbot_id: str, source_id: str, user=Depends(get_cu
|
||||
if not source.data:
|
||||
raise HTTPException(status_code=404, detail="URL source not found")
|
||||
|
||||
chatbot = _get_user_chatbot(chatbot_id, user.id, supabase)
|
||||
collection_name = chatbot.get("qdrant_collection_name")
|
||||
if collection_name:
|
||||
try:
|
||||
vector_store.delete_by_document_id(collection_name, source_id)
|
||||
except Exception:
|
||||
pass
|
||||
response_cache.invalidate(collection_name)
|
||||
supabase.table("url_sources").delete().eq("id", source_id).execute()
|
||||
return SuccessResponse(success=True, message="URL source deleted")
|
||||
|
||||
|
||||
@url_router.post("/{source_id}/refresh", response_model=UrlSourceResponse)
async def refresh_url_source(
    chatbot_id: str,
    source_id: str,
    background_tasks: BackgroundTasks,
    user=Depends(get_current_user),
):
    """Re-scrape a URL source and rebuild its vectors.

    Looks up the URL source row belonging to ``chatbot_id``, removes the
    vectors currently stored for it, invalidates the response cache for the
    chatbot's collection, resets the row to ``pending`` and enqueues
    ``_process_url_source`` as a background task.

    Args:
        chatbot_id: ID of the chatbot that owns the URL source.
        source_id: ID of the ``url_sources`` row to refresh.
        background_tasks: Task queue used to schedule the re-scrape.
        user: Authenticated user, resolved by ``get_current_user``.

    Returns:
        A ``UrlSourceResponse`` built from the stored row with the freshly
        reset fields (``status``, ``error_message``, ``chunk_count``) applied.

    Raises:
        HTTPException: 404 when no URL source with ``source_id`` exists for
            ``chatbot_id``. ``_get_user_chatbot`` presumably raises as well
            when the chatbot is not owned by the user -- confirm in its
            definition.
    """
    supabase = get_supabase()
    chatbot = _get_user_chatbot(chatbot_id, user.id, supabase)

    source = (
        supabase.table("url_sources")
        .select("*")
        .eq("id", source_id)
        .eq("chatbot_id", chatbot_id)
        .execute()
    )
    if not source.data:
        raise HTTPException(status_code=404, detail="URL source not found")

    src = source.data[0]
    collection_name = chatbot.get("qdrant_collection_name")

    # Drop existing vectors for this source. Deletion is best-effort: a
    # failure is logged and the refresh continues, but cached responses for
    # the collection are invalidated either way.
    if collection_name:
        try:
            vector_store.delete_by_document_id(collection_name, source_id)
        except Exception as e:
            logger.warning(f"Could not delete old vectors for url source {source_id}: {e}")
        response_cache.invalidate(collection_name)

    # Reset to pending and reprocess. The update uses the same
    # update(...).eq(...).execute() chain as the other handlers in this
    # module; no ".returning(...)" call is chained onto the filter builder.
    reset_fields = {
        "status": "pending",
        "error_message": None,
        "chunk_count": 0,
    }
    supabase.table("url_sources").update(reset_fields).eq("id", source_id).execute()

    background_tasks.add_task(_process_url_source, source_id, src["url"], chatbot, supabase)

    # Overlay the reset values on the previously fetched row so the response
    # reflects what was just written (including the cleared error message).
    return UrlSourceResponse(**{**src, **reset_fields})
|
||||
|
||||
|
||||
async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase):
|
||||
"""Background task to scrape a URL and add its content to the vector store."""
|
||||
from app.services.web_scraper import scrape_url
|
||||
@@ -424,6 +479,7 @@ async def _process_url_source(source_id: str, url: str, chatbot: dict, supabase)
|
||||
"chunk_count": len(chunks),
|
||||
}).eq("id", source_id).execute()
|
||||
|
||||
response_cache.invalidate(collection_name)
|
||||
logger.info(f"URL source {source_id} processed: {len(chunks)} chunks from {url}")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user