mirror of
http://88.130.71.182:3000/BlitTech/contexta_be.git
synced 2026-06-12 23:23:21 +00:00
fixed bugs
This commit is contained in:
@@ -11,11 +11,11 @@ RAG_SYSTEM_PROMPT = """You are a helpful AI assistant for {company_name}.
|
||||
Your role is to answer questions based on the provided context from company documents.
|
||||
|
||||
IMPORTANT RULES:
|
||||
1. Only answer based on the provided context
|
||||
2. If information is not in the context, say "I don't have information about that in my knowledge base"
|
||||
1. Answer based on the provided context below
|
||||
2. If the context does not contain enough information, say so, but also try to be helpful with what IS available
|
||||
3. Be concise and helpful
|
||||
4. Always maintain a professional, friendly tone
|
||||
5. If asked about topics outside the context, politely redirect to relevant topics
|
||||
5. If asked about topics completely outside the context, politely redirect to relevant topics
|
||||
|
||||
{custom_instructions}
|
||||
|
||||
@@ -47,8 +47,9 @@ class RAGEngine:
|
||||
# Step 1: Embed the query
|
||||
try:
|
||||
query_embedding = self.embedding_svc.embed_text(query)
|
||||
logger.info(f"[RAG] Query embedded successfully. Vector length: {len(query_embedding)}")
|
||||
except Exception as e:
|
||||
logger.error(f"Embedding error: {e}")
|
||||
logger.error(f"[RAG] Embedding error: {e}", exc_info=True)
|
||||
return {
|
||||
"response": "I'm having trouble processing your request. Please try again.",
|
||||
"sources": [],
|
||||
@@ -57,13 +58,22 @@ class RAGEngine:
|
||||
}
|
||||
|
||||
# Step 2: Retrieve relevant chunks
|
||||
# FIX: Lowered score_threshold from 0.3 to 0.1 to avoid filtering out
|
||||
# all results. With cosine similarity, 0.3 can be too aggressive for
|
||||
# many document types and query patterns.
|
||||
retrieved = self.vector_svc.search(
|
||||
collection_name=collection_name,
|
||||
query_vector=query_embedding,
|
||||
limit=5,
|
||||
score_threshold=0.3,
|
||||
score_threshold=0.1, # FIX: was 0.3, now 0.1 to avoid over-filtering
|
||||
)
|
||||
|
||||
logger.info(f"[RAG] Retrieved {len(retrieved)} chunks from collection '{collection_name}'")
|
||||
for i, item in enumerate(retrieved):
|
||||
score = item.get("score", 0)
|
||||
text_preview = item.get("payload", {}).get("text", "")[:80]
|
||||
logger.info(f"[RAG] Chunk {i+1}: score={score:.4f}, preview='{text_preview}...'")
|
||||
|
||||
# Step 3: Build sources
|
||||
sources = []
|
||||
context_parts = []
|
||||
@@ -84,7 +94,12 @@ class RAGEngine:
|
||||
)
|
||||
)
|
||||
|
||||
context = "\n\n---\n\n".join(context_parts) if context_parts else "No relevant information found."
|
||||
if context_parts:
|
||||
context = "\n\n---\n\n".join(context_parts)
|
||||
logger.info(f"[RAG] Built context from {len(context_parts)} chunks ({len(context)} chars)")
|
||||
else:
|
||||
context = "No relevant information found in the knowledge base."
|
||||
logger.warning(f"[RAG] No context found for query: '{query}' in collection '{collection_name}'")
|
||||
|
||||
# Step 4: Build messages
|
||||
system_prompt = RAG_SYSTEM_PROMPT.format(
|
||||
@@ -95,13 +110,18 @@ class RAGEngine:
|
||||
|
||||
messages = [{"role": "system", "content": system_prompt}]
|
||||
|
||||
# Add conversation history (last 10 messages)
|
||||
for msg in conversation_history[-10:]:
|
||||
# FIX: Conversation history must be in CHRONOLOGICAL order (oldest first).
|
||||
# The history should already come sorted ascending from the chat router.
|
||||
# We take the last 10 messages for context window management.
|
||||
history_to_use = conversation_history[-10:] if conversation_history else []
|
||||
for msg in history_to_use:
|
||||
messages.append({"role": msg["role"], "content": msg["content"]})
|
||||
|
||||
# Add current query
|
||||
messages.append({"role": "user", "content": query})
|
||||
|
||||
logger.info(f"[RAG] Sending {len(messages)} messages to LLM (model: {chatbot_config.get('model')})")
|
||||
|
||||
# Step 5: Generate response
|
||||
model = chatbot_config.get("model", "accounts/fireworks/models/kimi-k2-instruct-0905")
|
||||
try:
|
||||
@@ -111,6 +131,7 @@ class RAGEngine:
|
||||
max_tokens=chatbot_config.get("max_tokens", 1000),
|
||||
temperature=chatbot_config.get("temperature", 0.7),
|
||||
)
|
||||
logger.info(f"[RAG] LLM response generated. Tokens used: {result.get('tokens_used', 0)}")
|
||||
return {
|
||||
"response": result["content"],
|
||||
"sources": sources,
|
||||
@@ -118,7 +139,7 @@ class RAGEngine:
|
||||
"model": result.get("model", model),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"LLM generation error: {e}")
|
||||
logger.error(f"[RAG] LLM generation error: {e}", exc_info=True)
|
||||
return {
|
||||
"response": "I'm having trouble generating a response. Please try again later.",
|
||||
"sources": sources,
|
||||
@@ -127,4 +148,4 @@ class RAGEngine:
|
||||
}
|
||||
|
||||
|
||||
rag_engine = RAGEngine()
|
||||
rag_engine = RAGEngine()
|
||||
Reference in New Issue
Block a user