from vector_rag import query_vector_store, llm  # <--- FIX: Import llm here!
import wikipedia
from typing import List, Dict

# REMOVED: All duplicate model/pipeline/tokenizer imports and initialization code.
# The 'llm' instance is now imported from vector_rag.py and is ready to use.

wikipedia.set_lang("en")


def format_conversation_context(history: List[Dict], max_messages: int = 10) -> str:
    """
    Formats conversation history into a context string for the LLM.
    Keeps only the most recent messages to prevent token overflow.

    Args:
        history: List of message dicts with 'role' and 'content' keys
        max_messages: Maximum number of messages to include (default: 10)

    Returns:
        Formatted conversation history string
    """
    if not history:
        return ""

    # Keep only the last N messages
    recent_history = history[-max_messages:]

    formatted_lines = []
    for msg in recent_history:
        role = "User" if msg["role"] == "user" else "Assistant"
        formatted_lines.append(f"{role}: {msg['content']}")

    return "\n".join(formatted_lines)


async def get_smart_rag_response(query: str, conversation_history: List[Dict] = None) -> tuple[str, str]:
    """
    Get a smart RAG response with conversation context.

    Args:
        query: The user's current question
        conversation_history: List of previous messages (optional)

    Returns:
        Tuple of (response, source)
    """
    print("Received Query:", query)

    if conversation_history is None:
        conversation_history = []

    # Format conversation history for context
    context_str = format_conversation_context(conversation_history)

    # First: try Wikipedia
    try:
        summary = wikipedia.summary(query, sentences=5)
        print("Wikipedia summary found.")

        # Build prompt with conversation context
        prompt = """You are a helpful assistant engaged in a conversation.
"""
        if context_str:
            prompt += f"""
Previous conversation:
{context_str}
"""
        prompt += f"""Use the following Wikipedia information to answer the current question as clearly as possible.
Wikipedia Context:
{summary}
Current question: {query}
Answer:"""

        result = llm.invoke(prompt)
        answer = result.replace(prompt, "").strip()
        return answer, "Wikipedia"
    except wikipedia.exceptions.PageError:
        print("Wikipedia page not found.")
    except wikipedia.exceptions.DisambiguationError as e:
        return f"The query is ambiguous. Did you mean: {', '.join(e.options[:5])}", "Wikipedia"

    # Second: fall back to the LLM with conversation context
    try:
        print("Fallback: LLM with conversation context")
        fallback_prompt = "You are a knowledgeable assistant engaged in a conversation.\n\n"
        if context_str:
            fallback_prompt += f"Previous conversation:\n{context_str}\n\n"
        fallback_prompt += f"Current question: {query}\nAnswer:"

        llm_answer = llm.invoke(fallback_prompt)
        answer = llm_answer.replace(fallback_prompt, "").strip()
        if answer and "not sure" not in answer.lower():
            return answer, "LLM"
    except Exception as e:
        print("Error during LLM fallback:", e)

    # Finally: fall back to local documents (vector store)
    try:
        print("Fallback: Local vector search")
        vector_answer = query_vector_store(query, conversation_history)
        if vector_answer:
            return vector_answer, "Local Document"
    except Exception as e:
        print("Error during local vector search:", e)

    return "Sorry, I couldn't find any information to answer your question.", "System"