from vector_rag import query_vector_store, llm  # <--- FIX: Import llm here!
import wikipedia
from typing import List, Dict

# REMOVED: All duplicate model/pipeline/tokenizer imports and initialization code.
# The 'llm' instance is now imported from vector_rag.py and is ready to use.

wikipedia.set_lang("en")


def format_conversation_context(history: List[Dict], max_messages: int = 10) -> str:
    """
    Formats conversation history into a context string for the LLM.
    Keeps only the most recent messages to prevent token overflow.

    Args:
        history: List of message dicts with 'role' and 'content' keys
        max_messages: Maximum number of messages to include (default: 10)

    Returns:
        Formatted conversation history string
    """
    if not history:
        return ""

    # Keep only the last N messages
    recent_history = history[-max_messages:]

    formatted_lines = []
    for msg in recent_history:
        role = "User" if msg["role"] == "user" else "Assistant"
        formatted_lines.append(f"{role}: {msg['content']}")

    return "\n".join(formatted_lines)


async def get_smart_rag_response(query: str, conversation_history: List[Dict] = None) -> tuple[str, str]:
    """
    Get a smart RAG response with conversation context.

    Args:
        query: The user's current question
        conversation_history: List of previous messages (optional)

    Returns:
        Tuple of (response, source)
    """
    print("Received Query:", query)

    if conversation_history is None:
        conversation_history = []

    # Format conversation history for context
    context_str = format_conversation_context(conversation_history)

    # First: Try Wikipedia
    try:
        summary = wikipedia.summary(query, sentences=5)
        print("Wikipedia summary found.")

        # Build prompt with conversation context
        prompt = "You are a helpful assistant engaged in a conversation.\n\n"
        if context_str:
            prompt += f"Previous conversation:\n{context_str}\n\n"
        prompt += f"""Use the following Wikipedia information to answer the current question as clearly as possible.

Wikipedia Context:
{summary}

Current question: {query}
Answer:"""

        result = llm.invoke(prompt)
        answer = result.replace(prompt, "").strip()
        return answer, "Wikipedia"

    except wikipedia.exceptions.PageError:
        print("Wikipedia page not found.")
    except wikipedia.exceptions.DisambiguationError as e:
        return f"The query is ambiguous. Did you mean: {', '.join(e.options[:5])}", "Wikipedia"

    # Second: Fallback to LLM with conversation context
    try:
        print("Fallback: LLM with conversation context")
        fallback_prompt = "You are a knowledgeable assistant engaged in a conversation.\n\n"
        if context_str:
            fallback_prompt += f"Previous conversation:\n{context_str}\n\n"
        fallback_prompt += f"Current question: {query}\nAnswer:"

        llm_answer = llm.invoke(fallback_prompt)
        answer = llm_answer.replace(fallback_prompt, "").strip()
        if answer and "not sure" not in answer.lower():
            return answer, "LLM"
    except Exception as e:
        print("Error during LLM fallback:", e)

    # Finally: Fallback to Local Documents
    try:
        print("Fallback: Local vector search")
        vector_answer = query_vector_store(query, conversation_history)
        if vector_answer:
            return vector_answer, "Local Document"
    except Exception as e:
        print("Error during local vector search:", e)

    return "Sorry, I couldn't find any information to answer your question.", "System"
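

# --- Usage sketch (not part of the original module; added for illustration) ---
# A minimal way to exercise get_smart_rag_response from the command line,
# assuming vector_rag.py and its 'llm' instance are importable and Wikipedia
# is reachable. The sample query and history below are hypothetical.
if __name__ == "__main__":
    import asyncio

    sample_history = [
        {"role": "user", "content": "Hi, can you help me with some geography questions?"},
        {"role": "assistant", "content": "Of course! Ask away."},
    ]

    # get_smart_rag_response is async, so drive it with asyncio.run and
    # unpack the (response, source) tuple it returns.
    answer, source = asyncio.run(
        get_smart_rag_response("What is the capital of France?", sample_history)
    )
    print(f"[{source}] {answer}")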