Spaces:

Zeri00
/

Cogni-Chat-document-reader-v2

Running

App Files Files Community

riteshraut committed 9 days ago

Commit

f7d42c1

1 Parent(s): 2f22e27

fix

Browse files

Files changed (8) hide show

- Copy.gitattributes +0 -35
- Copy.gitignore +0 -6
.env - Copy.example +0 -2
app.py +207 -172
evaluate.py +205 -0
query_expansion.py +525 -0
rag_processor.py +446 -60
templates/index.html +230 -345

- Copy.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

- Copy.gitignore DELETED Viewed

@@ -1,6 +0,0 @@
-.env
-/uploads/
-/vectorstores/
-/.cache/
-__pycache__/
-*.pyc

.env - Copy.example DELETED Viewed

	@@ -1,2 +0,0 @@
1	- # Copy this file to .env and fill in your API key
2	- GROQ_API_KEY=your_groq_api_key_here

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import os
-import time
 import uuid
 from flask import Flask, request, render_template, session, jsonify, Response
 from werkzeug.utils import secure_filename
@@ -10,55 +9,143 @@ import re
 import io
 from gtts import gTTS
 from langchain_core.documents import Document
-from langchain_community.document_loaders import (
-    TextLoader,
-    Docx2txtLoader,
-)
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_experimental.text_splitter import SemanticChunker
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.retrievers import EnsembleRetriever
 from langchain_community.retrievers import BM25Retriever
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.storage import InMemoryStore
 app = Flask(__name__)
 app.config['SECRET_KEY'] = os.urandom(24)
 is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
-if is_hf_spaces:
-    app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
-else:
-    app.config['UPLOAD_FOLDER'] = 'uploads'
 try:
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-    print(f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 except Exception as e:
-    print(f"Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {e}")
     app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-    print(f"Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
-rag_chains = {}
 message_histories = {}
-doc_stores = {} # To hold the InMemoryStore for each session
-print("Loading embedding model...")
 try:
-    hf_token = os.getenv("HF_TOKEN")
     EMBEDDING_MODEL = HuggingFaceEmbeddings(
-        model_name="google/embeddinggemma-300m",
         model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': True},
     )
-    print("Embedding model loaded successfully.")
-except Exception as e:
-    print(f"FATAL: Could not load embedding model. Error: {e}")
-    raise
 def load_pdf_with_fallback(filepath):
     try:
@@ -66,51 +153,40 @@ def load_pdf_with_fallback(filepath):
         with fitz.open(filepath) as pdf_doc:
             for page_num, page in enumerate(pdf_doc):
                 text = page.get_text()
-                if text.strip():
-                    docs.append(Document(
-                        page_content=text,
-                        metadata={
-                            "source": os.path.basename(filepath),
-                            "page": page_num + 1,
-                        }
-                    ))
         if docs:
-            print(f"Successfully loaded PDF with PyMuPDF: {filepath}")
             return docs
-        else:
-            raise ValueError("No text content found in PDF.")
-    except Exception as e:
-        print(f"PyMuPDF failed for {filepath}: {e}")
-        raise
-LOADER_MAPPING = {
-    ".txt": TextLoader,
-    ".pdf": load_pdf_with_fallback,
-    ".docx": Docx2txtLoader,
-}
 def get_session_history(session_id: str) -> ChatMessageHistory:
-    if session_id not in message_histories:
-        message_histories[session_id] = ChatMessageHistory()
     return message_histories[session_id]
 @app.route('/health', methods=['GET'])
-def health_check():
-    return jsonify({'status': 'healthy'}), 200
 @app.route('/', methods=['GET'])
-def index():
-    return render_template('index.html')
 @app.route('/upload', methods=['POST'])
 def upload_files():
     files = request.files.getlist('file')
     if not files or all(f.filename == '' for f in files):
         return jsonify({'status': 'error', 'message': 'No selected files.'}), 400
-    all_docs = []
-    processed_files, failed_files = [], []
     for file in files:
         if file and file.filename:
             filename = secure_filename(file.filename)
@@ -118,169 +194,128 @@ def upload_files():
             try:
                 file.save(filepath)
                 file_ext = os.path.splitext(filename)[1].lower()
-                if file_ext not in LOADER_MAPPING:
-                    raise ValueError("Unsupported file format.")
                 loader_func = LOADER_MAPPING[file_ext]
                 docs = loader_func(filepath) if file_ext == ".pdf" else loader_func(filepath).load()
-                if not docs:
-                    raise ValueError("No content extracted.")
                 all_docs.extend(docs)
                 processed_files.append(filename)
-                print(f"✓ Successfully processed: {filename}")
             except Exception as e:
-                error_msg = str(e)
-                print(f"✗ Error processing {filename}: {error_msg}")
-                failed_files.append(f"{filename} ({error_msg})")
     if not all_docs:
-        error_summary = "Failed to process all files."
-        if failed_files:
-            error_summary += " Reasons: " + ", ".join(failed_files)
-        return jsonify({'status': 'error', 'message': error_summary}), 400
     try:
-        print("Starting RAG pipeline setup...")
-        parent_splitter =RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=300,
-                                                        separators=["\n\n", "\n", ". ", " ", ""],  # Prioritize natural breaks
-                                                        length_function=len)
-        child_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=100)
-        parent_docs = parent_splitter.split_documents(all_docs)
-        doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
-        child_docs = []
-        for i, doc in enumerate(parent_docs):
-            _id = doc_ids[i]
-            sub_docs = child_splitter.split_documents([doc])
-            for child in sub_docs:
-                child.metadata["doc_id"] = _id
-            child_docs.extend(sub_docs)
-        store = InMemoryStore()
-        store.mset(list(zip(doc_ids, parent_docs)))
         vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
-        print(f"Stored {len(parent_docs)} parent docs and indexed {len(child_docs)} child docs.")
-        bm25_retriever = BM25Retriever.from_documents(child_docs)
-        bm25_retriever.k = 3
-        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
-        ensemble_retriever = EnsembleRetriever(
-            retrievers=[bm25_retriever, faiss_retriever],
-            weights=[0.4, 0.6]
-        )
-        print("Created Hybrid Retriever for child documents.")
         session_id = str(uuid.uuid4())
-        doc_stores[session_id] = store
-        rag_chain_components = create_rag_chain(ensemble_retriever, get_session_history, EMBEDDING_MODEL, store)
-        rag_chains[session_id] = rag_chain_components
-        session['session_id'] = session_id
-        success_msg = f"Successfully processed: {', '.join(processed_files)}"
-        if failed_files:
-            success_msg += f"\nFailed to process: {', '.join(failed_files)}"
         return jsonify({
             'status': 'success',
-            'filename': success_msg,
-            'session_id': session_id
         })
     except Exception as e:
-        import traceback
-        traceback.print_exc()
-        return jsonify({'status': 'error', 'message': f'Failed during RAG setup: {e}'}), 500
 @app.route('/chat', methods=['POST'])
 def chat():
     data = request.get_json()
-    question = data.get('question')
-    session_id = session.get('session_id') or data.get('session_id')
-    if not question or not session_id or session_id not in rag_chains:
-        return jsonify({'status': 'error', 'message': 'Invalid session or no question provided.'}), 400
     try:
-        chain_components = rag_chains[session_id]
-        config = {"configurable": {"session_id": session_id}}
-        print("\n" + "="*50)
-        print("--- STARTING DIAGNOSTIC RUN ---")
-        print(f"Original Question: {question}")
-        print("="*50 + "\n")
-        rewritten_query = chain_components["rewriter"].invoke({"question": question, "chat_history": get_session_history(session_id).messages})
-        #print(f"--- 1. Rewritten Query ---\n{rewritten_query}\n")
-        hyde_doc = chain_components["hyde"].invoke({"question": rewritten_query})
-        #print(f"--- 2. HyDE Document ---\n{hyde_doc}\n")
-        final_retrieved_docs = chain_components["base_retriever"].get_relevant_documents(hyde_doc)
-        #print(f"--- 3. Retrieved Top {len(final_retrieved_docs)} Child Docs ---")
-        #for i, doc in enumerate(final_retrieved_docs):
-            #print(f"  Doc {i+1}: {doc.page_content[:150]}... (Source: {doc.metadata.get('source')})")
-        #print("\n")
-        final_context_docs = chain_components["parent_fetcher"].invoke(final_retrieved_docs)
-        #print(f"--- 4. Final {len(final_context_docs)} Parent Docs for LLM ---")
-        #for i, doc in enumerate(final_context_docs):
-            #print(f"  Final Doc {i+1} (Source: {doc.metadata.get('source')}, Page: {doc.metadata.get('page')}):\n  '{doc.page_content[:300]}...'\n---")
-        #print("="*50)
-        #print("--- INVOKING FINAL CHAIN ---")
-        #print("="*50 + "\n")
-        answer_string = chain_components["final_chain"].invoke({"question": question}, config=config)
-        return jsonify({'answer': answer_string})
     except Exception as e:
-        import traceback
-        traceback.print_exc()
-        return jsonify({'status': 'error', 'message': 'An error occurred while getting the answer.'}), 500
 def clean_markdown_for_tts(text: str) -> str:
-    text = re.sub(r'\*(\*?)(.*?)\1\*', r'\2', text)
-    text = re.sub(r'\_(.*?)\_', r'\1', text)
-    text = re.sub(r'`(.*?)`', r'\1', text)
-    text = re.sub(r'^\s*#{1,6}\s+', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*[\*\-]\s+', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)
-    text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
     text = re.sub(r'\n+', ' ', text)
     return text.strip()
 @app.route('/tts', methods=['POST'])
 def text_to_speech():
-    data = request.get_json()
-    text = data.get('text')
-    if not text:
-        return jsonify({'status': 'error', 'message': 'No text provided.'}), 400
     try:
-        clean_text = clean_markdown_for_tts(text)
-        tts = gTTS(clean_text, lang='en')
-        mp3_fp = io.BytesIO()
-        tts.write_to_fp(mp3_fp)
-        mp3_fp.seek(0)
         return Response(mp3_fp, mimetype='audio/mpeg')
     except Exception as e:
-        print(f"Error in TTS generation: {e}")
         return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
 if __name__ == '__main__':
     port = int(os.environ.get("PORT", 7860))
-    app.run(host="0.0.0.0", port=port, debug=False)

 import os
 import uuid
 from flask import Flask, request, render_template, session, jsonify, Response
 from werkzeug.utils import secure_filename
 import io
 from gtts import gTTS
 from langchain_core.documents import Document
+from langchain_community.document_loaders import TextLoader, Docx2txtLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
+from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
+from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
 from langchain_community.retrievers import BM25Retriever
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.storage import InMemoryStore
+from sentence_transformers.cross_encoder import CrossEncoder
 app = Flask(__name__)
+# NOTE(review): a fresh random key is generated on every process start, so session
+# cookies signed by an earlier process will presumably stop validating — confirm acceptable.
 app.config['SECRET_KEY'] = os.urandom(24)
+# --- FIX: Use STRING keys for the dictionary ---
+# Maps temperature strings (from the form) to the mode labels.
+# Lookups are exact string matches: a value such as "1" or "0.20" is not found,
+# and the callers in this file then fall back to the raw temperature string.
+TEMPERATURE_LABELS = {
+    "0.2": "Precise",
+    "0.4": "Confident",
+    "0.6": "Balanced",
+    "0.8": "Flexible",
+    "1.0": "Creative"
+}
+class LocalReranker(BaseDocumentCompressor):
+    """Document compressor that re-ranks documents with a scoring model.
+
+    Every (query, document text) pair is scored through ``self.model.predict``;
+    the ``top_n`` highest-scoring documents are returned, best first.
+    """
+    # Scoring model; the only member used here is predict(pairs, show_progress_bar=False).
+    model: Any
+    # Maximum number of documents returned by compress_documents.
+    top_n: int = 5
+    class Config:
+        # NOTE(review): pydantic-style config flag, presumably so the model object can be
+        # held as a field — confirm it is still required with `model: Any`.
+        arbitrary_types_allowed = True
+    def compress_documents(
+        self, documents: Sequence[Document], query: str, callbacks=None
+    ) -> Sequence[Document]:
+        """Return the ``top_n`` documents with the highest score for ``query``.
+
+        Args:
+            documents: Candidate documents; their ``page_content`` is scored.
+            query: Text paired with each document for scoring.
+            callbacks: Accepted for interface compatibility; not used here.
+
+        Returns:
+            At most ``top_n`` of the input documents in descending score order,
+            or an empty list when ``documents`` is empty.
+
+        Side effect: ``rerank_score`` (a float) is written into the metadata of
+        each returned document; the input objects themselves are mutated.
+        """
+        if not documents:
+            return []
+        # One [query, text] pair per candidate, in input order.
+        pairs = [[query, doc.page_content] for doc in documents]
+        scores = self.model.predict(pairs, show_progress_bar=False)
+        doc_scores = list(zip(documents, scores))
+        # Higher score sorts first.
+        sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1], reverse=True)
+        top_docs = []
+        for doc, score in sorted_doc_scores[: self.top_n]:
+            # float() converts the model's score type into a plain Python float.
+            doc.metadata["rerank_score"] = float(score)
+            top_docs.append(doc)
+        return top_docs
+def create_optimized_parent_child_chunks(all_docs):
+    """Split documents into parent chunks and smaller child chunks linked by id.
+
+    Args:
+        all_docs: Documents to split (passed to ``split_documents``).
+
+    Returns:
+        A tuple ``(parent_docs, child_docs, doc_ids)``. ``doc_ids[i]`` is the
+        generated UUID string for ``parent_docs[i]``, and every child carries
+        its parent's id in ``metadata["doc_id"]``. Returns ``([], [], [])``
+        when ``all_docs`` is empty.
+    """
+    if not all_docs:
+        print("❌ CHUNKING: No input documents provided!")
+        return [], [], []
+    # Parents: 900-character chunks with 200 overlap; paragraph breaks are tried first.
+    parent_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=900, chunk_overlap=200, separators=["\n\n", "\n", ". ", "! ", "? ", "; ", ", ", " ", ""]
+    )
+    # Children: 350-character chunks with 80 overlap, cut from each parent individually.
+    child_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=350, chunk_overlap=80, separators=["\n", ". ", "! ", "? ", "; ", ", ", " ", ""]
+    )
+    parent_docs = parent_splitter.split_documents(all_docs)
+    # One random UUID per parent, positionally aligned with parent_docs.
+    doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
+    child_docs = []
+    for i, parent_doc in enumerate(parent_docs):
+        parent_id = doc_ids[i]
+        children = child_splitter.split_documents([parent_doc])
+        for j, child in enumerate(children):
+            # Record the parent link and the child's position within that parent.
+            child.metadata.update({
+                "doc_id": parent_id, "chunk_index": j, "total_chunks": len(children),
+                "is_first_chunk": j == 0, "is_last_chunk": j == len(children) - 1,
+            })
+            # Position markers are written into page_content itself, and only when the
+            # parent produced more than one child.
+            # NOTE(review): the LAST child gets "[Continues...] " and middle children get
+            # no marker — confirm this is the intended labelling.
+            if len(children) > 1:
+                if j == 0: child.page_content = "[Beginning] " + child.page_content
+                elif j == len(children) - 1: child.page_content = "[Continues...] " + child.page_content
+            child_docs.append(child)
+    print(f"✅ CHUNKING: Created {len(parent_docs)} parent and {len(child_docs)} child chunks.")
+    return parent_docs, child_docs, doc_ids
+def get_context_aware_parents(docs: List[Document], store: InMemoryStore) -> List[Document]:
+    """Replace retrieved child chunks with their parents, enriched with child excerpts.
+
+    Args:
+        docs: Retrieved child documents; the parent id is read from
+            ``metadata["doc_id"]``. Documents without a truthy ``doc_id`` are ignored.
+        store: Store queried with ``mget`` for the parent documents.
+
+    Returns:
+        New ``Document`` objects, one per parent found in ``store``, sorted by
+        the number of matching children (most first). Each holds the parent
+        text followed by up to three child excerpts, and its metadata gains
+        ``child_relevance_score`` and ``matching_children`` (both the number of
+        retrieved children for that parent). Returns ``[]`` for empty input.
+    """
+    if not docs: return []
+    # parent_scores: parent id -> number of retrieved children;
+    # child_content_by_parent: parent id -> texts of those children, in retrieval order.
+    parent_scores, child_content_by_parent = {}, {}
+    for doc in docs:
+        parent_id = doc.metadata.get("doc_id")
+        if parent_id:
+            parent_scores[parent_id] = parent_scores.get(parent_id, 0) + 1
+            if parent_id not in child_content_by_parent: child_content_by_parent[parent_id] = []
+            child_content_by_parent[parent_id].append(doc.page_content)
+    parent_ids = list(parent_scores.keys())
+    # The loop below pairs parents[i] with parent_ids[i], i.e. it relies on mget
+    # returning results in the order of the requested keys.
+    parents = store.mget(parent_ids)
+    enhanced_parents = []
+    for i, parent in enumerate(parents):
+        if parent is not None:
+            parent_id = parent_ids[i]
+            if parent_id in child_content_by_parent:
+                # Only the first three matching children are quoted.
+                child_excerpts = "\n".join(child_content_by_parent[parent_id][:3])
+                enhanced_content = f"{parent.page_content}\n\nRelevant excerpts:\n{child_excerpts}"
+                # A new Document is built, so the stored parent is left unmodified.
+                enhanced_parent = Document(
+                    page_content=enhanced_content,
+                    metadata={**parent.metadata, "child_relevance_score": parent_scores[parent_id], "matching_children": len(child_content_by_parent[parent_id])}
+                )
+                enhanced_parents.append(enhanced_parent)
+        else:
+            # Missing parents are reported and skipped rather than raising.
+            print(f"❌ PARENT_FETCH: Parent {parent_ids[i]} not found in store!")
+    # list.sort is stable, so parents with equal counts keep first-seen order.
+    enhanced_parents.sort(key=lambda p: p.metadata.get("child_relevance_score", 0), reverse=True)
+    return enhanced_parents
+# True when either the SPACE_ID or SPACES_ZERO_GPU environment variable is set.
 is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
+app.config['UPLOAD_FOLDER'] = '/tmp/uploads' if is_hf_spaces else 'uploads'
 try:
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+    print(f"📁 Upload folder ready: {app.config['UPLOAD_FOLDER']}")
 except Exception as e:
+    print(f"❌ Failed to create upload folder, falling back to /tmp: {e}")
+    # Second attempt is not guarded: if /tmp/uploads cannot be created either, import fails.
     app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
     os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+# session id -> dict with 'chain', 'model_name', 'temperature', 'api_key_manager'
+# (filled by the upload route). No eviction is visible in this diff.
+session_data = {}
+# session id -> ChatMessageHistory, created on first use by get_session_history.
 message_histories = {}
+print("🔄 Loading embedding model...")
 try:
     EMBEDDING_MODEL = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2",
         model_kwargs={'device': 'cpu'},
+        encode_kwargs={'normalize_embeddings': True}
     )
+    print("✅ Embedding model loaded.")
+# The failure is logged and re-raised, so the module does not finish importing without the model.
+except Exception as e: print(f"❌ FATAL: Could not load embedding model. Error: {e}"); raise e
+print("🔄 Loading reranker model...")
+try:
+    RERANKER_MODEL = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
+    print("✅ Reranker model loaded.")
+# Same policy as above: log, then re-raise.
+except Exception as e: print(f"❌ FATAL: Could not load reranker model. Error: {e}"); raise e
 def load_pdf_with_fallback(filepath):
     try:
         with fitz.open(filepath) as pdf_doc:
             for page_num, page in enumerate(pdf_doc):
                 text = page.get_text()
+                if text.strip(): docs.append(Document(page_content=text, metadata={"source": os.path.basename(filepath), "page": page_num + 1}))
         if docs:
+            print(f"✅ Loaded PDF: {os.path.basename(filepath)} - {len(docs)} pages")
             return docs
+        else: raise ValueError("No text content found in PDF.")
+    except Exception as e: print(f"❌ PyMuPDF failed for {filepath}: {e}"); raise
+LOADER_MAPPING = {".txt": TextLoader, ".pdf": load_pdf_with_fallback, ".docx": Docx2txtLoader}
 def get_session_history(session_id: str) -> ChatMessageHistory:
+    """Return the chat history for ``session_id``, creating an empty one on first use.
+
+    Histories live in the module-level ``message_histories`` dict.
+    """
+    if session_id not in message_histories: message_histories[session_id] = ChatMessageHistory()
     return message_histories[session_id]
 @app.route('/health', methods=['GET'])
+# Health probe: always answers {'status': 'healthy'} with HTTP 200.
+def health_check(): return jsonify({'status': 'healthy'}), 200
 @app.route('/', methods=['GET'])
+# Landing page: renders templates/index.html.
+def index(): return render_template('index.html')
 @app.route('/upload', methods=['POST'])
 def upload_files():
     files = request.files.getlist('file')
+    # Get temperature as a string for the dictionary key
+    temperature_str = request.form.get('temperature', '0.2')
+    temperature = float(temperature_str) # Convert to float for the LLM
+    model_name = request.form.get('model_name', 'moonshotai/kimi-k2-instruct')
+    print(f"⚙️ UPLOAD: Model: {model_name}, Temp: {temperature}")
     if not files or all(f.filename == '' for f in files):
         return jsonify({'status': 'error', 'message': 'No selected files.'}), 400
+    all_docs, processed_files, failed_files = [], [], []
+    print(f"📁 Processing {len(files)} file(s)...")
     for file in files:
         if file and file.filename:
             filename = secure_filename(file.filename)
             try:
                 file.save(filepath)
                 file_ext = os.path.splitext(filename)[1].lower()
+                if file_ext not in LOADER_MAPPING: raise ValueError("Unsupported file format.")
                 loader_func = LOADER_MAPPING[file_ext]
                 docs = loader_func(filepath) if file_ext == ".pdf" else loader_func(filepath).load()
+                if not docs: raise ValueError("No content extracted.")
                 all_docs.extend(docs)
                 processed_files.append(filename)
             except Exception as e:
+                print(f"✗ Error processing {filename}: {e}")
+                failed_files.append(f"{filename} ({e})")
     if not all_docs:
+        return jsonify({'status': 'error', 'message': f"Failed to process all files. Reasons: {', '.join(failed_files)}"}), 400
+    print(f"✅ UPLOAD: Processed {len(processed_files)} files.")
     try:
+        print("🔄 Starting RAG pipeline setup...")
+        parent_docs, child_docs, doc_ids = create_optimized_parent_child_chunks(all_docs)
+        if not child_docs: raise ValueError("No child documents created during chunking.")
         vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
+        store = InMemoryStore(); store.mset(list(zip(doc_ids, parent_docs)))
+        print(f"✅ Indexed {len(child_docs)} document chunks.")
+        bm25_retriever = BM25Retriever.from_documents(child_docs); bm25_retriever.k = 12
+        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 12})
+        ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.6, 0.4])
+        reranker = LocalReranker(model=RERANKER_MODEL, top_n=5)
+        def get_parents(docs: List[Document]) -> List[Document]: return get_context_aware_parents(docs, store)
+        compression_retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=ensemble_retriever)
+        final_retriever = compression_retriever | get_parents
         session_id = str(uuid.uuid4())
+        rag_chain, api_key_manager = create_rag_chain(
+            retriever=final_retriever, get_session_history_func=get_session_history,
+            model_name=model_name, temperature=temperature
+        )
+        # Store the float temperature
+        session_data[session_id] = {'chain': rag_chain, 'model_name': model_name, 'temperature': temperature, 'api_key_manager': api_key_manager}
+        success_msg = f"Processed: {', '.join(processed_files)}"
+        if failed_files: success_msg += f". Failed: {', '.join(failed_files)}"
+        # Get the mode label using the STRING key
+        mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
+        print(f"✅ UPLOAD COMPLETE: Session {session_id} is ready.")
+        # Return all info needed by the frontend
         return jsonify({
             'status': 'success',
+            'filename': success_msg,
+            'session_id': session_id,
+            'model_name': model_name,
+            'mode': mode_label
         })
     except Exception as e:
+        import traceback; traceback.print_exc()
+        return jsonify({'status': 'error', 'message': f'RAG setup failed: {e}'}), 500
 @app.route('/chat', methods=['POST'])
 def chat():
+    """Answer a question with the RAG chain stored for the caller's session.
+
+    Expects a JSON object body with ``question`` and, optionally, ``session_id``;
+    when the body carries no session id, the Flask session cookie is consulted.
+
+    Returns:
+        JSON with ``answer``, ``model_name`` and ``mode`` on success. A JSON
+        error payload with status 400 when the question is missing or the
+        session is unknown, and with status 500 when invoking the chain raises.
+    """
+    # silent=True yields None instead of raising on a missing or invalid JSON body.
+    # Any non-object payload (None, list, string, ...) is normalised to {} so the
+    # validation below answers with the regular JSON 400 instead of failing on
+    # ``data.get`` with an AttributeError.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    question = data.get('question')
+    session_id = data.get('session_id') or session.get('session_id')
+    if not question:
+        return jsonify({'status': 'error', 'message': 'No question provided.'}), 400
+    # The isinstance check keeps an unhashable JSON value (list/dict) from raising
+    # TypeError in the dict membership test.
+    if not isinstance(session_id, str) or session_id not in session_data:
+        print(f"❌ CHAT: Invalid session {session_id}.")
+        return jsonify({'status': 'error', 'message': 'Invalid session. Please upload documents first.'}), 400
     try:
+        session_info = session_data[session_id]
+        rag_chain = session_info['chain']
+        model_name = session_info['model_name']
+        # TEMPERATURE_LABELS is keyed by strings while the session stores a float,
+        # so convert before the lookup; unknown values fall back to the raw string.
+        temperature_str = str(session_info['temperature'])
+        mode_label = TEMPERATURE_LABELS.get(temperature_str, temperature_str)
+        print(f"💬 CHAT: Invoking chain for session {session_id}...")
+        answer = rag_chain.invoke({"question": question}, config={"configurable": {"session_id": session_id}})
+        print("✅ CHAT: Answer generated.")
+        # Return all info needed by the frontend
+        return jsonify({
+            'answer': answer,
+            'model_name': model_name,
+            'mode': mode_label
+        })
     except Exception as e:
+        import traceback; traceback.print_exc()
+        return jsonify({'status': 'error', 'message': f'Error during chat: {e}'}), 500
 def clean_markdown_for_tts(text: str) -> str:
+    """Strip common Markdown markup from ``text`` and flatten it to a single line.
+
+    The substitutions run in the order written; the result has runs of newlines
+    replaced by one space and surrounding whitespace removed.
+    """
+    # *emphasis* / **emphasis** markers, then _emphasis_ markers (content is kept).
+    # NOTE(review): the underscore rule is not word-boundary aware, so snake_case
+    # identifiers also lose underscores.
+    text = re.sub(r'\*(\*?)(.*?)\1\*', r'\2', text); text = re.sub(r'\_(.*?)\_', r'\1', text)
+    # Inline-code backticks, then leading heading hashes (# to ######).
+    text = re.sub(r'`(.*?)`', r'\1', text); text = re.sub(r'^\s*#{1,6}\s+', '', text, flags=re.MULTILINE)
+    # Leading bullet markers (* or -), then leading "1." style list numbers.
+    text = re.sub(r'^\s*[\*\-]\s+', '', text, flags=re.MULTILINE); text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
+    # Leading blockquote ">", then lines consisting only of 3+ of - * _ (horizontal rules).
+    # NOTE(review): the emphasis rules above run first and can consume part of a
+    # "***" or "___" line before this rule sees it — verify the ordering is intended.
+    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE); text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
+    # Collapse every run of newlines into one space.
     text = re.sub(r'\n+', ' ', text)
     return text.strip()
 @app.route('/tts', methods=['POST'])
 def text_to_speech():
+    """Convert posted text to English speech and return it as MP3 audio.
+
+    Expects a JSON object body with a ``text`` field. Markdown markup is removed
+    with ``clean_markdown_for_tts`` before synthesis.
+
+    Returns:
+        An ``audio/mpeg`` response on success; a JSON error payload with status
+        400 when no text is supplied, or status 500 when synthesis fails.
+    """
+    # silent=True yields None instead of raising on a missing or invalid JSON body;
+    # a non-object payload is treated as "no text" so the caller gets the JSON 400
+    # below rather than an AttributeError from ``data.get``.
+    data = request.get_json(silent=True)
+    text = data.get('text') if isinstance(data, dict) else None
+    if not text:
+        return jsonify({'status': 'error', 'message': 'No text provided.'}), 400
     try:
+        clean_text = clean_markdown_for_tts(text)
+        tts = gTTS(clean_text, lang='en')
+        # The audio is kept in memory; rewind so the response streams from the start.
+        mp3_fp = io.BytesIO()
+        tts.write_to_fp(mp3_fp)
+        mp3_fp.seek(0)
         return Response(mp3_fp, mimetype='audio/mpeg')
     except Exception as e:
+        print(f"❌ TTS Error: {e}")
         return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
 if __name__ == '__main__':
+    # Port comes from the PORT environment variable, defaulting to 7860.
     port = int(os.environ.get("PORT", 7860))
+    print(f"🚀 Starting Flask app on port {port}")
+    # Listens on all interfaces with the debugger off.
+    # NOTE(review): threaded=False presumably makes the development server handle one
+    # request at a time — confirm this is intended for the shared in-memory session state.
+    app.run(host="0.0.0.0", port=port, debug=False, threaded=False)

evaluate.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import os
+import asyncio
+import uuid
+from dotenv import load_dotenv
+from datasets import Dataset
+import pandas as pd
+from typing import Sequence, Any, List
+# Ragas and LangChain components
+from ragas import evaluate
+from ragas.metrics import (
+    faithfulness,
+    answer_relevancy,
+    context_recall,
+    context_precision,
+)
+from ragas.testset import TestsetGenerator
+# NOTE: The 'evolutions' import has been completely removed.
+# Your specific RAG components from app.py
+from langchain_groq import ChatGroq
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.storage import InMemoryStore
+from langchain_community.retrievers import BM25Retriever
+from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
+from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
+from langchain_core.documents import Document
+from sentence_transformers.cross_encoder import CrossEncoder
+from rag_processor import create_rag_chain
+from langchain_community.chat_message_histories import ChatMessageHistory
+import fitz
+# Load environment variables
+load_dotenv()
+# --- Re-implementing LocalReranker from app.py ---
+class LocalReranker(BaseDocumentCompressor):
+    """Document compressor that re-ranks documents with a scoring model.
+
+    Scores every (query, document text) pair through ``self.model.predict`` and
+    returns the ``top_n`` highest-scoring documents, best first.
+    """
+    # Scoring model; only predict(pairs, show_progress_bar=False) is called on it.
+    model: Any
+    # Maximum number of documents returned (default 3 in this script).
+    top_n: int = 3
+    class Config:
+        # NOTE(review): pydantic-style config flag — confirm it is still required with `model: Any`.
+        arbitrary_types_allowed = True
+    def compress_documents(self, documents: Sequence[Document], query: str, callbacks=None) -> Sequence[Document]:
+        """Return at most ``top_n`` of ``documents``, ordered by descending score for ``query``.
+
+        Returns ``[]`` for empty input. ``callbacks`` is accepted but unused.
+        Side effect: sets ``metadata['rerank_score']`` on each returned document.
+        """
+        if not documents: return []
+        # One [query, text] pair per candidate, in input order.
+        pairs = [[query, doc.page_content] for doc in documents]
+        scores = self.model.predict(pairs, show_progress_bar=False)
+        doc_scores = list(zip(documents, scores))
+        # Higher score sorts first.
+        sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1], reverse=True)
+        top_docs = []
+        for doc, score in sorted_doc_scores[:self.top_n]:
+            doc.metadata['rerank_score'] = float(score)
+            top_docs.append(doc)
+        return top_docs
+# --- Helper Functions ---
+def load_pdf_with_fallback(filepath):
+    """Load a PDF with PyMuPDF (``fitz``) into one ``Document`` per non-empty page.
+
+    Args:
+        filepath: Path of the PDF file to open.
+
+    Returns:
+        A non-empty list of ``Document`` objects whose metadata holds ``source``
+        (the file's base name) and ``page`` (1-based page number).
+
+    Raises:
+        ValueError: If no page yields any non-whitespace text.
+        Exception: Any error raised while opening or reading the file is
+            logged and re-raised. Despite the function name, no alternative
+            loader is tried in this code.
+    """
+    try:
+        docs = []
+        with fitz.open(filepath) as pdf_doc:
+            for page_num, page in enumerate(pdf_doc):
+                text = page.get_text()
+                # Pages containing only whitespace are skipped.
+                if text.strip():
+                    docs.append(Document(
+                        page_content=text,
+                        metadata={"source": os.path.basename(filepath), "page": page_num + 1}
+                    ))
+        if docs:
+            print(f"✓ Successfully loaded PDF: {filepath}")
+            return docs
+        else:
+            raise ValueError("No text content found in PDF.")
+    except Exception as e:
+        # Also catches the ValueError raised above, so that case is logged here too.
+        print(f"✗ PyMuPDF failed for {filepath}: {e}")
+        raise
+async def main():
+    """Main execution function"""
+    print("\n" + "="*60 + "\nSTARTING RAGAS EVALUATION\n" + "="*60)
+    pdf_path = "uploads/Unit_-_1_Introduction.pdf"
+    if not os.path.exists(pdf_path):
+        print(f"✗ Error: PDF not found at {pdf_path}")
+        return
+    try:
+        # --- 1. Setup Models ---
+        print("\n--- 1. Initializing Models ---")
+        groq_api_key = os.getenv("GROQ_API_KEY")
+        if not groq_api_key or groq_api_key == "your_groq_api_key_here":
+            raise ValueError("GROQ_API_KEY not found or is a placeholder.")
+        generator_llm = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)
+        critic_llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)
+        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        reranker_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device='cpu')
+        print("✓ Models initialized.")
+        # --- 2. Setup RAG Pipeline ---
+        print("\n--- 2. Setting up RAG Pipeline ---")
+        documents = load_pdf_with_fallback(pdf_path)
+        # Split documents
+        parent_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=400)
+        child_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
+        parent_docs = parent_splitter.split_documents(documents)
+        doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
+        child_docs = []
+        for i, doc in enumerate(parent_docs):
+            _id = doc_ids[i]
+            sub_docs = child_splitter.split_documents([doc])
+            for child in sub_docs:
+                child.metadata["doc_id"] = _id
+            child_docs.extend(sub_docs)
+        store = InMemoryStore()
+        store.mset(list(zip(doc_ids, parent_docs)))
+        vectorstore = FAISS.from_documents(child_docs, embedding_model)
+        bm25_retriever = BM25Retriever.from_documents(child_docs, k=10)
+        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
+        ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.4, 0.6])
+        reranker = LocalReranker(model=reranker_model, top_n=5)
+        compression_retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=ensemble_retriever)
+        def get_parents(docs: List[Document]) -> List[Document]:
+            parent_ids = {d.metadata["doc_id"] for d in docs}
+            return store.mget(list(parent_ids))
+        final_retriever = compression_retriever | get_parents
+        message_histories = {}
+        def get_session_history(session_id: str):
+            if session_id not in message_histories:
+                message_histories[session_id] = ChatMessageHistory()
+            return message_histories[session_id]
+        rag_chain = create_rag_chain(final_retriever, get_session_history)
+        print("✓ RAG chain created successfully.")
+        # --- 3. Generate Testset ---
+        print("\n--- 3. Generating Test Questions ---")
+        generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embedding_model)
+        # Generate a simple test set without complex distributions
+        testset = generator.generate_with_langchain_docs(documents, testset_size=5)
+        print("✓ Testset generated.")
+        # --- 4. Run RAG Chain on Testset ---
+        print("\n--- 4. Running RAG Chain to Generate Answers ---")
+        test_questions = [item['question'] for item in testset.to_pandas().to_dict('records')]
+        ground_truths = [item['ground_truth'] for item in testset.to_pandas().to_dict('records')]
+        answers = []
+        contexts = []
+        for i, question in enumerate(test_questions):
+            print(f"  Processing question {i+1}/{len(test_questions)}...")
+            # Retrieve contexts
+            retrieved_docs = final_retriever.invoke(question)
+            contexts.append([doc.page_content for doc in retrieved_docs])
+            # Get answer from chain
+            config = {"configurable": {"session_id": str(uuid.uuid4())}}
+            answer = await rag_chain.ainvoke({"question": question}, config=config)
+            answers.append(answer)
+        # --- 5. Evaluate with Ragas ---
+        print("\n--- 5. Evaluating Results with Ragas ---")
+        eval_data = {
+            'question': test_questions,
+            'answer': answers,
+            'contexts': contexts,
+            'ground_truth': ground_truths
+        }
+        eval_dataset = Dataset.from_dict(eval_data)
+        result = evaluate(
+            eval_dataset,
+            metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
+            llm=critic_llm,
+            embeddings=embedding_model
+        )
+        print("\n" + "="*60 + "\nEVALUATION RESULTS\n" + "="*60)
+        print(result)
+        # --- 6. Save Results ---
+        print("\n--- 6. Saving Results ---")
+        results_df = result.to_pandas()
+        results_df.to_csv("evaluation_results.csv", index=False)
+        print("✓ Evaluation results saved to evaluation_results.csv")
+        print("\n" + "="*60 + "\nEVALUATION COMPLETE!\n" + "="*60)
+    except Exception as e:
+        print(f"\n✗ An error occurred during the process: {e}")
+        import traceback
+        traceback.print_exc()
+# Script entry point: run the async evaluation only when executed directly.
+if __name__ == "__main__":
+    asyncio.run(main())

query_expansion.py ADDED Viewed

	@@ -0,0 +1,525 @@

+# query_expansion.py
+"""
+Query Expansion System for CogniChat RAG Application
+This module implements advanced query expansion techniques to improve retrieval quality:
+- QueryAnalyzer: Extracts intent, entities, and keywords
+- QueryRephraser: Generates natural language variations
+- MultiQueryExpander: Creates diverse query formulations
+- MultiHopReasoner: Connects concepts across documents
+- FallbackStrategies: Handles edge cases gracefully
+Author: CogniChat Team
+Date: October 19, 2025
+"""
+import re
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+from enum import Enum
+class QueryStrategy(Enum):
+    """
+    Query expansion strategies with different complexity levels.
+    The counts below include the original query and are upper bounds:
+    QueryRephraser.generate_variations removes empty and duplicate variations.
+    """
+    QUICK = "quick"  # up to 2 queries: original + synonym variation
+    BALANCED = "balanced"  # up to 4 queries: adds expanded and simplified forms
+    COMPREHENSIVE = "comprehensive"  # up to 7 queries: adds keyword, context and guide forms
+@dataclass
+class QueryAnalysis:
+    """
+    Results from query analysis.
+    Built by QueryAnalyzer.analyze and carried on ExpandedQuery.analysis.
+    """
+    intent: str  # a key of QueryAnalyzer.intent_patterns, or 'general' when none matches
+    entities: List[str]  # Capitalized words and double-quoted terms from the query
+    keywords: List[str]  # Lowercased non-stop-words longer than 2 chars (at most 10)
+    complexity: str  # simple, medium, complex
+    domain: Optional[str] = None  # Technical domain if detected
+@dataclass
+class ExpandedQuery:
+    """
+    Container for expanded query variations.
+    Returned by MultiQueryExpander.expand.
+    """
+    original: str  # The query exactly as passed to expand()
+    variations: List[str]  # Generated variations, truncated to max_queries
+    strategy_used: QueryStrategy  # Strategy the variations were generated with
+    analysis: QueryAnalysis  # Analysis the variations were derived from
+class QueryAnalyzer:
+    """
+    Analyzes queries to extract intent, entities, and key information.
+    Uses LLM-based analysis for intelligent query understanding.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize QueryAnalyzer.
+        Args:
+            llm: Optional LangChain LLM for advanced analysis
+        """
+        self.llm = llm
+        self.intent_patterns = {
+            'definition': r'\b(what is|define|meaning of|definition)\b',
+            'how_to': r'\b(how to|how do|how can|steps to)\b',
+            'comparison': r'\b(compare|difference|versus|vs|better than)\b',
+            'explanation': r'\b(why|explain|reason|cause)\b',
+            'listing': r'\b(list|enumerate|what are|types of)\b',
+            'example': r'\b(example|instance|sample|case)\b',
+        }
+    def analyze(self, query: str) -> QueryAnalysis:
+        """
+        Analyze query to extract intent, entities, and keywords.
+        Args:
+            query: User's original query
+        Returns:
+            QueryAnalysis object with extracted information
+        """
+        query_lower = query.lower()
+        # Detect intent
+        intent = self._detect_intent(query_lower)
+        # Extract entities (simplified - can be enhanced with NER)
+        entities = self._extract_entities(query)
+        # Extract keywords
+        keywords = self._extract_keywords(query)
+        # Assess complexity
+        complexity = self._assess_complexity(query, entities, keywords)
+        # Detect domain
+        domain = self._detect_domain(query_lower)
+        return QueryAnalysis(
+            intent=intent,
+            entities=entities,
+            keywords=keywords,
+            complexity=complexity,
+            domain=domain
+        )
+    def _detect_intent(self, query_lower: str) -> str:
+        """Detect query intent using pattern matching."""
+        for intent, pattern in self.intent_patterns.items():
+            if re.search(pattern, query_lower):
+                return intent
+        return 'general'
+    def _extract_entities(self, query: str) -> List[str]:
+        """Extract named entities (simplified version)."""
+        # Look for capitalized words (potential entities)
+        words = query.split()
+        entities = []
+        for word in words:
+            # Skip common words at sentence start
+            if word[0].isupper() and word.lower() not in ['what', 'how', 'why', 'when', 'where', 'which']:
+                entities.append(word)
+        # Look for quoted terms
+        quoted = re.findall(r'"([^"]+)"', query)
+        entities.extend(quoted)
+        return list(set(entities))
+    def _extract_keywords(self, query: str) -> List[str]:
+        """Extract important keywords from query."""
+        # Remove stop words (simplified list)
+        stop_words = {
+            'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been',
+            'what', 'how', 'why', 'when', 'where', 'which', 'who',
+            'do', 'does', 'did', 'can', 'could', 'should', 'would',
+            'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'
+        }
+        # Split and filter
+        words = re.findall(r'\b\w+\b', query.lower())
+        keywords = [w for w in words if w not in stop_words and len(w) > 2]
+        return keywords[:10]  # Limit to top 10
+    def _assess_complexity(self, query: str, entities: List[str], keywords: List[str]) -> str:
+        """Assess query complexity."""
+        word_count = len(query.split())
+        entity_count = len(entities)
+        keyword_count = len(keywords)
+        # Simple scoring
+        score = word_count + (entity_count * 2) + (keyword_count * 1.5)
+        if score < 15:
+            return 'simple'
+        elif score < 30:
+            return 'medium'
+        else:
+            return 'complex'
+    def _detect_domain(self, query_lower: str) -> Optional[str]:
+        """Detect technical domain if present."""
+        domains = {
+            'programming': ['code', 'function', 'class', 'variable', 'algorithm', 'debug'],
+            'data_science': ['model', 'dataset', 'training', 'prediction', 'accuracy'],
+            'machine_learning': ['neural', 'network', 'learning', 'ai', 'deep learning'],
+            'web': ['html', 'css', 'javascript', 'api', 'frontend', 'backend'],
+            'database': ['sql', 'query', 'database', 'table', 'index'],
+            'security': ['encryption', 'authentication', 'vulnerability', 'attack'],
+        }
+        for domain, keywords in domains.items():
+            if any(kw in query_lower for kw in keywords):
+                return domain
+        return None
+class QueryRephraser:
+    """
+    Generates natural language variations of queries using multiple strategies.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize QueryRephraser.
+        Args:
+            llm: LangChain LLM for generating variations
+        """
+        self.llm = llm
+    def generate_variations(
+        self,
+        query: str,
+        analysis: QueryAnalysis,
+        strategy: QueryStrategy = QueryStrategy.BALANCED
+    ) -> List[str]:
+        """
+        Generate query variations based on strategy.
+        Args:
+            query: Original query
+            analysis: Query analysis results
+            strategy: Expansion strategy to use
+        Returns:
+            List of query variations
+        """
+        variations = [query]  # Always include original
+        if strategy == QueryStrategy.QUICK:
+            # Just add synonym variation
+            variations.append(self._synonym_variation(query, analysis))
+        elif strategy == QueryStrategy.BALANCED:
+            # Add synonym, expanded, and simplified versions
+            variations.append(self._synonym_variation(query, analysis))
+            variations.append(self._expanded_variation(query, analysis))
+            variations.append(self._simplified_variation(query, analysis))
+        elif strategy == QueryStrategy.COMPREHENSIVE:
+            # Add all variations
+            variations.append(self._synonym_variation(query, analysis))
+            variations.append(self._expanded_variation(query, analysis))
+            variations.append(self._simplified_variation(query, analysis))
+            variations.append(self._keyword_focused(query, analysis))
+            variations.append(self._context_variation(query, analysis))
+            # Add one more: alternate phrasing
+            if analysis.intent in ['how_to', 'explanation']:
+                variations.append(f"Guide to {' '.join(analysis.keywords[:3])}")
+        # Remove duplicates and None values
+        variations = [v for v in variations if v]
+        return list(dict.fromkeys(variations))  # Preserve order, remove dupes
+    def _synonym_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Generate variation using synonyms."""
+        # Common synonym replacements
+        synonyms = {
+            'error': 'issue',
+            'problem': 'issue',
+            'fix': 'resolve',
+            'use': 'utilize',
+            'create': 'generate',
+            'make': 'create',
+            'get': 'retrieve',
+            'show': 'display',
+            'find': 'locate',
+            'explain': 'describe',
+        }
+        words = query.lower().split()
+        for i, word in enumerate(words):
+            if word in synonyms:
+                words[i] = synonyms[word]
+                break  # Only replace one word to keep natural
+        return ' '.join(words).capitalize()
+    def _expanded_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Generate expanded version with more detail."""
+        if analysis.intent == 'definition':
+            return f"Detailed explanation and definition of {' '.join(analysis.keywords)}"
+        elif analysis.intent == 'how_to':
+            return f"Step-by-step guide on {query.lower()}"
+        elif analysis.intent == 'comparison':
+            return f"Comprehensive comparison: {query}"
+        else:
+            # Add qualifying words
+            return f"Detailed information about {query.lower()}"
+    def _simplified_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Generate simplified version focusing on core concepts."""
+        # Use just the keywords
+        if len(analysis.keywords) >= 2:
+            return ' '.join(analysis.keywords[:3])
+        return query
+    def _keyword_focused(self, query: str, analysis: QueryAnalysis) -> str:
+        """Create keyword-focused variation for BM25."""
+        keywords = analysis.keywords + analysis.entities
+        return ' '.join(keywords[:5])
+    def _context_variation(self, query: str, analysis: QueryAnalysis) -> str:
+        """Add contextual information if domain detected."""
+        if analysis.domain:
+            return f"{query} in {analysis.domain} context"
+        return query
+class MultiQueryExpander:
+    """
+    Main query expansion orchestrator that combines analysis and rephrasing.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize MultiQueryExpander.
+        Args:
+            llm: LangChain LLM for advanced expansions
+        """
+        self.analyzer = QueryAnalyzer(llm)
+        self.rephraser = QueryRephraser(llm)
+    def expand(
+        self,
+        query: str,
+        strategy: QueryStrategy = QueryStrategy.BALANCED,
+        max_queries: int = 6
+    ) -> ExpandedQuery:
+        """
+        Expand query into multiple variations.
+        Args:
+            query: Original user query
+            strategy: Expansion strategy
+            max_queries: Maximum number of queries to generate
+        Returns:
+            ExpandedQuery object with all variations
+        """
+        # Analyze query
+        analysis = self.analyzer.analyze(query)
+        # Generate variations
+        variations = self.rephraser.generate_variations(query, analysis, strategy)
+        # Limit to max_queries
+        variations = variations[:max_queries]
+        return ExpandedQuery(
+            original=query,
+            variations=variations,
+            strategy_used=strategy,
+            analysis=analysis
+        )
+class MultiHopReasoner:
+    """
+    Implements multi-hop reasoning to connect concepts across documents.
+    Useful for complex queries that require information from multiple sources.
+    """
+    def __init__(self, llm=None):
+        """
+        Initialize MultiHopReasoner.
+        Args:
+            llm: LangChain LLM for reasoning
+        """
+        self.llm = llm
+    def generate_sub_queries(self, query: str, analysis: QueryAnalysis) -> List[str]:
+        """
+        Break complex query into sub-queries for multi-hop reasoning.
+        Args:
+            query: Original complex query
+            analysis: Query analysis
+        Returns:
+            List of sub-queries
+        """
+        sub_queries = [query]
+        # For comparison queries, create separate queries for each entity
+        if analysis.intent == 'comparison' and len(analysis.entities) >= 2:
+            for entity in analysis.entities[:2]:
+                sub_queries.append(f"Information about {entity}")
+        elif analysis.intent == 'comparison' and len(analysis.keywords) >= 2:
+            # Fallback: use keywords if no entities found
+            for keyword in analysis.keywords[:2]:
+                sub_queries.append(f"Information about {keyword}")
+        # For how-to queries, break into steps
+        if analysis.intent == 'how_to' and len(analysis.keywords) >= 2:
+            main_topic = ' '.join(analysis.keywords[:2])
+            sub_queries.append(f"Prerequisites for {main_topic}")
+            sub_queries.append(f"Steps to {main_topic}")
+        # For complex questions, create focused sub-queries
+        if analysis.complexity == 'complex' and len(analysis.keywords) > 3:
+            # Create queries focusing on different keyword groups
+            mid = len(analysis.keywords) // 2
+            sub_queries.append(' '.join(analysis.keywords[:mid]))
+            sub_queries.append(' '.join(analysis.keywords[mid:]))
+        return sub_queries[:5]  # Limit to 5 sub-queries
+class FallbackStrategies:
+    """
+    Implements fallback strategies for queries that don't retrieve good results.
+    """
+    @staticmethod
+    def simplify_query(query: str) -> str:
+        """Simplify query by removing modifiers and focusing on core terms."""
+        # Remove question words
+        query = re.sub(r'\b(what|how|why|when|where|which|who|can|could|should|would)\b', '', query, flags=re.IGNORECASE)
+        # Remove common phrases
+        query = re.sub(r'\b(is|are|was|were|be|been|the|a|an)\b', '', query, flags=re.IGNORECASE)
+        # Clean up extra spaces
+        query = re.sub(r'\s+', ' ', query).strip()
+        return query
+    @staticmethod
+    def broaden_query(query: str, analysis: QueryAnalysis) -> str:
+        """Broaden query to increase recall."""
+        # Remove specific constraints
+        query = re.sub(r'\b(specific|exactly|precisely|only|just)\b', '', query, flags=re.IGNORECASE)
+        # Add general terms
+        if analysis.keywords:
+            return f"{analysis.keywords[0]} overview"
+        return query
+    @staticmethod
+    def focus_entities(analysis: QueryAnalysis) -> str:
+        """Create entity-focused query as fallback."""
+        if analysis.entities:
+            return ' '.join(analysis.entities)
+        elif analysis.keywords:
+            return ' '.join(analysis.keywords[:3])
+        return ""
+# Convenience function for easy integration
+def expand_query_simple(
+    query: str,
+    strategy: str = "balanced",
+    llm=None
+) -> List[str]:
+    """
+    Simple function to expand a query without dealing with classes.
+    Args:
+        query: User's query to expand
+        strategy: "quick", "balanced", or "comprehensive"
+        llm: Optional LangChain LLM
+    Returns:
+        List of expanded query variations
+    Example:
+        >>> queries = expand_query_simple("How do I debug Python code?", strategy="balanced")
+        >>> print(queries)
+        ['How do I debug Python code?', 'How do I resolve Python code?', ...]
+    """
+    expander = MultiQueryExpander(llm=llm)
+    strategy_enum = QueryStrategy(strategy)
+    expanded = expander.expand(query, strategy=strategy_enum)
+    return expanded.variations
+# Example usage and testing
+# Demo run: exercises each public entry point of this module and prints the
+# results. Executed only when the file is run as a script.
+if __name__ == "__main__":
+    # Example 1: Simple query expansion
+    print("=" * 60)
+    print("Example 1: Simple Query Expansion")
+    print("=" * 60)
+    query = "What is machine learning?"
+    queries = expand_query_simple(query, strategy="balanced")
+    print(f"\nOriginal: {query}")
+    print(f"\nExpanded queries ({len(queries)}):")
+    for i, q in enumerate(queries, 1):
+        print(f"  {i}. {q}")
+    # Example 2: Complex query with full analysis
+    print("\n" + "=" * 60)
+    print("Example 2: Complex Query with Analysis")
+    print("=" * 60)
+    expander = MultiQueryExpander()
+    query = "How do I compare the performance of different neural network architectures?"
+    result = expander.expand(query, strategy=QueryStrategy.COMPREHENSIVE)
+    print(f"\nOriginal: {result.original}")
+    print(f"\nAnalysis:")
+    print(f"  Intent: {result.analysis.intent}")
+    print(f"  Entities: {result.analysis.entities}")
+    print(f"  Keywords: {result.analysis.keywords}")
+    print(f"  Complexity: {result.analysis.complexity}")
+    print(f"  Domain: {result.analysis.domain}")
+    print(f"\nExpanded queries ({len(result.variations)}):")
+    for i, q in enumerate(result.variations, 1):
+        print(f"  {i}. {q}")
+    # Example 3: Multi-hop reasoning
+    print("\n" + "=" * 60)
+    print("Example 3: Multi-Hop Reasoning")
+    print("=" * 60)
+    reasoner = MultiHopReasoner()
+    analyzer = QueryAnalyzer()
+    query = "Compare Python and Java for web development"
+    analysis = analyzer.analyze(query)
+    sub_queries = reasoner.generate_sub_queries(query, analysis)
+    print(f"\nOriginal: {query}")
+    print(f"\nSub-queries for multi-hop reasoning:")
+    for i, sq in enumerate(sub_queries, 1):
+        print(f"  {i}. {sq}")
+    # Example 4: Fallback strategies
+    print("\n" + "=" * 60)
+    print("Example 4: Fallback Strategies")
+    print("=" * 60)
+    query = "What is the specific difference between supervised and unsupervised learning?"
+    analysis = analyzer.analyze(query)
+    # NOTE(review): the visible diff ends here; the FallbackStrategies calls
+    # that presumably follow are not shown in this view.

rag_processor.py CHANGED Viewed

@@ -3,93 +3,479 @@ from dotenv import load_dotenv
 from operator import itemgetter
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables.history import RunnableWithMessageHistory
-def create_rag_chain(base_retriever, get_session_history_func, embedding_model, store):
     """
-    Creates a dictionary of RAG chain components for inspection and a final runnable chain.
     """
-    load_dotenv()
-    api_key = os.getenv("GROQ_API_KEY")
-    if not api_key or api_key == "your_groq_api_key_here":
-        raise ValueError("GROQ_API_KEY not found or not configured properly.")
-    llm = ChatGroq(model_name="moonshotai/kimi-k2-instruct-0905", api_key=api_key, temperature=0.1)
-    # 1. HyDE-like Document Generation Chain
-    hyde_template = """As a document expert, write a concise, fact-based paragraph that directly answers the user's question. This will be used for a database search.
-    Question: {question}
-    Hypothetical Answer:"""
-    hyde_prompt = ChatPromptTemplate.from_template(hyde_template)
-    hyde_chain = hyde_prompt | llm | StrOutputParser()
-    # 2. Query Rewriting Chain
-    rewrite_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question that is optimized for a vector database.
-    **Chat History:**
-    {chat_history}
-    **Follow-up Question:**
-    {question}
-    **Standalone Question:**"""
-    rewrite_prompt = ChatPromptTemplate.from_messages([
-        ("system", rewrite_template),
-        MessagesPlaceholder(variable_name="chat_history"),
-        ("human", "Reformulate this question as a standalone query: {question}")
-    ])
-    query_rewriter_chain = rewrite_prompt | llm | StrOutputParser()
-    # 3. Parent Document Fetching Chain
-    def get_parents(docs):
-        parent_ids = {d.metadata.get("doc_id") for d in docs}
-        return store.mget(list(parent_ids))
-    parent_fetcher_chain = RunnableLambda(get_parents)
-    # 4. Main Conversational RAG Chain
-    rag_template = """You are CogniChat, an expert document analysis assistant. Your task is to answer the user's question based *only* on the provided context.
-    **Instructions:**
-    1. Read the context carefully.
-    2. If the answer is in the context, provide a clear and concise answer.
-    3. If the answer is not in the context, you *must* state that you cannot find the information in the provided documents. Do not use any external knowledge.
-    4. Where appropriate, use formatting like lists or bold text to improve readability.
-    **Context:**
-    {context}
     """
     rag_prompt = ChatPromptTemplate.from_messages([
         ("system", rag_template),
         MessagesPlaceholder(variable_name="chat_history"),
         ("human", "{question}"),
     ])
-    conversational_rag_chain = (
-        RunnablePassthrough.assign(
-            context=query_rewriter_chain | hyde_chain | base_retriever | parent_fetcher_chain
-        )
-        | rag_prompt
-        | llm
-        | StrOutputParser()
-    )
-    # 5. Final Chain with History (Simplified)
-    final_chain = RunnableWithMessageHistory(
         conversational_rag_chain,
         get_session_history_func,
         input_messages_key="question",
         history_messages_key="chat_history",
     )
-    print("\n✅ RAG chain and components successfully built.")
-    return {
-        "rewriter": query_rewriter_chain,
-        "hyde": hyde_chain,
-        "base_retriever": base_retriever,
-        "parent_fetcher": parent_fetcher_chain,
-        "final_chain": final_chain
-    }

 from operator import itemgetter
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain_core.documents import Document
+from query_expansion import expand_query_simple
+from typing import List, Optional
+import time
+class GroqAPIKeyManager:
+    """Manages multiple Groq API keys with automatic rotation and fallback."""
+    def __init__(self, api_keys: List[str]):
+        """
+        Initialize with a list of API keys.
+        Args:
+            api_keys: List of Groq API keys to use
+        """
+        self.api_keys = [key for key in api_keys if key and key != "your_groq_api_key_here"]
+        if not self.api_keys:
+            raise ValueError("No valid API keys provided!")
+        self.current_index = 0
+        self.failed_keys = set()
+        self.success_count = {key: 0 for key in self.api_keys}
+        self.failure_count = {key: 0 for key in self.api_keys}
+        print(f"🔑 API Key Manager: Loaded {len(self.api_keys)} API keys")
+    def get_current_key(self) -> str:
+        """Get the current API key."""
+        return self.api_keys[self.current_index]
+    def mark_success(self, api_key: str):
+        """Mark an API key as successful."""
+        if api_key in self.success_count:
+            self.success_count[api_key] += 1
+            # Remove from failed keys if it was there
+            if api_key in self.failed_keys:
+                self.failed_keys.remove(api_key)
+                print(f"   ✅ API Key #{self.api_keys.index(api_key) + 1} recovered!")
+    def mark_failure(self, api_key: str):
+        """Mark an API key as failed."""
+        if api_key in self.failure_count:
+            self.failure_count[api_key] += 1
+            self.failed_keys.add(api_key)
+    def rotate_to_next_key(self) -> bool:
+        """
+        Rotate to the next available API key.
+        Returns:
+            True if a new key is available, False if all keys failed
+        """
+        initial_index = self.current_index
+        attempts = 0
+        while attempts < len(self.api_keys):
+            self.current_index = (self.current_index + 1) % len(self.api_keys)
+            attempts += 1
+            current_key = self.api_keys[self.current_index]
+            # If we've tried all keys, allow retry even failed ones
+            if attempts >= len(self.api_keys):
+                print(f"   ⚠️ All keys attempted, retrying with key #{self.current_index + 1}")
+                return True
+            # Skip recently failed keys unless it's been a while
+            if current_key not in self.failed_keys:
+                print(f"   🔄 Switching to API Key #{self.current_index + 1}")
+                return True
+        return False
+    def get_statistics(self) -> str:
+        """Get statistics about API key usage."""
+        stats = []
+        for i, key in enumerate(self.api_keys):
+            success = self.success_count[key]
+            failure = self.failure_count[key]
+            status = "❌ FAILED" if key in self.failed_keys else "✅ ACTIVE"
+            masked_key = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
+            stats.append(f"   Key #{i+1} ({masked_key}): {success} success, {failure} failures [{status}]")
+        return "\n".join(stats)
+def load_api_keys_from_hf_secrets() -> List[str]:
     """
+    Load API keys from Hugging Face Spaces Secrets.
+    In your Hugging Face Space settings, add these secrets:
+    - GROQ_API_KEY_1
+    - GROQ_API_KEY_2
+    - GROQ_API_KEY_3
+    - GROQ_API_KEY_4
+    Returns:
+        List of API keys retrieved from HF secrets
     """
+    api_keys = []
+    secret_names = ['GROQ_API_KEY_1', 'GROQ_API_KEY_2', 'GROQ_API_KEY_3', 'GROQ_API_KEY_4']
+    print("🔐 Loading API keys from Hugging Face Secrets...")
+    for secret_name in secret_names:
+        try:
+            # HF Spaces secrets are available as environment variables
+            api_key = os.getenv(secret_name)
+            if api_key and api_key.strip() and api_key != "your_groq_api_key_here":
+                api_keys.append(api_key.strip())
+                print(f"   ✅ Loaded: {secret_name}")
+            else:
+                print(f"   ⚠️ Not found or empty: {secret_name}")
+        except Exception as e:
+            print(f"   ❌ Error loading {secret_name}: {str(e)}")
+    # ADD THIS RETURN STATEMENT - this was missing!
+    return api_keys
+def create_llm_with_fallback(
+    api_key_manager: GroqAPIKeyManager,
+    model_name: str,
+    temperature: float,
+    max_retries: int = 3
+) -> ChatGroq:
+    """
+    Create a ChatGroq LLM with automatic API key fallback.
+    Each attempt builds a client with the manager's current key and sends a
+    one-word probe request. On success the key is marked good and the client
+    is returned; on failure the key is marked bad and the manager is asked to
+    rotate to the next key before the next attempt.
+    Args:
+        api_key_manager: Manager handling multiple API keys
+        model_name: Name of the model to use
+        temperature: Temperature setting
+        max_retries: Maximum number of attempts (one key is tried per attempt)
+    Returns:
+        ChatGroq instance whose probe request succeeded
+    Raises:
+        ValueError: If every attempt fails, if the manager reports that no
+            further key is available, or if max_retries is not positive.
+            The last underlying error is chained as ``__cause__``.
+    """
+    for attempt in range(max_retries):
+        current_key = api_key_manager.get_current_key()
+        try:
+            llm = ChatGroq(
+                model_name=model_name,
+                api_key=current_key,
+                temperature=temperature
+            )
+            # Probe the key with a minimal request so a bad or throttled key
+            # is detected here rather than on the first real question.
+            llm.invoke("test")
+            api_key_manager.mark_success(current_key)
+            return llm
+        except Exception as e:
+            error_msg = str(e).lower()
+            api_key_manager.mark_failure(current_key)
+            key_number = api_key_manager.current_index + 1
+            # Match specific markers only: bare "rate", "limit" or "api" also
+            # occur in unrelated messages (e.g. "generate", "APIConnectionError")
+            # and would mislabel the failure in the log.
+            if any(marker in error_msg for marker in ("429", "rate limit", "rate_limit")):
+                print(f"   ⚠️ Rate limit hit on API Key #{key_number}")
+            elif any(marker in error_msg for marker in ("401", "auth", "invalid api key", "invalid_api_key")):
+                print(f"   ❌ Authentication failed on API Key #{key_number}")
+            else:
+                print(f"   ❌ Error with API Key #{key_number}: {str(e)[:50]}")
+            # Out of attempts: surface the failure, keeping the root cause attached.
+            if attempt >= max_retries - 1:
+                raise ValueError(f"Failed to initialize LLM after {max_retries} attempts") from e
+            # Try next key if available
+            if not api_key_manager.rotate_to_next_key():
+                raise ValueError("All API keys failed!") from e
+            print(f"   🔄 Retrying with next API key (Attempt {attempt + 2}/{max_retries})...")
+            time.sleep(1)  # Brief pause before retry
+    # Only reachable when max_retries <= 0, i.e. the loop body never ran.
+    raise ValueError("Failed to create LLM with any available API key")
+def create_multi_query_retriever(base_retriever, llm, strategy: str = "balanced"):
+    """
+    Wrap a base retriever with query expansion.
+    Args:
+        base_retriever: Object exposing ``invoke(query)`` that returns documents
+        llm: Language model handed to ``expand_query_simple``
+        strategy: Expansion strategy name forwarded to ``expand_query_simple``
+    Returns:
+        A function ``query -> List[Document]`` that retrieves for every query
+        variation and returns the de-duplicated union in first-seen order.
+    """
+    def multi_query_retrieve(query: str) -> List[Document]:
+        """Retrieve documents for each expanded variation of ``query``."""
+        # Fall back to the raw query so an empty expansion result never turns
+        # into "no retrieval at all".
+        query_variations = expand_query_simple(query, strategy=strategy, llm=llm) or [query]
+        all_docs = []
+        # Track the text itself rather than hash(text): with bare hashes, two
+        # different chunks whose hashes collide would be treated as duplicates.
+        seen_content = set()
+        for i, query_var in enumerate(query_variations):
+            try:
+                docs = base_retriever.invoke(query_var)
+                for doc in docs:
+                    content = doc.page_content
+                    if content not in seen_content:
+                        seen_content.add(content)
+                        all_docs.append(doc)
+            except Exception as e:
+                # Best effort: one failing variation must not abort the others.
+                print(f"   ✗ Query Expansion Error (Query {i+1}): {str(e)[:50]}")
+                continue
+        print(f"   📊 Query Expansion: Retrieved {len(all_docs)} unique documents.")
+        return all_docs
+    return multi_query_retrieve
+def get_system_prompt(temperature: float) -> str:
+    """
+    Pick the system prompt template that matches the sampling temperature.
+    Tiers (upper bounds are inclusive):
+    - temperature <= 0.4: factual, structured, conservative
+    - 0.4 < temperature <= 0.8: balanced, moderately creative
+    - anything else: creative, storytelling mode
+    Every template contains a ``{context}`` placeholder for the retrieved
+    documents.
+    """
+    # Conservative, structured prompt
+    factual_prompt = """You are CogniChat, an expert document analysis assistant specializing in comprehensive and well-structured answers.
+RESPONSE GUIDELINES:
+**Structure & Formatting:**
+- Start with a direct answer to the question
+- Use **bold** for key terms, important concepts, and technical terminology
+- Use bullet points (•) for lists, features, or multiple items
+- Use numbered lists (1., 2., 3.) for steps, procedures, or sequential information
+- Use ### Headers to organize different sections or topics
+- Add blank lines between sections for readability
+**Source Citation:**
+- Always cite information using: [Source: filename, Page: X]
+- Place citations at the end of your final answer only
+- Do not cite sources within the body of your answer
+- Multiple sources: [Source: doc1.pdf, Page: 3; doc2.pdf, Page: 7]
+**Completeness:**
+- Provide thorough, detailed answers using ALL relevant information from context
+- Summarize and properly elaborate each point for increased clarity
+- If the question has multiple parts, address each part clearly
+**Accuracy:**
+- ONLY use information from the provided context documents below
+- If information is incomplete, state what IS available and what ISN'T
+- If the answer isn't in the context, clearly state: "I cannot find this information in the uploaded documents"
+- Never make assumptions or add information not in the context
+---
+{context}
+---
+Now answer the following question comprehensively using the context above:"""
+    # Balanced prompt
+    balanced_prompt = """You are CogniChat, an intelligent document analysis assistant that combines accuracy with engaging communication.
+RESPONSE GUIDELINES:
+**Communication Style:**
+- Present information in a clear, engaging manner
+- Use **bold** for emphasis on important concepts
+- Balance structure with natural flow
+- Make complex topics accessible and interesting
+**Content Approach:**
+- Ground your response firmly in the provided context
+- Add helpful explanations and connections between concepts
+- Use analogies or examples when they help clarify ideas (but keep them brief)
+- Organize information logically with headers (###) and lists where appropriate
+**Source Attribution:**
+- Cite sources at the end: [Source: filename, Page: X]
+- Be transparent about what the documents do and don't contain
+**Accuracy:**
+- Base your answer on the context documents provided
+- If information is partial, explain what's available
+- Acknowledge gaps: "The documents don't cover this aspect"
+---
+{context}
+---
+Now answer the following question in an engaging yet accurate way:"""
+    # Creative, engaging prompt
+    creative_prompt = """You are CogniChat, a creative and insightful document analyst who transforms information into engaging, memorable experiences.
+🎨 CREATIVE RESPONSE GUIDELINES:
+**Your Mission:**
+Transform the document content into compelling, creative responses while staying true to the facts. Think of yourself as a skilled storyteller who brings information to life!
+**Creative Techniques - Use Liberally:**
+- **Vivid Language**: Use descriptive, evocative language that paints mental pictures
+- **Analogies & Metaphors**: Create memorable comparisons that illuminate concepts
+- **Narrative Flow**: Tell a story when appropriate - build tension, reveal insights progressively
+- **Engaging Hooks**: Start with something intriguing that captures attention
+- **Real-World Connections**: Bridge abstract concepts to tangible, relatable scenarios
+- **Thought-Provoking Questions**: Pose rhetorical questions that spark curiosity
+- **Dynamic Formatting**: Use varied structures - not just bullet points. Try prose paragraphs, short punchy sentences, strategic emphasis
+**Creative Freedom:**
+- Interpret and synthesize information creatively
+- Make insightful connections between different pieces of information
+- Present the same facts in novel, interesting ways
+- Use formatting creatively: emojis (when appropriate), varied paragraph lengths, strategic **emphasis**
+- Vary your tone based on content: enthusiastic for exciting topics, contemplative for complex ones
+**Boundaries of Creativity:**
+- ✅ Creative presentation, interpretation, and synthesis of facts
+- ✅ Memorable analogies and explanatory examples
+- ✅ Engaging narrative structure and compelling language
+- ❌ Never invent facts not in the documents
+- ❌ Don't contradict the source material
+- ❌ Acknowledge when information isn't available (but do so creatively!)
+**Source Attribution:**
+- Weave citations naturally into your narrative
+- End with: [Source: filename, Page: X]
+---
+{context}
+---
+Now, using your creative prowess and the context above, craft an engaging and memorable answer to this question:"""
+    # Guard clauses, lowest tier first; whatever passes neither bound is creative.
+    if temperature <= 0.4:
+        return factual_prompt
+    if temperature <= 0.8:
+        return balanced_prompt
+    return creative_prompt
+def create_rag_chain(
+    retriever,
+    get_session_history_func,
+    enable_query_expansion=True,
+    expansion_strategy="balanced",
+    model_name: str = "moonshotai/kimi-k2-instruct",
+    temperature: float = 0.2,
+    api_keys: Optional[List[str]] = None
+):
+    """
+    Creates an advanced RAG chain with temperature-adaptive prompting and API key rotation.
+    Pipeline: rewrite the question into a standalone query using the chat
+    history, retrieve (optionally through query expansion), format the
+    documents into a context block, then answer with the temperature-specific
+    system prompt. The whole chain is wrapped with per-session message history.
+    Args:
+        retriever: Document retriever
+        get_session_history_func: Function to get session history
+        enable_query_expansion: Whether to enable query expansion
+        expansion_strategy: Strategy for query expansion
+        model_name: Name of the LLM model
+        temperature: Temperature setting (0.0-1.0)
+        api_keys: Optional list of API keys. If None, loads from environment
+    Returns:
+        Tuple ``(chain_with_memory, api_key_manager)``: the history-aware chain
+        and the key manager (returned so callers can read its statistics).
+    Raises:
+        ValueError: If no API keys are available; ``create_llm_with_fallback``
+            also raises ValueError when no key yields a working LLM.
     """
+    # Load API keys from HF Secrets
+    if api_keys is None:
+        api_keys = load_api_keys_from_hf_secrets()
+    if not api_keys:
+        raise ValueError(
+            "No valid API keys found! Please set GROQ_API_KEY or GROQ_API_KEY_1, "
+            "GROQ_API_KEY_2, GROQ_API_KEY_3, GROQ_API_KEY_4 in your .env file"
+        )
+    # Initialize API key manager
+    api_key_manager = GroqAPIKeyManager(api_keys)
+    print(f"⚙️ RAG: Initializing LLM - Model: {model_name}, Temp: {temperature}")
+    # Display creativity mode based on temperature (same thresholds as get_system_prompt)
+    if temperature <= 0.4:
+        creativity_mode = "FACTUAL & STRUCTURED"
+    elif temperature <= 0.8:
+        creativity_mode = "BALANCED & ENGAGING"
+    else:
+        creativity_mode = "CREATIVE & STORYTELLING"
+    print(f"🎭 Creativity Mode: {creativity_mode}")
+    # Create LLM with fallback
+    llm = create_llm_with_fallback(api_key_manager, model_name, temperature)
+    print(f"✅ LLM initialized with API Key #{api_key_manager.current_index + 1}")
+    if enable_query_expansion:
+        print(f"✨ RAG: Query Expansion ENABLED (Strategy: {expansion_strategy})")
+        enhanced_retriever = create_multi_query_retriever(
+            base_retriever=retriever,
+            llm=llm,
+            strategy=expansion_strategy
+        )
+    else:
+        enhanced_retriever = retriever
+    # NOTE(review): {chat_history} and {question} are interpolated into this
+    # system text AND supplied again below via MessagesPlaceholder / the human
+    # message, so the rewriter sees both twice (the system copy of the history
+    # is the stringified message list). Confirm whether that is intended.
+    rewrite_template = """You are an expert at optimizing search queries for document retrieval.
+Given the conversation history and a follow-up question, create a comprehensive standalone question that:
+1. Incorporates all relevant context from the chat history
+2. Expands abbreviations and resolves all pronouns (it, they, this, that, etc.)
+3. Includes key technical terms and concepts that would help find relevant documents
+4. Maintains the original intent, specificity, and detail level
+5. If the question asks for comparison or multiple items, ensure all items are in the query
+Chat History:
+{chat_history}
+Follow-up Question: {question}
+Optimized Standalone Question:"""
+    rewrite_prompt = ChatPromptTemplate.from_messages([
+        ("system", rewrite_template),
+        MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{question}")
+    ])
+    query_rewriter = rewrite_prompt | llm | StrOutputParser()
+    def format_docs(docs):
+        """Format retrieved documents with clear structure and metadata."""
+        if not docs:
+            return "No relevant documents found in the knowledge base."
+        formatted_parts = []
+        for i, doc in enumerate(docs, 1):
+            source = doc.metadata.get('source', 'Unknown Document')
+            page = doc.metadata.get('page', 'N/A')
+            rerank_score = doc.metadata.get('rerank_score')
+            content = doc.page_content.strip()
+            doc_header = f"{'='*60}\nDOCUMENT {i}\n{'='*60}"
+            metadata_line = f"Source: {source} | Page: {page}"
+            # Compare against None: a score of exactly 0.0 is still a score
+            # and must not be dropped by a truthiness test.
+            if rerank_score is not None:
+                metadata_line += f" | Relevance: {rerank_score:.3f}"
+            formatted_parts.append(
+                f"{doc_header}\n"
+                f"{metadata_line}\n"
+                f"{'-'*60}\n"
+                f"{content}\n"
+            )
+        return f"RETRIEVED CONTEXT ({len(docs)} documents):\n\n" + "\n".join(formatted_parts)
+    # Get temperature-adaptive system prompt
+    rag_template = get_system_prompt(temperature)
     rag_prompt = ChatPromptTemplate.from_messages([
         ("system", rag_template),
         MessagesPlaceholder(variable_name="chat_history"),
         ("human", "{question}"),
     ])
+    # Rewriter input construction
+    rewriter_input = RunnableParallel({
+        "question": itemgetter("question"),
+        "chat_history": itemgetter("chat_history"),
+    })
+    # Main retrieval pipeline: rewrite -> retrieve -> format. The retriever may
+    # be the plain function from create_multi_query_retriever, and format_docs
+    # is a plain function; both are composed into the chain with `|`.
+    retrieval_chain = rewriter_input | query_rewriter | enhanced_retriever | format_docs
+    # Final conversational RAG chain
+    conversational_rag_chain = RunnableParallel({
+        "context": retrieval_chain,
+        "question": itemgetter("question"),
+        "chat_history": itemgetter("chat_history"),
+    }) | rag_prompt | llm | StrOutputParser()
+    chain_with_memory = RunnableWithMessageHistory(
         conversational_rag_chain,
         get_session_history_func,
         input_messages_key="question",
         history_messages_key="chat_history",
     )
+    print("✅ RAG: Chain created successfully.")
+    print("\n" + api_key_manager.get_statistics())
+    return chain_with_memory, api_key_manager  # Return manager for statistics

templates/index.html CHANGED Viewed

@@ -7,7 +7,7 @@
     <script src="https://cdn.tailwindcss.com"></script>
     <link rel="preconnect" href="https://fonts.googleapis.com">
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-    <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <style>
         :root {
@@ -20,23 +20,28 @@
             --input-bg: #e8f0fe;
             --user-bubble: #d9e7ff;
             --bot-bubble: #f1f3f4;
         }
-        /* Dark mode styles */
         .dark {
-            --background: #202124;
-            --foreground: #e8eaed;
-            --primary: #8ab4f8;
-            --primary-hover: #99bdfa;
-            --card: #303134;
-            --card-border: #5f6368;
-            --input-bg: #303134;
-            --user-bubble: #3c4043;
-            --bot-bubble: #3c4043;
         }
         body {
-            font-family: 'Google Sans', 'Roboto', sans-serif;
             background-color: var(--background);
             color: var(--foreground);
             overflow: hidden;
@@ -44,15 +49,14 @@
         #chat-window::-webkit-scrollbar { width: 8px; }
         #chat-window::-webkit-scrollbar-track { background: transparent; }
-        #chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
         .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
         .drop-zone--over {
             border-color: var(--primary);
-            box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
         }
-        /* Loading Spinner */
         .loader {
             width: 48px;
             height: 48px;
@@ -82,7 +86,6 @@
             100% { transform: rotate(360deg); }
         }
-        /* Typing Indicator Animation */
         .typing-indicator span {
             height: 10px;
             width: 10px;
@@ -98,173 +101,136 @@
             40% { transform: scale(1.0); }
         }
-        /* Enhanced Markdown Styling for better readability and aesthetics */
-        .markdown-content p {
-            margin-bottom: 1rem;
-            line-height: 1.75;
-        }
-        .markdown-content h1, .markdown-content h2, .markdown-content h3, .markdown-content h4 {
-            font-family: 'Google Sans', sans-serif;
-            font-weight: 700;
-            margin-top: 1.75rem;
-            margin-bottom: 1rem;
-            line-height: 1.3;
-        }
-        .markdown-content h1 { font-size: 1.75em; border-bottom: 1px solid var(--card-border); padding-bottom: 0.5rem; }
-        .markdown-content h2 { font-size: 1.5em; }
-        .markdown-content h3 { font-size: 1.25em; }
-        .markdown-content h4 { font-size: 1.1em; }
-        .markdown-content ul, .markdown-content ol {
-            padding-left: 1.75rem;
-            margin-bottom: 1rem;
-        }
-        .markdown-content li {
-            margin-bottom: 0.5rem;
-        }
-        .dark .markdown-content ul > li::marker { color: var(--primary); }
-        .markdown-content ul > li::marker { color: var(--primary); }
-        .markdown-content a {
-            color: var(--primary);
-            text-decoration: none;
-            font-weight: 500;
-            border-bottom: 1px solid transparent;
-            transition: all 0.2s ease-in-out;
-        }
-        .markdown-content a:hover {
-            border-bottom-color: var(--primary-hover);
-        }
-        .markdown-content blockquote {
-            margin: 1.5rem 0;
-            padding-left: 1.5rem;
-            border-left: 4px solid var(--card-border);
-            color: #6c757d;
-            font-style: italic;
-        }
-        .dark .markdown-content blockquote {
-            color: #adb5bd;
-        }
-        .markdown-content hr {
-            border: none;
-            border-top: 1px solid var(--card-border);
-            margin: 2rem 0;
-        }
-        .markdown-content table {
-            width: 100%;
-            border-collapse: collapse;
-            margin: 1.5rem 0;
-            font-size: 0.9em;
-            box-shadow: 0 1px 3px rgba(0,0,0,0.05);
-            border-radius: 8px;
-            overflow: hidden;
-        }
-        .markdown-content th, .markdown-content td {
-            border: 1px solid var(--card-border);
-            padding: 0.75rem 1rem;
-            text-align: left;
-        }
-        .markdown-content th {
-            background-color: var(--bot-bubble);
-            font-weight: 500;
-        }
-        .markdown-content code {
-            background-color: rgba(0,0,0,0.05);
-            padding: 0.2rem 0.4rem;
-            border-radius: 0.25rem;
-            font-family: 'Roboto Mono', monospace;
-            font-size: 0.9em;
-        }
-        .dark .markdown-content code {
-            background-color: rgba(255,255,255,0.1);
-        }
-        .markdown-content pre {
-            position: relative;
-            background-color: #f8f9fa;
-            border: 1px solid var(--card-border);
-            border-radius: 0.5rem;
-            margin-bottom: 1rem;
-        }
-        .dark .markdown-content pre {
-            background-color: #2e2f32;
-        }
-        .markdown-content pre code {
-            background: none;
-            padding: 1rem;
-            display: block;
-            overflow-x: auto;
-        }
-        .markdown-content pre .copy-code-btn {
-            position: absolute;
-            top: 0.5rem;
-            right: 0.5rem;
-            background-color: #e8eaed;
-            border: 1px solid #dadce0;
-            color: #5f6368;
-            padding: 0.3rem 0.6rem;
-            border-radius: 0.25rem;
-            cursor: pointer;
-            opacity: 0;
-            transition: opacity 0.2s;
-            font-size: 0.8em;
-        }
-        .dark .markdown-content pre .copy-code-btn {
-            background-color: #3c4043;
-            border-color: #5f6368;
-            color: #e8eaed;
-        }
-        .markdown-content pre:hover .copy-code-btn {
-            opacity: 1;
-        }
-        /* Spinner for the TTS button */
         .tts-button-loader {
             width: 16px;
             height: 16px;
-            border: 2px solid currentColor; /* Use button's text color */
             border-radius: 50%;
             display: inline-block;
             box-sizing: border-box;
             animation: rotation 0.8s linear infinite;
-            border-bottom-color: transparent; /* Makes it a half circle spinner */
         }
     </style>
 </head>
 <body class="w-screen h-screen dark">
     <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
         <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
-            <header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
-                <h1 class="text-xl font-medium">Chat with your Docs</h1>
-                <p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
             </header>
             <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
-                <div id="chat-content" class="max-w-4xl mx-auto space-y-8">
-                </div>
             </div>
-            <div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
-                <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
                     <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
-                    <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
-                        <svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
                     </button>
                 </form>
             </div>
         </div>
         <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
-            <div class="text-center">
-                <h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
-                <div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
-                    <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
-                    <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
-                    <p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
-                    <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
                 </div>
             </div>
         </div>
-        <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
             <div class="loader"></div>
             <p id="loading-text" class="mt-6 text-sm font-medium"></p>
-            <p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
         </div>
     </main>
@@ -278,94 +244,89 @@
             const loadingOverlay = document.getElementById('loading-overlay');
             const loadingText = document.getElementById('loading-text');
             const loadingSubtext = document.getElementById('loading-subtext');
             const chatForm = document.getElementById('chat-form');
             const chatInput = document.getElementById('chat-input');
             const chatSubmitBtn = document.getElementById('chat-submit-btn');
             const chatWindow = document.getElementById('chat-window');
             const chatContent = document.getElementById('chat-content');
             const chatFilename = document.getElementById('chat-filename');
-            let sessionId = null;
-            const storedSessionId = sessionStorage.getItem('cognichat_session_id');
-            if (storedSessionId) {
-                sessionId = storedSessionId;
-                console.debug('Restored session ID from storage:', sessionId);
-            }
-            // --- File Upload Logic ---
             dropZone.addEventListener('click', () => fileUploadInput.click());
             ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
-                dropZone.addEventListener(eventName, preventDefaults, false);
-                document.body.addEventListener(eventName, preventDefaults, false);
-            });
-            ['dragenter', 'dragover'].forEach(eventName => {
-                dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over'));
             });
-            ['dragleave', 'drop'].forEach(eventName => {
-                dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over'));
-            });
             dropZone.addEventListener('drop', (e) => {
-                const files = e.dataTransfer.files;
-                if (files.length > 0) handleFiles(files);
             });
             fileUploadInput.addEventListener('change', (e) => {
                 if (e.target.files.length > 0) handleFiles(e.target.files);
             });
-            function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); }
             async function handleFiles(files) {
                 const formData = new FormData();
-                let fileNames = [];
-                for (const file of files) {
-                    formData.append('file', file);
-                    fileNames.push(file.name);
-                }
                 fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
-                await uploadAndProcessFiles(formData, fileNames);
             }
-            async function uploadAndProcessFiles(formData, fileNames) {
                 loadingOverlay.classList.remove('hidden');
-                loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
-                loadingSubtext.textContent = "🤓Creating a knowledge base may take a minute or two. So please hold on tight";
                 try {
                     const response = await fetch('/upload', { method: 'POST', body: formData });
                     const result = await response.json();
                     if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
-                    if (result.session_id) {
-                        sessionId = result.session_id;
-                        sessionStorage.setItem('cognichat_session_id', sessionId);
-                        console.debug('Stored session ID from upload:', sessionId);
-                    } else {
-                        console.warn('Upload response missing session_id field.');
-                    }
-                    chatFilename.textContent = `Chatting with: ${result.filename}`;
                     uploadContainer.classList.add('hidden');
                     chatContainer.classList.remove('hidden');
-                    appendMessage("I've analyzed your documents. What would you like to know?", "bot");
                 } catch (error) {
                     console.error('Upload error:', error);
                     alert(`Error: ${error.message}`);
                 } finally {
                     loadingOverlay.classList.add('hidden');
-                    loadingSubtext.textContent = '';
                     fileNameSpan.textContent = '';
                     fileUploadInput.value = '';
                 }
             }
-            // --- Chat Logic ---
             chatForm.addEventListener('submit', async (e) => {
                 e.preventDefault();
                 const question = chatInput.value.trim();
@@ -377,53 +338,36 @@
                 chatSubmitBtn.disabled = true;
                 const typingIndicator = showTypingIndicator();
-                let botMessageContainer = null;
-                let contentDiv = null;
                 try {
-                    const requestBody = { question: question };
-                    if (sessionId) {
-                        requestBody.session_id = sessionId;
-                    }
                     const response = await fetch('/chat', {
                         method: 'POST',
                         headers: { 'Content-Type': 'application/json' },
-                        body: JSON.stringify(requestBody),
                     });
                     if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
-                    // ============================ MODIFICATION START ==============================
-                    // Parse the JSON response instead of reading a stream
-                    const result = await response.json();
-                    const answer = result.answer; // Extract the 'answer' field
-                    if (!answer) {
-                        throw new Error("Received an empty or invalid response from the server.");
-                    }
                     typingIndicator.remove();
-                    botMessageContainer = appendMessage('', 'bot');
-                    contentDiv = botMessageContainer.querySelector('.markdown-content');
-                    // Use the extracted answer for rendering
-                    contentDiv.innerHTML = marked.parse(answer);
                     contentDiv.querySelectorAll('pre').forEach(addCopyButton);
-                    scrollToBottom(); // Scroll after content is added
-                    // Use the extracted answer for TTS
-                    addTextToSpeechControls(botMessageContainer, answer);
-                    // ============================ MODIFICATION END ==============================
                 } catch (error) {
                     console.error('Chat error:', error);
-                    if (typingIndicator) typingIndicator.remove();
-                    if (contentDiv) {
-                        contentDiv.innerHTML = `<p class="text-red-500">Error: ${error.message}</p>`;
-                    } else {
-                        appendMessage(`Error: ${error.message}`, 'bot');
-                    }
                 } finally {
                     chatInput.disabled = false;
                     chatSubmitBtn.disabled = false;
@@ -431,166 +375,108 @@
                 }
             });
-            // --- UI Helper Functions ---
-            function appendMessage(text, sender) {
                 const messageWrapper = document.createElement('div');
-                messageWrapper.className = `flex items-start gap-4`;
-                const iconSVG = sender === 'user'
-                    ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
                     : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
-                const messageBubble = document.createElement('div');
-                messageBubble.className = `flex-1 pt-1`;
-                const senderName = document.createElement('p');
-                senderName.className = 'font-medium text-sm mb-1';
-                senderName.textContent = sender === 'user' ? 'You' : 'CogniChat';
-                const contentDiv = document.createElement('div');
-                contentDiv.className = 'text-base markdown-content';
-                // Only parse if text is not empty
-                if (text) {
-                    contentDiv.innerHTML = marked.parse(text);
                 }
-                const controlsContainer = document.createElement('div');
-                controlsContainer.className = 'tts-controls mt-2';
-                messageBubble.appendChild(senderName);
-                messageBubble.appendChild(contentDiv);
-                messageBubble.appendChild(controlsContainer);
-                messageWrapper.innerHTML = iconSVG;
-                messageWrapper.appendChild(messageBubble);
                 chatContent.appendChild(messageWrapper);
                 scrollToBottom();
-                return messageBubble;
             }
             function showTypingIndicator() {
-                const indicatorWrapper = document.createElement('div');
-                indicatorWrapper.className = `flex items-start gap-4`;
-                indicatorWrapper.id = 'typing-indicator';
-                const iconSVG = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
-                const messageBubble = document.createElement('div');
-                messageBubble.className = 'flex-1 pt-1';
-                const senderName = document.createElement('p');
-                senderName.className = 'font-medium text-sm mb-1';
-                senderName.textContent = 'CogniChat is thinking...';
                 const indicator = document.createElement('div');
-                indicator.className = 'typing-indicator';
-                indicator.innerHTML = '<span></span><span></span><span></span>';
-                messageBubble.appendChild(senderName);
-                messageBubble.appendChild(indicator);
-                indicatorWrapper.innerHTML = iconSVG;
-                indicatorWrapper.appendChild(messageBubble);
-                chatContent.appendChild(indicatorWrapper);
                 scrollToBottom();
-                return indicatorWrapper;
             }
-            function scrollToBottom() {
-                chatWindow.scrollTo({
-                    top: chatWindow.scrollHeight,
-                    behavior: 'smooth'
-                });
-            }
             function addCopyButton(pre) {
                 const button = document.createElement('button');
                 button.className = 'copy-code-btn';
                 button.textContent = 'Copy';
                 pre.appendChild(button);
                 button.addEventListener('click', () => {
-                    const code = pre.querySelector('code').innerText;
-                    navigator.clipboard.writeText(code).then(() => {
-                        button.textContent = 'Copied!';
-                        setTimeout(() => button.textContent = 'Copy', 2000);
-                    });
                 });
             }
-            // --- Text-to-Speech Logic ---
-            let currentAudio = null;
-            let currentPlayingButton = null;
             const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
             const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
             function addTextToSpeechControls(messageBubble, text) {
-                const ttsControls = messageBubble.querySelector('.tts-controls');
-                if (text.trim().length > 0) {
-                    const speakButton = document.createElement('button');
-                    speakButton.className = 'speak-btn px-4 py-2 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
-                    speakButton.title = 'Listen to this message';
-                    speakButton.setAttribute('data-state', 'play');
-                    speakButton.innerHTML = `${playIconSVG} <span>Play</span>`;
-                    ttsControls.appendChild(speakButton);
-                    speakButton.addEventListener('click', () => handleTTS(text, speakButton));
-                }
             }
             async function handleTTS(text, button) {
                 if (button === currentPlayingButton) {
                     if (currentAudio && !currentAudio.paused) {
                         currentAudio.pause();
-                        button.setAttribute('data-state', 'paused');
-                        button.innerHTML = `${playIconSVG} <span>Play</span>`;
                     } else if (currentAudio && currentAudio.paused) {
                         currentAudio.play();
-                        button.setAttribute('data-state', 'playing');
                         button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
                     }
                     return;
                 }
                 resetAllSpeakButtons();
                 currentPlayingButton = button;
-                button.setAttribute('data-state', 'loading');
                 button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
                 button.disabled = true;
                 try {
-                    const response = await fetch('/tts', {
-                        method: 'POST',
-                        headers: { 'Content-Type': 'application/json' },
-                        body: JSON.stringify({ text: text })
-                    });
                     if (!response.ok) throw new Error('Failed to generate audio.');
                     const blob = await response.blob();
-                    const audioUrl = URL.createObjectURL(blob);
-                    currentAudio = new Audio(audioUrl);
                     currentAudio.play();
-                    button.setAttribute('data-state', 'playing');
                     button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
-                    currentAudio.onended = () => {
-                        button.setAttribute('data-state', 'play');
-                        button.innerHTML = `${playIconSVG} <span>Play</span>`;
-                        currentAudio = null;
-                        currentPlayingButton = null;
-                    };
                 } catch (error) {
                     console.error('TTS Error:', error);
-                    button.setAttribute('data-state', 'error');
-                    button.innerHTML = `${playIconSVG} <span>Error</span>`;
-                    alert('Failed to play audio. Please try again.');
                     resetAllSpeakButtons();
                 } finally {
                     button.disabled = false;
@@ -599,8 +485,7 @@
             function resetAllSpeakButtons() {
                 document.querySelectorAll('.speak-btn').forEach(btn => {
-                    btn.setAttribute('data-state', 'play');
-                    btn.innerHTML = `${playIconSVG} <span>Play</span>`;
                     btn.disabled = false;
                 });
                 if (currentAudio) {

     <script src="https://cdn.tailwindcss.com"></script>
     <link rel="preconnect" href="https://fonts.googleapis.com">
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <style>
         :root {
             --input-bg: #e8f0fe;
             --user-bubble: #d9e7ff;
             --bot-bubble: #f1f3f4;
+            --select-bg: #ffffff;
+            --select-border: #dadce0;
+            --select-text: #1f1f1f;
         }
         .dark {
+            --background: #111827;
+            --foreground: #e5e7eb;
+            --primary: #3b82f6;
+            --primary-hover: #60a5fa;
+            --card: #1f2937;
+            --card-border: #4b5563;
+            --input-bg: #374151;
+            --user-bubble: #374151;
+            --bot-bubble: #374151;
+            --select-bg: #374151;
+            --select-border: #6b7280;
+            --select-text: #f3f4f6;
         }
         body {
+            font-family: 'Inter', 'Google Sans', 'Roboto', sans-serif;
             background-color: var(--background);
             color: var(--foreground);
             overflow: hidden;
         #chat-window::-webkit-scrollbar { width: 8px; }
         #chat-window::-webkit-scrollbar-track { background: transparent; }
+        #chat-window::-webkit-scrollbar-thumb { background-color: #4b5563; border-radius: 20px; }
         .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
         .drop-zone--over {
             border-color: var(--primary);
+            box-shadow: 0 0 20px rgba(59, 130, 246, 0.4);
         }
         .loader {
             width: 48px;
             height: 48px;
             100% { transform: rotate(360deg); }
         }
         .typing-indicator span {
             height: 10px;
             width: 10px;
             40% { transform: scale(1.0); }
         }
+        .markdown-content p { margin-bottom: 1rem; line-height: 1.75; }
+        .markdown-content h1, .markdown-content h2, .markdown-content h3 { font-weight: 600; margin-top: 1.5rem; margin-bottom: 0.75rem; line-height: 1.3; }
+        .markdown-content h1 { font-size: 1.5em; border-bottom: 1px solid var(--card-border); padding-bottom: 0.3rem;}
+        .markdown-content h2 { font-size: 1.25em; }
+        .markdown-content h3 { font-size: 1.1em; }
+        .markdown-content ul, .markdown-content ol { padding-left: 1.75rem; margin-bottom: 1rem; }
+        .markdown-content li { margin-bottom: 0.5rem; }
+        .markdown-content a { color: var(--primary); text-decoration: none; font-weight: 500; }
+        .markdown-content pre { position: relative; background-color: #2e2f32; border: 1px solid var(--card-border); border-radius: 0.5rem; margin-bottom: 1rem; font-size: 0.9em;}
+        .markdown-content pre code { background: none; padding: 1rem; display: block; overflow-x: auto; }
+        .markdown-content pre .copy-code-btn { position: absolute; top: 0.5rem; right: 0.5rem; background-color: #3c4043; border: 1px solid #5f6368; color: #e8eaed; padding: 0.3rem 0.6rem; border-radius: 0.25rem; cursor: pointer; opacity: 0; transition: opacity 0.2s; font-size: 0.8em;}
+        .markdown-content pre:hover .copy-code-btn { opacity: 1; }
         .tts-button-loader {
             width: 16px;
             height: 16px;
+            border: 2px solid currentColor;
             border-radius: 50%;
             display: inline-block;
             box-sizing: border-box;
             animation: rotation 0.8s linear infinite;
+            border-bottom-color: transparent;
+        }
+        .select-wrapper {
+            position: relative;
+        }
+        .select-wrapper select {
+            background-color: var(--select-bg);
+            border: 1px solid var(--select-border);
+            color: var(--select-text);
+            padding: 0.75rem 2.5rem 0.75rem 1rem;
+            border-radius: 0.75rem;
+            font-size: 0.875rem;
+            width: 100%;
+            appearance: none;
+            -webkit-appearance: none;
+            transition: all 0.2s ease-in-out;
+            cursor: pointer;
+            background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e");
+            background-position: right 0.75rem center;
+            background-repeat: no-repeat;
+            background-size: 1.25em 1.25em;
         }
     </style>
 </head>
 <body class="w-screen h-screen dark">
     <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
         <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
+            <!-- --- CORRECT HEADER (Center/Right Layout) --- -->
+            <header class="p-4 border-b border-[var(--card-border)] flex-shrink-0 flex justify-between items-center w-full">
+                <div class="w-1/4"></div>
+                <div class="w-1/2 text-center">
+                    <h1 class="text-xl font-medium tracking-wide">CogniChat</h1>
+                    <p id="chat-filename" class="text-xs text-gray-400 mt-1 truncate"></p>
+                </div>
+                <div id="chat-session-info" class="w-1/4 text-right text-xs">
+                    <!-- This will be populated by JavaScript -->
+                </div>
             </header>
+            <!-- --- END HEADER --- -->
             <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
+                <div id="chat-content" class="max-w-4xl mx-auto space-y-8"></div>
             </div>
+            <div class="p-4 flex-shrink-0 bg-opacity-50 backdrop-blur-md border-t border-[var(--card-border)]">
+                <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-lg border border-[var(--card-border)] focus-within:ring-2 focus-within:ring-[var(--primary)] transition-all">
                     <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
+                    <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2.5 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed" title="Send">
+                        <svg class="w-5 h-5" viewBox="0 0 20 20" fill="currentColor"><path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-8.707l-3-3a1 1 0 00-1.414 1.414L10.586 9H7a1 1 0 100 2h3.586l-1.293 1.293a1 1 0 101.414 1.414l3-3a1 1 0 000-1.414z" clip-rule="evenodd"></path></svg>
                     </button>
                 </form>
             </div>
         </div>
         <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
+            <div class="text-center max-w-xl w-full">
+                <h1 class="text-5xl font-bold mb-3 tracking-tight">CogniChat</h1>
+                <p class="text-lg text-gray-400 mb-8">Upload your documents to start a conversation.</p>
+                <div class="mb-8 p-5 bg-[var(--card)] rounded-2xl border border-[var(--card-border)] shadow-lg">
+                    <div class="flex flex-col sm:flex-row items-center gap-6">
+                        <div class="w-full sm:w-1/2">
+                            <div class="flex items-center gap-2 mb-2">
+                                <svg class="w-5 h-5 text-gray-400" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"><path d="M7 3a1 1 0 000 2h6a1 1 0 100-2H7zM4 7a1 1 0 011-1h10a1 1 0 110 2H5a1 1 0 01-1-1zM2 11a2 2 0 012-2h12a2 2 0 012 2v4a2 2 0 01-2 2H4a2 2 0 01-2-2v-4z" /></svg>
+                                <label for="model-select" class="block text-sm font-medium text-gray-300">Model</label>
+                            </div>
+                            <div class="select-wrapper">
+                                <select id="model-select" name="model_name">
+                                    <option value="moonshotai/kimi-k2-instruct" selected>Kimi Instruct</option>
+                                    <option value="openai/gpt-oss-20b">GPT OSS 20b</option>
+                                    <option value="llama-3.3-70b-versatile">Llama 3.3 70b</option>
+                                    <option value="llama-3.1-8b-instant">Llama 3.1 8b Instant</option>
+                                </select>
+                            </div>
+                        </div>
+                        <div class="w-full sm:w-1/2">
+                             <div class="flex items-center gap-2 mb-2">
+                                <svg class="w-5 h-5 text-gray-400" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"><path fill-rule="evenodd" d="M5.5 16a3.5 3.5 0 100-7 3.5 3.5 0 000 7zM12 5.5a3.5 3.5 0 11-7 0 3.5 3.5 0 017 0zM14.5 16a3.5 3.5 0 100-7 3.5 3.5 0 000 7z" clip-rule="evenodd" /></svg>
+                                <label for="temperature-select" class="block text-sm font-medium text-gray-300">Mode</label>
+                            </div>
+                             <div class="select-wrapper">
+                                <select id="temperature-select" name="temperature">
+                                    <option value="0.2" selected>0.2 - Precise</option>
+                                    <option value="0.4">0.4 - Confident</option>
+                                    <option value="0.6">0.6 - Balanced</option>
+                                    <option value="0.8">0.8 - Flexible</option>
+                                    <option value="1.0">1.0 - Creative</option>
+                                </select>
+                            </div>
+                        </div>
+                    </div>
+                    <p class="text-xs text-gray-500 mt-4 text-center">Higher creativity modes may reduce factual accuracy.</p>
+                </div>
+                <div id="drop-zone" class="w-full text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer hover:bg-[var(--card)] hover:border-[var(--primary)]">
+                    <div class="flex flex-col items-center justify-center pointer-events-none">
+                        <svg class="mx-auto h-12 w-12 text-gray-500" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
+                        <p class="mt-4 text-sm font-medium text-gray-400">Drag & drop files or <span class="text-[var(--primary)] font-semibold">click to upload</span></p>
+                        <p class="text-xs text-gray-400 mt-1">Supports PDF, DOCX, TXT</p>
+                        <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
+                    </div>
+                    <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx" multiple>
                 </div>
             </div>
         </div>
+        <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50">
             <div class="loader"></div>
             <p id="loading-text" class="mt-6 text-sm font-medium"></p>
+            <p id="loading-subtext" class="mt-2 text-xs text-gray-400"></p>
         </div>
     </main>
             const loadingOverlay = document.getElementById('loading-overlay');
             const loadingText = document.getElementById('loading-text');
             const loadingSubtext = document.getElementById('loading-subtext');
             const chatForm = document.getElementById('chat-form');
             const chatInput = document.getElementById('chat-input');
             const chatSubmitBtn = document.getElementById('chat-submit-btn');
             const chatWindow = document.getElementById('chat-window');
             const chatContent = document.getElementById('chat-content');
+            const modelSelect = document.getElementById('model-select');
+            const temperatureSelect = document.getElementById('temperature-select');
             const chatFilename = document.getElementById('chat-filename');
+            const chatSessionInfo = document.getElementById('chat-session-info');
+            let sessionId = sessionStorage.getItem('cognichat_session_id');
             dropZone.addEventListener('click', () => fileUploadInput.click());
             ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
+                dropZone.addEventListener(eventName, e => {e.preventDefault(); e.stopPropagation();});
             });
+            ['dragenter', 'dragover'].forEach(eventName => dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over')));
+            ['dragleave', 'drop'].forEach(eventName => dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over')));
             dropZone.addEventListener('drop', (e) => {
+                if (e.dataTransfer.files.length > 0) handleFiles(e.dataTransfer.files);
             });
             fileUploadInput.addEventListener('change', (e) => {
                 if (e.target.files.length > 0) handleFiles(e.target.files);
             });
             async function handleFiles(files) {
+                // Packs the chosen files together with the currently selected model and
+                // temperature into a single FormData, shows the selected file names in
+                // fileNameSpan, then waits for uploadAndProcessFiles() to finish.
+                // `files` only needs to be iterable with a `.name` on each item; the callers
+                // in this script pass the FileList from the drop event or the file input.
                 const formData = new FormData();
+                let fileNames = Array.from(files).map(f => f.name);
+                // Every file is appended under the same 'file' form key.
+                for (const file of files) { formData.append('file', file); }
+                formData.append('model_name', modelSelect.value);
+                formData.append('temperature', temperatureSelect.value);
                 fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
+                await uploadAndProcessFiles(formData);
             }
+            async function uploadAndProcessFiles(formData) {
                 loadingOverlay.classList.remove('hidden');
+                loadingText.textContent = `Processing document(s)...`;
+                loadingSubtext.textContent = "Creating a knowledge base may take a minute. Please hold on tight!";
                 try {
                     const response = await fetch('/upload', { method: 'POST', body: formData });
                     const result = await response.json();
                     if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
+                    sessionId = result.session_id;
+                    sessionStorage.setItem('cognichat_session_id', sessionId);
+                    chatFilename.innerHTML = `Chatting with: <strong>${result.filename}</strong>`;
+                    chatFilename.title = result.filename;
+                    chatSessionInfo.innerHTML = `
+                        <span class="text-gray-500 dark:text-gray-500 italic block hover:text-gray-300 transition-colors cursor-pointer" onclick="location.reload()">
+                            Refresh to change settings
+                        </span>`;
+                    const modelOption = modelSelect.querySelector(`option[value="${result.model_name}"]`);
+                    const simpleModelName = modelOption ? modelOption.textContent : result.model_name;
+                    const modelInfo = {
+                        model: result.model_name,
+                        mode: result.mode,
+                        simpleModelName: simpleModelName
+                    };
                     uploadContainer.classList.add('hidden');
                     chatContainer.classList.remove('hidden');
+                    appendMessage("I've analyzed your documents. What would you like to know?", "bot", modelInfo);
                 } catch (error) {
                     console.error('Upload error:', error);
                     alert(`Error: ${error.message}`);
                 } finally {
                     loadingOverlay.classList.add('hidden');
                     fileNameSpan.textContent = '';
                     fileUploadInput.value = '';
                 }
             }
             chatForm.addEventListener('submit', async (e) => {
                 e.preventDefault();
                 const question = chatInput.value.trim();
                 chatSubmitBtn.disabled = true;
                 const typingIndicator = showTypingIndicator();
                 try {
                     const response = await fetch('/chat', {
                         method: 'POST',
                         headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ question, session_id: sessionId }),
                     });
                     if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
+                    const result = await response.json();
+                    const modelOption = modelSelect.querySelector(`option[value="${result.model_name}"]`);
+                    const simpleModelName = modelOption ? modelOption.textContent : result.model_name;
+                    const modelInfo = {
+                        model: result.model_name,
+                        mode: result.mode,
+                        simpleModelName: simpleModelName
+                    };
                     typingIndicator.remove();
+                    const botMessageContainer = appendMessage('', 'bot', modelInfo);
+                    const contentDiv = botMessageContainer.querySelector('.markdown-content');
+                    contentDiv.innerHTML = marked.parse(result.answer);
                     contentDiv.querySelectorAll('pre').forEach(addCopyButton);
+                    scrollToBottom();
+                    addTextToSpeechControls(botMessageContainer, result.answer);
                 } catch (error) {
                     console.error('Chat error:', error);
+                    typingIndicator.remove();
+                    appendMessage(`Error: ${error.message}`, 'bot');
                 } finally {
                     chatInput.disabled = false;
                     chatSubmitBtn.disabled = false;
                 }
             });
+            // --- FINAL, CORRECT appendMessage function ---
+            function appendMessage(text, sender, modelInfo = null) {
                 const messageWrapper = document.createElement('div');
+                const iconSVG = sender === 'user'
+                    ? `<div class="bg-blue-200 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-700 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
                     : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
+                let senderHTML;
+                if (sender === 'user') {
+                    senderHTML = '<p class="font-medium text-sm mb-1">You</p>';
+                } else {
+                    let modelInfoHTML = '';
+                    if (modelInfo && modelInfo.simpleModelName) {
+                        modelInfoHTML = `
+                            <span class="ml-2 text-xs font-normal text-gray-400">
+                                (Model: ${modelInfo.simpleModelName} • Mode: ${modelInfo.mode})
+                            </span>
+                        `;
+                    }
+                    senderHTML = `<div class="font-medium text-sm mb-1 flex items-center">CogniChat ${modelInfoHTML}</div>`;
                 }
+                messageWrapper.className = `flex items-start gap-4`;
+                messageWrapper.innerHTML = `
+                    ${iconSVG}
+                    <div class="flex-1 pt-1">
+                        ${senderHTML}
+                        <div class="text-base markdown-content">${text ? marked.parse(text) : ''}</div>
+                        <div class="tts-controls mt-2"></div>
+                    </div>
+                `;
                 chatContent.appendChild(messageWrapper);
                 scrollToBottom();
+                return messageWrapper.querySelector('.flex-1');
             }
             function showTypingIndicator() {
+                // Appends a "CogniChat is thinking..." placeholder row (id 'typing-indicator')
+                // to chatContent, scrolls to it, and returns the element so the caller can
+                // remove() it once a response or an error arrives.
                 const indicator = document.createElement('div');
+                indicator.id = 'typing-indicator';
+                indicator.className = `flex items-start gap-4`;
+                indicator.innerHTML = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div><div class="flex-1 pt-1"><p class="font-medium text-sm mb-1">CogniChat is thinking...</p><div class="typing-indicator"><span></span><span></span><span></span></div></div>`;
+                chatContent.appendChild(indicator);
                 scrollToBottom();
+                return indicator;
             }
+            function scrollToBottom() { chatWindow.scrollTo({ top: chatWindow.scrollHeight, behavior: 'smooth' }); }
             function addCopyButton(pre) {
                 const button = document.createElement('button');
                 button.className = 'copy-code-btn';
                 button.textContent = 'Copy';
                 pre.appendChild(button);
                 button.addEventListener('click', () => {
+                    navigator.clipboard.writeText(pre.querySelector('code').innerText)
+                        .then(() => {
+                            button.textContent = 'Copied!';
+                            setTimeout(() => button.textContent = 'Copy', 2000);
+                        });
                 });
             }
+            // (TTS functions remain unchanged)
+            // Shared text-to-speech state: handleTTS() stores the active Audio object in
+            // currentAudio and the button that started it in currentPlayingButton.
+            let currentAudio, currentPlayingButton;
             // Icon markup placed in front of the speak buttons' "Listen" label (play triangle).
             const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
             // Icon markup placed in front of the speak buttons' "Pause" label (two bars).
             const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
             function addTextToSpeechControls(messageBubble, text) {
+                // Adds a "Listen" button to the message's `.tts-controls` container.
+                // `messageBubble` must contain a `.tts-controls` element; `text` is what is
+                // passed to handleTTS() when the button is clicked.
+                // Text that trims to nothing gets no button.
+                if (!text.trim()) return;
+                const speakButton = document.createElement('button');
+                // The 'speak-btn' class is what resetAllSpeakButtons() selects on.
+                speakButton.className = 'speak-btn mt-2 px-3 py-1.5 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50';
+                speakButton.title = 'Listen to this message';
+                speakButton.innerHTML = `${playIconSVG} <span>Listen</span>`;
+                messageBubble.querySelector('.tts-controls').appendChild(speakButton);
+                speakButton.addEventListener('click', () => handleTTS(text, speakButton));
             }
             async function handleTTS(text, button) {
                 if (button === currentPlayingButton) {
                     if (currentAudio && !currentAudio.paused) {
                         currentAudio.pause();
+                        button.innerHTML = `${playIconSVG} <span>Listen</span>`;
                     } else if (currentAudio && currentAudio.paused) {
                         currentAudio.play();
                         button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
                     }
                     return;
                 }
                 resetAllSpeakButtons();
                 currentPlayingButton = button;
                 button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
                 button.disabled = true;
                 try {
+                    const response = await fetch('/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text }) });
                     if (!response.ok) throw new Error('Failed to generate audio.');
                     const blob = await response.blob();
+                    currentAudio = new Audio(URL.createObjectURL(blob));
                     currentAudio.play();
                     button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
+                    currentAudio.onended = resetAllSpeakButtons;
                 } catch (error) {
                     console.error('TTS Error:', error);
                     resetAllSpeakButtons();
                 } finally {
                     button.disabled = false;
             function resetAllSpeakButtons() {
                 document.querySelectorAll('.speak-btn').forEach(btn => {
+                    btn.innerHTML = `${playIconSVG} <span>Listen</span>`;
                     btn.disabled = false;
                 });
                 if (currentAudio) {