riteshraut
commited on
Commit
·
becc8f7
1
Parent(s):
6579ca3
fix/new update
Browse files- - Copy.gitattributes +35 -0
- - Copy.gitignore +6 -0
- .env - Copy.example +2 -0
- .env.example +2 -0
- .gitignore +6 -0
- Dockerfile +45 -0
- README.md +560 -6
- app.py +281 -0
- diagnose.py +125 -0
- packages.txt +1 -0
- rag_processor.py +95 -0
- requirements-simple.txt +17 -0
- requirements.txt +0 -0
- templates/index.html +615 -0
- templates/index.html.backup +974 -0
- test_dependencies.py +76 -0
- test_hf_spaces_session.py +95 -0
- test_upload_permissions.py +82 -0
- verify_hf_spaces_ready.py +260 -0
- Copy.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
- Copy.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
/uploads/
|
| 3 |
+
/vectorstores/
|
| 4 |
+
/.cache/
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.pyc
|
.env - Copy.example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copy this file to .env and fill in your API key
|
| 2 |
+
GROQ_API_KEY=your_groq_api_key_here
|
.env.example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copy this file to .env and fill in your API key
|
| 2 |
+
GROQ_API_KEY=your_groq_api_key_here
|
.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
/uploads/
|
| 3 |
+
/vectorstores/
|
| 4 |
+
/.cache/
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.pyc
|
Dockerfile
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
curl \
|
| 9 |
+
git \
|
| 10 |
+
libfaiss-dev \
|
| 11 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
+
|
| 13 |
+
# Create a non-root user early
|
| 14 |
+
RUN useradd --create-home --shell /bin/bash --uid 1000 appuser
|
| 15 |
+
|
| 16 |
+
# Copy and install Python requirements as root first
|
| 17 |
+
COPY requirements.txt .
|
| 18 |
+
RUN pip install --no-cache-dir --upgrade pip
|
| 19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
+
|
| 21 |
+
# Set environment variables for HuggingFace models and cache
|
| 22 |
+
ENV HF_HOME=/home/appuser/.cache/huggingface
|
| 23 |
+
ENV HF_HUB_CACHE=/home/appuser/.cache/huggingface/hub
|
| 24 |
+
ENV TRANSFORMERS_CACHE=/home/appuser/.cache/transformers
|
| 25 |
+
ENV SENTENCE_TRANSFORMERS_HOME=/home/appuser/.cache/sentence_transformers
|
| 26 |
+
|
| 27 |
+
# Create cache directories in the user's home directory
|
| 28 |
+
RUN mkdir -p /home/appuser/.cache/huggingface/hub \
|
| 29 |
+
/home/appuser/.cache/transformers \
|
| 30 |
+
/home/appuser/.cache/sentence_transformers \
|
| 31 |
+
/app/uploads && \
|
| 32 |
+
chown -R appuser:appuser /home/appuser/.cache /app && \
|
| 33 |
+
chmod -R 755 /home/appuser/.cache /app
|
| 34 |
+
|
| 35 |
+
# Copy application code and set ownership
|
| 36 |
+
COPY --chown=appuser:appuser . .
|
| 37 |
+
|
| 38 |
+
# Switch to non-root user
|
| 39 |
+
USER appuser
|
| 40 |
+
|
| 41 |
+
# Expose port 7860
|
| 42 |
+
EXPOSE 7860
|
| 43 |
+
|
| 44 |
+
# Run the application
|
| 45 |
+
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,12 +1,566 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
-
|
| 10 |
---
|
|
|
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: CogniChat - Chat with Your Documents
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
+
app_port: 7860
|
| 10 |
---
|
| 11 |
+
# 🤖 CogniChat - Intelligent Document Chat System
|
| 12 |
|
| 13 |
+
<div align="center">
|
| 14 |
+
|
| 15 |
+

|
| 16 |
+

|
| 17 |
+

|
| 18 |
+

|
| 19 |
+
|
| 20 |
+
**Transform your documents into interactive conversations powered by advanced RAG technology**
|
| 21 |
+
|
| 22 |
+
<p align="center">
|
| 23 |
+
<img src="Document_reader.gif" width="100%" alt="CogniChat Demo">
|
| 24 |
+
</p>
|
| 25 |
+
|
| 26 |
+
[Features](#-features) • [Quick Start](#-quick-start) • [Architecture](#-architecture) • [Deployment](#-deployment) • [API](#-api-reference)
|
| 27 |
+
|
| 28 |
+
</div>
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## 📋 Table of Contents
|
| 33 |
+
|
| 34 |
+
- [Overview](#-overview)
|
| 35 |
+
- [Features](#-features)
|
| 36 |
+
- [Architecture](#-architecture)
|
| 37 |
+
- [Technology Stack](#-technology-stack)
|
| 38 |
+
- [Quick Start](#-quick-start)
|
| 39 |
+
- [Deployment](#-deployment)
|
| 40 |
+
- [Configuration](#-configuration)
|
| 41 |
+
- [API Reference](#-api-reference)
|
| 42 |
+
- [Troubleshooting](#-troubleshooting)
|
| 43 |
+
- [Contributing](#-contributing)
|
| 44 |
+
- [License](#-license)
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## 🎯 Overview
|
| 49 |
+
|
| 50 |
+
CogniChat is a production-ready, intelligent document chat application that leverages **Retrieval Augmented Generation (RAG)** to enable natural conversations with your documents. Built with enterprise-grade technologies, it provides accurate, context-aware responses from your document corpus.
|
| 51 |
+
|
| 52 |
+
### Why CogniChat?
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
- **🔉 Audio Overview of Your Document**: Simply ask a question and listen to the answer as audio. Now your documents can speak to you.
|
| 56 |
+
- **🎯 Accurate Retrieval**: Hybrid search combining BM25 and FAISS for optimal results
|
| 57 |
+
- **💬 Conversational Memory**: Maintains context across multiple interactions
|
| 58 |
+
- **📄 Multi-Format Support**: Handles PDF, DOCX, TXT, and image files
|
| 59 |
+
- **🚀 Production Ready**: Docker support, comprehensive error handling, and security best practices
|
| 60 |
+
- **🎨 Modern UI**: Responsive design with dark mode and real-time streaming
|
| 61 |
+
|
| 62 |
+
---
|
| 63 |
+
|
| 64 |
+
## ✨ Features
|
| 65 |
+
|
| 66 |
+
### Core Capabilities
|
| 67 |
+
|
| 68 |
+
| Feature | Description |
|
| 69 |
+
|---------|-------------|
|
| 70 |
+
| **Multi-Format Processing** | Upload and process PDF, DOCX, TXT, and image files |
|
| 71 |
+
| **Hybrid Search** | Combines BM25 (keyword) and FAISS (semantic) for superior retrieval |
|
| 72 |
+
| **Conversational AI** | Powered by Groq's Llama 3.1 for intelligent responses |
|
| 73 |
+
| **Memory Management** | Maintains chat history for contextual conversations |
|
| 74 |
+
| **Text-to-Speech** | Built-in TTS for audio playback of responses |
|
| 75 |
+
| **Streaming Responses** | Real-time token streaming for better UX |
|
| 76 |
+
| **Document Chunking** | Intelligent text splitting for optimal context windows |
|
| 77 |
+
|
| 78 |
+
### Advanced Features
|
| 79 |
+
|
| 80 |
+
- **Semantic Embeddings**: HuggingFace `all-MiniLM-L6-v2` for accurate vector representations
|
| 81 |
+
- **Reranking**: Contextual compression for improved relevance
|
| 82 |
+
- **Error Handling**: Comprehensive fallback mechanisms and error recovery
|
| 83 |
+
- **Security**: Non-root Docker execution and environment-based secrets
|
| 84 |
+
- **Scalability**: Optimized for both local and cloud deployments
|
| 85 |
+
|
| 86 |
+
---
|
| 87 |
+
|
| 88 |
+
## 🏗 Architecture
|
| 89 |
+
|
| 90 |
+
### RAG Pipeline Overview
|
| 91 |
+
|
| 92 |
+
```mermaid
|
| 93 |
+
graph TB
|
| 94 |
+
A[Document Upload] --> B[Document Processing]
|
| 95 |
+
B --> C[Text Extraction]
|
| 96 |
+
C --> D[Chunking Strategy]
|
| 97 |
+
D --> E[Embedding Generation]
|
| 98 |
+
E --> F[Vector Store FAISS]
|
| 99 |
+
|
| 100 |
+
G[User Query] --> H[Query Embedding]
|
| 101 |
+
H --> I[Hybrid Retrieval]
|
| 102 |
+
|
| 103 |
+
F --> I
|
| 104 |
+
J[BM25 Index] --> I
|
| 105 |
+
|
| 106 |
+
I --> K[Reranking]
|
| 107 |
+
K --> L[Context Assembly]
|
| 108 |
+
L --> M[LLM Groq Llama 3.1]
|
| 109 |
+
M --> N[Response Generation]
|
| 110 |
+
N --> O[Streaming Output]
|
| 111 |
+
|
| 112 |
+
P[Chat History] --> M
|
| 113 |
+
N --> P
|
| 114 |
+
|
| 115 |
+
style A fill:#e1f5ff
|
| 116 |
+
style G fill:#e1f5ff
|
| 117 |
+
style F fill:#ffe1f5
|
| 118 |
+
style J fill:#ffe1f5
|
| 119 |
+
style M fill:#f5e1ff
|
| 120 |
+
style O fill:#e1ffe1
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
### System Architecture
|
| 124 |
+
|
| 125 |
+
```mermaid
|
| 126 |
+
graph LR
|
| 127 |
+
A[Client Browser] -->|HTTP/WebSocket| B[Flask Server]
|
| 128 |
+
B --> C[Document Processor]
|
| 129 |
+
B --> D[RAG Engine]
|
| 130 |
+
B --> E[TTS Service]
|
| 131 |
+
|
| 132 |
+
C --> F[(File Storage)]
|
| 133 |
+
D --> G[(FAISS Vector DB)]
|
| 134 |
+
D --> H[(BM25 Index)]
|
| 135 |
+
D --> I[Groq API]
|
| 136 |
+
|
| 137 |
+
J[HuggingFace Models] --> D
|
| 138 |
+
|
| 139 |
+
style B fill:#4a90e2
|
| 140 |
+
style D fill:#e24a90
|
| 141 |
+
style I fill:#90e24a
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### Data Flow
|
| 145 |
+
|
| 146 |
+
1. **Document Ingestion**: Files are uploaded and validated
|
| 147 |
+
2. **Processing Pipeline**: Text extraction → Chunking → Embedding
|
| 148 |
+
3. **Indexing**: Dual indexing (FAISS + BM25) for hybrid search
|
| 149 |
+
4. **Query Processing**: User queries are embedded and searched
|
| 150 |
+
5. **Retrieval**: Top-k relevant chunks retrieved using hybrid approach
|
| 151 |
+
6. **Generation**: LLM generates contextual responses with citations
|
| 152 |
+
7. **Streaming**: Responses streamed back to client in real-time
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## 🛠 Technology Stack
|
| 157 |
+
|
| 158 |
+
### Backend
|
| 159 |
+
|
| 160 |
+
| Component | Technology | Purpose |
|
| 161 |
+
|-----------|-----------|---------|
|
| 162 |
+
| **Framework** | Flask 2.3+ | Web application framework |
|
| 163 |
+
| **RAG** | LangChain | RAG pipeline orchestration |
|
| 164 |
+
| **Vector DB** | FAISS | Fast similarity search |
|
| 165 |
+
| **Keyword Search** | BM25 | Sparse retrieval |
|
| 166 |
+
| **LLM** | Groq Llama 3.1 | Response generation |
|
| 167 |
+
| **Embeddings** | HuggingFace Transformers | Semantic embeddings |
|
| 168 |
+
| **Doc Processing** | Unstructured, PyPDF, python-docx | Multi-format parsing |
|
| 169 |
+
|
| 170 |
+
### Frontend
|
| 171 |
+
|
| 172 |
+
| Component | Technology |
|
| 173 |
+
|-----------|-----------|
|
| 174 |
+
| **UI Framework** | TailwindCSS |
|
| 175 |
+
| **JavaScript** | Vanilla ES6+ |
|
| 176 |
+
| **Icons** | Font Awesome |
|
| 177 |
+
| **Markdown** | Marked.js |
|
| 178 |
+
|
| 179 |
+
### Infrastructure
|
| 180 |
+
|
| 181 |
+
- **Containerization**: Docker + Docker Compose
|
| 182 |
+
- **Deployment**: HuggingFace Spaces, local, cloud-agnostic
|
| 183 |
+
- **Security**: Environment-based secrets, non-root execution
|
| 184 |
+
|
| 185 |
+
---
|
| 186 |
+
|
| 187 |
+
## 🚀 Quick Start
|
| 188 |
+
|
| 189 |
+
### Prerequisites
|
| 190 |
+
|
| 191 |
+
- Python 3.9+
|
| 192 |
+
- Docker (optional, recommended)
|
| 193 |
+
- Groq API Key ([Get one here](https://console.groq.com/keys))
|
| 194 |
+
|
| 195 |
+
### Installation Methods
|
| 196 |
+
|
| 197 |
+
#### 🐳 Method 1: Docker (Recommended)
|
| 198 |
+
|
| 199 |
+
```bash
|
| 200 |
+
# Clone the repository
|
| 201 |
+
git clone https://github.com/RautRitesh/Chat-with-docs
|
| 202 |
+
cd Chat-with-docs
|
| 203 |
+
|
| 204 |
+
# Create environment file
|
| 205 |
+
cp .env.example .env
|
| 206 |
+
|
| 207 |
+
# Add your Groq API key to .env
|
| 208 |
+
echo "GROQ_API_KEY=your_actual_api_key_here" >> .env
|
| 209 |
+
|
| 210 |
+
# Build and run with Docker Compose
|
| 211 |
+
docker-compose up -d
|
| 212 |
+
|
| 213 |
+
# Or build manually
|
| 214 |
+
docker build -t cognichat .
|
| 215 |
+
docker run -p 7860:7860 --env-file .env cognichat
|
| 216 |
+
```
|
| 217 |
+
|
| 218 |
+
#### 🐍 Method 2: Local Python Environment
|
| 219 |
+
|
| 220 |
+
```bash
|
| 221 |
+
# Clone the repository
|
| 222 |
+
git clone https://github.com/RautRitesh/Chat-with-docs
|
| 223 |
+
cd Chat-with-docs
|
| 224 |
+
|
| 225 |
+
# Create virtual environment
|
| 226 |
+
python -m venv venv
|
| 227 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 228 |
+
|
| 229 |
+
# Install dependencies
|
| 230 |
+
pip install -r requirements.txt
|
| 231 |
+
|
| 232 |
+
# Set environment variables
|
| 233 |
+
export GROQ_API_KEY=your_actual_api_key_here
|
| 234 |
+
|
| 235 |
+
# Run the application
|
| 236 |
+
python app.py
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
#### 🤗 Method 3: HuggingFace Spaces
|
| 240 |
+
|
| 241 |
+
1. Fork this repository
|
| 242 |
+
2. Create a new Space on [HuggingFace](https://huggingface.co/spaces)
|
| 243 |
+
3. Link your forked repository
|
| 244 |
+
4. Add `GROQ_API_KEY` in Settings → Repository Secrets
|
| 245 |
+
5. Space will auto-deploy!
|
| 246 |
+
|
| 247 |
+
### First Steps
|
| 248 |
+
|
| 249 |
+
1. Open `http://localhost:7860` in your browser
|
| 250 |
+
2. Upload a document (PDF, DOCX, TXT, or image)
|
| 251 |
+
3. Wait for processing (progress indicator will show status)
|
| 252 |
+
4. Start chatting with your document!
|
| 253 |
+
5. Use the 🔊 button to hear responses via TTS
|
| 254 |
+
|
| 255 |
+
---
|
| 256 |
+
|
| 257 |
+
## 📦 Deployment
|
| 258 |
+
|
| 259 |
+
### Environment Variables
|
| 260 |
+
|
| 261 |
+
Create a `.env` file with the following variables:
|
| 262 |
+
|
| 263 |
+
```bash
|
| 264 |
+
# Required
|
| 265 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 266 |
+
|
| 267 |
+
# Optional
|
| 268 |
+
PORT=7860
|
| 269 |
+
HF_HOME=/tmp/huggingface_cache # For HF Spaces
|
| 270 |
+
FLASK_DEBUG=0 # Set to 1 for development
|
| 271 |
+
MAX_UPLOAD_SIZE=10485760 # 10MB default
|
| 272 |
+
```
|
| 273 |
+
|
| 274 |
+
### Docker Deployment
|
| 275 |
+
|
| 276 |
+
```bash
|
| 277 |
+
# Production build
|
| 278 |
+
docker build -t cognichat:latest .
|
| 279 |
+
|
| 280 |
+
# Run with resource limits
|
| 281 |
+
docker run -d \
|
| 282 |
+
--name cognichat \
|
| 283 |
+
-p 7860:7860 \
|
| 284 |
+
--env-file .env \
|
| 285 |
+
--memory="2g" \
|
| 286 |
+
--cpus="1.5" \
|
| 287 |
+
cognichat:latest
|
| 288 |
+
```
|
| 289 |
+
|
| 290 |
+
### Docker Compose
|
| 291 |
+
|
| 292 |
+
```yaml
|
| 293 |
+
version: '3.8'
|
| 294 |
+
|
| 295 |
+
services:
|
| 296 |
+
cognichat:
|
| 297 |
+
build: .
|
| 298 |
+
ports:
|
| 299 |
+
- "7860:7860"
|
| 300 |
+
environment:
|
| 301 |
+
- GROQ_API_KEY=${GROQ_API_KEY}
|
| 302 |
+
volumes:
|
| 303 |
+
- ./data:/app/data
|
| 304 |
+
restart: unless-stopped
|
| 305 |
+
```
|
| 306 |
+
|
| 307 |
+
### HuggingFace Spaces Configuration
|
| 308 |
+
|
| 309 |
+
Configure the following for your Space:
|
| 310 |
+
|
| 311 |
+
**app_port** in `README.md` header:
|
| 312 |
+
```yaml
|
| 313 |
+
app_port: 7860
|
| 314 |
+
```
|
| 315 |
+
|
| 316 |
+
**Repository Secrets**:
|
| 317 |
+
- `GROQ_API_KEY`: Your Groq API key
|
| 318 |
+
|
| 319 |
+
The application automatically detects the HF Spaces environment and adjusts paths accordingly.
|
| 320 |
+
|
| 321 |
+
---
|
| 322 |
+
|
| 323 |
+
## ⚙️ Configuration
|
| 324 |
+
|
| 325 |
+
### Document Processing Settings
|
| 326 |
+
|
| 327 |
+
```python
|
| 328 |
+
# In app.py - Customize these settings
|
| 329 |
+
CHUNK_SIZE = 1000 # Characters per chunk
|
| 330 |
+
CHUNK_OVERLAP = 200 # Overlap between chunks
|
| 331 |
+
EMBEDDING_MODEL = "sentence-transformers/all-miniLM-L6-v2"
|
| 332 |
+
RETRIEVER_K = 5 # Number of chunks to retrieve
|
| 333 |
+
```
|
| 334 |
+
|
| 335 |
+
### Model Configuration
|
| 336 |
+
|
| 337 |
+
```python
|
| 338 |
+
# LLM Settings
|
| 339 |
+
LLM_PROVIDER = "groq"
|
| 340 |
+
MODEL_NAME = "llama-3.1-70b-versatile"
|
| 341 |
+
TEMPERATURE = 0.7
|
| 342 |
+
MAX_TOKENS = 2048
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
### Search Configuration
|
| 346 |
+
|
| 347 |
+
```python
|
| 348 |
+
# Hybrid Search Weights
|
| 349 |
+
FAISS_WEIGHT = 0.6 # Semantic search weight
|
| 350 |
+
BM25_WEIGHT = 0.4 # Keyword search weight
|
| 351 |
+
```
|
| 352 |
+
|
| 353 |
+
---
|
| 354 |
+
|
| 355 |
+
## 📚 API Reference
|
| 356 |
+
|
| 357 |
+
### Endpoints
|
| 358 |
+
|
| 359 |
+
#### Upload Document
|
| 360 |
+
|
| 361 |
+
```http
|
| 362 |
+
POST /upload
|
| 363 |
+
Content-Type: multipart/form-data
|
| 364 |
+
|
| 365 |
+
{
|
| 366 |
+
"file": <binary>
|
| 367 |
+
}
|
| 368 |
+
```
|
| 369 |
+
|
| 370 |
+
**Response**:
|
| 371 |
+
```json
|
| 372 |
+
{
|
| 373 |
+
"status": "success",
|
| 374 |
+
"message": "Document processed successfully",
|
| 375 |
+
"filename": "example.pdf",
|
| 376 |
+
"chunks": 45
|
| 377 |
+
}
|
| 378 |
+
```
|
| 379 |
+
|
| 380 |
+
#### Chat
|
| 381 |
+
|
| 382 |
+
```http
|
| 383 |
+
POST /chat
|
| 384 |
+
Content-Type: application/json
|
| 385 |
+
|
| 386 |
+
{
|
| 387 |
+
"message": "What is the main topic?",
|
| 388 |
+
"stream": true
|
| 389 |
+
}
|
| 390 |
+
```
|
| 391 |
+
|
| 392 |
+
**Response** (Streaming):
|
| 393 |
+
```
|
| 394 |
+
data: {"token": "The", "done": false}
|
| 395 |
+
data: {"token": " main", "done": false}
|
| 396 |
+
data: {"token": " topic", "done": false}
|
| 397 |
+
data: {"done": true}
|
| 398 |
+
```
|
| 399 |
+
|
| 400 |
+
#### Clear Session
|
| 401 |
+
|
| 402 |
+
```http
|
| 403 |
+
POST /clear
|
| 404 |
+
```
|
| 405 |
+
|
| 406 |
+
**Response**:
|
| 407 |
+
```json
|
| 408 |
+
{
|
| 409 |
+
"status": "success",
|
| 410 |
+
"message": "Session cleared"
|
| 411 |
+
}
|
| 412 |
+
```
|
| 413 |
+
|
| 414 |
+
---
|
| 415 |
+
|
| 416 |
+
## 🔧 Troubleshooting
|
| 417 |
+
|
| 418 |
+
### Common Issues
|
| 419 |
+
|
| 420 |
+
#### 1. Permission Errors in Docker
|
| 421 |
+
|
| 422 |
+
**Problem**: `Permission denied` when writing to cache directories
|
| 423 |
+
|
| 424 |
+
**Solution**:
|
| 425 |
+
```bash
|
| 426 |
+
# Rebuild with proper permissions
|
| 427 |
+
docker build --no-cache -t cognichat .
|
| 428 |
+
|
| 429 |
+
# Or run with volume permissions
|
| 430 |
+
docker run -v $(pwd)/cache:/tmp/huggingface_cache \
|
| 431 |
+
--user $(id -u):$(id -g) \
|
| 432 |
+
cognichat
|
| 433 |
+
```
|
| 434 |
+
|
| 435 |
+
#### 2. Model Loading Fails
|
| 436 |
+
|
| 437 |
+
**Problem**: Cannot download HuggingFace models
|
| 438 |
+
|
| 439 |
+
**Solution**:
|
| 440 |
+
```bash
|
| 441 |
+
# Pre-download models
|
| 442 |
+
python test_embeddings.py
|
| 443 |
+
|
| 444 |
+
# Or use HF_HOME environment variable
|
| 445 |
+
export HF_HOME=/path/to/writable/directory
|
| 446 |
+
```
|
| 447 |
+
|
| 448 |
+
#### 3. Chat Returns 400 Error
|
| 449 |
+
|
| 450 |
+
**Problem**: Upload directory not writable (common in HF Spaces)
|
| 451 |
+
|
| 452 |
+
**Solution**: The application now automatically uses `/tmp/uploads` in the HF Spaces environment. Ensure the latest version is deployed.
|
| 453 |
+
|
| 454 |
+
#### 4. API Key Invalid
|
| 455 |
+
|
| 456 |
+
**Problem**: Groq API returns authentication error
|
| 457 |
+
|
| 458 |
+
**Solution**:
|
| 459 |
+
- Verify key at [Groq Console](https://console.groq.com/keys)
|
| 460 |
+
- Check `.env` file has correct format: `GROQ_API_KEY=gsk_...`
|
| 461 |
+
- Restart application after updating key
|
| 462 |
+
|
| 463 |
+
### Debug Mode
|
| 464 |
+
|
| 465 |
+
Enable detailed logging:
|
| 466 |
+
|
| 467 |
+
```bash
|
| 468 |
+
export FLASK_DEBUG=1
|
| 469 |
+
export LANGCHAIN_VERBOSE=true
|
| 470 |
+
python app.py
|
| 471 |
+
```
|
| 472 |
+
|
| 473 |
+
---
|
| 474 |
+
|
| 475 |
+
## 🧪 Testing
|
| 476 |
+
|
| 477 |
+
```bash
|
| 478 |
+
# Run test suite
|
| 479 |
+
pytest tests/
|
| 480 |
+
|
| 481 |
+
# Test embedding model
|
| 482 |
+
python test_embeddings.py
|
| 483 |
+
|
| 484 |
+
# Test document processing
|
| 485 |
+
pytest tests/test_document_processor.py
|
| 486 |
+
|
| 487 |
+
# Integration tests
|
| 488 |
+
pytest tests/test_integration.py
|
| 489 |
+
```
|
| 490 |
+
|
| 491 |
+
---
|
| 492 |
+
|
| 493 |
+
## 🤝 Contributing
|
| 494 |
+
|
| 495 |
+
We welcome contributions! Please follow these steps:
|
| 496 |
+
|
| 497 |
+
1. Fork the repository
|
| 498 |
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
| 499 |
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
| 500 |
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
| 501 |
+
5. Open a Pull Request
|
| 502 |
+
|
| 503 |
+
### Development Guidelines
|
| 504 |
+
|
| 505 |
+
- Follow PEP 8 style guide
|
| 506 |
+
- Add tests for new features
|
| 507 |
+
- Update documentation
|
| 508 |
+
- Ensure Docker build succeeds
|
| 509 |
+
|
| 510 |
+
---
|
| 511 |
+
|
| 512 |
+
## 📝 Changelog
|
| 513 |
+
|
| 514 |
+
### Version 2.0 (October 2025)
|
| 515 |
+
|
| 516 |
+
✅ **Major Improvements**:
|
| 517 |
+
- Fixed Docker permission issues
|
| 518 |
+
- HuggingFace Spaces compatibility
|
| 519 |
+
- Enhanced error handling
|
| 520 |
+
- Multiple model loading fallbacks
|
| 521 |
+
- Improved security (non-root execution)
|
| 522 |
+
|
| 523 |
+
✅ **Bug Fixes**:
|
| 524 |
+
- Upload directory write permissions
|
| 525 |
+
- Cache directory access
|
| 526 |
+
- Model initialization reliability
|
| 527 |
+
|
| 528 |
+
### Version 1.0 (Initial Release)
|
| 529 |
+
|
| 530 |
+
- Basic RAG functionality
|
| 531 |
+
- PDF and DOCX support
|
| 532 |
+
- FAISS vector store
|
| 533 |
+
- Conversational memory
|
| 534 |
+
|
| 535 |
+
---
|
| 536 |
+
|
| 537 |
+
## 📄 License
|
| 538 |
+
|
| 539 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 540 |
+
|
| 541 |
+
---
|
| 542 |
+
|
| 543 |
+
## 🙏 Acknowledgments
|
| 544 |
+
|
| 545 |
+
- **LangChain** for RAG framework
|
| 546 |
+
- **Groq** for high-speed LLM inference
|
| 547 |
+
- **HuggingFace** for embeddings and hosting
|
| 548 |
+
- **FAISS** for efficient vector search
|
| 549 |
+
|
| 550 |
+
---
|
| 551 |
+
|
| 552 |
+
## 📞 Support
|
| 553 |
+
|
| 554 |
+
- **Issues**: [GitHub Issues](https://github.com/yourusername/cognichat/issues)
|
| 555 |
+
- **Discussions**: [GitHub Discussions](https://github.com/yourusername/cognichat/discussions)
|
| 556 |
+
- **Email**: riteshraut123321@gmail.com
|
| 557 |
+
|
| 558 |
+
---
|
| 559 |
+
|
| 560 |
+
<div align="center">
|
| 561 |
+
|
| 562 |
+
**Made with ❤️ by the CogniChat Team**
|
| 563 |
+
|
| 564 |
+
[⭐ Star us on GitHub](https://github.com/yourusername/cognichat) • [🐛 Report Bug](https://github.com/yourusername/cognichat/issues) • [✨ Request Feature](https://github.com/yourusername/cognichat/issues)
|
| 565 |
+
|
| 566 |
+
</div>
|
app.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import uuid
|
| 4 |
+
from flask import Flask, request, render_template, session, jsonify, Response
|
| 5 |
+
from werkzeug.utils import secure_filename
|
| 6 |
+
from rag_processor import create_rag_chain
|
| 7 |
+
from typing import Sequence, Any, List
|
| 8 |
+
import fitz
|
| 9 |
+
import re
|
| 10 |
+
import io
|
| 11 |
+
from gtts import gTTS
|
| 12 |
+
from langchain_core.documents import Document
|
| 13 |
+
from langchain_community.document_loaders import (
|
| 14 |
+
TextLoader,
|
| 15 |
+
Docx2txtLoader,
|
| 16 |
+
)
|
| 17 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 18 |
+
from langchain_experimental.text_splitter import SemanticChunker
|
| 19 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 20 |
+
from langchain_community.vectorstores import FAISS
|
| 21 |
+
from langchain.retrievers import EnsembleRetriever
|
| 22 |
+
from langchain_community.retrievers import BM25Retriever
|
| 23 |
+
from langchain_community.chat_message_histories import ChatMessageHistory
|
| 24 |
+
from langchain.storage import InMemoryStore
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
app = Flask(__name__)
# Use a stable secret when one is supplied through the SECRET_KEY environment
# variable, so signed session cookies stay valid across restarts and between
# worker processes. Without it, fall back to a random per-process key.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or os.urandom(24)

# Hugging Face Spaces is detected through the SPACE_ID / SPACES_ZERO_GPU
# environment variables; uploads are stored under /tmp/uploads there and
# under a relative "uploads" directory otherwise.
is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
if is_hf_spaces:
    app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
else:
    app.config['UPLOAD_FOLDER'] = 'uploads'

try:
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    print(f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
except OSError as e:
    # The preferred location could not be created (e.g. read-only filesystem):
    # switch to /tmp/uploads. A failure here is not caught and aborts startup.
    print(f"Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {e}")
    app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    print(f"Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
|
| 44 |
+
|
| 45 |
+
# In-process, module-level state. Nothing here is persisted, so it is lost
# whenever the process exits.
# NOTE(review): rag_chains is presumably keyed by session id like the other
# two dicts — confirm against the code that populates it.
rag_chains = {}
message_histories = {}  # session_id -> ChatMessageHistory, filled lazily by get_session_history
doc_stores = {} # To hold the InMemoryStore for each session

# Load the embedding model once, at import time, and bind it to EMBEDDING_MODEL.
print("Loading embedding model...")
try:
    EMBEDDING_MODEL = HuggingFaceEmbeddings(
        model_name="BAAI/bge-base-en-v1.5",
        model_kwargs={'device': 'cpu'}
    )
    print("Embedding model loaded successfully.")
except Exception as e:
    # Log and re-raise: a load failure aborts the import of this module.
    print(f"FATAL: Could not load embedding model. Error: {e}")
    raise
|
| 59 |
+
|
| 60 |
+
def load_pdf_with_fallback(filepath):
    """Extract the text of a PDF with PyMuPDF, one Document per non-empty page.

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        A list of Document objects whose metadata holds the file's base name
        ("source") and the 1-based page number ("page").

    Raises:
        ValueError: If no page yields any text.
        Exception: Any error raised while opening or reading the file is
            logged and re-raised unchanged.
    """
    try:
        with fitz.open(filepath) as pdf_doc:
            page_texts = enumerate((page.get_text() for page in pdf_doc), start=1)
            pages = [
                Document(
                    page_content=page_text,
                    metadata={
                        "source": os.path.basename(filepath),
                        "page": page_number,
                    }
                )
                for page_number, page_text in page_texts
                if page_text.strip()  # skip pages without extractable text
            ]

        if not pages:
            raise ValueError("No text content found in PDF.")

        print(f"Successfully loaded PDF with PyMuPDF: {filepath}")
        return pages
    except Exception as e:
        print(f"PyMuPDF failed for {filepath}: {e}")
        raise
|
| 82 |
+
|
| 83 |
+
# Maps a lower-cased file extension to the object used to load that file type.
# The ".pdf" entry is a plain function that returns the documents directly;
# the other entries are loader classes that are instantiated with the file
# path and then asked to .load() (see the dispatch in upload_files).
LOADER_MAPPING = {
    ".txt": TextLoader,
    ".pdf": load_pdf_with_fallback,
    ".docx": Docx2txtLoader,
}
|
| 88 |
+
|
| 89 |
+
def get_session_history(session_id: str) -> ChatMessageHistory:
    """Return the chat history for a session, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``message_histories``.

    Returns:
        The ChatMessageHistory registered for ``session_id``.
    """
    try:
        return message_histories[session_id]
    except KeyError:
        # First request for this session: register an empty history.
        history = ChatMessageHistory()
        message_histories[session_id] = history
        return history
|
| 93 |
+
|
| 94 |
+
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: always answers 200 with a fixed JSON body."""
    payload = {'status': 'healthy'}
    return jsonify(payload), 200
|
| 97 |
+
|
| 98 |
+
@app.route('/', methods=['GET'])
def index():
    """Serve the single-page chat UI rendered from templates/index.html."""
    page = render_template('index.html')
    return page
|
| 101 |
+
|
| 102 |
+
@app.route('/upload', methods=['POST'])
def upload_files():
    """Load the uploaded documents and build a RAG pipeline for a new session.

    Every uploaded file is validated, saved to the upload folder and parsed
    with the loader registered for its extension in LOADER_MAPPING. The
    combined documents are split into parent chunks (kept in an InMemoryStore)
    and semantic child chunks (indexed in FAISS and BM25); the resulting chain
    components are registered under a freshly generated session id.

    Returns:
        A JSON response: 400 when no file was selected or none could be
        processed, 500 when the pipeline setup fails, otherwise a success
        payload containing the processing summary and the new ``session_id``.
    """
    files = request.files.getlist('file')
    if not files or all(f.filename == '' for f in files):
        return jsonify({'status': 'error', 'message': 'No selected files.'}), 400

    all_docs = []
    processed_files, failed_files = [], []

    for file in files:
        if not (file and file.filename):
            continue

        filename = secure_filename(file.filename)
        # secure_filename() may reduce a name to an empty string; fall back to
        # the raw name for error reporting only (it is never used as a path).
        display_name = filename or file.filename
        try:
            if not filename:
                raise ValueError("Invalid file name.")

            # Validate the extension BEFORE touching the disk so unsupported
            # uploads are never written to the upload folder.
            file_ext = os.path.splitext(filename)[1].lower()
            if file_ext not in LOADER_MAPPING:
                raise ValueError("Unsupported file format.")

            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)

            # The PDF entry is a function returning documents; the others are
            # loader classes that need an explicit .load().
            loader_func = LOADER_MAPPING[file_ext]
            docs = loader_func(filepath) if file_ext == ".pdf" else loader_func(filepath).load()

            if not docs:
                raise ValueError("No content extracted.")

            all_docs.extend(docs)
            processed_files.append(filename)
            print(f"✓ Successfully processed: {filename}")
        except Exception as e:
            # Best effort: one bad file must not abort the whole upload.
            error_msg = str(e)
            print(f"✗ Error processing {display_name}: {error_msg}")
            failed_files.append(f"{display_name} ({error_msg})")

    if not all_docs:
        error_summary = "Failed to process all files."
        if failed_files:
            error_summary += " Reasons: " + ", ".join(failed_files)
        return jsonify({'status': 'error', 'message': error_summary}), 400

    try:
        print("Starting RAG pipeline setup...")

        parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
        child_splitter = SemanticChunker(
            EMBEDDING_MODEL,
            breakpoint_threshold_type='percentile',
            breakpoint_threshold_amount=80,
        )

        parent_docs = parent_splitter.split_documents(all_docs)
        doc_ids = [str(uuid.uuid4()) for _ in parent_docs]

        # Tag every child chunk with the id of the parent it was cut from so
        # the parent can be looked up again after retrieval.
        child_docs = []
        for _id, doc in zip(doc_ids, parent_docs):
            sub_docs = child_splitter.split_documents([doc])
            for child in sub_docs:
                child.metadata["doc_id"] = _id
            child_docs.extend(sub_docs)

        if not child_docs:
            # Fail with a readable message instead of an index-building error.
            raise ValueError("No text chunks could be created from the uploaded files.")

        store = InMemoryStore()
        store.mset(list(zip(doc_ids, parent_docs)))

        vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)

        print(f"Stored {len(parent_docs)} parent docs and indexed {len(child_docs)} child docs.")

        bm25_retriever = BM25Retriever.from_documents(child_docs)
        bm25_retriever.k = 5

        faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        # Keyword (BM25) and dense (FAISS) retrieval, weighted equally.
        ensemble_retriever = EnsembleRetriever(
            retrievers=[bm25_retriever, faiss_retriever],
            weights=[0.5, 0.5]
        )
        print("Created Hybrid Retriever for child documents.")

        session_id = str(uuid.uuid4())

        doc_stores[session_id] = store

        rag_chain_components = create_rag_chain(ensemble_retriever, get_session_history, EMBEDDING_MODEL, store)

        rag_chains[session_id] = rag_chain_components
        session['session_id'] = session_id

        success_msg = f"Successfully processed: {', '.join(processed_files)}"
        if failed_files:
            success_msg += f"\nFailed to process: {', '.join(failed_files)}"

        # The session id is also returned in the body so the client can send
        # it back explicitly (the /chat route accepts it from the JSON body).
        return jsonify({
            'status': 'success',
            'filename': success_msg,
            'session_id': session_id
        })

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'status': 'error', 'message': f'Failed during RAG setup: {e}'}), 500
|
| 199 |
+
|
| 200 |
+
def _log_chat_diagnostics(chain_components, question, session_id):
    """Print every intermediate RAG stage (rewrite, HyDE, retrieval, parents).

    NOTE: this re-runs stages that the final chain executes again, so it costs
    two extra LLM calls per question. It exists purely as a debugging aid.
    """
    print("\n" + "="*50)
    print("--- STARTING DIAGNOSTIC RUN ---")
    print(f"Original Question: {question}")
    print("="*50 + "\n")

    rewritten_query = chain_components["rewriter"].invoke(
        {"question": question, "chat_history": get_session_history(session_id).messages}
    )
    print(f"--- 1. Rewritten Query ---\n{rewritten_query}\n")

    hyde_doc = chain_components["hyde"].invoke({"question": rewritten_query})
    print(f"--- 2. HyDE Document ---\n{hyde_doc}\n")

    # invoke() is the Runnable entry point of a retriever and replaces the
    # deprecated get_relevant_documents().
    final_retrieved_docs = chain_components["base_retriever"].invoke(hyde_doc)
    print(f"--- 3. Retrieved Top {len(final_retrieved_docs)} Child Docs ---")
    for i, doc in enumerate(final_retrieved_docs):
        print(f" Doc {i+1}: {doc.page_content[:150]}... (Source: {doc.metadata.get('source')})")
    print("\n")

    final_context_docs = chain_components["parent_fetcher"].invoke(final_retrieved_docs)
    print(f"--- 4. Final {len(final_context_docs)} Parent Docs for LLM ---")
    for i, doc in enumerate(final_context_docs):
        print(f" Final Doc {i+1} (Source: {doc.metadata.get('source')}, Page: {doc.metadata.get('page')}):\n '{doc.page_content[:300]}...'\n---")

    print("="*50)
    print("--- INVOKING FINAL CHAIN ---")
    print("="*50 + "\n")


@app.route('/chat', methods=['POST'])
def chat():
    """Answer a question against the documents of an existing session.

    Expects a JSON object with ``question`` and, when the session cookie is
    not available, ``session_id``.

    Returns:
        ``{'answer': ...}`` on success, a 400 error payload when the question
        or session is missing/unknown, or a 500 error payload when the chain
        fails.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body; anything that is not a JSON object is treated as "no data" so the
    # request ends in the regular 400 response rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    question = data.get('question')
    # The cookie takes precedence; the body value is the fallback.
    session_id = session.get('session_id') or data.get('session_id')

    if not question or not session_id or session_id not in rag_chains:
        return jsonify({'status': 'error', 'message': 'Invalid session or no question provided.'}), 400

    try:
        chain_components = rag_chains[session_id]
        config = {"configurable": {"session_id": session_id}}

        # Diagnostics stay on by default (existing behaviour); set
        # COGNICHAT_DIAGNOSTICS=0 to skip the duplicated LLM calls.
        if os.getenv("COGNICHAT_DIAGNOSTICS", "1") != "0":
            _log_chat_diagnostics(chain_components, question, session_id)

        answer_string = chain_components["final_chain"].invoke({"question": question}, config=config)

        return jsonify({'answer': answer_string})

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'status': 'error', 'message': 'An error occurred while getting the answer.'}), 500
|
| 247 |
+
|
| 248 |
+
def clean_markdown_for_tts(text: str) -> str:
    """Strip Markdown syntax so the text can be read aloud naturally.

    Block-level markers (horizontal rules, headings, list markers, block
    quotes) are removed first, then inline markers (emphasis, inline code).
    Handling block markers first matters: a leading ``* `` bullet would
    otherwise be paired with a later ``*`` by the emphasis rule and leave a
    stray asterisk behind.

    Args:
        text: Markdown-formatted text.

    Returns:
        The text on a single line with Markdown markers removed and
        surrounding whitespace stripped.
    """
    # --- Block-level markers (line anchored) ---
    # Horizontal rules such as ---, *** or ___
    text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
    # ATX headings: "# Title"
    text = re.sub(r'^\s*#{1,6}\s+', '', text, flags=re.MULTILINE)
    # Unordered list markers: "* item" / "- item"
    text = re.sub(r'^\s*[\*\-]\s+', '', text, flags=re.MULTILINE)
    # Ordered list markers: "1. item"
    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
    # Block quotes: "> quote"
    text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)

    # --- Inline markers ---
    # *italic* and **bold**
    text = re.sub(r'\*(\*?)(.*?)\1\*', r'\2', text)
    # _italic_ and __bold__, but only when the underscores are not inside a
    # word, so identifiers such as my_var_name are left intact.
    text = re.sub(r'(?<!\w)(_{1,2})(.+?)\1(?!\w)', r'\2', text)
    # `inline code`
    text = re.sub(r'`(.*?)`', r'\1', text)

    # Collapse line breaks so the speech engine receives one continuous line.
    text = re.sub(r'\n+', ' ', text)
    return text.strip()
|
| 259 |
+
|
| 260 |
+
@app.route('/tts', methods=['POST'])
def text_to_speech():
    """Convert the posted text to spoken English and return it as MP3 audio.

    Expects a JSON object with a ``text`` string. Markdown markers are
    removed before synthesis.

    Returns:
        An ``audio/mpeg`` response on success, a 400 error payload when no
        usable text is supplied, or a 500 error payload when synthesis fails.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body; a non-object body is treated the same as a missing "text" field.
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None

    if not text or not isinstance(text, str):
        return jsonify({'status': 'error', 'message': 'No text provided.'}), 400

    try:
        clean_text = clean_markdown_for_tts(text)
        if not clean_text:
            # Only Markdown markers/whitespace were sent: nothing to speak, so
            # report a client error instead of failing inside the TTS call.
            return jsonify({'status': 'error', 'message': 'No text provided.'}), 400

        tts = gTTS(clean_text, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        # Hand the complete payload over as bytes rather than letting the
        # response iterate the buffer as if it were a text file.
        return Response(mp3_fp.getvalue(), mimetype='audio/mpeg')
    except Exception as e:
        print(f"Error in TTS generation: {e}")
        return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
|
| 278 |
+
|
| 279 |
+
if __name__ == '__main__':
    # Listen on all interfaces; the port is taken from $PORT (default 7860).
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        debug=False,
    )
|
diagnose.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Quick diagnostic script to check CogniChat configuration and identify issues.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
PLACEHOLDER_KEY = 'your_groq_api_key_here'

print("=== CogniChat Diagnostic Tool ===")
print()

# Check if we're in the right directory
current_dir = Path.cwd()
print(f"Current directory: {current_dir}")

# Check for required files
required_files = [
    'app.py',
    'rag_processor.py',
    'requirements.txt',
    '.env.example'
]

missing_files = []
for file in required_files:
    if (current_dir / file).exists():
        print(f"✓ Found: {file}")
    else:
        print(f"✗ Missing: {file}")
        missing_files.append(file)

print()

# Check .env file
env_file = current_dir / '.env'
# Content of .env when it could be read; reused by the recommendations below
# so the file is not read a second time outside of any error handling.
env_content = None
if env_file.exists():
    print("✓ .env file exists")
    try:
        env_content = env_file.read_text(encoding="utf-8", errors="replace")

        if 'GROQ_API_KEY=' in env_content:
            if PLACEHOLDER_KEY in env_content:
                print("⚠ .env file contains placeholder API key - needs to be updated!")
            else:
                print("✓ GROQ_API_KEY appears to be set in .env")
        else:
            print("✗ GROQ_API_KEY not found in .env file")
    except OSError as e:
        print(f"✗ Error reading .env file: {e}")
else:
    print("✗ .env file missing - copy from .env.example and update with your API key")

print()

# Check environment variables and detect HF Spaces
is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
print(f"Environment: {'Hugging Face Spaces' if is_hf_spaces else 'Local Development'}")

if is_hf_spaces:
    print(f"Space ID: {os.getenv('SPACE_ID', 'Not detected')}")

groq_key = os.getenv('GROQ_API_KEY')
if groq_key:
    if groq_key == PLACEHOLDER_KEY:
        print("⚠ GROQ_API_KEY is set but contains placeholder value")
    else:
        print("✓ GROQ_API_KEY environment variable is set")
        if is_hf_spaces:
            print(" (Loaded from Hugging Face Spaces secrets)")
else:
    print("✗ GROQ_API_KEY environment variable not set")

print()

# Key the application will actually see. It starts as the plain environment
# value and is refreshed once .env has been loaded, so the recommendations
# below do not report a problem that the .env file already solves.
effective_key = groq_key

# Load dotenv if available
try:
    from dotenv import load_dotenv
    load_dotenv()
    groq_key_after_dotenv = os.getenv('GROQ_API_KEY')
    effective_key = groq_key_after_dotenv

    if groq_key_after_dotenv:
        if groq_key_after_dotenv == PLACEHOLDER_KEY:
            print("⚠ After loading .env: GROQ_API_KEY still contains placeholder")
        else:
            print("✓ After loading .env: GROQ_API_KEY is properly set")
    else:
        print("✗ After loading .env: GROQ_API_KEY still not available")

except ImportError:
    print("✗ python-dotenv not available - install with: pip install python-dotenv")

print()

# Recommendations
print("=== Recommendations ===")

key_missing = not effective_key or effective_key == PLACEHOLDER_KEY

if missing_files:
    print("1. Ensure you're in the correct CogniChat directory")

if is_hf_spaces:
    if key_missing:
        print("2. FOR HUGGING FACE SPACES - Set API key in Space Secrets:")
        print(" - Go to your Space Settings")
        print(" - Navigate to 'Repository Secrets'")
        print(" - Add new secret: GROQ_API_KEY")
        print(" - Get your key from: https://console.groq.com/keys")
        print(" - Restart your Space after adding the secret")
else:
    if not env_file.exists():
        print("2. Copy .env.example to .env:")
        print(" cp .env.example .env")

    if env_content is not None and PLACEHOLDER_KEY in env_content:
        print("3. Update .env file with your actual GROQ API key:")
        print(" - Visit: https://console.groq.com/keys")
        print(" - Create an API key")
        print(" - Replace 'your_groq_api_key_here' in .env with your key")

if key_missing:
    print("4. The main issue is likely the GROQ API key configuration")
    print(" This would cause the 400 error you're seeing in /chat endpoint")

print("\n5. After fixing the API key, restart the application")
print("\n=== End of Diagnostic ===")
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# System packages are now installed directly in Dockerfile
|
rag_processor.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from operator import itemgetter
|
| 4 |
+
from langchain_groq import ChatGroq
|
| 5 |
+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 6 |
+
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
|
| 7 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 8 |
+
from langchain_core.runnables.history import RunnableWithMessageHistory
|
| 9 |
+
|
| 10 |
+
def create_rag_chain(base_retriever, get_session_history_func, embedding_model, store):
    """Build the RAG chain and expose its stages for inspection.

    The pipeline rewrites the question into a standalone query, generates a
    hypothetical answer (HyDE) to search with, retrieves child chunks, swaps
    them for their parent documents and finally answers from that context
    with chat history attached.

    Args:
        base_retriever: Retriever queried with the hypothetical answer.
        get_session_history_func: Callable handed to RunnableWithMessageHistory
            to obtain the message history of a session.
        embedding_model: Accepted for interface compatibility; not used here.
        store: Key-value store mapping a parent ``doc_id`` to its document.

    Returns:
        A dict with the keys "rewriter", "hyde", "base_retriever",
        "parent_fetcher" and "final_chain".

    Raises:
        ValueError: If GROQ_API_KEY is missing or still the placeholder value.
    """
    load_dotenv()
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key or api_key == "your_groq_api_key_here":
        raise ValueError("GROQ_API_KEY not found or not configured properly.")

    llm = ChatGroq(model_name="llama-3.1-8b-instant", api_key=api_key, temperature=0.1)

    # 1. HyDE-like Document Generation Chain
    hyde_template = """As a document expert, write a concise, fact-based paragraph that directly answers the user's question. This will be used for a database search.
Question: {question}
Hypothetical Answer:"""
    hyde_prompt = ChatPromptTemplate.from_template(hyde_template)
    hyde_chain = hyde_prompt | llm | StrOutputParser()

    # 2. Query Rewriting Chain
    rewrite_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question that is optimized for a vector database.

**Chat History:**
{chat_history}

**Follow-up Question:**
{question}

**Standalone Question:**"""
    rewrite_prompt = ChatPromptTemplate.from_messages([
        ("system", rewrite_template),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "Reformulate this question as a standalone query: {question}")
    ])
    query_rewriter_chain = rewrite_prompt | llm | StrOutputParser()

    # 3. Parent Document Fetching Chain
    def get_parents(docs):
        """Return the parent documents of the retrieved child chunks.

        Parents are returned once each, in the order their first child was
        retrieved, so the ranking produced by the retriever is preserved.
        """
        # dict.fromkeys() de-duplicates while keeping first-seen order (a set
        # would lose the retrieval ranking); chunks without a doc_id are
        # skipped so the store is never queried with None.
        parent_ids = list(dict.fromkeys(
            d.metadata.get("doc_id")
            for d in docs
            if d.metadata.get("doc_id") is not None
        ))
        if not parent_ids:
            return []
        # mget() yields None for keys that are not in the store; drop those so
        # callers only ever receive real documents.
        return [parent for parent in store.mget(parent_ids) if parent is not None]

    parent_fetcher_chain = RunnableLambda(get_parents)

    # 4. Main Conversational RAG Chain
    rag_template = """You are CogniChat, an expert document analysis assistant. Your task is to answer the user's question based *only* on the provided context.

**Instructions:**
1. Read the context carefully.
2. If the answer is in the context, provide a clear and concise answer.
3. If the answer is not in the context, you *must* state that you cannot find the information in the provided documents. Do not use any external knowledge.
4. Where appropriate, use formatting like lists or bold text to improve readability.

**Context:**
{context}
"""
    rag_prompt = ChatPromptTemplate.from_messages([
        ("system", rag_template),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    # The context is produced by chaining: rewrite -> HyDE -> retrieve child
    # chunks -> fetch their parent documents.
    conversational_rag_chain = (
        RunnablePassthrough.assign(
            context=query_rewriter_chain | hyde_chain | base_retriever | parent_fetcher_chain
        )
        | rag_prompt
        | llm
        | StrOutputParser()
    )

    # 5. Final Chain with History (Simplified)
    final_chain = RunnableWithMessageHistory(
        conversational_rag_chain,
        get_session_history_func,
        input_messages_key="question",
        history_messages_key="chat_history",
    )

    print("\n✅ RAG chain and components successfully built.")

    return {
        "rewriter": query_rewriter_chain,
        "hyde": hyde_chain,
        "base_retriever": base_retriever,
        "parent_fetcher": parent_fetcher_chain,
        "final_chain": final_chain
    }
|
requirements-simple.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask==2.3.3
|
| 2 |
+
langchain==0.1.20
|
| 3 |
+
langchain-groq==0.1.5
|
| 4 |
+
langchain-community==0.0.38
|
| 5 |
+
gTTS==2.4.0
|
| 6 |
+
sentence-transformers==2.7.0
|
| 7 |
+
faiss-cpu==1.7.4
|
| 8 |
+
python-docx==1.1.0
|
| 9 |
+
docx2txt==0.8
|
| 10 |
+
pypdf==4.2.0
|
| 11 |
+
pillow==10.3.0
|
| 12 |
+
python-dotenv==1.0.1
|
| 13 |
+
werkzeug==2.3.7
|
| 14 |
+
transformers==4.40.2
|
| 15 |
+
torch==2.3.0
|
| 16 |
+
numpy==1.24.4
|
| 17 |
+
requests==2.31.0
|
requirements.txt
ADDED
|
Binary file (890 Bytes). View file
|
|
|
templates/index.html
ADDED
|
@@ -0,0 +1,615 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>CogniChat - Chat with your Documents</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 8 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 9 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 10 |
+
<link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
| 11 |
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 12 |
+
<style>
|
| 13 |
+
:root {
|
| 14 |
+
--background: #f0f4f9;
|
| 15 |
+
--foreground: #1f1f1f;
|
| 16 |
+
--primary: #1a73e8;
|
| 17 |
+
--primary-hover: #1867cf;
|
| 18 |
+
--card: #ffffff;
|
| 19 |
+
--card-border: #dadce0;
|
| 20 |
+
--input-bg: #e8f0fe;
|
| 21 |
+
--user-bubble: #d9e7ff;
|
| 22 |
+
--bot-bubble: #f1f3f4;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
/* Dark mode styles */
|
| 26 |
+
.dark {
|
| 27 |
+
--background: #202124;
|
| 28 |
+
--foreground: #e8eaed;
|
| 29 |
+
--primary: #8ab4f8;
|
| 30 |
+
--primary-hover: #99bdfa;
|
| 31 |
+
--card: #303134;
|
| 32 |
+
--card-border: #5f6368;
|
| 33 |
+
--input-bg: #303134;
|
| 34 |
+
--user-bubble: #3c4043;
|
| 35 |
+
--bot-bubble: #3c4043;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
body {
|
| 39 |
+
font-family: 'Google Sans', 'Roboto', sans-serif;
|
| 40 |
+
background-color: var(--background);
|
| 41 |
+
color: var(--foreground);
|
| 42 |
+
overflow: hidden;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
#chat-window::-webkit-scrollbar { width: 8px; }
|
| 46 |
+
#chat-window::-webkit-scrollbar-track { background: transparent; }
|
| 47 |
+
#chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
|
| 48 |
+
.dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
|
| 49 |
+
|
| 50 |
+
.drop-zone--over {
|
| 51 |
+
border-color: var(--primary);
|
| 52 |
+
box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
/* Loading Spinner */
|
| 56 |
+
.loader {
|
| 57 |
+
width: 48px;
|
| 58 |
+
height: 48px;
|
| 59 |
+
border: 3px solid var(--card-border);
|
| 60 |
+
border-radius: 50%;
|
| 61 |
+
display: inline-block;
|
| 62 |
+
position: relative;
|
| 63 |
+
box-sizing: border-box;
|
| 64 |
+
animation: rotation 1s linear infinite;
|
| 65 |
+
}
|
| 66 |
+
.loader::after {
|
| 67 |
+
content: '';
|
| 68 |
+
box-sizing: border-box;
|
| 69 |
+
position: absolute;
|
| 70 |
+
left: 50%;
|
| 71 |
+
top: 50%;
|
| 72 |
+
transform: translate(-50%, -50%);
|
| 73 |
+
width: 56px;
|
| 74 |
+
height: 56px;
|
| 75 |
+
border-radius: 50%;
|
| 76 |
+
border: 3px solid;
|
| 77 |
+
border-color: var(--primary) transparent;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
@keyframes rotation {
|
| 81 |
+
0% { transform: rotate(0deg); }
|
| 82 |
+
100% { transform: rotate(360deg); }
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/* Typing Indicator Animation */
|
| 86 |
+
.typing-indicator span {
|
| 87 |
+
height: 10px;
|
| 88 |
+
width: 10px;
|
| 89 |
+
background-color: #9E9E9E;
|
| 90 |
+
border-radius: 50%;
|
| 91 |
+
display: inline-block;
|
| 92 |
+
animation: bounce 1.4s infinite ease-in-out both;
|
| 93 |
+
}
|
| 94 |
+
.typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
|
| 95 |
+
.typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
|
| 96 |
+
@keyframes bounce {
|
| 97 |
+
0%, 80%, 100% { transform: scale(0); }
|
| 98 |
+
40% { transform: scale(1.0); }
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
/* Enhanced Markdown Styling for better readability and aesthetics */
|
| 102 |
+
.markdown-content p {
|
| 103 |
+
margin-bottom: 1rem;
|
| 104 |
+
line-height: 1.75;
|
| 105 |
+
}
|
| 106 |
+
.markdown-content h1, .markdown-content h2, .markdown-content h3, .markdown-content h4 {
|
| 107 |
+
font-family: 'Google Sans', sans-serif;
|
| 108 |
+
font-weight: 700;
|
| 109 |
+
margin-top: 1.75rem;
|
| 110 |
+
margin-bottom: 1rem;
|
| 111 |
+
line-height: 1.3;
|
| 112 |
+
}
|
| 113 |
+
.markdown-content h1 { font-size: 1.75em; border-bottom: 1px solid var(--card-border); padding-bottom: 0.5rem; }
|
| 114 |
+
.markdown-content h2 { font-size: 1.5em; }
|
| 115 |
+
.markdown-content h3 { font-size: 1.25em; }
|
| 116 |
+
.markdown-content h4 { font-size: 1.1em; }
|
| 117 |
+
.markdown-content ul, .markdown-content ol {
|
| 118 |
+
padding-left: 1.75rem;
|
| 119 |
+
margin-bottom: 1rem;
|
| 120 |
+
}
|
| 121 |
+
.markdown-content li {
|
| 122 |
+
margin-bottom: 0.5rem;
|
| 123 |
+
}
|
| 124 |
+
.dark .markdown-content ul > li::marker { color: var(--primary); }
|
| 125 |
+
.markdown-content ul > li::marker { color: var(--primary); }
|
| 126 |
+
.markdown-content a {
|
| 127 |
+
color: var(--primary);
|
| 128 |
+
text-decoration: none;
|
| 129 |
+
font-weight: 500;
|
| 130 |
+
border-bottom: 1px solid transparent;
|
| 131 |
+
transition: all 0.2s ease-in-out;
|
| 132 |
+
}
|
| 133 |
+
.markdown-content a:hover {
|
| 134 |
+
border-bottom-color: var(--primary-hover);
|
| 135 |
+
}
|
| 136 |
+
.markdown-content blockquote {
|
| 137 |
+
margin: 1.5rem 0;
|
| 138 |
+
padding-left: 1.5rem;
|
| 139 |
+
border-left: 4px solid var(--card-border);
|
| 140 |
+
color: #6c757d;
|
| 141 |
+
font-style: italic;
|
| 142 |
+
}
|
| 143 |
+
.dark .markdown-content blockquote {
|
| 144 |
+
color: #adb5bd;
|
| 145 |
+
}
|
| 146 |
+
.markdown-content hr {
|
| 147 |
+
border: none;
|
| 148 |
+
border-top: 1px solid var(--card-border);
|
| 149 |
+
margin: 2rem 0;
|
| 150 |
+
}
|
| 151 |
+
.markdown-content table {
|
| 152 |
+
width: 100%;
|
| 153 |
+
border-collapse: collapse;
|
| 154 |
+
margin: 1.5rem 0;
|
| 155 |
+
font-size: 0.9em;
|
| 156 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
| 157 |
+
border-radius: 8px;
|
| 158 |
+
overflow: hidden;
|
| 159 |
+
}
|
| 160 |
+
.markdown-content th, .markdown-content td {
|
| 161 |
+
border: 1px solid var(--card-border);
|
| 162 |
+
padding: 0.75rem 1rem;
|
| 163 |
+
text-align: left;
|
| 164 |
+
}
|
| 165 |
+
.markdown-content th {
|
| 166 |
+
background-color: var(--bot-bubble);
|
| 167 |
+
font-weight: 500;
|
| 168 |
+
}
|
| 169 |
+
.markdown-content code {
|
| 170 |
+
background-color: rgba(0,0,0,0.05);
|
| 171 |
+
padding: 0.2rem 0.4rem;
|
| 172 |
+
border-radius: 0.25rem;
|
| 173 |
+
font-family: 'Roboto Mono', monospace;
|
| 174 |
+
font-size: 0.9em;
|
| 175 |
+
}
|
| 176 |
+
.dark .markdown-content code {
|
| 177 |
+
background-color: rgba(255,255,255,0.1);
|
| 178 |
+
}
|
| 179 |
+
.markdown-content pre {
|
| 180 |
+
position: relative;
|
| 181 |
+
background-color: #f8f9fa;
|
| 182 |
+
border: 1px solid var(--card-border);
|
| 183 |
+
border-radius: 0.5rem;
|
| 184 |
+
margin-bottom: 1rem;
|
| 185 |
+
}
|
| 186 |
+
.dark .markdown-content pre {
|
| 187 |
+
background-color: #2e2f32;
|
| 188 |
+
}
|
| 189 |
+
.markdown-content pre code {
|
| 190 |
+
background: none;
|
| 191 |
+
padding: 1rem;
|
| 192 |
+
display: block;
|
| 193 |
+
overflow-x: auto;
|
| 194 |
+
}
|
| 195 |
+
.markdown-content pre .copy-code-btn {
|
| 196 |
+
position: absolute;
|
| 197 |
+
top: 0.5rem;
|
| 198 |
+
right: 0.5rem;
|
| 199 |
+
background-color: #e8eaed;
|
| 200 |
+
border: 1px solid #dadce0;
|
| 201 |
+
color: #5f6368;
|
| 202 |
+
padding: 0.3rem 0.6rem;
|
| 203 |
+
border-radius: 0.25rem;
|
| 204 |
+
cursor: pointer;
|
| 205 |
+
opacity: 0;
|
| 206 |
+
transition: opacity 0.2s;
|
| 207 |
+
font-size: 0.8em;
|
| 208 |
+
}
|
| 209 |
+
.dark .markdown-content pre .copy-code-btn {
|
| 210 |
+
background-color: #3c4043;
|
| 211 |
+
border-color: #5f6368;
|
| 212 |
+
color: #e8eaed;
|
| 213 |
+
}
|
| 214 |
+
.markdown-content pre:hover .copy-code-btn {
|
| 215 |
+
opacity: 1;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
/* Spinner for the TTS button */
|
| 219 |
+
.tts-button-loader {
|
| 220 |
+
width: 16px;
|
| 221 |
+
height: 16px;
|
| 222 |
+
border: 2px solid currentColor; /* Use button's text color */
|
| 223 |
+
border-radius: 50%;
|
| 224 |
+
display: inline-block;
|
| 225 |
+
box-sizing: border-box;
|
| 226 |
+
animation: rotation 0.8s linear infinite;
|
| 227 |
+
border-bottom-color: transparent; /* Makes it a half circle spinner */
|
| 228 |
+
}
|
| 229 |
+
</style>
|
| 230 |
+
</head>
|
| 231 |
+
<body class="w-screen h-screen dark">
|
| 232 |
+
<main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
|
| 233 |
+
<div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
|
| 234 |
+
<header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
|
| 235 |
+
<h1 class="text-xl font-medium">Chat with your Docs</h1>
|
| 236 |
+
<p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
|
| 237 |
+
</header>
|
| 238 |
+
<div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
|
| 239 |
+
<div id="chat-content" class="max-w-4xl mx-auto space-y-8">
|
| 240 |
+
</div>
|
| 241 |
+
</div>
|
| 242 |
+
<div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
|
| 243 |
+
<form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
|
| 244 |
+
<input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
|
| 245 |
+
<button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
|
| 246 |
+
<svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
|
| 247 |
+
</button>
|
| 248 |
+
</form>
|
| 249 |
+
</div>
|
| 250 |
+
</div>
|
| 251 |
+
|
| 252 |
+
<div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
|
| 253 |
+
<div class="text-center">
|
| 254 |
+
<h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
|
| 255 |
+
<div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
|
| 256 |
+
<input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
|
| 257 |
+
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
|
| 258 |
+
<p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
|
| 259 |
+
<p id="file-name" class="mt-2 text-xs text-gray-500"></p>
|
| 260 |
+
</div>
|
| 261 |
+
</div>
|
| 262 |
+
</div>
|
| 263 |
+
|
| 264 |
+
<div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
|
| 265 |
+
<div class="loader"></div>
|
| 266 |
+
<p id="loading-text" class="mt-6 text-sm font-medium"></p>
|
| 267 |
+
<p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
|
| 268 |
+
</div>
|
| 269 |
+
</main>
|
| 270 |
+
|
| 271 |
+
<script>
|
| 272 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 273 |
+
// Upload view, drop zone and loading overlay elements, resolved once by id.
const [
    uploadContainer,
    chatContainer,
    dropZone,
    fileUploadInput,
    fileNameSpan,
    loadingOverlay,
    loadingText,
    loadingSubtext,
] = [
    'upload-container',
    'chat-container',
    'drop-zone',
    'file-upload',
    'file-name',
    'loading-overlay',
    'loading-text',
    'loading-subtext',
].map((id) => document.getElementById(id));

// Chat view elements, resolved once by id.
const [
    chatForm,
    chatInput,
    chatSubmitBtn,
    chatWindow,
    chatContent,
    chatFilename,
] = [
    'chat-form',
    'chat-input',
    'chat-submit-btn',
    'chat-window',
    'chat-content',
    'chat-filename',
].map((id) => document.getElementById(id));
|
| 288 |
+
|
| 289 |
+
// Pick up the session ID saved by an earlier upload in this tab, if there is one.
// An absent (or empty) stored value leaves sessionId as null.
const storedSessionId = sessionStorage.getItem('cognichat_session_id');
let sessionId = storedSessionId ? storedSessionId : null;
if (sessionId !== null) {
    console.debug('Restored session ID from storage:', sessionId);
}
|
| 295 |
+
|
| 296 |
+
// --- File Upload Logic ---
// Clicking the drop zone forwards the click to the hidden file input.
dropZone.addEventListener('click', () => fileUploadInput.click());

// Route every drag event on the zone and on the page body through
// preventDefaults before anything else handles it.
for (const eventName of ['dragenter', 'dragover', 'dragleave', 'drop']) {
    dropZone.addEventListener(eventName, preventDefaults, false);
    document.body.addEventListener(eventName, preventDefaults, false);
}

// Toggle the highlight class while something is dragged over the zone.
const setDropHighlight = (active) => () => dropZone.classList.toggle('drop-zone--over', active);
for (const eventName of ['dragenter', 'dragover']) {
    dropZone.addEventListener(eventName, setDropHighlight(true));
}
for (const eventName of ['dragleave', 'drop']) {
    dropZone.addEventListener(eventName, setDropHighlight(false));
}

// Files dropped onto the zone.
dropZone.addEventListener('drop', (event) => {
    const droppedFiles = event.dataTransfer.files;
    if (droppedFiles.length > 0) handleFiles(droppedFiles);
});

// Files chosen through the native picker.
fileUploadInput.addEventListener('change', (event) => {
    const chosenFiles = event.target.files;
    if (chosenFiles.length > 0) handleFiles(chosenFiles);
});
|
| 319 |
+
|
| 320 |
+
/**
 * Cancel an event's default action and stop it from propagating further.
 * @param {Event} e - The event to neutralise.
 */
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}
|
| 321 |
+
|
| 322 |
+
/**
 * Package the chosen files into multipart form data and start the upload.
 *
 * Every file is appended under the same `file` field, and the selected
 * names are shown in the drop zone before the upload begins.
 * @param {FileList|File[]} files - Files from a drop or from the file picker.
 */
async function handleFiles(files) {
    const selectedFiles = Array.from(files);
    const fileNames = selectedFiles.map((file) => file.name);

    const formData = new FormData();
    selectedFiles.forEach((file) => formData.append('file', file));

    fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
    await uploadAndProcessFiles(formData, fileNames);
}
|
| 333 |
+
|
| 334 |
+
/**
 * POST the selected files to `/upload` and switch to the chat view on success.
 *
 * The loading overlay is shown for the duration of the request. On success the
 * returned session ID (if any) is stored in sessionStorage and the chat view
 * replaces the upload view. On failure the error is logged and shown in an
 * alert. The overlay, the file-name label and the file input are always reset.
 * @param {FormData} formData - Multipart body holding the files to upload.
 * @param {string[]} fileNames - Names of the files, used for the progress text.
 */
async function uploadAndProcessFiles(formData, fileNames) {
    loadingOverlay.classList.remove('hidden');
    loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
    loadingSubtext.textContent = "🤓Creating a knowledge base may take a minute or two. So please hold on tight";

    try {
        const response = await fetch('/upload', { method: 'POST', body: formData });

        // An error response is not guaranteed to carry a JSON body (it may be an
        // HTML error page), so a parse failure must not hide the HTTP status.
        let result = null;
        try {
            result = await response.json();
        } catch (parseError) {
            console.warn('Upload response was not valid JSON:', parseError);
        }

        if (!response.ok) {
            const fallback = result
                ? 'Unknown error occurred.'
                : `Upload failed with HTTP status ${response.status}.`;
            throw new Error((result && result.message) || fallback);
        }
        if (!result) {
            throw new Error('The server returned an unreadable response.');
        }

        if (result.session_id) {
            sessionId = result.session_id;
            sessionStorage.setItem('cognichat_session_id', sessionId);
            console.debug('Stored session ID from upload:', sessionId);
        } else {
            console.warn('Upload response missing session_id field.');
        }

        chatFilename.textContent = `Chatting with: ${result.filename}`;
        uploadContainer.classList.add('hidden');
        chatContainer.classList.remove('hidden');
        appendMessage("I've analyzed your documents. What would you like to know?", "bot");

    } catch (error) {
        console.error('Upload error:', error);
        alert(`Error: ${error.message}`);
    } finally {
        // Reset the upload UI whether the request succeeded or failed.
        loadingOverlay.classList.add('hidden');
        loadingSubtext.textContent = '';
        fileNameSpan.textContent = '';
        fileUploadInput.value = '';
    }
}
|
| 367 |
+
|
| 368 |
+
// --- Chat Logic ---
|
| 369 |
+
// Send the typed question to `/chat` and render the JSON answer as a bot message.
// The input and submit button are disabled while the request is in flight.
chatForm.addEventListener('submit', async (e) => {
    e.preventDefault();
    const question = chatInput.value.trim();
    if (!question) return;

    appendMessage(question, 'user');
    chatInput.value = '';
    chatInput.disabled = true;
    chatSubmitBtn.disabled = true;

    const typingIndicator = showTypingIndicator();
    let botMessageContainer = null;
    let contentDiv = null;

    try {
        const requestBody = { question: question };
        // Only include the session ID once an upload has provided one.
        if (sessionId) {
            requestBody.session_id = sessionId;
        }

        const response = await fetch('/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(requestBody),
        });

        if (!response.ok) throw new Error(`Server error: ${response.statusText}`);

        // The endpoint returns a JSON document; the reply text is its `answer` field.
        const result = await response.json();
        const answer = result.answer;

        if (!answer) {
            throw new Error("Received an empty or invalid response from the server.");
        }

        typingIndicator.remove();
        botMessageContainer = appendMessage('', 'bot');
        contentDiv = botMessageContainer.querySelector('.markdown-content');

        // NOTE(review): marked does not sanitize its output, so HTML embedded in
        // the answer is inserted into the page as-is. Pass the result through an
        // HTML sanitizer before assigning it if answers can carry untrusted content.
        contentDiv.innerHTML = marked.parse(answer);
        contentDiv.querySelectorAll('pre').forEach(addCopyButton);
        scrollToBottom(); // Scroll after content is added

        addTextToSpeechControls(botMessageContainer, answer);

    } catch (error) {
        console.error('Chat error:', error);
        if (typingIndicator) typingIndicator.remove();

        // Reuse the bot bubble if one was already created; otherwise add one.
        if (!contentDiv) {
            contentDiv = appendMessage('', 'bot').querySelector('.markdown-content');
        }

        // Build the error with textContent so the message is shown literally and
        // can never be interpreted as markup.
        const errorParagraph = document.createElement('p');
        errorParagraph.className = 'text-red-500';
        errorParagraph.textContent = `Error: ${error.message}`;
        contentDiv.textContent = '';
        contentDiv.appendChild(errorParagraph);
        scrollToBottom();
    } finally {
        chatInput.disabled = false;
        chatSubmitBtn.disabled = false;
        chatInput.focus();
    }
});
|
| 433 |
+
|
| 434 |
+
// --- UI Helper Functions ---
|
| 435 |
+
|
| 436 |
+
/**
 * Add one message row (avatar, sender label, body, empty TTS slot) to the chat.
 *
 * Non-empty text is rendered through `marked.parse` and assigned as HTML; an
 * empty string leaves the body blank so the caller can fill it in later.
 * @param {string} text - Markdown source of the message; may be empty.
 * @param {string} sender - 'user' for the user's own messages; any other value
 *     is styled as the bot.
 * @returns {HTMLDivElement} The bubble element holding the label, the
 *     `.markdown-content` body and the `.tts-controls` container.
 */
function appendMessage(text, sender) {
    const isUser = sender === 'user';

    // Outer row; the avatar markup goes in first, the bubble is appended after it.
    const row = document.createElement('div');
    row.className = `flex items-start gap-4`;
    if (isUser) {
        row.innerHTML = `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`;
    } else {
        row.innerHTML = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
    }

    const nameLabel = document.createElement('p');
    nameLabel.className = 'font-medium text-sm mb-1';
    nameLabel.textContent = isUser ? 'You' : 'CogniChat';

    const body = document.createElement('div');
    body.className = 'text-base markdown-content';
    // Skip the markdown pass for placeholder (empty) messages.
    if (text) {
        body.innerHTML = marked.parse(text);
    }

    // Empty slot that addTextToSpeechControls fills in later.
    const ttsSlot = document.createElement('div');
    ttsSlot.className = 'tts-controls mt-2';

    const bubble = document.createElement('div');
    bubble.className = `flex-1 pt-1`;
    bubble.appendChild(nameLabel);
    bubble.appendChild(body);
    bubble.appendChild(ttsSlot);

    row.appendChild(bubble);
    chatContent.appendChild(row);
    scrollToBottom();

    return bubble;
}
|
| 472 |
+
|
| 473 |
+
/**
 * Append a "CogniChat is thinking..." row with animated dots to the chat.
 * @returns {HTMLDivElement} The row element, so the caller can remove it once
 *     the reply (or an error) is ready.
 */
function showTypingIndicator() {
    const row = document.createElement('div');
    row.id = 'typing-indicator';
    row.className = `flex items-start gap-4`;
    // Bot avatar first; the bubble is appended after it.
    row.innerHTML = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;

    const statusLabel = document.createElement('p');
    statusLabel.className = 'font-medium text-sm mb-1';
    statusLabel.textContent = 'CogniChat is thinking...';

    // Three spans that the `.typing-indicator span` CSS rules animate.
    const dots = document.createElement('div');
    dots.className = 'typing-indicator';
    dots.innerHTML = '<span></span><span></span><span></span>';

    const bubble = document.createElement('div');
    bubble.className = 'flex-1 pt-1';
    bubble.appendChild(statusLabel);
    bubble.appendChild(dots);

    row.appendChild(bubble);
    chatContent.appendChild(row);
    scrollToBottom();

    return row;
}
|
| 501 |
+
|
| 502 |
+
/** Smooth-scroll the chat window down to its current full height. */
function scrollToBottom() {
    const bottom = chatWindow.scrollHeight;
    chatWindow.scrollTo({ top: bottom, behavior: 'smooth' });
}
|
| 508 |
+
|
| 509 |
+
/**
 * Attach a "Copy" button to a rendered code block.
 *
 * Clicking the button copies the text of the block's `<code>` element to the
 * clipboard and shows "Copied!" (or "Copy failed") for two seconds.
 * A `<pre>` without a `<code>` child gets no button, since there is nothing
 * well-defined to copy.
 * @param {HTMLPreElement} pre - The `<pre>` element to decorate.
 */
function addCopyButton(pre) {
    const codeElement = pre.querySelector('code');
    if (!codeElement) return;

    const button = document.createElement('button');
    button.className = 'copy-code-btn';
    button.textContent = 'Copy';
    pre.appendChild(button);

    button.addEventListener('click', async () => {
        try {
            // navigator.clipboard is only exposed in secure contexts (HTTPS or
            // localhost); fail visibly instead of throwing an uncaught TypeError.
            if (!navigator.clipboard) {
                throw new Error('Clipboard API is not available in this context.');
            }
            await navigator.clipboard.writeText(codeElement.innerText);
            button.textContent = 'Copied!';
        } catch (error) {
            console.error('Copy failed:', error);
            button.textContent = 'Copy failed';
        }
        setTimeout(() => { button.textContent = 'Copy'; }, 2000);
    });
}
|
| 523 |
+
|
| 524 |
+
// --- Text-to-Speech Logic ---
|
| 525 |
+
// Playback state shared by the TTS helpers: the Audio object that is currently
// loaded (if any) and the speak button that owns it.
let currentAudio = null,
    currentPlayingButton = null;

// Inline SVG icons used inside the speak buttons.
const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
|
| 530 |
+
|
| 531 |
+
|
| 532 |
+
/**
 * Add a "Play" button to a message bubble's `.tts-controls` slot.
 *
 * Nothing is added when the text is empty or whitespace-only.
 * @param {HTMLElement} messageBubble - Bubble element containing a `.tts-controls` child.
 * @param {string} text - The text handed to handleTTS when the button is clicked.
 */
function addTextToSpeechControls(messageBubble, text) {
    const ttsControls = messageBubble.querySelector('.tts-controls');
    if (text.trim().length === 0) return;

    const speakButton = document.createElement('button');
    speakButton.className = 'speak-btn px-4 py-2 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
    speakButton.title = 'Listen to this message';
    speakButton.dataset.state = 'play';
    speakButton.innerHTML = `${playIconSVG} <span>Play</span>`;
    speakButton.addEventListener('click', () => handleTTS(text, speakButton));

    ttsControls.appendChild(speakButton);
}
|
| 544 |
+
|
| 545 |
+
/**
 * Handle a click on a speak button.
 *
 * A click on the button that owns the current audio toggles pause/resume.
 * A click on any other button stops the current playback, requests audio for
 * `text` from `/tts`, and plays it.
 *
 * If another button takes over while this request is still in flight, the
 * stale result is discarded so that two clips never play at once.
 * @param {string} text - Text to synthesise.
 * @param {HTMLButtonElement} button - The speak button that was clicked.
 */
async function handleTTS(text, button) {
    if (button === currentPlayingButton) {
        if (currentAudio && !currentAudio.paused) {
            currentAudio.pause();
            button.setAttribute('data-state', 'paused');
            button.innerHTML = `${playIconSVG} <span>Play</span>`;
        } else if (currentAudio && currentAudio.paused) {
            // play() returns a promise; log a rejection instead of leaving it unhandled.
            currentAudio.play().catch((error) => console.error('TTS Error:', error));
            button.setAttribute('data-state', 'playing');
            button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
        }
        return;
    }

    resetAllSpeakButtons();

    currentPlayingButton = button;
    button.setAttribute('data-state', 'loading');
    button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
    button.disabled = true;

    // True once a different button has claimed playback (or playback was reset)
    // while this call was waiting on the network or on play().
    const isSuperseded = () => currentPlayingButton !== button;
    let audioUrl = null;

    try {
        const response = await fetch('/tts', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: text })
        });
        if (!response.ok) throw new Error('Failed to generate audio.');

        const blob = await response.blob();
        if (isSuperseded()) return;

        audioUrl = URL.createObjectURL(blob);
        const audio = new Audio(audioUrl);
        currentAudio = audio;

        audio.onended = () => {
            // The clip is finished, so the blob URL can be released.
            URL.revokeObjectURL(audioUrl);
            button.setAttribute('data-state', 'play');
            button.innerHTML = `${playIconSVG} <span>Play</span>`;
            // Only clear the shared state if it still refers to this clip.
            if (currentAudio === audio) {
                currentAudio = null;
                currentPlayingButton = null;
            }
        };

        // Await playback start so that a rejection reaches the catch block below.
        await audio.play();

        button.setAttribute('data-state', 'playing');
        button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;

    } catch (error) {
        if (audioUrl) URL.revokeObjectURL(audioUrl);

        if (isSuperseded()) {
            // A newer request owns the UI now; do not alert or reset its state.
            console.debug('Discarded superseded TTS request:', error);
            return;
        }

        console.error('TTS Error:', error);
        button.setAttribute('data-state', 'error');
        button.innerHTML = `${playIconSVG} <span>Error</span>`;
        alert('Failed to play audio. Please try again.');
        resetAllSpeakButtons();
    } finally {
        button.disabled = false;
    }
}
|
| 599 |
+
|
| 600 |
+
/**
 * Return every speak button to its idle "Play" state and stop any playback.
 *
 * The current audio (if any) is paused and dropped, and its blob URL is
 * released so the audio data can be garbage-collected.
 */
function resetAllSpeakButtons() {
    document.querySelectorAll('.speak-btn').forEach(btn => {
        btn.setAttribute('data-state', 'play');
        btn.innerHTML = `${playIconSVG} <span>Play</span>`;
        btn.disabled = false;
    });
    if (currentAudio) {
        currentAudio.pause();
        // The audio is about to be discarded, so free its object URL.
        // Revoking a URL that was already revoked is harmless.
        if (currentAudio.src && currentAudio.src.startsWith('blob:')) {
            URL.revokeObjectURL(currentAudio.src);
        }
        currentAudio = null;
    }
    currentPlayingButton = null;
}
|
| 612 |
+
});
|
| 613 |
+
</script>
|
| 614 |
+
</body>
|
| 615 |
+
</html>
|
templates/index.html.backup
ADDED
|
@@ -0,0 +1,974 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>CogniChat - Chat with your Documents</title>
|
| 7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 8 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 9 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 10 |
+
<link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
| 11 |
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 12 |
+
<style>
|
| 13 |
+
:root {
|
| 14 |
+
--background: #f0f4f9;
|
| 15 |
+
--foreground: #1f1f1f;
|
| 16 |
+
--primary: #1a73e8;
|
| 17 |
+
--primary-hover: #1867cf;
|
| 18 |
+
--card: #ffffff;
|
| 19 |
+
--card-border: #dadce0;
|
| 20 |
+
--input-bg: #e8f0fe;
|
| 21 |
+
--user-bubble: #d9e7ff;
|
| 22 |
+
--bot-bubble: #f1f3f4;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
/* Dark mode styles */
|
| 26 |
+
.dark {
|
| 27 |
+
--background: #202124;
|
| 28 |
+
--foreground: #e8eaed;
|
| 29 |
+
--primary: #8ab4f8;
|
| 30 |
+
--primary-hover: #99bdfa;
|
| 31 |
+
--card: #303134;
|
| 32 |
+
--card-border: #5f6368;
|
| 33 |
+
--input-bg: #303134;
|
| 34 |
+
--user-bubble: #3c4043;
|
| 35 |
+
--bot-bubble: #3c4043;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
body {
|
| 39 |
+
font-family: 'Google Sans', 'Roboto', sans-serif;
|
| 40 |
+
background-color: var(--background);
|
| 41 |
+
color: var(--foreground);
|
| 42 |
+
overflow: hidden;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
#chat-window::-webkit-scrollbar { width: 8px; }
|
| 46 |
+
#chat-window::-webkit-scrollbar-track { background: transparent; }
|
| 47 |
+
#chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
|
| 48 |
+
.dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
|
| 49 |
+
|
| 50 |
+
.drop-zone--over {
|
| 51 |
+
border-color: var(--primary);
|
| 52 |
+
box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
/* Loading Spinner */
|
| 56 |
+
.loader {
|
| 57 |
+
width: 48px;
|
| 58 |
+
height: 48px;
|
| 59 |
+
border: 3px solid var(--card-border);
|
| 60 |
+
border-radius: 50%;
|
| 61 |
+
display: inline-block;
|
| 62 |
+
position: relative;
|
| 63 |
+
box-sizing: border-box;
|
| 64 |
+
animation: rotation 1s linear infinite;
|
| 65 |
+
}
|
| 66 |
+
.loader::after {
|
| 67 |
+
content: '';
|
| 68 |
+
box-sizing: border-box;
|
| 69 |
+
position: absolute;
|
| 70 |
+
left: 50%;
|
| 71 |
+
top: 50%;
|
| 72 |
+
transform: translate(-50%, -50%);
|
| 73 |
+
width: 56px;
|
| 74 |
+
height: 56px;
|
| 75 |
+
border-radius: 50%;
|
| 76 |
+
border: 3px solid;
|
| 77 |
+
border-color: var(--primary) transparent;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
@keyframes rotation {
|
| 81 |
+
0% { transform: rotate(0deg); }
|
| 82 |
+
100% { transform: rotate(360deg); }
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
/* Typing Indicator Animation */
|
| 86 |
+
.typing-indicator span {
|
| 87 |
+
height: 10px;
|
| 88 |
+
width: 10px;
|
| 89 |
+
background-color: #9E9E9E;
|
| 90 |
+
border-radius: 50%;
|
| 91 |
+
display: inline-block;
|
| 92 |
+
animation: bounce 1.4s infinite ease-in-out both;
|
| 93 |
+
}
|
| 94 |
+
.typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
|
| 95 |
+
.typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
|
| 96 |
+
@keyframes bounce {
|
| 97 |
+
0%, 80%, 100% { transform: scale(0); }
|
| 98 |
+
40% { transform: scale(1.0); }
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
/* Markdown Styling */
|
| 102 |
+
.markdown-content p { margin-bottom: 0.75rem; line-height: 1.75; }
|
| 103 |
+
.markdown-content ul, .markdown-content ol { margin-left: 1.5rem; margin-bottom: 0.75rem; }
|
| 104 |
+
.markdown-content code { background-color: rgba(0,0,0,0.05); padding: 0.2rem 0.4rem; border-radius: 0.25rem; font-family: 'Roboto Mono', monospace; font-size: 0.9em; }
|
| 105 |
+
.dark .markdown-content code { background-color: rgba(255,255,255,0.1); }
|
| 106 |
+
.markdown-content pre { position: relative; background-color: #f8f9fa; border: 1px solid var(--card-border); border-radius: 0.5rem; margin-bottom: 1rem; }
|
| 107 |
+
.dark .markdown-content pre { background-color: #2e2f32; }
|
| 108 |
+
.markdown-content pre code { background: none; padding: 1rem; display: block; overflow-x: auto; }
|
| 109 |
+
.markdown-content pre .copy-code-btn { position: absolute; top: 0.5rem; right: 0.5rem; background-color: #e8eaed; border: 1px solid #dadce0; color: #5f6368; padding: 0.3rem 0.6rem; border-radius: 0.25rem; cursor: pointer; opacity: 0; transition: opacity 0.2s; font-size: 0.8em;}
|
| 110 |
+
.dark .markdown-content pre .copy-code-btn { background-color: #3c4043; border-color: #5f6368; color: #e8eaed; }
|
| 111 |
+
.markdown-content pre:hover .copy-code-btn { opacity: 1; }
|
| 112 |
+
|
| 113 |
+
/* Spinner for the TTS button */
|
| 114 |
+
.tts-button-loader {
|
| 115 |
+
width: 16px;
|
| 116 |
+
height: 16px;
|
| 117 |
+
border: 2px solid currentColor; /* Use button's text color */
|
| 118 |
+
border-radius: 50%;
|
| 119 |
+
display: inline-block;
|
| 120 |
+
box-sizing: border-box;
|
| 121 |
+
animation: rotation 0.8s linear infinite;
|
| 122 |
+
border-bottom-color: transparent; /* Makes it a half circle spinner */
|
| 123 |
+
}
|
| 124 |
+
</style>
|
| 125 |
+
</head>
|
| 126 |
+
<body class="w-screen h-screen dark">
|
| 127 |
+
<main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
|
| 128 |
+
<div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
|
| 129 |
+
<header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
|
| 130 |
+
<h1 class="text-xl font-medium">Chat with your Docs</h1>
|
| 131 |
+
<p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
|
| 132 |
+
</header>
|
| 133 |
+
<div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
|
| 134 |
+
<div id="chat-content" class="max-w-4xl mx-auto space-y-8">
|
| 135 |
+
</div>
|
| 136 |
+
</div>
|
| 137 |
+
<div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
|
| 138 |
+
<form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
|
| 139 |
+
<input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
|
| 140 |
+
<button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
|
| 141 |
+
<svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
|
| 142 |
+
</button>
|
| 143 |
+
</form>
|
| 144 |
+
</div>
|
| 145 |
+
</div>
|
| 146 |
+
|
| 147 |
+
<div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
|
| 148 |
+
<div class="text-center">
|
| 149 |
+
<h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
|
| 150 |
+
<div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
|
| 151 |
+
<input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
|
| 152 |
+
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
|
| 153 |
+
<p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
|
| 154 |
+
<p id="file-name" class="mt-2 text-xs text-gray-500"></p>
|
| 155 |
+
</div>
|
| 156 |
+
</div>
|
| 157 |
+
</div>
|
| 158 |
+
|
| 159 |
+
<div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
|
| 160 |
+
<div class="loader"></div>
|
| 161 |
+
<p id="loading-text" class="mt-6 text-sm font-medium"></p>
|
| 162 |
+
<p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
|
| 163 |
+
</div>
|
| 164 |
+
</main>
|
| 165 |
+
|
| 166 |
+
<script>
|
| 167 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 168 |
+
const uploadContainer = document.getElementById('upload-container');
|
| 169 |
+
const chatContainer = document.getElementById('chat-container');
|
| 170 |
+
const dropZone = document.getElementById('drop-zone');
|
| 171 |
+
const fileUploadInput = document.getElementById('file-upload');
|
| 172 |
+
const fileNameSpan = document.getElementById('file-name');
|
| 173 |
+
const loadingOverlay = document.getElementById('loading-overlay');
|
| 174 |
+
const loadingText = document.getElementById('loading-text');
|
| 175 |
+
const loadingSubtext = document.getElementById('loading-subtext');
|
| 176 |
+
|
| 177 |
+
const chatForm = document.getElementById('chat-form');
|
| 178 |
+
const chatInput = document.getElementById('chat-input');
|
| 179 |
+
const chatSubmitBtn = document.getElementById('chat-submit-btn');
|
| 180 |
+
const chatWindow = document.getElementById('chat-window');
|
| 181 |
+
const chatContent = document.getElementById('chat-content');
|
| 182 |
+
const chatFilename = document.getElementById('chat-filename');
|
| 183 |
+
|
| 184 |
+
// --- File Upload Logic ---
|
| 185 |
+
dropZone.addEventListener('click', () => fileUploadInput.click());
|
| 186 |
+
|
| 187 |
+
['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
|
| 188 |
+
dropZone.addEventListener(eventName, preventDefaults, false);
|
| 189 |
+
document.body.addEventListener(eventName, preventDefaults, false);
|
| 190 |
+
});
|
| 191 |
+
|
| 192 |
+
// Highlight the drop zone while a drag is over it and clear the highlight
// when the drag leaves or the files are dropped. The class has to be toggled
// from event listeners: toggling it directly inside forEach just adds and
// then removes it once during setup, so no highlight is ever visible.
['dragenter', 'dragover'].forEach(eventName => {
    dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over'), false);
});
['dragleave', 'drop'].forEach(eventName => {
    dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over'), false);
});
|
| 194 |
+
|
| 195 |
+
dropZone.addEventListener('drop', (e) => {
|
| 196 |
+
const files = e.dataTransfer.files;
|
| 197 |
+
if (files.length > 0) handleFiles(files);
|
| 198 |
+
});
|
| 199 |
+
|
| 200 |
+
fileUploadInput.addEventListener('change', (e) => {
|
| 201 |
+
if (e.target.files.length > 0) handleFiles(e.target.files);
|
| 202 |
+
});
|
| 203 |
+
|
| 204 |
+
/**
 * Cancel the browser's default handling of an event and stop it from
 * bubbling further.
 *
 * @param {Event} e - The event to neutralise.
 */
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}
|
| 205 |
+
|
| 206 |
+
/**
 * Package the chosen files into a multipart payload and start the upload.
 *
 * Every file is appended under the `file` form key, the selected names are
 * shown under the drop zone, and the payload is handed to
 * uploadAndProcessFiles.
 *
 * @param {Iterable<File>} files - Files taken from a drop event or the file input.
 * @returns {Promise<void>} Settles once uploadAndProcessFiles has finished.
 */
async function handleFiles(files) {
    const payload = new FormData();
    const names = [];

    Array.from(files).forEach((entry) => {
        payload.append('file', entry);
        names.push(entry.name);
    });

    const joinedNames = names.join(', ');
    fileNameSpan.textContent = `Selected: ${joinedNames}`;

    await uploadAndProcessFiles(payload, names);
}
|
| 217 |
+
|
| 218 |
+
/**
 * POST the selected files to `/upload` and switch to the chat view on success.
 *
 * The loading overlay is shown while the request is in flight and is always
 * hidden again afterwards; the file input is reset so the same file can be
 * picked again. Failures are logged and reported to the user with `alert`.
 *
 * @param {FormData} formData - Multipart payload holding the files under the `file` key.
 * @param {string[]} fileNames - Names of the files in the payload; only the count is displayed.
 * @returns {Promise<void>}
 */
async function uploadAndProcessFiles(formData, fileNames) {
    loadingOverlay.classList.remove('hidden');
    loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
    loadingSubtext.textContent = "For large documents or OCR, setup may take a few minutes to build the knowledge base.";

    try {
        const response = await fetch('/upload', { method: 'POST', body: formData });

        // An error response is not guaranteed to be JSON (it may be an HTML
        // error page or an empty body). Parse defensively so the user sees
        // the HTTP failure instead of a JSON syntax error.
        let result = null;
        try {
            result = await response.json();
        } catch (parseError) {
            result = null;
        }

        if (!response.ok) {
            const fallback = result
                ? 'Unknown error occurred.'
                : `Server error: ${response.status} ${response.statusText}`.trim();
            throw new Error((result && result.message) || fallback);
        }
        if (!result) throw new Error('The server returned an unreadable response.');

        chatFilename.textContent = `Chatting with: ${result.filename}`;
        uploadContainer.classList.add('hidden');
        chatContainer.classList.remove('hidden');
        appendMessage("I've analyzed your documents. What would you like to know?", "bot");

    } catch (error) {
        console.error('Upload error:', error);
        alert(`Error: ${error.message}`);
    } finally {
        loadingOverlay.classList.add('hidden');
        loadingSubtext.textContent = '';
        fileNameSpan.textContent = '';
        fileUploadInput.value = ''; // Reset file input
    }
}
|
| 244 |
+
|
| 245 |
+
// --- Chat Logic ---
|
| 246 |
+
// Submit handler for the chat form: echoes the question, streams the answer
// from `/chat` into a new bot message (re-rendering the accumulated markdown
// on every chunk), then adds copy buttons and the text-to-speech control.
// The input and send button are disabled for the duration of the request.
chatForm.addEventListener('submit', async (e) => {
    e.preventDefault();
    const question = chatInput.value.trim();
    if (!question) return;

    appendMessage(question, 'user');
    chatInput.value = '';
    chatInput.disabled = true;
    chatSubmitBtn.disabled = true;

    const typingIndicator = showTypingIndicator();
    let botMessageContainer = null;
    let contentDiv = null;

    try {
        const response = await fetch('/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ question: question }),
        });

        // statusText is empty over HTTP/2, so include the numeric status too.
        if (!response.ok) throw new Error(`Server error: ${response.status} ${response.statusText}`.trim());

        typingIndicator.remove();
        botMessageContainer = appendMessage('', 'bot');
        contentDiv = botMessageContainer.querySelector('.markdown-content');

        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        let fullResponse = '';

        // NOTE(review): marked does not sanitize its output, so any HTML in
        // the streamed answer is injected as-is. Consider running the result
        // through a sanitizer before assigning it to innerHTML.
        while (true) {
            const { value, done } = await reader.read();
            if (done) break;

            fullResponse += decoder.decode(value, { stream: true });
            contentDiv.innerHTML = marked.parse(fullResponse);
            scrollToBottom();
        }

        // Flush bytes the streaming decoder may still be holding (a multi-byte
        // character split across the final chunk boundary).
        const tail = decoder.decode();
        if (tail) {
            fullResponse += tail;
            contentDiv.innerHTML = marked.parse(fullResponse);
            scrollToBottom();
        }

        contentDiv.querySelectorAll('pre').forEach(addCopyButton);

        addTextToSpeechControls(botMessageContainer, fullResponse);

    } catch (error) {
        console.error('Chat error:', error);
        if (typingIndicator) typingIndicator.remove();
        if (contentDiv) {
            // Build the error node with textContent so the message can never
            // be interpreted as markup.
            const errorParagraph = document.createElement('p');
            errorParagraph.className = 'text-red-500';
            errorParagraph.textContent = `Error: ${error.message}`;
            contentDiv.innerHTML = '';
            contentDiv.appendChild(errorParagraph);
        } else {
            appendMessage(`Error: ${error.message}`, 'bot');
        }
    } finally {
        chatInput.disabled = false;
        chatSubmitBtn.disabled = false;
        chatInput.focus();
    }
});
|
| 303 |
+
|
| 304 |
+
// --- UI Helper Functions ---
|
| 305 |
+
|
| 306 |
+
/**
 * Render one chat message and append it to the conversation.
 *
 * The message consists of an avatar, the sender label, the markdown-rendered
 * text and an (initially empty) `.tts-controls` container. The chat window is
 * scrolled to the bottom after insertion.
 *
 * @param {string} text - Markdown source of the message.
 * @param {string} sender - `'user'` for the user's own messages; anything else is shown as the bot.
 * @returns {HTMLDivElement} The bubble element holding the label, content and controls.
 */
function appendMessage(text, sender) {
    const fromUser = sender === 'user';

    // The avatar markup is set first; the bubble is appended after it below.
    const row = document.createElement('div');
    row.className = 'flex items-start gap-4';
    row.innerHTML = fromUser
        ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
        : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;

    const label = document.createElement('p');
    label.className = 'font-medium text-sm mb-1';
    label.textContent = fromUser ? 'You' : 'CogniChat';

    const body = document.createElement('div');
    body.className = 'text-base markdown-content';
    body.innerHTML = marked.parse(text);

    const controls = document.createElement('div');
    controls.className = 'tts-controls mt-2';

    const bubble = document.createElement('div');
    bubble.className = 'flex-1 pt-1';
    [label, body, controls].forEach((child) => bubble.appendChild(child));

    row.appendChild(bubble);
    chatContent.appendChild(row);
    scrollToBottom();

    return bubble;
}
|
| 339 |
+
|
| 340 |
+
/**
 * Append a "CogniChat is thinking..." placeholder with three animated dots
 * to the conversation and scroll it into view.
 *
 * @returns {HTMLDivElement} The wrapper element, so the caller can remove it later.
 */
function showTypingIndicator() {
    const wrapper = document.createElement('div');
    wrapper.className = 'flex items-start gap-4';
    wrapper.id = 'typing-indicator';
    // Avatar markup goes in first; the bubble is appended after it.
    wrapper.innerHTML = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;

    const label = document.createElement('p');
    label.className = 'font-medium text-sm mb-1';
    label.textContent = 'CogniChat is thinking...';

    const dots = document.createElement('div');
    dots.className = 'typing-indicator';
    dots.innerHTML = '<span></span><span></span><span></span>';

    const bubble = document.createElement('div');
    bubble.className = 'flex-1 pt-1';
    bubble.appendChild(label);
    bubble.appendChild(dots);

    wrapper.appendChild(bubble);
    chatContent.appendChild(wrapper);
    scrollToBottom();

    return wrapper;
}
|
| 368 |
+
|
| 369 |
+
/** Smoothly scroll the chat window down to its latest content. */
function scrollToBottom() {
    const target = { top: chatWindow.scrollHeight, behavior: 'smooth' };
    chatWindow.scrollTo(target);
}
|
| 375 |
+
|
| 376 |
+
/**
 * Attach a "Copy" button to a rendered code block.
 *
 * Clicking the button writes the text of the block's `<code>` child to the
 * clipboard and briefly changes the label to confirm. If the clipboard is
 * unavailable or the write is rejected, the label shows "Copy failed"
 * instead of leaving an unhandled error.
 *
 * @param {HTMLPreElement} pre - The `<pre>` element to decorate.
 */
function addCopyButton(pre) {
    const button = document.createElement('button');
    button.className = 'copy-code-btn';
    button.textContent = 'Copy';
    pre.appendChild(button);

    // Show a temporary label, then restore the default one.
    const flash = (label) => {
        button.textContent = label;
        setTimeout(() => button.textContent = 'Copy', 2000);
    };

    button.addEventListener('click', () => {
        const codeElement = pre.querySelector('code');
        if (!codeElement) return; // Nothing to copy.

        // navigator.clipboard is undefined outside secure contexts.
        if (!navigator.clipboard || !navigator.clipboard.writeText) {
            flash('Copy failed');
            return;
        }

        navigator.clipboard.writeText(codeElement.innerText).then(
            () => flash('Copied!'),
            (error) => {
                console.error('Copy failed:', error);
                flash('Copy failed');
            }
        );
    });
}
|
| 390 |
+
|
| 391 |
+
// ============================ MODIFICATIONS START ==============================
|
| 392 |
+
let currentAudio = null;
|
| 393 |
+
let currentPlayingButton = null;
|
| 394 |
+
|
| 395 |
+
const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
|
| 396 |
+
const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
/**
 * Add a "Play" button to a message so its text can be read aloud.
 *
 * Nothing is added when the text is empty or whitespace only.
 *
 * @param {HTMLElement} messageBubble - Bubble element containing a `.tts-controls` child.
 * @param {string} text - The text passed to handleTTS when the button is clicked.
 */
function addTextToSpeechControls(messageBubble, text) {
    const controls = messageBubble.querySelector('.tts-controls');
    if (text.trim().length === 0) return;

    const playBtn = document.createElement('button');
    // Fixed dark-blue palette rather than the theme variables.
    playBtn.className = 'speak-btn px-4 py-2 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
    playBtn.title = 'Listen to this message';
    playBtn.dataset.state = 'play';
    playBtn.innerHTML = `${playIconSVG} <span>Play</span>`;
    playBtn.addEventListener('click', () => handleTTS(text, playBtn));

    controls.appendChild(playBtn);
}
|
| 412 |
+
|
| 413 |
+
// Identifies the most recent audio request so that a slower, superseded
// request can detect that it is stale and discard its result.
let activeTtsRequest = null;

/**
 * Play, pause or resume the spoken version of a message.
 *
 * Clicking the button that owns the current audio toggles pause/resume.
 * Clicking any other button stops the current audio, requests new audio from
 * `/tts` for `text`, and plays it. Only one audio plays at a time: if another
 * button is clicked while this request is still loading, this request's
 * result is dropped instead of starting a second, untracked playback.
 *
 * @param {string} text - Text sent to the `/tts` endpoint.
 * @param {HTMLButtonElement} button - The speak button that was clicked.
 * @returns {Promise<void>}
 */
async function handleTTS(text, button) {
    // Case 1: The clicked button is already active (playing or paused).
    if (button === currentPlayingButton) {
        if (currentAudio && !currentAudio.paused) { // If it's playing, pause it.
            currentAudio.pause();
            button.setAttribute('data-state', 'paused');
            button.innerHTML = `${playIconSVG} <span>Play</span>`;
        } else if (currentAudio) { // If it's paused, resume it.
            try {
                // play() returns a promise that rejects when playback cannot start.
                await currentAudio.play();
                if (button === currentPlayingButton) {
                    button.setAttribute('data-state', 'playing');
                    button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
                }
            } catch (error) {
                console.error('TTS Error:', error);
            }
        }
        return; // Stop the function here.
    }

    // Case 2: A new button is clicked (or no audio is active).
    // Stop any other audio that might be playing.
    resetAllSpeakButtons();

    const requestToken = {};
    activeTtsRequest = requestToken;
    const isStale = () => activeTtsRequest !== requestToken;

    currentPlayingButton = button;
    button.setAttribute('data-state', 'loading');
    button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
    button.disabled = true;

    let audioUrl = null;
    try {
        const response = await fetch('/tts', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: text })
        });
        if (!response.ok) throw new Error('Failed to generate audio.');

        const blob = await response.blob();

        // A newer request took over while this one was loading; its reset
        // already restored this button, so just drop the result.
        if (isStale()) return;

        audioUrl = URL.createObjectURL(blob);
        const audio = new Audio(audioUrl);
        currentAudio = audio;

        audio.onended = () => {
            URL.revokeObjectURL(audioUrl); // Release the blob once playback is done.
            button.setAttribute('data-state', 'play');
            button.innerHTML = `${playIconSVG} <span>Play</span>`;
            // Only clear the shared state if this audio still owns it.
            if (currentAudio === audio) {
                currentAudio = null;
                currentPlayingButton = null;
            }
        };

        await audio.play();
        if (isStale()) return;

        button.setAttribute('data-state', 'playing');
        button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;

    } catch (error) {
        console.error('TTS Error:', error);
        if (audioUrl) URL.revokeObjectURL(audioUrl);
        // A superseded request fails silently (for example when its playback
        // was interrupted by the newer request); only the active one reports.
        if (!isStale()) {
            button.setAttribute('data-state', 'error');
            button.innerHTML = `${playIconSVG} <span>Error</span>`;
            alert('Failed to play audio. Please try again.');
            resetAllSpeakButtons(); // Reset state on error
        }
    } finally {
        button.disabled = false;
    }
}
|
| 472 |
+
|
| 473 |
+
/**
 * Return every speak button to its idle "Play" state and stop the audio that
 * is currently loaded, if any.
 *
 * The stopped audio's blob URL is revoked so the downloaded audio data can be
 * released; without this each played message stays in memory until the page
 * is closed.
 */
function resetAllSpeakButtons() {
    document.querySelectorAll('.speak-btn').forEach(btn => {
        btn.setAttribute('data-state', 'play');
        btn.innerHTML = `${playIconSVG} <span>Play</span>`;
        btn.disabled = false;
    });
    if (currentAudio) {
        currentAudio.pause();
        const source = currentAudio.src;
        if (source && source.startsWith('blob:')) {
            URL.revokeObjectURL(source);
        }
        currentAudio = null;
    }
    currentPlayingButton = null;
}
|
| 485 |
+
// ============================ MODIFICATIONS END ==============================
|
| 486 |
+
});
|
| 487 |
+
</script>
|
| 488 |
+
</body>
|
| 489 |
+
</html><!DOCTYPE html>
|
| 490 |
+
<html lang="en">
|
| 491 |
+
<head>
|
| 492 |
+
<meta charset="UTF-8">
|
| 493 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 494 |
+
<title>CogniChat - Chat with your Documents</title>
|
| 495 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 496 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 497 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 498 |
+
<link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
| 499 |
+
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 500 |
+
<style>
|
| 501 |
+
:root {
|
| 502 |
+
--background: #f0f4f9;
|
| 503 |
+
--foreground: #1f1f1f;
|
| 504 |
+
--primary: #1a73e8;
|
| 505 |
+
--primary-hover: #1867cf;
|
| 506 |
+
--card: #ffffff;
|
| 507 |
+
--card-border: #dadce0;
|
| 508 |
+
--input-bg: #e8f0fe;
|
| 509 |
+
--user-bubble: #d9e7ff;
|
| 510 |
+
--bot-bubble: #f1f3f4;
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
/* Dark mode styles */
|
| 514 |
+
.dark {
|
| 515 |
+
--background: #202124;
|
| 516 |
+
--foreground: #e8eaed;
|
| 517 |
+
--primary: #8ab4f8;
|
| 518 |
+
--primary-hover: #99bdfa;
|
| 519 |
+
--card: #303134;
|
| 520 |
+
--card-border: #5f6368;
|
| 521 |
+
--input-bg: #303134;
|
| 522 |
+
--user-bubble: #3c4043;
|
| 523 |
+
--bot-bubble: #3c4043;
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
body {
|
| 527 |
+
font-family: 'Google Sans', 'Roboto', sans-serif;
|
| 528 |
+
background-color: var(--background);
|
| 529 |
+
color: var(--foreground);
|
| 530 |
+
overflow: hidden;
|
| 531 |
+
}
|
| 532 |
+
|
| 533 |
+
#chat-window::-webkit-scrollbar { width: 8px; }
|
| 534 |
+
#chat-window::-webkit-scrollbar-track { background: transparent; }
|
| 535 |
+
#chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
|
| 536 |
+
.dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
|
| 537 |
+
|
| 538 |
+
.drop-zone--over {
|
| 539 |
+
border-color: var(--primary);
|
| 540 |
+
box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
|
| 541 |
+
}
|
| 542 |
+
|
| 543 |
+
/* Loading Spinner */
|
| 544 |
+
.loader {
|
| 545 |
+
width: 48px;
|
| 546 |
+
height: 48px;
|
| 547 |
+
border: 3px solid var(--card-border);
|
| 548 |
+
border-radius: 50%;
|
| 549 |
+
display: inline-block;
|
| 550 |
+
position: relative;
|
| 551 |
+
box-sizing: border-box;
|
| 552 |
+
animation: rotation 1s linear infinite;
|
| 553 |
+
}
|
| 554 |
+
.loader::after {
|
| 555 |
+
content: '';
|
| 556 |
+
box-sizing: border-box;
|
| 557 |
+
position: absolute;
|
| 558 |
+
left: 50%;
|
| 559 |
+
top: 50%;
|
| 560 |
+
transform: translate(-50%, -50%);
|
| 561 |
+
width: 56px;
|
| 562 |
+
height: 56px;
|
| 563 |
+
border-radius: 50%;
|
| 564 |
+
border: 3px solid;
|
| 565 |
+
border-color: var(--primary) transparent;
|
| 566 |
+
}
|
| 567 |
+
|
| 568 |
+
@keyframes rotation {
|
| 569 |
+
0% { transform: rotate(0deg); }
|
| 570 |
+
100% { transform: rotate(360deg); }
|
| 571 |
+
}
|
| 572 |
+
|
| 573 |
+
/* Typing Indicator Animation */
|
| 574 |
+
.typing-indicator span {
|
| 575 |
+
height: 10px;
|
| 576 |
+
width: 10px;
|
| 577 |
+
background-color: #9E9E9E;
|
| 578 |
+
border-radius: 50%;
|
| 579 |
+
display: inline-block;
|
| 580 |
+
animation: bounce 1.4s infinite ease-in-out both;
|
| 581 |
+
}
|
| 582 |
+
.typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
|
| 583 |
+
.typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
|
| 584 |
+
@keyframes bounce {
|
| 585 |
+
0%, 80%, 100% { transform: scale(0); }
|
| 586 |
+
40% { transform: scale(1.0); }
|
| 587 |
+
}
|
| 588 |
+
|
| 589 |
+
/* Markdown Styling */
|
| 590 |
+
.markdown-content p { margin-bottom: 0.75rem; line-height: 1.75; }
|
| 591 |
+
.markdown-content ul, .markdown-content ol { margin-left: 1.5rem; margin-bottom: 0.75rem; }
|
| 592 |
+
.markdown-content code { background-color: rgba(0,0,0,0.05); padding: 0.2rem 0.4rem; border-radius: 0.25rem; font-family: 'Roboto Mono', monospace; font-size: 0.9em; }
|
| 593 |
+
.dark .markdown-content code { background-color: rgba(255,255,255,0.1); }
|
| 594 |
+
.markdown-content pre { position: relative; background-color: #f8f9fa; border: 1px solid var(--card-border); border-radius: 0.5rem; margin-bottom: 1rem; }
|
| 595 |
+
.dark .markdown-content pre { background-color: #2e2f32; }
|
| 596 |
+
.markdown-content pre code { background: none; padding: 1rem; display: block; overflow-x: auto; }
|
| 597 |
+
.markdown-content pre .copy-code-btn { position: absolute; top: 0.5rem; right: 0.5rem; background-color: #e8eaed; border: 1px solid #dadce0; color: #5f6368; padding: 0.3rem 0.6rem; border-radius: 0.25rem; cursor: pointer; opacity: 0; transition: opacity 0.2s; font-size: 0.8em;}
|
| 598 |
+
.dark .markdown-content pre .copy-code-btn { background-color: #3c4043; border-color: #5f6368; color: #e8eaed; }
|
| 599 |
+
.markdown-content pre:hover .copy-code-btn { opacity: 1; }
|
| 600 |
+
|
| 601 |
+
/* Spinner for the TTS button */
|
| 602 |
+
.tts-button-loader {
|
| 603 |
+
width: 16px;
|
| 604 |
+
height: 16px;
|
| 605 |
+
border: 2px solid currentColor; /* Use button's text color */
|
| 606 |
+
border-radius: 50%;
|
| 607 |
+
display: inline-block;
|
| 608 |
+
box-sizing: border-box;
|
| 609 |
+
animation: rotation 0.8s linear infinite;
|
| 610 |
+
border-bottom-color: transparent; /* Makes it a half circle spinner */
|
| 611 |
+
}
|
| 612 |
+
</style>
|
| 613 |
+
</head>
|
| 614 |
+
<body class="w-screen h-screen dark">
|
| 615 |
+
<main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
|
| 616 |
+
<div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
|
| 617 |
+
<header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
|
| 618 |
+
<h1 class="text-xl font-medium">Chat with your Docs</h1>
|
| 619 |
+
<p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
|
| 620 |
+
</header>
|
| 621 |
+
<div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
|
| 622 |
+
<div id="chat-content" class="max-w-4xl mx-auto space-y-8">
|
| 623 |
+
</div>
|
| 624 |
+
</div>
|
| 625 |
+
<div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
|
| 626 |
+
<form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
|
| 627 |
+
<input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
|
| 628 |
+
<button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
|
| 629 |
+
<svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
|
| 630 |
+
</button>
|
| 631 |
+
</form>
|
| 632 |
+
</div>
|
| 633 |
+
</div>
|
| 634 |
+
|
| 635 |
+
<div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
|
| 636 |
+
<div class="text-center">
|
| 637 |
+
<h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
|
| 638 |
+
<div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
|
| 639 |
+
<input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
|
| 640 |
+
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
|
| 641 |
+
<p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
|
| 642 |
+
<p id="file-name" class="mt-2 text-xs text-gray-500"></p>
|
| 643 |
+
</div>
|
| 644 |
+
</div>
|
| 645 |
+
</div>
|
| 646 |
+
|
| 647 |
+
<div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
|
| 648 |
+
<div class="loader"></div>
|
| 649 |
+
<p id="loading-text" class="mt-6 text-sm font-medium"></p>
|
| 650 |
+
<p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
|
| 651 |
+
</div>
|
| 652 |
+
</main>
|
| 653 |
+
|
| 654 |
+
<script>
|
| 655 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 656 |
+
const uploadContainer = document.getElementById('upload-container');
|
| 657 |
+
const chatContainer = document.getElementById('chat-container');
|
| 658 |
+
const dropZone = document.getElementById('drop-zone');
|
| 659 |
+
const fileUploadInput = document.getElementById('file-upload');
|
| 660 |
+
const fileNameSpan = document.getElementById('file-name');
|
| 661 |
+
const loadingOverlay = document.getElementById('loading-overlay');
|
| 662 |
+
const loadingText = document.getElementById('loading-text');
|
| 663 |
+
const loadingSubtext = document.getElementById('loading-subtext');
|
| 664 |
+
|
| 665 |
+
const chatForm = document.getElementById('chat-form');
|
| 666 |
+
const chatInput = document.getElementById('chat-input');
|
| 667 |
+
const chatSubmitBtn = document.getElementById('chat-submit-btn');
|
| 668 |
+
const chatWindow = document.getElementById('chat-window');
|
| 669 |
+
const chatContent = document.getElementById('chat-content');
|
| 670 |
+
const chatFilename = document.getElementById('chat-filename');
|
| 671 |
+
|
| 672 |
+
// --- File Upload Logic ---
|
| 673 |
+
dropZone.addEventListener('click', () => fileUploadInput.click());
|
| 674 |
+
|
| 675 |
+
['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
|
| 676 |
+
dropZone.addEventListener(eventName, preventDefaults, false);
|
| 677 |
+
document.body.addEventListener(eventName, preventDefaults, false);
|
| 678 |
+
});
|
| 679 |
+
|
| 680 |
+
// Highlight the drop zone while a drag is over it and clear the highlight
// when the drag leaves or the files are dropped. The class has to be toggled
// from event listeners: toggling it directly inside forEach just adds and
// then removes it once during setup, so no highlight is ever visible.
['dragenter', 'dragover'].forEach(eventName => {
    dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over'), false);
});
['dragleave', 'drop'].forEach(eventName => {
    dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over'), false);
});
|
| 682 |
+
|
| 683 |
+
dropZone.addEventListener('drop', (e) => {
|
| 684 |
+
const files = e.dataTransfer.files;
|
| 685 |
+
if (files.length > 0) handleFiles(files);
|
| 686 |
+
});
|
| 687 |
+
|
| 688 |
+
fileUploadInput.addEventListener('change', (e) => {
|
| 689 |
+
if (e.target.files.length > 0) handleFiles(e.target.files);
|
| 690 |
+
});
|
| 691 |
+
|
| 692 |
+
/**
 * Cancel the browser's default handling of an event and stop it from
 * bubbling further.
 *
 * @param {Event} e - The event to neutralise.
 */
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}
|
| 693 |
+
|
| 694 |
+
/**
 * Package the chosen files into a multipart payload and start the upload.
 *
 * Every file is appended under the `file` form key, the selected names are
 * shown under the drop zone, and the payload is handed to
 * uploadAndProcessFiles.
 *
 * @param {Iterable<File>} files - Files taken from a drop event or the file input.
 * @returns {Promise<void>} Settles once uploadAndProcessFiles has finished.
 */
async function handleFiles(files) {
    const payload = new FormData();
    const names = [];

    Array.from(files).forEach((entry) => {
        payload.append('file', entry);
        names.push(entry.name);
    });

    const joinedNames = names.join(', ');
    fileNameSpan.textContent = `Selected: ${joinedNames}`;

    await uploadAndProcessFiles(payload, names);
}
|
| 705 |
+
|
| 706 |
+
/**
 * POST the selected files to /upload, then switch the UI from the upload
 * view to the chat view. Shows the loading overlay while the request runs
 * and always restores the upload widgets afterwards.
 *
 * @param {FormData} formData  Multipart payload containing the files.
 * @param {string[]} fileNames Names of the files, used for the progress text.
 */
async function uploadAndProcessFiles(formData, fileNames) {
    loadingOverlay.classList.remove('hidden');
    loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
    loadingSubtext.textContent = "For large documents or OCR, setup may take a few minutes to build the knowledge base.";

    try {
        const response = await fetch('/upload', { method: 'POST', body: formData });

        // Error responses are not guaranteed to be JSON (e.g. an HTML error
        // page); parse defensively so the user sees the HTTP failure instead
        // of a JSON syntax error.
        let result = null;
        try {
            result = await response.json();
        } catch (parseError) {
            result = null;
        }

        if (!response.ok) {
            const fallback = response.statusText
                ? `Server error: ${response.statusText}`
                : 'Unknown error occurred.';
            throw new Error((result && result.message) || fallback);
        }
        if (!result) throw new Error('The server returned an unreadable response.');

        chatFilename.textContent = `Chatting with: ${result.filename}`;
        uploadContainer.classList.add('hidden');
        chatContainer.classList.remove('hidden');
        appendMessage("I've analyzed your documents. What would you like to know?", "bot");

    } catch (error) {
        console.error('Upload error:', error);
        alert(`Error: ${error.message}`);
    } finally {
        loadingOverlay.classList.add('hidden');
        loadingSubtext.textContent = '';
        fileNameSpan.textContent = '';
        fileUploadInput.value = ''; // Reset file input so the same file can be re-selected
    }
}
|
| 732 |
+
|
| 733 |
+
// --- Chat Logic ---
|
| 734 |
+
// Send the question to /chat and render the streamed markdown answer as it arrives.
chatForm.addEventListener('submit', async (e) => {
    e.preventDefault();
    const question = chatInput.value.trim();
    if (!question) return;

    appendMessage(question, 'user');
    chatInput.value = '';
    // Lock the input until the answer (or an error) has been rendered.
    chatInput.disabled = true;
    chatSubmitBtn.disabled = true;

    const typingIndicator = showTypingIndicator();
    let botMessageContainer = null;
    let contentDiv = null;

    try {
        const response = await fetch('/chat', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ question: question }),
        });

        if (!response.ok) throw new Error(`Server error: ${response.statusText}`);

        typingIndicator.remove();
        botMessageContainer = appendMessage('', 'bot');
        contentDiv = botMessageContainer.querySelector('.markdown-content');

        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        let fullResponse = '';

        while (true) {
            const { value, done } = await reader.read();
            if (done) break;

            // stream: true keeps partial multi-byte sequences buffered
            // between chunks instead of emitting replacement characters.
            fullResponse += decoder.decode(value, { stream: true });
            contentDiv.innerHTML = marked.parse(fullResponse);
            scrollToBottom();
        }

        // Flush whatever the decoder is still buffering from the last chunk.
        const tail = decoder.decode();
        if (tail) {
            fullResponse += tail;
            contentDiv.innerHTML = marked.parse(fullResponse);
            scrollToBottom();
        }

        contentDiv.querySelectorAll('pre').forEach(addCopyButton);

        addTextToSpeechControls(botMessageContainer, fullResponse);

    } catch (error) {
        console.error('Chat error:', error);
        if (typingIndicator) typingIndicator.remove();
        if (contentDiv) {
            // Build the error node with textContent so the message can never
            // be interpreted as markup.
            const errorLine = document.createElement('p');
            errorLine.className = 'text-red-500';
            errorLine.textContent = `Error: ${error.message}`;
            contentDiv.innerHTML = '';
            contentDiv.appendChild(errorLine);
        } else {
            appendMessage(`Error: ${error.message}`, 'bot');
        }
    } finally {
        chatInput.disabled = false;
        chatSubmitBtn.disabled = false;
        chatInput.focus();
    }
});
|
| 791 |
+
|
| 792 |
+
// --- UI Helper Functions ---
|
| 793 |
+
|
| 794 |
+
/**
 * Append one chat message (avatar, sender label, markdown body and an empty
 * TTS control row) to the chat content and scroll it into view.
 *
 * @param {string} text   Markdown source of the message.
 * @param {string} sender 'user' for the user's messages; anything else is rendered as the bot.
 * @returns {HTMLDivElement} The message bubble element.
 */
function appendMessage(text, sender) {
    const fromUser = sender === 'user';

    // Small factory to keep the element setup below compact.
    const make = (tag, className) => {
        const node = document.createElement(tag);
        node.className = className;
        return node;
    };

    let avatarMarkup;
    if (fromUser) {
        avatarMarkup = `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`;
    } else {
        avatarMarkup = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
    }

    const label = make('p', 'font-medium text-sm mb-1');
    label.textContent = fromUser ? 'You' : 'CogniChat';

    const body = make('div', 'text-base markdown-content');
    body.innerHTML = marked.parse(text);

    // Left empty here; addTextToSpeechControls fills it in later.
    const controls = make('div', 'tts-controls mt-2');

    const bubble = make('div', `flex-1 pt-1`);
    [label, body, controls].forEach((child) => bubble.appendChild(child));

    const row = make('div', `flex items-start gap-4`);
    row.innerHTML = avatarMarkup;
    row.appendChild(bubble);

    chatContent.appendChild(row);
    scrollToBottom();

    return bubble;
}
|
| 827 |
+
|
| 828 |
+
/**
 * Append the animated "thinking" placeholder row to the chat and scroll to it.
 *
 * @returns {HTMLDivElement} The wrapper element; call remove() on it once the answer arrives.
 */
function showTypingIndicator() {
    const avatarMarkup = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;

    const label = document.createElement('p');
    label.className = 'font-medium text-sm mb-1';
    label.textContent = 'CogniChat is thinking...';

    // Three empty spans animated by the .typing-indicator stylesheet rules.
    const dots = document.createElement('div');
    dots.className = 'typing-indicator';
    dots.innerHTML = '<span></span><span></span><span></span>';

    const bubble = document.createElement('div');
    bubble.className = 'flex-1 pt-1';
    bubble.appendChild(label);
    bubble.appendChild(dots);

    const row = document.createElement('div');
    row.className = `flex items-start gap-4`;
    row.id = 'typing-indicator';
    row.innerHTML = avatarMarkup;
    row.appendChild(bubble);

    chatContent.appendChild(row);
    scrollToBottom();

    return row;
}
|
| 856 |
+
|
| 857 |
+
/** Smooth-scroll the chat window to its newest content. */
function scrollToBottom() {
    const { scrollHeight } = chatWindow;
    chatWindow.scrollTo({ top: scrollHeight, behavior: 'smooth' });
}
|
| 863 |
+
|
| 864 |
+
/**
 * Attach a "Copy" button to a rendered code block.
 *
 * @param {HTMLPreElement} pre The <pre> element to decorate.
 */
function addCopyButton(pre) {
    const button = document.createElement('button');
    button.className = 'copy-code-btn';
    button.textContent = 'Copy';
    pre.appendChild(button);

    button.addEventListener('click', () => {
        // Prefer the <code> child; if there is none, copy the <pre> text
        // while leaving out the button's own label.
        const codeElement = pre.querySelector('code');
        const code = codeElement
            ? codeElement.innerText
            : Array.from(pre.childNodes)
                .filter((node) => node !== button)
                .map((node) => node.textContent)
                .join('');

        // The async Clipboard API is only exposed in secure contexts.
        if (!navigator.clipboard || !navigator.clipboard.writeText) {
            console.error('Clipboard API is not available in this context.');
            return;
        }

        navigator.clipboard.writeText(code).then(() => {
            button.textContent = 'Copied!';
            setTimeout(() => button.textContent = 'Copy', 2000);
        }).catch((error) => {
            // Permission denied or document not focused: log it rather than
            // leaving an unhandled promise rejection.
            console.error('Copy failed:', error);
        });
    });
}
|
| 878 |
+
|
| 879 |
+
// ============================ MODIFICATIONS START ==============================
|
| 880 |
+
// Shared text-to-speech state: the Audio object currently loaded (if any)
// and the "Listen" button that owns it.
let currentAudio = null,
    currentPlayingButton = null;

// Inline icons used inside the speak buttons.
const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`,
    pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
|
| 885 |
+
|
| 886 |
+
/**
 * Add a "Listen" button to a message bubble when the message has any
 * non-whitespace text.
 *
 * @param {HTMLElement} messageBubble Bubble containing a .tts-controls element.
 * @param {string} text Text passed to handleTTS when the button is clicked.
 */
function addTextToSpeechControls(messageBubble, text) {
    const controlsRow = messageBubble.querySelector('.tts-controls');
    if (text.trim().length === 0) return;

    const listenBtn = document.createElement('button');
    listenBtn.className = 'speak-btn px-3 py-1.5 bg-blue-600 text-white rounded-full text-sm font-medium hover:bg-blue-700 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
    listenBtn.title = 'Listen to this message';
    listenBtn.innerHTML = `🔊 ${playIconSVG} <span>Listen</span>`;
    listenBtn.addEventListener('click', () => handleTTS(text, listenBtn));

    controlsRow.appendChild(listenBtn);
}
|
| 899 |
+
|
| 900 |
+
// --- BUG FIX: Reworked the entire function for correct pause/resume/stop logic ---
|
| 901 |
+
/**
 * Play, pause or resume the spoken version of a message.
 *
 * Clicking the button that owns the current audio toggles pause/resume.
 * Clicking any other button stops the current audio, requests new audio
 * from /tts and plays it.
 *
 * @param {string} text Text to synthesise.
 * @param {HTMLButtonElement} button The "Listen" button that was clicked.
 */
async function handleTTS(text, button) {
    const listenMarkup = `🔊 ${playIconSVG} <span>Listen</span>`;
    const pauseMarkup = `🔊 ${pauseIconSVG} <span>Pause</span>`;

    // Free the blob URL backing an Audio element once it is no longer needed.
    const releaseAudio = (audio) => {
        if (audio && audio.src && audio.src.startsWith('blob:')) {
            URL.revokeObjectURL(audio.src);
        }
    };

    // Case 1: The clicked button is already playing or paused.
    if (button === currentPlayingButton) {
        if (currentAudio && !currentAudio.paused) { // If playing, pause it.
            currentAudio.pause();
            button.innerHTML = listenMarkup;
        } else if (currentAudio && currentAudio.paused) { // If paused, resume it.
            try {
                await currentAudio.play();
                button.innerHTML = pauseMarkup;
            } catch (error) {
                console.error('TTS Error:', error);
                alert('Failed to play audio. Please try again.');
                resetAllSpeakButtons();
            }
        }
        return;
    }

    // Case 2: A new button is clicked. Stop any other audio.
    if (currentAudio) {
        currentAudio.pause();
        releaseAudio(currentAudio);
    }
    resetAllSpeakButtons();

    currentPlayingButton = button;
    button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
    button.disabled = true;

    let audio = null;
    let playbackStarted = false;

    try {
        const response = await fetch('/tts', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: text })
        });
        if (!response.ok) throw new Error('Failed to generate audio.');

        const blob = await response.blob();

        // Another button was clicked while this request was in flight; drop
        // the stale result instead of starting a second audio.
        if (currentPlayingButton !== button) return;

        audio = new Audio(URL.createObjectURL(blob));
        currentAudio = audio;

        audio.onended = () => {
            releaseAudio(audio);
            button.innerHTML = listenMarkup;
            if (currentAudio === audio) {
                currentAudio = null;
                currentPlayingButton = null;
            }
        };

        audio.onerror = (e) => {
            console.error('Audio playback error:', e);
            // Throwing from an event handler never reaches the try/catch
            // below, so mid-playback failures are handled here. Start-up
            // failures are reported through the rejected play() promise.
            if (playbackStarted && currentAudio === audio) {
                releaseAudio(audio);
                alert('Failed to play audio. Please try again.');
                resetAllSpeakButtons();
            }
        };

        // Await the promise so a rejected play() (autoplay policy,
        // undecodable data) lands in the catch block.
        await audio.play();
        playbackStarted = true;
        button.innerHTML = pauseMarkup;

    } catch (error) {
        console.error('TTS Error:', error);
        releaseAudio(audio);
        alert('Failed to play audio. Please try again.');
        resetAllSpeakButtons(); // Reset state on error
    } finally {
        button.disabled = false;
    }
}
|
| 958 |
+
|
| 959 |
+
/**
 * Restore every speak button to its idle "Listen" state, pause the current
 * audio (if any) and clear the shared TTS state.
 */
function resetAllSpeakButtons() {
    const idleMarkup = `🔊 ${playIconSVG} <span>Listen</span>`;
    for (const speakBtn of document.querySelectorAll('.speak-btn')) {
        speakBtn.innerHTML = idleMarkup;
        speakBtn.disabled = false;
    }

    if (currentAudio) {
        currentAudio.pause();
        currentAudio = null;
    }
    currentPlayingButton = null;
}
|
| 970 |
+
// ============================ MODIFICATIONS END ==============================
|
| 971 |
+
});
|
| 972 |
+
</script>
|
| 973 |
+
</body>
|
| 974 |
+
</html>
|
test_dependencies.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import sys

print("CogniChat Dependencies & PDF Handling Test")

# Test imports. These are hard requirements: the script aborts if any of them
# cannot be imported.
try:
    print("\nTesting core imports...")
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.retrievers import BM25Retriever
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document
    print("Core LangChain imports successful!")

except ImportError as e:
    print(f"Import error: {e}")
    if "rank_bm25" in str(e):
        print("Missing dependency: pip install rank-bm25==0.2.2")
    sys.exit(1)

# PDF loaders are optional: each one is probed independently and a missing
# loader is only reported, never fatal.
try:
    print("\nTesting PDF loading capabilities...")
    try:
        from langchain_community.document_loaders import PyPDFLoader
        print("PyPDFLoader available")
    except ImportError:
        print("PyPDFLoader not available")

    try:
        import fitz
        print("PyMuPDF (fitz) available - can handle corrupted PDFs")
    except ImportError:
        print("PyMuPDF (fitz) not available")

    try:
        import pdfplumber
        print("pdfplumber available - additional PDF parsing method")
    except ImportError:
        print("pdfplumber not available")

except Exception as e:
    print(f"Error testing PDF capabilities: {e}")

# Build a tiny in-memory corpus and run one BM25 query end to end.
try:
    print("\nTesting BM25 Retriever...")

    test_docs = [
        Document(page_content="This is the first test document about machine learning."),
        Document(page_content="This is the second document discussing natural language processing."),
        Document(page_content="The third document covers artificial intelligence topics."),
    ]

    bm25_retriever = BM25Retriever.from_documents(test_docs)
    bm25_retriever.k = 2
    query = "machine learning"
    # get_relevant_documents() is deprecated in favour of invoke(); only fall
    # back to it on installs whose retrievers do not provide invoke().
    if hasattr(bm25_retriever, "invoke"):
        results = bm25_retriever.invoke(query)
    else:
        results = bm25_retriever.get_relevant_documents(query)
    print("BM25 retriever created and tested successfully!")
    print(f"Retrieved {len(results)} documents for query: '{query}'")

except Exception as e:
    print(f"✗ Error testing BM25 retriever: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)

print("\nAll tests completed successfully!")
print("\nThe application should now handle:")
print(" • Regular file uploads and processing")
print(" • Corrupted PDF files with multiple fallback methods")
print(" • BM25 and FAISS hybrid retrieval")
print(" • Proper error messages for failed file processing")
print("\nMake sure to install all dependencies with:")
print(" pip install -r requirements.txt")

print("\nKey Dependencies Added/Updated")
print(" • rank-bm25==0.2.2 (for BM25 retrieval)")
print(" • pymupdf==1.23.26 (PDF fallback method)")
print(" • pdfplumber==0.10.3 (additional PDF parsing)")
|
test_hf_spaces_session.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
BASE_URL = "https://huggingface.co/spaces/Zeri00/Cogni-chat-document-reader"
|
| 4 |
+
|
| 5 |
+
def test_endpoints():
    """Probe the deployed app's /debug and /test-session endpoints.

    Prints a human-readable report in three steps: the /debug payload,
    whether a value written through /test-session survives a second request
    on the same cookie jar, and which RAG session ids /debug reports.
    Network errors are caught per step and printed, so one failing step
    does not stop the others.
    """
    # Without a timeout, requests waits indefinitely on a sleeping or
    # unreachable Space.
    request_timeout = 15

    print("CogniChat HF Spaces Diagnostic\n")

    # Test 1: Check debug endpoint
    print("1. Testing /debug endpoint...")
    try:
        response = requests.get(f"{BASE_URL}/debug", timeout=request_timeout)
        if response.status_code == 200:
            data = response.json()
            print(" Debug endpoint working")
            print(f" Environment: {data.get('environment')}")
            print(f" GROQ API Key: {'Set' if data.get('groq_api_key_set') else 'NOT SET'}")
            print(f" Sessions count: {data.get('sessions_count')}")
            print(f" Upload folder: {data.get('upload_folder')}")
            print(f" Upload folder writable: {data.get('upload_folder_writable')}")
            print(f" Flask session ID: {data.get('flask_session_id')}")
            print(f" Session keys: {data.get('flask_session_keys')}")
        else:
            print(f"Debug endpoint failed: {response.status_code}")
    except Exception as e:
        print(f"Error accessing debug endpoint: {e}")

    print()

    # Test 2: write a value, then read it back with the same cookie jar.
    print("2. Testing /test-session endpoint...")
    try:
        session = requests.Session()
        response = session.post(f"{BASE_URL}/test-session", timeout=request_timeout)
        if response.status_code == 200:
            data = response.json()
            print("Session write working")
            print(f"Test key: {data.get('test_key')}")
            print(f"Session keys: {data.get('session_keys')}")
        else:
            print(f"Session write failed: {response.status_code}")

        response = session.get(f"{BASE_URL}/test-session", timeout=request_timeout)
        if response.status_code == 200:
            data = response.json()
            print("Session read working")
            print(f" Test key persisted: {data.get('test_key')}")
            print(f" Has session data: {data.get('has_session_data')}")

            if not data.get('test_key'):
                print("WARNING: Sessions are not persisting between requests!")
                print(" This is likely the cause of the 400 chat error.")
        else:
            print(f"Session read failed: {response.status_code}")

    except Exception as e:
        print(f"Error testing sessions: {e}")

    print()

    # Test 3: Check if we can find any existing sessions
    print("3. Checking for existing RAG sessions...")
    try:
        response = requests.get(f"{BASE_URL}/debug", timeout=request_timeout)
        if response.status_code == 200:
            data = response.json()
            session_ids = data.get('session_ids', [])
            if session_ids:
                print(f"Found {len(session_ids)} existing RAG sessions")
                print(f" Session IDs: {session_ids[:3]}{'...' if len(session_ids) > 3 else ''}")
            else:
                print("No RAG sessions found (normal if no documents were uploaded)")
        else:
            # Report the failure instead of ending the step with no output.
            print(f"Debug endpoint failed: {response.status_code}")

    except Exception as e:
        print(f"Error checking RAG sessions: {e}")

    print()
    print("Diagnosis Complete")
    print()
    print("LIKELY ISSUE:")
    print("If sessions are not persisting, this is a common issue in HF Spaces")
    print("where Flask sessions don't work properly across requests.")
    print()
    print("SOLUTION:")
    print("We need to modify the app to use a different session storage method")
    print("or pass session ID through request body instead of Flask sessions.")
|
| 87 |
+
|
| 88 |
+
if __name__ == "__main__":
    print("Before running this script:")
    print("1. Update BASE_URL with your actual HF Spaces URL")
    print("2. Make sure your Space is running")
    print("3. Optionally upload a document first")
    print()

    # Run the diagnostic directly. The guard used to ask the user to
    # "uncomment the test_endpoints() call", but no such call existed, so
    # running the script never tested anything.
    test_endpoints()
|
test_upload_permissions.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
"""
|
| 3 |
+
Test script to verify upload folder permissions and file operations.
|
| 4 |
+
"""
|
| 5 |
+
import os
import tempfile
from pathlib import Path

print("Upload Folder Permission Test")

# Detect environment
is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
print(f"Environment: {'Hugging Face Spaces' if is_hf_spaces else 'Local Development'}")

# Test different upload folder configurations
test_folders = [
    'uploads',  # Relative path (will fail in HF Spaces)
    './uploads',  # Current directory relative path
    '/tmp/uploads',  # Recommended for HF Spaces
    '/tmp/cognichat_uploads'  # Alternative temp location
]

print("\nTesting Upload Folder Options")

for folder in test_folders:
    print(f"\nTesting: {folder}")

    # Remember whether this probe creates the directory, so the probe does
    # not leave empty folders behind.
    created_by_probe = not os.path.isdir(folder)

    try:
        # Try to create the directory
        os.makedirs(folder, exist_ok=True)
        print("Directory created/exists")

        # Test write permissions
        test_file = os.path.join(folder, 'test_write.txt')
        with open(test_file, 'w') as f:
            f.write('test content')
        print("Write permission verified")

        # Test read permissions, and confirm the data actually round-trips
        with open(test_file, 'r') as f:
            content = f.read()
        if content != 'test content':
            raise OSError(f"read back unexpected content: {content!r}")
        print("Read permission verified")

        # Clean up test file
        os.remove(test_file)
        print(" File deletion works")

        # Get absolute path
        abs_path = os.path.abspath(folder)
        print(f"Full path: {abs_path}")
        print(f"Writable: {os.access(folder, os.W_OK)}")

    except PermissionError as e:
        print(f"Permission denied: {e}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        if created_by_probe:
            try:
                # rmdir only removes empty directories, so anything that
                # appeared in the folder meanwhile is left untouched.
                os.rmdir(folder)
            except OSError:
                pass

# Recommended configuration
print("\nRecommended Configuration")
if is_hf_spaces:
    recommended_folder = '/tmp/uploads'
    print(f"For Hugging Face Spaces: {recommended_folder}")
else:
    recommended_folder = 'uploads'
    print(f"For local development: {recommended_folder}")

print("\nUse this in your Flask app:")
print(f"app.config['UPLOAD_FOLDER'] = '{recommended_folder}'")

# Test the current working directory permissions
print("\nCurrent Directory Info")
cwd = os.getcwd()
print(f"Current working directory: {cwd}")
print(f"CWD is writable: {os.access(cwd, os.W_OK)}")
print("\nPath Environment Variables")
path_vars = ['HOME', 'TMPDIR', 'TEMP', 'TMP', 'SPACE_ID', 'SPACES_ZERO_GPU']
for var in path_vars:
    value = os.getenv(var)
    if value:
        print(f"{var}: {value}")

print("\nTest Complete")
|
verify_hf_spaces_ready.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Verification script to check if CogniChat is ready for HuggingFace Spaces deployment.
|
| 4 |
+
Run this before deploying to catch any configuration issues.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
def print_header(text):
    """Print *text* as a section title framed by two 60-character rules."""
    rule = "=" * 60
    # One print call, three lines: a blank line plus rule, the title, the rule.
    print("\n" + rule, f" {text}", rule, sep="\n")
|
| 16 |
+
|
| 17 |
+
def print_check(condition, message):
    """Print a PASS/FAIL line for *message* and return *condition* unchanged."""
    label = "FAIL"
    if condition:
        label = "PASS"
    print(f"{label}: {message}")
    return condition
|
| 22 |
+
|
| 23 |
+
def verify_files_exist():
    """Verify all required files exist.

    Returns:
        True when every required file is present relative to the current
        working directory, otherwise False.
    """
    print_header("1. Checking Required Files")

    required_files = [
        'app.py',
        'rag_processor.py',
        'Dockerfile',
        'requirements.txt',
        'templates/index.html'
    ]

    # Build the full result list before combining. `all_exist and
    # print_check(...)` short-circuits after the first failure, so the
    # remaining files would never be reported.
    results = [
        print_check(Path(file).exists(), f"File exists: {file}")
        for file in required_files
    ]

    return all(results)
|
| 41 |
+
|
| 42 |
+
def verify_upload_folder_config():
    """Verify upload folder configuration in app.py.

    Looks for fixed marker strings in the source of app.py.

    Returns:
        True when every marker is found, otherwise False.

    Raises:
        OSError: if app.py cannot be opened.
    """
    print_header("2. Checking Upload Folder Configuration")

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open('app.py', 'r', encoding='utf-8') as f:
        app_content = f.read()

    checks = [
        ('SPACE_ID' in app_content, "Detects SPACE_ID environment variable"),
        ("'/tmp/uploads'" in app_content, "Uses /tmp/uploads for HF Spaces"),
        ('is_hf_spaces' in app_content, "Has HF Spaces detection logic"),
        ('os.makedirs(app.config' in app_content, "Creates upload directory"),
    ]

    # Print every check; combining with `all_pass and print_check(...)`
    # would stop printing after the first failure.
    results = [print_check(condition, message) for condition, message in checks]

    return all(results)
|
| 61 |
+
|
| 62 |
+
def verify_session_management():
    """Verify session management implementation.

    Looks for fixed marker strings in app.py (backend) and
    templates/index.html (frontend).

    Returns:
        True when every backend and frontend marker is found, otherwise False.

    Raises:
        OSError: if either file cannot be opened.
    """
    print_header("3. Checking Session Management")

    # Check backend
    with open('app.py', 'r', encoding='utf-8') as f:
        app_content = f.read()

    backend_checks = [
        ("session['session_id']" in app_content, "Stores session_id in Flask session"),
        ("'session_id': session_id" in app_content, "Returns session_id in upload response"),
        ("data.get('session_id')" in app_content, "Accepts session_id from request body"),
    ]

    # Check frontend. The template contains emoji, so decode it as UTF-8
    # rather than with the platform default.
    with open('templates/index.html', 'r', encoding='utf-8') as f:
        html_content = f.read()

    frontend_checks = [
        ('sessionStorage.setItem' in html_content, "Frontend stores session_id"),
        ('sessionStorage.getItem' in html_content, "Frontend retrieves session_id"),
        ('requestBody.session_id' in html_content, "Frontend sends session_id in chat"),
    ]

    # Every check is printed before the results are combined, so a failing
    # check does not hide the ones after it.
    print(" Backend Implementation:")
    backend_results = [
        print_check(condition, f" {message}") for condition, message in backend_checks
    ]

    print("\n Frontend Implementation:")
    frontend_results = [
        print_check(condition, f" {message}") for condition, message in frontend_checks
    ]

    return all(backend_results) and all(frontend_results)
|
| 96 |
+
|
| 97 |
+
def verify_error_handling():
    """Verify robust error handling.

    Looks for fixed marker strings in the source of app.py.

    Returns:
        True when every marker is found, otherwise False.

    Raises:
        OSError: if app.py cannot be opened.
    """
    print_header("4. Checking Error Handling")

    with open('app.py', 'r', encoding='utf-8') as f:
        app_content = f.read()

    checks = [
        ('try:' in app_content and 'except' in app_content, "Has try/except blocks"),
        ('load_pdf_with_fallback' in app_content, "Has PDF fallback loading"),
        ('failed_files' in app_content, "Tracks failed file uploads"),
        ('fallback_dir' in app_content, "Has cache directory fallbacks"),
    ]

    # Print every check; `all_pass and print_check(...)` would skip the
    # checks that follow the first failure.
    results = [print_check(condition, message) for condition, message in checks]

    return all(results)
|
| 116 |
+
|
| 117 |
+
def verify_environment_variables():
    """Check for environment variable handling.

    Looks for fixed marker strings in app.py and rag_processor.py, and
    checks that .env.example or README.md exists.

    Returns:
        True when every check passes, otherwise False.

    Raises:
        OSError: if app.py or rag_processor.py cannot be opened.
    """
    print_header("5. Checking Environment Variables")

    with open('app.py', 'r', encoding='utf-8') as f:
        app_content = f.read()

    with open('rag_processor.py', 'r', encoding='utf-8') as f:
        rag_content = f.read()

    checks = [
        ('GROQ_API_KEY' in rag_content, "RAG processor checks GROQ_API_KEY"),
        ('SPACE_ID' in app_content, "App detects HF Spaces environment"),
        ('HF_HOME' in app_content, "Sets HuggingFace cache paths"),
        ('load_dotenv()' in rag_content, "Loads .env file for local dev"),
    ]

    # Print every check before combining, so one failure does not hide the
    # checks after it.
    results = [print_check(condition, message) for condition, message in checks]

    # Either file counts as documentation of the environment variables.
    results.append(print_check(
        Path('.env.example').exists() or Path('README.md').exists(),
        "Has documentation for environment variables"
    ))

    return all(results)
|
| 145 |
+
|
| 146 |
+
def verify_dockerfile():
    """Verify Dockerfile configuration.

    Returns:
        False when the Dockerfile is missing or any marker string is absent
        from it, otherwise True.
    """
    print_header("6. Checking Dockerfile")

    if not Path('Dockerfile').exists():
        print_check(False, "Dockerfile exists")
        return False

    with open('Dockerfile', 'r', encoding='utf-8') as f:
        dockerfile_content = f.read()

    checks = [
        ('FROM python' in dockerfile_content, "Uses Python base image"),
        ('WORKDIR /app' in dockerfile_content, "Sets working directory"),
        ('EXPOSE 7860' in dockerfile_content, "Exposes port 7860 (HF requirement)"),
        ('appuser' in dockerfile_content, "Runs as non-root user"),
        ('CMD' in dockerfile_content or 'ENTRYPOINT' in dockerfile_content, "Has startup command"),
    ]

    # Print every check; `all_pass and print_check(...)` would stop
    # reporting after the first failure.
    results = [print_check(condition, message) for condition, message in checks]

    return all(results)
|
| 170 |
+
|
| 171 |
+
def verify_requirements():
    """Verify that requirements.txt mentions every critical dependency.

    The file is lower-cased and each dependency name is matched as a
    substring, so ``faiss`` is satisfied by a line such as ``faiss-cpu``.
    A missing requirements.txt is reported as a failed check rather than
    raising ``FileNotFoundError``.

    Returns:
        A truthy value when every dependency was found, a falsy one
        otherwise (``False`` when requirements.txt is missing).
        NOTE(review): assumes ``print_check`` returns a truthy value for a
        passing check -- confirm against its definition.
    """
    print_header("7. Checking Dependencies")

    # Guard against a missing file, mirroring the Dockerfile check.
    if not Path('requirements.txt').exists():
        print_check(False, "requirements.txt exists")
        return False

    with open('requirements.txt', 'r') as f:
        requirements = f.read().lower()

    critical_deps = [
        'flask',
        'langchain',
        'groq',
        'faiss',
        'sentence-transformers',
        'pypdf',
        'gtts',
        'rank-bm25',
    ]

    all_pass = True
    for dep in critical_deps:
        found = dep in requirements
        # Call print_check first so every dependency is reported even after
        # an earlier one was missing (`all_pass and ...` would short-circuit).
        all_pass = print_check(found, f"Has dependency: {dep}") and all_pass

    return all_pass
|
| 195 |
+
|
| 196 |
+
def verify_no_duplicates():
    """Check templates/index.html for signs of duplicated content.

    Counts DOCTYPE declarations, closing ``</html>`` tags and occurrences of
    the ``uploadAndProcessFiles`` identifier. A missing template is reported
    as a failed check rather than raising ``FileNotFoundError``.

    Returns:
        A truthy value when every check passed, a falsy one otherwise
        (``False`` when the template is missing).
        NOTE(review): assumes ``print_check`` returns a truthy value for a
        passing check -- confirm against its definition.
    """
    print_header("8. Checking for Duplicates")

    template_path = Path('templates/index.html')
    if not template_path.exists():
        print_check(False, "templates/index.html exists")
        return False

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(template_path, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # HTML tag names and the DOCTYPE keyword are case-insensitive, so count
    # them on a lower-cased copy. The JavaScript identifier below stays
    # case-sensitive and is counted on the original text.
    lowered = html_content.lower()
    doctype_count = lowered.count('<!doctype html>')

    checks = [
        (doctype_count == 1, f"Single HTML document (found {doctype_count} DOCTYPE declarations)"),
        (lowered.count('</html>') == 1, "Single closing </html> tag"),
        (html_content.count('uploadAndProcessFiles') <= 2, "No duplicate JavaScript functions"),
    ]

    all_pass = True
    for condition, message in checks:
        # Call print_check first: `all_pass and print_check(...)` would
        # short-circuit after the first failure and skip later checks.
        all_pass = print_check(condition, message) and all_pass

    return all_pass
|
| 216 |
+
|
| 217 |
+
def main():
    """Run every readiness check, print a summary, and return an exit status.

    All verification functions are executed up front; their results are then
    tallied to decide which summary to print.

    Returns:
        0 when all checks pass, 1 when at least one check fails.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(" HuggingFace Spaces Readiness Check for CogniChat")
    print(banner)

    # Every verifier runs here, in order, before any summary output.
    outcomes = [
        verify_files_exist(),
        verify_upload_folder_config(),
        verify_session_management(),
        verify_error_handling(),
        verify_environment_variables(),
        verify_dockerfile(),
        verify_requirements(),
        verify_no_duplicates(),
    ]

    print_header("Summary")

    passed = sum(outcomes)
    total = len(outcomes)

    # Failure path first: report the number of failing groups and bail out.
    if not all(outcomes):
        failure_lines = (
            f"\n SOME CHECKS FAILED ({total - passed}/{total} issues)",
            "\n Please fix the issues above before deploying.",
            "\nFor detailed guidance, see:",
            "- HF_SPACES_FILE_STORAGE_GUIDE.md",
            "- DEPLOYMENT.md",
            "- HF_SPACES_CHECKLIST.md",
        )
        for line in failure_lines:
            print(line)
        return 1

    success_lines = (
        f"\n✅ ALL CHECKS PASSED ({passed}/{total})",
        "\n🚀 Your application is ready for HuggingFace Spaces deployment!",
        "\nNext steps:",
        "1. Go to https://huggingface.co/new-space",
        "2. Select 'Docker' as SDK",
        "3. Upload all project files",
        "4. Set GROQ_API_KEY in Space secrets",
        "5. Wait for build to complete",
    )
    for line in success_lines:
        print(line)
    return 0
|
| 257 |
+
|
| 258 |
+
if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the process exit code.
    sys.exit(main())
|