riteshraut committed on
Commit becc8f7 · 1 Parent(s): 6579ca3

fix/new update

- Copy.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
- Copy.gitignore ADDED
@@ -0,0 +1,6 @@
+ .env
+ /uploads/
+ /vectorstores/
+ /.cache/
+ __pycache__/
+ *.pyc
.env - Copy.example ADDED
@@ -0,0 +1,2 @@
+ # Copy this file to .env and fill in your API key
+ GROQ_API_KEY=your_groq_api_key_here
.env.example ADDED
@@ -0,0 +1,2 @@
+ # Copy this file to .env and fill in your API key
+ GROQ_API_KEY=your_groq_api_key_here
.gitignore ADDED
@@ -0,0 +1,6 @@
+ .env
+ /uploads/
+ /vectorstores/
+ /.cache/
+ __pycache__/
+ *.pyc
Dockerfile ADDED
@@ -0,0 +1,45 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+ build-essential \
+ curl \
+ git \
+ libfaiss-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+ # Create a non-root user early
+ RUN useradd --create-home --shell /bin/bash --uid 1000 appuser
+
+ # Copy and install Python requirements as root first
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir --upgrade pip
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Set environment variables for HuggingFace models and cache
+ ENV HF_HOME=/home/appuser/.cache/huggingface
+ ENV HF_HUB_CACHE=/home/appuser/.cache/huggingface/hub
+ ENV TRANSFORMERS_CACHE=/home/appuser/.cache/transformers
+ ENV SENTENCE_TRANSFORMERS_HOME=/home/appuser/.cache/sentence_transformers
+
+ # Create cache directories in the user's home directory
+ RUN mkdir -p /home/appuser/.cache/huggingface/hub \
+ /home/appuser/.cache/transformers \
+ /home/appuser/.cache/sentence_transformers \
+ /app/uploads && \
+ chown -R appuser:appuser /home/appuser/.cache /app && \
+ chmod -R 755 /home/appuser/.cache /app
+
+ # Copy application code and set ownership
+ COPY --chown=appuser:appuser . .
+
+ # Switch to non-root user
+ USER appuser
+
+ # Expose port 7860
+ EXPOSE 7860
+
+ # Run the application
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,566 @@
 ---
- title: Cogni Chat Document Reader V2
- emoji: 📚
- colorFrom: gray
- colorTo: gray
 sdk: docker
 pinned: false
 license: mit
- short_description: Rag based document chatbot
 ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: CogniChat - Chat with Your Documents
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ app_port: 7860
10
  ---
11
+ # 🤖 CogniChat - Intelligent Document Chat System
12
 
13
+ <div align="center">
14
+
15
+ ![License](https://img.shields.io/badge/license-MIT-blue.svg)
16
+ ![Python](https://img.shields.io/badge/python-3.9+-brightgreen.svg)
17
+ ![Docker](https://img.shields.io/badge/docker-ready-blue.svg)
18
+ ![HuggingFace](https://img.shields.io/badge/🤗-Spaces-yellow.svg)
19
+
20
+ **Transform your documents into interactive conversations powered by advanced RAG technology**
21
+
22
+ <p align="center">
23
+ <img src="Document_reader.gif" width="100%" alt="CogniChat Demo">
24
+ </p>
25
+
26
+ [Features](#-features) • [Quick Start](#-quick-start) • [Architecture](#-architecture) • [Deployment](#-deployment) • [API](#-api-reference)
27
+
28
+ </div>
29
+
30
+ ---
31
+
32
+ ## 📋 Table of Contents
33
+
34
+ - [Overview](#-overview)
35
+ - [Features](#-features)
36
+ - [Architecture](#-architecture)
37
+ - [Technology Stack](#-technology-stack)
38
+ - [Quick Start](#-quick-start)
39
+ - [Deployment](#-deployment)
40
+ - [Configuration](#-configuration)
41
+ - [API Reference](#-api-reference)
42
+ - [Troubleshooting](#-troubleshooting)
43
+ - [Contributing](#-contributing)
44
+ - [License](#-license)
45
+
46
+ ---
47
+
48
+ ## 🎯 Overview
49
+
50
+ CogniChat is a production-ready, intelligent document chat application that leverages **Retrieval Augmented Generation (RAG)** to enable natural conversations with your documents. Built with enterprise-grade technologies, it provides accurate, context-aware responses from your document corpus.
51
+
52
+ ### Why CogniChat?
53
+
54
+
55
+ - **🔉 Audio Overview of Your Document**: Ask a question and listen to the spoken answer; your documents can now talk back to you.
56
+ - **🎯 Accurate Retrieval**: Hybrid search combining BM25 and FAISS for optimal results
57
+ - **💬 Conversational Memory**: Maintains context across multiple interactions
58
+ - **📄 Multi-Format Support**: Handles PDF, DOCX, TXT, and image files
59
+ - **🚀 Production Ready**: Docker support, comprehensive error handling, and security best practices
60
+ - **🎨 Modern UI**: Responsive design with dark mode and real-time streaming
61
+
62
+ ---
63
+
64
+ ## ✨ Features
65
+
66
+ ### Core Capabilities
67
+
68
+ | Feature | Description |
69
+ |---------|-------------|
70
+ | **Multi-Format Processing** | Upload and process PDF, DOCX, TXT, and image files |
71
+ | **Hybrid Search** | Combines BM25 (keyword) and FAISS (semantic) for superior retrieval |
72
+ | **Conversational AI** | Powered by Groq's Llama 3.1 for intelligent responses |
73
+ | **Memory Management** | Maintains chat history for contextual conversations |
74
+ | **Text-to-Speech** | Built-in TTS for audio playback of responses |
75
+ | **Streaming Responses** | Real-time token streaming for better UX |
76
+ | **Document Chunking** | Intelligent text splitting for optimal context windows |
77
+
78
+ ### Advanced Features
79
+
80
+ - **Semantic Embeddings**: HuggingFace `BAAI/bge-base-en-v1.5` for accurate vector representations
81
+ - **Reranking**: Contextual compression for improved relevance
82
+ - **Error Handling**: Comprehensive fallback mechanisms and error recovery
83
+ - **Security**: Non-root Docker execution and environment-based secrets
84
+ - **Scalability**: Optimized for both local and cloud deployments
85
+
86
+ ---
87
+
88
+ ## 🏗 Architecture
89
+
90
+ ### RAG Pipeline Overview
91
+
92
+ ```mermaid
93
+ graph TB
94
+ A[Document Upload] --> B[Document Processing]
95
+ B --> C[Text Extraction]
96
+ C --> D[Chunking Strategy]
97
+ D --> E[Embedding Generation]
98
+ E --> F[Vector Store FAISS]
99
+
100
+ G[User Query] --> H[Query Embedding]
101
+ H --> I[Hybrid Retrieval]
102
+
103
+ F --> I
104
+ J[BM25 Index] --> I
105
+
106
+ I --> K[Reranking]
107
+ K --> L[Context Assembly]
108
+ L --> M[LLM Groq Llama 3.1]
109
+ M --> N[Response Generation]
110
+ N --> O[Streaming Output]
111
+
112
+ P[Chat History] --> M
113
+ N --> P
114
+
115
+ style A fill:#e1f5ff
116
+ style G fill:#e1f5ff
117
+ style F fill:#ffe1f5
118
+ style J fill:#ffe1f5
119
+ style M fill:#f5e1ff
120
+ style O fill:#e1ffe1
121
+ ```
122
+
123
+ ### System Architecture
124
+
125
+ ```mermaid
126
+ graph LR
127
+ A[Client Browser] -->|HTTP/WebSocket| B[Flask Server]
128
+ B --> C[Document Processor]
129
+ B --> D[RAG Engine]
130
+ B --> E[TTS Service]
131
+
132
+ C --> F[(File Storage)]
133
+ D --> G[(FAISS Vector DB)]
134
+ D --> H[(BM25 Index)]
135
+ D --> I[Groq API]
136
+
137
+ J[HuggingFace Models] --> D
138
+
139
+ style B fill:#4a90e2
140
+ style D fill:#e24a90
141
+ style I fill:#90e24a
142
+ ```
143
+
144
+ ### Data Flow
145
+
146
+ 1. **Document Ingestion**: Files are uploaded and validated
147
+ 2. **Processing Pipeline**: Text extraction → Chunking → Embedding
148
+ 3. **Indexing**: Dual indexing (FAISS + BM25) for hybrid search
149
+ 4. **Query Processing**: User queries are embedded and searched
150
+ 5. **Retrieval**: Top-k relevant chunks retrieved using hybrid approach
151
+ 6. **Generation**: LLM generates contextual responses with citations
152
+ 7. **Delivery**: The answer is returned to the client as JSON and rendered in the chat UI
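+
+ A condensed sketch of steps 3-5, using the same LangChain pieces `app.py` relies on (`chunks` stands in for the child chunks produced by the splitting step; treat this as an illustration, not the exact application code):
+
+ ```python
+ from langchain_core.documents import Document
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.retrievers import BM25Retriever
+ from langchain.retrievers import EnsembleRetriever
+
+ chunks = [Document(page_content="FAISS is a library for fast similarity search.",
+                    metadata={"source": "example.pdf"})]
+ embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+
+ vectorstore = FAISS.from_documents(chunks, embeddings)   # dense (semantic) index
+ bm25 = BM25Retriever.from_documents(chunks)              # sparse (keyword) index
+ bm25.k = 5
+
+ hybrid = EnsembleRetriever(
+     retrievers=[bm25, vectorstore.as_retriever(search_kwargs={"k": 5})],
+     weights=[0.5, 0.5],
+ )
+ top_chunks = hybrid.get_relevant_documents("What is FAISS used for?")
+ ```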
153
+
154
+ ---
155
+
156
+ ## 🛠 Technology Stack
157
+
158
+ ### Backend
159
+
160
+ | Component | Technology | Purpose |
161
+ |-----------|-----------|---------|
162
+ | **Framework** | Flask 2.3+ | Web application framework |
163
+ | **RAG** | LangChain | RAG pipeline orchestration |
164
+ | **Vector DB** | FAISS | Fast similarity search |
165
+ | **Keyword Search** | BM25 | Sparse retrieval |
166
+ | **LLM** | Groq Llama 3.1 | Response generation |
167
+ | **Embeddings** | HuggingFace Transformers | Semantic embeddings |
168
+ | **Doc Processing** | Unstructured, PyPDF, python-docx | Multi-format parsing |
169
+
170
+ ### Frontend
171
+
172
+ | Component | Technology |
173
+ |-----------|-----------|
174
+ | **UI Framework** | TailwindCSS |
175
+ | **JavaScript** | Vanilla ES6+ |
176
+ | **Icons** | Font Awesome |
177
+ | **Markdown** | Marked.js |
178
+
179
+ ### Infrastructure
180
+
181
+ - **Containerization**: Docker + Docker Compose
182
+ - **Deployment**: HuggingFace Spaces, local, cloud-agnostic
183
+ - **Security**: Environment-based secrets, non-root execution
184
+
185
+ ---
186
+
187
+ ## 🚀 Quick Start
188
+
189
+ ### Prerequisites
190
+
191
+ - Python 3.9+
192
+ - Docker (optional, recommended)
193
+ - Groq API Key ([Get one here](https://console.groq.com/keys))
194
+
195
+ ### Installation Methods
196
+
197
+ #### 🐳 Method 1: Docker (Recommended)
198
+
199
+ ```bash
200
+ # Clone the repository
201
+ git clone https://github.com/RautRitesh/Chat-with-docs
202
+ cd Chat-with-docs
203
+
204
+ # Create environment file
205
+ cp .env.example .env
206
+
207
+ # Add your Groq API key to .env
208
+ echo "GROQ_API_KEY=your_actual_api_key_here" >> .env
209
+
210
+ # Build and run with Docker Compose
211
+ docker-compose up -d
212
+
213
+ # Or build manually
214
+ docker build -t cognichat .
215
+ docker run -p 7860:7860 --env-file .env cognichat
216
+ ```
217
+
218
+ #### 🐍 Method 2: Local Python Environment
219
+
220
+ ```bash
221
+ # Clone the repository
222
+ git clone https://github.com/RautRitesh/Chat-with-docs
223
+ cd Chat-with-docs
224
+
225
+ # Create virtual environment
226
+ python -m venv venv
227
+ source venv/bin/activate # On Windows: venv\Scripts\activate
228
+
229
+ # Install dependencies
230
+ pip install -r requirements.txt
231
+
232
+ # Set environment variables
233
+ export GROQ_API_KEY=your_actual_api_key_here
234
+
235
+ # Run the application
236
+ python app.py
237
+ ```
238
+
239
+ #### 🤗 Method 3: HuggingFace Spaces
240
+
241
+ 1. Fork this repository
242
+ 2. Create a new Space on [HuggingFace](https://huggingface.co/spaces)
243
+ 3. Link your forked repository
244
+ 4. Add `GROQ_API_KEY` in Settings → Repository Secrets
245
+ 5. Space will auto-deploy!
246
+
247
+ ### First Steps
248
+
249
+ 1. Open `http://localhost:7860` in your browser
250
+ 2. Upload a document (PDF, DOCX, TXT, or image)
251
+ 3. Wait for processing (progress indicator will show status)
252
+ 4. Start chatting with your document!
253
+ 5. Use the 🔊 button to hear responses via TTS
254
+
255
+ ---
256
+
257
+ ## 📦 Deployment
258
+
259
+ ### Environment Variables
260
+
261
+ Create a `.env` file with the following variables:
262
+
263
+ ```bash
264
+ # Required
265
+ GROQ_API_KEY=your_groq_api_key_here
266
+
267
+ # Optional
268
+ PORT=7860
269
+ HF_HOME=/tmp/huggingface_cache # For HF Spaces
270
+ FLASK_DEBUG=0 # Set to 1 for development
271
+ MAX_UPLOAD_SIZE=10485760 # 10MB default
272
+ ```
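+
+ A minimal sketch of how the key is consumed at runtime, mirroring what `rag_processor.py` does (`load_dotenv()` plus a guard against the placeholder value):
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()  # pulls GROQ_API_KEY from .env into the process environment
+ api_key = os.getenv("GROQ_API_KEY")
+ if not api_key or api_key == "your_groq_api_key_here":
+     raise ValueError("GROQ_API_KEY not found or not configured properly.")
+ ```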
273
+
274
+ ### Docker Deployment
275
+
276
+ ```bash
277
+ # Production build
278
+ docker build -t cognichat:latest .
279
+
280
+ # Run with resource limits
281
+ docker run -d \
282
+ --name cognichat \
283
+ -p 7860:7860 \
284
+ --env-file .env \
285
+ --memory="2g" \
286
+ --cpus="1.5" \
287
+ cognichat:latest
288
+ ```
289
+
290
+ ### Docker Compose
291
+
292
+ ```yaml
293
+ version: '3.8'
294
+
295
+ services:
296
+ cognichat:
297
+ build: .
298
+ ports:
299
+ - "7860:7860"
300
+ environment:
301
+ - GROQ_API_KEY=${GROQ_API_KEY}
302
+ volumes:
303
+ - ./data:/app/data
304
+ restart: unless-stopped
305
+ ```
306
+
307
+ ### HuggingFace Spaces Configuration
308
+
309
+ Add these files to your repository:
310
+
311
+ **app_port** in `README.md` header:
312
+ ```yaml
313
+ app_port: 7860
314
+ ```
315
+
316
+ **Repository Secrets**:
317
+ - `GROQ_API_KEY`: Your Groq API key
318
+
319
+ The application automatically detects the HF Spaces environment and adjusts paths accordingly.
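+
+ The detection itself is a simple environment check (the same one `app.py` and `diagnose.py` use):
+
+ ```python
+ import os
+
+ # SPACE_ID / SPACES_ZERO_GPU are set by Hugging Face Spaces at runtime.
+ is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
+ upload_folder = "/tmp/uploads" if is_hf_spaces else "uploads"
+ ```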
320
+
321
+ ---
322
+
323
+ ## ⚙️ Configuration
324
+
325
+ ### Document Processing Settings
326
+
327
+ ```python
328
+ # Values currently hard-coded in app.py - edit them there to customize
+ CHUNK_SIZE = 1500        # Characters per parent chunk
+ CHUNK_OVERLAP = 300      # Overlap between parent chunks
+ EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
+ RETRIEVER_K = 5          # Chunks fetched per retriever
333
+ ```
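+
+ For reference, the parent/child splitting that `app.py` builds from these settings looks roughly like the sketch below (classes and values taken from `app.py`; the sample document is only a placeholder):
+
+ ```python
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_experimental.text_splitter import SemanticChunker
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_core.documents import Document
+
+ embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+ parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
+ child_splitter = SemanticChunker(
+     embeddings,
+     breakpoint_threshold_type="percentile",
+     breakpoint_threshold_amount=80,
+ )
+
+ docs = [Document(page_content="Some long document text ...", metadata={"source": "example.pdf"})]
+ parent_docs = parent_splitter.split_documents(docs)        # large chunks kept for context
+ child_docs = child_splitter.split_documents(parent_docs)   # small chunks that get indexed
+ ```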
334
+
335
+ ### Model Configuration
336
+
337
+ ```python
338
+ # LLM Settings
339
+ LLM_PROVIDER = "groq"
340
+ MODEL_NAME = "llama-3.1-8b-instant"
+ TEMPERATURE = 0.1
342
+ MAX_TOKENS = 2048
343
+ ```
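+
+ These settings feed the Groq client in `rag_processor.py`; a minimal sketch of that instantiation (the key is read from the environment):
+
+ ```python
+ import os
+ from langchain_groq import ChatGroq
+
+ llm = ChatGroq(
+     model_name="llama-3.1-8b-instant",   # MODEL_NAME
+     temperature=0.1,                     # TEMPERATURE
+     api_key=os.getenv("GROQ_API_KEY"),
+ )
+ ```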
344
+
345
+ ### Search Configuration
346
+
347
+ ```python
348
+ # Hybrid Search Weights
349
+ FAISS_WEIGHT = 0.6 # Semantic search weight
350
+ BM25_WEIGHT = 0.4 # Keyword search weight
351
+ ```
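+
+ These weights are illustrative; the shipped `app.py` currently weights both retrievers equally (0.5 / 0.5). A hypothetical helper showing how such weights would plug into LangChain's `EnsembleRetriever`:
+
+ ```python
+ from langchain.retrievers import EnsembleRetriever
+
+ def build_hybrid_retriever(bm25_retriever, faiss_retriever,
+                            faiss_weight=0.6, bm25_weight=0.4):
+     # Weights are applied positionally, matching the retrievers list.
+     return EnsembleRetriever(
+         retrievers=[bm25_retriever, faiss_retriever],
+         weights=[bm25_weight, faiss_weight],
+     )
+ ```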
352
+
353
+ ---
354
+
355
+ ## 📚 API Reference
356
+
357
+ ### Endpoints
358
+
359
+ #### Upload Document
360
+
361
+ ```http
362
+ POST /upload
363
+ Content-Type: multipart/form-data
364
+
365
+ {
366
+ "file": <binary>
367
+ }
368
+ ```
369
+
370
+ **Response**:
371
+ ```json
372
+ {
373
+ "status": "success",
374
+ "message": "Document processed successfully",
375
+ "filename": "example.pdf",
376
+ "chunks": 45
377
+ }
378
+ ```
379
+
380
+ #### Chat
381
+
382
+ ```http
383
+ POST /chat
384
+ Content-Type: application/json
385
+
386
+ {
387
+ "message": "What is the main topic?",
388
+ "stream": true
389
+ }
390
+ ```
391
+
392
+ **Response**:
+ ```json
+ {
+ "answer": "The main topic of the document is ..."
+ }
398
+ ```
399
+
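+ A short client-side sketch of the upload-then-chat flow (assumes the `requests` package and a local server on port 7860; field names match `app.py`):
+
+ ```python
+ import requests
+
+ BASE = "http://localhost:7860"
+
+ with open("example.pdf", "rb") as f:
+     upload = requests.post(f"{BASE}/upload", files={"file": f}).json()
+
+ reply = requests.post(
+     f"{BASE}/chat",
+     json={"question": "What is the main topic?", "session_id": upload["session_id"]},
+ ).json()
+ print(reply["answer"])
+ ```
+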
400
+ #### Text-to-Speech
+
+ ```http
+ POST /tts
+ Content-Type: application/json
+
+ {
+ "text": "Text to read aloud"
+ }
+ ```
+
+ **Response**: an `audio/mpeg` stream of the synthesized speech.
413
+
414
+ ---
415
+
416
+ ## 🔧 Troubleshooting
417
+
418
+ ### Common Issues
419
+
420
+ #### 1. Permission Errors in Docker
421
+
422
+ **Problem**: `Permission denied` when writing to cache directories
423
+
424
+ **Solution**:
425
+ ```bash
426
+ # Rebuild with proper permissions
427
+ docker build --no-cache -t cognichat .
428
+
429
+ # Or run with volume permissions
430
+ docker run -v $(pwd)/cache:/tmp/huggingface_cache \
431
+ --user $(id -u):$(id -g) \
432
+ cognichat
433
+ ```
434
+
435
+ #### 2. Model Loading Fails
436
+
437
+ **Problem**: Cannot download HuggingFace models
438
+
439
+ **Solution**:
440
+ ```bash
441
+ # Pre-download models
442
+ python test_embeddings.py
443
+
444
+ # Or use HF_HOME environment variable
445
+ export HF_HOME=/path/to/writable/directory
446
+ ```
447
+
448
+ #### 3. Chat Returns 400 Error
449
+
450
+ **Problem**: Upload directory not writable (common in HF Spaces)
451
+
452
+ **Solution**: Application now automatically uses `/tmp/uploads` in HF Spaces environment. Ensure latest version is deployed.
453
+
454
+ #### 4. API Key Invalid
455
+
456
+ **Problem**: Groq API returns authentication error
457
+
458
+ **Solution**:
459
+ - Verify key at [Groq Console](https://console.groq.com/keys)
460
+ - Check `.env` file has correct format: `GROQ_API_KEY=gsk_...`
461
+ - Restart application after updating key
462
+
463
+ ### Debug Mode
464
+
465
+ Enable detailed logging:
466
+
467
+ ```bash
468
+ export FLASK_DEBUG=1
469
+ export LANGCHAIN_VERBOSE=true
470
+ python app.py
471
+ ```
472
+
473
+ ---
474
+
475
+ ## 🧪 Testing
476
+
477
+ ```bash
478
+ # Run test suite
479
+ pytest tests/
480
+
481
+ # Test embedding model
482
+ python test_embeddings.py
483
+
484
+ # Test document processing
485
+ pytest tests/test_document_processor.py
486
+
487
+ # Integration tests
488
+ pytest tests/test_integration.py
489
+ ```
490
+
491
+ ---
492
+
493
+ ## 🤝 Contributing
494
+
495
+ We welcome contributions! Please follow these steps:
496
+
497
+ 1. Fork the repository
498
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
499
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
500
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
501
+ 5. Open a Pull Request
502
+
503
+ ### Development Guidelines
504
+
505
+ - Follow PEP 8 style guide
506
+ - Add tests for new features
507
+ - Update documentation
508
+ - Ensure Docker build succeeds
509
+
510
+ ---
511
+
512
+ ## 📝 Changelog
513
+
514
+ ### Version 2.0 (October 2025)
515
+
516
+ ✅ **Major Improvements**:
517
+ - Fixed Docker permission issues
518
+ - HuggingFace Spaces compatibility
519
+ - Enhanced error handling
520
+ - Multiple model loading fallbacks
521
+ - Improved security (non-root execution)
522
+
523
+ ✅ **Bug Fixes**:
524
+ - Upload directory write permissions
525
+ - Cache directory access
526
+ - Model initialization reliability
527
+
528
+ ### Version 1.0 (Initial Release)
529
+
530
+ - Basic RAG functionality
531
+ - PDF and DOCX support
532
+ - FAISS vector store
533
+ - Conversational memory
534
+
535
+ ---
536
+
537
+ ## 📄 License
538
+
539
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
540
+
541
+ ---
542
+
543
+ ## 🙏 Acknowledgments
544
+
545
+ - **LangChain** for RAG framework
546
+ - **Groq** for high-speed LLM inference
547
+ - **HuggingFace** for embeddings and hosting
548
+ - **FAISS** for efficient vector search
549
+
550
+ ---
551
+
552
+ ## 📞 Support
553
+
554
+ - **Issues**: [GitHub Issues](https://github.com/yourusername/cognichat/issues)
555
+ - **Discussions**: [GitHub Discussions](https://github.com/yourusername/cognichat/discussions)
556
+ - **Email**: riteshraut123321@gmail.com
557
+
558
+ ---
559
+
560
+ <div align="center">
561
+
562
+ **Made with ❤️ by the CogniChat Team**
563
+
564
+ [⭐ Star us on GitHub](https://github.com/yourusername/cognichat) • [🐛 Report Bug](https://github.com/yourusername/cognichat/issues) • [✨ Request Feature](https://github.com/yourusername/cognichat/issues)
565
+
566
+ </div>
app.py ADDED
@@ -0,0 +1,281 @@
1
+ import os
2
+ import time
3
+ import uuid
4
+ from flask import Flask, request, render_template, session, jsonify, Response
5
+ from werkzeug.utils import secure_filename
6
+ from rag_processor import create_rag_chain
7
+ from typing import Sequence, Any, List
8
+ import fitz
9
+ import re
10
+ import io
11
+ from gtts import gTTS
12
+ from langchain_core.documents import Document
13
+ from langchain_community.document_loaders import (
14
+ TextLoader,
15
+ Docx2txtLoader,
16
+ )
17
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
18
+ from langchain_experimental.text_splitter import SemanticChunker
19
+ from langchain_huggingface import HuggingFaceEmbeddings
20
+ from langchain_community.vectorstores import FAISS
21
+ from langchain.retrievers import EnsembleRetriever
22
+ from langchain_community.retrievers import BM25Retriever
23
+ from langchain_community.chat_message_histories import ChatMessageHistory
24
+ from langchain.storage import InMemoryStore
25
+
26
+
27
+ app = Flask(__name__)
28
+ app.config['SECRET_KEY'] = os.urandom(24)
29
+
30
+ is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
31
+ if is_hf_spaces:
32
+ app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
33
+ else:
34
+ app.config['UPLOAD_FOLDER'] = 'uploads'
35
+
36
+ try:
37
+ os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
38
+ print(f"Upload folder ready: {app.config['UPLOAD_FOLDER']}")
39
+ except Exception as e:
40
+ print(f"Failed to create upload folder {app.config['UPLOAD_FOLDER']}: {e}")
41
+ app.config['UPLOAD_FOLDER'] = '/tmp/uploads'
42
+ os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
43
+ print(f"Using fallback upload folder: {app.config['UPLOAD_FOLDER']}")
44
+
45
+ rag_chains = {}
46
+ message_histories = {}
47
+ doc_stores = {} # To hold the InMemoryStore for each session
48
+
49
+ print("Loading embedding model...")
50
+ try:
51
+ EMBEDDING_MODEL = HuggingFaceEmbeddings(
52
+ model_name="BAAI/bge-base-en-v1.5",
53
+ model_kwargs={'device': 'cpu'}
54
+ )
55
+ print("Embedding model loaded successfully.")
56
+ except Exception as e:
57
+ print(f"FATAL: Could not load embedding model. Error: {e}")
58
+ raise
59
+
60
+ def load_pdf_with_fallback(filepath):
61
+ try:
62
+ docs = []
63
+ with fitz.open(filepath) as pdf_doc:
64
+ for page_num, page in enumerate(pdf_doc):
65
+ text = page.get_text()
66
+ if text.strip():
67
+ docs.append(Document(
68
+ page_content=text,
69
+ metadata={
70
+ "source": os.path.basename(filepath),
71
+ "page": page_num + 1,
72
+ }
73
+ ))
74
+ if docs:
75
+ print(f"Successfully loaded PDF with PyMuPDF: {filepath}")
76
+ return docs
77
+ else:
78
+ raise ValueError("No text content found in PDF.")
79
+ except Exception as e:
80
+ print(f"PyMuPDF failed for {filepath}: {e}")
81
+ raise
82
+
83
+ LOADER_MAPPING = {
84
+ ".txt": TextLoader,
85
+ ".pdf": load_pdf_with_fallback,
86
+ ".docx": Docx2txtLoader,
87
+ }
88
+
89
+ def get_session_history(session_id: str) -> ChatMessageHistory:
90
+ if session_id not in message_histories:
91
+ message_histories[session_id] = ChatMessageHistory()
92
+ return message_histories[session_id]
93
+
94
+ @app.route('/health', methods=['GET'])
95
+ def health_check():
96
+ return jsonify({'status': 'healthy'}), 200
97
+
98
+ @app.route('/', methods=['GET'])
99
+ def index():
100
+ return render_template('index.html')
101
+
102
+ @app.route('/upload', methods=['POST'])
103
+ def upload_files():
104
+ files = request.files.getlist('file')
105
+ if not files or all(f.filename == '' for f in files):
106
+ return jsonify({'status': 'error', 'message': 'No selected files.'}), 400
107
+
108
+ all_docs = []
109
+ processed_files, failed_files = [], []
110
+
111
+ for file in files:
112
+ if file and file.filename:
113
+ filename = secure_filename(file.filename)
114
+ filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
115
+ try:
116
+ file.save(filepath)
117
+ file_ext = os.path.splitext(filename)[1].lower()
118
+ if file_ext not in LOADER_MAPPING:
119
+ raise ValueError("Unsupported file format.")
120
+
121
+ loader_func = LOADER_MAPPING[file_ext]
122
+ docs = loader_func(filepath) if file_ext == ".pdf" else loader_func(filepath).load()
123
+
124
+ if not docs:
125
+ raise ValueError("No content extracted.")
126
+
127
+ all_docs.extend(docs)
128
+ processed_files.append(filename)
129
+ print(f"✓ Successfully processed: {filename}")
130
+ except Exception as e:
131
+ error_msg = str(e)
132
+ print(f"✗ Error processing {filename}: {error_msg}")
133
+ failed_files.append(f"{filename} ({error_msg})")
134
+
135
+ if not all_docs:
136
+ error_summary = "Failed to process all files."
137
+ if failed_files:
138
+ error_summary += " Reasons: " + ", ".join(failed_files)
139
+ return jsonify({'status': 'error', 'message': error_summary}), 400
140
+
141
+ try:
142
+ print("Starting RAG pipeline setup...")
143
+
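+ # Parent/child strategy: large parent chunks are stored in an InMemoryStore keyed by
+ # doc_id, while the smaller semantic child chunks are what get embedded and indexed;
+ # retrieval matches child chunks and maps them back to their parents for context.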
144
+ parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
145
+ child_splitter = SemanticChunker(EMBEDDING_MODEL, breakpoint_threshold_type='percentile', breakpoint_threshold_amount=80)
146
+
147
+ parent_docs = parent_splitter.split_documents(all_docs)
148
+ doc_ids = [str(uuid.uuid4()) for _ in parent_docs]
149
+
150
+ child_docs = []
151
+ for i, doc in enumerate(parent_docs):
152
+ _id = doc_ids[i]
153
+ sub_docs = child_splitter.split_documents([doc])
154
+ for child in sub_docs:
155
+ child.metadata["doc_id"] = _id
156
+ child_docs.extend(sub_docs)
157
+
158
+ store = InMemoryStore()
159
+ store.mset(list(zip(doc_ids, parent_docs)))
160
+
161
+ vectorstore = FAISS.from_documents(child_docs, EMBEDDING_MODEL)
162
+
163
+ print(f"Stored {len(parent_docs)} parent docs and indexed {len(child_docs)} child docs.")
164
+
165
+ bm25_retriever = BM25Retriever.from_documents(child_docs)
166
+ bm25_retriever.k = 5
167
+
168
+ faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
169
+
170
+ ensemble_retriever = EnsembleRetriever(
171
+ retrievers=[bm25_retriever, faiss_retriever],
172
+ weights=[0.5, 0.5]
173
+ )
174
+ print("Created Hybrid Retriever for child documents.")
175
+
176
+ session_id = str(uuid.uuid4())
177
+
178
+ doc_stores[session_id] = store
179
+
180
+ rag_chain_components = create_rag_chain(ensemble_retriever, get_session_history, EMBEDDING_MODEL, store)
181
+
182
+ rag_chains[session_id] = rag_chain_components
183
+ session['session_id'] = session_id
184
+
185
+ success_msg = f"Successfully processed: {', '.join(processed_files)}"
186
+ if failed_files:
187
+ success_msg += f"\nFailed to process: {', '.join(failed_files)}"
188
+
189
+ return jsonify({
190
+ 'status': 'success',
191
+ 'filename': success_msg,
192
+ 'session_id': session_id
193
+ })
194
+
195
+ except Exception as e:
196
+ import traceback
197
+ traceback.print_exc()
198
+ return jsonify({'status': 'error', 'message': f'Failed during RAG setup: {e}'}), 500
199
+
200
+ @app.route('/chat', methods=['POST'])
201
+ def chat():
202
+ data = request.get_json()
203
+ question = data.get('question')
204
+ session_id = session.get('session_id') or data.get('session_id')
205
+
206
+ if not question or not session_id or session_id not in rag_chains:
207
+ return jsonify({'status': 'error', 'message': 'Invalid session or no question provided.'}), 400
208
+
209
+ try:
210
+ chain_components = rag_chains[session_id]
211
+ config = {"configurable": {"session_id": session_id}}
212
+
213
+ print("\n" + "="*50)
214
+ print("--- STARTING DIAGNOSTIC RUN ---")
215
+ print(f"Original Question: {question}")
216
+ print("="*50 + "\n")
217
+
218
+ rewritten_query = chain_components["rewriter"].invoke({"question": question, "chat_history": get_session_history(session_id).messages})
219
+ print(f"--- 1. Rewritten Query ---\n{rewritten_query}\n")
220
+
221
+ hyde_doc = chain_components["hyde"].invoke({"question": rewritten_query})
222
+ print(f"--- 2. HyDE Document ---\n{hyde_doc}\n")
223
+
224
+ final_retrieved_docs = chain_components["base_retriever"].get_relevant_documents(hyde_doc)
225
+ print(f"--- 3. Retrieved Top {len(final_retrieved_docs)} Child Docs ---")
226
+ for i, doc in enumerate(final_retrieved_docs):
227
+ print(f" Doc {i+1}: {doc.page_content[:150]}... (Source: {doc.metadata.get('source')})")
228
+ print("\n")
229
+
230
+ final_context_docs = chain_components["parent_fetcher"].invoke(final_retrieved_docs)
231
+ print(f"--- 4. Final {len(final_context_docs)} Parent Docs for LLM ---")
232
+ for i, doc in enumerate(final_context_docs):
233
+ print(f" Final Doc {i+1} (Source: {doc.metadata.get('source')}, Page: {doc.metadata.get('page')}):\n '{doc.page_content[:300]}...'\n---")
234
+
235
+ print("="*50)
236
+ print("--- INVOKING FINAL CHAIN ---")
237
+ print("="*50 + "\n")
238
+
239
+ answer_string = chain_components["final_chain"].invoke({"question": question}, config=config)
240
+
241
+ return jsonify({'answer': answer_string})
242
+
243
+ except Exception as e:
244
+ import traceback
245
+ traceback.print_exc()
246
+ return jsonify({'status': 'error', 'message': 'An error occurred while getting the answer.'}), 500
247
+
248
+ def clean_markdown_for_tts(text: str) -> str:
+ # Strip Markdown syntax so gTTS reads plain prose:
+ text = re.sub(r'\*(\*?)(.*?)\1\*', r'\2', text)  # bold/italic asterisks
+ text = re.sub(r'\_(.*?)\_', r'\1', text)  # underscore emphasis
+ text = re.sub(r'`(.*?)`', r'\1', text)  # inline code backticks
+ text = re.sub(r'^\s*#{1,6}\s+', '', text, flags=re.MULTILINE)  # heading markers
+ text = re.sub(r'^\s*[\*\-]\s+', '', text, flags=re.MULTILINE)  # bullet markers
+ text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)  # numbered-list markers
+ text = re.sub(r'^\s*>\s?', '', text, flags=re.MULTILINE)  # blockquote markers
+ text = re.sub(r'^\s*[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)  # horizontal rules
+ text = re.sub(r'\n+', ' ', text)  # collapse newlines into spaces
+ return text.strip()
259
+
260
+ @app.route('/tts', methods=['POST'])
261
+ def text_to_speech():
262
+ data = request.get_json()
263
+ text = data.get('text')
264
+
265
+ if not text:
266
+ return jsonify({'status': 'error', 'message': 'No text provided.'}), 400
267
+
268
+ try:
269
+ clean_text = clean_markdown_for_tts(text)
270
+ tts = gTTS(clean_text, lang='en')
271
+ mp3_fp = io.BytesIO()
272
+ tts.write_to_fp(mp3_fp)
273
+ mp3_fp.seek(0)
274
+ return Response(mp3_fp, mimetype='audio/mpeg')
275
+ except Exception as e:
276
+ print(f"Error in TTS generation: {e}")
277
+ return jsonify({'status': 'error', 'message': 'Failed to generate audio.'}), 500
278
+
279
+ if __name__ == '__main__':
280
+ port = int(os.environ.get("PORT", 7860))
281
+ app.run(host="0.0.0.0", port=port, debug=False)
diagnose.py ADDED
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick diagnostic script to check CogniChat configuration and identify issues.
4
+ """
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ print("=== CogniChat Diagnostic Tool ===")
10
+ print()
11
+
12
+ # Check if we're in the right directory
13
+ current_dir = Path.cwd()
14
+ print(f"Current directory: {current_dir}")
15
+
16
+ # Check for required files
17
+ required_files = [
18
+ 'app.py',
19
+ 'rag_processor.py',
20
+ 'requirements.txt',
21
+ '.env.example'
22
+ ]
23
+
24
+ missing_files = []
25
+ for file in required_files:
26
+ if (current_dir / file).exists():
27
+ print(f"✓ Found: {file}")
28
+ else:
29
+ print(f"✗ Missing: {file}")
30
+ missing_files.append(file)
31
+
32
+ print()
33
+
34
+ # Check .env file
35
+ env_file = current_dir / '.env'
36
+ if env_file.exists():
37
+ print("✓ .env file exists")
38
+ try:
39
+ with open(env_file, 'r') as f:
40
+ content = f.read()
41
+
42
+ if 'GROQ_API_KEY=' in content:
43
+ if 'your_groq_api_key_here' in content:
44
+ print("⚠ .env file contains placeholder API key - needs to be updated!")
45
+ else:
46
+ print("✓ GROQ_API_KEY appears to be set in .env")
47
+ else:
48
+ print("✗ GROQ_API_KEY not found in .env file")
49
+ except Exception as e:
50
+ print(f"✗ Error reading .env file: {e}")
51
+ else:
52
+ print("✗ .env file missing - copy from .env.example and update with your API key")
53
+
54
+ print()
55
+
56
+ # Check environment variables and detect HF Spaces
57
+ is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
58
+ print(f"Environment: {'Hugging Face Spaces' if is_hf_spaces else 'Local Development'}")
59
+
60
+ if is_hf_spaces:
61
+ print(f"Space ID: {os.getenv('SPACE_ID', 'Not detected')}")
62
+
63
+ groq_key = os.getenv('GROQ_API_KEY')
64
+ if groq_key:
65
+ if groq_key == 'your_groq_api_key_here':
66
+ print("⚠ GROQ_API_KEY is set but contains placeholder value")
67
+ else:
68
+ print("✓ GROQ_API_KEY environment variable is set")
69
+ if is_hf_spaces:
70
+ print(" (Loaded from Hugging Face Spaces secrets)")
71
+ else:
72
+ print("✗ GROQ_API_KEY environment variable not set")
73
+
74
+ print()
75
+
76
+ # Load dotenv if available
77
+ try:
78
+ from dotenv import load_dotenv
79
+ load_dotenv()
80
+ groq_key_after_dotenv = os.getenv('GROQ_API_KEY')
81
+
82
+ if groq_key_after_dotenv:
83
+ if groq_key_after_dotenv == 'your_groq_api_key_here':
84
+ print("⚠ After loading .env: GROQ_API_KEY still contains placeholder")
85
+ else:
86
+ print("✓ After loading .env: GROQ_API_KEY is properly set")
87
+ else:
88
+ print("✗ After loading .env: GROQ_API_KEY still not available")
89
+
90
+ except ImportError:
91
+ print("✗ python-dotenv not available - install with: pip install python-dotenv")
92
+
93
+ print()
94
+
95
+ # Recommendations
96
+ print("=== Recommendations ===")
97
+
98
+ if missing_files:
99
+ print("1. Ensure you're in the correct CogniChat directory")
100
+
101
+ if is_hf_spaces:
102
+ if not groq_key or groq_key == 'your_groq_api_key_here':
103
+ print("2. FOR HUGGING FACE SPACES - Set API key in Space Secrets:")
104
+ print(" - Go to your Space Settings")
105
+ print(" - Navigate to 'Repository Secrets'")
106
+ print(" - Add new secret: GROQ_API_KEY")
107
+ print(" - Get your key from: https://console.groq.com/keys")
108
+ print(" - Restart your Space after adding the secret")
109
+ else:
110
+ if not env_file.exists():
111
+ print("2. Copy .env.example to .env:")
112
+ print(" cp .env.example .env")
113
+
114
+ if env_file.exists() and 'your_groq_api_key_here' in env_file.read_text():
115
+ print("3. Update .env file with your actual GROQ API key:")
116
+ print(" - Visit: https://console.groq.com/keys")
117
+ print(" - Create an API key")
118
+ print(" - Replace 'your_groq_api_key_here' in .env with your key")
119
+
120
+ if not groq_key or groq_key == 'your_groq_api_key_here':
121
+ print("4. The main issue is likely the GROQ API key configuration")
122
+ print(" This would cause the 400 error you're seeing in /chat endpoint")
123
+
124
+ print("\n5. After fixing the API key, restart the application")
125
+ print("\n=== End of Diagnostic ===")
packages.txt ADDED
@@ -0,0 +1 @@
+ # System packages are now installed directly in Dockerfile
rag_processor.py ADDED
@@ -0,0 +1,95 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from operator import itemgetter
4
+ from langchain_groq import ChatGroq
5
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
6
+ from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
7
+ from langchain_core.output_parsers import StrOutputParser
8
+ from langchain_core.runnables.history import RunnableWithMessageHistory
9
+
10
+ def create_rag_chain(base_retriever, get_session_history_func, embedding_model, store):
11
+ """
12
+ Creates a dictionary of RAG chain components for inspection and a final runnable chain.
13
+ """
14
+ load_dotenv()
15
+ api_key = os.getenv("GROQ_API_KEY")
16
+ if not api_key or api_key == "your_groq_api_key_here":
17
+ raise ValueError("GROQ_API_KEY not found or not configured properly.")
18
+
19
+ llm = ChatGroq(model_name="llama-3.1-8b-instant", api_key=api_key, temperature=0.1)
20
+
21
+ # 1. HyDE-like Document Generation Chain
22
+ hyde_template = """As a document expert, write a concise, fact-based paragraph that directly answers the user's question. This will be used for a database search.
23
+ Question: {question}
24
+ Hypothetical Answer:"""
25
+ hyde_prompt = ChatPromptTemplate.from_template(hyde_template)
26
+ hyde_chain = hyde_prompt | llm | StrOutputParser()
27
+
28
+ # 2. Query Rewriting Chain
29
+ rewrite_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question that is optimized for a vector database.
30
+
31
+ **Chat History:**
32
+ {chat_history}
33
+
34
+ **Follow-up Question:**
35
+ {question}
36
+
37
+ **Standalone Question:**"""
38
+ rewrite_prompt = ChatPromptTemplate.from_messages([
39
+ ("system", rewrite_template),
40
+ MessagesPlaceholder(variable_name="chat_history"),
41
+ ("human", "Reformulate this question as a standalone query: {question}")
42
+ ])
43
+ query_rewriter_chain = rewrite_prompt | llm | StrOutputParser()
44
+
45
+ # 3. Parent Document Fetching Chain
46
+ def get_parents(docs):
47
+ parent_ids = {d.metadata.get("doc_id") for d in docs}
48
+ return store.mget(list(parent_ids))
49
+
50
+ parent_fetcher_chain = RunnableLambda(get_parents)
51
+
52
+ # 4. Main Conversational RAG Chain
53
+ rag_template = """You are CogniChat, an expert document analysis assistant. Your task is to answer the user's question based *only* on the provided context.
54
+
55
+ **Instructions:**
56
+ 1. Read the context carefully.
57
+ 2. If the answer is in the context, provide a clear and concise answer.
58
+ 3. If the answer is not in the context, you *must* state that you cannot find the information in the provided documents. Do not use any external knowledge.
59
+ 4. Where appropriate, use formatting like lists or bold text to improve readability.
60
+
61
+ **Context:**
62
+ {context}
63
+ """
64
+ rag_prompt = ChatPromptTemplate.from_messages([
65
+ ("system", rag_template),
66
+ MessagesPlaceholder(variable_name="chat_history"),
67
+ ("human", "{question}"),
68
+ ])
69
+
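+ # Chain order: rewrite the follow-up into a standalone query, expand it into a
+ # hypothetical answer (HyDE), retrieve matching child chunks, swap in their parent
+ # chunks via parent_fetcher_chain, then answer strictly from that context.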
70
+ conversational_rag_chain = (
71
+ RunnablePassthrough.assign(
72
+ context=query_rewriter_chain | hyde_chain | base_retriever | parent_fetcher_chain
73
+ )
74
+ | rag_prompt
75
+ | llm
76
+ | StrOutputParser()
77
+ )
78
+
79
+ # 5. Final Chain with History (Simplified)
80
+ final_chain = RunnableWithMessageHistory(
81
+ conversational_rag_chain,
82
+ get_session_history_func,
83
+ input_messages_key="question",
84
+ history_messages_key="chat_history",
85
+ )
86
+
87
+ print("\n✅ RAG chain and components successfully built.")
88
+
89
+ return {
90
+ "rewriter": query_rewriter_chain,
91
+ "hyde": hyde_chain,
92
+ "base_retriever": base_retriever,
93
+ "parent_fetcher": parent_fetcher_chain,
94
+ "final_chain": final_chain
95
+ }
requirements-simple.txt ADDED
@@ -0,0 +1,17 @@
+ flask==2.3.3
+ langchain==0.1.20
+ langchain-groq==0.1.5
+ langchain-community==0.0.38
+ gTTS==2.4.0
+ sentence-transformers==2.7.0
+ faiss-cpu==1.7.4
+ python-docx==1.1.0
+ docx2txt==0.8
+ pypdf==4.2.0
+ pillow==10.3.0
+ python-dotenv==1.0.1
+ werkzeug==2.3.7
+ transformers==4.40.2
+ torch==2.3.0
+ numpy==1.24.4
+ requests==2.31.0
requirements.txt ADDED
Binary file (890 Bytes).
templates/index.html ADDED
@@ -0,0 +1,615 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>CogniChat - Chat with your Documents</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
11
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
12
+ <style>
13
+ :root {
14
+ --background: #f0f4f9;
15
+ --foreground: #1f1f1f;
16
+ --primary: #1a73e8;
17
+ --primary-hover: #1867cf;
18
+ --card: #ffffff;
19
+ --card-border: #dadce0;
20
+ --input-bg: #e8f0fe;
21
+ --user-bubble: #d9e7ff;
22
+ --bot-bubble: #f1f3f4;
23
+ }
24
+
25
+ /* Dark mode styles */
26
+ .dark {
27
+ --background: #202124;
28
+ --foreground: #e8eaed;
29
+ --primary: #8ab4f8;
30
+ --primary-hover: #99bdfa;
31
+ --card: #303134;
32
+ --card-border: #5f6368;
33
+ --input-bg: #303134;
34
+ --user-bubble: #3c4043;
35
+ --bot-bubble: #3c4043;
36
+ }
37
+
38
+ body {
39
+ font-family: 'Google Sans', 'Roboto', sans-serif;
40
+ background-color: var(--background);
41
+ color: var(--foreground);
42
+ overflow: hidden;
43
+ }
44
+
45
+ #chat-window::-webkit-scrollbar { width: 8px; }
46
+ #chat-window::-webkit-scrollbar-track { background: transparent; }
47
+ #chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
48
+ .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
49
+
50
+ .drop-zone--over {
51
+ border-color: var(--primary);
52
+ box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
53
+ }
54
+
55
+ /* Loading Spinner */
56
+ .loader {
57
+ width: 48px;
58
+ height: 48px;
59
+ border: 3px solid var(--card-border);
60
+ border-radius: 50%;
61
+ display: inline-block;
62
+ position: relative;
63
+ box-sizing: border-box;
64
+ animation: rotation 1s linear infinite;
65
+ }
66
+ .loader::after {
67
+ content: '';
68
+ box-sizing: border-box;
69
+ position: absolute;
70
+ left: 50%;
71
+ top: 50%;
72
+ transform: translate(-50%, -50%);
73
+ width: 56px;
74
+ height: 56px;
75
+ border-radius: 50%;
76
+ border: 3px solid;
77
+ border-color: var(--primary) transparent;
78
+ }
79
+
80
+ @keyframes rotation {
81
+ 0% { transform: rotate(0deg); }
82
+ 100% { transform: rotate(360deg); }
83
+ }
84
+
85
+ /* Typing Indicator Animation */
86
+ .typing-indicator span {
87
+ height: 10px;
88
+ width: 10px;
89
+ background-color: #9E9E9E;
90
+ border-radius: 50%;
91
+ display: inline-block;
92
+ animation: bounce 1.4s infinite ease-in-out both;
93
+ }
94
+ .typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
95
+ .typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
96
+ @keyframes bounce {
97
+ 0%, 80%, 100% { transform: scale(0); }
98
+ 40% { transform: scale(1.0); }
99
+ }
100
+
101
+ /* Enhanced Markdown Styling for better readability and aesthetics */
102
+ .markdown-content p {
103
+ margin-bottom: 1rem;
104
+ line-height: 1.75;
105
+ }
106
+ .markdown-content h1, .markdown-content h2, .markdown-content h3, .markdown-content h4 {
107
+ font-family: 'Google Sans', sans-serif;
108
+ font-weight: 700;
109
+ margin-top: 1.75rem;
110
+ margin-bottom: 1rem;
111
+ line-height: 1.3;
112
+ }
113
+ .markdown-content h1 { font-size: 1.75em; border-bottom: 1px solid var(--card-border); padding-bottom: 0.5rem; }
114
+ .markdown-content h2 { font-size: 1.5em; }
115
+ .markdown-content h3 { font-size: 1.25em; }
116
+ .markdown-content h4 { font-size: 1.1em; }
117
+ .markdown-content ul, .markdown-content ol {
118
+ padding-left: 1.75rem;
119
+ margin-bottom: 1rem;
120
+ }
121
+ .markdown-content li {
122
+ margin-bottom: 0.5rem;
123
+ }
124
+ .dark .markdown-content ul > li::marker { color: var(--primary); }
125
+ .markdown-content ul > li::marker { color: var(--primary); }
126
+ .markdown-content a {
127
+ color: var(--primary);
128
+ text-decoration: none;
129
+ font-weight: 500;
130
+ border-bottom: 1px solid transparent;
131
+ transition: all 0.2s ease-in-out;
132
+ }
133
+ .markdown-content a:hover {
134
+ border-bottom-color: var(--primary-hover);
135
+ }
136
+ .markdown-content blockquote {
137
+ margin: 1.5rem 0;
138
+ padding-left: 1.5rem;
139
+ border-left: 4px solid var(--card-border);
140
+ color: #6c757d;
141
+ font-style: italic;
142
+ }
143
+ .dark .markdown-content blockquote {
144
+ color: #adb5bd;
145
+ }
146
+ .markdown-content hr {
147
+ border: none;
148
+ border-top: 1px solid var(--card-border);
149
+ margin: 2rem 0;
150
+ }
151
+ .markdown-content table {
152
+ width: 100%;
153
+ border-collapse: collapse;
154
+ margin: 1.5rem 0;
155
+ font-size: 0.9em;
156
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
157
+ border-radius: 8px;
158
+ overflow: hidden;
159
+ }
160
+ .markdown-content th, .markdown-content td {
161
+ border: 1px solid var(--card-border);
162
+ padding: 0.75rem 1rem;
163
+ text-align: left;
164
+ }
165
+ .markdown-content th {
166
+ background-color: var(--bot-bubble);
167
+ font-weight: 500;
168
+ }
169
+ .markdown-content code {
170
+ background-color: rgba(0,0,0,0.05);
171
+ padding: 0.2rem 0.4rem;
172
+ border-radius: 0.25rem;
173
+ font-family: 'Roboto Mono', monospace;
174
+ font-size: 0.9em;
175
+ }
176
+ .dark .markdown-content code {
177
+ background-color: rgba(255,255,255,0.1);
178
+ }
179
+ .markdown-content pre {
180
+ position: relative;
181
+ background-color: #f8f9fa;
182
+ border: 1px solid var(--card-border);
183
+ border-radius: 0.5rem;
184
+ margin-bottom: 1rem;
185
+ }
186
+ .dark .markdown-content pre {
187
+ background-color: #2e2f32;
188
+ }
189
+ .markdown-content pre code {
190
+ background: none;
191
+ padding: 1rem;
192
+ display: block;
193
+ overflow-x: auto;
194
+ }
195
+ .markdown-content pre .copy-code-btn {
196
+ position: absolute;
197
+ top: 0.5rem;
198
+ right: 0.5rem;
199
+ background-color: #e8eaed;
200
+ border: 1px solid #dadce0;
201
+ color: #5f6368;
202
+ padding: 0.3rem 0.6rem;
203
+ border-radius: 0.25rem;
204
+ cursor: pointer;
205
+ opacity: 0;
206
+ transition: opacity 0.2s;
207
+ font-size: 0.8em;
208
+ }
209
+ .dark .markdown-content pre .copy-code-btn {
210
+ background-color: #3c4043;
211
+ border-color: #5f6368;
212
+ color: #e8eaed;
213
+ }
214
+ .markdown-content pre:hover .copy-code-btn {
215
+ opacity: 1;
216
+ }
217
+
218
+ /* Spinner for the TTS button */
219
+ .tts-button-loader {
220
+ width: 16px;
221
+ height: 16px;
222
+ border: 2px solid currentColor; /* Use button's text color */
223
+ border-radius: 50%;
224
+ display: inline-block;
225
+ box-sizing: border-box;
226
+ animation: rotation 0.8s linear infinite;
227
+ border-bottom-color: transparent; /* Makes it a half circle spinner */
228
+ }
229
+ </style>
230
+ </head>
231
+ <body class="w-screen h-screen dark">
232
+ <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
233
+ <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
234
+ <header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
235
+ <h1 class="text-xl font-medium">Chat with your Docs</h1>
236
+ <p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
237
+ </header>
238
+ <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
239
+ <div id="chat-content" class="max-w-4xl mx-auto space-y-8">
240
+ </div>
241
+ </div>
242
+ <div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
243
+ <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
244
+ <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
245
+ <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
246
+ <svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
247
+ </button>
248
+ </form>
249
+ </div>
250
+ </div>
251
+
252
+ <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
253
+ <div class="text-center">
254
+ <h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
255
+ <div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
256
+ <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
257
+ <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
258
+ <p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
259
+ <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
260
+ </div>
261
+ </div>
262
+ </div>
263
+
264
+ <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
265
+ <div class="loader"></div>
266
+ <p id="loading-text" class="mt-6 text-sm font-medium"></p>
267
+ <p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
268
+ </div>
269
+ </main>
270
+
271
+ <script>
272
+ document.addEventListener('DOMContentLoaded', () => {
273
+ const uploadContainer = document.getElementById('upload-container');
274
+ const chatContainer = document.getElementById('chat-container');
275
+ const dropZone = document.getElementById('drop-zone');
276
+ const fileUploadInput = document.getElementById('file-upload');
277
+ const fileNameSpan = document.getElementById('file-name');
278
+ const loadingOverlay = document.getElementById('loading-overlay');
279
+ const loadingText = document.getElementById('loading-text');
280
+ const loadingSubtext = document.getElementById('loading-subtext');
281
+
282
+ const chatForm = document.getElementById('chat-form');
283
+ const chatInput = document.getElementById('chat-input');
284
+ const chatSubmitBtn = document.getElementById('chat-submit-btn');
285
+ const chatWindow = document.getElementById('chat-window');
286
+ const chatContent = document.getElementById('chat-content');
287
+ const chatFilename = document.getElementById('chat-filename');
288
+
289
+ let sessionId = null;
290
+ const storedSessionId = sessionStorage.getItem('cognichat_session_id');
291
+ if (storedSessionId) {
292
+ sessionId = storedSessionId;
293
+ console.debug('Restored session ID from storage:', sessionId);
294
+ }
295
+
296
+ // --- File Upload Logic ---
297
+ dropZone.addEventListener('click', () => fileUploadInput.click());
298
+
299
+ ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
300
+ dropZone.addEventListener(eventName, preventDefaults, false);
301
+ document.body.addEventListener(eventName, preventDefaults, false);
302
+ });
303
+
304
+ ['dragenter', 'dragover'].forEach(eventName => {
305
+ dropZone.addEventListener(eventName, () => dropZone.classList.add('drop-zone--over'));
306
+ });
307
+ ['dragleave', 'drop'].forEach(eventName => {
308
+ dropZone.addEventListener(eventName, () => dropZone.classList.remove('drop-zone--over'));
309
+ });
310
+
311
+ dropZone.addEventListener('drop', (e) => {
312
+ const files = e.dataTransfer.files;
313
+ if (files.length > 0) handleFiles(files);
314
+ });
315
+
316
+ fileUploadInput.addEventListener('change', (e) => {
317
+ if (e.target.files.length > 0) handleFiles(e.target.files);
318
+ });
319
+
320
+ function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); }
321
+
322
+ async function handleFiles(files) {
323
+ const formData = new FormData();
324
+ let fileNames = [];
325
+ for (const file of files) {
326
+ formData.append('file', file);
327
+ fileNames.push(file.name);
328
+ }
329
+
330
+ fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
331
+ await uploadAndProcessFiles(formData, fileNames);
332
+ }
333
+
334
+ async function uploadAndProcessFiles(formData, fileNames) {
335
+ loadingOverlay.classList.remove('hidden');
336
+ loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
337
+ loadingSubtext.textContent = "🤓Creating a knowledge base may take a minute or two. So please hold on tight";
338
+
339
+ try {
340
+ const response = await fetch('/upload', { method: 'POST', body: formData });
341
+ const result = await response.json();
342
+
343
+ if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
344
+ if (result.session_id) {
345
+ sessionId = result.session_id;
346
+ sessionStorage.setItem('cognichat_session_id', sessionId);
347
+ console.debug('Stored session ID from upload:', sessionId);
348
+ } else {
349
+ console.warn('Upload response missing session_id field.');
350
+ }
351
+
352
+ chatFilename.textContent = `Chatting with: ${result.filename}`;
353
+ uploadContainer.classList.add('hidden');
354
+ chatContainer.classList.remove('hidden');
355
+ appendMessage("I've analyzed your documents. What would you like to know?", "bot");
356
+
357
+ } catch (error) {
358
+ console.error('Upload error:', error);
359
+ alert(`Error: ${error.message}`);
360
+ } finally {
361
+ loadingOverlay.classList.add('hidden');
362
+ loadingSubtext.textContent = '';
363
+ fileNameSpan.textContent = '';
364
+ fileUploadInput.value = '';
365
+ }
366
+ }
367
+
368
+ // --- Chat Logic ---
369
+ chatForm.addEventListener('submit', async (e) => {
370
+ e.preventDefault();
371
+ const question = chatInput.value.trim();
372
+ if (!question) return;
373
+
374
+ appendMessage(question, 'user');
375
+ chatInput.value = '';
376
+ chatInput.disabled = true;
377
+ chatSubmitBtn.disabled = true;
378
+
379
+ const typingIndicator = showTypingIndicator();
380
+ let botMessageContainer = null;
381
+ let contentDiv = null;
382
+
383
+ try {
384
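+ // Pass the stored session ID in the request body so the backend can find the matching document session (cookie-based Flask sessions may not persist on HF Spaces).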
+ const requestBody = { question: question };
385
+ if (sessionId) {
386
+ requestBody.session_id = sessionId;
387
+ }
388
+
389
+ const response = await fetch('/chat', {
390
+ method: 'POST',
391
+ headers: { 'Content-Type': 'application/json' },
392
+ body: JSON.stringify(requestBody),
393
+ });
394
+
395
+ if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
396
+
397
+ // ============================ MODIFICATION START ==============================
398
+ // Parse the JSON response instead of reading a stream
399
+ const result = await response.json();
400
+ const answer = result.answer; // Extract the 'answer' field
401
+
402
+ if (!answer) {
403
+ throw new Error("Received an empty or invalid response from the server.");
404
+ }
405
+
406
+ typingIndicator.remove();
407
+ botMessageContainer = appendMessage('', 'bot');
408
+ contentDiv = botMessageContainer.querySelector('.markdown-content');
409
+
410
+ // Use the extracted answer for rendering
411
+ contentDiv.innerHTML = marked.parse(answer);
412
+ contentDiv.querySelectorAll('pre').forEach(addCopyButton);
413
+ scrollToBottom(); // Scroll after content is added
414
+
415
+ // Use the extracted answer for TTS
416
+ addTextToSpeechControls(botMessageContainer, answer);
417
+ // ============================ MODIFICATION END ==============================
418
+
419
+ } catch (error) {
420
+ console.error('Chat error:', error);
421
+ if (typingIndicator) typingIndicator.remove();
422
+ if (contentDiv) {
423
+ contentDiv.innerHTML = `<p class="text-red-500">Error: ${error.message}</p>`;
424
+ } else {
425
+ appendMessage(`Error: ${error.message}`, 'bot');
426
+ }
427
+ } finally {
428
+ chatInput.disabled = false;
429
+ chatSubmitBtn.disabled = false;
430
+ chatInput.focus();
431
+ }
432
+ });
433
+
434
+ // --- UI Helper Functions ---
435
+
436
+ function appendMessage(text, sender) {
437
+ const messageWrapper = document.createElement('div');
438
+ messageWrapper.className = `flex items-start gap-4`;
439
+
440
+ const iconSVG = sender === 'user'
441
+ ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
442
+ : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
443
+
444
+ const messageBubble = document.createElement('div');
445
+ messageBubble.className = `flex-1 pt-1`;
446
+
447
+ const senderName = document.createElement('p');
448
+ senderName.className = 'font-medium text-sm mb-1';
449
+ senderName.textContent = sender === 'user' ? 'You' : 'CogniChat';
450
+
451
+ const contentDiv = document.createElement('div');
452
+ contentDiv.className = 'text-base markdown-content';
453
+ // Only parse if text is not empty
454
+ if (text) {
455
+ contentDiv.innerHTML = marked.parse(text);
456
+ }
457
+
458
+ const controlsContainer = document.createElement('div');
459
+ controlsContainer.className = 'tts-controls mt-2';
460
+
461
+ messageBubble.appendChild(senderName);
462
+ messageBubble.appendChild(contentDiv);
463
+ messageBubble.appendChild(controlsContainer);
464
+ messageWrapper.innerHTML = iconSVG;
465
+ messageWrapper.appendChild(messageBubble);
466
+
467
+ chatContent.appendChild(messageWrapper);
468
+ scrollToBottom();
469
+
470
+ return messageBubble;
471
+ }
472
+
473
+ function showTypingIndicator() {
474
+ const indicatorWrapper = document.createElement('div');
475
+ indicatorWrapper.className = `flex items-start gap-4`;
476
+ indicatorWrapper.id = 'typing-indicator';
477
+
478
+ const iconSVG = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
479
+
480
+ const messageBubble = document.createElement('div');
481
+ messageBubble.className = 'flex-1 pt-1';
482
+
483
+ const senderName = document.createElement('p');
484
+ senderName.className = 'font-medium text-sm mb-1';
485
+ senderName.textContent = 'CogniChat is thinking...';
486
+
487
+ const indicator = document.createElement('div');
488
+ indicator.className = 'typing-indicator';
489
+ indicator.innerHTML = '<span></span><span></span><span></span>';
490
+
491
+ messageBubble.appendChild(senderName);
492
+ messageBubble.appendChild(indicator);
493
+ indicatorWrapper.innerHTML = iconSVG;
494
+ indicatorWrapper.appendChild(messageBubble);
495
+
496
+ chatContent.appendChild(indicatorWrapper);
497
+ scrollToBottom();
498
+
499
+ return indicatorWrapper;
500
+ }
501
+
502
+ function scrollToBottom() {
503
+ chatWindow.scrollTo({
504
+ top: chatWindow.scrollHeight,
505
+ behavior: 'smooth'
506
+ });
507
+ }
508
+
509
+ function addCopyButton(pre) {
510
+ const button = document.createElement('button');
511
+ button.className = 'copy-code-btn';
512
+ button.textContent = 'Copy';
513
+ pre.appendChild(button);
514
+
515
+ button.addEventListener('click', () => {
516
+ const code = pre.querySelector('code').innerText;
517
+ navigator.clipboard.writeText(code).then(() => {
518
+ button.textContent = 'Copied!';
519
+ setTimeout(() => button.textContent = 'Copy', 2000);
520
+ });
521
+ });
522
+ }
523
+
524
+ // --- Text-to-Speech Logic ---
525
+ let currentAudio = null;
526
+ let currentPlayingButton = null;
527
+
528
+ const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
529
+ const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
530
+
531
+
532
+ function addTextToSpeechControls(messageBubble, text) {
533
+ const ttsControls = messageBubble.querySelector('.tts-controls');
534
+ if (text.trim().length > 0) {
535
+ const speakButton = document.createElement('button');
536
+ speakButton.className = 'speak-btn px-4 py-2 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
537
+ speakButton.title = 'Listen to this message';
538
+ speakButton.setAttribute('data-state', 'play');
539
+ speakButton.innerHTML = `${playIconSVG} <span>Play</span>`;
540
+ ttsControls.appendChild(speakButton);
541
+ speakButton.addEventListener('click', () => handleTTS(text, speakButton));
542
+ }
543
+ }
544
+
545
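+ // Clicking the active button toggles pause/resume; clicking a different button stops any other audio, requests speech from /tts, and plays the result.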
+ async function handleTTS(text, button) {
546
+ if (button === currentPlayingButton) {
547
+ if (currentAudio && !currentAudio.paused) {
548
+ currentAudio.pause();
549
+ button.setAttribute('data-state', 'paused');
550
+ button.innerHTML = `${playIconSVG} <span>Play</span>`;
551
+ } else if (currentAudio && currentAudio.paused) {
552
+ currentAudio.play();
553
+ button.setAttribute('data-state', 'playing');
554
+ button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
555
+ }
556
+ return;
557
+ }
558
+
559
+ resetAllSpeakButtons();
560
+
561
+ currentPlayingButton = button;
562
+ button.setAttribute('data-state', 'loading');
563
+ button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
564
+ button.disabled = true;
565
+
566
+ try {
567
+ const response = await fetch('/tts', {
568
+ method: 'POST',
569
+ headers: { 'Content-Type': 'application/json' },
570
+ body: JSON.stringify({ text: text })
571
+ });
572
+ if (!response.ok) throw new Error('Failed to generate audio.');
573
+
574
+ const blob = await response.blob();
575
+ const audioUrl = URL.createObjectURL(blob);
576
+ currentAudio = new Audio(audioUrl);
577
+ currentAudio.play();
578
+
579
+ button.setAttribute('data-state', 'playing');
580
+ button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
581
+
582
+ currentAudio.onended = () => {
583
+ button.setAttribute('data-state', 'play');
584
+ button.innerHTML = `${playIconSVG} <span>Play</span>`;
585
+ currentAudio = null;
586
+ currentPlayingButton = null;
587
+ };
588
+
589
+ } catch (error) {
590
+ console.error('TTS Error:', error);
591
+ button.setAttribute('data-state', 'error');
592
+ button.innerHTML = `${playIconSVG} <span>Error</span>`;
593
+ alert('Failed to play audio. Please try again.');
594
+ resetAllSpeakButtons();
595
+ } finally {
596
+ button.disabled = false;
597
+ }
598
+ }
599
+
600
+ function resetAllSpeakButtons() {
601
+ document.querySelectorAll('.speak-btn').forEach(btn => {
602
+ btn.setAttribute('data-state', 'play');
603
+ btn.innerHTML = `${playIconSVG} <span>Play</span>`;
604
+ btn.disabled = false;
605
+ });
606
+ if (currentAudio) {
607
+ currentAudio.pause();
608
+ currentAudio = null;
609
+ }
610
+ currentPlayingButton = null;
611
+ }
612
+ });
613
+ </script>
614
+ </body>
615
+ </html>
templates/index.html.backup ADDED
@@ -0,0 +1,974 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>CogniChat - Chat with your Documents</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
11
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
12
+ <style>
13
+ :root {
14
+ --background: #f0f4f9;
15
+ --foreground: #1f1f1f;
16
+ --primary: #1a73e8;
17
+ --primary-hover: #1867cf;
18
+ --card: #ffffff;
19
+ --card-border: #dadce0;
20
+ --input-bg: #e8f0fe;
21
+ --user-bubble: #d9e7ff;
22
+ --bot-bubble: #f1f3f4;
23
+ }
24
+
25
+ /* Dark mode styles */
26
+ .dark {
27
+ --background: #202124;
28
+ --foreground: #e8eaed;
29
+ --primary: #8ab4f8;
30
+ --primary-hover: #99bdfa;
31
+ --card: #303134;
32
+ --card-border: #5f6368;
33
+ --input-bg: #303134;
34
+ --user-bubble: #3c4043;
35
+ --bot-bubble: #3c4043;
36
+ }
37
+
38
+ body {
39
+ font-family: 'Google Sans', 'Roboto', sans-serif;
40
+ background-color: var(--background);
41
+ color: var(--foreground);
42
+ overflow: hidden;
43
+ }
44
+
45
+ #chat-window::-webkit-scrollbar { width: 8px; }
46
+ #chat-window::-webkit-scrollbar-track { background: transparent; }
47
+ #chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
48
+ .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
49
+
50
+ .drop-zone--over {
51
+ border-color: var(--primary);
52
+ box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
53
+ }
54
+
55
+ /* Loading Spinner */
56
+ .loader {
57
+ width: 48px;
58
+ height: 48px;
59
+ border: 3px solid var(--card-border);
60
+ border-radius: 50%;
61
+ display: inline-block;
62
+ position: relative;
63
+ box-sizing: border-box;
64
+ animation: rotation 1s linear infinite;
65
+ }
66
+ .loader::after {
67
+ content: '';
68
+ box-sizing: border-box;
69
+ position: absolute;
70
+ left: 50%;
71
+ top: 50%;
72
+ transform: translate(-50%, -50%);
73
+ width: 56px;
74
+ height: 56px;
75
+ border-radius: 50%;
76
+ border: 3px solid;
77
+ border-color: var(--primary) transparent;
78
+ }
79
+
80
+ @keyframes rotation {
81
+ 0% { transform: rotate(0deg); }
82
+ 100% { transform: rotate(360deg); }
83
+ }
84
+
85
+ /* Typing Indicator Animation */
86
+ .typing-indicator span {
87
+ height: 10px;
88
+ width: 10px;
89
+ background-color: #9E9E9E;
90
+ border-radius: 50%;
91
+ display: inline-block;
92
+ animation: bounce 1.4s infinite ease-in-out both;
93
+ }
94
+ .typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
95
+ .typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
96
+ @keyframes bounce {
97
+ 0%, 80%, 100% { transform: scale(0); }
98
+ 40% { transform: scale(1.0); }
99
+ }
100
+
101
+ /* Markdown Styling */
102
+ .markdown-content p { margin-bottom: 0.75rem; line-height: 1.75; }
103
+ .markdown-content ul, .markdown-content ol { margin-left: 1.5rem; margin-bottom: 0.75rem; }
104
+ .markdown-content code { background-color: rgba(0,0,0,0.05); padding: 0.2rem 0.4rem; border-radius: 0.25rem; font-family: 'Roboto Mono', monospace; font-size: 0.9em; }
105
+ .dark .markdown-content code { background-color: rgba(255,255,255,0.1); }
106
+ .markdown-content pre { position: relative; background-color: #f8f9fa; border: 1px solid var(--card-border); border-radius: 0.5rem; margin-bottom: 1rem; }
107
+ .dark .markdown-content pre { background-color: #2e2f32; }
108
+ .markdown-content pre code { background: none; padding: 1rem; display: block; overflow-x: auto; }
109
+ .markdown-content pre .copy-code-btn { position: absolute; top: 0.5rem; right: 0.5rem; background-color: #e8eaed; border: 1px solid #dadce0; color: #5f6368; padding: 0.3rem 0.6rem; border-radius: 0.25rem; cursor: pointer; opacity: 0; transition: opacity 0.2s; font-size: 0.8em;}
110
+ .dark .markdown-content pre .copy-code-btn { background-color: #3c4043; border-color: #5f6368; color: #e8eaed; }
111
+ .markdown-content pre:hover .copy-code-btn { opacity: 1; }
112
+
113
+ /* Spinner for the TTS button */
114
+ .tts-button-loader {
115
+ width: 16px;
116
+ height: 16px;
117
+ border: 2px solid currentColor; /* Use button's text color */
118
+ border-radius: 50%;
119
+ display: inline-block;
120
+ box-sizing: border-box;
121
+ animation: rotation 0.8s linear infinite;
122
+ border-bottom-color: transparent; /* Makes it a half circle spinner */
123
+ }
124
+ </style>
125
+ </head>
126
+ <body class="w-screen h-screen dark">
127
+ <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
128
+ <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
129
+ <header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
130
+ <h1 class="text-xl font-medium">Chat with your Docs</h1>
131
+ <p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
132
+ </header>
133
+ <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
134
+ <div id="chat-content" class="max-w-4xl mx-auto space-y-8">
135
+ </div>
136
+ </div>
137
+ <div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
138
+ <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
139
+ <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
140
+ <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
141
+ <svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
142
+ </button>
143
+ </form>
144
+ </div>
145
+ </div>
146
+
147
+ <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
148
+ <div class="text-center">
149
+ <h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
150
+ <div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
151
+ <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
152
+ <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
153
+ <p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
154
+ <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
155
+ </div>
156
+ </div>
157
+ </div>
158
+
159
+ <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
160
+ <div class="loader"></div>
161
+ <p id="loading-text" class="mt-6 text-sm font-medium"></p>
162
+ <p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
163
+ </div>
164
+ </main>
165
+
166
+ <script>
167
+ document.addEventListener('DOMContentLoaded', () => {
168
+ const uploadContainer = document.getElementById('upload-container');
169
+ const chatContainer = document.getElementById('chat-container');
170
+ const dropZone = document.getElementById('drop-zone');
171
+ const fileUploadInput = document.getElementById('file-upload');
172
+ const fileNameSpan = document.getElementById('file-name');
173
+ const loadingOverlay = document.getElementById('loading-overlay');
174
+ const loadingText = document.getElementById('loading-text');
175
+ const loadingSubtext = document.getElementById('loading-subtext');
176
+
177
+ const chatForm = document.getElementById('chat-form');
178
+ const chatInput = document.getElementById('chat-input');
179
+ const chatSubmitBtn = document.getElementById('chat-submit-btn');
180
+ const chatWindow = document.getElementById('chat-window');
181
+ const chatContent = document.getElementById('chat-content');
182
+ const chatFilename = document.getElementById('chat-filename');
183
+
184
+ // --- File Upload Logic ---
185
+ dropZone.addEventListener('click', () => fileUploadInput.click());
186
+
187
+ ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
188
+ dropZone.addEventListener(eventName, preventDefaults, false);
189
+ document.body.addEventListener(eventName, preventDefaults, false);
190
+ });
191
+
192
+ ['dragenter', 'dragover'].forEach(eventName => dropZone.classList.add('drop-zone--over'));
193
+ ['dragleave', 'drop'].forEach(eventName => dropZone.classList.remove('drop-zone--over'));
194
+
195
+ dropZone.addEventListener('drop', (e) => {
196
+ const files = e.dataTransfer.files;
197
+ if (files.length > 0) handleFiles(files);
198
+ });
199
+
200
+ fileUploadInput.addEventListener('change', (e) => {
201
+ if (e.target.files.length > 0) handleFiles(e.target.files);
202
+ });
203
+
204
+ function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); }
205
+
206
+ async function handleFiles(files) {
207
+ const formData = new FormData();
208
+ let fileNames = [];
209
+ for (const file of files) {
210
+ formData.append('file', file);
211
+ fileNames.push(file.name);
212
+ }
213
+
214
+ fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
215
+ await uploadAndProcessFiles(formData, fileNames);
216
+ }
217
+
218
+ async function uploadAndProcessFiles(formData, fileNames) {
219
+ loadingOverlay.classList.remove('hidden');
220
+ loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
221
+ loadingSubtext.textContent = "For large documents or OCR, setup may take a few minutes to build the knowledge base.";
222
+
223
+ try {
224
+ const response = await fetch('/upload', { method: 'POST', body: formData });
225
+ const result = await response.json();
226
+
227
+ if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
228
+
229
+ chatFilename.textContent = `Chatting with: ${result.filename}`;
230
+ uploadContainer.classList.add('hidden');
231
+ chatContainer.classList.remove('hidden');
232
+ appendMessage("I've analyzed your documents. What would you like to know?", "bot");
233
+
234
+ } catch (error) {
235
+ console.error('Upload error:', error);
236
+ alert(`Error: ${error.message}`);
237
+ } finally {
238
+ loadingOverlay.classList.add('hidden');
239
+ loadingSubtext.textContent = '';
240
+ fileNameSpan.textContent = '';
241
+ fileUploadInput.value = ''; // Reset file input
242
+ }
243
+ }
244
+
245
+ // --- Chat Logic ---
246
+ chatForm.addEventListener('submit', async (e) => {
247
+ e.preventDefault();
248
+ const question = chatInput.value.trim();
249
+ if (!question) return;
250
+
251
+ appendMessage(question, 'user');
252
+ chatInput.value = '';
253
+ chatInput.disabled = true;
254
+ chatSubmitBtn.disabled = true;
255
+
256
+ const typingIndicator = showTypingIndicator();
257
+ let botMessageContainer = null;
258
+ let contentDiv = null;
259
+
260
+ try {
261
+ const response = await fetch('/chat', {
262
+ method: 'POST',
263
+ headers: { 'Content-Type': 'application/json' },
264
+ body: JSON.stringify({ question: question }),
265
+ });
266
+
267
+ if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
268
+
269
+ typingIndicator.remove();
270
+ botMessageContainer = appendMessage('', 'bot');
271
+ contentDiv = botMessageContainer.querySelector('.markdown-content');
272
+
273
+ const reader = response.body.getReader();
274
+ const decoder = new TextDecoder();
275
+ let fullResponse = '';
276
+
277
+ while (true) {
278
+ const { value, done } = await reader.read();
279
+ if (done) break;
280
+
281
+ fullResponse += decoder.decode(value, { stream: true });
282
+ contentDiv.innerHTML = marked.parse(fullResponse);
283
+ scrollToBottom();
284
+ }
285
+ contentDiv.querySelectorAll('pre').forEach(addCopyButton);
286
+
287
+ addTextToSpeechControls(botMessageContainer, fullResponse);
288
+
289
+ } catch (error) {
290
+ console.error('Chat error:', error);
291
+ if (typingIndicator) typingIndicator.remove();
292
+ if (contentDiv) {
293
+ contentDiv.innerHTML = `<p class="text-red-500">Error: ${error.message}</p>`;
294
+ } else {
295
+ appendMessage(`Error: ${error.message}`, 'bot');
296
+ }
297
+ } finally {
298
+ chatInput.disabled = false;
299
+ chatSubmitBtn.disabled = false;
300
+ chatInput.focus();
301
+ }
302
+ });
303
+
304
+ // --- UI Helper Functions ---
305
+
306
+ function appendMessage(text, sender) {
307
+ const messageWrapper = document.createElement('div');
308
+ messageWrapper.className = `flex items-start gap-4`;
309
+
310
+ const iconSVG = sender === 'user'
311
+ ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
312
+ : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
313
+
314
+ const messageBubble = document.createElement('div');
315
+ messageBubble.className = `flex-1 pt-1`;
316
+
317
+ const senderName = document.createElement('p');
318
+ senderName.className = 'font-medium text-sm mb-1';
319
+ senderName.textContent = sender === 'user' ? 'You' : 'CogniChat';
320
+
321
+ const contentDiv = document.createElement('div');
322
+ contentDiv.className = 'text-base markdown-content';
323
+ contentDiv.innerHTML = marked.parse(text);
324
+
325
+ const controlsContainer = document.createElement('div');
326
+ controlsContainer.className = 'tts-controls mt-2';
327
+
328
+ messageBubble.appendChild(senderName);
329
+ messageBubble.appendChild(contentDiv);
330
+ messageBubble.appendChild(controlsContainer);
331
+ messageWrapper.innerHTML = iconSVG;
332
+ messageWrapper.appendChild(messageBubble);
333
+
334
+ chatContent.appendChild(messageWrapper);
335
+ scrollToBottom();
336
+
337
+ return messageBubble;
338
+ }
339
+
340
+ function showTypingIndicator() {
341
+ const indicatorWrapper = document.createElement('div');
342
+ indicatorWrapper.className = `flex items-start gap-4`;
343
+ indicatorWrapper.id = 'typing-indicator';
344
+
345
+ const iconSVG = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
346
+
347
+ const messageBubble = document.createElement('div');
348
+ messageBubble.className = 'flex-1 pt-1';
349
+
350
+ const senderName = document.createElement('p');
351
+ senderName.className = 'font-medium text-sm mb-1';
352
+ senderName.textContent = 'CogniChat is thinking...';
353
+
354
+ const indicator = document.createElement('div');
355
+ indicator.className = 'typing-indicator';
356
+ indicator.innerHTML = '<span></span><span></span><span></span>';
357
+
358
+ messageBubble.appendChild(senderName);
359
+ messageBubble.appendChild(indicator);
360
+ indicatorWrapper.innerHTML = iconSVG;
361
+ indicatorWrapper.appendChild(messageBubble);
362
+
363
+ chatContent.appendChild(indicatorWrapper);
364
+ scrollToBottom();
365
+
366
+ return indicatorWrapper;
367
+ }
368
+
369
+ function scrollToBottom() {
370
+ chatWindow.scrollTo({
371
+ top: chatWindow.scrollHeight,
372
+ behavior: 'smooth'
373
+ });
374
+ }
375
+
376
+ function addCopyButton(pre) {
377
+ const button = document.createElement('button');
378
+ button.className = 'copy-code-btn';
379
+ button.textContent = 'Copy';
380
+ pre.appendChild(button);
381
+
382
+ button.addEventListener('click', () => {
383
+ const code = pre.querySelector('code').innerText;
384
+ navigator.clipboard.writeText(code).then(() => {
385
+ button.textContent = 'Copied!';
386
+ setTimeout(() => button.textContent = 'Copy', 2000);
387
+ });
388
+ });
389
+ }
390
+
391
+ // ============================ MODIFICATIONS START ==============================
392
+ let currentAudio = null;
393
+ let currentPlayingButton = null;
394
+
395
+ const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
396
+ const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
397
+
398
+
399
+ function addTextToSpeechControls(messageBubble, text) {
400
+ const ttsControls = messageBubble.querySelector('.tts-controls');
401
+ if (text.trim().length > 0) {
402
+ const speakButton = document.createElement('button');
403
+ // STYLING CHANGE HERE: Replaced theme variables with specific dark blue colors.
404
+ speakButton.className = 'speak-btn px-4 py-2 bg-blue-700 text-white rounded-full text-sm font-medium hover:bg-blue-800 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
405
+ speakButton.title = 'Listen to this message';
406
+ speakButton.setAttribute('data-state', 'play');
407
+ speakButton.innerHTML = `${playIconSVG} <span>Play</span>`;
408
+ ttsControls.appendChild(speakButton);
409
+ speakButton.addEventListener('click', () => handleTTS(text, speakButton));
410
+ }
411
+ }
412
+
413
+ async function handleTTS(text, button) {
414
+ // BUG FIX: Reworked the logic to correctly handle pause/resume.
415
+
416
+ // Case 1: The clicked button is already active (playing or paused).
417
+ if (button === currentPlayingButton) {
418
+ if (currentAudio && !currentAudio.paused) { // If it's playing, pause it.
419
+ currentAudio.pause();
420
+ button.setAttribute('data-state', 'paused');
421
+ button.innerHTML = `${playIconSVG} <span>Play</span>`;
422
+ } else if (currentAudio && currentAudio.paused) { // If it's paused, resume it.
423
+ currentAudio.play();
424
+ button.setAttribute('data-state', 'playing');
425
+ button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
426
+ }
427
+ return; // Stop the function here.
428
+ }
429
+
430
+ // Case 2: A new button is clicked (or no audio is active).
431
+ // Stop any other audio that might be playing.
432
+ resetAllSpeakButtons();
433
+
434
+ currentPlayingButton = button;
435
+ button.setAttribute('data-state', 'loading');
436
+ button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
437
+ button.disabled = true;
438
+
439
+ try {
440
+ const response = await fetch('/tts', {
441
+ method: 'POST',
442
+ headers: { 'Content-Type': 'application/json' },
443
+ body: JSON.stringify({ text: text })
444
+ });
445
+ if (!response.ok) throw new Error('Failed to generate audio.');
446
+
447
+ const blob = await response.blob();
448
+ const audioUrl = URL.createObjectURL(blob);
449
+ currentAudio = new Audio(audioUrl);
450
+ currentAudio.play();
451
+
452
+ button.setAttribute('data-state', 'playing');
453
+ button.innerHTML = `${pauseIconSVG} <span>Pause</span>`;
454
+
455
+ currentAudio.onended = () => {
456
+ button.setAttribute('data-state', 'play');
457
+ button.innerHTML = `${playIconSVG} <span>Play</span>`;
458
+ currentAudio = null;
459
+ currentPlayingButton = null;
460
+ };
461
+
462
+ } catch (error) {
463
+ console.error('TTS Error:', error);
464
+ button.setAttribute('data-state', 'error');
465
+ button.innerHTML = `${playIconSVG} <span>Error</span>`;
466
+ alert('Failed to play audio. Please try again.');
467
+ resetAllSpeakButtons(); // Reset state on error
468
+ } finally {
469
+ button.disabled = false;
470
+ }
471
+ }
472
+
473
+ function resetAllSpeakButtons() {
474
+ document.querySelectorAll('.speak-btn').forEach(btn => {
475
+ btn.setAttribute('data-state', 'play');
476
+ btn.innerHTML = `${playIconSVG} <span>Play</span>`;
477
+ btn.disabled = false;
478
+ });
479
+ if (currentAudio) {
480
+ currentAudio.pause();
481
+ currentAudio = null;
482
+ }
483
+ currentPlayingButton = null;
484
+ }
485
+ // ============================ MODIFICATIONS END ==============================
486
+ });
487
+ </script>
488
+ </body>
489
+ </html><!DOCTYPE html>
490
+ <html lang="en">
491
+ <head>
492
+ <meta charset="UTF-8">
493
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
494
+ <title>CogniChat - Chat with your Documents</title>
495
+ <script src="https://cdn.tailwindcss.com"></script>
496
+ <link rel="preconnect" href="https://fonts.googleapis.com">
497
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
498
+ <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
499
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
500
+ <style>
501
+ :root {
502
+ --background: #f0f4f9;
503
+ --foreground: #1f1f1f;
504
+ --primary: #1a73e8;
505
+ --primary-hover: #1867cf;
506
+ --card: #ffffff;
507
+ --card-border: #dadce0;
508
+ --input-bg: #e8f0fe;
509
+ --user-bubble: #d9e7ff;
510
+ --bot-bubble: #f1f3f4;
511
+ }
512
+
513
+ /* Dark mode styles */
514
+ .dark {
515
+ --background: #202124;
516
+ --foreground: #e8eaed;
517
+ --primary: #8ab4f8;
518
+ --primary-hover: #99bdfa;
519
+ --card: #303134;
520
+ --card-border: #5f6368;
521
+ --input-bg: #303134;
522
+ --user-bubble: #3c4043;
523
+ --bot-bubble: #3c4043;
524
+ }
525
+
526
+ body {
527
+ font-family: 'Google Sans', 'Roboto', sans-serif;
528
+ background-color: var(--background);
529
+ color: var(--foreground);
530
+ overflow: hidden;
531
+ }
532
+
533
+ #chat-window::-webkit-scrollbar { width: 8px; }
534
+ #chat-window::-webkit-scrollbar-track { background: transparent; }
535
+ #chat-window::-webkit-scrollbar-thumb { background-color: #bdc1c6; border-radius: 20px; }
536
+ .dark #chat-window::-webkit-scrollbar-thumb { background-color: #5f6368; }
537
+
538
+ .drop-zone--over {
539
+ border-color: var(--primary);
540
+ box-shadow: 0 0 15px rgba(26, 115, 232, 0.3);
541
+ }
542
+
543
+ /* Loading Spinner */
544
+ .loader {
545
+ width: 48px;
546
+ height: 48px;
547
+ border: 3px solid var(--card-border);
548
+ border-radius: 50%;
549
+ display: inline-block;
550
+ position: relative;
551
+ box-sizing: border-box;
552
+ animation: rotation 1s linear infinite;
553
+ }
554
+ .loader::after {
555
+ content: '';
556
+ box-sizing: border-box;
557
+ position: absolute;
558
+ left: 50%;
559
+ top: 50%;
560
+ transform: translate(-50%, -50%);
561
+ width: 56px;
562
+ height: 56px;
563
+ border-radius: 50%;
564
+ border: 3px solid;
565
+ border-color: var(--primary) transparent;
566
+ }
567
+
568
+ @keyframes rotation {
569
+ 0% { transform: rotate(0deg); }
570
+ 100% { transform: rotate(360deg); }
571
+ }
572
+
573
+ /* Typing Indicator Animation */
574
+ .typing-indicator span {
575
+ height: 10px;
576
+ width: 10px;
577
+ background-color: #9E9E9E;
578
+ border-radius: 50%;
579
+ display: inline-block;
580
+ animation: bounce 1.4s infinite ease-in-out both;
581
+ }
582
+ .typing-indicator span:nth-child(1) { animation-delay: -0.32s; }
583
+ .typing-indicator span:nth-child(2) { animation-delay: -0.16s; }
584
+ @keyframes bounce {
585
+ 0%, 80%, 100% { transform: scale(0); }
586
+ 40% { transform: scale(1.0); }
587
+ }
588
+
589
+ /* Markdown Styling */
590
+ .markdown-content p { margin-bottom: 0.75rem; line-height: 1.75; }
591
+ .markdown-content ul, .markdown-content ol { margin-left: 1.5rem; margin-bottom: 0.75rem; }
592
+ .markdown-content code { background-color: rgba(0,0,0,0.05); padding: 0.2rem 0.4rem; border-radius: 0.25rem; font-family: 'Roboto Mono', monospace; font-size: 0.9em; }
593
+ .dark .markdown-content code { background-color: rgba(255,255,255,0.1); }
594
+ .markdown-content pre { position: relative; background-color: #f8f9fa; border: 1px solid var(--card-border); border-radius: 0.5rem; margin-bottom: 1rem; }
595
+ .dark .markdown-content pre { background-color: #2e2f32; }
596
+ .markdown-content pre code { background: none; padding: 1rem; display: block; overflow-x: auto; }
597
+ .markdown-content pre .copy-code-btn { position: absolute; top: 0.5rem; right: 0.5rem; background-color: #e8eaed; border: 1px solid #dadce0; color: #5f6368; padding: 0.3rem 0.6rem; border-radius: 0.25rem; cursor: pointer; opacity: 0; transition: opacity 0.2s; font-size: 0.8em;}
598
+ .dark .markdown-content pre .copy-code-btn { background-color: #3c4043; border-color: #5f6368; color: #e8eaed; }
599
+ .markdown-content pre:hover .copy-code-btn { opacity: 1; }
600
+
601
+ /* Spinner for the TTS button */
602
+ .tts-button-loader {
603
+ width: 16px;
604
+ height: 16px;
605
+ border: 2px solid currentColor; /* Use button's text color */
606
+ border-radius: 50%;
607
+ display: inline-block;
608
+ box-sizing: border-box;
609
+ animation: rotation 0.8s linear infinite;
610
+ border-bottom-color: transparent; /* Makes it a half circle spinner */
611
+ }
612
+ </style>
613
+ </head>
614
+ <body class="w-screen h-screen dark">
615
+ <main id="main-content" class="h-full flex flex-col transition-opacity duration-500">
616
+ <div id="chat-container" class="hidden flex-1 flex flex-col w-full mx-auto overflow-hidden">
617
+ <header class="text-center p-4 border-b border-[var(--card-border)] flex-shrink-0">
618
+ <h1 class="text-xl font-medium">Chat with your Docs</h1>
619
+ <p id="chat-filename" class="text-xs text-gray-500 dark:text-gray-400 mt-1"></p>
620
+ </header>
621
+ <div id="chat-window" class="flex-1 overflow-y-auto p-4 md:p-6 lg:p-10">
622
+ <div id="chat-content" class="max-w-4xl mx-auto space-y-8">
623
+ </div>
624
+ </div>
625
+ <div class="p-4 flex-shrink-0 bg-[var(--background)] border-t border-[var(--card-border)]">
626
+ <form id="chat-form" class="max-w-4xl mx-auto bg-[var(--card)] rounded-full p-2 flex items-center shadow-sm border border-transparent focus-within:border-[var(--primary)] transition-colors">
627
+ <input type="text" id="chat-input" placeholder="Ask a question about your documents..." class="flex-grow bg-transparent focus:outline-none px-4 text-sm" autocomplete="off">
628
+ <button type="submit" id="chat-submit-btn" class="bg-[var(--primary)] hover:bg-[var(--primary-hover)] text-white p-2 rounded-full transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed disabled:bg-gray-500" title="Send">
629
+ <svg class="w-5 h-5" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M3.49941 11.5556L11.555 3.5L12.4438 4.38889L6.27721 10.5556H21.9994V11.5556H6.27721L12.4438 17.7222L11.555 18.6111L3.49941 10.5556V11.5556Z" transform="rotate(180, 12.7497, 11.0556)" fill="currentColor"></path></svg>
630
+ </button>
631
+ </form>
632
+ </div>
633
+ </div>
634
+
635
+ <div id="upload-container" class="flex-1 flex flex-col items-center justify-center p-8 transition-opacity duration-300">
636
+ <div class="text-center">
637
+ <h1 class="text-5xl font-medium mb-4">Upload docs to chat</h1>
638
+ <div id="drop-zone" class="w-full max-w-lg text-center border-2 border-dashed border-[var(--card-border)] rounded-2xl p-10 transition-all duration-300 cursor-pointer bg-[var(--card)] hover:border-[var(--primary)]">
639
+ <input id="file-upload" type="file" class="hidden" accept=".pdf,.txt,.docx,.jpg,.jpeg,.png" multiple title="input">
640
+ <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" ><path stroke-linecap="round" stroke-linejoin="round" d="M12 16.5V9.75m0 0l3-3m-3 3l-3 3M6.75 19.5a4.5 4.5 0 01-1.41-8.775 5.25 5.25 0 0110.233-2.33 3 3 0 013.758 3.848A3.752 3.752 0 0118 19.5H6.75z"></path></svg>
641
+ <p class="mt-4 text-sm font-medium">Drag & drop files or click to upload</p>
642
+ <p id="file-name" class="mt-2 text-xs text-gray-500"></p>
643
+ </div>
644
+ </div>
645
+ </div>
646
+
647
+ <div id="loading-overlay" class="hidden fixed inset-0 bg-[var(--background)] bg-opacity-80 backdrop-blur-sm flex flex-col items-center justify-center z-50 text-center p-4">
648
+ <div class="loader"></div>
649
+ <p id="loading-text" class="mt-6 text-sm font-medium"></p>
650
+ <p id="loading-subtext" class="mt-2 text-xs text-gray-500 dark:text-gray-400"></p>
651
+ </div>
652
+ </main>
653
+
654
+ <script>
655
+ document.addEventListener('DOMContentLoaded', () => {
656
+ const uploadContainer = document.getElementById('upload-container');
657
+ const chatContainer = document.getElementById('chat-container');
658
+ const dropZone = document.getElementById('drop-zone');
659
+ const fileUploadInput = document.getElementById('file-upload');
660
+ const fileNameSpan = document.getElementById('file-name');
661
+ const loadingOverlay = document.getElementById('loading-overlay');
662
+ const loadingText = document.getElementById('loading-text');
663
+ const loadingSubtext = document.getElementById('loading-subtext');
664
+
665
+ const chatForm = document.getElementById('chat-form');
666
+ const chatInput = document.getElementById('chat-input');
667
+ const chatSubmitBtn = document.getElementById('chat-submit-btn');
668
+ const chatWindow = document.getElementById('chat-window');
669
+ const chatContent = document.getElementById('chat-content');
670
+ const chatFilename = document.getElementById('chat-filename');
671
+
672
+ // --- File Upload Logic ---
673
+ dropZone.addEventListener('click', () => fileUploadInput.click());
674
+
675
+ ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
676
+ dropZone.addEventListener(eventName, preventDefaults, false);
677
+ document.body.addEventListener(eventName, preventDefaults, false);
678
+ });
679
+
680
+ ['dragenter', 'dragover'].forEach(eventName => dropZone.classList.add('drop-zone--over'));
681
+ ['dragleave', 'drop'].forEach(eventName => dropZone.classList.remove('drop-zone--over'));
682
+
683
+ dropZone.addEventListener('drop', (e) => {
684
+ const files = e.dataTransfer.files;
685
+ if (files.length > 0) handleFiles(files);
686
+ });
687
+
688
+ fileUploadInput.addEventListener('change', (e) => {
689
+ if (e.target.files.length > 0) handleFiles(e.target.files);
690
+ });
691
+
692
+ function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); }
693
+
694
+ async function handleFiles(files) {
695
+ const formData = new FormData();
696
+ let fileNames = [];
697
+ for (const file of files) {
698
+ formData.append('file', file);
699
+ fileNames.push(file.name);
700
+ }
701
+
702
+ fileNameSpan.textContent = `Selected: ${fileNames.join(', ')}`;
703
+ await uploadAndProcessFiles(formData, fileNames);
704
+ }
705
+
706
+ async function uploadAndProcessFiles(formData, fileNames) {
707
+ loadingOverlay.classList.remove('hidden');
708
+ loadingText.textContent = `Processing ${fileNames.length} document(s)...`;
709
+ loadingSubtext.textContent = "For large documents or OCR, setup may take a few minutes to build the knowledge base.";
710
+
711
+ try {
712
+ const response = await fetch('/upload', { method: 'POST', body: formData });
713
+ const result = await response.json();
714
+
715
+ if (!response.ok) throw new Error(result.message || 'Unknown error occurred.');
716
+
717
+ chatFilename.textContent = `Chatting with: ${result.filename}`;
718
+ uploadContainer.classList.add('hidden');
719
+ chatContainer.classList.remove('hidden');
720
+ appendMessage("I've analyzed your documents. What would you like to know?", "bot");
721
+
722
+ } catch (error) {
723
+ console.error('Upload error:', error);
724
+ alert(`Error: ${error.message}`);
725
+ } finally {
726
+ loadingOverlay.classList.add('hidden');
727
+ loadingSubtext.textContent = '';
728
+ fileNameSpan.textContent = '';
729
+ fileUploadInput.value = ''; // Reset file input
730
+ }
731
+ }
732
+
733
+ // --- Chat Logic ---
734
+ chatForm.addEventListener('submit', async (e) => {
735
+ e.preventDefault();
736
+ const question = chatInput.value.trim();
737
+ if (!question) return;
738
+
739
+ appendMessage(question, 'user');
740
+ chatInput.value = '';
741
+ chatInput.disabled = true;
742
+ chatSubmitBtn.disabled = true;
743
+
744
+ const typingIndicator = showTypingIndicator();
745
+ let botMessageContainer = null;
746
+ let contentDiv = null;
747
+
748
+ try {
749
+ const response = await fetch('/chat', {
750
+ method: 'POST',
751
+ headers: { 'Content-Type': 'application/json' },
752
+ body: JSON.stringify({ question: question }),
753
+ });
754
+
755
+ if (!response.ok) throw new Error(`Server error: ${response.statusText}`);
756
+
757
+ typingIndicator.remove();
758
+ botMessageContainer = appendMessage('', 'bot');
759
+ contentDiv = botMessageContainer.querySelector('.markdown-content');
760
+
761
+ const reader = response.body.getReader();
762
+ const decoder = new TextDecoder();
763
+ let fullResponse = '';
764
+
765
+ while (true) {
766
+ const { value, done } = await reader.read();
767
+ if (done) break;
768
+
769
+ fullResponse += decoder.decode(value, { stream: true });
770
+ contentDiv.innerHTML = marked.parse(fullResponse);
771
+ scrollToBottom();
772
+ }
773
+ contentDiv.querySelectorAll('pre').forEach(addCopyButton);
774
+
775
+ addTextToSpeechControls(botMessageContainer, fullResponse);
776
+
777
+ } catch (error) {
778
+ console.error('Chat error:', error);
779
+ if (typingIndicator) typingIndicator.remove();
780
+ if (contentDiv) {
781
+ contentDiv.innerHTML = `<p class="text-red-500">Error: ${error.message}</p>`;
782
+ } else {
783
+ appendMessage(`Error: ${error.message}`, 'bot');
784
+ }
785
+ } finally {
786
+ chatInput.disabled = false;
787
+ chatSubmitBtn.disabled = false;
788
+ chatInput.focus();
789
+ }
790
+ });
791
+
792
+ // --- UI Helper Functions ---
793
+
794
+ function appendMessage(text, sender) {
795
+ const messageWrapper = document.createElement('div');
796
+ messageWrapper.className = `flex items-start gap-4`;
797
+
798
+ const iconSVG = sender === 'user'
799
+ ? `<div class="bg-blue-100 dark:bg-gray-700 p-2.5 rounded-full flex-shrink-0 mt-1"><svg class="w-5 h-5 text-blue-600 dark:text-blue-300" viewBox="0 0 24 24"><path fill="currentColor" d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"></path></svg></div>`
800
+ : `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
801
+
802
+ const messageBubble = document.createElement('div');
803
+ messageBubble.className = `flex-1 pt-1`;
804
+
805
+ const senderName = document.createElement('p');
806
+ senderName.className = 'font-medium text-sm mb-1';
807
+ senderName.textContent = sender === 'user' ? 'You' : 'CogniChat';
808
+
809
+ const contentDiv = document.createElement('div');
810
+ contentDiv.className = 'text-base markdown-content';
811
+ contentDiv.innerHTML = marked.parse(text);
812
+
813
+ const controlsContainer = document.createElement('div');
814
+ controlsContainer.className = 'tts-controls mt-2';
815
+
816
+ messageBubble.appendChild(senderName);
817
+ messageBubble.appendChild(contentDiv);
818
+ messageBubble.appendChild(controlsContainer);
819
+ messageWrapper.innerHTML = iconSVG;
820
+ messageWrapper.appendChild(messageBubble);
821
+
822
+ chatContent.appendChild(messageWrapper);
823
+ scrollToBottom();
824
+
825
+ return messageBubble;
826
+ }
827
+
828
+ function showTypingIndicator() {
829
+ const indicatorWrapper = document.createElement('div');
830
+ indicatorWrapper.className = `flex items-start gap-4`;
831
+ indicatorWrapper.id = 'typing-indicator';
832
+
833
+ const iconSVG = `<div class="bg-gray-200 dark:bg-gray-700 rounded-full flex-shrink-0 mt-1 text-xl flex items-center justify-center w-10 h-10">✨</div>`;
834
+
835
+ const messageBubble = document.createElement('div');
836
+ messageBubble.className = 'flex-1 pt-1';
837
+
838
+ const senderName = document.createElement('p');
839
+ senderName.className = 'font-medium text-sm mb-1';
840
+ senderName.textContent = 'CogniChat is thinking...';
841
+
842
+ const indicator = document.createElement('div');
843
+ indicator.className = 'typing-indicator';
844
+ indicator.innerHTML = '<span></span><span></span><span></span>';
845
+
846
+ messageBubble.appendChild(senderName);
847
+ messageBubble.appendChild(indicator);
848
+ indicatorWrapper.innerHTML = iconSVG;
849
+ indicatorWrapper.appendChild(messageBubble);
850
+
851
+ chatContent.appendChild(indicatorWrapper);
852
+ scrollToBottom();
853
+
854
+ return indicatorWrapper;
855
+ }
856
+
857
+ function scrollToBottom() {
858
+ chatWindow.scrollTo({
859
+ top: chatWindow.scrollHeight,
860
+ behavior: 'smooth'
861
+ });
862
+ }
863
+
864
+ function addCopyButton(pre) {
865
+ const button = document.createElement('button');
866
+ button.className = 'copy-code-btn';
867
+ button.textContent = 'Copy';
868
+ pre.appendChild(button);
869
+
870
+ button.addEventListener('click', () => {
871
+ const code = pre.querySelector('code').innerText;
872
+ navigator.clipboard.writeText(code).then(() => {
873
+ button.textContent = 'Copied!';
874
+ setTimeout(() => button.textContent = 'Copy', 2000);
875
+ });
876
+ });
877
+ }
878
+
879
+ // ============================ MODIFICATIONS START ==============================
880
+ let currentAudio = null;
881
+ let currentPlayingButton = null;
882
+
883
+ const playIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M8 5v14l11-7z"/></svg>`;
884
+ const pauseIconSVG = `<svg class="w-5 h-5" fill="currentColor" viewBox="0 0 24 24"><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>`;
885
+
886
+ function addTextToSpeechControls(messageBubble, text) {
887
+ const ttsControls = messageBubble.querySelector('.tts-controls');
888
+ if (text.trim().length > 0) {
889
+ const speakButton = document.createElement('button');
890
+ // --- STYLING CHANGE HERE: Brighter blue color for better visibility ---
891
+ speakButton.className = 'speak-btn px-3 py-1.5 bg-blue-600 text-white rounded-full text-sm font-medium hover:bg-blue-700 transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed';
892
+ speakButton.title = 'Listen to this message';
893
+ // --- EMOJI ADDED ---
894
+ speakButton.innerHTML = `🔊 ${playIconSVG} <span>Listen</span>`;
895
+ ttsControls.appendChild(speakButton);
896
+ speakButton.addEventListener('click', () => handleTTS(text, speakButton));
897
+ }
898
+ }
899
+
900
+ // --- BUG FIX: Reworked the entire function for correct pause/resume/stop logic ---
901
+ async function handleTTS(text, button) {
902
+ // Case 1: The clicked button is already playing or paused.
903
+ if (button === currentPlayingButton) {
904
+ if (currentAudio && !currentAudio.paused) { // If playing, pause it.
905
+ currentAudio.pause();
906
+ button.innerHTML = `🔊 ${playIconSVG} <span>Listen</span>`;
907
+ } else if (currentAudio && currentAudio.paused) { // If paused, resume it.
908
+ currentAudio.play();
909
+ button.innerHTML = `🔊 ${pauseIconSVG} <span>Pause</span>`;
910
+ }
911
+ return;
912
+ }
913
+
914
+ // Case 2: A new button is clicked. Stop any other audio.
915
+ if (currentAudio) {
916
+ currentAudio.pause();
917
+ }
918
+ resetAllSpeakButtons();
919
+
920
+ currentPlayingButton = button;
921
+ button.innerHTML = `<div class="tts-button-loader"></div> <span>Loading...</span>`;
922
+ button.disabled = true;
923
+
924
+ try {
925
+ const response = await fetch('/tts', {
926
+ method: 'POST',
927
+ headers: { 'Content-Type': 'application/json' },
928
+ body: JSON.stringify({ text: text })
929
+ });
930
+ if (!response.ok) throw new Error('Failed to generate audio.');
931
+
932
+ const blob = await response.blob();
933
+ const audioUrl = URL.createObjectURL(blob);
934
+ currentAudio = new Audio(audioUrl);
935
+
936
+ currentAudio.play().catch(e => { throw e; });
937
+ button.innerHTML = `🔊 ${pauseIconSVG} <span>Pause</span>`;
938
+
939
+ currentAudio.onended = () => {
940
+ button.innerHTML = `🔊 ${playIconSVG} <span>Listen</span>`;
941
+ currentAudio = null;
942
+ currentPlayingButton = null;
943
+ };
944
+
945
+ currentAudio.onerror = (e) => {
946
+ console.error('Audio playback error:', e);
947
+ throw new Error('Could not play the generated audio.');
948
+ };
949
+
950
+ } catch (error) {
951
+ console.error('TTS Error:', error);
952
+ alert('Failed to play audio. Please try again.');
953
+ resetAllSpeakButtons(); // Reset state on error
954
+ } finally {
955
+ button.disabled = false;
956
+ }
957
+ }
958
+
959
+ function resetAllSpeakButtons() {
960
+ document.querySelectorAll('.speak-btn').forEach(btn => {
961
+ btn.innerHTML = `🔊 ${playIconSVG} <span>Listen</span>`;
962
+ btn.disabled = false;
963
+ });
964
+ if (currentAudio) {
965
+ currentAudio.pause();
966
+ currentAudio = null;
967
+ }
968
+ currentPlayingButton = null;
969
+ }
970
+ // ============================ MODIFICATIONS END ==============================
971
+ });
972
+ </script>
973
+ </body>
974
+ </html>
test_dependencies.py ADDED
@@ -0,0 +1,76 @@
1
+ import os
2
+ import sys
3
+
4
+ print("CogniChat Dependencies & PDF Handling Test")
5
+
6
+ # Test imports
7
+ try:
8
+ print("\nTesting core imports...")
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_community.retrievers import BM25Retriever
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ from langchain_core.documents import Document
13
+ print("Core LangChain imports successful!")
14
+
15
+ except ImportError as e:
16
+ print(f"Import error: {e}")
17
+ if "rank_bm25" in str(e):
18
+ print("Missing dependency: pip install rank-bm25==0.2.2")
19
+ sys.exit(1)
20
+ try:
21
+ print("\nTesting PDF loading capabilities...")
22
+ try:
23
+ from langchain_community.document_loaders import PyPDFLoader
24
+ print("PyPDFLoader available")
25
+ except ImportError:
26
+ print("PyPDFLoader not available")
27
+
28
+ try:
29
+ import fitz
30
+ print("PyMuPDF (fitz) available - can handle corrupted PDFs")
31
+ except ImportError:
32
+ print("PyMuPDF (fitz) not available")
33
+
34
+ try:
35
+ import pdfplumber
36
+ print("pdfplumber available - additional PDF parsing method")
37
+ except ImportError:
38
+ print("pdfplumber not available")
39
+
40
+ except Exception as e:
41
+ print(f"Error testing PDF capabilities: {e}")
42
+ try:
43
+ print("\nTesting BM25 Retriever...")
44
+
45
+ test_docs = [
46
+ Document(page_content="This is the first test document about machine learning."),
47
+ Document(page_content="This is the second document discussing natural language processing."),
48
+ Document(page_content="The third document covers artificial intelligence topics."),
49
+ ]
50
+
51
+ bm25_retriever = BM25Retriever.from_documents(test_docs)
52
+ bm25_retriever.k = 2
53
+ query = "machine learning"
54
+ results = bm25_retriever.get_relevant_documents(query)
55
+ print(f"BM25 retriever created and tested successfully!")
56
+ print(f"Retrieved {len(results)} documents for query: '{query}'")
57
+
58
+ except Exception as e:
59
+ print(f"✗ Error testing BM25 retriever: {e}")
60
+ import traceback
61
+ traceback.print_exc()
62
+ sys.exit(1)
63
+
64
+ print("\nAll tests completed successfully!")
65
+ print("\nThe application should now handle:")
66
+ print(" • Regular file uploads and processing")
67
+ print(" • Corrupted PDF files with multiple fallback methods")
68
+ print(" • BM25 and FAISS hybrid retrieval")
69
+ print(" • Proper error messages for failed file processing")
70
+ print("\nMake sure to install all dependencies with:")
71
+ print(" pip install -r requirements.txt")
72
+
73
+ print("\nKey Dependencies Added/Updated")
74
+ print(" • rank-bm25==0.2.2 (for BM25 retrieval)")
75
+ print(" • pymupdf==1.23.26 (PDF fallback method)")
76
+ print(" • pdfplumber==0.10.3 (additional PDF parsing)")
test_hf_spaces_session.py ADDED
@@ -0,0 +1,95 @@
+ import requests
+ import json
+ BASE_URL = "https://huggingface.co/spaces/Zeri00/Cogni-chat-document-reader"
+
+ def test_endpoints():
+     """Test the debug and session endpoints to understand the issue."""
+
+     print("CogniChat HF Spaces Diagnostic\n")
+
+     # Test 1: Check debug endpoint
+     print("1. Testing /debug endpoint...")
+     try:
+         response = requests.get(f"{BASE_URL}/debug")
+         if response.status_code == 200:
+             data = response.json()
+             print("Debug endpoint working")
+             print(f"  Environment: {data.get('environment')}")
+             print(f"  GROQ API Key: {'Set' if data.get('groq_api_key_set') else 'NOT SET'}")
+             print(f"  Sessions count: {data.get('sessions_count')}")
+             print(f"  Upload folder: {data.get('upload_folder')}")
+             print(f"  Upload folder writable: {data.get('upload_folder_writable')}")
+             print(f"  Flask session ID: {data.get('flask_session_id')}")
+             print(f"  Session keys: {data.get('flask_session_keys')}")
+         else:
+             print(f"Debug endpoint failed: {response.status_code}")
+     except Exception as e:
+         print(f"Error accessing debug endpoint: {e}")
+
+     print()
+
+     # Test 2: Check session persistence
+     print("2. Testing /test-session endpoint...")
+     try:
+         session = requests.Session()
+         response = session.post(f"{BASE_URL}/test-session")
+         if response.status_code == 200:
+             data = response.json()
+             print("Session write working")
+             print(f"  Test key: {data.get('test_key')}")
+             print(f"  Session keys: {data.get('session_keys')}")
+         else:
+             print(f"Session write failed: {response.status_code}")
+
+         response = session.get(f"{BASE_URL}/test-session")
+         if response.status_code == 200:
+             data = response.json()
+             print("Session read working")
+             print(f"  Test key persisted: {data.get('test_key')}")
+             print(f"  Has session data: {data.get('has_session_data')}")
+
+             if not data.get('test_key'):
+                 print("WARNING: Sessions are not persisting between requests!")
+                 print("  This is likely the cause of the 400 chat error.")
+         else:
+             print(f"Session read failed: {response.status_code}")
+
+     except Exception as e:
+         print(f"Error testing sessions: {e}")
+
+     print()
+
+     # Test 3: Check if we can find any existing sessions
+     print("3. Checking for existing RAG sessions...")
+     try:
+         response = requests.get(f"{BASE_URL}/debug")
+         if response.status_code == 200:
+             data = response.json()
+             session_ids = data.get('session_ids', [])
+             if session_ids:
+                 print(f"Found {len(session_ids)} existing RAG sessions")
+                 print(f"  Session IDs: {session_ids[:3]}{'...' if len(session_ids) > 3 else ''}")
+             else:
+                 print("No RAG sessions found (normal if no documents were uploaded)")
+
+     except Exception as e:
+         print(f"Error checking RAG sessions: {e}")
+
+     print()
+     print("Diagnosis Complete")
+     print()
+     print("LIKELY ISSUE:")
+     print("If sessions are not persisting, this is a common issue in HF Spaces")
+     print("where Flask sessions don't work properly across requests.")
+     print()
+     print("SOLUTION:")
+     print("We need to modify the app to use a different session storage method")
+     print("or pass the session ID through the request body instead of Flask sessions.")
+
+ if __name__ == "__main__":
+     print("Before running this script:")
+     print("1. Update BASE_URL with your actual HF Spaces URL")
+     print("2. Make sure your Space is running")
+     print("3. Optionally upload a document first")
+     print()
+     # test_endpoints()
+     print("Update the BASE_URL variable above and uncomment the test_endpoints() call")
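A minimal sketch of the workaround the script recommends: the chat route accepts the session ID from the JSON request body and only falls back to the Flask cookie session. The route name, field names, and in-memory store are illustrative assumptions, not CogniChat's actual code.

from flask import Flask, request, session, jsonify

app = Flask(__name__)
app.secret_key = "replace-me"  # placeholder secret for this sketch

SESSIONS = {}  # illustrative in-memory map: session_id -> RAG state

@app.route("/chat", methods=["POST"])
def chat():
    data = request.get_json(silent=True) or {}
    # Prefer the client-supplied ID (e.g. from sessionStorage) over the cookie session
    session_id = data.get("session_id") or session.get("session_id")
    if not session_id or session_id not in SESSIONS:
        return jsonify({"error": "No active document session"}), 400
    return jsonify({"session_id": session_id, "answer": "..."})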
test_upload_permissions.py ADDED
@@ -0,0 +1,82 @@
+
+ """
+ Test script to verify upload folder permissions and file operations.
+ """
+ import os
+ import tempfile
+ from pathlib import Path
+
+ print("Upload Folder Permission Test")
+
+ # Detect environment
+ is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
+ print(f"Environment: {'Hugging Face Spaces' if is_hf_spaces else 'Local Development'}")
+
+ # Test different upload folder configurations
+ test_folders = [
+     'uploads',                 # Relative path (will fail in HF Spaces)
+     './uploads',               # Current directory relative path
+     '/tmp/uploads',            # Recommended for HF Spaces
+     '/tmp/cognichat_uploads'   # Alternative temp location
+ ]
+
+ print("\nTesting Upload Folder Options")
+
+ for folder in test_folders:
+     print(f"\nTesting: {folder}")
+
+     try:
+         # Try to create the directory
+         os.makedirs(folder, exist_ok=True)
+         print("Directory created/exists")
+
+         # Test write permissions
+         test_file = os.path.join(folder, 'test_write.txt')
+         with open(test_file, 'w') as f:
+             f.write('test content')
+         print("Write permission verified")
+
+         # Test read permissions
+         with open(test_file, 'r') as f:
+             content = f.read()
+         print("Read permission verified")
+
+         # Clean up test file
+         os.remove(test_file)
+         print("File deletion works")
+
+         # Get absolute path
+         abs_path = os.path.abspath(folder)
+         print(f"Full path: {abs_path}")
+         print(f"Writable: {os.access(folder, os.W_OK)}")
+
+     except PermissionError as e:
+         print(f"Permission denied: {e}")
+     except Exception as e:
+         print(f"Error: {e}")
+
+ # Recommended configuration
+ print("\nRecommended Configuration")
+ if is_hf_spaces:
+     recommended_folder = '/tmp/uploads'
+     print(f"For Hugging Face Spaces: {recommended_folder}")
+ else:
+     recommended_folder = 'uploads'
+     print(f"For local development: {recommended_folder}")
+
+ print("\nUse this in your Flask app:")
+ print(f"app.config['UPLOAD_FOLDER'] = '{recommended_folder}'")
+
+ # Test the current working directory permissions
+ print("\nCurrent Directory Info")
+ cwd = os.getcwd()
+ print(f"Current working directory: {cwd}")
+ print(f"CWD is writable: {os.access(cwd, os.W_OK)}")
+ print("\nPath Environment Variables")
+ path_vars = ['HOME', 'TMPDIR', 'TEMP', 'TMP', 'SPACE_ID', 'SPACES_ZERO_GPU']
+ for var in path_vars:
+     value = os.getenv(var)
+     if value:
+         print(f"{var}: {value}")
+
+ print("\nTest Complete")
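A minimal sketch of the upload-folder configuration this test recommends, assuming the same SPACE_ID/SPACES_ZERO_GPU detection used above; app.py's actual logic may differ.

import os
from flask import Flask

app = Flask(__name__)

# /tmp is writable on HF Spaces; locally a project-relative folder is fine
is_hf_spaces = bool(os.getenv("SPACE_ID") or os.getenv("SPACES_ZERO_GPU"))
app.config["UPLOAD_FOLDER"] = "/tmp/uploads" if is_hf_spaces else "uploads"
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)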
verify_hf_spaces_ready.py ADDED
@@ -0,0 +1,260 @@
+ #!/usr/bin/env python3
+ """
+ Verification script to check if CogniChat is ready for HuggingFace Spaces deployment.
+ Run this before deploying to catch any configuration issues.
+ """
+
+ import os
+ import sys
+ from pathlib import Path
+
+ def print_header(text):
+     """Print a formatted header."""
+     print(f"\n{'='*60}")
+     print(f" {text}")
+     print(f"{'='*60}")
+
+ def print_check(condition, message):
+     """Print a check result."""
+     status = "PASS" if condition else "FAIL"
+     print(f"{status}: {message}")
+     return condition
+
+ def verify_files_exist():
+     """Verify all required files exist."""
+     print_header("1. Checking Required Files")
+
+     required_files = [
+         'app.py',
+         'rag_processor.py',
+         'Dockerfile',
+         'requirements.txt',
+         'templates/index.html'
+     ]
+
+     all_exist = True
+     for file in required_files:
+         exists = Path(file).exists()
+         # print_check runs first so every file is reported even after a failure
+         all_exist = print_check(exists, f"File exists: {file}") and all_exist
+
+     return all_exist
+
+ def verify_upload_folder_config():
+     """Verify upload folder configuration in app.py."""
+     print_header("2. Checking Upload Folder Configuration")
+
+     with open('app.py', 'r') as f:
+         app_content = f.read()
+
+     checks = [
+         ('SPACE_ID' in app_content, "Detects SPACE_ID environment variable"),
+         ("'/tmp/uploads'" in app_content, "Uses /tmp/uploads for HF Spaces"),
+         ('is_hf_spaces' in app_content, "Has HF Spaces detection logic"),
+         ('os.makedirs(app.config' in app_content, "Creates upload directory"),
+     ]
+
+     all_pass = True
+     for condition, message in checks:
+         all_pass = print_check(condition, message) and all_pass
+
+     return all_pass
+
+ def verify_session_management():
+     """Verify session management implementation."""
+     print_header("3. Checking Session Management")
+
+     # Check backend
+     with open('app.py', 'r') as f:
+         app_content = f.read()
+
+     backend_checks = [
+         ("session['session_id']" in app_content, "Stores session_id in Flask session"),
+         ("'session_id': session_id" in app_content, "Returns session_id in upload response"),
+         ("data.get('session_id')" in app_content, "Accepts session_id from request body"),
+     ]
+
+     # Check frontend
+     with open('templates/index.html', 'r') as f:
+         html_content = f.read()
+
+     frontend_checks = [
+         ('sessionStorage.setItem' in html_content, "Frontend stores session_id"),
+         ('sessionStorage.getItem' in html_content, "Frontend retrieves session_id"),
+         ('requestBody.session_id' in html_content, "Frontend sends session_id in chat"),
+     ]
+
+     all_pass = True
+     print(" Backend Implementation:")
+     for condition, message in backend_checks:
+         all_pass = print_check(condition, f"  {message}") and all_pass
+
+     print("\n Frontend Implementation:")
+     for condition, message in frontend_checks:
+         all_pass = print_check(condition, f"  {message}") and all_pass
+
+     return all_pass
+
+ def verify_error_handling():
+     """Verify robust error handling."""
+     print_header("4. Checking Error Handling")
+
+     with open('app.py', 'r') as f:
+         app_content = f.read()
+
+     checks = [
+         ('try:' in app_content and 'except' in app_content, "Has try/except blocks"),
+         ('load_pdf_with_fallback' in app_content, "Has PDF fallback loading"),
+         ('failed_files' in app_content, "Tracks failed file uploads"),
+         ('fallback_dir' in app_content, "Has cache directory fallbacks"),
+     ]
+
+     all_pass = True
+     for condition, message in checks:
+         all_pass = print_check(condition, message) and all_pass
+
+     return all_pass
+
+ def verify_environment_variables():
+     """Check for environment variable handling."""
+     print_header("5. Checking Environment Variables")
+
+     with open('app.py', 'r') as f:
+         app_content = f.read()
+
+     with open('rag_processor.py', 'r') as f:
+         rag_content = f.read()
+
+     checks = [
+         ('GROQ_API_KEY' in rag_content, "RAG processor checks GROQ_API_KEY"),
+         ('SPACE_ID' in app_content, "App detects HF Spaces environment"),
+         ('HF_HOME' in app_content, "Sets HuggingFace cache paths"),
+         ('load_dotenv()' in rag_content, "Loads .env file for local dev"),
+     ]
+
+     all_pass = True
+     for condition, message in checks:
+         all_pass = print_check(condition, message) and all_pass
+
+     # Check if .env.example exists
+     all_pass = print_check(
+         Path('.env.example').exists() or Path('README.md').exists(),
+         "Has documentation for environment variables"
+     ) and all_pass
+
+     return all_pass
+
+ def verify_dockerfile():
+     """Verify Dockerfile configuration."""
+     print_header("6. Checking Dockerfile")
+
+     if not Path('Dockerfile').exists():
+         print_check(False, "Dockerfile exists")
+         return False
+
+     with open('Dockerfile', 'r') as f:
+         dockerfile_content = f.read()
+
+     checks = [
+         ('FROM python' in dockerfile_content, "Uses Python base image"),
+         ('WORKDIR /app' in dockerfile_content, "Sets working directory"),
+         ('EXPOSE 7860' in dockerfile_content, "Exposes port 7860 (HF requirement)"),
+         ('appuser' in dockerfile_content, "Runs as non-root user"),
+         ('CMD' in dockerfile_content or 'ENTRYPOINT' in dockerfile_content, "Has startup command"),
+     ]
+
+     all_pass = True
+     for condition, message in checks:
+         all_pass = print_check(condition, message) and all_pass
+
+     return all_pass
+
+ def verify_requirements():
+     """Verify requirements.txt has all dependencies."""
+     print_header("7. Checking Dependencies")
+
+     with open('requirements.txt', 'r') as f:
+         requirements = f.read().lower()
+
+     critical_deps = [
+         'flask',
+         'langchain',
+         'groq',
+         'faiss',
+         'sentence-transformers',
+         'pypdf',
+         'gtts',
+         'rank-bm25'
+     ]
+
+     all_pass = True
+     for dep in critical_deps:
+         found = dep in requirements
+         all_pass = print_check(found, f"Has dependency: {dep}") and all_pass
+
+     return all_pass
+
+ def verify_no_duplicates():
+     """Check for duplicate content in index.html."""
+     print_header("8. Checking for Duplicates")
+
+     with open('templates/index.html', 'r') as f:
+         html_content = f.read()
+
+     # Count DOCTYPE declarations
+     doctype_count = html_content.count('<!DOCTYPE html>')
+     checks = [
+         (doctype_count == 1, f"Single HTML document (found {doctype_count} DOCTYPE declarations)"),
+         (html_content.count('</html>') == 1, "Single closing </html> tag"),
+         (html_content.count('uploadAndProcessFiles') <= 2, "No duplicate JavaScript functions"),
+     ]
+
+     all_pass = True
+     for condition, message in checks:
+         all_pass = print_check(condition, message) and all_pass
+
+     return all_pass
+
+ def main():
+     """Run all verification checks."""
+     print("\n" + "="*60)
+     print(" HuggingFace Spaces Readiness Check for CogniChat")
+     print("="*60)
+
+     checks = [
+         verify_files_exist(),
+         verify_upload_folder_config(),
+         verify_session_management(),
+         verify_error_handling(),
+         verify_environment_variables(),
+         verify_dockerfile(),
+         verify_requirements(),
+         verify_no_duplicates(),
+     ]
+
+     print_header("Summary")
+
+     passed = sum(checks)
+     total = len(checks)
+
+     if all(checks):
+         print(f"\n✅ ALL CHECKS PASSED ({passed}/{total})")
+         print("\n🚀 Your application is ready for HuggingFace Spaces deployment!")
+         print("\nNext steps:")
+         print("1. Go to https://huggingface.co/new-space")
+         print("2. Select 'Docker' as SDK")
+         print("3. Upload all project files")
+         print("4. Set GROQ_API_KEY in Space secrets")
+         print("5. Wait for build to complete")
+         return 0
+     else:
+         print(f"\nSOME CHECKS FAILED ({total - passed}/{total} checks failed)")
+         print("\nPlease fix the issues above before deploying.")
+         print("\nFor detailed guidance, see:")
+         print("- HF_SPACES_FILE_STORAGE_GUIDE.md")
+         print("- DEPLOYMENT.md")
+         print("- HF_SPACES_CHECKLIST.md")
+         return 1
+
+ if __name__ == '__main__':
+     exit_code = main()
+     sys.exit(exit_code)
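A small usage sketch, assuming the script is run from the project root: invoke the readiness check from another script or a pre-push hook and abort on failure.

import subprocess
import sys

# Run the readiness check; its exit code is 0 only when every check passes
result = subprocess.run([sys.executable, "verify_hf_spaces_ready.py"])
if result.returncode != 0:
    sys.exit("Fix the reported issues before deploying to HuggingFace Spaces.")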