Spaces:
Sleeping
Sleeping
Commit
·
5c627aa
1
Parent(s):
f8a7645
Update proportional progress bar
Browse files- app.py +1 -1
- static/script.js +28 -16
- utils/summarizer.py +49 -14
app.py
CHANGED
|
@@ -423,7 +423,7 @@ async def upload_files(
|
|
| 423 |
|
| 424 |
# File-level summary (cheap extractive)
|
| 425 |
full_text = "\n\n".join(p.get("text", "") for p in pages)
|
| 426 |
-
file_summary = cheap_summarize(full_text, max_sentences=6)
|
| 427 |
rag.upsert_file_summary(user_id=user_id, project_id=project_id, filename=fname, summary=file_summary)
|
| 428 |
logger.info(f"[{job_id}] Completed {fname}")
|
| 429 |
except Exception as e:
|
|
|
|
| 423 |
|
| 424 |
# File-level summary (cheap extractive)
|
| 425 |
full_text = "\n\n".join(p.get("text", "") for p in pages)
|
| 426 |
+
file_summary = await cheap_summarize(full_text, max_sentences=6)
|
| 427 |
rag.upsert_file_summary(user_id=user_id, project_id=project_id, filename=fname, summary=file_summary)
|
| 428 |
logger.info(f"[{job_id}] Completed {fname}")
|
| 429 |
except Exception as e:
|
static/script.js
CHANGED
|
@@ -201,12 +201,12 @@
|
|
| 201 |
|
| 202 |
if (response.ok) {
|
| 203 |
updateProgressStatus('Upload successful! Processing documents...');
|
| 204 |
-
updateProgressFill(
|
| 205 |
logProgress(`Job ID: ${data.job_id}`);
|
| 206 |
logProgress('Files uploaded successfully');
|
| 207 |
|
| 208 |
-
//
|
| 209 |
-
simulateProcessing();
|
| 210 |
} else {
|
| 211 |
throw new Error(data.detail || 'Upload failed');
|
| 212 |
}
|
|
@@ -222,7 +222,7 @@
|
|
| 222 |
|
| 223 |
function showUploadProgress() {
|
| 224 |
uploadProgress.style.display = 'block';
|
| 225 |
-
updateProgressStatus('Uploading files...');
|
| 226 |
updateProgressFill(0);
|
| 227 |
progressLog.innerHTML = '';
|
| 228 |
}
|
|
@@ -248,23 +248,35 @@
|
|
| 248 |
progressLog.scrollTop = progressLog.scrollHeight;
|
| 249 |
}
|
| 250 |
|
| 251 |
-
function simulateProcessing() {
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
| 258 |
updateProgressStatus('Processing complete!');
|
| 259 |
logProgress('All documents processed successfully');
|
| 260 |
logProgress('You can now start chatting with your documents');
|
| 261 |
-
setTimeout(() => hideUploadProgress(),
|
| 262 |
enableChat();
|
| 263 |
-
|
| 264 |
-
updateProgressFill(progress);
|
| 265 |
-
updateProgressStatus(`Processing documents... ${Math.round(progress)}%`);
|
| 266 |
}
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
}
|
| 269 |
|
| 270 |
function enableChat() {
|
|
|
|
| 201 |
|
| 202 |
if (response.ok) {
|
| 203 |
updateProgressStatus('Upload successful! Processing documents...');
|
| 204 |
+
updateProgressFill(0);
|
| 205 |
logProgress(`Job ID: ${data.job_id}`);
|
| 206 |
logProgress('Files uploaded successfully');
|
| 207 |
|
| 208 |
+
// Deterministic per-file progression
|
| 209 |
+
simulateProcessing(selectedFiles.length);
|
| 210 |
} else {
|
| 211 |
throw new Error(data.detail || 'Upload failed');
|
| 212 |
}
|
|
|
|
| 222 |
|
| 223 |
function showUploadProgress() {
|
| 224 |
uploadProgress.style.display = 'block';
|
| 225 |
+
updateProgressStatus('Uploading files... (DO NOT REFRESH)');
|
| 226 |
updateProgressFill(0);
|
| 227 |
progressLog.innerHTML = '';
|
| 228 |
}
|
|
|
|
| 248 |
progressLog.scrollTop = progressLog.scrollHeight;
|
| 249 |
}
|
| 250 |
|
| 251 |
+
function simulateProcessing(totalFiles) {
|
| 252 |
+
// Split 100% evenly across files. Round to nearest integer.
|
| 253 |
+
let completed = 0;
|
| 254 |
+
const step = Math.round(100 / Math.max(totalFiles, 1));
|
| 255 |
+
const targets = Array.from({ length: totalFiles }, (_, i) => Math.min(100, Math.round(((i + 1) / totalFiles) * 100)));
|
| 256 |
+
|
| 257 |
+
function advance() {
|
| 258 |
+
if (completed >= totalFiles) {
|
| 259 |
+
updateProgressFill(100);
|
| 260 |
updateProgressStatus('Processing complete!');
|
| 261 |
logProgress('All documents processed successfully');
|
| 262 |
logProgress('You can now start chatting with your documents');
|
| 263 |
+
setTimeout(() => hideUploadProgress(), 1500);
|
| 264 |
enableChat();
|
| 265 |
+
return;
|
|
|
|
|
|
|
| 266 |
}
|
| 267 |
+
|
| 268 |
+
const currentTarget = targets[completed];
|
| 269 |
+
updateProgressFill(currentTarget);
|
| 270 |
+
updateProgressStatus(`Processing documents... ${currentTarget}%`);
|
| 271 |
+
logProgress(`Finished processing file ${completed + 1}/${totalFiles}`);
|
| 272 |
+
completed += 1;
|
| 273 |
+
|
| 274 |
+
// Wait a short time before next step (simulated, since backend is background)
|
| 275 |
+
setTimeout(advance, 1200);
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
// kick off first step after a short delay to show feedback
|
| 279 |
+
setTimeout(advance, 800);
|
| 280 |
}
|
| 281 |
|
| 282 |
function enableChat() {
|
utils/summarizer.py
CHANGED
|
@@ -1,19 +1,54 @@
|
|
| 1 |
from typing import List
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
from sumy.summarizers.lex_rank import LexRankSummarizer
|
| 5 |
from .logger import get_logger
|
|
|
|
| 6 |
|
| 7 |
logger = get_logger("SUM", __name__)
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import List
|
| 2 |
+
import os
|
| 3 |
+
import asyncio
|
|
|
|
| 4 |
from .logger import get_logger
|
| 5 |
+
from utils.rotator import robust_post_json
|
| 6 |
|
| 7 |
logger = get_logger("SUM", __name__)
|
| 8 |
|
| 9 |
+
|
| 10 |
+
async def llama_summarize(text: str, max_sentences: int = 3) -> str:
|
| 11 |
+
"""Summarize text using NVIDIA Llama via /v1/chat/completions. Returns plain text."""
|
| 12 |
+
text = (text or "").strip()
|
| 13 |
+
if not text:
|
| 14 |
+
return ""
|
| 15 |
+
model = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
|
| 16 |
+
key = os.getenv("NVIDIA_API_1", "") or os.getenv("NVIDIA_API_KEY", "")
|
| 17 |
+
if not key:
|
| 18 |
+
logger.warning("NVIDIA API key not set; returning naive fallback summary")
|
| 19 |
+
return naive_fallback(text, max_sentences)
|
| 20 |
+
|
| 21 |
+
system_prompt = (
|
| 22 |
+
"You are a precise summarizer. Produce a concise summary of the user's text. "
|
| 23 |
+
f"Return about {max_sentences} sentences, no preface, no markdown."
|
| 24 |
+
)
|
| 25 |
+
user_prompt = f"Summarize this:\n\n{text}"
|
| 26 |
+
|
| 27 |
+
try:
|
| 28 |
+
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 29 |
+
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
|
| 30 |
+
payload = {
|
| 31 |
+
"model": model,
|
| 32 |
+
"temperature": 0.2,
|
| 33 |
+
"messages": [
|
| 34 |
+
{"role": "system", "content": system_prompt},
|
| 35 |
+
{"role": "user", "content": user_prompt},
|
| 36 |
+
]
|
| 37 |
+
}
|
| 38 |
+
# Using rotator helper for retries; if not available, simple fetch could be used
|
| 39 |
+
data = await robust_post_json(url, headers, payload)
|
| 40 |
+
content = data["choices"][0]["message"]["content"].strip()
|
| 41 |
+
return content
|
| 42 |
+
except Exception as e:
|
| 43 |
+
logger.warning(f"LLAMA summarization failed: {e}; using fallback")
|
| 44 |
+
return naive_fallback(text, max_sentences)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def naive_fallback(text: str, max_sentences: int = 3) -> str:
|
| 48 |
+
parts = [p.strip() for p in text.split('. ') if p.strip()]
|
| 49 |
+
return '. '.join(parts[:max_sentences])
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Backward-compatible name used by app.py
|
| 53 |
+
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
|
| 54 |
+
return await llama_summarize(text, max_sentences)
|