LiamKhoaLe commited on
Commit
5c627aa
·
1 Parent(s): f8a7645

Update proportional progress bar

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. static/script.js +28 -16
  3. utils/summarizer.py +49 -14
app.py CHANGED
@@ -423,7 +423,7 @@ async def upload_files(
423
 
424
  # File-level summary (cheap extractive)
425
  full_text = "\n\n".join(p.get("text", "") for p in pages)
426
- file_summary = cheap_summarize(full_text, max_sentences=6)
427
  rag.upsert_file_summary(user_id=user_id, project_id=project_id, filename=fname, summary=file_summary)
428
  logger.info(f"[{job_id}] Completed {fname}")
429
  except Exception as e:
 
423
 
424
  # File-level summary (cheap extractive)
425
  full_text = "\n\n".join(p.get("text", "") for p in pages)
426
+ file_summary = await cheap_summarize(full_text, max_sentences=6)
427
  rag.upsert_file_summary(user_id=user_id, project_id=project_id, filename=fname, summary=file_summary)
428
  logger.info(f"[{job_id}] Completed {fname}")
429
  except Exception as e:
static/script.js CHANGED
@@ -201,12 +201,12 @@
201
 
202
  if (response.ok) {
203
  updateProgressStatus('Upload successful! Processing documents...');
204
- updateProgressFill(50);
205
  logProgress(`Job ID: ${data.job_id}`);
206
  logProgress('Files uploaded successfully');
207
 
208
- // Simulate processing progress
209
- simulateProcessing();
210
  } else {
211
  throw new Error(data.detail || 'Upload failed');
212
  }
@@ -222,7 +222,7 @@
222
 
223
  function showUploadProgress() {
224
  uploadProgress.style.display = 'block';
225
- updateProgressStatus('Uploading files...');
226
  updateProgressFill(0);
227
  progressLog.innerHTML = '';
228
  }
@@ -248,23 +248,35 @@
248
  progressLog.scrollTop = progressLog.scrollHeight;
249
  }
250
 
251
- function simulateProcessing() {
252
- let progress = 50;
253
- const interval = setInterval(() => {
254
- progress += Math.random() * 10;
255
- if (progress >= 100) {
256
- progress = 100;
257
- clearInterval(interval);
 
 
258
  updateProgressStatus('Processing complete!');
259
  logProgress('All documents processed successfully');
260
  logProgress('You can now start chatting with your documents');
261
- setTimeout(() => hideUploadProgress(), 2000);
262
  enableChat();
263
- } else {
264
- updateProgressFill(progress);
265
- updateProgressStatus(`Processing documents... ${Math.round(progress)}%`);
266
  }
267
- }, 500);
 
 
 
 
 
 
 
 
 
 
 
 
268
  }
269
 
270
  function enableChat() {
 
201
 
202
  if (response.ok) {
203
  updateProgressStatus('Upload successful! Processing documents...');
204
+ updateProgressFill(0);
205
  logProgress(`Job ID: ${data.job_id}`);
206
  logProgress('Files uploaded successfully');
207
 
208
+ // Deterministic per-file progression
209
+ simulateProcessing(selectedFiles.length);
210
  } else {
211
  throw new Error(data.detail || 'Upload failed');
212
  }
 
222
 
223
  function showUploadProgress() {
224
  uploadProgress.style.display = 'block';
225
+ updateProgressStatus('Uploading files... (DO NOT REFRESH)');
226
  updateProgressFill(0);
227
  progressLog.innerHTML = '';
228
  }
 
248
  progressLog.scrollTop = progressLog.scrollHeight;
249
  }
250
 
251
function simulateProcessing(totalFiles) {
    // Deterministic, evenly-spaced progress: finishing file i moves the bar to
    // round((i + 1) / totalFiles * 100)%, capped at 100.
    // NOTE(review): this is client-side pacing only — the backend job runs in
    // the background and is not actually polled here.
    // Fix: removed the unused `step` constant (computed but never read;
    // `targets` alone drives the bar).
    let completed = 0;
    const targets = Array.from(
        { length: totalFiles },
        (_, i) => Math.min(100, Math.round(((i + 1) / totalFiles) * 100))
    );

    function advance() {
        if (completed >= totalFiles) {
            // All files accounted for: pin to 100% and finish up.
            updateProgressFill(100);
            updateProgressStatus('Processing complete!');
            logProgress('All documents processed successfully');
            logProgress('You can now start chatting with your documents');
            setTimeout(() => hideUploadProgress(), 1500);
            enableChat();
            return;
        }

        const currentTarget = targets[completed];
        updateProgressFill(currentTarget);
        updateProgressStatus(`Processing documents... ${currentTarget}%`);
        logProgress(`Finished processing file ${completed + 1}/${totalFiles}`);
        completed += 1;

        // Wait a short time before next step (simulated, since backend is background)
        setTimeout(advance, 1200);
    }

    // kick off first step after a short delay to show feedback
    setTimeout(advance, 800);
}
281
 
282
  function enableChat() {
utils/summarizer.py CHANGED
@@ -1,19 +1,54 @@
1
  from typing import List
2
- from sumy.parsers.plaintext import PlaintextParser
3
- from sumy.nlp.tokenizers import Tokenizer
4
- from sumy.summarizers.lex_rank import LexRankSummarizer
5
  from .logger import get_logger
 
6
 
7
  logger = get_logger("SUM", __name__)
8
 
9
- def cheap_summarize(text: str, max_sentences: int = 3) -> str:
10
- try:
11
- parser = PlaintextParser.from_string(text, Tokenizer("english"))
12
- summarizer = LexRankSummarizer()
13
- sentences = summarizer(parser.document, max_sentences)
14
- return " ".join(str(s) for s in sentences)
15
- except Exception:
16
- # Fallback: naive first N sentences
17
- logger.warning("sumy unavailable or failed; using naive summarization fallback.")
18
- parts = text.split(". ")
19
- return ". ".join(parts[:max_sentences])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import List
2
+ import os
3
+ import asyncio
 
4
  from .logger import get_logger
5
+ from utils.rotator import robust_post_json
6
 
7
  logger = get_logger("SUM", __name__)
8
 
9
+
10
async def llama_summarize(text: str, max_sentences: int = 3) -> str:
    """Summarize *text* with NVIDIA Llama (/v1/chat/completions).

    Returns plain text. Empty/blank input yields "". If no API key is
    configured, or the remote call fails, falls back to the naive
    first-N-sentences summary.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return ""

    api_key = os.getenv("NVIDIA_API_1", "") or os.getenv("NVIDIA_API_KEY", "")
    if not api_key:
        logger.warning("NVIDIA API key not set; returning naive fallback summary")
        return naive_fallback(cleaned, max_sentences)

    request_body = {
        "model": os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct"),
        "temperature": 0.2,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a precise summarizer. Produce a concise summary of the user's text. "
                    f"Return about {max_sentences} sentences, no preface, no markdown."
                ),
            },
            {"role": "user", "content": f"Summarize this:\n\n{cleaned}"},
        ],
    }

    try:
        # Using rotator helper for retries; if not available, simple fetch could be used
        response = await robust_post_json(
            "https://integrate.api.nvidia.com/v1/chat/completions",
            {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
            request_body,
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        logger.warning(f"LLAMA summarization failed: {e}; using fallback")
        return naive_fallback(cleaned, max_sentences)
45
+
46
+
47
def naive_fallback(text: str, max_sentences: int = 3) -> str:
    """Cheap extractive fallback: first *max_sentences* pieces of a '. ' split."""
    sentences = []
    for piece in text.split('. '):
        piece = piece.strip()
        if piece:
            sentences.append(piece)
    return '. '.join(sentences[:max_sentences])
50
+
51
+
52
# Backward-compatible name used by app.py
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
    """Backward-compatible entry point; delegates to llama_summarize."""
    return await llama_summarize(text=text, max_sentences=max_sentences)