Spaces:
Sleeping
Sleeping
Commit
·
5c627aa
1
Parent(s):
f8a7645
Update proportional progress bar
Browse files- app.py +1 -1
- static/script.js +28 -16
- utils/summarizer.py +49 -14
app.py
CHANGED
|
@@ -423,7 +423,7 @@ async def upload_files(
|
|
| 423 |
|
| 424 |
# File-level summary (cheap extractive)
|
| 425 |
full_text = "\n\n".join(p.get("text", "") for p in pages)
|
| 426 |
-
file_summary = cheap_summarize(full_text, max_sentences=6)
|
| 427 |
rag.upsert_file_summary(user_id=user_id, project_id=project_id, filename=fname, summary=file_summary)
|
| 428 |
logger.info(f"[{job_id}] Completed {fname}")
|
| 429 |
except Exception as e:
|
|
|
|
| 423 |
|
| 424 |
# File-level summary (cheap extractive)
|
| 425 |
full_text = "\n\n".join(p.get("text", "") for p in pages)
|
| 426 |
+
file_summary = await cheap_summarize(full_text, max_sentences=6)
|
| 427 |
rag.upsert_file_summary(user_id=user_id, project_id=project_id, filename=fname, summary=file_summary)
|
| 428 |
logger.info(f"[{job_id}] Completed {fname}")
|
| 429 |
except Exception as e:
|
static/script.js
CHANGED
|
@@ -201,12 +201,12 @@
|
|
| 201 |
|
| 202 |
if (response.ok) {
|
| 203 |
updateProgressStatus('Upload successful! Processing documents...');
|
| 204 |
-
updateProgressFill(
|
| 205 |
logProgress(`Job ID: ${data.job_id}`);
|
| 206 |
logProgress('Files uploaded successfully');
|
| 207 |
|
| 208 |
-
//
|
| 209 |
-
simulateProcessing();
|
| 210 |
} else {
|
| 211 |
throw new Error(data.detail || 'Upload failed');
|
| 212 |
}
|
|
@@ -222,7 +222,7 @@
|
|
| 222 |
|
| 223 |
function showUploadProgress() {
|
| 224 |
uploadProgress.style.display = 'block';
|
| 225 |
-
updateProgressStatus('Uploading files...');
|
| 226 |
updateProgressFill(0);
|
| 227 |
progressLog.innerHTML = '';
|
| 228 |
}
|
|
@@ -248,23 +248,35 @@
|
|
| 248 |
progressLog.scrollTop = progressLog.scrollHeight;
|
| 249 |
}
|
| 250 |
|
| 251 |
-
function simulateProcessing() {
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
| 258 |
updateProgressStatus('Processing complete!');
|
| 259 |
logProgress('All documents processed successfully');
|
| 260 |
logProgress('You can now start chatting with your documents');
|
| 261 |
-
setTimeout(() => hideUploadProgress(),
|
| 262 |
enableChat();
|
| 263 |
-
|
| 264 |
-
updateProgressFill(progress);
|
| 265 |
-
updateProgressStatus(`Processing documents... ${Math.round(progress)}%`);
|
| 266 |
}
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
}
|
| 269 |
|
| 270 |
function enableChat() {
|
|
|
|
| 201 |
|
| 202 |
if (response.ok) {
|
| 203 |
updateProgressStatus('Upload successful! Processing documents...');
|
| 204 |
+
updateProgressFill(0);
|
| 205 |
logProgress(`Job ID: ${data.job_id}`);
|
| 206 |
logProgress('Files uploaded successfully');
|
| 207 |
|
| 208 |
+
// Deterministic per-file progression
|
| 209 |
+
simulateProcessing(selectedFiles.length);
|
| 210 |
} else {
|
| 211 |
throw new Error(data.detail || 'Upload failed');
|
| 212 |
}
|
|
|
|
| 222 |
|
| 223 |
function showUploadProgress() {
|
| 224 |
uploadProgress.style.display = 'block';
|
| 225 |
+
updateProgressStatus('Uploading files... (DO NOT REFRESH)');
|
| 226 |
updateProgressFill(0);
|
| 227 |
progressLog.innerHTML = '';
|
| 228 |
}
|
|
|
|
| 248 |
progressLog.scrollTop = progressLog.scrollHeight;
|
| 249 |
}
|
| 250 |
|
| 251 |
+
function simulateProcessing(totalFiles) {
|
| 252 |
+
// Split 100% evenly across files. Round to nearest integer.
|
| 253 |
+
let completed = 0;
|
| 254 |
+
const step = Math.round(100 / Math.max(totalFiles, 1));
|
| 255 |
+
const targets = Array.from({ length: totalFiles }, (_, i) => Math.min(100, Math.round(((i + 1) / totalFiles) * 100)));
|
| 256 |
+
|
| 257 |
+
function advance() {
|
| 258 |
+
if (completed >= totalFiles) {
|
| 259 |
+
updateProgressFill(100);
|
| 260 |
updateProgressStatus('Processing complete!');
|
| 261 |
logProgress('All documents processed successfully');
|
| 262 |
logProgress('You can now start chatting with your documents');
|
| 263 |
+
setTimeout(() => hideUploadProgress(), 1500);
|
| 264 |
enableChat();
|
| 265 |
+
return;
|
|
|
|
|
|
|
| 266 |
}
|
| 267 |
+
|
| 268 |
+
const currentTarget = targets[completed];
|
| 269 |
+
updateProgressFill(currentTarget);
|
| 270 |
+
updateProgressStatus(`Processing documents... ${currentTarget}%`);
|
| 271 |
+
logProgress(`Finished processing file ${completed + 1}/${totalFiles}`);
|
| 272 |
+
completed += 1;
|
| 273 |
+
|
| 274 |
+
// Wait a short time before next step (simulated, since backend is background)
|
| 275 |
+
setTimeout(advance, 1200);
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
// kick off first step after a short delay to show feedback
|
| 279 |
+
setTimeout(advance, 800);
|
| 280 |
}
|
| 281 |
|
| 282 |
function enableChat() {
|
utils/summarizer.py
CHANGED
|
@@ -1,19 +1,54 @@
|
|
| 1 |
from typing import List
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
from sumy.summarizers.lex_rank import LexRankSummarizer
|
| 5 |
from .logger import get_logger
|
|
|
|
| 6 |
|
| 7 |
logger = get_logger("SUM", __name__)
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import List
|
| 2 |
+
import os
|
| 3 |
+
import asyncio
|
|
|
|
| 4 |
from .logger import get_logger
|
| 5 |
+
from utils.rotator import robust_post_json
|
| 6 |
|
| 7 |
logger = get_logger("SUM", __name__)
|
| 8 |
|
| 9 |
+
|
| 10 |
+
async def llama_summarize(text: str, max_sentences: int = 3) -> str:
|
| 11 |
+
"""Summarize text using NVIDIA Llama via /v1/chat/completions. Returns plain text."""
|
| 12 |
+
text = (text or "").strip()
|
| 13 |
+
if not text:
|
| 14 |
+
return ""
|
| 15 |
+
model = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
|
| 16 |
+
key = os.getenv("NVIDIA_API_1", "") or os.getenv("NVIDIA_API_KEY", "")
|
| 17 |
+
if not key:
|
| 18 |
+
logger.warning("NVIDIA API key not set; returning naive fallback summary")
|
| 19 |
+
return naive_fallback(text, max_sentences)
|
| 20 |
+
|
| 21 |
+
system_prompt = (
|
| 22 |
+
"You are a precise summarizer. Produce a concise summary of the user's text. "
|
| 23 |
+
f"Return about {max_sentences} sentences, no preface, no markdown."
|
| 24 |
+
)
|
| 25 |
+
user_prompt = f"Summarize this:\n\n{text}"
|
| 26 |
+
|
| 27 |
+
try:
|
| 28 |
+
url = "https://integrate.api.nvidia.com/v1/chat/completions"
|
| 29 |
+
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
|
| 30 |
+
payload = {
|
| 31 |
+
"model": model,
|
| 32 |
+
"temperature": 0.2,
|
| 33 |
+
"messages": [
|
| 34 |
+
{"role": "system", "content": system_prompt},
|
| 35 |
+
{"role": "user", "content": user_prompt},
|
| 36 |
+
]
|
| 37 |
+
}
|
| 38 |
+
# Using rotator helper for retries; if not available, simple fetch could be used
|
| 39 |
+
data = await robust_post_json(url, headers, payload)
|
| 40 |
+
content = data["choices"][0]["message"]["content"].strip()
|
| 41 |
+
return content
|
| 42 |
+
except Exception as e:
|
| 43 |
+
logger.warning(f"LLAMA summarization failed: {e}; using fallback")
|
| 44 |
+
return naive_fallback(text, max_sentences)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def naive_fallback(text: str, max_sentences: int = 3) -> str:
|
| 48 |
+
parts = [p.strip() for p in text.split('. ') if p.strip()]
|
| 49 |
+
return '. '.join(parts[:max_sentences])
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Backward-compatible name used by app.py
|
| 53 |
+
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
|
| 54 |
+
return await llama_summarize(text, max_sentences)
|