import os
import asyncio
from typing import List
from utils.logger import get_logger
from utils.api.rotator import robust_post_json, APIKeyRotator

logger = get_logger("SUM", __name__)

# Create a module-level NVIDIA API key rotator (uses NVIDIA_API_1..N)
ROTATOR = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)


async def llama_chat(messages: List[dict], temperature: float = 0.2) -> str:
  """Call the NVIDIA chat-completions endpoint and return the assistant's reply text."""
  model = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
  # Get key via rotator (supports rotation/retries in robust_post_json)
  key = ROTATOR.get_key()
  if not key:
    raise RuntimeError("NVIDIA API key not set (NVIDIA_API_*)")
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
  payload = {"model": model, "temperature": temperature, "messages": messages}
  data = await robust_post_json(url, headers, payload, ROTATOR)
  return data["choices"][0]["message"]["content"].strip()


async def llama_summarize(text: str, max_sentences: int = 3) -> str:
  """Summarize text in roughly max_sentences sentences; fall back to a naive split if the LLM call fails."""
  text = (text or "").strip()
  if not text:
    return ""
  system = (
    "You are a precise summarizer. Produce a clear, faithful summary of the user's text. "
    f"Return ~{max_sentences} sentences, no comments, no preface, no markdown."
  )
  user = f"Summarize this text:\n\n{text}"
  try:
    return await llama_chat([
      {"role": "system", "content": system},
      {"role": "user", "content": user},
    ])
  except Exception as e:
    logger.warning(f"LLAMA summarization failed: {e}; using fallback")
    return naive_fallback(text, max_sentences)


def naive_fallback(text: str, max_sentences: int = 3) -> str:
  """Crude fallback: return the first max_sentences fragments of text, split on '. '."""
  parts = [p.strip() for p in text.split('. ') if p.strip()]
  return '. '.join(parts[:max_sentences])
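

# Worked example (illustrative): naive_fallback("One. Two. Three. Four.", 2)
# returns "One. Two", i.e. the first two '. '-separated fragments.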


async def summarize_text(text: str, max_sentences: int = 6, chunk_size: int = 2500) -> str:
  """Hierarchical summarization for long texts using NVIDIA Llama."""
  if not text:
    return ""
  if len(text) <= chunk_size:
    return await llama_summarize(text, max_sentences=max_sentences)
  # Split into chunks on paragraph boundaries if possible. Note: a single
  # paragraph longer than chunk_size still becomes its own (oversized) chunk.
  paragraphs = text.split('\n\n')
  chunks: List[str] = []
  buf = []
  total = 0
  for p in paragraphs:
    if total + len(p) > chunk_size and buf:
      chunks.append('\n\n'.join(buf))
      buf, total = [], 0
    buf.append(p)
    total += len(p)
  if buf:
    chunks.append('\n\n'.join(buf))

  partials = []
  for ch in chunks:
    partials.append(await llama_summarize(ch, max_sentences=3))
    await asyncio.sleep(0)  # yield control to the event loop between requests
  combined = '\n'.join(partials)
  return await llama_summarize(combined, max_sentences=max_sentences)


async def clean_chunk_text(text: str) -> str:
  """Use NVIDIA LLM to remove headers/footers and personally identifying/institution boilerplate.
  Keep the core academic content intact. Do not remove page numbers or section titles.
  """
  content = (text or "").strip()
  if not content:
    return content
  system = (
    "You are a content cleaner. Remove boilerplate headers/footers like institution names, course codes, student IDs, "
    "emails, author IDs, document footers/headers repeated across pages. Keep headings and the main body content. "
    "Preserve meaningful section titles. Keep pagination references in the natural text if present. Return only cleaned text."
  )
  user = f"Clean this content by removing headers/footers and IDs, keep core content:\n\n{content}"
  try:
    return await llama_chat([
      {"role": "system", "content": system},
      {"role": "user", "content": user},
    ], temperature=0.0)
  except Exception as e:
    logger.warning(f"LLAMA cleaning failed: {e}; returning original text")
    return content


# Backward-compatible name used by app.py
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
  return await llama_summarize(text, max_sentences)
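

# --- Usage sketch (illustrative, not part of the module API) ---
# A minimal demo, assuming at least one NVIDIA_API_1..5 key is exported and the
# utils package is importable; the sample text below is a placeholder.
if __name__ == "__main__":
  async def _demo() -> None:
    sample = "First paragraph of a document.\n\nSecond paragraph with more detail."
    cleaned = await clean_chunk_text(sample)
    summary = await summarize_text(cleaned, max_sentences=2)
    logger.info(f"Summary: {summary}")

  asyncio.run(_demo())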