import os
import asyncio
from typing import List
from utils.logger import get_logger
from utils.api.rotator import robust_post_json, APIKeyRotator
from utils.api.router import qwen_chat_completion, nvidia_large_chat_completion

logger = get_logger("SUM", __name__)

# Create a module-level NVIDIA API key rotator (uses NVIDIA_API_1..N)
ROTATOR = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)
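# With max_slots=5 this scans the env vars NVIDIA_API_1 through NVIDIA_API_5.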


async def llama_chat(messages: List[dict], temperature: float = 0.2) -> str:
  model = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
  # Get key via rotator (supports rotation/retries in robust_post_json)
  key = ROTATOR.get_key()
  if not key:
    raise RuntimeError("NVIDIA API key not set (NVIDIA_API_*)")
  url = "https://integrate.api.nvidia.com/v1/chat/completions"
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
  payload = {"model": model, "temperature": temperature, "messages": messages}
  data = await robust_post_json(url, headers, payload, ROTATOR)
  return data["choices"][0]["message"]["content"].strip()
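
# Usage sketch (hypothetical prompt; requires at least one NVIDIA_API_* key):
#   reply = await llama_chat([
#     {"role": "system", "content": "You are terse."},
#     {"role": "user", "content": "Say hello in one word."},
#   ])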


async def llama_summarize(text: str, max_sentences: int = 3) -> str:
  """Flexible summarization using NVIDIA Small (Llama) for short text, NVIDIA Large for long context."""
  text = (text or "").strip()
  if not text:
    return ""
  
  # Use NVIDIA Large for long context (>1500 chars), NVIDIA Small for short context
  if len(text) > 1500:
    logger.info(f"[SUMMARIZER] Using NVIDIA Large for long context ({len(text)} chars)")
    return await nvidia_large_summarize(text, max_sentences)
  else:
    logger.info(f"[SUMMARIZER] Using NVIDIA Small for short context ({len(text)} chars)")
    return await nvidia_small_summarize(text, max_sentences)


async def nvidia_small_summarize(text: str, max_sentences: int = 3) -> str:
  """Summarization using NVIDIA Small (Llama) for short text."""
  system = (
    "You are a precise summarizer. Produce a clear, faithful summary of the user's text. "
    f"Return ~{max_sentences} sentences, no comments, no preface, no markdown."
  )
  user = f"Summarize this text:\n\n{text}"
  try:
    return await llama_chat([
      {"role": "system", "content": system},
      {"role": "user", "content": user},
    ])
  except Exception as e:
    logger.warning(f"NVIDIA Small summarization failed: {e}; using fallback")
    return naive_fallback(text, max_sentences)


async def nvidia_large_summarize(text: str, max_sentences: int = 3) -> str:
  """Summarization using NVIDIA Large (GPT-OSS) for long context."""
  system = (
    "You are a precise summarizer. Produce a clear, faithful summary of the user's text. "
    f"Return ~{max_sentences} sentences, no comments, no preface, no markdown."
  )
  user = f"Summarize this text:\n\n{text}"
  try:
    return await nvidia_large_chat_completion(system, user, ROTATOR)
  except Exception as e:
    logger.warning(f"NVIDIA Large summarization failed: {e}; using fallback")
    return naive_fallback(text, max_sentences)


def naive_fallback(text: str, max_sentences: int = 3) -> str:
  """Crude extractive fallback: keep the first few sentence-like fragments."""
  parts = [p.strip() for p in text.split('. ') if p.strip()]
  summary = '. '.join(parts[:max_sentences])
  # Restore the terminal punctuation dropped by splitting on '. '.
  return summary if not summary or summary.endswith(('.', '!', '?')) else summary + '.'


async def summarize_text(text: str, max_sentences: int = 6, chunk_size: int = 2500) -> str:
  """Hierarchical summarization for long texts using flexible model selection."""
  if not text:
    return ""
  if len(text) <= chunk_size:
    return await llama_summarize(text, max_sentences=max_sentences)
  
  # Split into chunks on paragraph boundaries if possible
  paragraphs = text.split('\n\n')
  chunks: List[str] = []
  buf = []
  total = 0
  for p in paragraphs:
    if total + len(p) > chunk_size and buf:
      chunks.append('\n\n'.join(buf))
      buf, total = [], 0
    buf.append(p)
    total += len(p)
  if buf:
    chunks.append('\n\n'.join(buf))

  # Process chunks with flexible model selection
  partials = []
  for ch in chunks:
    partials.append(await llama_summarize(ch, max_sentences=3))
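    # Yield to the event loop so other coroutines can run between chunks.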
    await asyncio.sleep(0)
  
  # Combine and summarize with flexible model selection
  combined = '\n'.join(partials)
  return await llama_summarize(combined, max_sentences=max_sentences)
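
# Usage sketch (hypothetical input): hierarchical summary of a long document.
#   summary = await summarize_text(long_text, max_sentences=6, chunk_size=2500)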


async def clean_chunk_text(text: str) -> str:
  """Use Qwen LLM to remove headers/footers and personally identifying/institution boilerplate.
  Keep the core academic content intact. Do not remove page numbers or section titles.
  """
  content = (text or "").strip()
  if not content:
    return content
  system = (
    "You are a content cleaner. Remove boilerplate headers/footers like institution names, course codes, student IDs, "
    "emails, author IDs, document footers/headers repeated across pages. Keep headings and the main body content. "
    "Preserve meaningful section titles. Keep pagination references in the natural text if present. Return only cleaned text."
  )
  user = f"Clean this content by removing headers/footers and IDs, keep core content:\n\n{content}"
  try:
    # Use Qwen for better content cleaning
    return await qwen_chat_completion(system, user, ROTATOR)
  except Exception as e:
    logger.warning(f"Qwen cleaning failed: {e}; returning original text")
    return content
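
# Usage sketch (hypothetical): clean a chunk before summarizing it.
#   cleaned = await clean_chunk_text(raw_page_text)
#   summary = await qwen_summarize(cleaned)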


async def qwen_summarize(text: str, max_sentences: int = 3) -> str:
  """Use Qwen (which supports a thinking mode) for higher-quality summarization."""
  text = (text or "").strip()
  if not text:
    return ""
  system = (
    "You are a precise summarizer. Produce a clear, faithful summary of the user's text. "
    f"Return ~{max_sentences} sentences, no comments, no preface, no markdown."
  )
  user = f"Summarize this text:\n\n{text}"
  try:
    return await qwen_chat_completion(system, user, ROTATOR)
  except Exception as e:
    logger.warning(f"Qwen summarization failed: {e}; using fallback")
    return naive_fallback(text, max_sentences)


# Backward-compatible name used by app.py
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
  """Backward-compatible summarization with flexible model selection."""
  return await llama_summarize(text, max_sentences)
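

if __name__ == "__main__":
  # Minimal smoke-test sketch, assuming NVIDIA_API_1..N keys are set in the
  # environment and the utils package is importable. The sample text is
  # hypothetical; this block is not part of the production import path.
  sample = (
    "Transformers process tokens in parallel with self-attention. Attention "
    "weights come from query-key dot products, and outputs are weighted sums "
    "of value vectors."
  )
  print(asyncio.run(cheap_summarize(sample, max_sentences=2)))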