# EdSummariser: utils/service/summarizer.py
import os
import asyncio
from typing import List
from utils.logger import get_logger
from utils.api.rotator import robust_post_json, APIKeyRotator
logger = get_logger("SUM", __name__)
# Create a module-level NVIDIA API key rotator (uses NVIDIA_API_1..N)
ROTATOR = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)
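
# Example environment configuration (a sketch; the key values are placeholders).
# The names NVIDIA_API_1..NVIDIA_API_5 follow the prefix/max_slots above, and
# NVIDIA_SMALL is read in llama_chat below:
#
#   export NVIDIA_API_1="nvapi-..."
#   export NVIDIA_API_2="nvapi-..."
#   export NVIDIA_SMALL="meta/llama-3.1-8b-instruct"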

async def llama_chat(messages: List[dict], temperature: float = 0.2) -> str:
    model = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
    # Get a key via the rotator (robust_post_json handles rotation/retries)
    key = ROTATOR.get_key()
    if not key:
        raise RuntimeError("NVIDIA API key not set (NVIDIA_API_*)")
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
    payload = {"model": model, "temperature": temperature, "messages": messages}
    data = await robust_post_json(url, headers, payload, ROTATOR)
    return data["choices"][0]["message"]["content"].strip()

async def llama_summarize(text: str, max_sentences: int = 3) -> str:
    text = (text or "").strip()
    if not text:
        return ""
    system = (
        "You are a precise summarizer. Produce a clear, faithful summary of the user's text. "
        f"Return ~{max_sentences} sentences, no comments, no preface, no markdown."
    )
    user = f"Summarize this text:\n\n{text}"
    try:
        return await llama_chat([
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ])
    except Exception as e:
        logger.warning(f"LLAMA summarization failed: {e}; using fallback")
        return naive_fallback(text, max_sentences)

def naive_fallback(text: str, max_sentences: int = 3) -> str:
    """Crude extractive fallback: return the first few sentences verbatim."""
    parts = [p.strip() for p in text.split('. ') if p.strip()]
    return '. '.join(parts[:max_sentences])
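
# e.g. naive_fallback("First point. Second point. Third point. Fourth.", 2)
# -> "First point. Second point"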

async def summarize_text(text: str, max_sentences: int = 6, chunk_size: int = 2500) -> str:
    """Hierarchical summarization for long texts using NVIDIA Llama."""
    if not text:
        return ""
    if len(text) <= chunk_size:
        return await llama_summarize(text, max_sentences=max_sentences)
    # Split into chunks on paragraph boundaries if possible
    paragraphs = text.split('\n\n')
    chunks: List[str] = []
    buf: List[str] = []
    total = 0
    for p in paragraphs:
        if total + len(p) > chunk_size and buf:
            chunks.append('\n\n'.join(buf))
            buf, total = [], 0
        buf.append(p)
        total += len(p)
    if buf:
        chunks.append('\n\n'.join(buf))
    # Summarize each chunk, yielding to the event loop between calls
    partials = []
    for ch in chunks:
        partials.append(await llama_summarize(ch, max_sentences=3))
        await asyncio.sleep(0)
    # Second pass: summarize the combined partial summaries
    combined = '\n'.join(partials)
    return await llama_summarize(combined, max_sentences=max_sentences)
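
# Usage sketch: texts longer than chunk_size are split on paragraph breaks,
# each chunk is reduced to ~3 sentences, and the joined partial summaries are
# summarized once more (illustrative; long_document is a placeholder):
#
#   summary = await summarize_text(long_document, max_sentences=6)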

async def clean_chunk_text(text: str) -> str:
    """Use the NVIDIA LLM to remove headers/footers and personally identifying
    or institutional boilerplate. Keep the core academic content intact; do not
    remove page numbers or section titles.
    """
    content = (text or "").strip()
    if not content:
        return content
    system = (
        "You are a content cleaner. Remove boilerplate headers/footers like institution names, course codes, student IDs, "
        "emails, author IDs, document footers/headers repeated across pages. Keep headings and the main body content. "
        "Preserve meaningful section titles. Keep pagination references in the natural text if present. Return only cleaned text."
    )
    user = f"Clean this content by removing headers/footers and IDs, keep core content:\n\n{content}"
    try:
        return await llama_chat([
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ], temperature=0.0)
    except Exception as e:
        logger.warning(f"LLAMA cleaning failed: {e}; returning original text")
        return content
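
# Usage sketch (illustrative; page_text is a placeholder for one extracted
# document chunk):
#
#   cleaned = await clean_chunk_text(page_text)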

# Backward-compatible name used by app.py
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
    return await llama_summarize(text, max_sentences=max_sentences)
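
# Minimal manual test (a sketch, not part of the module API; assumes the utils
# package is importable). Without NVIDIA_API_* keys, llama_summarize logs a
# warning and falls back to naive_fallback, so this still prints a result.
if __name__ == "__main__":
    sample = (
        "Transformers process tokens in parallel. Attention scores pairwise "
        "relevance between tokens. Positional encodings inject word order."
    )
    print(asyncio.run(summarize_text(sample, max_sentences=2)))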