# EdSummariser: utils/service/summarizer.py
import os
import asyncio
from typing import List
from utils.logger import get_logger
from utils.api.rotator import robust_post_json, APIKeyRotator
logger = get_logger("SUM", __name__)
# Create a module-level NVIDIA API key rotator (uses NVIDIA_API_1..N)
ROTATOR = APIKeyRotator(prefix="NVIDIA_API_", max_slots=5)
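
# Example environment configuration (a sketch; the key values are placeholders).
# The names NVIDIA_API_1..NVIDIA_API_5 follow the prefix/max_slots above, and
# NVIDIA_SMALL is read in llama_chat below:
#
#   export NVIDIA_API_1="nvapi-..."
#   export NVIDIA_API_2="nvapi-..."
#   export NVIDIA_SMALL="meta/llama-3.1-8b-instruct"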

async def llama_chat(messages: List[dict], temperature: float = 0.2) -> str:
    model = os.getenv("NVIDIA_SMALL", "meta/llama-3.1-8b-instruct")
    # Get a key via the rotator (robust_post_json handles rotation/retries)
    key = ROTATOR.get_key()
    if not key:
        raise RuntimeError("NVIDIA API key not set (NVIDIA_API_*)")
    url = "https://integrate.api.nvidia.com/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
    payload = {"model": model, "temperature": temperature, "messages": messages}
    data = await robust_post_json(url, headers, payload, ROTATOR)
    return data["choices"][0]["message"]["content"].strip()

async def llama_summarize(text: str, max_sentences: int = 3) -> str:
    text = (text or "").strip()
    if not text:
        return ""
    system = (
        "You are a precise summarizer. Produce a clear, faithful summary of the user's text. "
        f"Return ~{max_sentences} sentences, no comments, no preface, no markdown."
    )
    user = f"Summarize this text:\n\n{text}"
    try:
        return await llama_chat([
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ])
    except Exception as e:
        logger.warning(f"LLAMA summarization failed: {e}; using fallback")
        return naive_fallback(text, max_sentences)

def naive_fallback(text: str, max_sentences: int = 3) -> str:
    """Crude extractive fallback: return the first few sentences verbatim."""
    parts = [p.strip() for p in text.split('. ') if p.strip()]
    return '. '.join(parts[:max_sentences])
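
# e.g. naive_fallback("First point. Second point. Third point. Fourth.", 2)
# -> "First point. Second point"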

async def summarize_text(text: str, max_sentences: int = 6, chunk_size: int = 2500) -> str:
    """Hierarchical summarization for long texts using NVIDIA Llama."""
    if not text:
        return ""
    if len(text) <= chunk_size:
        return await llama_summarize(text, max_sentences=max_sentences)
    # Split into chunks on paragraph boundaries if possible
    paragraphs = text.split('\n\n')
    chunks: List[str] = []
    buf: List[str] = []
    total = 0
    for p in paragraphs:
        if total + len(p) > chunk_size and buf:
            chunks.append('\n\n'.join(buf))
            buf, total = [], 0
        buf.append(p)
        total += len(p)
    if buf:
        chunks.append('\n\n'.join(buf))
    # Summarize each chunk, yielding to the event loop between calls
    partials = []
    for ch in chunks:
        partials.append(await llama_summarize(ch, max_sentences=3))
        await asyncio.sleep(0)
    # Second pass: summarize the combined partial summaries
    combined = '\n'.join(partials)
    return await llama_summarize(combined, max_sentences=max_sentences)
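
# Usage sketch: texts longer than chunk_size are split on paragraph breaks,
# each chunk is reduced to ~3 sentences, and the joined partial summaries are
# summarized once more (illustrative; long_document is a placeholder):
#
#   summary = await summarize_text(long_document, max_sentences=6)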

async def clean_chunk_text(text: str) -> str:
    """Use the NVIDIA LLM to remove headers/footers and personally identifying
    or institutional boilerplate. Keep the core academic content intact; do not
    remove page numbers or section titles.
    """
    content = (text or "").strip()
    if not content:
        return content
    system = (
        "You are a content cleaner. Remove boilerplate headers/footers like institution names, course codes, student IDs, "
        "emails, author IDs, document footers/headers repeated across pages. Keep headings and the main body content. "
        "Preserve meaningful section titles. Keep pagination references in the natural text if present. Return only cleaned text."
    )
    user = f"Clean this content by removing headers/footers and IDs, keep core content:\n\n{content}"
    try:
        return await llama_chat([
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ], temperature=0.0)
    except Exception as e:
        logger.warning(f"LLAMA cleaning failed: {e}; returning original text")
        return content
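
# Usage sketch (illustrative; page_text is a placeholder for one extracted
# document chunk):
#
#   cleaned = await clean_chunk_text(page_text)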

# Backward-compatible name used by app.py
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
    return await llama_summarize(text, max_sentences=max_sentences)
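
# Minimal manual test (a sketch, not part of the module API; assumes the utils
# package is importable). Without NVIDIA_API_* keys, llama_summarize logs a
# warning and falls back to naive_fallback, so this still prints a result.
if __name__ == "__main__":
    sample = (
        "Transformers process tokens in parallel. Attention scores pairwise "
        "relevance between tokens. Positional encodings inject word order."
    )
    print(asyncio.run(summarize_text(sample, max_sentences=2)))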