Spaces:
Sleeping
Sleeping
| import re | |
| from typing import List | |
| from utils.logger import get_logger | |
| logger = get_logger("SUM", __name__) | |
async def clean_chunk_text(text: str) -> str:
    """Clean and normalize text for processing.

    Literal backslash-n / backslash-t sequences (two-character escape
    artifacts, not real control characters) are turned into spaces, then
    every run of whitespace is collapsed to a single space and the ends
    are trimmed.

    Args:
        text: Raw chunk text. Any falsy value (``""``, ``None``) is accepted.

    Returns:
        The normalized text, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    # Replace the escape artifacts *before* collapsing whitespace; doing it
    # afterwards would leave the spaces they introduce uncollapsed
    # (e.g. "a\\n\\nb" would come out as "a  b").
    text = text.replace("\\n", " ").replace("\\t", " ")

    # split() with no argument drops leading/trailing whitespace and treats
    # any whitespace run as one separator, so no extra strip() is needed.
    return " ".join(text.split())
async def cheap_summarize(text: str, max_sentences: int = 3) -> str:
    """Simple text-based summarization without external APIs.

    Extractive summary: the first ``max_sentences`` sentences of ``text``
    are returned, each keeping its own terminating punctuation.

    Args:
        text: Text to summarize. Any falsy value (``""``, ``None``) is
            accepted.
        max_sentences: Maximum number of leading sentences to keep.

    Returns:
        * ``""`` when ``text`` is falsy.
        * The stripped text unchanged when it is shorter than 50 characters
          or contains no more than ``max_sentences`` sentences.
        * ``""`` when ``max_sentences`` is zero or negative (and the text is
          50 characters or longer).
        * Otherwise the first ``max_sentences`` sentences joined by single
          spaces.
        If splitting fails unexpectedly, the first 200 characters of
        ``text`` (followed by ``"..."`` when truncated) are returned.
    """
    if not text:
        # Mirrors clean_chunk_text: falsy input (including None) yields "".
        return ""

    stripped = text.strip()
    if len(stripped) < 50:
        # Too short to be worth summarizing.
        return stripped

    if max_sentences <= 0:
        # A summary of zero sentences is empty, not a lone ".".
        return ""

    try:
        # Split on the whitespace that follows a sentence terminator. The
        # look-behind keeps the terminator attached to its sentence, so "?"
        # and "!" survive, and a "." with no whitespace after it (as in
        # "3.5") does not start a new sentence.
        sentences = [s for s in re.split(r'(?<=[.!?])\s+', stripped) if s]

        if len(sentences) <= max_sentences:
            return stripped

        # Every kept sentence was followed by a split point, so each one
        # already ends with its original punctuation.
        return ' '.join(sentences[:max_sentences])

    except Exception as e:
        # Deliberate best-effort: never let summarization break the caller.
        logger.warning(f"[SUM] Summarization failed: {e}")
        # Fallback: return first part of text
        return text[:200] + "..." if len(text) > 200 else text