Spaces:
Runtime error
Runtime error
| from typing import Dict, List, Union | |
| import logging | |
| from textblob import TextBlob | |
| import nltk | |
| from nltk.tokenize import sent_tokenize, word_tokenize | |
| from nltk.corpus import stopwords | |
| from smolagents import tool | |
# Set up logging
logger = logging.getLogger(__name__)

# Download required NLTK data.
# - 'punkt' / 'punkt_tab': sentence and word tokenizer models. Recent NLTK
#   releases load 'punkt_tab'; older ones load 'punkt', so both are requested.
# - 'stopwords': stop-word lists used to filter content words.
# - 'averaged_perceptron_tagger' (+ '_eng' name used by recent NLTK): POS tagger.
# - 'brown': corpus TextBlob's default noun-phrase extractor trains on.
_NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'brown',
)

for _resource in _NLTK_RESOURCES:
    # Each resource is attempted independently so that one failure does not
    # prevent the remaining downloads. This stays best-effort: failures are
    # logged, never raised, so importing the module still succeeds offline.
    try:
        # nltk.download() signals most failures by returning False instead of
        # raising, so the return value has to be checked explicitly.
        if not nltk.download(_resource, quiet=True):
            logger.error("Failed to download NLTK data: %s", _resource)
    except Exception as e:
        logger.error("Failed to download NLTK data: %s", e)
def analyze_text(text: str) -> Dict[str, Union[str, List[str], Dict[str, Union[str, float, int]]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
            - sentiment: Dict with "polarity" and "subjectivity" scores (rounded
              to two decimals) and a "sentiment_label" of "positive",
              "negative" or "neutral"
            - key_phrases: Up to five distinct noun phrases, in order of first
              appearance
            - readability: Dict with "score", "avg_sentence_length" and
              "avg_word_length"; values that cannot be computed (no sentences
              or no content words) are reported as 0.0
            - summary: Dict with "sentence_count", "word_count" and
              "unique_words"
        If the analysis raises, the same keys are returned with zeroed/empty
        values and an additional "error" key holding the failure message.
    """
    try:
        # Create TextBlob object
        blob = TextBlob(text)

        # Sentiment analysis. The label is derived from the unrounded polarity.
        scores = blob.sentiment
        polarity = scores.polarity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(scores.subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # Extract key phrases (noun phrases). dict.fromkeys de-duplicates while
        # keeping first-appearance order, so the five phrases returned are
        # stable across runs (a set would yield an arbitrary five).
        key_phrases = list(dict.fromkeys(str(phrase) for phrase in blob.noun_phrases))[:5]

        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        # Load the stop-word list once, as a set, instead of once per token.
        stop_words = set(stopwords.words('english'))
        words_no_stop = [
            word.lower() for word in words
            if word.isalnum() and word.lower() not in stop_words
        ]

        # Calculate readability (basic metric based on sentence and word length).
        # Empty input, or input with no content words, would otherwise divide by
        # zero and throw away the sentiment/statistics already computed.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        if words_no_stop:
            avg_word_length = sum(len(word) for word in words_no_stop) / len(words_no_stop)
        else:
            avg_word_length = 0.0

        if sentences and words_no_stop:
            # NOTE(review): the coefficients match the Flesch-Kincaid grade-level
            # formula, but characters per content word is used where that
            # formula expects syllables per word, so treat the score as a rough
            # relative indicator rather than a school grade.
            readability_score = round((avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1)
        else:
            readability_score = 0.0

        # Prepare response
        analysis_result = {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1)
            },
            "summary": {
                "sentence_count": len(sentences),
                # Counts every token returned by word_tokenize (unfiltered).
                "word_count": len(words),
                # Distinct lower-cased alphanumeric tokens that are not stop words.
                "unique_words": len(set(words_no_stop))
            }
        }
        return analysis_result
    except Exception as e:
        # Top-level boundary: callers always get a well-formed result dict.
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error in text analysis: %s", e)
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0}
        }