Update pages/Analyze_Text.py
Browse files- pages/Analyze_Text.py +27 -22
pages/Analyze_Text.py
CHANGED
|
@@ -6,29 +6,19 @@ import plotly.express as px
|
|
| 6 |
from wordcloud.wordcloud import WordCloud
|
| 7 |
from configs.db_configs import add_one_item
|
| 8 |
from configs.html_features import set_image, HTML_WRAPPER
|
| 9 |
-
|
| 10 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 11 |
-
|
| 12 |
import torch
|
| 13 |
from torch.nn.functional import softmax
|
| 14 |
-
|
| 15 |
from spacy import displacy
|
| 16 |
import spacy
|
| 17 |
nlp = spacy.load('en_core_web_sm')
|
| 18 |
-
|
| 19 |
from collections import Counter
|
| 20 |
import neattext as nt
|
| 21 |
import neattext.functions as nfx
|
| 22 |
from textblob import TextBlob
|
| 23 |
import nltk
|
| 24 |
|
| 25 |
-
|
| 26 |
-
nltk.download('brown')
|
| 27 |
-
nltk.download('punkt')
|
| 28 |
-
nltk.download('wordnet')
|
| 29 |
-
nltk.download('averaged_perceptron_tagger')
|
| 30 |
-
nltk.download('conll2000')
|
| 31 |
-
nltk.download('movie_reviews')
|
| 32 |
|
| 33 |
def get_tokens_analysis(text):
|
| 34 |
doc_obj = nlp(text)
|
|
@@ -39,7 +29,6 @@ def get_tokens_analysis(text):
|
|
| 39 |
|
| 40 |
def get_entities_tokens(text):
|
| 41 |
doc_obj = nlp(text)
|
| 42 |
-
|
| 43 |
html = displacy.render(doc_obj, style='ent')
|
| 44 |
html = html.replace('\n\n', '\n')
|
| 45 |
entities_tokens_html = HTML_WRAPPER.format(html)
|
|
@@ -69,15 +58,29 @@ def plot_top_keywords_frequencies(text, n_top_keywords):
|
|
| 69 |
|
| 70 |
|
| 71 |
def get_sentence_stats(text):
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
|
| 83 |
def plot_tokens_pos(tokens_stats_df):
|
|
@@ -109,6 +112,7 @@ def plot_word_frequency(text):
|
|
| 109 |
plt.axis('off')
|
| 110 |
return fig
|
| 111 |
|
|
|
|
| 112 |
def main():
|
| 113 |
st.title('Text Analyzer')
|
| 114 |
im1, im2, im3 = st.columns([1, 5.3, 1])
|
|
@@ -122,6 +126,7 @@ def main():
|
|
| 122 |
|
| 123 |
text = st.text_area('Text Analyzer', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
|
| 124 |
n_top_keywords = st.sidebar.slider('n Top keywords', 5, 15, 5, 1)
|
|
|
|
| 125 |
if st.button('Analyze it'):
|
| 126 |
if text != '':
|
| 127 |
with st.expander('Original Text'):
|
|
@@ -157,7 +162,7 @@ def main():
|
|
| 157 |
st.write('Noun Phrases:\n', noun_phrases)
|
| 158 |
|
| 159 |
with col22:
|
| 160 |
-
with st.expander('The
|
| 161 |
figure = plot_tokens_pos(tokens_stats_df)
|
| 162 |
st.plotly_chart(figure)
|
| 163 |
|
|
|
|
| 6 |
from wordcloud.wordcloud import WordCloud
|
| 7 |
from configs.db_configs import add_one_item
|
| 8 |
from configs.html_features import set_image, HTML_WRAPPER
|
|
|
|
| 9 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
|
| 10 |
import torch
|
| 11 |
from torch.nn.functional import softmax
|
|
|
|
| 12 |
from spacy import displacy
|
| 13 |
import spacy
|
| 14 |
nlp = spacy.load('en_core_web_sm')
|
|
|
|
| 15 |
from collections import Counter
|
| 16 |
import neattext as nt
|
| 17 |
import neattext.functions as nfx
|
| 18 |
from textblob import TextBlob
|
| 19 |
import nltk
|
| 20 |
|
| 21 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def get_tokens_analysis(text):
|
| 24 |
doc_obj = nlp(text)
|
|
|
|
| 29 |
|
| 30 |
def get_entities_tokens(text):
|
| 31 |
doc_obj = nlp(text)
|
|
|
|
| 32 |
html = displacy.render(doc_obj, style='ent')
|
| 33 |
html = html.replace('\n\n', '\n')
|
| 34 |
entities_tokens_html = HTML_WRAPPER.format(html)
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def get_sentence_stats(text):
    """Split *text* into sentences and noun phrases and summarize the counts.

    Parameters
    ----------
    text : str
        Raw input text to analyze.

    Returns
    -------
    tuple
        ``(sentences, noun_phrases, sentence_stats_df)`` where ``sentences``
        is a list of sentence strings, ``noun_phrases`` is a list of noun
        phrase strings, and ``sentence_stats_df`` is a one-row DataFrame
        with the columns 'Number of Sentences' and 'Number of Noun Phrases'.
    """
    def _analyze(t):
        # One parse pass; kept in a helper so the corpus-download fallback
        # below can retry without duplicating this code.
        blob = TextBlob(t)
        return ([str(sentence) for sentence in blob.sentences],
                list(blob.noun_phrases))

    try:
        sentences, noun_phrases = _analyze(text)
    except LookupError:
        # NLTK raises LookupError when a required corpus is absent
        # (typical on a fresh machine). Fetch the corpora TextBlob needs
        # for tokenization, tagging and noun-phrase chunking, then retry.
        # Note: the original used a bare `except:` and returned from a
        # `finally:` block, which silently swallowed unrelated errors.
        for corpus in ('brown', 'punkt', 'wordnet',
                       'averaged_perceptron_tagger', 'conll2000',
                       'movie_reviews'):
            nltk.download(corpus)
        sentences, noun_phrases = _analyze(text)

    sentence_stats = {
        'Number of Sentences': len(sentences),
        'Number of Noun Phrases': len(noun_phrases),
    }
    sentence_stats_df = pd.DataFrame(sentence_stats, index=[0])
    return sentences, noun_phrases, sentence_stats_df
|
| 84 |
|
| 85 |
|
| 86 |
def plot_tokens_pos(tokens_stats_df):
|
|
|
|
| 112 |
plt.axis('off')
|
| 113 |
return fig
|
| 114 |
|
| 115 |
+
|
| 116 |
def main():
|
| 117 |
st.title('Text Analyzer')
|
| 118 |
im1, im2, im3 = st.columns([1, 5.3, 1])
|
|
|
|
| 126 |
|
| 127 |
text = st.text_area('Text Analyzer', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
|
| 128 |
n_top_keywords = st.sidebar.slider('n Top keywords', 5, 15, 5, 1)
|
| 129 |
+
|
| 130 |
if st.button('Analyze it'):
|
| 131 |
if text != '':
|
| 132 |
with st.expander('Original Text'):
|
|
|
|
| 162 |
st.write('Noun Phrases:\n', noun_phrases)
|
| 163 |
|
| 164 |
with col22:
|
| 165 |
+
with st.expander('The Distribution of different Parts of Speech'):
|
| 166 |
figure = plot_tokens_pos(tokens_stats_df)
|
| 167 |
st.plotly_chart(figure)
|
| 168 |
|