Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

App Files Files Community

sashtech committed on Sep 25, 2024

Commit

2174db5

verified ·

1 Parent(s): 051de31

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -26

app.py CHANGED Viewed

@@ -4,22 +4,25 @@ from transformers import pipeline
 import spacy
 import subprocess
 import nltk
-from nltk.corpus import wordnet
 from spellchecker import SpellChecker
 import re
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('averaged_perceptron_tagger')
 nltk.download('wordnet')
 top_words = set(stopwords.words("english"))  # More efficient as a set
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         # Handle stopwords, punctuation, and excluded words
-        if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
             return word
         # Find synonyms
@@ -52,7 +55,7 @@ def plagiarism_removal(text):
         return synonym_choice
     # Tokenize, replace words, and join them back
-    para_split = word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
     # Handle spacing around punctuation correctly
@@ -65,12 +68,6 @@ def plagiarism_removal(text):
     return " ".join(corrected_text)
 # Words we don't want to replace
 exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
 exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
@@ -81,10 +78,6 @@ pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt
 # Initialize the spell checker
 spell = SpellChecker()
-# Ensure necessary NLTK data is downloaded
-nltk.download('wordnet')
-nltk.download('omw-1.4')
 # Ensure the SpaCy model is installed
 try:
     nlp = spacy.load("en_core_web_sm")
@@ -211,14 +204,10 @@ def correct_spelling(text):
             corrected_words.append(word)
     return ' '.join(corrected_words)
 # Main function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
-     # Add synonym replacement here
     cleaned_text = remove_redundant_words(text)
-    plag_removed=plagiarism_removal(cleaned_text)
     paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
     paraphrased_text = force_first_letter_capital(paraphrased_text)
     paraphrased_text = correct_article_errors(paraphrased_text)
@@ -240,11 +229,11 @@ with gr.Blocks() as demo:
         button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
-    with gr.Tab("Paraphrasing & Grammar Correction"):
-        t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
-        button2 = gr.Button("🔄 Paraphrase and Correct")
-        result2 = gr.Textbox(lines=5, label='Corrected Text')
-        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
-demo.launch(share=True)

 import spacy
 import subprocess
 import nltk
+from nltk.corpus import wordnet, stopwords  # Import stopwords here
 from spellchecker import SpellChecker
 import re
+import random
+import string
+# Ensure necessary NLTK data is downloaded
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('averaged_perceptron_tagger')
 nltk.download('wordnet')
+nltk.download('omw-1.4')
 top_words = set(stopwords.words("english"))  # More efficient as a set
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         # Handle stopwords, punctuation, and excluded words
+        if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
             return word
         # Find synonyms
         return synonym_choice
     # Tokenize, replace words, and join them back
+    para_split = nltk.word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
     # Handle spacing around punctuation correctly
     return " ".join(corrected_text)
 # Words we don't want to replace
 exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
 exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
 # Initialize the spell checker
 spell = SpellChecker()
 # Ensure the SpaCy model is installed
 try:
     nlp = spacy.load("en_core_web_sm")
             corrected_words.append(word)
     return ' '.join(corrected_words)
 # Main function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
     cleaned_text = remove_redundant_words(text)
+    plag_removed = plagiarism_removal(cleaned_text)
     paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
     paraphrased_text = force_first_letter_capital(paraphrased_text)
     paraphrased_text = correct_article_errors(paraphrased_text)
         button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
+    with gr.Tab("Paraphrasing and Grammar Correction"):
+        t2 = gr.Textbox(lines=5, label='Input Text')
+        button2 = gr.Button("🚀 Process!")
+        output2 = gr.Textbox(lines=5, label='Processed Text')
+        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=output2)
+demo.launch()