huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 11, 2024

Commit

aa379a6

verified ·

1 Parent(s): 81a4fe1

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -27

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -25,6 +26,9 @@ except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 # Function to get synonyms using NLTK WordNet (Humanifier)
 def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
@@ -51,20 +55,6 @@ def capitalize_sentences_and_nouns(text):
     return ' '.join(corrected_text)
-# Function to correct tense errors in a sentence (Tense Correction)
-def correct_tense_errors(text):
-    doc = nlp(text)
-    corrected_text = []
-    for token in doc:
-        # Check for tense correction based on modal verbs
-        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
-            # Replace with appropriate verb form
-            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
-            corrected_text.append(lemma)
-        else:
-            corrected_text.append(token.text)
-    return ' '.join(corrected_text)
 # Function to correct singular/plural errors (Singular/Plural Correction)
 def correct_singular_plural_errors(text):
     doc = nlp(text)
@@ -72,15 +62,12 @@ def correct_singular_plural_errors(text):
     for token in doc:
         if token.pos_ == "NOUN":
-            # Check if the noun is singular or plural
             if token.tag_ == "NN":  # Singular noun
-                # Look for determiners like "many", "several", "few" to correct to plural
                 if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
                     corrected_text.append(token.lemma_ + 's')
                 else:
                     corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
-                # Look for determiners like "a", "one" to correct to singular
                 if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
                     corrected_text.append(token.lemma_)
                 else:
@@ -90,6 +77,18 @@ def correct_singular_plural_errors(text):
     return ' '.join(corrected_text)
 # Function to check and correct article errors
 def correct_article_errors(text):
     doc = nlp(text)
@@ -123,12 +122,11 @@ def replace_with_synonym(token):
     if synonyms:
         synonym = synonyms[0]
-        # Ensure the correct grammatical form is maintained
-        if token.tag_ == "VBG":  # Present participle (e.g., running)
             synonym = synonym + 'ing'
-        elif token.tag_ == "VBD" or token.tag_ == "VBN":  # Past tense or past participle
             synonym = synonym + 'ed'
-        elif token.tag_ == "VBZ":  # Third-person singular present
             synonym = synonym + 's'
         return synonym
     return token.text
@@ -139,7 +137,6 @@ def correct_double_negatives(text):
     corrected_text = []
     for token in doc:
         if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
-            # Replace the double negative with a positive statement
             corrected_text.append("always")
         else:
             corrected_text.append(token.text)
@@ -151,15 +148,20 @@ def ensure_subject_verb_agreement(text):
     corrected_text = []
     for token in doc:
         if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
-            # Check if the verb agrees with the subject in number
-            if token.tag_ == "NN" and token.head.tag_ != "VBZ":  # Singular noun, should use singular verb
                 corrected_text.append(token.head.lemma_ + "s")
-            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":  # Plural noun, should not use singular verb
                 corrected_text.append(token.head.lemma_)
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to paraphrase and correct grammar
 def paraphrase_and_correct(text):
     # Capitalize first to ensure proper noun capitalization
     paraphrased_text = capitalize_sentences_and_nouns(text)
@@ -180,7 +182,10 @@ def paraphrase_and_correct(text):
         else:
             final_text.append(token.text)
-    return ' '.join(final_text)
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:

 import subprocess
 import nltk
 from nltk.corpus import wordnet
+from spellchecker import SpellChecker  # Import SpellChecker for spelling correction
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
+# Initialize SpellChecker
+spell = SpellChecker()
 # Function to get synonyms using NLTK WordNet (Humanifier)
 def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
     return ' '.join(corrected_text)
 # Function to correct singular/plural errors (Singular/Plural Correction)
 def correct_singular_plural_errors(text):
     doc = nlp(text)
     for token in doc:
         if token.pos_ == "NOUN":
             if token.tag_ == "NN":  # Singular noun
                 if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
                     corrected_text.append(token.lemma_ + 's')
                 else:
                     corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
                 if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
                     corrected_text.append(token.lemma_)
                 else:
     return ' '.join(corrected_text)
+# Function to correct tense errors in a sentence (Tense Correction): auxiliary tokens tagged VERB are replaced by their WordNet base form; returns the tokens joined by single spaces
+def correct_tense_errors(text):
+    doc = nlp(text)  # nlp is the spaCy pipeline loaded at module level
+    corrected_text = []
+    for token in doc:
+        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:  # NOTE(review): some spaCy models tag auxiliaries as AUX rather than VERB, in which case this branch would not fire - confirm
+            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text  # fall back to the original text when morphy returns nothing
+            corrected_text.append(lemma)
+        else:
+            corrected_text.append(token.text)
+    return ' '.join(corrected_text)  # the original spacing between tokens is not preserved
 # Function to check and correct article errors
 def correct_article_errors(text):
     doc = nlp(text)
     if synonyms:
         synonym = synonyms[0]
+        if token.tag_ == "VBG":
             synonym = synonym + 'ing'
+        elif token.tag_ == "VBD" or token.tag_ == "VBN":
             synonym = synonym + 'ed'
+        elif token.tag_ == "VBZ":
             synonym = synonym + 's'
         return synonym
     return token.text
     corrected_text = []
     for token in doc:
         if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
             corrected_text.append("always")
         else:
             corrected_text.append(token.text)
     corrected_text = []
     for token in doc:
         if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
+            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                 corrected_text.append(token.head.lemma_ + "s")
+            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                 corrected_text.append(token.head.lemma_)
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
+# Function to correct spelling errors
+def correct_spelling(text):
+    words = text.split()
+    corrected_words = [spell.candidates(word) or word for word in words]
+    return ' '.join(corrected_words)
+# Function to paraphrase, correct grammar, and fix spelling errors
 def paraphrase_and_correct(text):
     # Capitalize first to ensure proper noun capitalization
     paraphrased_text = capitalize_sentences_and_nouns(text)
         else:
             final_text.append(token.text)
+    # Correct spelling errors
+    final_text = correct_spelling(' '.join(final_text))
+    return final_text
 # Gradio app setup with two tabs
 with gr.Blocks() as demo: