Spaces:

SzegedAI
/

AI_Detector

Running

App Files Files Community

mihalykiss committed on Jan 13

Commit

a08a74d

1 Parent(s): b55fefb

examples

Browse files

Files changed (1) hide show

app.py +19 -8

app.py CHANGED Viewed

@@ -2,8 +2,9 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-model_path = "modernbert.bin"
-huggingface_model_url = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
@@ -16,6 +17,11 @@ model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/Modern
 model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
 model_2.to(device).eval()
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
     6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
@@ -31,18 +37,24 @@ label_mapping = {
 def classify_text(text):
     if not text.strip():
-        return "----"
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         logits_1 = model_1(**inputs).logits
         logits_2 = model_2(**inputs).logits
         softmax_1 = torch.softmax(logits_1, dim=1)
         softmax_2 = torch.softmax(logits_2, dim=1)
-        averaged_probabilities = (softmax_1 + softmax_2) / 2
         probabilities = averaged_probabilities[0]
     ai_probs = probabilities.clone()
@@ -73,8 +85,8 @@ title = "AI Text Detector"
 description = """
-This tool uses the **ModernBERT** model to identify whether a given text was written by a human or generated by artificial intelligence (AI).
 <br>
 <div style="line-height: 1.8;">
@@ -200,11 +212,10 @@ with iface:
     text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
     result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
     text_input.change(classify_text, inputs=text_input, outputs=result_output)
-    with gr.Tab("AI Text Examples"):
         gr.Examples(AI_texts, inputs=text_input)
-    with gr.Tab("Human Text Examples"):
         gr.Examples(Human_texts, inputs=text_input)
     gr.Markdown(bottom_text, elem_id="bottom_text")
 iface.launch(share=True)

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+model1 = "modernbert.bin"
+model2 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
+model3 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
 model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
 model_2.to(device).eval()
+model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
+model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3, map_location=device))
+model_3.to(device).eval()
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
     6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
 def classify_text(text):
     if not text.strip():
+        result_message = (
+            f"----"
+            f"Results will appear here..."
+        )
+        return result_message
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         logits_1 = model_1(**inputs).logits
         logits_2 = model_2(**inputs).logits
+        logits_3 = model_3(**inputs).logits
         softmax_1 = torch.softmax(logits_1, dim=1)
         softmax_2 = torch.softmax(logits_2, dim=1)
+        softmax_3 = torch.softmax(logits_3, dim=1)
+        averaged_probabilities = (softmax_1 + softmax_2 + softmax_3) / 3
         probabilities = averaged_probabilities[0]
     ai_probs = probabilities.clone()
 description = """
+This tool uses the ModernBERT model to identify whether a given text was written by a human or generated by artificial intelligence (AI). It works with a soft voting ensemble using three models, combining their outputs to improve the accuracy.
 <br>
 <div style="line-height: 1.8;">
     text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
     result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
     text_input.change(classify_text, inputs=text_input, outputs=result_output)
+    with gr.Tab("AI text examples"):
         gr.Examples(AI_texts, inputs=text_input)
+    with gr.Tab("Human text examples"):
         gr.Examples(Human_texts, inputs=text_input)
     gr.Markdown(bottom_text, elem_id="bottom_text")
 iface.launch(share=True)