Spaces:
Running
Running
Commit
·
a08a74d
1
Parent(s):
b55fefb
examples
Browse files
app.py
CHANGED
|
@@ -2,8 +2,9 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import torch
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
|
|
|
| 7 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 8 |
|
| 9 |
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
|
@@ -16,6 +17,11 @@ model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/Modern
|
|
| 16 |
model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
|
| 17 |
model_2.to(device).eval()
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
label_mapping = {
|
| 20 |
0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
|
| 21 |
6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
|
|
@@ -31,18 +37,24 @@ label_mapping = {
|
|
| 31 |
|
| 32 |
def classify_text(text):
|
| 33 |
if not text.strip():
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
|
| 37 |
|
| 38 |
with torch.no_grad():
|
| 39 |
logits_1 = model_1(**inputs).logits
|
| 40 |
logits_2 = model_2(**inputs).logits
|
|
|
|
| 41 |
|
| 42 |
softmax_1 = torch.softmax(logits_1, dim=1)
|
| 43 |
softmax_2 = torch.softmax(logits_2, dim=1)
|
|
|
|
| 44 |
|
| 45 |
-
averaged_probabilities = (softmax_1 + softmax_2) / 2
|
| 46 |
probabilities = averaged_probabilities[0]
|
| 47 |
|
| 48 |
ai_probs = probabilities.clone()
|
|
@@ -73,8 +85,8 @@ title = "AI Text Detector"
|
|
| 73 |
|
| 74 |
description = """
|
| 75 |
|
| 76 |
-
This tool uses the **ModernBERT** model to identify whether a given text was written by a human or generated by artificial intelligence (AI).
|
| 77 |
|
|
|
|
| 78 |
<br>
|
| 79 |
|
| 80 |
<div style="line-height: 1.8;">
|
|
@@ -200,11 +212,10 @@ with iface:
|
|
| 200 |
text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
|
| 201 |
result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
|
| 202 |
text_input.change(classify_text, inputs=text_input, outputs=result_output)
|
| 203 |
-
with gr.Tab("AI
|
| 204 |
gr.Examples(AI_texts, inputs=text_input)
|
| 205 |
-
with gr.Tab("Human
|
| 206 |
gr.Examples(Human_texts, inputs=text_input)
|
| 207 |
gr.Markdown(bottom_text, elem_id="bottom_text")
|
| 208 |
|
| 209 |
iface.launch(share=True)
|
| 210 |
-
|
|
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import torch
|
| 4 |
|
| 5 |
+
model1 = "modernbert.bin"
|
| 6 |
+
model2 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
|
| 7 |
+
model3 = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
|
| 8 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 9 |
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
|
|
|
| 17 |
model_2.load_state_dict(torch.hub.load_state_dict_from_url(huggingface_model_url, map_location=device))
|
| 18 |
model_2.to(device).eval()
|
| 19 |
|
| 20 |
+
model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
|
| 21 |
+
model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3, map_location=device))
|
| 22 |
+
model_3.to(device).eval()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
label_mapping = {
|
| 26 |
0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
|
| 27 |
6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
|
|
|
|
| 37 |
|
| 38 |
def classify_text(text):
|
| 39 |
if not text.strip():
|
| 40 |
+
result_message = (
|
| 41 |
+
f"----"
|
| 42 |
+
f"Results will appear here..."
|
| 43 |
+
)
|
| 44 |
+
return result_message
|
| 45 |
|
| 46 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
|
| 47 |
|
| 48 |
with torch.no_grad():
|
| 49 |
logits_1 = model_1(**inputs).logits
|
| 50 |
logits_2 = model_2(**inputs).logits
|
| 51 |
+
logits_3 = model_3(**inputs).logits
|
| 52 |
|
| 53 |
softmax_1 = torch.softmax(logits_1, dim=1)
|
| 54 |
softmax_2 = torch.softmax(logits_2, dim=1)
|
| 55 |
+
softmax_3 = torch.softmax(logits_3, dim=1)
|
| 56 |
|
| 57 |
+
averaged_probabilities = (softmax_1 + softmax_2 + softmax_3) / 3
|
| 58 |
probabilities = averaged_probabilities[0]
|
| 59 |
|
| 60 |
ai_probs = probabilities.clone()
|
|
|
|
| 85 |
|
| 86 |
description = """
|
| 87 |
|
|
|
|
| 88 |
|
| 89 |
+
This tool uses the ModernBERT model to identify whether a given text was written by a human or generated by artificial intelligence (AI). It works with a soft voting ensemble using three models, combining their outputs to improve the accuracy.
|
| 90 |
<br>
|
| 91 |
|
| 92 |
<div style="line-height: 1.8;">
|
|
|
|
| 212 |
text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
|
| 213 |
result_output = gr.Markdown("**Results will appear here...**", elem_id="result_output_box")
|
| 214 |
text_input.change(classify_text, inputs=text_input, outputs=result_output)
|
| 215 |
+
with gr.Tab("AI text examples"):
|
| 216 |
gr.Examples(AI_texts, inputs=text_input)
|
| 217 |
+
with gr.Tab("Human text examples"):
|
| 218 |
gr.Examples(Human_texts, inputs=text_input)
|
| 219 |
gr.Markdown(bottom_text, elem_id="bottom_text")
|
| 220 |
|
| 221 |
iface.launch(share=True)
|
|
|