Commit e2a5271
Parent(s): ba55d6f

Add application file

Files changed:
- main.py (+220 -0)
- requirements.txt (+11 -0)
main.py
ADDED
@@ -0,0 +1,220 @@
import functools

import streamlit as st
from streamlit_option_menu import option_menu
import streamlit.components.v1 as html
import pandas as pd
import numpy as np
from pathlib import Path
import altair as alt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import RobertaConfig
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
import torch
# from torch import cuda
import gradio as gr
import os
import re
import torch, gc


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device

tokenizer = AutoTokenizer.from_pretrained("devloverumar/chatgpt-content-detector")
model = AutoModelForSequenceClassification.from_pretrained("devloverumar/chatgpt-content-detector", num_labels=2)
# from PIL import Image
# gc.collect()
# torch.cuda.empty_cache()


# split raw text into sentences on sentence-ending punctuation followed by a capital letter
def text_to_sentences(text):
    clean_text = text.replace('\n', ' ')
    return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)

# function to concatenate sentences into chunks of size 900 or less
def chunks_of_900(text, chunk_size=900):
    sentences = text_to_sentences(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk + sentence) <= chunk_size:
            if len(current_chunk) != 0:
                current_chunk += " " + sentence
            else:
                current_chunk += sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence
    chunks.append(current_chunk)
    return chunks

# classify a single chunk and return the probability that it is human-written
def predict(query):
    tokens = tokenizer.encode(query)
    all_tokens = len(tokens)
    tokens = tokens[:tokenizer.model_max_length - 2]
    used_tokens = len(tokens)
    tokens = torch.tensor([tokenizer.bos_token_id] + tokens + [tokenizer.eos_token_id]).unsqueeze(0)
    mask = torch.ones_like(tokens)

    with torch.no_grad():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    real, fake = probs.detach().cpu().flatten().numpy().tolist()  # Hello-SimpleAI/chatgpt-detector-roberta
    # fake, real = probs.detach().cpu().flatten().numpy().tolist()  # PirateXX/AI-Content-Detector-V2

    return real

# run the detector on each chunk and combine the per-chunk probabilities,
# weighting each one by the chunk's character length
def findRealProb(text):
    chunksOfText = chunks_of_900(text)
    results = []
    for chunk in chunksOfText:
        output = predict(chunk)
        results.append([output, len(chunk)])

    ans = 0
    cnt = 0
    for prob, length in results:
        cnt += length
        ans = ans + prob * length
    realProb = ans / cnt
    return {"Real": realProb, "Fake": 1 - realProb}, results

TXT_TO_INSPECT = None

def inspect_content():
    # on_change callback for the text area below; Streamlit invokes on_change
    # callbacks with no arguments, so this only flags that the current text
    # should be inspected -- the text itself is read from `txt` further down.
    global TXT_TO_INSPECT
    TXT_TO_INSPECT = True

st.markdown(""" <style> .appview-container .main .block-container {
        max-width: 100%;
        padding-top: 1rem;
        padding-right: 1rem;
        padding-left: 1rem;
        padding-bottom: 1rem;
    }</style> """, unsafe_allow_html=True)
# Add a logo (optional) in the sidebar
# logo = Image.open(r'C:\Users\13525\Desktop\Insights_Bees_logo.png')
with st.sidebar:
    choose = option_menu("Forensic Examiner", ["Inspect Content", "Generate Content", "About", "Contact"],
                         icons=['camera fill', 'kanban', 'book', 'person lines fill'],
                         menu_icon="app-indicator", default_index=0,
                         styles={
                             "container": {"padding": "0 5 5 5 !important", "background-color": "#fafafa"},
                             "icon": {"color": "orange", "font-size": "25px"},
                             "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "--hover-color": "#eee"},
                             "nav-link-selected": {"background-color": "#02ab21"},
                         }
                         )


if choose == "Inspect Content":
    # Add the cover image for the cover page. Used a little trick to center the image
    st.markdown(""" <style> .font {
        font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
        </style> """, unsafe_allow_html=True)
    col1, col2 = st.columns([0.8, 0.2])
    with col1:  # To display the header text using css style
        st.markdown('<p class="font">Inspect Content</p>', unsafe_allow_html=True)

    with col2:  # To display brand logo
        st.image('./media/inspection-1.jpg', width=100)

    txt = st.text_area('Add Text here', height=300, max_chars=2000, value='''
    Cristiano Ronaldo is a Portuguese professional soccer player who currently plays
    as a forward for Manchester United and the Portugal national team. He is widely
    considered one of the greatest soccer players of all time, having won numerous
    awards and accolades throughout his career. Ronaldo began his professional career
    with Sporting CP in Portugal before moving to Manchester United in 2003.
    He spent six seasons with the club, winning three Premier League titles
    and one UEFA Champions League title. In 2009, he transferred to Real Madrid
    for a then-world record transfer fee of $131 million. He spent nine seasons with
    the club, winning four UEFA Champions League titles, two La Liga titles,
    and two Copa del Rey titles. In 2018, he transferred to Juventus, where he spent
    three seasons before returning to Manchester United in 2021. He has also had
    a successful international career with the Portugal national team, having won
    the UEFA European Championship in 2016 and the UEFA Nations League in 2019.
    ''', on_change=inspect_content)

    if TXT_TO_INSPECT is not None:
        with st.spinner('Loading the model..'):
            model.to(device)

        st.success('Model Loaded!', icon="✅")
        # st.success(f'Reported EER for the selected model {reported_eer}%')
        with st.spinner("Getting prediction..."):
            # print(audio.shape)
            predictions = findRealProb(txt)
            print('prediction_value', predictions)
            if predictions[0]['Fake'] > 0.5:
                # st.error(f"The Sample is spoof: \n Confidence {(prediction_value)}%", icon="🚨")
                st.error("This text is AI generated", icon="🚨")
            else:
                st.success("This text is real", icon="✅")


# if choose == "Generate Content":
#     st.markdown(""" <style> .font {
#     font-size:25px ; font-family: 'Cooper Black'; color: #FF9633;}
#     </style> """, unsafe_allow_html=True)
#     st.markdown('<p class="font">Comparison of Models</p>', unsafe_allow_html=True)
#     data_frame = get_data()
#     tab1, tab2 = st.tabs(["EER", "min-TDCF"])
#     with tab1:
#         data_frame["EER ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
#         data_frame["EER ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
#         data_frame["Cross-dataset 19-21"] = data_frame["Cross-dataset 19-21"].astype('float64')

#         data = data_frame[["Model Name","EER ASVS 2019","EER ASVS 2021","Cross-dataset 19-21"]].reset_index(drop=True).melt('Model Name')
#         chart = alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     with tab2:
#         data_frame["min-TDCF ASVS 2019"] = data_frame["EER ASVS 2019"].astype('float64')
#         data_frame["min-TDCF ASVS 2021"] = data_frame["EER ASVS 2021"].astype('float64')
#         data_frame["min-TDCF Cross-dataset"] = data_frame["Cross-dataset 19-21"].astype('float64')

#         data = data_frame[["Model Name","min-TDCF ASVS 2019","min-TDCF ASVS 2021","min-TDCF Cross-dataset"]].reset_index(drop=True).melt('Model Name')
#         chart = alt.Chart(data).mark_line().encode(
#             x='Model Name',
#             y='value',
#             color='variable'
#         )
#         st.altair_chart(chart, theme=None, use_container_width=True)
#     # Data table
#     st.markdown(""" <style> .appview-container .main .block-container {
#     max-width: 100%;
#     padding-top: 1rem;
#     padding-right: 1rem;
#     padding-left: 1rem;
#     padding-bottom: 1rem;
#     }</style> """, unsafe_allow_html=True)
#     st.dataframe(data_frame, use_container_width=True)


if choose == "About":
    st.markdown(""" <style> .font {
        font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
        </style> """, unsafe_allow_html=True)
    st.markdown('<p class="font">About</p>', unsafe_allow_html=True)
if choose == "Contact":
    st.markdown(""" <style> .font {
        font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
        </style> """, unsafe_allow_html=True)
    st.markdown('<p class="font">Contact Us</p>', unsafe_allow_html=True)
    with st.form(key='columns_in_form2', clear_on_submit=True):  # set clear_on_submit=True so that the form will be reset/cleared once it's submitted
        # st.write('Please help us improve!')
        Name = st.text_input(label='Please Enter Your Name')  # Collect user feedback
        Email = st.text_input(label='Please Enter Your Email')  # Collect user feedback
        Message = st.text_input(label='Please Enter Your Message')  # Collect user feedback
        submitted = st.form_submit_button('Submit')
        if submitted:
            st.write('Thanks for contacting us. We will respond to your questions or inquiries as soon as possible!')
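For illustration only (not part of the committed file): findRealProb reports a length-weighted average of the per-chunk probabilities, so two hypothetical chunk results would combine as sketched below. The probabilities and lengths are made-up numbers, not outputs of the model.

# Hypothetical per-chunk results in the same [probability, chunk_length]
# format that findRealProb collects internally.
results = [[0.9, 800], [0.4, 200]]

total_chars = sum(length for _, length in results)                    # 1000
real_prob = sum(prob * length for prob, length in results) / total_chars
print(real_prob)  # 0.8 -> reported as {"Real": 0.8, "Fake": 0.2}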
requirements.txt
ADDED
@@ -0,0 +1,11 @@
transformers
flask
torch
gradio
datasets
evaluate
scikit-learn
scipy
matplotlib
accelerate
nvidia-ml-py3
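Note that main.py also imports streamlit, streamlit_option_menu, pandas, numpy, and altair. If the Space's base image does not already provide them, the requirements list would presumably need additional entries along these lines (an assumption about the runtime environment, not part of this commit):

streamlit
streamlit-option-menu
pandas
numpy
altair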