Model integration
.DS_Store
ADDED
Binary file (6.15 kB)
.idea/vcs.xml
CHANGED

@@ -2,5 +2,6 @@
 <project version="4">
   <component name="VcsDirectoryMappings">
     <mapping directory="" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/my_model" vcs="Git" />
   </component>
 </project>
app.py
CHANGED

@@ -25,7 +25,7 @@ form = st.form(key='Sentiment Analysis')
 st.session_state.options = ['bertweet-base-sentiment-analysis',
                             'distilbert-base-uncased-finetuned-sst-2-english',
                             'twitter-roberta-base-sentiment',
-
+                            'Modified Bert Toxicity Classification'
                             ]
 box = form.selectbox('Select Pre-trained Model:', st.session_state.options, key=1)
 tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")

@@ -42,6 +42,7 @@ if not st.session_state.filled:
     print(x)
     text = st.session_state.df["comment_text"].iloc[x][:128]
     for s in st.session_state.options:
+        pline = None
         if s == 'bertweet-base-sentiment-analysis':
             pline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
         elif s == 'twitter-roberta-base-sentiment':

@@ -49,7 +50,7 @@ if not st.session_state.filled:
         elif s == 'distilbert-base-uncased-finetuned-sst-2-english':
             pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
         else:
-            model = AutoModelForSequenceClassification.from_pretrained('./
+            model = AutoModelForSequenceClassification.from_pretrained('./my_model')
             model.eval()
             tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
             encoding = tokenizer(tweet, return_tensors="pt")

@@ -108,7 +109,7 @@ if not st.session_state.filled:
             if max(predictions) == 0:
                 log[0] = 0
                 log[2] = ("NO TOXICITY")
-                log[3] = (f"{100 - round(probs[0] * 100, 1)}%")
+                log[3] = (f"{100 - round(probs[0].item() * 100, 1)}%")
                 log[4] = ("N/A")
                 log[5] = ("N/A")
             else:

@@ -116,14 +117,14 @@ if not st.session_state.filled:
                 _max = 0
                 _max2 = 2
                 for i in range(1, len(predictions)):
-                    if probs[i] > probs[_max]:
+                    if probs[i].item() > probs[_max].item():
                         _max = i
-                    if i > 2 and probs[i] > probs[_max2]:
+                    if i > 2 and probs[i].item() > probs[_max2].item():
                         _max2 = i
                 log[2] = (st.session_state.labels[_max])
-                log[3] = (f"{round(probs[_max] * 100, 1)}%")
+                log[3] = (f"{round(probs[_max].item() * 100, 1)}%")
                 log[4] = (st.session_state.labels[_max2])
-                log[5] = (f"{round(probs[_max2] * 100, 1)}%")
+                log[5] = (f"{round(probs[_max2].item() * 100, 1)}%")
             st.session_state.logs[s].append(log)

 if submit and tweet:

@@ -131,6 +132,7 @@ if submit and tweet:
     time.sleep(1)

     if tweet is not None:
+        pline = None
         if box != 'Modified Bert Toxicity Classification':
             col1, col2, col3 = st.columns(3)
         else:

@@ -141,43 +143,29 @@ if submit and tweet:
             pline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
         elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
             pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
-
-
-
-            model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
-            tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
+        else:
+            model = AutoModelForSequenceClassification.from_pretrained('./my_model')
+            tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
             encoding = tokenizer(tweet, return_tensors="pt")
             encoding = {k: v.to(model.device) for k,v in encoding.items()}
             predictions = model(**encoding)
-
             logits = predictions.logits
             sigmoid = torch.nn.Sigmoid()
             probs = sigmoid(logits.squeeze().cpu())
-            print(probs)
+            print(probs[0].item())
             predictions = np.zeros(probs.shape)
             predictions[np.where(probs >= 0.5)] = 1
-            # turn predicted id's into actual label names
-            st.session_state.id2label = {idx: label for idx, label in enumerate(st.session_state.labels)}
             predicted_labels = [st.session_state.id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
-            print(predicted_labels)
-            print(predictions[0])
-        else:
-            model = AutoModelForSequenceClassification.from_pretrained('./model')
-            tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-            encoding = tokenizer(tweet, return_tensors="pt")
-            encoding = {k: v.to(model.device) for k,v in encoding.items()}
-            predictions = model(**encoding)
-            print(predictions)
         if pline:
             predictions = pline(tweet)
             col2.header("Judgement")
         else:
-            col2.header("")
-            col4.header("
-            col5.header("
+            col2.header("Category")
+            col4.header("Type")
+            col5.header("Score")

         col1.header("Tweet")
-        col3.header("
+        col3.header("Score")

         if pline:
             log = [0] * 4

@@ -260,39 +248,49 @@ if submit and tweet:
             log = [0] * 6
             log[1] = tweet
             if max(predictions) == 0:
-                col1.success(tweet.split("\n")[0][:
+                col1.success(tweet.split("\n")[0][:10])
                 col2.success("NO TOXICITY")
-                col3.success(f"{100 - round(probs[0] * 100, 1)}%")
+                col3.success(f"{100 - round(probs[0].item() * 100, 1)}%")
                 col4.success("N/A")
                 col5.success("N/A")
+                log[0] = 0
+                log[2] = "NO TOXICITY"
+                log[3] = (f"{100 - round(probs[0].item() * 100, 1)}%")
+                log[4] = ("N/A")
+                log[5] = ("N/A")
             else:
                 _max = 0
                 _max2 = 2
                 for i in range(1, len(predictions)):
-                    if probs[i] > probs[_max]:
+                    if probs[i].item() > probs[_max].item():
                         _max = i
-                    if i > 2 and probs[i] > probs[_max2]:
+                    if i > 2 and probs[i].item() > probs[_max2].item():
                         _max2 = i
-                col1.error(tweet.split("\n")[0][:
+                col1.error(tweet.split("\n")[0][:10])
                 col2.error(st.session_state.labels[_max])
-                col3.error(f"{round(probs[_max] * 100, 1)}%")
+                col3.error(f"{round(probs[_max].item() * 100, 1)}%")
                 col4.error(st.session_state.labels[_max2])
-                col5.error(f"{round(probs[_max2] * 100, 1)}%")
+                col5.error(f"{round(probs[_max2].item() * 100, 1)}%")
+                log[0] = 1
+                log[2] = (st.session_state.labels[_max])
+                log[3] = (f"{round(probs[_max].item() * 100, 1)}%")
+                log[4] = (st.session_state.labels[_max2])
+                log[5] = (f"{round(probs[_max2].item() * 100, 1)}%")
             for a in st.session_state.logs[box][::-1]:
                 if a[0] == 0:
-                    col1.success(a[1].split("\n")[0][:
+                    col1.success(a[1].split("\n")[0][:10])
                     col2.success(a[2])
                     col3.success(a[3])
                     col4.success(a[4])
                     col5.success(a[5])
                 elif a[0] == 1:
-                    col1.error(a[1].split("\n")[0][:
+                    col1.error(a[1].split("\n")[0][:10])
                     col2.error(a[2])
                     col3.error(a[3])
                     col4.error(a[4])
                     col5.error(a[5])
                 else:
-                    col1.warning(a[1].split("\n")[0][:
+                    col1.warning(a[1].split("\n")[0][:10])
                     col2.warning(a[2])
                     col3.warning(a[3])
                     col4.warning(a[4])
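Taken together, the app.py changes route the new 'Modified Bert Toxicity Classification' option to a locally saved checkpoint in ./my_model, tokenize with bert-base-uncased, and read toxicity labels off sigmoid-thresholded logits. The sketch below pulls that inference path out of the Streamlit code as a standalone script; the label list is a placeholder (the app keeps its own in st.session_state.labels), and it assumes ./my_model contains a saved multi-label sequence-classification checkpoint.

import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder label set; the app stores its real labels in st.session_state.labels.
LABELS = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# Same loading pattern as the diff: local checkpoint + bert-base-uncased tokenizer.
model = AutoModelForSequenceClassification.from_pretrained("./my_model")
model.eval()
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def classify(text: str):
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    with torch.no_grad():
        logits = model(**encoding).logits
    # Multi-label head: independent sigmoid per class, thresholded at 0.5.
    probs = torch.sigmoid(logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs.numpy() >= 0.5)] = 1
    id2label = {idx: label for idx, label in enumerate(LABELS)}
    predicted = [id2label[idx] for idx, flag in enumerate(predictions) if flag == 1.0]
    return predicted, probs

if __name__ == "__main__":
    labels, probs = classify("example tweet to analyze")
    print(labels, [round(p.item() * 100, 1) for p in probs])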
my_model
ADDED

@@ -0,0 +1 @@
+Subproject commit 9dba0111084fc986897d95a419f8f63d76973d00
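The my_model entry is recorded as a gitlink ("Subproject commit …"), i.e. the checkpoint lives in its own nested Git repository rather than as plain files in the Space. For from_pretrained('./my_model') in app.py to resolve, that directory must contain a saved config plus weights. A minimal sketch of producing such a directory is below; the bert-base-uncased base and the six-label multi-label head are assumptions, not something the diff states.

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Assumed setup: a bert-base-uncased fine-tune with a 6-label multi-label head.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=6,
    problem_type="multi_label_classification",
)

# ... fine-tuning on the toxicity data would happen here ...

# Writes config.json and the weight files that from_pretrained('./my_model') expects.
model.save_pretrained("./my_model")

# app.py loads its tokenizer from "bert-base-uncased" directly, so saving one
# alongside the model is optional:
AutoTokenizer.from_pretrained("bert-base-uncased").save_pretrained("./my_model")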