Ptato committed on
Commit
05d5cb6
·
1 Parent(s): 5505986

Model integration

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. .idea/vcs.xml +1 -0
  3. app.py +37 -39
  4. my_model +1 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.idea/vcs.xml CHANGED
@@ -2,5 +2,6 @@
2
  <project version="4">
3
  <component name="VcsDirectoryMappings">
4
  <mapping directory="" vcs="Git" />
 
5
  </component>
6
  </project>
 
2
  <project version="4">
3
  <component name="VcsDirectoryMappings">
4
  <mapping directory="" vcs="Git" />
5
+ <mapping directory="$PROJECT_DIR$/my_model" vcs="Git" />
6
  </component>
7
  </project>
app.py CHANGED
@@ -25,7 +25,7 @@ form = st.form(key='Sentiment Analysis')
25
  st.session_state.options = ['bertweet-base-sentiment-analysis',
26
  'distilbert-base-uncased-finetuned-sst-2-english',
27
  'twitter-roberta-base-sentiment',
28
- # 'Modified Bert Toxicity Classification'
29
  ]
30
  box = form.selectbox('Select Pre-trained Model:', st.session_state.options, key=1)
31
  tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
@@ -42,6 +42,7 @@ if not st.session_state.filled:
42
  print(x)
43
  text = st.session_state.df["comment_text"].iloc[x][:128]
44
  for s in st.session_state.options:
 
45
  if s == 'bertweet-base-sentiment-analysis':
46
  pline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
47
  elif s == 'twitter-roberta-base-sentiment':
@@ -49,7 +50,7 @@ if not st.session_state.filled:
49
  elif s == 'distilbert-base-uncased-finetuned-sst-2-english':
50
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
51
  else:
52
- model = AutoModelForSequenceClassification.from_pretrained('./model')
53
  model.eval()
54
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
55
  encoding = tokenizer(tweet, return_tensors="pt")
@@ -108,7 +109,7 @@ if not st.session_state.filled:
108
  if max(predictions) == 0:
109
  log[0] = 0
110
  log[2] = ("NO TOXICITY")
111
- log[3] = (f"{100 - round(probs[0] * 100, 1)}%")
112
  log[4] = ("N/A")
113
  log[5] = ("N/A")
114
  else:
@@ -116,14 +117,14 @@ if not st.session_state.filled:
116
  _max = 0
117
  _max2 = 2
118
  for i in range(1, len(predictions)):
119
- if probs[i] > probs[_max]:
120
  _max = i
121
- if i > 2 and probs[i] > probs[_max2]:
122
  _max2 = i
123
  log[2] = (st.session_state.labels[_max])
124
- log[3] = (f"{round(probs[_max] * 100, 1)}%")
125
  log[4] = (st.session_state.labels[_max2])
126
- log[5] = (f"{round(probs[_max2] * 100, 1)}%")
127
  st.session_state.logs[s].append(log)
128
 
129
  if submit and tweet:
@@ -131,6 +132,7 @@ if submit and tweet:
131
  time.sleep(1)
132
 
133
  if tweet is not None:
 
134
  if box != 'Modified Bert Toxicity Classification':
135
  col1, col2, col3 = st.columns(3)
136
  else:
@@ -141,43 +143,29 @@ if submit and tweet:
141
  pline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
142
  elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
143
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
144
-
145
-
146
- # <--- Unecessary Testing --->
147
- model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
148
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
149
  encoding = tokenizer(tweet, return_tensors="pt")
150
  encoding = {k: v.to(model.device) for k,v in encoding.items()}
151
  predictions = model(**encoding)
152
-
153
  logits = predictions.logits
154
  sigmoid = torch.nn.Sigmoid()
155
  probs = sigmoid(logits.squeeze().cpu())
156
- print(probs)
157
  predictions = np.zeros(probs.shape)
158
  predictions[np.where(probs >= 0.5)] = 1
159
- # turn predicted id's into actual label names
160
- st.session_state.id2label = {idx: label for idx, label in enumerate(st.session_state.labels)}
161
  predicted_labels = [st.session_state.id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
162
- print(predicted_labels)
163
- print(predictions[0])
164
- else:
165
- model = AutoModelForSequenceClassification.from_pretrained('./model')
166
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
167
- encoding = tokenizer(tweet, return_tensors="pt")
168
- encoding = {k: v.to(model.device) for k,v in encoding.items()}
169
- predictions = model(**encoding)
170
- print(predictions)
171
  if pline:
172
  predictions = pline(tweet)
173
  col2.header("Judgement")
174
  else:
175
- col2.header("")
176
- col4.header("Toxicity Type")
177
- col5.header("Probability")
178
 
179
  col1.header("Tweet")
180
- col3.header("Probability")
181
 
182
  if pline:
183
  log = [0] * 4
@@ -260,39 +248,49 @@ if submit and tweet:
260
  log = [0] * 6
261
  log[1] = tweet
262
  if max(predictions) == 0:
263
- col1.success(tweet.split("\n")[0][:20])
264
  col2.success("NO TOXICITY")
265
- col3.success(f"{100 - round(probs[0] * 100, 1)}%")
266
  col4.success("N/A")
267
  col5.success("N/A")
 
 
 
 
 
268
  else:
269
  _max = 0
270
  _max2 = 2
271
  for i in range(1, len(predictions)):
272
- if probs[i] > probs[_max]:
273
  _max = i
274
- if i > 2 and probs[i] > probs[_max2]:
275
  _max2 = i
276
- col1.error(tweet.split("\n")[0][:20])
277
  col2.error(st.session_state.labels[_max])
278
- col3.error(f"{round(probs[_max] * 100, 1)}%")
279
  col4.error(st.session_state.labels[_max2])
280
- col5.error(f"{round(probs[_max2] * 100, 1)}%")
 
 
 
 
 
281
  for a in st.session_state.logs[box][::-1]:
282
  if a[0] == 0:
283
- col1.success(a[1].split("\n")[0][:20])
284
  col2.success(a[2])
285
  col3.success(a[3])
286
  col4.success(a[4])
287
  col5.success(a[5])
288
  elif a[0] == 1:
289
- col1.error(a[1].split("\n")[0][:20])
290
  col2.error(a[2])
291
  col3.error(a[3])
292
  col4.error(a[4])
293
  col5.error(a[5])
294
  else:
295
- col1.warning(a[1].split("\n")[0][:20])
296
  col2.warning(a[2])
297
  col3.warning(a[3])
298
  col4.warning(a[4])
 
25
  st.session_state.options = ['bertweet-base-sentiment-analysis',
26
  'distilbert-base-uncased-finetuned-sst-2-english',
27
  'twitter-roberta-base-sentiment',
28
+ 'Modified Bert Toxicity Classification'
29
  ]
30
  box = form.selectbox('Select Pre-trained Model:', st.session_state.options, key=1)
31
  tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
 
42
  print(x)
43
  text = st.session_state.df["comment_text"].iloc[x][:128]
44
  for s in st.session_state.options:
45
+ pline = None
46
  if s == 'bertweet-base-sentiment-analysis':
47
  pline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
48
  elif s == 'twitter-roberta-base-sentiment':
 
50
  elif s == 'distilbert-base-uncased-finetuned-sst-2-english':
51
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
52
  else:
53
+ model = AutoModelForSequenceClassification.from_pretrained('./my_model')
54
  model.eval()
55
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
56
  encoding = tokenizer(tweet, return_tensors="pt")
 
109
  if max(predictions) == 0:
110
  log[0] = 0
111
  log[2] = ("NO TOXICITY")
112
+ log[3] = (f"{100 - round(probs[0].item() * 100, 1)}%")
113
  log[4] = ("N/A")
114
  log[5] = ("N/A")
115
  else:
 
117
  _max = 0
118
  _max2 = 2
119
  for i in range(1, len(predictions)):
120
+ if probs[i].item() > probs[_max].item():
121
  _max = i
122
+ if i > 2 and probs[i].item() > probs[_max2].item():
123
  _max2 = i
124
  log[2] = (st.session_state.labels[_max])
125
+ log[3] = (f"{round(probs[_max].item() * 100, 1)}%")
126
  log[4] = (st.session_state.labels[_max2])
127
+ log[5] = (f"{round(probs[_max2].item() * 100, 1)}%")
128
  st.session_state.logs[s].append(log)
129
 
130
  if submit and tweet:
 
132
  time.sleep(1)
133
 
134
  if tweet is not None:
135
+ pline = None
136
  if box != 'Modified Bert Toxicity Classification':
137
  col1, col2, col3 = st.columns(3)
138
  else:
 
143
  pline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
144
  elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
145
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
146
+ else:
147
+ model = AutoModelForSequenceClassification.from_pretrained('./my_model')
148
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 
 
149
  encoding = tokenizer(tweet, return_tensors="pt")
150
  encoding = {k: v.to(model.device) for k,v in encoding.items()}
151
  predictions = model(**encoding)
 
152
  logits = predictions.logits
153
  sigmoid = torch.nn.Sigmoid()
154
  probs = sigmoid(logits.squeeze().cpu())
155
+ print(probs[0].item())
156
  predictions = np.zeros(probs.shape)
157
  predictions[np.where(probs >= 0.5)] = 1
 
 
158
  predicted_labels = [st.session_state.id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
 
 
 
 
 
 
 
 
 
159
  if pline:
160
  predictions = pline(tweet)
161
  col2.header("Judgement")
162
  else:
163
+ col2.header("Category")
164
+ col4.header("Type")
165
+ col5.header("Score")
166
 
167
  col1.header("Tweet")
168
+ col3.header("Score")
169
 
170
  if pline:
171
  log = [0] * 4
 
248
  log = [0] * 6
249
  log[1] = tweet
250
  if max(predictions) == 0:
251
+ col1.success(tweet.split("\n")[0][:10])
252
  col2.success("NO TOXICITY")
253
+ col3.success(f"{100 - round(probs[0].item() * 100, 1)}%")
254
  col4.success("N/A")
255
  col5.success("N/A")
256
+ log[0] = 0
257
+ log[2] = "NO TOXICITY"
258
+ log[3] = (f"{100 - round(probs[0].item() * 100, 1)}%")
259
+ log[4] = ("N/A")
260
+ log[5] = ("N/A")
261
  else:
262
  _max = 0
263
  _max2 = 2
264
  for i in range(1, len(predictions)):
265
+ if probs[i].item() > probs[_max].item():
266
  _max = i
267
+ if i > 2 and probs[i].item() > probs[_max2].item():
268
  _max2 = i
269
+ col1.error(tweet.split("\n")[0][:10])
270
  col2.error(st.session_state.labels[_max])
271
+ col3.error(f"{round(probs[_max].item() * 100, 1)}%")
272
  col4.error(st.session_state.labels[_max2])
273
+ col5.error(f"{round(probs[_max2].item() * 100, 1)}%")
274
+ log[0] = 1
275
+ log[2] = (st.session_state.labels[_max])
276
+ log[3] = (f"{round(probs[_max].item() * 100, 1)}%")
277
+ log[4] = (st.session_state.labels[_max2])
278
+ log[5] = (f"{round(probs[_max2].item() * 100, 1)}%")
279
  for a in st.session_state.logs[box][::-1]:
280
  if a[0] == 0:
281
+ col1.success(a[1].split("\n")[0][:10])
282
  col2.success(a[2])
283
  col3.success(a[3])
284
  col4.success(a[4])
285
  col5.success(a[5])
286
  elif a[0] == 1:
287
+ col1.error(a[1].split("\n")[0][:10])
288
  col2.error(a[2])
289
  col3.error(a[3])
290
  col4.error(a[4])
291
  col5.error(a[5])
292
  else:
293
+ col1.warning(a[1].split("\n")[0][:10])
294
  col2.warning(a[2])
295
  col3.warning(a[3])
296
  col4.warning(a[4])
my_model ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 9dba0111084fc986897d95a419f8f63d76973d00