Ptato committed on
Commit
05d5cb6
·
1 Parent(s): 5505986

Model integration

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. .idea/vcs.xml +1 -0
  3. app.py +37 -39
  4. my_model +1 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.idea/vcs.xml CHANGED
@@ -2,5 +2,6 @@
2
  <project version="4">
3
  <component name="VcsDirectoryMappings">
4
  <mapping directory="" vcs="Git" />
 
5
  </component>
6
  </project>
 
2
  <project version="4">
3
  <component name="VcsDirectoryMappings">
4
  <mapping directory="" vcs="Git" />
5
+ <mapping directory="$PROJECT_DIR$/my_model" vcs="Git" />
6
  </component>
7
  </project>
app.py CHANGED
@@ -25,7 +25,7 @@ form = st.form(key='Sentiment Analysis')
25
  st.session_state.options = ['bertweet-base-sentiment-analysis',
26
  'distilbert-base-uncased-finetuned-sst-2-english',
27
  'twitter-roberta-base-sentiment',
28
- # 'Modified Bert Toxicity Classification'
29
  ]
30
  box = form.selectbox('Select Pre-trained Model:', st.session_state.options, key=1)
31
  tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
@@ -42,6 +42,7 @@ if not st.session_state.filled:
42
  print(x)
43
  text = st.session_state.df["comment_text"].iloc[x][:128]
44
  for s in st.session_state.options:
 
45
  if s == 'bertweet-base-sentiment-analysis':
46
  pline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
47
  elif s == 'twitter-roberta-base-sentiment':
@@ -49,7 +50,7 @@ if not st.session_state.filled:
49
  elif s == 'distilbert-base-uncased-finetuned-sst-2-english':
50
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
51
  else:
52
- model = AutoModelForSequenceClassification.from_pretrained('./model')
53
  model.eval()
54
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
55
  encoding = tokenizer(tweet, return_tensors="pt")
@@ -108,7 +109,7 @@ if not st.session_state.filled:
108
  if max(predictions) == 0:
109
  log[0] = 0
110
  log[2] = ("NO TOXICITY")
111
- log[3] = (f"{100 - round(probs[0] * 100, 1)}%")
112
  log[4] = ("N/A")
113
  log[5] = ("N/A")
114
  else:
@@ -116,14 +117,14 @@ if not st.session_state.filled:
116
  _max = 0
117
  _max2 = 2
118
  for i in range(1, len(predictions)):
119
- if probs[i] > probs[_max]:
120
  _max = i
121
- if i > 2 and probs[i] > probs[_max2]:
122
  _max2 = i
123
  log[2] = (st.session_state.labels[_max])
124
- log[3] = (f"{round(probs[_max] * 100, 1)}%")
125
  log[4] = (st.session_state.labels[_max2])
126
- log[5] = (f"{round(probs[_max2] * 100, 1)}%")
127
  st.session_state.logs[s].append(log)
128
 
129
  if submit and tweet:
@@ -131,6 +132,7 @@ if submit and tweet:
131
  time.sleep(1)
132
 
133
  if tweet is not None:
 
134
  if box != 'Modified Bert Toxicity Classification':
135
  col1, col2, col3 = st.columns(3)
136
  else:
@@ -141,43 +143,29 @@ if submit and tweet:
141
  pline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
142
  elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
143
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
144
-
145
-
146
- # <--- Unecessary Testing --->
147
- model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
148
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
149
  encoding = tokenizer(tweet, return_tensors="pt")
150
  encoding = {k: v.to(model.device) for k,v in encoding.items()}
151
  predictions = model(**encoding)
152
-
153
  logits = predictions.logits
154
  sigmoid = torch.nn.Sigmoid()
155
  probs = sigmoid(logits.squeeze().cpu())
156
- print(probs)
157
  predictions = np.zeros(probs.shape)
158
  predictions[np.where(probs >= 0.5)] = 1
159
- # turn predicted id's into actual label names
160
- st.session_state.id2label = {idx: label for idx, label in enumerate(st.session_state.labels)}
161
  predicted_labels = [st.session_state.id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
162
- print(predicted_labels)
163
- print(predictions[0])
164
- else:
165
- model = AutoModelForSequenceClassification.from_pretrained('./model')
166
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
167
- encoding = tokenizer(tweet, return_tensors="pt")
168
- encoding = {k: v.to(model.device) for k,v in encoding.items()}
169
- predictions = model(**encoding)
170
- print(predictions)
171
  if pline:
172
  predictions = pline(tweet)
173
  col2.header("Judgement")
174
  else:
175
- col2.header("")
176
- col4.header("Toxicity Type")
177
- col5.header("Probability")
178
 
179
  col1.header("Tweet")
180
- col3.header("Probability")
181
 
182
  if pline:
183
  log = [0] * 4
@@ -260,39 +248,49 @@ if submit and tweet:
260
  log = [0] * 6
261
  log[1] = tweet
262
  if max(predictions) == 0:
263
- col1.success(tweet.split("\n")[0][:20])
264
  col2.success("NO TOXICITY")
265
- col3.success(f"{100 - round(probs[0] * 100, 1)}%")
266
  col4.success("N/A")
267
  col5.success("N/A")
 
 
 
 
 
268
  else:
269
  _max = 0
270
  _max2 = 2
271
  for i in range(1, len(predictions)):
272
- if probs[i] > probs[_max]:
273
  _max = i
274
- if i > 2 and probs[i] > probs[_max2]:
275
  _max2 = i
276
- col1.error(tweet.split("\n")[0][:20])
277
  col2.error(st.session_state.labels[_max])
278
- col3.error(f"{round(probs[_max] * 100, 1)}%")
279
  col4.error(st.session_state.labels[_max2])
280
- col5.error(f"{round(probs[_max2] * 100, 1)}%")
 
 
 
 
 
281
  for a in st.session_state.logs[box][::-1]:
282
  if a[0] == 0:
283
- col1.success(a[1].split("\n")[0][:20])
284
  col2.success(a[2])
285
  col3.success(a[3])
286
  col4.success(a[4])
287
  col5.success(a[5])
288
  elif a[0] == 1:
289
- col1.error(a[1].split("\n")[0][:20])
290
  col2.error(a[2])
291
  col3.error(a[3])
292
  col4.error(a[4])
293
  col5.error(a[5])
294
  else:
295
- col1.warning(a[1].split("\n")[0][:20])
296
  col2.warning(a[2])
297
  col3.warning(a[3])
298
  col4.warning(a[4])
 
25
  st.session_state.options = ['bertweet-base-sentiment-analysis',
26
  'distilbert-base-uncased-finetuned-sst-2-english',
27
  'twitter-roberta-base-sentiment',
28
+ 'Modified Bert Toxicity Classification'
29
  ]
30
  box = form.selectbox('Select Pre-trained Model:', st.session_state.options, key=1)
31
  tweet = form.text_input(label='Enter text to analyze:', value="\"We've seen in the last few months, unprecedented amounts of Voter Fraud.\" @SenTedCruz True!")
 
42
  print(x)
43
  text = st.session_state.df["comment_text"].iloc[x][:128]
44
  for s in st.session_state.options:
45
+ pline = None
46
  if s == 'bertweet-base-sentiment-analysis':
47
  pline = pipeline(task="sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis")
48
  elif s == 'twitter-roberta-base-sentiment':
 
50
  elif s == 'distilbert-base-uncased-finetuned-sst-2-english':
51
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
52
  else:
53
+ model = AutoModelForSequenceClassification.from_pretrained('./my_model')
54
  model.eval()
55
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
56
  encoding = tokenizer(tweet, return_tensors="pt")
 
109
  if max(predictions) == 0:
110
  log[0] = 0
111
  log[2] = ("NO TOXICITY")
112
+ log[3] = (f"{100 - round(probs[0].item() * 100, 1)}%")
113
  log[4] = ("N/A")
114
  log[5] = ("N/A")
115
  else:
 
117
  _max = 0
118
  _max2 = 2
119
  for i in range(1, len(predictions)):
120
+ if probs[i].item() > probs[_max].item():
121
  _max = i
122
+ if i > 2 and probs[i].item() > probs[_max2].item():
123
  _max2 = i
124
  log[2] = (st.session_state.labels[_max])
125
+ log[3] = (f"{round(probs[_max].item() * 100, 1)}%")
126
  log[4] = (st.session_state.labels[_max2])
127
+ log[5] = (f"{round(probs[_max2].item() * 100, 1)}%")
128
  st.session_state.logs[s].append(log)
129
 
130
  if submit and tweet:
 
132
  time.sleep(1)
133
 
134
  if tweet is not None:
135
+ pline = None
136
  if box != 'Modified Bert Toxicity Classification':
137
  col1, col2, col3 = st.columns(3)
138
  else:
 
143
  pline = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
144
  elif box == 'distilbert-base-uncased-finetuned-sst-2-english':
145
  pline = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
146
+ else:
147
+ model = AutoModelForSequenceClassification.from_pretrained('./my_model')
148
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 
 
149
  encoding = tokenizer(tweet, return_tensors="pt")
150
  encoding = {k: v.to(model.device) for k,v in encoding.items()}
151
  predictions = model(**encoding)
 
152
  logits = predictions.logits
153
  sigmoid = torch.nn.Sigmoid()
154
  probs = sigmoid(logits.squeeze().cpu())
155
+ print(probs[0].item())
156
  predictions = np.zeros(probs.shape)
157
  predictions[np.where(probs >= 0.5)] = 1
 
 
158
  predicted_labels = [st.session_state.id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
 
 
 
 
 
 
 
 
 
159
  if pline:
160
  predictions = pline(tweet)
161
  col2.header("Judgement")
162
  else:
163
+ col2.header("Category")
164
+ col4.header("Type")
165
+ col5.header("Score")
166
 
167
  col1.header("Tweet")
168
+ col3.header("Score")
169
 
170
  if pline:
171
  log = [0] * 4
 
248
  log = [0] * 6
249
  log[1] = tweet
250
  if max(predictions) == 0:
251
+ col1.success(tweet.split("\n")[0][:10])
252
  col2.success("NO TOXICITY")
253
+ col3.success(f"{100 - round(probs[0].item() * 100, 1)}%")
254
  col4.success("N/A")
255
  col5.success("N/A")
256
+ log[0] = 0
257
+ log[2] = "NO TOXICITY"
258
+ log[3] = (f"{100 - round(probs[0].item() * 100, 1)}%")
259
+ log[4] = ("N/A")
260
+ log[5] = ("N/A")
261
  else:
262
  _max = 0
263
  _max2 = 2
264
  for i in range(1, len(predictions)):
265
+ if probs[i].item() > probs[_max].item():
266
  _max = i
267
+ if i > 2 and probs[i].item() > probs[_max2].item():
268
  _max2 = i
269
+ col1.error(tweet.split("\n")[0][:10])
270
  col2.error(st.session_state.labels[_max])
271
+ col3.error(f"{round(probs[_max].item() * 100, 1)}%")
272
  col4.error(st.session_state.labels[_max2])
273
+ col5.error(f"{round(probs[_max2].item() * 100, 1)}%")
274
+ log[0] = 1
275
+ log[2] = (st.session_state.labels[_max])
276
+ log[3] = (f"{round(probs[_max].item() * 100, 1)}%")
277
+ log[4] = (st.session_state.labels[_max2])
278
+ log[5] = (f"{round(probs[_max2].item() * 100, 1)}%")
279
  for a in st.session_state.logs[box][::-1]:
280
  if a[0] == 0:
281
+ col1.success(a[1].split("\n")[0][:10])
282
  col2.success(a[2])
283
  col3.success(a[3])
284
  col4.success(a[4])
285
  col5.success(a[5])
286
  elif a[0] == 1:
287
+ col1.error(a[1].split("\n")[0][:10])
288
  col2.error(a[2])
289
  col3.error(a[3])
290
  col4.error(a[4])
291
  col5.error(a[5])
292
  else:
293
+ col1.warning(a[1].split("\n")[0][:10])
294
  col2.warning(a[2])
295
  col3.warning(a[3])
296
  col4.warning(a[4])
my_model ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 9dba0111084fc986897d95a419f8f63d76973d00