Commit b9a198e
1 Parent(s): e483bf1
Update app.py

app.py CHANGED
@@ -9,16 +9,18 @@ tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
 
 model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
 
-
+summarizer = pipeline('summarization', model='t5-base')
 
+classifier_emotions = ['positive', 'neutral', 'negative']
 # classifier_model_name = 'bhadresh-savani/distilbert-base-uncased-emotion'
 # classifier_emotions = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
 
-def
-
-
-
-
+def summarize_sentences(sentences_by_emotion, min_length, max_length):
+    for k in sentences_by_emotion.keys():
+        if (len(sentences_by_emotion[k])!=0):
+            text = ' '.join(sentences_by_emotion[k])
+            summary = summarizer(text, min_length=min_length, max_length=max_length)
+            print(f"{k.upper()}: {summary[0]['summary_text']}\n")
 
 
 def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total_len):
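For context: the new summarize_sentences helper joins each label's sentences into a single string and prints one t5-base summary per label. Below is a minimal, self-contained sketch of how it might be driven; the sample sentences and the min_length/max_length values are illustrative assumptions, not part of this commit.

from transformers import pipeline

summarizer = pipeline('summarization', model='t5-base')

# Hypothetical input: sentences already bucketed by predicted label,
# keyed by the new classifier_emotions values.
sentences_by_emotion = {
    'positive': ['Revenue grew 12% year over year.',
                 'Operating margins improved across all segments.'],
    'neutral': ['The filing covers the fiscal year ended December 2022.'],
    'negative': [],  # empty buckets are skipped by the helper
}

def summarize_sentences(sentences_by_emotion, min_length, max_length):
    for k in sentences_by_emotion.keys():
        if (len(sentences_by_emotion[k])!=0):
            text = ' '.join(sentences_by_emotion[k])
            summary = summarizer(text, min_length=min_length, max_length=max_length)
            print(f"{k.upper()}: {summary[0]['summary_text']}\n")

summarize_sentences(sentences_by_emotion, min_length=5, max_length=30)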
@@ -36,7 +38,7 @@ def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total
         proba_list (List[torch.Tensor]): List of probability tensors for each chunk.
     """
     proba_list = []
-
+
     start = 0
     window_length = 510
 
@@ -64,6 +66,9 @@ def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total
         }
 
         outputs = model(**input_dict)
+
+        decoded = tokenizer.decode(input_ids_chunk)
+        print("########:", decoded , ":##############")
 
         probabilities = torch.nn.functional.softmax(outputs[0], dim = -1)
         proba_list.append(probabilities)
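The hunk above adds debug output inside the chunking loop: each window is decoded back to text and printed before its probabilities are stored. The diff shows only fragments of chunk_text_to_window_size_and_predict_proba, so the sketch below of the surrounding loop is an assumption: the [CLS]/[SEP] re-wrapping, the loop bounds, and the return are guesses, while window_length = 510 (BERT's 512-token limit minus the two special tokens), the input_dict call, the decode/print debug lines, and the softmax over outputs[0] come from the hunks.

import torch

def chunk_and_predict(input_ids, attention_mask, total_len, model, tokenizer):
    # Assumed reconstruction of the loop the hunks above belong to.
    proba_list = []
    start = 0
    window_length = 510  # 512-token BERT limit minus [CLS] and [SEP]
    while start < total_len:
        end = min(start + window_length, total_len)
        # Re-wrap each window with the special tokens stripped earlier.
        input_ids_chunk = [tokenizer.cls_token_id] + input_ids[start:end] + [tokenizer.sep_token_id]
        attention_mask_chunk = [1] + attention_mask[start:end] + [1]
        input_dict = {
            'input_ids': torch.tensor([input_ids_chunk]),
            'attention_mask': torch.tensor([attention_mask_chunk]),
        }
        outputs = model(**input_dict)
        decoded = tokenizer.decode(input_ids_chunk)  # debug line from the commit
        print("########:", decoded, ":##############")
        probabilities = torch.nn.functional.softmax(outputs[0], dim=-1)
        proba_list.append(probabilities)
        start = end
    return proba_list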
@@ -115,6 +120,7 @@ def my_inference_function(sec_text):
     """
     tokens = tokenizer.encode_plus(sec_text, add_special_tokens=False)
 
+
     input_ids = tokens['input_ids']
     total_len = len(input_ids)
     attention_mask = tokens['attention_mask']
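my_inference_function encodes the whole document once with add_special_tokens=False, so the chunking step can insert its own [CLS]/[SEP] per 510-token window. An illustrative call, assuming the FinBERT tokenizer loaded at the top of app.py; the sample text is made up.

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')

sec_text = "Quarterly revenue exceeded guidance while costs declined."  # made-up sample
tokens = tokenizer.encode_plus(sec_text, add_special_tokens=False)
input_ids = tokens['input_ids']
attention_mask = tokens['attention_mask']
total_len = len(input_ids)  # may exceed 512 for a real SEC filing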