Update app.py
app.py CHANGED
@@ -31,21 +31,18 @@ model_topic.resize_token_embeddings(len(tokenizer_topic))
 
 
 def sentiment(sent: str):
-    try:
-        sent_ = normalize(text=sent_)  # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
-    except:
-        pass
+    sent_ = normalize(text=sent)  # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
     input_sent = torch.tensor([tokenizer_sent.encode(sent_)]).to(device)
     with torch.no_grad():
         out_sent = model_sent(input_sent)
     logits_sent = out_sent.logits.softmax(dim=-1).tolist()[0]
     pred_sent = dict_[np.argmax(logits_sent)]
 
-
-
-
-
-
+    sent = replace_all(text=sent)  # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
+    sent_segment = sent.split(".")
+    for i, s in enumerate(sent_segment):
+        s = s.strip()
+        sent_segment[i] = underthesea.word_tokenize(s, format="text").split()
     dump = [[i, 'O'] for s in sent_segment for i in s]
     dump_set = NerDataset(feature_for_phobert([dump], tokenizer=tokenizer_topic, use_crf=True))
     dump_iter = DataLoader(dump_set, batch_size=1)