better postprocessing
Browse files
app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import os
|
| 3 |
|
| 4 |
-
os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
|
| 5 |
-
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
from transformers import pipeline
|
|
@@ -29,9 +29,7 @@ ner_pipeline = pipeline(task='ner', model="crabz/slovakbert-ner")
|
|
| 29 |
nlp = spacy.blank("sk")
|
| 30 |
|
| 31 |
|
| 32 |
-
def
|
| 33 |
-
classifications = ner_pipeline(sentence)
|
| 34 |
-
|
| 35 |
entities = []
|
| 36 |
for i in range(len(classifications)):
|
| 37 |
if classifications[i]['entity'] != 0:
|
|
@@ -41,13 +39,37 @@ def apply_ner(sentence: str):
|
|
| 41 |
j += 1
|
| 42 |
entities.append((ner_map[classifications[i]['entity']].split('-')[1], classifications[i]['start'],
|
| 43 |
classifications[j - 1]['end']))
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
|
|
|
|
|
|
| 46 |
ents = []
|
| 47 |
for ee in entities:
|
| 48 |
ents.append(doc.char_span(ee[1], ee[2], ee[0]))
|
| 49 |
doc.ents = ents
|
|
|
|
|
|
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
displacy_html = displacy.render(doc, style="ent", options=options)
|
| 52 |
return displacy_html
|
| 53 |
|
|
@@ -57,9 +79,14 @@ intf = gr.Interface(fn=apply_ner, inputs="text", outputs="html", title='Slovak N
|
|
| 57 |
examples=[["Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
|
| 58 |
"štyroch prípadov variantu omikron na Slovensku."],
|
| 59 |
["Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
|
| 60 |
-
"rovnako\"."]
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
article="")
|
| 65 |
intf.launch()
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
import os
|
| 3 |
|
| 4 |
+
# os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
|
| 5 |
+
# "https://download.pytorch.org/whl/cpu/torch_stable.html")
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
from transformers import pipeline
|
|
|
|
| 29 |
nlp = spacy.blank("sk")
|
| 30 |
|
| 31 |
|
| 32 |
+
def postprocess(classifications):
|
|
|
|
|
|
|
| 33 |
entities = []
|
| 34 |
for i in range(len(classifications)):
|
| 35 |
if classifications[i]['entity'] != 0:
|
|
|
|
| 39 |
j += 1
|
| 40 |
entities.append((ner_map[classifications[i]['entity']].split('-')[1], classifications[i]['start'],
|
| 41 |
classifications[j - 1]['end']))
|
| 42 |
+
to_remove = []
|
| 43 |
+
merged_entities = []
|
| 44 |
+
for i in range(len(entities)):
|
| 45 |
+
for j in range(i + 1, len(entities)):
|
| 46 |
+
if entities[i] != entities[j] and entities[i][0] == entities[j][0] and (entities[i][2] == entities[j][1] or
|
| 47 |
+
entities[i][1] == entities[j][2]):
|
| 48 |
+
to_remove.append(entities[i])
|
| 49 |
+
to_remove.append(entities[j])
|
| 50 |
+
|
| 51 |
+
new_start = min(entities[i][1], entities[j][1])
|
| 52 |
+
new_end = max(entities[i][2], entities[j][2])
|
| 53 |
+
merged_entities.append((entities[i][0], new_start, new_end))
|
| 54 |
+
for ent in to_remove:
|
| 55 |
+
entities.remove(ent)
|
| 56 |
+
entities += merged_entities
|
| 57 |
+
return entities
|
| 58 |
|
| 59 |
+
|
| 60 |
+
def set_entities(sentence, entities):
    """Tokenise *sentence* and attach the given entity spans to the result.

    Args:
        sentence: Raw input text; processed with the module-level ``nlp``.
        entities: Iterable of ``(label, start_char, end_char)`` tuples, as
            produced by ``postprocess``.

    Returns:
        The document returned by ``nlp(sentence)`` with ``doc.ents`` set to
        every entity whose character offsets could be mapped onto tokens.
    """
    doc = nlp(sentence)
    ents = []
    for ee in entities:
        span = doc.char_span(ee[1], ee[2], ee[0])
        # char_span() yields None when the offsets do not fall on token
        # boundaries; assigning None into doc.ents raises, so skip those.
        if span is not None:
            ents.append(span)
    doc.ents = ents
    return doc
|
| 67 |
+
|
| 68 |
|
| 69 |
+
def apply_ner(sentence: str):
    """Run the NER pipeline on *sentence* and render the entities.

    The raw pipeline output is passed through ``postprocess``, attached to
    a document by ``set_entities``, and rendered with displaCy's ``"ent"``
    style using the module-level ``options``.

    Args:
        sentence: Text to analyse.

    Returns:
        Whatever ``displacy.render`` produces for the annotated document
        (presumably an HTML string, given how the result is named and used).
    """
    annotated_doc = set_entities(sentence, postprocess(ner_pipeline(sentence)))
    return displacy.render(annotated_doc, style="ent", options=options)
|
| 75 |
|
|
|
|
| 79 |
examples=[["Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
|
| 80 |
"štyroch prípadov variantu omikron na Slovensku."],
|
| 81 |
["Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
|
| 82 |
+
"rovnako\"."],
|
| 83 |
+
["Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič "
|
| 84 |
+
"upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké."],
|
| 85 |
+
["Začiatkom roka sa objavili nezhody medzi Richardom Sulíkom a šéfom hnutia OĽANO "
|
| 86 |
+
"Igorom Matovičom, ktoré v istej miere pretrvávajú aj dodnes."]],
|
| 87 |
+
description="Named-entity recognition (NER) labels named-entities in unstructured text. This "
|
| 88 |
+
"implementation supports three labels: person (OSOBA), organization (ORGANIZÁCIA) and "
|
| 89 |
+
"location (LOKALITA). You can try out one of the examples below or type your own "
|
| 90 |
+
"sentence. Don't forget to use double quotes (\" \") instead of curved quotes („ “)",
|
| 91 |
article="")
|
| 92 |
intf.launch()
|