Commit
·
53e96e8
1
Parent(s):
c5b3453
update app
Browse files
app.py
CHANGED
|
@@ -19,20 +19,25 @@ ner_pipeline = pipeline(
|
|
| 19 |
# Helper function to flatten entities and prepare them for HighlightedText
|
| 20 |
def prepare_entities_for_highlight(text, results):
|
| 21 |
entities = []
|
|
|
|
|
|
|
| 22 |
for category, entity_list in results.items():
|
| 23 |
for entity in entity_list:
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
return {"text": text, "entities": entities}
|
| 38 |
|
|
|
|
| 19 |
# Helper function to flatten entities and prepare them for HighlightedText
|
| 20 |
def prepare_entities_for_highlight(text, results):
    """Flatten per-category entity lists into one non-overlapping, sorted list.

    Args:
        text: The string the entity offsets refer to; returned unchanged.
        results: Mapping of category -> list of entity dicts. Each entity
            dict must provide ``"start"``, ``"end"`` and ``"entity"`` keys.

    Returns:
        ``{"text": text, "entities": [...]}`` where every entity is a dict
        with ``"start"``, ``"end"`` and ``"label"`` keys, ordered by start
        offset.

    When two entities cover overlapping character ranges, the one encountered
    first (in iteration order of ``results``) is kept and later ones are
    dropped. Previously only byte-identical spans were rejected, so partially
    overlapping or nested spans slipped through.
    """
    entities = []
    # Spans already accepted. Kept as a set so exact repeats (including
    # zero-length spans, which the interval test below cannot catch) are
    # rejected cheaply.
    seen_spans = set()

    for entity_list in results.values():
        for entity in entity_list:
            start, end = entity["start"], entity["end"]

            if (start, end) in seen_spans:
                continue

            # Half-open interval test: [start, end) overlaps [s, e) when each
            # begins before the other ends. Adjacent spans (end == next start)
            # are therefore NOT treated as overlapping.
            # NOTE(review): assumes "end" is exclusive -- confirm against the
            # pipeline that produces these offsets.
            if any(start < e and s < end for s, e in seen_spans):
                continue

            seen_spans.add((start, end))
            entities.append(
                {
                    "start": start,
                    "end": end,
                    "label": f"{entity['entity']}",
                }
            )

    # Stable sort: entities sharing a start offset keep their insertion order.
    entities.sort(key=lambda item: item["start"])

    return {"text": text, "entities": entities}
|
| 43 |
|