Commit
·
dd1b5ba
1
Parent(s):
53e96e8
update app
Browse files
app.py
CHANGED
|
@@ -16,11 +16,14 @@ ner_pipeline = pipeline(
|
|
| 16 |
)
|
| 17 |
|
| 18 |
|
| 19 |
-
# Helper function to
|
| 20 |
def prepare_entities_for_highlight(text, results):
|
| 21 |
entities = []
|
| 22 |
seen_spans = set() # Track the spans we have already added to avoid overlaps
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
for category, entity_list in results.items():
|
| 25 |
for entity in entity_list:
|
| 26 |
entity_span = (entity["start"], entity["end"])
|
|
@@ -28,6 +31,12 @@ def prepare_entities_for_highlight(text, results):
|
|
| 28 |
# Only add non-overlapping entities
|
| 29 |
if entity_span not in seen_spans:
|
| 30 |
seen_spans.add(entity_span)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
entities.append(
|
| 32 |
{
|
| 33 |
"start": entity["start"],
|
|
@@ -46,6 +55,9 @@ def prepare_entities_for_highlight(text, results):
|
|
| 46 |
def extract_entities(sentence):
|
| 47 |
results = ner_pipeline(sentence)
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
# Format the results for HighlightedText
|
| 50 |
return prepare_entities_for_highlight(sentence, results)
|
| 51 |
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
|
| 19 |
+
# Helper function to align entities correctly and debug tokenization
|
| 20 |
def prepare_entities_for_highlight(text, results):
|
| 21 |
entities = []
|
| 22 |
seen_spans = set() # Track the spans we have already added to avoid overlaps
|
| 23 |
|
| 24 |
+
# Print debug info about tokenization
|
| 25 |
+
print(f"Original text: {text}")
|
| 26 |
+
|
| 27 |
for category, entity_list in results.items():
|
| 28 |
for entity in entity_list:
|
| 29 |
entity_span = (entity["start"], entity["end"])
|
|
|
|
| 31 |
# Only add non-overlapping entities
|
| 32 |
if entity_span not in seen_spans:
|
| 33 |
seen_spans.add(entity_span)
|
| 34 |
+
entity_text = text[
|
| 35 |
+
entity["start"] : entity["end"]
|
| 36 |
+
].strip() # Ensure we're working with the correct portion of the text
|
| 37 |
+
print(
|
| 38 |
+
f"Entity text: {entity_text}, Start: {entity['start']}, End: {entity['end']}, Type: {entity['entity']}"
|
| 39 |
+
)
|
| 40 |
entities.append(
|
| 41 |
{
|
| 42 |
"start": entity["start"],
|
|
|
|
| 55 |
def extract_entities(sentence):
|
| 56 |
results = ner_pipeline(sentence)
|
| 57 |
|
| 58 |
+
# Debugging the result format
|
| 59 |
+
print(f"NER results: {results}")
|
| 60 |
+
|
| 61 |
# Format the results for HighlightedText
|
| 62 |
return prepare_entities_for_highlight(sentence, results)
|
| 63 |
|