Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -405,10 +405,81 @@ def create_shared_entity_html(entity, entity_colors):
|
|
| 405 |
f'title="SHARED: {tooltip}">'
|
| 406 |
f'{entity["text"]} π€</span>')
|
| 407 |
|
| 408 |
-
def
|
| 409 |
-
"""Create
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
if not entities:
|
| 411 |
-
return "No entities found
|
| 412 |
|
| 413 |
# Share overlapping entities
|
| 414 |
shared_entities = find_overlapping_entities(entities)
|
|
@@ -426,103 +497,139 @@ def create_entity_table_gradio_tabs(entities, entity_colors):
|
|
| 426 |
entity_groups[key].append(entity)
|
| 427 |
|
| 428 |
if not entity_groups:
|
| 429 |
-
return "No entities found
|
| 430 |
-
|
| 431 |
-
# Create
|
| 432 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
if entity_type == 'SHARED_ENTITIES':
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
# Create table for shared entities
|
| 440 |
-
table_html = f"""
|
| 441 |
-
<div style="margin: 15px 0;">
|
| 442 |
-
<h4 style="color: {colour}; margin-bottom: 15px;">{header}</h4>
|
| 443 |
-
<table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd;">
|
| 444 |
-
<thead>
|
| 445 |
-
<tr style="background-color: {colour}; color: white;">
|
| 446 |
-
<th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Entity Text</th>
|
| 447 |
-
<th style="padding: 12px; text-align: left; border: 1px solid #ddd;">All Labels</th>
|
| 448 |
-
<th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Sources</th>
|
| 449 |
-
<th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Count</th>
|
| 450 |
-
</tr>
|
| 451 |
-
</thead>
|
| 452 |
-
<tbody>
|
| 453 |
-
"""
|
| 454 |
-
|
| 455 |
-
for entity in entities_of_type:
|
| 456 |
-
labels_text = " | ".join(entity['labels'])
|
| 457 |
-
sources_text = " | ".join(entity['sources'])
|
| 458 |
-
|
| 459 |
-
table_html += f"""
|
| 460 |
-
<tr style="background-color: #fff;">
|
| 461 |
-
<td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{entity['text']}</td>
|
| 462 |
-
<td style="padding: 10px; border: 1px solid #ddd;">{labels_text}</td>
|
| 463 |
-
<td style="padding: 10px; border: 1px solid #ddd;">{sources_text}</td>
|
| 464 |
-
<td style="padding: 10px; border: 1px solid #ddd; text-align: center;">
|
| 465 |
-
<span style='background-color: #28a745; color: white; padding: 2px 6px; border-radius: 10px; font-size: 11px;'>
|
| 466 |
-
{entity['entity_count']}
|
| 467 |
-
</span>
|
| 468 |
-
</td>
|
| 469 |
-
</tr>
|
| 470 |
-
"""
|
| 471 |
-
|
| 472 |
-
table_html += "</tbody></table></div>"
|
| 473 |
-
tab_contents[f"π€ SHARED ({len(entities_of_type)})"] = table_html
|
| 474 |
-
|
| 475 |
else:
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
confidence = entity.get('confidence', 0.0)
|
| 504 |
-
confidence_colour = "#28a745" if confidence > 0.7 else "#ffc107" if confidence > 0.4 else "#dc3545"
|
| 505 |
-
source = entity.get('source', 'Unknown')
|
| 506 |
-
source_badge = f"<span style='background-color: #007bff; color: white; padding: 2px 6px; border-radius: 10px; font-size: 11px;'>{source}</span>"
|
| 507 |
-
|
| 508 |
-
table_html += f"""
|
| 509 |
-
<tr style="background-color: #fff;">
|
| 510 |
-
<td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{entity['text']}</td>
|
| 511 |
-
<td style="padding: 10px; border: 1px solid #ddd;">
|
| 512 |
-
<span style="color: {confidence_colour}; font-weight: bold;">
|
| 513 |
-
{confidence:.3f}
|
| 514 |
-
</span>
|
| 515 |
-
</td>
|
| 516 |
-
<td style="padding: 10px; border: 1px solid #ddd;">{entity['label']}</td>
|
| 517 |
-
<td style="padding: 10px; border: 1px solid #ddd;">{source_badge}</td>
|
| 518 |
-
</tr>
|
| 519 |
-
"""
|
| 520 |
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
|
| 525 |
-
|
|
|
|
|
|
|
| 526 |
|
| 527 |
def create_legend_html(entity_colors, standard_entities, custom_entities):
|
| 528 |
"""Create a legend showing entity colours"""
|
|
@@ -559,7 +666,7 @@ ner_manager = HybridNERManager()
|
|
| 559 |
def process_text(text, standard_entities, custom_entities_str, confidence_threshold, selected_model, progress=gr.Progress()):
|
| 560 |
"""Main processing function for Gradio interface with progress tracking"""
|
| 561 |
if not text.strip():
|
| 562 |
-
return "β Please enter some text to analyse", "",
|
| 563 |
|
| 564 |
progress(0.1, desc="Initialising...")
|
| 565 |
|
|
@@ -572,7 +679,7 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
|
|
| 572 |
selected_standard = [entity for entity in standard_entities if entity]
|
| 573 |
|
| 574 |
if not selected_standard and not custom_entities:
|
| 575 |
-
return "β Please select at least one common entity type OR enter custom entity types", "",
|
| 576 |
|
| 577 |
progress(0.2, desc="Loading models...")
|
| 578 |
|
|
@@ -591,7 +698,7 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
|
|
| 591 |
all_entities.extend(custom_entity_results)
|
| 592 |
|
| 593 |
if not all_entities:
|
| 594 |
-
return "β No entities found. Try lowering the confidence threshold or using different entity types.", "",
|
| 595 |
|
| 596 |
progress(0.8, desc="Processing results...")
|
| 597 |
|
|
@@ -601,12 +708,11 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
|
|
| 601 |
# Create outputs
|
| 602 |
legend_html = create_legend_html(entity_colors, selected_standard, custom_entities)
|
| 603 |
highlighted_html = create_highlighted_html(text, all_entities, entity_colors)
|
| 604 |
-
|
| 605 |
|
| 606 |
progress(0.9, desc="Creating summary...")
|
| 607 |
|
| 608 |
# Create summary with shared entities terminology
|
| 609 |
-
# Note: Shared entities are those found by BOTH common NER models AND custom GLiNER
|
| 610 |
total_entities = len(all_entities)
|
| 611 |
shared_entities = find_overlapping_entities(all_entities)
|
| 612 |
final_count = len(shared_entities)
|
|
@@ -622,7 +728,7 @@ def process_text(text, standard_entities, custom_entities_str, confidence_thresh
|
|
| 622 |
|
| 623 |
progress(1.0, desc="Complete!")
|
| 624 |
|
| 625 |
-
return summary, legend_html + highlighted_html,
|
| 626 |
|
| 627 |
# Create Gradio interface
|
| 628 |
def create_interface():
|
|
@@ -640,7 +746,7 @@ def create_interface():
|
|
| 640 |
3. **βοΈ Select common entities** you want to find (PER, ORG, LOC, etc.)
|
| 641 |
4. **β¨ Add custom entities** (comma-separated) like "relationships, occupations, skills"
|
| 642 |
5. **βοΈ Adjust confidence threshold**
|
| 643 |
-
6. **π Click "Analyse Text"** to see results with
|
| 644 |
""")
|
| 645 |
|
| 646 |
with gr.Row():
|
|
@@ -723,142 +829,15 @@ def create_interface():
|
|
| 723 |
with gr.Row():
|
| 724 |
highlighted_output = gr.HTML(label="Highlighted Text")
|
| 725 |
|
| 726 |
-
#
|
| 727 |
-
results_tabs = gr.State({})
|
| 728 |
-
|
| 729 |
-
def update_tabs(tab_contents):
|
| 730 |
-
"""Update the results tabs based on the analysis"""
|
| 731 |
-
if not tab_contents or not isinstance(tab_contents, dict):
|
| 732 |
-
return {gr.HTML("No results to display"): gr.update(visible=True)}
|
| 733 |
-
|
| 734 |
-
# Create tabs dynamically
|
| 735 |
-
tab_components = {}
|
| 736 |
-
for tab_name, content in tab_contents.items():
|
| 737 |
-
tab_components[tab_name] = gr.HTML(content)
|
| 738 |
-
|
| 739 |
-
return tab_components
|
| 740 |
-
|
| 741 |
-
# Results section with tabs
|
| 742 |
with gr.Row():
|
| 743 |
with gr.Column():
|
| 744 |
gr.Markdown("### π Detailed Results")
|
| 745 |
-
|
| 746 |
-
# We'll update this section dynamically
|
| 747 |
-
results_container = gr.HTML(label="Results")
|
| 748 |
-
|
| 749 |
-
# Function to process and display results
|
| 750 |
-
def process_and_display(text, standard_entities, custom_entities, confidence_threshold, selected_model):
|
| 751 |
-
# Get results from main processing function
|
| 752 |
-
summary, highlighted, tab_contents = process_text(
|
| 753 |
-
text, standard_entities, custom_entities, confidence_threshold, selected_model
|
| 754 |
-
)
|
| 755 |
-
|
| 756 |
-
# Create tabs HTML manually since Gradio dynamic tabs are complex
|
| 757 |
-
if isinstance(tab_contents, dict) and tab_contents:
|
| 758 |
-
# Generate unique IDs to avoid conflicts
|
| 759 |
-
import time
|
| 760 |
-
timestamp = str(int(time.time() * 1000))
|
| 761 |
-
|
| 762 |
-
tabs_html = f"""
|
| 763 |
-
<div style="margin: 20px 0;" id="tab-container-{timestamp}">
|
| 764 |
-
<div style="border-bottom: 2px solid #ddd; margin-bottom: 20px;">
|
| 765 |
-
"""
|
| 766 |
-
|
| 767 |
-
# Create tab buttons
|
| 768 |
-
tab_names = list(tab_contents.keys())
|
| 769 |
-
for i, tab_name in enumerate(tab_names):
|
| 770 |
-
active_style = "background-color: #f8f9fa; border-bottom: 3px solid #4ECDC4;" if i == 0 else "background-color: #fff;"
|
| 771 |
-
default_bg = '#f8f9fa' if i == 0 else '#fff'
|
| 772 |
-
tabs_html += f"""
|
| 773 |
-
<button onclick="showResultTab{timestamp}('{i}')" id="result-tab-{timestamp}-{i}"
|
| 774 |
-
style="padding: 12px 24px; margin-right: 5px; border: 1px solid #ddd;
|
| 775 |
-
border-bottom: none; cursor: pointer; font-weight: bold; {active_style}
|
| 776 |
-
transition: all 0.3s ease;"
|
| 777 |
-
onmouseover="this.style.backgroundColor='#e9ecef'"
|
| 778 |
-
onmouseout="this.style.backgroundColor='{default_bg}'">
|
| 779 |
-
{tab_name}
|
| 780 |
-
</button>
|
| 781 |
-
"""
|
| 782 |
-
|
| 783 |
-
tabs_html += "</div>"
|
| 784 |
-
|
| 785 |
-
# Create tab content
|
| 786 |
-
for i, (tab_name, content) in enumerate(tab_contents.items()):
|
| 787 |
-
display_style = "display: block;" if i == 0 else "display: none;"
|
| 788 |
-
tabs_html += f"""
|
| 789 |
-
<div id="result-content-{timestamp}-{i}" style="{display_style}">
|
| 790 |
-
{content}
|
| 791 |
-
</div>
|
| 792 |
-
"""
|
| 793 |
-
|
| 794 |
-
# Add JavaScript for tab switching with unique function name
|
| 795 |
-
tabs_html += f"""
|
| 796 |
-
<script>
|
| 797 |
-
function showResultTab{timestamp}(tabIndex) {{
|
| 798 |
-
console.log('Tab clicked:', tabIndex);
|
| 799 |
-
|
| 800 |
-
// Hide all content for this specific tab container
|
| 801 |
-
var contents = document.querySelectorAll('[id^="result-content-{timestamp}-"]');
|
| 802 |
-
contents.forEach(function(content) {{
|
| 803 |
-
content.style.display = 'none';
|
| 804 |
-
}});
|
| 805 |
-
|
| 806 |
-
// Reset all tab styles for this specific tab container
|
| 807 |
-
var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
|
| 808 |
-
tabs.forEach(function(tab) {{
|
| 809 |
-
tab.style.backgroundColor = '#fff';
|
| 810 |
-
tab.style.borderBottom = 'none';
|
| 811 |
-
}});
|
| 812 |
-
|
| 813 |
-
// Show selected content
|
| 814 |
-
var targetContent = document.getElementById('result-content-{timestamp}-' + tabIndex);
|
| 815 |
-
if (targetContent) {{
|
| 816 |
-
targetContent.style.display = 'block';
|
| 817 |
-
}}
|
| 818 |
-
|
| 819 |
-
// Highlight selected tab
|
| 820 |
-
var activeTab = document.getElementById('result-tab-{timestamp}-' + tabIndex);
|
| 821 |
-
if (activeTab) {{
|
| 822 |
-
activeTab.style.backgroundColor = '#f8f9fa';
|
| 823 |
-
activeTab.style.borderBottom = '3px solid #4ECDC4';
|
| 824 |
-
}}
|
| 825 |
-
}}
|
| 826 |
-
|
| 827 |
-
// Ensure tabs are clickable after DOM load
|
| 828 |
-
document.addEventListener('DOMContentLoaded', function() {{
|
| 829 |
-
var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
|
| 830 |
-
tabs.forEach(function(tab, index) {{
|
| 831 |
-
tab.addEventListener('click', function(e) {{
|
| 832 |
-
e.preventDefault();
|
| 833 |
-
showResultTab{timestamp}(index.toString());
|
| 834 |
-
}});
|
| 835 |
-
}});
|
| 836 |
-
}});
|
| 837 |
-
|
| 838 |
-
// Also try immediate setup in case DOM is already loaded
|
| 839 |
-
setTimeout(function() {{
|
| 840 |
-
var tabs = document.querySelectorAll('[id^="result-tab-{timestamp}-"]');
|
| 841 |
-
tabs.forEach(function(tab, index) {{
|
| 842 |
-
tab.onclick = function(e) {{
|
| 843 |
-
e.preventDefault();
|
| 844 |
-
showResultTab{timestamp}(index.toString());
|
| 845 |
-
return false;
|
| 846 |
-
}};
|
| 847 |
-
}});
|
| 848 |
-
}}, 100);
|
| 849 |
-
</script>
|
| 850 |
-
</div>
|
| 851 |
-
"""
|
| 852 |
-
|
| 853 |
-
results_display = tabs_html
|
| 854 |
-
else:
|
| 855 |
-
results_display = str(tab_contents) if tab_contents else "No results to display"
|
| 856 |
-
|
| 857 |
-
return summary, highlighted, results_display
|
| 858 |
|
| 859 |
# Connect the button to the processing function
|
| 860 |
analyse_btn.click(
|
| 861 |
-
fn=
|
| 862 |
inputs=[
|
| 863 |
text_input,
|
| 864 |
standard_entities,
|
|
@@ -866,7 +845,7 @@ def create_interface():
|
|
| 866 |
confidence_threshold,
|
| 867 |
model_dropdown
|
| 868 |
],
|
| 869 |
-
outputs=[summary_output, highlighted_output,
|
| 870 |
)
|
| 871 |
|
| 872 |
# Add examples
|
|
|
|
| 405 |
f'title="SHARED: {tooltip}">'
|
| 406 |
f'{entity["text"]} π€</span>')
|
| 407 |
|
| 408 |
+
def create_entity_table_html(entities_of_type, entity_type, colour, is_shared=False):
    """Create an HTML table for the entities of one type.

    Args:
        entities_of_type: List of entity dicts. When ``is_shared`` is true each
            dict must provide 'text', 'labels', 'sources' and 'entity_count';
            otherwise each dict must provide 'text' and 'label' and may provide
            'confidence' (defaults to 0.0) and 'source' (defaults to 'Unknown').
        entity_type: Name of the entity group. It is not used while rendering
            and is kept so existing callers keep working.
        colour: CSS colour used as the header row background.
        is_shared: Render the shared-entity columns (labels, sources, count)
            instead of the per-entity columns (confidence, type, source).

    Returns:
        str: The ``<table>`` markup. Regular entities are listed by descending
        confidence; the list passed in is left in its original order.
    """
    # Function-scope import so the block does not depend on the file header.
    from html import escape

    th_style = "padding: 12px; text-align: left; border: 1px solid #ddd;"
    td_style = "padding: 10px; border: 1px solid #ddd;"
    badge_style = "color: white; padding: 2px 6px; border-radius: 10px; font-size: 11px;"

    if is_shared:
        columns = ("Entity Text", "All Labels", "Sources", "Count")
    else:
        columns = ("Entity Text", "Confidence", "Type", "Source")

    parts = [
        '<table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd;">',
        "<thead>",
        f'<tr style="background-color: {colour}; color: white;">',
        "\n".join(f'<th style="{th_style}">{column}</th>' for column in columns),
        "</tr>",
        "</thead>",
        "<tbody>",
    ]

    if is_shared:
        for entity in entities_of_type:
            # Entity text and labels originate from user input, so every
            # dynamic cell is escaped before it is placed into the markup.
            text = escape(str(entity['text']))
            labels_text = escape(" | ".join(entity['labels']))
            sources_text = escape(" | ".join(entity['sources']))
            count = escape(str(entity['entity_count']))

            parts.append(
                '<tr style="background-color: #fff;">\n'
                f'<td style="{td_style} font-weight: bold;">{text}</td>\n'
                f'<td style="{td_style}">{labels_text}</td>\n'
                f'<td style="{td_style}">{sources_text}</td>\n'
                f'<td style="{td_style} text-align: center;">\n'
                f"<span style='background-color: #28a745; {badge_style}'>\n"
                f"{count}\n"
                "</span>\n"
                "</td>\n"
                "</tr>"
            )
    else:
        # sorted() works on a copy, so the caller's list is not reordered as a
        # side effect of rendering.
        by_confidence = sorted(
            entities_of_type,
            key=lambda item: item.get('confidence', 0),
            reverse=True,
        )

        for entity in by_confidence:
            confidence = entity.get('confidence', 0.0)
            # Green above 0.7, amber above 0.4, red otherwise.
            if confidence > 0.7:
                confidence_colour = "#28a745"
            elif confidence > 0.4:
                confidence_colour = "#ffc107"
            else:
                confidence_colour = "#dc3545"

            text = escape(str(entity['text']))
            label = escape(str(entity['label']))
            source = escape(str(entity.get('source', 'Unknown')))
            source_badge = f"<span style='background-color: #007bff; {badge_style}'>{source}</span>"

            parts.append(
                '<tr style="background-color: #fff;">\n'
                f'<td style="{td_style} font-weight: bold;">{text}</td>\n'
                f'<td style="{td_style}">\n'
                f'<span style="color: {confidence_colour}; font-weight: bold;">\n'
                f"{confidence:.3f}\n"
                "</span>\n"
                "</td>\n"
                f'<td style="{td_style}">{label}</td>\n'
                f'<td style="{td_style}">{source_badge}</td>\n'
                "</tr>"
            )

    parts.append("</tbody></table>")
    return "\n".join(parts)
|
| 478 |
+
|
| 479 |
+
def create_all_entity_tables(entities, entity_colors):
|
| 480 |
+
"""Create all entity tables in a single container"""
|
| 481 |
if not entities:
|
| 482 |
+
return "<p style='text-align: center; padding: 20px;'>No entities found.</p>"
|
| 483 |
|
| 484 |
# Share overlapping entities
|
| 485 |
shared_entities = find_overlapping_entities(entities)
|
|
|
|
| 497 |
entity_groups[key].append(entity)
|
| 498 |
|
| 499 |
if not entity_groups:
|
| 500 |
+
return "<p style='text-align: center; padding: 20px;'>No entities found.</p>"
|
| 501 |
+
|
| 502 |
+
# Create container with all tables
|
| 503 |
+
all_tables_html = """
|
| 504 |
+
<div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
|
| 505 |
+
<style>
|
| 506 |
+
.entity-section {
|
| 507 |
+
margin-bottom: 30px;
|
| 508 |
+
background-color: white;
|
| 509 |
+
border-radius: 8px;
|
| 510 |
+
padding: 15px;
|
| 511 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 512 |
+
}
|
| 513 |
+
.entity-section:last-child {
|
| 514 |
+
margin-bottom: 0;
|
| 515 |
+
}
|
| 516 |
+
.section-header {
|
| 517 |
+
display: flex;
|
| 518 |
+
align-items: center;
|
| 519 |
+
margin-bottom: 15px;
|
| 520 |
+
padding-bottom: 10px;
|
| 521 |
+
border-bottom: 2px solid #eee;
|
| 522 |
+
}
|
| 523 |
+
.entity-count {
|
| 524 |
+
background-color: #007bff;
|
| 525 |
+
color: white;
|
| 526 |
+
padding: 4px 12px;
|
| 527 |
+
border-radius: 15px;
|
| 528 |
+
font-size: 14px;
|
| 529 |
+
font-weight: bold;
|
| 530 |
+
margin-left: 10px;
|
| 531 |
+
}
|
| 532 |
+
.quick-nav {
|
| 533 |
+
position: sticky;
|
| 534 |
+
top: 0;
|
| 535 |
+
background-color: #f8f9fa;
|
| 536 |
+
padding: 10px;
|
| 537 |
+
margin-bottom: 20px;
|
| 538 |
+
border-radius: 8px;
|
| 539 |
+
display: flex;
|
| 540 |
+
flex-wrap: wrap;
|
| 541 |
+
gap: 8px;
|
| 542 |
+
z-index: 10;
|
| 543 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 544 |
+
}
|
| 545 |
+
.nav-button {
|
| 546 |
+
padding: 6px 12px;
|
| 547 |
+
border: 1px solid #ddd;
|
| 548 |
+
border-radius: 20px;
|
| 549 |
+
background-color: white;
|
| 550 |
+
cursor: pointer;
|
| 551 |
+
transition: all 0.3s;
|
| 552 |
+
text-decoration: none;
|
| 553 |
+
color: #333;
|
| 554 |
+
font-size: 13px;
|
| 555 |
+
font-weight: 500;
|
| 556 |
+
}
|
| 557 |
+
.nav-button:hover {
|
| 558 |
+
background-color: #4ECDC4;
|
| 559 |
+
color: white;
|
| 560 |
+
border-color: #4ECDC4;
|
| 561 |
+
}
|
| 562 |
+
</style>
|
| 563 |
+
"""
|
| 564 |
+
|
| 565 |
+
# Create quick navigation
|
| 566 |
+
all_tables_html += '<div class="quick-nav">'
|
| 567 |
+
all_tables_html += '<strong style="margin-right: 10px;">Quick Navigation:</strong>'
|
| 568 |
|
| 569 |
+
# Sort entity groups to show shared entities first
|
| 570 |
+
sorted_groups = []
|
| 571 |
+
if 'SHARED_ENTITIES' in entity_groups:
|
| 572 |
+
sorted_groups.append(('SHARED_ENTITIES', entity_groups['SHARED_ENTITIES']))
|
| 573 |
+
|
| 574 |
+
for entity_type, entities_list in sorted(entity_groups.items()):
|
| 575 |
+
if entity_type != 'SHARED_ENTITIES':
|
| 576 |
+
sorted_groups.append((entity_type, entities_list))
|
| 577 |
+
|
| 578 |
+
for entity_type, entities_list in sorted_groups:
|
| 579 |
if entity_type == 'SHARED_ENTITIES':
|
| 580 |
+
icon = 'π€'
|
| 581 |
+
label = 'Shared'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
else:
|
| 583 |
+
icon = 'π―' if entity_type in STANDARD_ENTITIES else 'β¨'
|
| 584 |
+
label = entity_type
|
| 585 |
+
|
| 586 |
+
all_tables_html += f'<a href="#{entity_type.replace(" ", "_")}" class="nav-button">{icon} {label} ({len(entities_list)})</a>'
|
| 587 |
+
|
| 588 |
+
all_tables_html += '</div>'
|
| 589 |
+
|
| 590 |
+
# Add shared entities section if any
|
| 591 |
+
if 'SHARED_ENTITIES' in entity_groups:
|
| 592 |
+
shared_entities_list = entity_groups['SHARED_ENTITIES']
|
| 593 |
+
all_tables_html += f"""
|
| 594 |
+
<div class="entity-section" id="SHARED_ENTITIES">
|
| 595 |
+
<div class="section-header">
|
| 596 |
+
<h3 style="margin: 0; display: flex; align-items: center;">
|
| 597 |
+
<span style="font-size: 24px; margin-right: 10px;">π€</span>
|
| 598 |
+
Shared Entities
|
| 599 |
+
<span class="entity-count">{len(shared_entities_list)} found</span>
|
| 600 |
+
</h3>
|
| 601 |
+
</div>
|
| 602 |
+
{create_entity_table_html(shared_entities_list, 'SHARED_ENTITIES', '#666666', is_shared=True)}
|
| 603 |
+
</div>
|
| 604 |
+
"""
|
| 605 |
+
|
| 606 |
+
# Add other entity types
|
| 607 |
+
for entity_type, entities_of_type in sorted(entity_groups.items()):
|
| 608 |
+
if entity_type == 'SHARED_ENTITIES':
|
| 609 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
|
| 611 |
+
colour = entity_colors.get(entity_type.upper(), '#f0f0f0')
|
| 612 |
+
is_standard = entity_type in STANDARD_ENTITIES
|
| 613 |
+
icon = "π―" if is_standard else "β¨"
|
| 614 |
+
type_label = "Common NER" if is_standard else "Custom GLiNER"
|
| 615 |
+
|
| 616 |
+
all_tables_html += f"""
|
| 617 |
+
<div class="entity-section" id="{entity_type.replace(' ', '_')}">
|
| 618 |
+
<div class="section-header">
|
| 619 |
+
<h3 style="margin: 0; display: flex; align-items: center;">
|
| 620 |
+
<span style="font-size: 24px; margin-right: 10px;">{icon}</span>
|
| 621 |
+
{entity_type}
|
| 622 |
+
<span class="entity-count" style="background-color: {colour};">{len(entities_of_type)} found</span>
|
| 623 |
+
</h3>
|
| 624 |
+
<span style="margin-left: auto; color: #666; font-size: 14px;">{type_label}</span>
|
| 625 |
+
</div>
|
| 626 |
+
{create_entity_table_html(entities_of_type, entity_type, colour)}
|
| 627 |
+
</div>
|
| 628 |
+
"""
|
| 629 |
|
| 630 |
+
all_tables_html += "</div>"
|
| 631 |
+
|
| 632 |
+
return all_tables_html
|
| 633 |
|
| 634 |
def create_legend_html(entity_colors, standard_entities, custom_entities):
|
| 635 |
"""Create a legend showing entity colours"""
|
|
|
|
| 666 |
def process_text(text, standard_entities, custom_entities_str, confidence_threshold, selected_model, progress=gr.Progress()):
|
| 667 |
"""Main processing function for Gradio interface with progress tracking"""
|
| 668 |
if not text.strip():
|
| 669 |
+
return "β Please enter some text to analyse", "", ""
|
| 670 |
|
| 671 |
progress(0.1, desc="Initialising...")
|
| 672 |
|
|
|
|
| 679 |
selected_standard = [entity for entity in standard_entities if entity]
|
| 680 |
|
| 681 |
if not selected_standard and not custom_entities:
|
| 682 |
+
return "β Please select at least one common entity type OR enter custom entity types", "", ""
|
| 683 |
|
| 684 |
progress(0.2, desc="Loading models...")
|
| 685 |
|
|
|
|
| 698 |
all_entities.extend(custom_entity_results)
|
| 699 |
|
| 700 |
if not all_entities:
|
| 701 |
+
return "β No entities found. Try lowering the confidence threshold or using different entity types.", "", ""
|
| 702 |
|
| 703 |
progress(0.8, desc="Processing results...")
|
| 704 |
|
|
|
|
| 708 |
# Create outputs
|
| 709 |
legend_html = create_legend_html(entity_colors, selected_standard, custom_entities)
|
| 710 |
highlighted_html = create_highlighted_html(text, all_entities, entity_colors)
|
| 711 |
+
results_html = create_all_entity_tables(all_entities, entity_colors)
|
| 712 |
|
| 713 |
progress(0.9, desc="Creating summary...")
|
| 714 |
|
| 715 |
# Create summary with shared entities terminology
|
|
|
|
| 716 |
total_entities = len(all_entities)
|
| 717 |
shared_entities = find_overlapping_entities(all_entities)
|
| 718 |
final_count = len(shared_entities)
|
|
|
|
| 728 |
|
| 729 |
progress(1.0, desc="Complete!")
|
| 730 |
|
| 731 |
+
return summary, legend_html + highlighted_html, results_html
|
| 732 |
|
| 733 |
# Create Gradio interface
|
| 734 |
def create_interface():
|
|
|
|
| 746 |
3. **βοΈ Select common entities** you want to find (PER, ORG, LOC, etc.)
|
| 747 |
4. **β¨ Add custom entities** (comma-separated) like "relationships, occupations, skills"
|
| 748 |
5. **βοΈ Adjust confidence threshold**
|
| 749 |
+
6. **π Click "Analyse Text"** to see results with organized output
|
| 750 |
""")
|
| 751 |
|
| 752 |
with gr.Row():
|
|
|
|
| 829 |
with gr.Row():
|
| 830 |
highlighted_output = gr.HTML(label="Highlighted Text")
|
| 831 |
|
| 832 |
+
# Results section
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
with gr.Row():
|
| 834 |
with gr.Column():
|
| 835 |
gr.Markdown("### π Detailed Results")
|
| 836 |
+
results_output = gr.HTML(label="Entity Results")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 837 |
|
| 838 |
# Connect the button to the processing function
|
| 839 |
analyse_btn.click(
|
| 840 |
+
fn=process_text,
|
| 841 |
inputs=[
|
| 842 |
text_input,
|
| 843 |
standard_entities,
|
|
|
|
| 845 |
confidence_threshold,
|
| 846 |
model_dropdown
|
| 847 |
],
|
| 848 |
+
outputs=[summary_output, highlighted_output, results_output]
|
| 849 |
)
|
| 850 |
|
| 851 |
# Add examples
|