|
|
import gradio as gr |
|
|
import matplotlib.pyplot as plt |
|
|
import networkx as nx |
|
|
|
|
|
from collections.abc import Iterable |
|
|
|
|
|
from gradio.themes.base import Base |
|
|
from gradio.themes.utils import colors, fonts, sizes |
|
|
|
|
|
from model import Parser |
|
|
|
|
|
|
|
|
# Single module-level Parser instance, created at import time and reused by
# every call to parse() below.
parser = Parser()
|
|
|
|
|
|
|
|
def parse(text):
    """Parse *text* and build the two views shown in the UI.

    The module-level ``parser`` is called once; its result is indexed by the
    keys ``forms``, ``heads``, ``deprel``, ``lemmas``, ``upos``, ``xpos``,
    ``feats`` and ``ne``.

    Returns:
        A ``(dependency_tree, table)`` tuple: the value returned by
        ``render_dependency_tree`` followed by the value returned by
        ``render_table``.
    """
    analysis = parser.parse(text)

    tree_figure = render_dependency_tree(
        analysis["forms"],
        analysis["heads"],
        analysis["deprel"],
    )

    # Column order must match the positional signature of render_table.
    table_fields = ("forms", "lemmas", "upos", "xpos", "feats", "ne")
    token_table = render_table(*[analysis[field] for field in table_fields])

    return tree_figure, token_table
|
|
|
|
|
|
|
|
def render_dependency_tree(words, parents, labels):
    """Draw a dependency tree for one sentence as a matplotlib figure.

    Args:
        words: Sequence of token strings; token ``i`` becomes graph node ``i``
            and is drawn with the token text as its label.
        parents: For each token, the 1-based index of its head token. A value
            of ``0`` means the token gets no incoming edge.
        labels: For each token, the text drawn on the edge coming from its
            head.

    Returns:
        The matplotlib ``Figure`` holding the drawing. The figure is
        unregistered from pyplot before it is returned (see the ``finally``
        clause), so the caller does not need to close it.
    """
    fig, ax = plt.subplots(figsize=(40, 16))
    try:
        token_count = len(words)
        if token_count == 0:
            # Nothing to lay out: hand back a blank canvas instead of running
            # the graph layout on an empty graph.
            ax.set_axis_off()
            return fig

        # Shrink the fonts as the sentence gets longer so labels keep fitting.
        if token_count < 10:
            main_font_size, minor_font_size = 40, 30
        elif token_count < 20:
            main_font_size, minor_font_size = 30, 22
        elif token_count < 40:
            main_font_size, minor_font_size = 24, 16
        else:
            main_font_size, minor_font_size = 16, 12
        pad = main_font_size // 2

        G = nx.DiGraph()
        for i, word in enumerate(words):
            G.add_node(i, label=word)

        for i, (parent, label) in enumerate(zip(parents, labels)):
            # Heads are 1-based; 0 marks a token without a head.
            if parent != 0:
                G.add_edge(parent - 1, i, label=label)

        pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

        # node_size=0 hides the node markers; each token is shown only as its
        # text label inside a white box.
        nx.draw(
            G, pos, ax=ax, with_labels=True,
            labels=nx.get_node_attributes(G, 'label'),
            arrows=True, node_color='#ffffff', node_size=0, node_shape='s',
            font_size=main_font_size,
            bbox=dict(facecolor="white", pad=pad),
        )

        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(
            G, pos, ax=ax, edge_labels=edge_labels, rotate=False, alpha=1.0,
            font_size=minor_font_size,
        )

        return fig
    finally:
        # plt.subplots registers the figure with pyplot's global figure
        # manager. Without this, every request (and every failed layout)
        # would leave one more open figure behind in a long-running server.
        plt.close(fig)
|
|
|
|
|
|
|
|
# HTML banner rendered at the top of the page via gr.HTML(description):
# a centred heading followed by a logo image.
description = """
<div style="text-align: center;">
<h1>Norsk UD (Bokmål og Nynorsk)</h1>
<p align="center">
<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
</p><p></p>
</div>
"""
|
|
|
|
|
|
|
|
def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the per-token annotation table.

    Args:
        forms: List of token strings; used as the column headers.
        lemmas: List with one lemma per token.
        upos: List with one UPOS tag per token.
        xpos: List with one XPOS tag per token.
        feats: List with one feature string per token, formatted as
            ``Name=Value`` items joined by ``|``. Items without ``=`` are
            ignored.
        named_entities: List with one tag per token: ``"O"``, or a tag
            starting with ``B`` / ``I`` followed by ``-TYPE``.

    Returns:
        ``{"data": rows, "headers": header}`` where ``header`` is
        ``[""] + forms`` and ``rows`` holds, in order: lemmas, UPOS, XPOS, one
        row per feature slot (at least one), the named-entity row and a final
        row of empty strings. Every row starts with a label cell.
    """
    # One list of "*Name:* Value" cells per token.
    per_token_feats = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            if "=" in item:
                name, value = item.split("=")[:2]
                cells.append(f"*{name}:* {value}")
        per_token_feats.append(cells)

    # Pad every token to the same number of feature slots (at least one) so
    # the lists can be transposed into table rows. `default=0` keeps this
    # working when there are no tokens at all.
    slot_count = max(1, max((len(cells) for cells in per_token_feats), default=0))
    padded = [cells + [""] * (slot_count - len(cells)) for cells in per_token_feats]
    # With no tokens the transpose is empty; still emit one (empty) row so
    # the "*UFEATS:*" label row exists.
    feat_rows = [list(row) for row in zip(*padded)] or [[]]

    token_total = len(named_entities)
    named_entities_converted = []
    for i, ne in enumerate(named_entities):
        # True when the following token continues the current entity.
        continues = i + 1 < token_total and named_entities[i + 1].startswith("I")
        if ne == "O":
            cell = ""
        elif ne.startswith("B"):
            # Everything after the first "-" is the entity type, so types
            # that themselves contain a hyphen are kept whole.
            entity_type = ne.partition("-")[2]
            # An entity that ends on this token (including a B- tag on the
            # last token of the sentence) is opened and closed in one cell.
            if continues:
                cell = f"<<— {entity_type} —"
            else:
                cell = f"<<— {entity_type} —>>"
        elif ne.startswith("I") and continues:
            cell = "————"
        else:
            cell = "——>>"
        named_entities_converted.append(cell)

    array = [
        [""] + forms,
        ["*LEMMAS:*"] + lemmas,
        ["*UPOS:*"] + upos,
        ["*XPOS:*"] + xpos,
        ["*UFEATS:*"] + feat_rows[0],
        *([""] + row for row in feat_rows[1:]),
        ["*NE:*"] + named_entities_converted,
        [""] * (len(forms) + 1),
    ]

    return {"data": array[1:], "headers": array[0]}
|
|
|
|
|
|
|
|
class Soft(Base):
    """Gradio theme built on ``Base`` with indigo hues and drop shadows.

    The constructor forwards its hue, size and font arguments to ``Base``
    unchanged, names the theme ``"soft"`` and then applies a fixed set of
    style-variable overrides through ``Base.set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.indigo,
        secondary_hue: colors.Color | str = colors.indigo,
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            "ui-monospace",
            "Consolas",
            "monospace",
        ),
    ):
        """Create the theme; all arguments are keyword-only and passed to ``Base``."""
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        self.name = "soft"

        # Style-variable overrides, grouped by the part of the UI they touch.
        # Values starting with "*" refer to other theme variables.
        overrides = {
            # Page, sliders and shared shadows
            "background_fill_primary": "*neutral_50",
            "slider_color": "*primary_500",
            "slider_color_dark": "*primary_600",
            "shadow_drop": "0 1px 4px 0 rgb(0 0 0 / 0.1)",
            "shadow_drop_lg": "0 2px 5px 0 rgb(0 0 0 / 0.2)",
            "shadow_spread": "6px",
            # Blocks, block labels and block titles
            "block_background_fill": "white",
            "block_label_padding": "*spacing_sm *spacing_md",
            "block_label_background_fill": "*primary_100",
            "block_label_background_fill_dark": "*primary_600",
            "block_label_radius": "*radius_md",
            "block_label_text_size": "*text_md",
            "block_label_text_weight": "600",
            "block_label_text_color": "*primary_500",
            "block_label_text_color_dark": "white",
            "block_label_margin": "*spacing_md",
            "block_title_radius": "*block_label_radius",
            "block_title_padding": "*block_label_padding",
            "block_title_background_fill": "*block_label_background_fill",
            "block_title_text_weight": "600",
            "block_title_text_color": "*primary_500",
            "block_title_text_color_dark": "white",
            "block_border_width": "0px",
            "panel_border_width": "0px",
            # Inputs
            "input_background_fill": "white",
            "input_border_color": "*neutral_100",
            "input_shadow": "*shadow_drop",
            "input_shadow_focus": "*shadow_drop_lg",
            # Buttons: shadows
            "button_primary_shadow": "*shadow_drop_lg",
            "button_primary_shadow_hover": "*shadow_drop_lg",
            "button_primary_shadow_active": "*shadow_inset",
            "button_secondary_shadow": "*shadow_drop_lg",
            "button_secondary_shadow_hover": "*shadow_drop_lg",
            "button_secondary_shadow_active": "*shadow_inset",
            # Buttons: fills and text
            "button_primary_background_fill": "*primary_500",
            "button_primary_background_fill_hover": "*primary_400",
            "button_primary_background_fill_hover_dark": "*primary_500",
            "button_primary_text_color": "white",
            "button_secondary_background_fill": "white",
            "button_secondary_background_fill_hover": "*neutral_100",
            "button_secondary_background_fill_hover_dark": "*primary_500",
            "button_secondary_text_color": "*neutral_800",
            "button_cancel_background_fill": "*button_secondary_background_fill",
            "button_cancel_background_fill_hover": "*button_secondary_background_fill_hover",
            "button_cancel_background_fill_hover_dark": "*button_secondary_background_fill_hover",
            "button_cancel_text_color": "*button_secondary_text_color",
            # Checkboxes and checkbox labels
            "checkbox_shadow": "none",
            "checkbox_label_shadow": "*shadow_drop_lg",
            "checkbox_label_background_fill_selected": "*primary_500",
            "checkbox_label_background_fill_selected_dark": "*primary_600",
            "checkbox_label_text_color_selected": "white",
            "checkbox_border_width": "1px",
            "checkbox_border_color": "*neutral_100",
            "checkbox_border_color_dark": "*neutral_600",
            "checkbox_border_color_focus": "*primary_500",
            "checkbox_border_color_focus_dark": "*primary_600",
            "checkbox_border_color_selected": "*primary_600",
            "checkbox_border_color_selected_dark": "*primary_700",
            "checkbox_background_color_selected": "*primary_600",
            "checkbox_background_color_selected_dark": "*primary_700",
        }
        super().set(**overrides)
|
|
|
|
|
|
|
|
# Extra CSS handed to gr.Blocks; its only rule hides `.sort-button` elements.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
display: none !important;
}
"""

# NOTE(review): the original indentation of this section was lost. The layout
# below assumes the input and example columns share one row and the results
# column sits underneath it — confirm against the deployed app.
with gr.Blocks(theme=Soft(), css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        # Left panel: free-text input and the submit button.
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        # Right panel: clickable example sentences.
        with gr.Column(scale=1, variant="panel"):
            example_sentences = [
                ["Thomassen er på vei til sin neste gjerning."],
                ["På toppen av dette kom de metodiske utfordringer."],
                ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ["Statsministeren i Norges første brede og varige borgerlige koalisjonsregjering etterlot seg timelange radiointervjuer med tidligere Dagsnytt-redaktør Per Bøhn og 70-80 stappfulle esker med usorterte papirer på loft og i kjeller hjemme på gården i Flå."],
            ]
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=example_sentences,
            )

    # Results panel: annotation table above the dependency plot.
    with gr.Column(scale=1, variant="panel"):
        # Placeholder grid (8 rows x 42 columns of empty strings) shown
        # before the first parse.
        placeholder_rows, placeholder_cols = 8, 42
        table = gr.DataFrame(
            [[""] * placeholder_cols for _ in range(placeholder_rows)],
            headers=[""] * placeholder_cols,
            interactive=False,
            datatype="markdown",
        )
        dependency_plot = gr.Plot(None, container=False)

    def _run_parse_on(trigger):
        """Attach the parse handler to an event registration callable."""
        return trigger(
            fn=parse,
            inputs=[source],
            outputs=[dependency_plot, table],
            queue=True,
        )

    # Pressing Enter in the textbox and clicking the button both parse.
    _run_parse_on(source.submit)
    _run_parse_on(submit.click)

    # Clicking an example first copies its text into the textbox, then parses.
    copy_example = dataset.click(
        fn=lambda text: text[0], inputs=[dataset], outputs=[source]
    )
    _run_parse_on(copy_example.then)


demo.queue(max_size=32)
demo.launch()
|
|
|