Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from widgets.widget_base import Widget | |
| from data_measurements.dataset_statistics import DatasetStatisticsCacheClass as dmt_cls | |
| import utils | |
| logs = utils.prepare_logging(__file__) | |
| ## possibly looking for update() --> return output | |
class TextLengths(Widget):
    """Widget that shows how text lengths are distributed in a dataset.

    It owns four Gradio components: an image of the length distribution,
    a Markdown explainer with the average/standard deviation, a dropdown
    of the distinct lengths, and a dataframe listing the rows that have
    the selected length.
    """

    def __init__(self):
        # Components are created with render=False and placed into the
        # layout later by render().
        self.text_length_distribution_plot = gr.Image(render=False)
        self.text_length_explainer = gr.Markdown(render=False)
        self.text_length_drop_down = gr.Dropdown(render=False)
        self.text_length_df = gr.DataFrame(render=False)

    def update_text_length_df(self, length, dstats):
        """Return the rows of the lengths table whose "length" equals *length*.

        Args:
            length: The length value to filter on (the dropdown selection).
            dstats: Object exposing ``length_obj.lengths_df``, a table with
                a "length" column.

        Returns:
            The matching rows, indexed by the "length" column.
        """
        lengths_df = dstats.length_obj.lengths_df
        matching_rows = lengths_df[lengths_df["length"] == length]
        return matching_rows.set_index("length")

    def render(self):
        """Lay out the explanatory text and the four components in a tab."""
        with gr.TabItem("Text Lengths"):
            gr.Markdown(
                "Use this widget to identify outliers, particularly suspiciously long outliers."
            )
            gr.Markdown(
                "Below, you can see how the lengths of the text instances in your "
                "dataset are distributed."
            )
            gr.Markdown(
                "Any unexpected peaks or valleys in the distribution may help to "
                "identify instances you want to remove or augment."
            )
            gr.Markdown(
                "### Here is the count of different text lengths in your dataset:"
            )
            # Per the original author's note: matplotlib first creates the
            # plot as a Figure, but once it is saved and read back in it is
            # an ndarray, which is why it is shown through an image
            # component rather than a plot component.
            self.text_length_distribution_plot.render()
            self.text_length_explainer.render()
            self.text_length_drop_down.render()
            self.text_length_df.render()

    def update(self, dstats: dmt_cls):
        """Build the component updates for a newly loaded dataset.

        Args:
            dstats: Dataset statistics exposing ``length_obj`` with
                ``avg_length``, ``std_length``, ``fig_lengths`` and
                ``lengths_df``.

        Returns:
            A dict mapping each output component to its new value or
            update. The dropdown and dataframe are hidden when there is no
            length table (``lengths_df`` is None or has no rows) and are
            explicitly made visible again when there is one.
        """
        length_obj = dstats.length_obj
        explainer_text = (
            f"The average length of text instances is **"
            f"{round(length_obj.avg_length, 2)} words**, "
            f"with a standard deviation of **"
            f"{round(length_obj.std_length, 2)}**."
        )
        # TODO: Add text on choosing the length you want to the dropdown.
        output = {
            self.text_length_distribution_plot: length_obj.fig_lengths,
            self.text_length_explainer: explainer_text,
        }

        choices = []
        if length_obj.lengths_df is not None:
            import numpy as np

            # Distinct lengths, longest first, as plain Python values.
            choices = np.sort(length_obj.lengths_df["length"].unique())[
                ::-1
            ].tolist()

        if choices:
            # visible=True undoes a previous visible=False from the branch
            # below; without it the components would stay hidden after a
            # dataset that had no length table.
            output[self.text_length_drop_down] = gr.Dropdown.update(
                choices=choices, value=choices[0], visible=True
            )
            output[self.text_length_df] = gr.update(
                value=self.update_text_length_df(choices[0], dstats),
                visible=True,
            )
        else:
            # Nothing to select from (no table, or an empty one): hide both
            # instead of indexing into an empty list.
            output[self.text_length_df] = gr.update(visible=False)
            output[self.text_length_drop_down] = gr.update(visible=False)
        return output

    def output_components(self):
        """Return the components that update() produces values for."""
        return [
            self.text_length_distribution_plot,
            self.text_length_explainer,
            self.text_length_drop_down,
            self.text_length_df,
        ]

    def add_events(self, state: gr.State):
        """Refresh the dataframe whenever the dropdown selection changes.

        Args:
            state: Gradio state passed as the second input to
                update_text_length_df (its ``dstats`` argument).
        """
        self.text_length_drop_down.change(
            self.update_text_length_df,
            inputs=[self.text_length_drop_down, state],
            outputs=[self.text_length_df],
        )