Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| from widgets.widget_base import Widget | |
| from data_measurements.dataset_statistics import DatasetStatisticsCacheClass as dmt_cls | |
| import utils | |
# Set up logging for this module through the project's `utils` helper.
# NOTE(review): `logs` is presumably a logger object — confirm against
# `utils.prepare_logging`, which is not visible here.
logs = utils.prepare_logging(__file__)
class GeneralStats(Widget):
    """Widget backing the "General Text Statistics" tab.

    Owns four Gradio components: a Markdown summary, a DataFrame of the top
    vocabulary, and two Markdown lines reporting missing values and
    duplicates. The components are created unrendered in ``__init__`` and
    placed on the page by :meth:`render`; :meth:`update` computes their new
    values from a dataset-statistics object.
    """

    def __init__(self) -> None:
        """Create the tab's components with ``render=False``.

        Rendering is deferred so the components are only placed on the page
        when :meth:`render` is called.
        """
        self.general_stats = gr.Markdown(render=False)
        self.general_stats_top_vocab = gr.DataFrame(render=False)
        self.general_stats_missing = gr.Markdown(render=False)
        self.general_stats_duplicates = gr.Markdown(render=False)

    def render(self) -> None:
        """Render the components inside a "General Text Statistics" tab item.

        Display order is: summary text, missing-value line, duplicates line,
        then the top-vocabulary table.
        """
        with gr.TabItem("General Text Statistics"):
            self.general_stats.render()
            self.general_stats_missing.render()
            self.general_stats_duplicates.render()
            self.general_stats_top_vocab.render()

    def update(self, dstats: dmt_cls) -> dict:
        """Build the new value for every component from ``dstats``.

        Reads ``total_words``, ``total_open_words``, ``sorted_top_vocab_df``,
        ``text_nan_count`` and ``dups_frac`` from ``dstats``.

        Returns:
            A dict mapping each of this widget's components to the value it
            should display.
        """
        # Assembled line by line so the emitted text does not depend on how
        # this method happens to be indented in the source file.
        general_stats_text = (
            "\n"
            "Use this widget to check whether the terms you see most "
            "represented in the dataset make sense for the goals of the "
            "dataset.\n"
            f"There are {dstats.total_words} total words.\n"
            f"There are {dstats.total_open_words} after removing closed "
            "class words.\n"
            "The most common [open class words]"
            "(https://dictionary.apa.org/open-class-words) "
            "and their counts are:\n"
        )
        # Round numeric columns to 4 decimal places for display.
        top_vocab = pd.DataFrame(dstats.sorted_top_vocab_df).round(4)
        missing_text = (
            f"There are {dstats.text_nan_count} missing values in the dataset"
        )
        if dstats.dups_frac > 0:
            # dups_frac is treated as a fraction and shown as a percentage.
            dupes_pct = round(dstats.dups_frac * 100, 2)
            dupes_text = (
                f"The dataset is {dupes_pct}% duplicates. "
                "For more information about the duplicates, "
                "click the 'Duplicates' tab."
            )
        else:
            dupes_text = "There are 0 duplicate items in the dataset"
        return {
            self.general_stats: general_stats_text,
            self.general_stats_top_vocab: top_vocab,
            self.general_stats_missing: missing_text,
            self.general_stats_duplicates: dupes_text,
        }

    def output_components(self) -> list:
        """Return the components whose values :meth:`update` supplies."""
        return [
            self.general_stats,
            self.general_stats_top_vocab,
            self.general_stats_missing,
            self.general_stats_duplicates,
        ]

    def add_events(self, state: gr.State) -> None:
        """Register event handlers; this widget has none, so do nothing."""