import os
import subprocess
import tempfile

import gradio as gr
import pandas as pd
import sweetviz as sv
from huggingface_hub.hf_api import create_repo, upload_file
from huggingface_hub.repository import Repository
def analyze_datasets(dataset, dataset_name, username, token, column=None, pairwise="off"):
    """Profile an uploaded CSV with SweetViz and publish the report as a static Space.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        dataset_name: Name of the Space to create; also used as the README title.
        username: Hub namespace under which the Space is created.
        token: Hugging Face token passed to ``create_repo`` and ``upload_file``.
        column: Optional target column. ``None``, an empty string or a
            whitespace-only string all mean "no target feature".
        pairwise: Forwarded as SweetViz's ``pairwise_analysis`` argument.
            ``None`` (no option selected in the UI) falls back to ``"off"``.

    Returns:
        A message containing the value returned by ``create_repo``.
    """
    df = pd.read_csv(dataset.name)

    # An untouched radio group delivers None; keep the documented default.
    pairwise_mode = pairwise or "off"

    # A text field that was left empty delivers "" rather than None, so test
    # for actual content instead of identity with None.
    if column and column.strip():
        analyze_report = sv.analyze(df, target_feat=column, pairwise_analysis=pairwise_mode)
    else:
        analyze_report = sv.analyze(df, pairwise_analysis=pairwise_mode)

    repo_id = f"{username}/{dataset_name}"
    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"

    # Each call works in its own scratch directory so that concurrent requests
    # cannot overwrite one another's index.html / README.md.
    with tempfile.TemporaryDirectory() as work_dir:
        report_path = os.path.join(work_dir, "index.html")
        readme_path = os.path.join(work_dir, "README.md")

        analyze_report.show_html(report_path, open_browser=False)
        # The README contains an emoji, so the encoding must be explicit.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme)

        repo_url = create_repo(repo_id, repo_type="space", token=token, space_sdk="static", private=False)
        upload_file(path_or_fileobj=report_path, path_in_repo="index.html", repo_id=repo_id, repo_type="space", token=token)
        upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id, repo_type="space", token=token)

    return f"Your dataset report will be ready at {repo_url}"
def compare_column_values(dataset, dataset_name, username, token, column, category):
    """Compare rows where ``column == category`` against all other rows and publish the report.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        dataset_name: Name of the Space to create; also used as the README title.
        username: Hub namespace under which the Space is created.
        token: Hugging Face token passed to ``create_repo`` and ``upload_file``.
        column: Name of the column whose values define the two subsets.
        category: Value of ``column`` selecting the first subset.

    Returns:
        A message containing the value returned by ``create_repo``.
    """
    df = pd.read_csv(dataset.name)

    in_category = df[column] == category
    if not in_category.any():
        # The UI delivers the category as text; retry on the string form of
        # the column so that numeric columns can still be matched.
        in_category = df[column].astype(str) == str(category)

    # compare_intra expects a pair of display names: one for the rows where
    # the condition is True and one for the remaining rows.
    subset_names = [str(category), f"Not {category}"]
    compare_report = sv.compare_intra(df, in_category, subset_names)

    repo_id = f"{username}/{dataset_name}"
    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"

    # Each call works in its own scratch directory so that concurrent requests
    # cannot overwrite one another's index.html / README.md.
    with tempfile.TemporaryDirectory() as work_dir:
        report_path = os.path.join(work_dir, "index.html")
        readme_path = os.path.join(work_dir, "README.md")

        compare_report.show_html(report_path, open_browser=False)
        # The README contains an emoji, so the encoding must be explicit.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme)

        repo_url = create_repo(repo_id, repo_type="space", token=token, space_sdk="static", private=False)
        upload_file(path_or_fileobj=report_path, path_in_repo="index.html", repo_id=repo_id, repo_type="space", token=token)
        upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id, repo_type="space", token=token)

    return f"Your dataset report will be ready at {repo_url}"
def compare_dataset_splits(dataset, dataset_name, username, token, splits):
    """Randomly split an uploaded CSV in two, compare the parts and publish the report.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        dataset_name: Name of the Space to create; also used as the README title.
        username: Hub namespace under which the Space is created.
        token: Hugging Face token passed to ``create_repo`` and ``upload_file``.
        splits: Fraction of rows sampled into the "Training Data" part; the
            remaining rows form the "Test Data" part.

    Returns:
        A message containing the value returned by ``create_repo``.

    Raises:
        ValueError: If ``splits`` is missing or not strictly between 0 and 1,
            or if the resulting split leaves one of the two parts empty.
    """
    # Without this check a missing value makes DataFrame.sample fall back to a
    # single row, and out-of-range values yield an empty part or a pandas error.
    if splits is None or not 0 < splits < 1:
        raise ValueError("Split ratio must be a fraction strictly between 0 and 1, e.g. 0.8.")

    df = pd.read_csv(dataset.name)
    train = df.sample(frac=splits)
    test = df.loc[df.index.difference(train.index)]
    if train.empty or test.empty:
        raise ValueError("The dataset is too small to be split with the given ratio.")

    compare_report = sv.compare([train, "Training Data"], [test, "Test Data"])

    repo_id = f"{username}/{dataset_name}"
    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"

    # Each call works in its own scratch directory so that concurrent requests
    # cannot overwrite one another's index.html / README.md.
    with tempfile.TemporaryDirectory() as work_dir:
        report_path = os.path.join(work_dir, "index.html")
        readme_path = os.path.join(work_dir, "README.md")

        compare_report.show_html(report_path, open_browser=False)
        # The README contains an emoji, so the encoding must be explicit.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme)

        repo_url = create_repo(repo_id, repo_type="space", token=token, space_sdk="static", private=False)
        upload_file(path_or_fileobj=report_path, path_in_repo="index.html", repo_id=repo_id, repo_type="space", token=token)
        upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id, repo_type="space", token=token)

    return f"Your dataset report will be ready at {repo_url}"
# UI definition: one tab per analysis, each wiring its own inputs to the
# matching report function and showing the returned message in a textbox.
# NOTE(review): the nesting below places every widget of a tab inside its
# single column — confirm this matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("""# Easy Analysis🪄🌟✨""")
    gr.Markdown("""This app enables you to run three type of dataset analysis and pushes the interactive reports to your Hugging Face Hub profile as a Space. It uses SweetViz in the back.""")

    with gr.Tabs():
        # Tab 1: whole-dataset profile, optionally against a target column.
        with gr.TabItem("Analyze"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(""" ## Analyze Dataset """)
                    gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
                    analyze_file = gr.File(label="Dataset")
                    analyze_target = gr.Text(label="Compare dataset against a target variable (Optional)")
                    analyze_pairwise = gr.Radio(["off", "on"], label="Enable pairwise analysis")
                    analyze_token = gr.Textbox(label="Your Hugging Face Token")
                    analyze_user = gr.Textbox(label="Your Hugging Face User Name")
                    analyze_name = gr.Textbox(label="Dataset Name")
                    gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    analyze_button = gr.Button("Infer")
                    analyze_status = gr.StatusTracker(cover_container=True)
                    analyze_result = gr.outputs.Textbox()
                    analyze_button.click(
                        analyze_datasets,
                        inputs=[
                            analyze_file,
                            analyze_name,
                            analyze_user,
                            analyze_token,
                            analyze_target,
                            analyze_pairwise,
                        ],
                        outputs=analyze_result,
                        status_tracker=analyze_status,
                    )

        # Tab 2: random split of the dataset and comparison of both parts.
        with gr.TabItem("Compare Splits"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(""" ## Compare Splits""")
                    gr.Markdown("Split a dataset and compare splits. You need to give a fraction, e.g. 0.8.")
                    splits_file = gr.File(label="Dataset")
                    splits_ratio = gr.Number(label="Split Ratios")
                    gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    splits_token = gr.Textbox(label="Your Hugging Face Token")
                    splits_user = gr.Textbox(label="Your Hugging Face User Name")
                    splits_name = gr.Textbox(label="Dataset Name")
                    splits_button = gr.Button("Infer")
                    splits_status = gr.StatusTracker(cover_container=True)
                    splits_result = gr.outputs.Textbox()
                    splits_button.click(
                        compare_dataset_splits,
                        inputs=[
                            splits_file,
                            splits_name,
                            splits_user,
                            splits_token,
                            splits_ratio,
                        ],
                        outputs=splits_result,
                        status_tracker=splits_status,
                    )

        # Tab 3: one category of a column compared against the other rows.
        with gr.TabItem("Compare Subsets"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(""" ## Compare Subsets""")
                    gr.Markdown("Compare subsets of a dataset, e.g. you can pick Age Group column and compare adult category against young.")
                    subsets_file = gr.File(label="Dataset")
                    subsets_column = gr.Text(label="Enter column:")
                    subsets_category = gr.Text(label="Enter category:")
                    gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    subsets_token = gr.Textbox(label="Your Hugging Face Token")
                    subsets_user = gr.Textbox(label="Your Hugging Face User Name")
                    subsets_name = gr.Textbox(label="Dataset Name")
                    subsets_button = gr.Button("Run Analysis")
                    subsets_status = gr.StatusTracker(cover_container=True)
                    subsets_result = gr.outputs.Textbox()
                    subsets_button.click(
                        compare_column_values,
                        inputs=[
                            subsets_file,
                            subsets_name,
                            subsets_user,
                            subsets_token,
                            subsets_column,
                            subsets_category,
                        ],
                        outputs=subsets_result,
                        status_tracker=subsets_status,
                    )

demo.launch(debug=True)