Spaces:

huggingface-projects
/

easy-analysis

Paused

App Files Files Community

merve HF Staff committed on Jul 1, 2022

Commit

0d87668

1 Parent(s): eea4def

Upload app.py

Browse files

Files changed (1) hide show

app.py +135 -0

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import gradio as gr
+import pandas as pd
+from huggingface_hub.hf_api import create_repo, upload_file
+from huggingface_hub.repository import Repository
+import subprocess
+import os
+import tempfile
+import sweetviz as sv
+def _publish_report(report, dataset_name, username, token):
+    """Render a SweetViz report and push it to the Hub as a static Space.
+
+    Args:
+        report: SweetViz report object exposing ``show_html``.
+        dataset_name: Name of the Space to create under ``username``.
+        username: Hugging Face user (or organisation) name.
+        token: Hugging Face access token used for repo creation and uploads.
+
+    Returns:
+        The user-facing message containing the URL returned by ``create_repo``.
+    """
+    repo_id = f"{username}/{dataset_name}"
+    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
+    # A per-call scratch directory keeps simultaneous requests from
+    # overwriting each other's index.html / README.md in the working directory.
+    with tempfile.TemporaryDirectory() as workdir:
+        html_path = os.path.join(workdir, "index.html")
+        readme_path = os.path.join(workdir, "README.md")
+        # Render first so a failing report does not leave an empty Space behind.
+        report.show_html(html_path, open_browser=False)
+        # The front matter contains an emoji, so the encoding must be explicit.
+        with open(readme_path, "w", encoding="utf-8") as f:
+            f.write(readme)
+        repo_url = create_repo(repo_id, repo_type="space", token=token, space_sdk="static", private=False)
+        upload_file(path_or_fileobj=html_path, path_in_repo="index.html", repo_id=repo_id, repo_type="space", token=token)
+        upload_file(path_or_fileobj=readme_path, path_in_repo="README.md", repo_id=repo_id, repo_type="space", token=token)
+    return f"Your dataset report will be ready at {repo_url}"
+def analyze_datasets(dataset, dataset_name, username, token, column=None, pairwise="off"):
+    """Analyze a CSV dataset, optionally against a target column, and publish the report.
+
+    Args:
+        dataset: Uploaded file object; its ``name`` attribute is read as a CSV path.
+        dataset_name: Name of the Space that will host the report.
+        username: Hugging Face user name owning the Space.
+        token: Hugging Face access token.
+        column: Optional target column; ``None`` or blank means no target.
+        pairwise: SweetViz ``pairwise_analysis`` setting; a missing value falls
+            back to ``"off"``.
+
+    Returns:
+        Message with the URL of the created Space.
+    """
+    df = pd.read_csv(dataset.name)
+    # A text input left blank arrives as "" rather than None, so test for
+    # emptiness instead of identity with None.
+    target = column.strip() if isinstance(column, str) else column
+    # An unselected radio button may arrive as None; use the signature default.
+    pairwise = pairwise or "off"
+    if target:
+        analyze_report = sv.analyze(df, target_feat=target, pairwise_analysis=pairwise)
+    else:
+        analyze_report = sv.analyze(df, pairwise_analysis=pairwise)
+    # The report is rendered for both branches (previously only without a target).
+    return _publish_report(analyze_report, dataset_name, username, token)
+def compare_column_values(dataset, dataset_name, username, token, column, category):
+    """Compare rows whose ``column`` equals ``category`` against all other rows.
+
+    Args:
+        dataset: Uploaded file object; its ``name`` attribute is read as a CSV path.
+        dataset_name: Name of the Space that will host the report.
+        username: Hugging Face user name owning the Space.
+        token: Hugging Face access token.
+        column: Column whose values define the two subsets.
+        category: Value of ``column`` selecting the first subset.
+
+    Returns:
+        Message with the URL of the created Space.
+
+    Raises:
+        KeyError: If ``column`` is not present in the dataset.
+    """
+    df = pd.read_csv(dataset.name)
+    values = df[column]
+    # The category is typed into a text box, so compare string forms; this lets
+    # numeric columns match as well.
+    in_category = values.astype(str) == str(category)
+    # compare_intra expects a pair of labels: one for the rows matching the
+    # condition and one for the rest. Use the real value as the second label
+    # only when exactly one other value exists.
+    others = [str(v) for v in values[~in_category].dropna().unique()]
+    other_name = others[0] if len(others) == 1 else f"Not {category}"
+    compare_report = sv.compare_intra(df, in_category, [str(category), other_name])
+    return _publish_report(compare_report, dataset_name, username, token)
+def compare_dataset_splits(dataset, dataset_name, username, token, splits):
+    """Randomly split a CSV dataset and publish a train-vs-test comparison report.
+
+    Args:
+        dataset: Uploaded file object; its ``name`` attribute is read as a CSV path.
+        dataset_name: Name of the Space that will host the report.
+        username: Hugging Face user name owning the Space.
+        token: Hugging Face access token.
+        splits: Fraction of rows sampled into the training split, e.g. 0.8.
+
+    Returns:
+        Message with the URL of the created Space.
+
+    Raises:
+        ValueError: If ``splits`` is missing or not strictly between 0 and 1.
+    """
+    # Reject missing or out-of-range fractions up front: None would silently
+    # sample a single row, and 0 or 1 would leave one side empty.
+    if splits is None or not 0 < splits < 1:
+        raise ValueError("Split ratio must be a fraction strictly between 0 and 1, e.g. 0.8.")
+    df = pd.read_csv(dataset.name)
+    train = df.sample(frac=splits)
+    # Everything not sampled into the training split becomes the test split.
+    test = df.drop(train.index)
+    compare_report = sv.compare([train, "Training Data"], [test, "Test Data"])
+    return _publish_report(compare_report, dataset_name, username, token)
+# Build the three-tab UI. Each tab collects a CSV file, Hub credentials and a
+# report name, then wires its button to one of the handler functions above.
+# The names dataset/token/username/dataset_name/... are rebound in every tab;
+# each click() call is given the component objects that exist at that point.
+with gr.Blocks() as demo:
+    main_title = gr.Markdown("""# Easy Analysis🪄🌟✨""")
+    main_desc = gr.Markdown("""This app enables you to run three type of dataset analysis and pushes the interactive reports to your Hugging Face Hub profile as a Space. It uses SweetViz in the back.""")
+    with gr.Tabs():
+        # Tab 1: whole-dataset analysis, optionally against a target column.
+        with gr.TabItem("Analyze") as analyze:
+            with gr.Row():
+                with gr.Column():
+                    title = gr.Markdown(""" ## Analyze Dataset """)
+                    description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
+                    dataset = gr.File(label = "Dataset")
+                    column = gr.Text(label = "Compare dataset against a target variable (Optional)")
+                    # NOTE(review): no option is preselected here — confirm what the handler receives when the user picks nothing.
+                    pairwise = gr.Radio(["off", "on"], label = "Enable pairwise analysis")
+                    # NOTE(review): the token is entered in a plain text box — consider masking it if the installed Gradio version supports that.
+                    token = gr.Textbox(label = "Your Hugging Face Token")
+                    username = gr.Textbox(label = "Your Hugging Face User Name")
+                    dataset_name = gr.Textbox(label = "Dataset Name")
+                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
+                    inference_run = gr.Button("Infer")
+                    # NOTE(review): gr.StatusTracker and gr.outputs.Textbox look version-specific — confirm against the pinned Gradio release.
+                    inference_progress = gr.StatusTracker(cover_container=True)
+                # Created inside the Row but outside the Column.
+                outcome = gr.outputs.Textbox()
+                # inputs are passed positionally, in the order of analyze_datasets' parameters.
+                inference_run.click(
+                    analyze_datasets,
+                    inputs=[dataset, dataset_name, username, token, column, pairwise],
+                    outputs=outcome,
+                    status_tracker=inference_progress,
+                )
+        # Tab 2: random split of the dataset and comparison of both parts.
+        with gr.TabItem("Compare Splits") as compare_splits:
+            with gr.Row():
+                with gr.Column():
+                    title = gr.Markdown(""" ## Compare Splits""")
+                    description = gr.Markdown("Split a dataset and compare splits. You need to give a fraction, e.g. 0.8.")
+                    dataset = gr.File(label = "Dataset")
+                    split_ratio = gr.Number(label = "Split Ratios")
+                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
+                    token = gr.Textbox(label = "Your Hugging Face Token")
+                    username = gr.Textbox(label = "Your Hugging Face User Name")
+                    dataset_name = gr.Textbox(label = "Dataset Name")
+                    inference_run = gr.Button("Infer")
+                    inference_progress = gr.StatusTracker(cover_container=True)
+                outcome = gr.outputs.Textbox()
+                # inputs are passed positionally, in the order of compare_dataset_splits' parameters.
+                inference_run.click(
+                    compare_dataset_splits,
+                    inputs=[dataset, dataset_name, username, token, split_ratio],
+                    outputs=outcome,
+                    status_tracker=inference_progress,
+                )
+        # Tab 3: compare the rows of one category of a column against the rest.
+        with gr.TabItem("Compare Subsets") as compare_subsets:
+            with gr.Row():
+                with gr.Column():
+                    title = gr.Markdown(""" ## Compare Subsets""")
+                    description = gr.Markdown("Compare subsets of a dataset, e.g. you can pick Age Group column and compare adult category against young.")
+                    dataset = gr.File(label = "Dataset")
+                    column = gr.Text(label = "Enter column:")
+                    category = gr.Text(label = "Enter category:")
+                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
+                    token = gr.Textbox(label = "Your Hugging Face Token")
+                    username = gr.Textbox(label = "Your Hugging Face User Name")
+                    dataset_name = gr.Textbox(label = "Dataset Name")
+                    inference_run = gr.Button("Run Analysis")
+                    inference_progress = gr.StatusTracker(cover_container=True)
+                outcome = gr.outputs.Textbox()
+                # inputs are passed positionally, in the order of compare_column_values' parameters.
+                inference_run.click(
+                    compare_column_values,
+                    inputs=[dataset, dataset_name, username, token, column, category ],
+                    outputs=outcome,
+                    status_tracker=inference_progress,
+                )
+# Start the app at import time with debug=True.
+demo.launch(debug=True)