| import pandas_profiling as pp | |
| from huggingface_hub.hf_api import create_repo | |
| from huggingface_hub.repository import Repository | |
| import gradio as gr | |
| import pandas as pd | |
| import subprocess | |
| import os | |
| import tempfile | |
# UI components, declared at module level so the Interface wiring at the
# bottom of the file can reference them by name.

# Credentials / target of the Space that will host the report.
token = gr.Textbox(label="Your Hugging Face Token")
username = gr.Textbox(label="Your Hugging Face User name")
dataset_name = gr.Textbox(label="Dataset Name")

# The file to profile (read with pandas.read_csv by profile_dataset).
dataset = gr.File(label="Dataset")

# Single text output carrying the status message returned by the callback.
output_text = gr.Textbox(label="Status")
def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV and publish the report as a static Space.

    Reads the CSV at ``dataset.name``, builds a pandas-profiling report titled
    ``"<dataset_name> Report"``, creates the Space ``<username>/<dataset_name>``
    with the ``static`` SDK, and pushes the rendered report to it as
    ``index.html``.

    Args:
        dataset: Uploaded file object; only its ``name`` attribute (a path
            passed to ``pd.read_csv``) is used.
        username: Namespace under which the Space is created.
        token: Hugging Face token used to create the Space and to
            authenticate the clone/push.
        dataset_name: Name of the Space; also used in the report title.

    Returns:
        A status message containing the URL returned by ``create_repo``.

    Raises:
        Nothing is caught here: errors raised by pandas, pandas-profiling,
        huggingface_hub or git propagate to the caller.
    """
    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    repo_url = create_repo(
        f"{username}/{dataset_name}",
        repo_type="space",
        token=token,
        space_sdk="static",
    )

    # Repository() clones the repo itself, so no separate `git clone` is
    # needed. The working copy lives in a throwaway directory so that
    # concurrent or repeated runs never collide on a path derived from
    # user input, and nothing is left behind on disk afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        repo = Repository(
            local_dir=workdir,
            clone_from=repo_url,
            # Authenticate git with the caller's token instead of relying on
            # whatever credentials happen to be cached on the host.
            use_auth_token=token,
            # NOTE(review): commits are attributed to this fixed identity
            # regardless of `username` - confirm that is intended.
            git_user="merveenoyan",
            git_email="merveenoyan@gmail.com",
        )
        # A static Space serves index.html from the repository root.
        profile.to_file(os.path.join(workdir, "index.html"))
        repo.git_add()
        repo.git_commit(commit_message="Dataset report")
        # The directory is removed when the `with` block exits, so the push
        # must complete (not merely start) before that point.
        repo.git_push()

    return f"Your dataset report will be ready at {repo_url}"
# Build the UI around profile_dataset and start the app. The order of
# `inputs` must match the callback's positional parameters:
# (dataset, username, token, dataset_name).
gr.Interface(
    fn=profile_dataset,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    enable_queue=True,
).launch(debug=True)