Spaces:
Runtime error
Runtime error
create app
Browse files- .gitignore +1 -0
- app.py +67 -0
- poetry.lock +0 -0
- pyproject.toml +18 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
app.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from huggingface_hub import hf_hub_download
|
| 3 |
+
import subprocess
|
| 4 |
+
|
| 5 |
+
def get_dataset_dependencies(dataset: str) -> set[str]:
    """Return the dependencies ``findpydeps`` reports for a dataset's script.

    The script file name is derived from the last ``/``-separated component
    of ``dataset`` plus ``.py`` (``org/name`` -> ``name.py``). That file is
    fetched from the dataset repository with ``hf_hub_download`` and handed
    to the ``findpydeps`` command-line tool.

    Args:
        dataset: Dataset repository id, e.g. ``"mnist"`` or ``"google/fleurs"``.

    Returns:
        The set of non-blank lines that ``findpydeps`` printed on stdout,
        with surrounding whitespace removed.

    Raises:
        RuntimeError: If ``findpydeps`` exits with a non-zero status.
        Exception: Anything raised by ``hf_hub_download`` (for instance when
            the script file cannot be fetched) propagates unchanged.
    """
    script_name = dataset.split("/")[-1] + ".py"
    input_file = hf_hub_download(
        repo_id=dataset, filename=script_name, repo_type="dataset"
    )
    result = subprocess.run(
        ["findpydeps", "-i", input_file, "--no-header"],
        capture_output=True,
        text=True,
    )
    # A failed run must not be mistaken for "this script has no dependencies":
    # surface the tool's stderr so the caller can report what went wrong.
    if result.returncode != 0:
        raise RuntimeError(
            f"findpydeps failed for {input_file} "
            f"(exit code {result.returncode}): {result.stderr.strip()}"
        )
    stripped_lines = (line.strip() for line in result.stdout.splitlines())
    return {line for line in stripped_lines if line}
|
| 10 |
+
|
| 11 |
+
def update(datasets: str) -> str:
    """Collect the script dependencies of every dataset listed in ``datasets``.

    Each non-blank line of ``datasets`` is treated as one dataset name.
    Datasets whose processing raises are logged to stdout and skipped, so a
    single bad entry does not discard the results of the others.

    Args:
        datasets: Newline-separated dataset names; surrounding whitespace on
            each line and blank lines are ignored.

    Returns:
        The union of all dependencies found, sorted and joined with newlines
        (an empty string when nothing was found).
    """
    all_dependencies = set()
    for line in datasets.splitlines():
        dataset = line.strip()
        if not dataset:
            # Skip blank entries before logging so they leave no empty
            # lines in the output.
            continue
        print(dataset)
        try:
            dependencies = get_dataset_dependencies(dataset)
        except Exception as e:
            # Best effort per dataset: report the failure and keep going.
            print(f"Error processing {dataset}: {e}")
            continue
        print(f"Dependencies for {dataset} processed: {len(dependencies)}")
        all_dependencies.update(dependencies)
    return "\n".join(sorted(all_dependencies))
|
| 26 |
+
|
| 27 |
+
# NOTE(review): SOURCE lost its indentation; the nesting below follows the
# standard Gradio Blocks layout (two textboxes in a row, then the button and
# the examples) and should be confirmed against the original file.

# Heading and usage instructions rendered at the top of the page.
_INTRO_MARKDOWN = """# Script-based dataset dependencies

Paste a list of newline-separated dataset names, and then click **Run** to see the list of dependencies in their scripts.
"""

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Input (dataset names) and output (dependencies) side by side.
    with gr.Row():
        inp = gr.Textbox(
            placeholder="mnist\ncifar10",
            label="Datasets",
            lines=10,
            max_lines=10,
        )
        out = gr.Textbox(
            label="Dependencies",
            lines=10,
            max_lines=10,
            show_copy_button=True,
        )

    btn = gr.Button("Run")

    # Ready-made inputs; the last entry lists one dataset name per line.
    examples = [
        "mnist\ncifar10",
        "mnist",
        """espnet/yodas
gaia-benchmark/GAIA
google/fleurs
mozilla-foundation/common_voice_1_0
mozilla-foundation/common_voice_10_0
mozilla-foundation/common_voice_11_0
mozilla-foundation/common_voice_12_0
mozilla-foundation/common_voice_13_0
mozilla-foundation/common_voice_14_0
mozilla-foundation/common_voice_15_0
mozilla-foundation/common_voice_16_0
mozilla-foundation/common_voice_16_1
mozilla-foundation/common_voice_2_0
mozilla-foundation/common_voice_3_0
mozilla-foundation/common_voice_4_0
mozilla-foundation/common_voice_5_0
mozilla-foundation/common_voice_5_1
mozilla-foundation/common_voice_6_0
mozilla-foundation/common_voice_6_1
mozilla-foundation/common_voice_7_0
mozilla-foundation/common_voice_8_0
mozilla-foundation/common_voice_9_0
poloclub/diffusiondb
pufanyi/MIMICIT
speechcolab/gigaspeech
togethercomputer/RedPajama-Data-1T
togethercomputer/RedPajama-Data-V2
""",
    ]
    gr.Examples(examples=examples, inputs=inp, label="Example Datasets")

    # Clicking "Run" feeds the dataset list to update() and shows its result.
    btn.click(fn=update, inputs=inp, outputs=out)

demo.launch()
|
poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.poetry]
|
| 2 |
+
name = "find-script-based-datasets-dependencies"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = ""
|
| 5 |
+
authors = ["Sylvain Lesage <sylvain.lesage@huggingface.co>"]
|
| 6 |
+
readme = "README.md"
|
| 7 |
+
|
| 8 |
+
[tool.poetry.dependencies]
|
| 9 |
+
python = "^3.9"
|
| 10 |
+
gradio = "4.23.0"
|
| 11 |
+
findpydeps = "^0.2.6"
|
| 12 |
+
pip = "^24.0"
|
| 13 |
+
huggingface-hub = "^0.22.1"
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
[build-system]
|
| 17 |
+
requires = ["poetry-core"]
|
| 18 |
+
build-backend = "poetry.core.masonry.api"
|