Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import requests | |
| from io import BytesIO | |
def convert_hf_dataset(file_url: str):
    """Download a Hugging Face CSV/Parquet file and convert it to the other format.

    Args:
        file_url: URL of a ``.csv`` or ``.parquet`` file hosted on
            ``huggingface.co`` or one of its subdomains. The scheme may be
            omitted, in which case ``https://`` is assumed. Any query string
            or fragment (e.g. ``?download=true``) is ignored when detecting
            the file type.

    Returns:
        A ``(output_file, info_message)`` tuple. ``output_file`` is the path
        of the converted file (``"output.parquet"`` or ``"output.csv"``,
        written relative to the current working directory) and
        ``info_message`` is a text summary with the input file name, the
        target format and a preview of the first 10 rows.

    Raises:
        ValueError: If the URL host is not Hugging Face, or the URL path does
            not end in ``.csv`` or ``.parquet``.
        requests.RequestException: If the download fails, times out or
            returns an HTTP error status.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlparse

    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block the worker indefinitely.
    download_timeout = 30

    file_url = file_url.strip()

    # Ensure the URL has a scheme so that urlparse can locate the host.
    if not file_url.lower().startswith(("http://", "https://")):
        file_url = "https://" + file_url

    parsed = urlparse(file_url)

    # Validate the actual host rather than searching the whole string:
    # a substring test would accept "https://evil.example/huggingface.co/x.csv"
    # or "https://huggingface.co.evil.example/x.csv".
    host = (parsed.hostname or "").lower()
    if host != "huggingface.co" and not host.endswith(".huggingface.co"):
        raise ValueError("Please provide a URL from Hugging Face datasets.")

    # Detect the file type from the path only, so that links carrying a query
    # string are still recognised. Doing this before the download also avoids
    # fetching a file we would reject anyway.
    url_path = parsed.path.lower()
    if url_path.endswith(".csv"):
        source_is_csv = True
    elif url_path.endswith(".parquet"):
        source_is_csv = False
    else:
        raise ValueError("The URL must point to a .csv or .parquet file.")

    # Download the content from the URL.
    response = requests.get(file_url, timeout=download_timeout)
    response.raise_for_status()
    content = response.content

    if source_is_csv:
        # CSV input -> Parquet output.
        df = pd.read_csv(BytesIO(content))
        output_file = "output.parquet"
        df.to_parquet(output_file, index=False)
        converted_format = "Parquet"
    else:
        # Parquet input -> CSV output.
        df = pd.read_parquet(BytesIO(content))
        output_file = "output.csv"
        df.to_csv(output_file, index=False)
        converted_format = "CSV"

    # Create a preview of the top 10 rows.
    preview = df.head(10).to_string(index=False)

    # Last path segment only, so a query string never leaks into the name.
    input_name = parsed.path.rsplit("/", 1)[-1]
    info_message = (
        f"Input file: {input_name}\n"
        f"Converted file format: {converted_format}\n\n"
        f"Preview (Top 10 Rows):\n{preview}"
    )
    return output_file, info_message
# --- Gradio UI wiring -------------------------------------------------------
# Components are created in the same order as they appear in the interface:
# the URL input first, then the two outputs (converted file, text preview).
_url_input = gr.Textbox(
    label="Hugging Face Dataset URL",
    placeholder="e.g., huggingface.co/datasets/username/dataset/filename.csv",
)
_converted_file_output = gr.File(label="Converted File")
_preview_output = gr.Textbox(label="Preview (Top 10 Rows)", lines=15)

_APP_DESCRIPTION = (
    "Enter the URL of a Hugging Face dataset file (must end with .csv or .parquet). "
    "The app will automatically detect the file type, convert it to the opposite format, "
    "and display a preview of the top 10 rows."
)

# Single-function interface: one text input mapped to convert_hf_dataset,
# whose (file path, message) return value feeds the two output components.
demo = gr.Interface(
    fn=convert_hf_dataset,
    inputs=_url_input,
    outputs=[_converted_file_output, _preview_output],
    title="Hugging Face CSV <-> Parquet Converter",
    description=_APP_DESCRIPTION,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()