Datasets-Convertor

Sleeping

App Files Community

openfree committed on Feb 17

Commit

90f89f0

verified ·

1 Parent(s): 1fd0c30

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -40

app.py CHANGED Viewed

@@ -3,59 +3,63 @@ import pandas as pd
 import requests
 from io import BytesIO
-def convert_file(input_file, file_url, conversion_type):
-    # 파일 업로드와 URL 입력 둘 다 없으면 에러 발생
-    if input_file is None and (file_url is None or file_url.strip() == ""):
-        raise ValueError("파일 업로드 또는 URL을 제공하세요.")
-    df = None
-    # 업로드된 파일이 없으면 URL에서 읽기
-    if input_file is None:
-        file_url = file_url.strip()
-        # URL 스킴이 없으면 기본적으로 "https://"를 추가
-        if not file_url.lower().startswith(("http://", "https://")):
-            file_url = "https://" + file_url
-        response = requests.get(file_url)
-        response.raise_for_status()
-        if conversion_type == "CSV to Parquet":
-            df = pd.read_csv(BytesIO(response.content))
-        else:  # Parquet to CSV
-            df = pd.read_parquet(BytesIO(response.content))
-    else:
-        # 파일 업로드가 있는 경우
-        file_path = input_file.name
-        if conversion_type == "CSV to Parquet":
-            df = pd.read_csv(file_path)
-        else:
-            df = pd.read_parquet(file_path)
-    # 변환 실행: CSV to Parquet 혹은 Parquet to CSV
-    if conversion_type == "CSV to Parquet":
         output_file = "output.parquet"
         df.to_parquet(output_file, index=False)
-    else:
         output_file = "output.csv"
         df.to_csv(output_file, index=False)
-    # 상위 10줄 미리보기 생성
     preview = df.head(10).to_string(index=False)
-    return output_file, preview
 demo = gr.Interface(
-    fn=convert_file,
-    inputs=[
-        gr.File(label="입력 파일 (CSV 또는 Parquet)"),
-        gr.Textbox(label="입력 파일 URL (선택)", placeholder="CSV 또는 Parquet 파일의 URL을 입력하세요."),
-        gr.Radio(choices=["CSV to Parquet", "Parquet to CSV"], label="변환 유형")
-    ],
     outputs=[
-        gr.File(label="변환된 파일"),
-        gr.Textbox(label="미리보기 (상위 10줄)")
     ],
-    title="CSV <-> Parquet 변환기",
-    description="변환 유형을 선택하고, 파일을 업로드하거나 URL을 입력하여 CSV와 Parquet 파일을 상호 변환합니다. 상위 10줄 미리보기도 제공합니다."
 )
 if __name__ == "__main__":

 import requests
 from io import BytesIO
+def convert_hf_dataset(file_url: str):
+    file_url = file_url.strip()
+    # Check that the URL is from Hugging Face
+    if "huggingface.co" not in file_url:
+        raise ValueError("Please provide a URL from Hugging Face datasets.")
+    # Ensure the URL has a scheme; if not, add "https://"
+    if not file_url.lower().startswith(("http://", "https://")):
+        file_url = "https://" + file_url
+    # Download the content from the URL
+    response = requests.get(file_url)
+    response.raise_for_status()
+    content = response.content
+    # Determine file type from URL extension and convert accordingly
+    if file_url.lower().endswith(".csv"):
+        # If it's a CSV, read it and convert to Parquet
+        df = pd.read_csv(BytesIO(content))
         output_file = "output.parquet"
         df.to_parquet(output_file, index=False)
+        converted_format = "Parquet"
+    elif file_url.lower().endswith(".parquet"):
+        # If it's a Parquet file, read it and convert to CSV
+        df = pd.read_parquet(BytesIO(content))
         output_file = "output.csv"
         df.to_csv(output_file, index=False)
+        converted_format = "CSV"
+    else:
+        raise ValueError("The URL must point to a .csv or .parquet file.")
+    # Create a preview of the top 10 rows
     preview = df.head(10).to_string(index=False)
+    info_message = (
+        f"Input file: {file_url.split('/')[-1]}\n"
+        f"Converted file format: {converted_format}\n\n"
+        f"Preview (Top 10 Rows):\n{preview}"
+    )
+    return output_file, info_message
 demo = gr.Interface(
+    fn=convert_hf_dataset,
+    inputs=gr.Textbox(
+        label="Hugging Face Dataset URL",
+        placeholder="e.g., huggingface.co/datasets/username/dataset/filename.csv"
+    ),
     outputs=[
+        gr.File(label="Converted File"),
+        gr.Textbox(label="Preview (Top 10 Rows)", lines=15)
     ],
+    title="Hugging Face CSV <-> Parquet Converter",
+    description=(
+        "Enter the URL of a Hugging Face dataset file (must end with .csv or .parquet). "
+        "The app will automatically detect the file type, convert it to the opposite format, "
+        "and display a preview of the top 10 rows."
+    )
 )
 if __name__ == "__main__":