Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,25 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
df = pd.read_parquet(parquet_file)
|
| 6 |
jsonl_data = df.to_json(orient='records', lines=True)
|
| 7 |
return jsonl_data
|
| 8 |
|
| 9 |
demo = gr.Interface(
|
| 10 |
fn=convert_parquet_to_jsonl,
|
| 11 |
-
inputs=[gr.File(label="Parquet File")],
|
| 12 |
outputs=[gr.Textbox(label="JSONL Output")],
|
| 13 |
title="Parquet to JSONL Converter",
|
| 14 |
-
description="
|
| 15 |
)
|
| 16 |
|
| 17 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
import requests
|
| 4 |
+
import spaces
|
| 5 |
|
| 6 |
+
@spaces.GPU
|
| 7 |
+
def convert_parquet_to_jsonl(parquet_file_or_url):
    """Convert a Parquet file to a JSON Lines string.

    Args:
        parquet_file_or_url: One of
            * an ``http://`` / ``https://`` URL (``str``) pointing at a
              downloadable Parquet file,
            * a local filesystem path (``str`` or ``os.PathLike``), or
            * an uploaded-file object exposing the on-disk path as ``.name``.

    Returns:
        The table serialized with ``DataFrame.to_json(orient='records',
        lines=True)``: one JSON object per row, one row per line.

    Raises:
        ValueError: If the input is ``None`` or a blank string.
        requests.HTTPError: If the URL download returns an error status.
    """
    # Local imports keep this function self-contained; the module header
    # does not import these stdlib modules.
    import io
    import os

    if parquet_file_or_url is None:
        raise ValueError("No Parquet file or URL was provided.")

    if isinstance(parquet_file_or_url, str):
        source = parquet_file_or_url.strip()
        if not source:
            raise ValueError("No Parquet file or URL was provided.")
        if source.lower().startswith(("http://", "https://")):
            # Bound the wait and fail loudly on 4xx/5xx so an HTML error
            # page is never handed to the Parquet reader.
            response = requests.get(source, timeout=60)
            response.raise_for_status()
            # read_parquet takes a path or a file-like object, so wrap the
            # downloaded bytes in an in-memory binary stream.
            parquet_file = io.BytesIO(response.content)
        else:
            parquet_file = source
    elif isinstance(parquet_file_or_url, os.PathLike):
        # Path objects also have a ``.name`` (the basename only), so they
        # must be passed through whole rather than via the branch below.
        parquet_file = parquet_file_or_url
    else:
        # Uploaded-file wrappers carry their temp-file path in ``.name``;
        # anything without it (e.g. an open binary stream) is used as-is.
        parquet_file = getattr(parquet_file_or_url, "name", parquet_file_or_url)

    df = pd.read_parquet(parquet_file)
    return df.to_json(orient='records', lines=True)
|
| 16 |
|
| 17 |
def _convert_uploaded_or_linked(parquet_file, parquet_url):
    """Pass whichever input the user filled in to the converter.

    Args:
        parquet_file: Value of the file-upload component (falsy when nothing
            was uploaded).
        parquet_url: Text of the URL box (empty when left blank).

    Returns:
        The JSONL text produced by ``convert_parquet_to_jsonl``.
    """
    # An uploaded file takes precedence; otherwise fall back to the URL text.
    return convert_parquet_to_jsonl(parquet_file or parquet_url)


# Each entry in ``inputs`` maps to one positional argument of ``fn``, so the
# two alternative sources are listed as separate components instead of being
# joined with ``|`` (an operator applied to two component instances, not a
# way to declare "either input").
demo = gr.Interface(
    fn=_convert_uploaded_or_linked,
    inputs=[
        gr.File(label="Parquet File"),
        gr.Textbox(label="Parquet File URL"),
    ],
    outputs=[gr.Textbox(label="JSONL Output")],
    title="Parquet to JSONL Converter",
    description="Input a Parquet file by a downloadable link or file upload and convert it to JSONL format",
)
|
| 24 |
|
| 25 |
if __name__ == "__main__":
|