Spaces:
Running
on
Zero
Running
on
Zero
| import time | |
| import gradio as gr | |
| import io | |
| import pandas as pd | |
| import spaces | |
def stream_output(filename: str):
    """Stream a generated dataset and yield UI updates after every chunk.

    A trailing ``.jsonl`` extension is removed from ``filename`` before it is
    handed to ``generate.stream_file``, which is called with an empty prompt,
    no columns, seed 42 and a size of 3.

    Args:
        filename: Requested dataset file name, with or without ``.jsonl``.

    Yields:
        ``(dataframe, file_content, state_message)`` tuples: the text received
        so far parsed as JSON Lines, the same text wrapped in a Markdown
        ``json`` code fence, and a progress / completion message.
    """
    extension = ".jsonl"
    if filename.endswith(extension):
        filename = filename[: -len(extension)]

    # Imported lazily, as in the original, so the module loads without it.
    from generate import stream_file

    accumulated = ""
    n_samples = 3
    started_at = time.time()
    chunks = stream_file(
        filename=filename,
        prompt="",
        columns=[],
        seed=42,
        size=n_samples,
    )
    for index, piece in enumerate(chunks):
        accumulated += piece
        # Everything received so far is re-parsed on each chunk.
        # NOTE(review): assumes each chunk ends on a complete JSON line and
        # that exactly `n_samples` chunks arrive -- confirm in stream_file.
        frame = pd.read_json(io.StringIO(accumulated), lines=True)
        if index + 1 == n_samples:
            status = f"β Done generating {n_samples} samples in {time.time() - started_at:.2f}s"
        else:
            status = f"βοΈ Generating... [{index}/{n_samples}]"
        yield frame, "```json\n" + accumulated + "\n```", status
def test(filename: str):
    """Stream a small fake dataset so the UI can be exercised without a model.

    Ten rows of the form ``{"i": <index>, "filename": <filename>}`` are
    produced one at a time, with a 0.1 s pause after each update.

    Args:
        filename: Requested file name; it must end with ``.jsonl``.

    Yields:
        ``(dataframe, file_content, state_message)`` tuples, in the order of
        the output components wired to the generate button. When the file
        name is rejected, a single ``(None, "", error_message)`` tuple is
        yielded and the generator stops.
    """
    # Local import keeps this block self-contained (json is standard library).
    import json

    if not filename.endswith(".jsonl"):
        # The message belongs in the third slot (status line). It used to be
        # yielded first, which sent the text to the DataFrame output and
        # blanked the status message.
        yield None, "", "β 404: File name must end with .jsonl"
        return
    content = ""
    size = 10
    start_time = time.time()
    for i in range(size):
        # json.dumps escapes quotes and backslashes in the user-supplied
        # name, so the accumulated text always stays valid JSON Lines.
        # ensure_ascii=False keeps non-ASCII names verbatim, as before.
        row = json.dumps({"i": i, "filename": filename}, ensure_ascii=False)
        content += row + "\n"
        df = pd.read_json(io.StringIO(content), lines=True)
        # NOTE(review): the leading symbols in these messages look like
        # mis-decoded emoji -- confirm against the original file encoding.
        state_msg = (
            f"β Done generating {size} samples in {time.time() - start_time:.2f}s"
            if i + 1 == size else
            f"βοΈ Generating... [{i}/{size}]"
        )
        yield df, "```json\n" + content + "\n```", state_msg
        time.sleep(0.1)
# Static copy shown at the top of the page.
title = "LLM DataGen"
description = "Generate and stream synthetic dataset files in JSON Lines format"

# Sample file names offered as one-click inputs; the first one doubles as the
# text box's initial value and placeholder.
examples = [
    "movies_data.jsonl",
    "common_first_names.jsonl",
    "bad_amazon_reviews_on_defunct_products_that_people_hate.jsonl",
    "dungeon_and_dragon_characters.jsonl",
]

with gr.Blocks() as demo:
    gr.Markdown(value=f"# {title}")
    gr.Markdown(value=description)

    # Input row: file name, example shortcuts, trigger button, status line.
    filename_comp = gr.Textbox(value=examples[0], placeholder=examples[0])
    gr.Examples(examples=examples, inputs=filename_comp)
    generate_button = gr.Button(value="Generate dataset")
    state_msg_comp = gr.Markdown(value="π₯ Ready to generate")

    # Output tabs: parsed table and raw file text.
    with gr.Tab("Dataset"):
        dataframe_comp = gr.DataFrame()
    with gr.Tab("File content"):
        with gr.Blocks(fill_height=True), gr.Row():
            file_content_comp = gr.Markdown()

    # The handler yields (dataframe, file content, status) in this order.
    generate_button.click(
        fn=test,
        inputs=filename_comp,
        outputs=[dataframe_comp, file_content_comp, state_msg_comp],
    )

demo.launch()