edbeeching committed on
Commit
684d1a6
·
1 Parent(s): 3e3c42b

fixing descriptions

Browse files
Files changed (1) hide show
  1. app.py +52 -12
app.py CHANGED
@@ -546,25 +546,65 @@ def main():
546
  gr.Image("dataforge.png", show_label=False, show_download_button=False, container=False, height=300)
547
  # Store the current oauth token for use in submit_request
548
  current_oauth_token = gr.State(None)
549
-
550
- # Title
551
- gr.Markdown("# DataForge")
 
552
 
 
 
553
  # Main description
554
  gr.Markdown("""
555
- This tool allows you to **generate synthetic data from existing datasets**: you get expanded training data from your prompts, super useful for all your **fine-tuning/research/data augmentation** needs!
556
- """)
557
 
558
- # PRO sentence
559
- gr.Markdown("**🎯 FREE for HuggingFace PRO users (10,000 samples) • 100 samples for free users**")
560
 
561
- # Sign in button
562
  with gr.Row():
563
- gr.Markdown("") # Empty space for alignment
564
- login_button = gr.LoginButton(value="🔑 Sign in", size="sm")
565
- gr.Markdown("") # Empty space for alignment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
- signin_message = gr.Markdown("## 🔑 Sign In Required\n\nPlease sign in with your Hugging Face account to access the synthetic data generation service. Click the **Sign in** button above to continue.", visible=True)
 
568
  main_interface = gr.Column(visible=False)
569
 
570
  with main_interface:
 
546
  gr.Image("dataforge.png", show_label=False, show_download_button=False, container=False, height=300)
547
  # Store the current oauth token for use in submit_request
548
  current_oauth_token = gr.State(None)
549
+ with gr.Row():
550
+ gr.Markdown("") # Empty space for alignment
551
+ login_button = gr.LoginButton(value="🔑 Sign in", size="sm")
552
+ gr.Markdown("") # Empty space for alignment
553
 
554
+ signin_message = gr.Markdown("## 🔑 Sign In Required\n\nPlease sign in with your Hugging Face account to access the synthetic data generation service. Click the **Sign in** button above to continue.", visible=True)
555
+
556
  # Main description
557
  gr.Markdown("""
558
+ This tool allows you to **generate synthetic data from existing datasets**, for all your **fine-tuning/research/data augmentation** needs!
 
559
 
560
+ DataForge is built on top of [DataTrove](https://github.com/huggingface/datatrove), our backend data generation script is open-source and available on [GitHub](https://github.com/huggingface/dataforge). DataForge is **FREE** for HuggingFace PRO users (10,000 samples) • 100 samples for free users.
561
+ """)
562
 
563
+ # Usage guide and examples (right below description)
564
  with gr.Row():
565
+ with gr.Column(scale=1):
566
+ with gr.Accordion("Usage Guide", open=False):
567
+ gr.Markdown("""
568
+ **Step-by-Step Process:**
569
+ 1. **Load Dataset**: Enter a HF dataset name
570
+ 2. **Load Info**: Click "Load Dataset Info"
571
+ 3. **Choose Model**: Select from 20+ models
572
+ 4. **Configure**: Set generation parameters
573
+ 5. **Submit**: Monitor progress in Statistics tab
574
+
575
+ **Requirements:**
576
+ - Input dataset must be public on HF Hub
577
+ - Model must be publicly accessible
578
+ - Free users: 100 samples max, PRO: 10K max
579
+ - Token limit: 8,192 per sample
580
+ """)
581
+ with gr.Column(scale=1):
582
+ with gr.Accordion("Examples", open=False):
583
+ gr.Markdown("""
584
+ **Popular Use Cases:**
585
+
586
+ **Educational**: Q&A datasets
587
+ - Models: Qwen3-4B, Phi-3.5-mini
588
+ - Temperature: 0.3-0.5
589
+
590
+ **Conversational**: Multi-turn dialogues
591
+ - Models: Llama-3.2-3B, Mistral-7B
592
+ - Temperature: 0.7-0.9
593
+
594
+ **Code**: Problem → Solution
595
+ - Models: Qwen2.5-Coder, DeepSeek-Coder
596
+ - Temperature: 0.1-0.3
597
+
598
+ **Example datasets to try:**
599
+ ```
600
+ simplescaling/s1K-1.1
601
+ HuggingFaceH4/ultrachat_200k
602
+ iamtarun/python_code_instructions_18k_alpaca
603
+ ```
604
+ """)
605
 
606
+
607
+ # Sign in button
608
  main_interface = gr.Column(visible=False)
609
 
610
  with main_interface: