Spaces:

derek-thomas
/

transformer_calculator

Paused

App Files Files Community

derek-thomas committed on Sep 13, 2024

Commit

4db8e8b

verified ·

1 Parent(s): 4d4bbe8

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -27

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoConfig  # Required for Hugging Face integration
 from calc_params import calc_params  # Import calc_params from the new file
 # ---- Helper Functions ---- #
 def get_hf_model_args(hf_model_name_or_path):
@@ -320,35 +321,39 @@ with gr.Blocks() as demo:
             gr.Markdown("""
             ## FLOP Calculation
-            FLOP Calculation estimates the number of floating point operations (FLOPs) for training or inference of a model.
-            Provide the necessary model hyperparameters and click 'Calculate FLOPs' to get a result.
             """)
             with gr.Row():
-                with gr.Column():
-                    hf_model_name_or_path = gr.Textbox(
-                        label="HuggingFace Model Name or Path",
-                        info="Name of the HuggingFace Hub repository or the local file path for it"
-                    )
-                    vocab_size = gr.Number(
-                        label="Vocab Size",
-                        value=51200,
-                        info="How many tokens are in the embedding layer"
-                    )
-                    hidden_size = gr.Number(
-                        label="Hidden Size",
-                        value=6144,
-                        info="Dimension of the model's hidden size"
-                    )
-                    sequence_length = gr.Number(
-                        label="Sequence Length",
-                        value=2048,
-                        info="Sequence length used for training"
-                    )
-                    num_layers = gr.Number(
-                        label="Number of Layers",
-                        value=44,
-                        info="Number of transformer layers used in the model"
-                    )
                     kv_size_ratio = gr.Number(
                         label="KV Size Ratio",
                         value=1.0,

 import gradio as gr
 from transformers import AutoConfig  # Required for Hugging Face integration
 from calc_params import calc_params  # Import calc_params from the new file
+import math
 # ---- Helper Functions ---- #
 def get_hf_model_args(hf_model_name_or_path):
             gr.Markdown("""
             ## FLOP Calculation
+            FLOP Calculation calculates the number of theoretical FLOPs required to train a model on t tokens.
+            See [Transformers Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how FLOPs are calculated.
+            Other good resources that we consulted are the [Chinchilla Paper](https://arxiv.org/abs/2203.15556) and
+            [Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf).
             """)
             with gr.Row():
+                with gr.Column("Generatable"):
+                    with gr.Group():
+                        hf_model_name_or_path = gr.Textbox(
+                            label="HuggingFace Model Name or Path",
+                            info="Name of the HuggingFace Hub repository or the local file path for it"
+                        )
+                        vocab_size = gr.Number(
+                            label="Vocab Size",
+                            value=51200,
+                            info="How many tokens are in the embedding layer"
+                        )
+                        hidden_size = gr.Number(
+                            label="Hidden Size",
+                            value=6144,
+                            info="Dimension of the model's hidden size"
+                        )
+                        sequence_length = gr.Number(
+                            label="Sequence Length",
+                            value=2048,
+                            info="Sequence length used for training"
+                        )
+                        num_layers = gr.Number(
+                            label="Number of Layers",
+                            value=44,
+                            info="Number of transformer layers used in the model"
+                        )
+                with gr.Column("Generatable"):
                     kv_size_ratio = gr.Number(
                         label="KV Size Ratio",
                         value=1.0,