Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import AutoConfig # Required for Hugging Face integration
|
| 3 |
from calc_params import calc_params # Import calc_params from the new file
|
|
|
|
| 4 |
|
| 5 |
# ---- Helper Functions ---- #
|
| 6 |
def get_hf_model_args(hf_model_name_or_path):
|
|
@@ -320,35 +321,39 @@ with gr.Blocks() as demo:
|
|
| 320 |
gr.Markdown("""
|
| 321 |
## FLOP Calculation
|
| 322 |
|
| 323 |
-
FLOP Calculation
|
| 324 |
-
|
|
|
|
|
|
|
| 325 |
""")
|
| 326 |
with gr.Row():
|
| 327 |
-
with gr.Column():
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
| 352 |
kv_size_ratio = gr.Number(
|
| 353 |
label="KV Size Ratio",
|
| 354 |
value=1.0,
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import AutoConfig # Required for Hugging Face integration
|
| 3 |
from calc_params import calc_params # Import calc_params from the new file
|
| 4 |
+
import math
|
| 5 |
|
| 6 |
# ---- Helper Functions ---- #
|
| 7 |
def get_hf_model_args(hf_model_name_or_path):
|
|
|
|
| 321 |
gr.Markdown("""
|
| 322 |
## FLOP Calculation
|
| 323 |
|
| 324 |
+
FLOP Calculation calculates the number of theoretical FLOPs required to train a model on t tokens.
|
| 325 |
+
See [Transformers Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how FLOPs are calculated.
|
| 326 |
+
Other good resources that we consulted are the [Chinchilla Paper](https://arxiv.org/abs/2203.15556) and
|
| 327 |
+
[Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf).
|
| 328 |
""")
|
| 329 |
with gr.Row():
|
| 330 |
+
with gr.Column("Generatable"):
|
| 331 |
+
with gr.Group():
|
| 332 |
+
hf_model_name_or_path = gr.Textbox(
|
| 333 |
+
label="HuggingFace Model Name or Path",
|
| 334 |
+
info="Name of the HuggingFace Hub repository or the local file path for it"
|
| 335 |
+
)
|
| 336 |
+
vocab_size = gr.Number(
|
| 337 |
+
label="Vocab Size",
|
| 338 |
+
value=51200,
|
| 339 |
+
info="How many tokens are in the embedding layer"
|
| 340 |
+
)
|
| 341 |
+
hidden_size = gr.Number(
|
| 342 |
+
label="Hidden Size",
|
| 343 |
+
value=6144,
|
| 344 |
+
info="Dimension of the model's hidden size"
|
| 345 |
+
)
|
| 346 |
+
sequence_length = gr.Number(
|
| 347 |
+
label="Sequence Length",
|
| 348 |
+
value=2048,
|
| 349 |
+
info="Sequence length used for training"
|
| 350 |
+
)
|
| 351 |
+
num_layers = gr.Number(
|
| 352 |
+
label="Number of Layers",
|
| 353 |
+
value=44,
|
| 354 |
+
info="Number of transformer layers used in the model"
|
| 355 |
+
)
|
| 356 |
+
with gr.Column("Generatable"):
|
| 357 |
kv_size_ratio = gr.Number(
|
| 358 |
label="KV Size Ratio",
|
| 359 |
value=1.0,
|