Spaces:

derek-thomas
/

transformer_calculator

Paused

App Files Community

derek-thomas committed on Sep 13, 2024

Commit

d65669a

verified ·

1 Parent(s): dcb01bb

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -50

app.py CHANGED Viewed

@@ -13,19 +13,18 @@ def convert_params(params):
     return "%s %s" % (s, size_name[i])
 # Get Hugging Face model configuration and update the parameters
-def get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length):
-    if hf_model_name_or_path:
-        try:
-            config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
-        except Exception as e:
-            return None, f"Error fetching Hugging Face model: {str(e)}"
-        # Update parameters with the Hugging Face model config values
-        num_layers = config.get("num_hidden_layers", num_layers)
-        hidden_size = config.get("hidden_size", hidden_size)
-        num_attention_heads = config.get("num_attention_heads", num_attention_heads)
-        vocab_size = config.get("vocab_size", vocab_size)
-        sequence_length = config.get("max_position_embeddings", sequence_length)
     return {
         "num_layers": num_layers,
@@ -37,16 +36,17 @@ def get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attent
 # ---- Memory Calculation ---- #
 def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
-    model_params, hf_error = get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length)
     if hf_error:
         return hf_error
-    num_layers = model_params["num_layers"]
-    hidden_size = model_params["hidden_size"]
-    num_attention_heads = model_params["num_attention_heads"]
-    vocab_size = model_params["vocab_size"]
-    sequence_length = model_params["sequence_length"]
     dp_degree = num_gpus / (tensor_parallel_size * pipeline_parallel_size)
     embed_params = 2 * vocab_size * hidden_size
@@ -62,37 +62,19 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
     return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
-# ---- Parameter Calculation ---- #
-def calc_params(vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio):
-    if tied_embeddings:
-        embedding_params = hidden_size * vocab_size
-    else:
-        embedding_params = 2 * hidden_size * vocab_size
-    position_embedding_params = hidden_size * sequence_length
-    attention_params = int(2 * (1 + kv_size_ratio) * num_layers * hidden_size * hidden_size)
-    layernorm_params = 13 * num_layers * hidden_size
-    if moe:
-        num_expert_layers = num_layers / expert_interval
-        ffn_expert_params = num_mlp_linears * ffn_expansion_factor * num_expert_layers * num_experts * hidden_size * hidden_size
-        ffn_dense_params = num_mlp_linears * ffn_expansion_factor * (num_layers - num_expert_layers) * hidden_size * hidden_size
-        ffn_params = ffn_expert_params + ffn_dense_params
-        gating_params = num_expert_layers * hidden_size * num_experts
-    else:
-        ffn_params = num_mlp_linears * ffn_expansion_factor * num_layers * hidden_size * hidden_size
-    total_params = embedding_params + attention_params + ffn_params + position_embedding_params + layernorm_params
-    if moe:
-        total_params += gating_params
-    return f"""
-    Embedding parameters: {convert_params(embedding_params)}
-    Attention parameters: {convert_params(attention_params)}
-    FFN parameters: {convert_params(ffn_params)}
-    {'Gating parameters: ' + convert_params(gating_params) if moe else ''}
-    Total Params in the Model: {convert_params(total_params)}
-    """
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
@@ -119,6 +101,10 @@ with gr.Blocks() as demo:
                 inputs=[hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib],
                 outputs=memory_result)
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
             vocab_size = gr.Number(label="Vocab Size", value=51200)

     return "%s %s" % (s, size_name[i])
 # Get Hugging Face model configuration and update the parameters
+def get_hf_model_args(hf_model_name_or_path):
+    try:
+        config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
+    except Exception as e:
+        return None, f"Error fetching Hugging Face model: {str(e)}"
+    # Extract relevant values from the config
+    num_layers = config.get("num_hidden_layers", None)
+    hidden_size = config.get("hidden_size", None)
+    num_attention_heads = config.get("num_attention_heads", None)
+    vocab_size = config.get("vocab_size", None)
+    sequence_length = config.get("max_position_embeddings", None)
     return {
         "num_layers": num_layers,
 # ---- Memory Calculation ---- #
 def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
+    model_params, hf_error = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else (None, None)
     if hf_error:
         return hf_error
+    if model_params:
+        num_layers = model_params["num_layers"] or num_layers
+        hidden_size = model_params["hidden_size"] or hidden_size
+        num_attention_heads = model_params["num_attention_heads"] or num_attention_heads
+        vocab_size = model_params["vocab_size"] or vocab_size
+        sequence_length = model_params["sequence_length"] or sequence_length
     dp_degree = num_gpus / (tensor_parallel_size * pipeline_parallel_size)
     embed_params = 2 * vocab_size * hidden_size
     return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
+# ---- Update Gradio inputs with Hugging Face model config ---- #
+def update_from_hf_model(hf_model_name_or_path):
+    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
+    if hf_error:
+        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), hf_error
+    return (gr.update(value=model_params["num_layers"]),
+            gr.update(value=model_params["hidden_size"]),
+            gr.update(value=model_params["num_attention_heads"]),
+            gr.update(value=model_params["vocab_size"]),
+            gr.update(value=model_params["sequence_length"]),
+            "")
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
                 inputs=[hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib],
                 outputs=memory_result)
+            hf_model_name_or_path.change(fn=update_from_hf_model,
+                inputs=[hf_model_name_or_path],
+                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result])
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
             vocab_size = gr.Number(label="Vocab Size", value=51200)