Update: explain the training memory estimates in more detail
- requirements.txt +1 -1
- src/app.py +50 -4
- src/hub_utils.py +2 -2
- src/model_utils.py +4 -2
requirements.txt CHANGED

```diff
@@ -1,4 +1,4 @@
-accelerate @ git+https://github.com/huggingface/accelerate
+accelerate @ git+https://github.com/huggingface/accelerate@improve-model-estimator
 transformers
 timm
 huggingface_hub==0.19.4
```
src/app.py CHANGED

```diff
@@ -1,8 +1,9 @@
 import gradio as gr
 import pandas as pd
+from accelerate.utils import convert_bytes
 from hub_utils import check_for_discussion, report_results
-from model_utils import calculate_memory, get_model
 from huggingface_hub.utils import HfHubHTTPError
+from model_utils import calculate_memory, get_model
 
 
 def get_results(model_name: str, library: str, options: list, access_token: str):
```
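The import block is also re-sorted alphabetically. The newly imported `convert_bytes` helper is what renders raw byte counts throughout the rest of the change; as a quick reference, a sketch of its behavior (matching released accelerate versions, which round to two decimals; not part of this commit):

```python
# Quick reference for the newly imported helper: accelerate.utils.convert_bytes
# renders a raw byte count as a human-readable string.
from accelerate.utils import convert_bytes

print(convert_bytes(2048))         # -> "2.0 KB"
print(convert_bytes(3 * 1024**3))  # -> "3.0 GB"
```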
```diff
@@ -13,7 +14,46 @@ def get_results(model_name: str, library: str, options: list, access_token: str)
     has_discussion = True
     title = f"## Memory usage for '{model_name}'"
     data = calculate_memory(model, options)
-    return [title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=not has_discussion)]
+    stages = {"model": [], "gradients": [], "optimizer": [], "step": []}
+    for i, option in enumerate(data):
+        for stage in stages:
+            stages[stage].append(option["Training using Adam"][stage])
+        value = max(data[i]["Training using Adam"].values())
+        if value == -1:
+            value = "N/A"
+        else:
+            value = convert_bytes(value)
+        data[i]["Training using Adam"] = value
+
+    if any(value != -1 for value in stages["model"]):
+        out_explain = "## Training using Adam explained:\n"
+        out_explain += "When training on a batch size of 1, each stage of the training process is expected to use roughly the following amounts of memory for each precision you selected:\n"
+        memory_values = pd.DataFrame(
+            columns=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"]
+        )
+        for i, dtype in enumerate(options):
+            if stages["model"][i] != -1:
+                memory_values.loc[len(memory_values)] = [
+                    dtype,
+                    convert_bytes(stages["model"][i]),
+                    convert_bytes(stages["gradients"][i]),
+                    convert_bytes(stages["optimizer"][i]),
+                    convert_bytes(stages["step"][i]),
+                ]
+        return [
+            title,
+            gr.update(visible=True, value=pd.DataFrame(data)),
+            gr.update(visible=True, value=out_explain),
+            gr.update(visible=True, value=memory_values),
+            gr.update(visible=not has_discussion),
+        ]
+    return [
+        title,
+        gr.update(visible=True, value=pd.DataFrame(data)),
+        gr.update(visible=False, value=""),
+        gr.update(visible=False, value=pd.DataFrame()),
+        gr.update(visible=not has_discussion),
+    ]
 
 
 with gr.Blocks() as demo:
```
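To see what this post-processing does end to end, here is a minimal, self-contained sketch. The two-row `data` payload and its byte counts are hypothetical, but the dict-of-stages shape and the `-1` "not available" sentinel are exactly what the new code relies on:

```python
# A minimal sketch of the collapsing logic above. The sample byte counts are
# made up; the per-stage dict shape and the -1 sentinel mirror what
# calculate_memory now returns under the "Training using Adam" key.
from accelerate.utils import convert_bytes

data = [
    {"dtype": "float32",
     "Training using Adam": {"model": 4_000, "gradients": 4_000, "optimizer": 8_000, "step": 16_000}},
    {"dtype": "int8",
     "Training using Adam": {"model": -1, "gradients": -1, "optimizer": -1, "step": -1}},
]

stages = {"model": [], "gradients": [], "optimizer": [], "step": []}
for i, option in enumerate(data):
    # collect per-stage columns for the breakdown table
    for stage in stages:
        stages[stage].append(option["Training using Adam"][stage])
    # collapse the dict into a single peak value for the main table
    value = max(option["Training using Adam"].values())
    data[i]["Training using Adam"] = "N/A" if value == -1 else convert_bytes(value)

print([row["Training using Adam"] for row in data])  # -> ['15.62 KB', 'N/A']
```

The per-stage lists collected in `stages` feed the new breakdown table, while the collapsed per-dtype maximum becomes the single peak figure shown in the main results table.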
```diff
@@ -33,7 +73,13 @@ with gr.Blocks() as demo:
     )
     out_text = gr.Markdown()
     out = gr.DataFrame(
-        headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
+        headers=["dtype", "Largest Layer", "Total Size", "Training using Adam (Peak vRAM)"],
+        interactive=False,
+        visible=False,
+    )
+    out_explain = gr.Markdown()
+    memory_values = gr.DataFrame(
+        headers=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"],
         interactive=False,
         visible=False,
     )
```
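Both new components start hidden and are revealed only by the `gr.update(...)` objects returned from `get_results`. A toy version of that show/hide wiring (illustrative names, not the Space's actual UI):

```python
# A toy version of the show/hide pattern used above: components are created
# with visible=False, and the click handler returns gr.update(...) objects
# that flip visibility and set values in one round trip.
import gradio as gr
import pandas as pd

def reveal():
    table = pd.DataFrame({"dtype": ["float32"], "Model": ["4.0 GB"]})  # dummy row
    return (
        gr.update(visible=True, value="### Breakdown"),
        gr.update(visible=True, value=table),
    )

with gr.Blocks() as demo:
    md = gr.Markdown(visible=False)
    df = gr.DataFrame(visible=False, interactive=False)
    gr.Button("Show breakdown").click(reveal, outputs=[md, df])

# demo.launch()  # uncomment to try locally
```

The order of the returned updates must match the `outputs=` list, which is why the `btn.click` wiring below adds `out_explain` and `memory_values` in the same positions they occupy in the return lists above.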
```diff
@@ -56,7 +102,7 @@ with gr.Blocks() as demo:
     btn.click(
         get_results,
         inputs=[inp, library, options, access_token],
-        outputs=[out_text, out, post_to_hub],
+        outputs=[out_text, out, out_explain, memory_values, post_to_hub],
         api_name=False,
     )
```
src/hub_utils.py CHANGED

```diff
@@ -27,9 +27,9 @@ def report_results(model_name, library, access_token):
     post = f"""# Model Memory Requirements\n
 
 You will need about {data[1]} VRAM to load this model for inference, and {data[3]} VRAM to train it using Adam.
-
+
 These calculations were measured from the [Model Memory Utility Space](https://huggingface.co/spaces/hf-accelerate/model-memory-usage) on the Hub.
-
+
 The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
 When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
```

The two changed lines here appear to be whitespace-only edits to the blank lines between paragraphs, which matches the +2 -2 summary for this file.
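The "up to an additional 20%" rule of thumb in the posted comment is easy to apply by hand; a worked example with a made-up load size:

```python
# Illustrative arithmetic for the "up to an additional 20%" inference note.
# The 12 GB figure is hypothetical, not a measurement from the Space.
load_size_gb = 12.0                       # reported "Total Size" for your dtype
inference_budget_gb = load_size_gb * 1.2  # EleutherAI's observed upper bound
print(f"Plan for up to {inference_budget_gb:.1f} GB of vRAM")  # -> 14.4 GB
```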
src/model_utils.py CHANGED

```diff
@@ -3,7 +3,7 @@ from urllib.parse import urlparse
 
 import gradio as gr
 import torch
-from accelerate.commands.estimate import check_has_model, create_empty_model
+from accelerate.commands.estimate import check_has_model, create_empty_model, estimate_training_usage
 from accelerate.utils import calculate_maximum_sizes, convert_bytes
 from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
@@ -84,10 +84,12 @@ def calculate_memory(model: torch.nn.Module, options: list):
         dtype_largest_layer = largest_layer[0]
 
         modifier = DTYPE_MODIFIER[dtype]
+        dtype_training_size = estimate_training_usage(
+            dtype_total_size, dtype if dtype != "float16/bfloat16" else "float16"
+        )
         dtype_total_size /= modifier
         dtype_largest_layer /= modifier
 
-        dtype_training_size = convert_bytes(dtype_total_size * 4)
         dtype_total_size = convert_bytes(dtype_total_size)
         dtype_largest_layer = convert_bytes(dtype_largest_layer)
         data.append(
```
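This replaces the old flat heuristic (Adam training memory estimated as roughly 4x the model's total size) with a per-stage estimate. `estimate_training_usage` lives on the pinned `improve-model-estimator` branch rather than in a released accelerate version, so its internals are not reproduced here. A rough stand-in, inferred only from how `app.py` consumes the result, might look like the following; everything in it is an assumption except the return shape (a dict keyed by stage, with `-1` meaning "no estimate"):

```python
# NOT the real helper: a back-of-the-envelope stand-in for what the branch's
# estimate_training_usage plausibly computes, inferred from how app.py reads
# its result. The actual implementation is on the improve-model-estimator branch.
def rough_training_usage(total_size_fp32: int, dtype: str) -> dict:
    """Per-stage training memory in bytes, or -1 where no estimate is made."""
    if dtype not in ("float32", "float16"):
        # e.g. quantized dtypes: the Space displays "N/A" for these
        return {"model": -1, "gradients": -1, "optimizer": -1, "step": -1}
    model = total_size_fp32 if dtype == "float32" else total_size_fp32 // 2
    gradients = model                     # gradients typically match parameter size
    optimizer = 2 * total_size_fp32       # Adam keeps two fp32 moment buffers
    step = model + gradients + optimizer  # crude peak during the optimizer step
    return {"model": model, "gradients": gradients, "optimizer": optimizer, "step": step}
```

Note that the real call happens before `dtype_total_size` is divided by `DTYPE_MODIFIER[dtype]`, so the helper receives the undivided size and is expected to do its own dtype scaling; that is also why `"float16/bfloat16"` is mapped to plain `"float16"` before the call.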