Update: explain the training memory estimates in more detail
- requirements.txt +1 -1
- src/app.py +50 -4
- src/hub_utils.py +2 -2
- src/model_utils.py +4 -2
requirements.txt CHANGED

```diff
@@ -1,4 +1,4 @@
-accelerate @ git+https://github.com/huggingface/accelerate
+accelerate @ git+https://github.com/huggingface/accelerate@improve-model-estimator
 transformers
 timm
 huggingface_hub==0.19.4
```
src/app.py CHANGED

```diff
@@ -1,8 +1,9 @@
 import gradio as gr
 import pandas as pd
+from accelerate.utils import convert_bytes
 from hub_utils import check_for_discussion, report_results
-from model_utils import calculate_memory, get_model
 from huggingface_hub.utils import HfHubHTTPError
+from model_utils import calculate_memory, get_model
 
 
 def get_results(model_name: str, library: str, options: list, access_token: str):
```
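The import block is also re-sorted alphabetically. The newly imported `convert_bytes` helper is what renders raw byte counts throughout the rest of the change; as a quick reference, a sketch of its behavior (matching released accelerate versions, which round to two decimals; not part of this commit):

```python
# Quick reference for the newly imported helper: accelerate.utils.convert_bytes
# renders a raw byte count as a human-readable string.
from accelerate.utils import convert_bytes

print(convert_bytes(2048))         # -> "2.0 KB"
print(convert_bytes(3 * 1024**3))  # -> "3.0 GB"
```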
```diff
@@ -13,7 +14,46 @@ def get_results(model_name: str, library: str, options: list, access_token: str)
     has_discussion = True
     title = f"## Memory usage for '{model_name}'"
     data = calculate_memory(model, options)
-    return [title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=not has_discussion)]
+    stages = {"model": [], "gradients": [], "optimizer": [], "step": []}
+    for i, option in enumerate(data):
+        for stage in stages:
+            stages[stage].append(option["Training using Adam"][stage])
+        value = max(data[i]["Training using Adam"].values())
+        if value == -1:
+            value = "N/A"
+        else:
+            value = convert_bytes(value)
+        data[i]["Training using Adam"] = value
+
+    if any(value != -1 for value in stages["model"]):
+        out_explain = "## Training using Adam explained:\n"
+        out_explain += "When training on a batch size of 1, each stage of the training process is expected to use roughly the following amounts of memory for each precision you selected:\n"
+        memory_values = pd.DataFrame(
+            columns=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"]
+        )
+        for i, dtype in enumerate(options):
+            if stages["model"][i] != -1:
+                memory_values.loc[len(memory_values)] = [
+                    dtype,
+                    convert_bytes(stages["model"][i]),
+                    convert_bytes(stages["gradients"][i]),
+                    convert_bytes(stages["optimizer"][i]),
+                    convert_bytes(stages["step"][i]),
+                ]
+        return [
+            title,
+            gr.update(visible=True, value=pd.DataFrame(data)),
+            gr.update(visible=True, value=out_explain),
+            gr.update(visible=True, value=memory_values),
+            gr.update(visible=not has_discussion),
+        ]
+    return [
+        title,
+        gr.update(visible=True, value=pd.DataFrame(data)),
+        gr.update(visible=False, value=""),
+        gr.update(visible=False, value=pd.DataFrame()),
+        gr.update(visible=not has_discussion),
+    ]
 
 
 with gr.Blocks() as demo:
```
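To see what this post-processing does end to end, here is a minimal, self-contained sketch. The two-row `data` payload and its byte counts are hypothetical, but the dict-of-stages shape and the `-1` "not available" sentinel are exactly what the new code relies on:

```python
# A minimal sketch of the collapsing logic above. The sample byte counts are
# made up; the per-stage dict shape and the -1 sentinel mirror what
# calculate_memory now returns under the "Training using Adam" key.
from accelerate.utils import convert_bytes

data = [
    {"dtype": "float32",
     "Training using Adam": {"model": 4_000, "gradients": 4_000, "optimizer": 8_000, "step": 16_000}},
    {"dtype": "int8",
     "Training using Adam": {"model": -1, "gradients": -1, "optimizer": -1, "step": -1}},
]

stages = {"model": [], "gradients": [], "optimizer": [], "step": []}
for i, option in enumerate(data):
    # collect per-stage columns for the breakdown table
    for stage in stages:
        stages[stage].append(option["Training using Adam"][stage])
    # collapse the dict into a single peak value for the main table
    value = max(option["Training using Adam"].values())
    data[i]["Training using Adam"] = "N/A" if value == -1 else convert_bytes(value)

print([row["Training using Adam"] for row in data])  # -> ['15.62 KB', 'N/A']
```

The per-stage lists collected in `stages` feed the new breakdown table, while the collapsed per-dtype maximum becomes the single peak figure shown in the main results table.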
```diff
@@ -33,7 +73,13 @@ with gr.Blocks() as demo:
     )
     out_text = gr.Markdown()
     out = gr.DataFrame(
-        headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
+        headers=["dtype", "Largest Layer", "Total Size", "Training using Adam (Peak vRAM)"],
+        interactive=False,
+        visible=False,
+    )
+    out_explain = gr.Markdown()
+    memory_values = gr.DataFrame(
+        headers=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"],
         interactive=False,
         visible=False,
     )
```
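Both new components start hidden and are revealed only by the `gr.update(...)` objects returned from `get_results`. A toy version of that show/hide wiring (illustrative names, not the Space's actual UI):

```python
# A toy version of the show/hide pattern used above: components are created
# with visible=False, and the click handler returns gr.update(...) objects
# that flip visibility and set values in one round trip.
import gradio as gr
import pandas as pd

def reveal():
    table = pd.DataFrame({"dtype": ["float32"], "Model": ["4.0 GB"]})  # dummy row
    return (
        gr.update(visible=True, value="### Breakdown"),
        gr.update(visible=True, value=table),
    )

with gr.Blocks() as demo:
    md = gr.Markdown(visible=False)
    df = gr.DataFrame(visible=False, interactive=False)
    gr.Button("Show breakdown").click(reveal, outputs=[md, df])

# demo.launch()  # uncomment to try locally
```

The order of the returned updates must match the `outputs=` list, which is why the `btn.click` wiring below adds `out_explain` and `memory_values` in the same positions they occupy in the return lists above.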
```diff
@@ -56,7 +102,7 @@ with gr.Blocks() as demo:
     btn.click(
         get_results,
         inputs=[inp, library, options, access_token],
-        outputs=[out_text, out, post_to_hub],
+        outputs=[out_text, out, out_explain, memory_values, post_to_hub],
         api_name=False,
     )
```
src/hub_utils.py CHANGED

```diff
@@ -27,9 +27,9 @@ def report_results(model_name, library, access_token):
     post = f"""# Model Memory Requirements\n
 
 You will need about {data[1]} VRAM to load this model for inference, and {data[3]} VRAM to train it using Adam.
-
+
 These calculations were measured from the [Model Memory Utility Space](https://huggingface.co/spaces/hf-accelerate/model-memory-usage) on the Hub.
-
+
 The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
 When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
```

The two changed lines here appear to be whitespace-only edits to the blank lines between paragraphs, which matches the +2 -2 summary for this file.
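The "up to an additional 20%" rule of thumb in the posted comment is easy to apply by hand; a worked example with a made-up load size:

```python
# Illustrative arithmetic for the "up to an additional 20%" inference note.
# The 12 GB figure is hypothetical, not a measurement from the Space.
load_size_gb = 12.0                       # reported "Total Size" for your dtype
inference_budget_gb = load_size_gb * 1.2  # EleutherAI's observed upper bound
print(f"Plan for up to {inference_budget_gb:.1f} GB of vRAM")  # -> 14.4 GB
```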
src/model_utils.py CHANGED

```diff
@@ -3,7 +3,7 @@ from urllib.parse import urlparse
 
 import gradio as gr
 import torch
-from accelerate.commands.estimate import check_has_model, create_empty_model
+from accelerate.commands.estimate import check_has_model, create_empty_model, estimate_training_usage
 from accelerate.utils import calculate_maximum_sizes, convert_bytes
 from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
@@ -84,10 +84,12 @@ def calculate_memory(model: torch.nn.Module, options: list):
         dtype_largest_layer = largest_layer[0]
 
         modifier = DTYPE_MODIFIER[dtype]
+        dtype_training_size = estimate_training_usage(
+            dtype_total_size, dtype if dtype != "float16/bfloat16" else "float16"
+        )
         dtype_total_size /= modifier
         dtype_largest_layer /= modifier
 
-        dtype_training_size = convert_bytes(dtype_total_size * 4)
         dtype_total_size = convert_bytes(dtype_total_size)
         dtype_largest_layer = convert_bytes(dtype_largest_layer)
         data.append(
```
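This replaces the old flat heuristic (Adam training memory estimated as roughly 4x the model's total size) with a per-stage estimate. `estimate_training_usage` lives on the pinned `improve-model-estimator` branch rather than in a released accelerate version, so its internals are not reproduced here. A rough stand-in, inferred only from how `app.py` consumes the result, might look like the following; everything in it is an assumption except the return shape (a dict keyed by stage, with `-1` meaning "no estimate"):

```python
# NOT the real helper: a back-of-the-envelope stand-in for what the branch's
# estimate_training_usage plausibly computes, inferred from how app.py reads
# its result. The actual implementation is on the improve-model-estimator branch.
def rough_training_usage(total_size_fp32: int, dtype: str) -> dict:
    """Per-stage training memory in bytes, or -1 where no estimate is made."""
    if dtype not in ("float32", "float16"):
        # e.g. quantized dtypes: the Space displays "N/A" for these
        return {"model": -1, "gradients": -1, "optimizer": -1, "step": -1}
    model = total_size_fp32 if dtype == "float32" else total_size_fp32 // 2
    gradients = model                     # gradients typically match parameter size
    optimizer = 2 * total_size_fp32       # Adam keeps two fp32 moment buffers
    step = model + gradients + optimizer  # crude peak during the optimizer step
    return {"model": model, "gradients": gradients, "optimizer": optimizer, "step": step}
```

Note that the real call happens before `dtype_total_size` is divided by `DTYPE_MODIFIER[dtype]`, so the helper receives the undivided size and is expected to do its own dtype scaling; that is also why `"float16/bfloat16"` is mapped to plain `"float16"` before the call.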