Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
|
| 4 |
Automatically generated by Colaboratory.
|
| 5 |
|
|
@@ -9,25 +9,42 @@ Original file is located at
|
|
| 9 |
|
| 10 |
# !pip install gradio
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
| 13 |
"""
|
| 14 |
-
Estimates the training cost of a large language model.
|
| 15 |
|
| 16 |
Args:
|
|
|
|
|
|
|
| 17 |
- number_of_parameters (int): The number of parameters in the model.
|
| 18 |
- number_of_tokens (int): The number of tokens to train on.
|
| 19 |
-
- gpu_throughput (float, optional): The peak throughput of the GPU in FLOPs/sec. Default is 312 TFLOPs/sec for A100 GPUs.
|
| 20 |
- utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
|
| 21 |
- overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
|
| 22 |
- cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.
|
| 23 |
|
| 24 |
Returns:
|
| 25 |
- float: The estimated total cost of training the model.
|
|
|
|
|
|
|
| 26 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Calculate the total number of FLOPs required for training
|
| 28 |
total_flops = 6 * number_of_parameters * number_of_tokens
|
| 29 |
|
| 30 |
-
# Calculate the number of hours required on the GPU
|
| 31 |
gpu_hours = total_flops / (gpu_throughput * 3600)
|
| 32 |
|
| 33 |
# Adjust for the actual utilization of the GPUs
|
|
@@ -41,38 +58,40 @@ def estimate_training_cost(number_of_parameters, number_of_tokens, gpu_throughpu
|
|
| 41 |
|
| 42 |
return total_cost
|
| 43 |
|
| 44 |
-
|
| 45 |
-
# Let's say we have a model with 70 billion parameters and it's trained on 2 trillion tokens
|
| 46 |
-
# The default values for the other parameters are used in this example
|
| 47 |
-
total_cost = estimate_training_cost(number_of_parameters=70e9, number_of_tokens=2e12)
|
| 48 |
-
total_cost
|
| 49 |
-
|
| 50 |
-
import gradio as gr
|
| 51 |
-
|
| 52 |
-
# Assume the function estimate_training_cost is already defined as per the previous discussion.
|
| 53 |
-
|
| 54 |
-
def gradio_interface(number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
|
| 55 |
-
# Convert string inputs to correct types
|
| 56 |
number_of_parameters = float(number_of_parameters) * 1e9 # Convert from billions to actual number
|
| 57 |
number_of_tokens = float(number_of_tokens) * 1e12 # Convert from trillions to actual number
|
| 58 |
utilization_rate = float(utilization_rate)
|
| 59 |
overhead = float(overhead)
|
| 60 |
cost_per_gpu_hour = float(cost_per_gpu_hour)
|
| 61 |
|
| 62 |
-
|
| 63 |
-
cost = estimate_training_cost(number_of_parameters, number_of_tokens, utilization_rate=utilization_rate, overhead=overhead, cost_per_gpu_hour=cost_per_gpu_hour)
|
| 64 |
-
|
| 65 |
-
# Return the result as a formatted string
|
| 66 |
return f"The estimated training cost is ${cost:,.2f}"
|
| 67 |
|
|
|
|
|
|
|
|
|
|
| 68 |
# Define the title and description for the Gradio app
|
| 69 |
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
|
| 70 |
-
description = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
# Create the Gradio interface with title and description
|
| 73 |
iface = gr.Interface(
|
| 74 |
fn=gradio_interface,
|
| 75 |
inputs=[
|
|
|
|
|
|
|
| 76 |
gr.Textbox(label="Number of Parameters (in billions)", value="70"),
|
| 77 |
gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
|
| 78 |
gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
|
|
@@ -85,6 +104,4 @@ iface = gr.Interface(
|
|
| 85 |
article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
|
| 86 |
)
|
| 87 |
|
| 88 |
-
# Run the interface
|
| 89 |
iface.launch()
|
| 90 |
-
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
"""LLM Training Cost Calculator App.ipynb
|
| 3 |
|
| 4 |
Automatically generated by Colaboratory.
|
| 5 |
|
|
|
|
| 9 |
|
| 10 |
# !pip install gradio
|
| 11 |
|
| 12 |
+
import gradio as gr
|
| 13 |
+
|
| 14 |
+
def estimate_training_cost(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):
|
| 15 |
"""
|
| 16 |
+
Estimates the training cost of a large language model based on the selected GPU and precision.
|
| 17 |
|
| 18 |
Args:
|
| 19 |
+
- gpu_choice (str): The choice of GPU, e.g., 'A100 80GB PCIe', 'V100', etc.
|
| 20 |
+
- precision (str): The precision level for the GPU, e.g., 'bf16', 'tf32', 'tensor'.
|
| 21 |
- number_of_parameters (int): The number of parameters in the model.
|
| 22 |
- number_of_tokens (int): The number of tokens to train on.
|
|
|
|
| 23 |
- utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
|
| 24 |
- overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
|
| 25 |
- cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.
|
| 26 |
|
| 27 |
Returns:
|
| 28 |
- float: The estimated total cost of training the model.
|
| 29 |
+
|
| 30 |
+
The function dynamically adjusts the GPU throughput based on the selected GPU and precision. The throughput values are predefined for each GPU and precision combination. This estimation assumes a linear scaling of training cost with the number of parameters and tokens.
|
| 31 |
"""
|
| 32 |
+
|
| 33 |
+
gpu_throughputs = {
|
| 34 |
+
'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},
|
| 35 |
+
'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},
|
| 36 |
+
'V100': {'tensor': 130e12}, # Assuming only the deep learning performance for V100
|
| 37 |
+
'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},
|
| 38 |
+
'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
# Get the correct GPU throughput
|
| 42 |
+
gpu_throughput = gpu_throughputs[gpu_choice][precision]
|
| 43 |
+
|
| 44 |
# Calculate the total number of FLOPs required for training
|
| 45 |
total_flops = 6 * number_of_parameters * number_of_tokens
|
| 46 |
|
| 47 |
+
# Calculate the number of hours required on the selected GPU
|
| 48 |
gpu_hours = total_flops / (gpu_throughput * 3600)
|
| 49 |
|
| 50 |
# Adjust for the actual utilization of the GPUs
|
|
|
|
| 58 |
|
| 59 |
return total_cost
|
| 60 |
|
| 61 |
+
def gradio_interface(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
    """Gradio callback: convert the form values and format the estimated cost.

    Args:
        gpu_choice: GPU name picked in the "Select GPU" dropdown.
        precision: Precision key picked in the "Select Precision" dropdown.
        number_of_parameters: Model size in billions (text or number).
        number_of_tokens: Training tokens in trillions (text or number).
        utilization_rate: Forwarded to ``estimate_training_cost`` as a float.
        overhead: Forwarded to ``estimate_training_cost`` as a float.
        cost_per_gpu_hour: Forwarded to ``estimate_training_cost`` as a float.

    Returns:
        str: The formatted cost, or a message starting with ``"Error:"`` when
        an input is not numeric or the GPU/precision pair is not defined.
    """
    # The parameter and token fields are free-text boxes, so the conversions
    # can fail; report that to the user instead of raising inside the UI.
    try:
        number_of_parameters = float(number_of_parameters) * 1e9  # Convert from billions to actual number
        number_of_tokens = float(number_of_tokens) * 1e12  # Convert from trillions to actual number
        utilization_rate = float(utilization_rate)
        overhead = float(overhead)
        cost_per_gpu_hour = float(cost_per_gpu_hour)
    except (TypeError, ValueError):
        return "Error: all numeric inputs must be valid numbers."

    # The precision dropdown lists every precision for every GPU, but the
    # throughput table only defines some pairs (e.g. V100 only has 'tensor'),
    # so an unsupported selection surfaces as a KeyError from the lookup.
    try:
        cost = estimate_training_cost(
            gpu_choice,
            precision,
            number_of_parameters,
            number_of_tokens,
            utilization_rate=utilization_rate,
            overhead=overhead,
            cost_per_gpu_hour=cost_per_gpu_hour,
        )
    except KeyError:
        return f"Error: the combination of GPU '{gpu_choice}' and precision '{precision}' is not supported."

    return f"The estimated training cost is ${cost:,.2f}"
|
| 70 |
|
| 71 |
+
gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]
|
| 72 |
+
default_precisions = ['bf16', 'tf32', 'tensor', 'bf16', 'bf16'] # Default precision for each GPU
|
| 73 |
+
|
| 74 |
# Define the title and description for the Gradio app
|
| 75 |
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
|
| 76 |
+
description = """
|
| 77 |
+
<p style='text-align: center;'>Estimate the cost of training large language models (LLM). This tool helps you calculate the cost based on model parameters, tokens, and GPU selections with various precision options. Select a GPU and the precision level to get an accurate cost estimate.</p>
|
| 78 |
+
<p><strong>Available GPUs and Precisions:</strong></p>
|
| 79 |
+
<ul>
|
| 80 |
+
<li><strong>A100 80GB PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
| 81 |
+
<li><strong>A100 80GB SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
| 82 |
+
<li><strong>V100:</strong> Uses Deep Learning performance with Tensor Cores (tensor) as the default and only precision.</li>
|
| 83 |
+
<li><strong>H100 SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
| 84 |
+
<li><strong>H100 PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
| 85 |
+
</ul>
|
| 86 |
+
<p>The choice of GPU and precision impacts the throughput, affecting training time and cost. BFLOAT16 is generally faster and more cost-effective, while Tensor Float 32 offers higher precision. The V100 GPU is optimized for Deep Learning with Tensor Cores.</p>
|
| 87 |
+
<p style='text-align: center;'>We plan to extend this calculator to include calculating the cost of fine-tuning models using strategies like LoRA or QLoRA. Stay tuned for updates where you'll be able to input the model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>
|
| 88 |
+
"""
|
| 89 |
|
|
|
|
| 90 |
iface = gr.Interface(
|
| 91 |
fn=gradio_interface,
|
| 92 |
inputs=[
|
| 93 |
+
gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
|
| 94 |
+
gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
|
| 95 |
gr.Textbox(label="Number of Parameters (in billions)", value="70"),
|
| 96 |
gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
|
| 97 |
gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
|
|
|
|
| 104 |
article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
|
| 105 |
)
|
| 106 |
|
|
|
|
| 107 |
iface.launch()
|
|
|