Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """LLM Training Cost Calculator App.ipynb | |
| Automatically generated by Colaboratory. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1iZpCUgC5T_ASnlDgMYm1n4RH8BZsm7sx | |
| """ | |
| # !pip install gradio | |
| import gradio as gr | |
def estimate_training_cost(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):
    """Estimate the cost (USD) of training an LLM on a given GPU and precision.

    Uses the standard approximation FLOPs ~= 6 * params * tokens, divides by
    the GPU's peak throughput to obtain ideal GPU-hours, then scales up for
    real-world utilization and overhead.

    Args:
        gpu_choice (str): GPU model, e.g. 'A100 80GB PCIe', 'V100'.
        precision (str): Precision level, e.g. 'bf16', 'tf32', or 'tensor'
            (the only option for the V100).
        number_of_parameters (int | float): Number of model parameters.
        number_of_tokens (int | float): Number of training tokens.
        utilization_rate (float, optional): Achieved fraction of peak FLOPs,
            in (0, 1]. Default 0.5 (50%).
        overhead (float, optional): Multiplier for overhead and additional
            costs (1 + overhead percentage). Default 1.10 (10% overhead).
        cost_per_gpu_hour (float, optional): GPU price per hour. Default $1.85.

    Returns:
        float: The estimated total cost of training the model.

    Raises:
        ValueError: If the GPU is unknown, the precision is not offered for
            the chosen GPU, or utilization_rate is outside (0, 1].
    """
    # Peak throughput (FLOP/s) per GPU/precision combination.
    gpu_throughputs = {
        'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},
        'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},
        'V100': {'tensor': 130e12},  # Assuming only the deep learning performance for V100
        'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},
        'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}
    }

    # Validate inputs up front: the original lookup raised a bare KeyError for
    # combinations such as V100 + 'bf16' (which the UI allows), and a zero
    # utilization_rate caused a ZeroDivisionError below.
    if gpu_choice not in gpu_throughputs:
        raise ValueError(f"Unknown GPU choice: {gpu_choice!r}")
    if precision not in gpu_throughputs[gpu_choice]:
        supported = ", ".join(sorted(gpu_throughputs[gpu_choice]))
        raise ValueError(
            f"Precision {precision!r} is not available for {gpu_choice!r}; "
            f"supported precisions: {supported}"
        )
    if not 0 < utilization_rate <= 1:
        raise ValueError("utilization_rate must be in the interval (0, 1]")

    # Get the correct GPU throughput
    gpu_throughput = gpu_throughputs[gpu_choice][precision]

    # Total FLOPs for training: ~6 FLOPs per parameter per token
    # (forward + backward pass).
    total_flops = 6 * number_of_parameters * number_of_tokens

    # Ideal wall-clock GPU-hours at peak throughput (3600 seconds per hour).
    gpu_hours = total_flops / (gpu_throughput * 3600)

    # Real workloads rarely hit peak FLOPs; scale by the utilization rate.
    adjusted_gpu_hours = gpu_hours / utilization_rate

    # Account for overhead (checkpointing, restarts, evaluation, ...).
    actual_gpu_hours = adjusted_gpu_hours * overhead

    # Calculate the total cost
    total_cost = actual_gpu_hours * cost_per_gpu_hour
    return total_cost
def gradio_interface(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
    """Gradio callback: parse the UI inputs and return a formatted cost string.

    Textbox inputs arrive as strings; parameters are entered in billions and
    tokens in trillions, so both are scaled to absolute counts before the
    estimate is computed. Invalid input is reported as an error string in the
    output box instead of crashing the callback (the original raised an
    unhandled KeyError for combos like V100 + 'bf16', the UI default).
    """
    try:
        number_of_parameters = float(number_of_parameters) * 1e9  # Convert from billions to actual number
        number_of_tokens = float(number_of_tokens) * 1e12  # Convert from trillions to actual number
        utilization_rate = float(utilization_rate)
        overhead = float(overhead)
        cost_per_gpu_hour = float(cost_per_gpu_hour)
    except ValueError:
        # Non-numeric text in one of the textboxes.
        return "Error: parameters, tokens, and cost per GPU hour must be numeric."

    try:
        cost = estimate_training_cost(
            gpu_choice,
            precision,
            number_of_parameters,
            number_of_tokens,
            utilization_rate=utilization_rate,
            overhead=overhead,
            cost_per_gpu_hour=cost_per_gpu_hour,
        )
    except (KeyError, ValueError, ZeroDivisionError) as err:
        # Unsupported GPU/precision combination or invalid utilization rate;
        # surface a readable message rather than a stack trace.
        return f"Error: {err}"

    return f"The estimated training cost is ${cost:,.2f}"
# GPU models offered in the UI dropdown; keys must match gpu_throughputs
# in estimate_training_cost.
gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]
# NOTE(review): parallel to gpu_choices, but this list appears unused in this
# file — the precision dropdown is built with a static default instead. Confirm
# whether per-GPU defaults were intended to be wired up.
default_precisions = ['bf16', 'tf32', 'tensor', 'bf16', 'bf16']  # Default precision for each GPU
# Define the title and description for the Gradio app.
# Both strings are rendered as raw HTML by gr.Interface.
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
description = """
<p style='text-align: center;'>Estimate the cost of training large language models (LLM). This tool helps you calculate the cost based on model parameters, tokens, and GPU selections with various precision options. Select a GPU and the precision level to get an accurate cost estimate.</p>
<p><strong>Available GPUs and Precisions:</strong></p>
<ul>
<li><strong>A100 80GB PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
<li><strong>A100 80GB SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
<li><strong>V100:</strong> Uses Deep Learning performance with Tensor Cores (tensor) as the default and only precision.</li>
<li><strong>H100 SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
<li><strong>H100 PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
</ul>
<p>The choice of GPU and precision impacts the throughput, affecting training time and cost. BFLOAT16 is generally faster and more cost-effective, while Tensor Float 32 offers higher precision. The V100 GPU is optimized for Deep Learning with Tensor Cores.</p>
<p style='text-align: center;'>We plan to extend this calculator to include calculating the cost of fine-tuning models using strategies like LoRA or QLoRA. Stay tuned for updates where you'll be able to input the model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>
"""
# Wire up the Gradio UI: dropdowns for GPU/precision, textboxes for the
# numeric inputs, sliders for utilization and overhead. Input order matches
# gradio_interface's parameter order.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
        # NOTE(review): this static choice list allows combinations the
        # throughput table does not support (e.g. V100 with 'bf16') — confirm
        # whether the precision options should depend on the selected GPU.
        gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
        gr.Textbox(label="Number of Parameters (in billions)", value="70"),
        gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
        gr.Slider(minimum=1.0, maximum=2.0, step=0.01, value=1.10, label="Overhead (1 + overhead percentage)"),
        gr.Textbox(label="Cost per GPU Hour ($)", value="1.85")
    ],
    outputs=[gr.Textbox(label="Estimated Training Cost")],
    title=title,
    description=description,
    article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
)

# Start the web server (blocks until the app is stopped).
iface.launch()