Vokturz committed · Commit d37299b · 1 parent: 0cc3d3a

added Apple vendor

Browse files:
- data/gpu_specs.csv: +19 -0
- src/app.py: +38 -17
data/gpu_specs.csv CHANGED

@@ -932,3 +932,22 @@ Data Center GPU Max 1100,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"48 GB, HBM
 Data Center GPU Max 1350,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"96 GB, HBM2e, 8192 bit",750 MHz,1200 MHz,14336 / 896 / 0,96.0,Intel,2023
 Data Center GPU Max 1550,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1600 MHz,16384 / 1024 / 0,128.0,Intel,2023
 Data Center GPU Max Subsystem,Ponte Vecchio,"Jan 10th, 2023",PCIe 5.0 x16,"128 GB, HBM2e, 8192 bit",900 MHz,1565 MHz,16384 / 1024 / 0,128.0,Intel,2023
+M1 8 GB,M1,"Nov 10th, 2020",None,"8 GB, LPDDR4X, 128 bit",None,None,None,8.0,Apple,2020
+M1 16 GB,M1,"Nov 10th, 2020",None,"16 GB, LPDDR4X, 128 bit",None,None,None,16.0,Apple,2020
+M1 Pro 16 GB,M1 Pro,"Oct 18th, 2021",None,"16 GB, LPDDR5, 256 bit",None,None,None,16.0,Apple,2021
+M1 Pro 32 GB,M1 Pro,"Oct 18th, 2021",None,"32 GB, LPDDR5, 256 bit",None,None,None,32.0,Apple,2021
+M1 Max 32 GB,M1 Max,"Oct 18th, 2021",None,"32 GB, LPDDR5, 512 bit",None,None,None,32.0,Apple,2021
+M1 Max 64 GB,M1 Max,"Oct 18th, 2021",None,"64 GB, LPDDR5, 512 bit",None,None,None,64.0,Apple,2021
+M1 Ultra 64 GB,M1 Ultra,"Mar 18th, 2022",None,"64 GB, LPDDR5, 1024 bit",None,None,None,64.0,Apple,2022
+M1 Ultra 128 GB,M1 Ultra,"Mar 18th, 2022",None,"128 GB, LPDDR5, 1024 bit",None,None,None,128.0,Apple,2022
+M2 8 GB,M2,"Jun 24th, 2022",None,"8 GB, LPDDR5, 128 bit",None,None,None,8.0,Apple,2022
+M2 16 GB,M2,"Jun 24th, 2022",None,"16 GB, LPDDR5, 128 bit",None,None,None,16.0,Apple,2022
+M2 24 GB,M2,"Jun 24th, 2022",None,"24 GB, LPDDR5, 128 bit",None,None,None,24.0,Apple,2022
+M2 Pro 32 GB,M2 Pro,"Jan 17th, 2023",None,"32 GB, LPDDR5, 256 bit",None,None,None,32.0,Apple,2023
+M2 Pro 64 GB,M2 Pro,"Jan 17th, 2023",None,"64 GB, LPDDR5, 256 bit",None,None,None,64.0,Apple,2023
+M2 Max 32 GB,M2 Max,"Jan 17th, 2023",None,"32 GB, LPDDR5, 512 bit",None,None,None,32.0,Apple,2023
+M2 Max 64 GB,M2 Max,"Jan 17th, 2023",None,"64 GB, LPDDR5, 512 bit",None,None,None,64.0,Apple,2023
+M2 Max 96 GB,M2 Max,"Jan 17th, 2023",None,"96 GB, LPDDR5, 512 bit",None,None,None,96.0,Apple,2023
+M2 Ultra 64 GB,M2 Ultra,"Jun 13th, 2023",None,"64 GB, LPDDR5, 1024 bit",None,None,None,64.0,Apple,2023
+M2 Ultra 128 GB,M2 Ultra,"Jun 13th, 2023",None,"128 GB, LPDDR5, 1024 bit",None,None,None,128.0,Apple,2023
+M2 Ultra 192 GB,M2 Ultra,"Jun 13th, 2023",None,"192 GB, LPDDR5, 1024 bit",None,None,None,192.0,Apple,2023
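
A quick sketch of how these rows surface in the app: `src/app.py` (below) filters the spec sheet on its `Vendor` column, so the Apple entries become selectable as soon as this CSV lands. Only `Vendor` and `Product Name` are column names confirmed by the diff; treat the rest of the schema as inferred.

```python
import pandas as pd

# Load the spec sheet shipped with the Space.
gpu_specs = pd.read_csv("data/gpu_specs.csv")

# The same filter app.py applies after the vendor selectbox:
apple = gpu_specs[gpu_specs['Vendor'] == 'Apple'].sort_values('Product Name')
print(apple['Product Name'].tolist())  # M1 8 GB ... M2 Ultra 192 GB
```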
src/app.py CHANGED

@@ -27,22 +27,32 @@ def get_mistralai_table():
     model = get_model("mistralai/Mistral-7B-v0.1", library="transformers", access_token="")
     return calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
 
-def show_gpu_info(info, trainable_params=0):
+def show_gpu_info(info, trainable_params=0, vendor=""):
     for var in ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']:
         _info = info.loc[var]
-        if _info['Number of GPUs'] >= 3:
-            func = st.error
-            icon = "⛔"
-        elif _info['Number of GPUs'] == 2:
-            func = st.warning
-            icon = "⚠️"
+        if vendor != "Apple":
+            if _info['Number of GPUs'] >= 3:
+                func = st.error
+                icon = "⛔"
+            elif _info['Number of GPUs'] == 2:
+                func = st.warning
+                icon = "⚠️"
+            else:
+                func = st.success
+                icon = "✅"
+
+            msg = f"You require **{_info['Number of GPUs']}** GPUs for **{var}**"
+            if var == 'LoRa Fine-tuning':
+                msg += f" ({trainable_params}%)"
         else:
-            func = st.success
-            icon = "✅"
-
-        msg = f"You require **{_info['Number of GPUs']}** GPUs for **{var}**"
-        if var == 'LoRa Fine-tuning':
-            msg += f" ({trainable_params}%)"
+            if _info['Number of GPUs']==1:
+                msg = f"You can run **{var}**"
+                func = st.success
+                icon = "✅"
+            else:
+                msg = f"You cannot run **{var}**"
+                func = st.error
+                icon = "⛔"
         func(msg, icon=icon)
 
 
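The rewrite above splits `show_gpu_info` into two paths: non-Apple vendors get a GPU count with a severity banner, while Apple gets a binary can/cannot verdict, since an M1/M2 machine exposes exactly one shared-memory GPU. A minimal sketch of that decision logic as a pure function (the name `gpu_message` is illustrative, not part of the app):

```python
# Hypothetical helper mirroring the branches above; returns (message, icon).
def gpu_message(num_gpus: int, task: str, vendor: str) -> tuple[str, str]:
    if vendor != "Apple":
        icon = "⛔" if num_gpus >= 3 else ("⚠️" if num_gpus == 2 else "✅")
        return f"You require {num_gpus} GPUs for {task}", icon
    if num_gpus == 1:
        return f"You can run {task}", "✅"
    return f"You cannot run {task}", "⛔"

assert gpu_message(2, "Inference", "NVIDIA") == ("You require 2 GPUs for Inference", "⚠️")
assert gpu_message(3, "Full Training Adam", "Apple")[1] == "⛔"
```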
@@ -65,7 +75,6 @@ with col.expander("Information", expanded=True):
     st.latex(r"""\text{Memory}_\text{Inference} \approx \text{Model Size} \times 1.2""")
     st.markdown("""- For LoRa Fine-tuning, I'm asuming a **16-bit** dtype of trainable parameters. The formula (in terms of GB) is""")
     st.latex(r"\text{Memory}_\text{LoRa} \approx \text{Model Size} + \left(\text{ \# trainable Params}_\text{Billions}\times\frac{16}{8} \times 4\right) \times 1.2")
-    st.markdown("- You can understand `int4` as models in `GPTQ-4bit`, `AWQ-4bit` or `Q4_0 GGUF/GGML` formats")
 
 access_token = st.sidebar.text_input("Access token")
 model_name = st.sidebar.text_input("Model name", value="mistralai/Mistral-7B-v0.1")
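
To make the expander's two formulas concrete: for a hypothetical 7B-parameter model in 16-bit (about 14 GB of weights) with 1% trainable LoRa parameters, applying the 1.2 factor exactly as the LaTeX writes it:

```python
params_billions = 7
model_size_gb = params_billions * 16 / 8                 # 16-bit weights: ~14 GB

inference_gb = model_size_gb * 1.2                       # ~16.8 GB
trainable_billions = 0.01 * params_billions              # 1% trainable (LoRa)
lora_gb = model_size_gb + (trainable_billions * 16 / 8 * 4) * 1.2  # ~14.7 GB

print(f"Inference ≈ {inference_gb:.1f} GB, LoRa ≈ {lora_gb:.1f} GB")
```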
@@ -89,7 +98,7 @@ if model_name not in st.session_state:
     st.session_state['actual_model'] = model_name
 
 
-gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel"])
+gpu_vendor = st.sidebar.selectbox("GPU Vendor", ["NVIDIA", "AMD", "Intel", "Apple"])
 # year = st.sidebar.selectbox("Filter by Release Year", list(range(2014, 2024))[::-1], index=None)
 gpu_info = gpu_specs[gpu_specs['Vendor'] == gpu_vendor].sort_values('Product Name')
 # if year:
@@ -122,6 +131,10 @@ _memory_table.columns = ['Inference', 'Full Training Adam', 'LoRa Fine-tuning']
 _memory_table = _memory_table.stack().reset_index()
 _memory_table.columns = ['dtype', 'Variable', 'Number of GPUs']
 col1, col2 = st.columns([1,1.3])
+
+if gpu_vendor == "Apple":
+    col.warning("""For M1/M2 Apple chips, PyTorch uses [Metal Performance Shaders (MPS)](https://huggingface.co/docs/accelerate/usage_guides/mps) as backend.\\
+                Remember that Apple M1/M2 chips share memory between CPU and GPU.""", icon="⚠️")
 with col1:
     st.write(f"#### [{model_name}](https://huggingface.co/{model_name}) ({custom_ceil(memory_table.iloc[3,0],1):.1f}B)")
 
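The new warning reflects how PyTorch reaches Apple GPUs: through the MPS backend, with tensors living in the same unified memory the CPU uses, so there is no separate VRAM pool to budget for. A quick standalone check (standard PyTorch API, independent of this app):

```python
import torch

# On Apple Silicon, the GPU is addressed through the MPS backend.
device = "mps" if torch.backends.mps.is_available() else "cpu"
x = torch.ones(3, device=device)
print(x.device)  # "mps:0" on an M1/M2 machine
```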
@@ -129,15 +142,23 @@ with col1:
     tabs = st.tabs(dtypes)
     for dtype, tab in zip(dtypes, tabs):
         with tab:
+            if dtype in ["int4", "int8"]:
+                _dtype = dtype.replace("int", "")
+                st.markdown(f"`int{_dtype}` refers to models in `GPTQ-{_dtype}bit`, `AWQ-{_dtype}bit` or `Q{_dtype}_0 GGUF/GGML`")
             info = _memory_table[_memory_table['dtype'] == dtype].set_index('Variable')
-            show_gpu_info(info, lora_pct)
+            show_gpu_info(info, lora_pct, gpu_vendor)
             st.write(memory_table.iloc[[0, 1, 2, 4]])
 with col2:
+    extra = ""
+    if gpu_vendor == "Apple":
+        st.warning("This graph is irrelevant for M1/M2 chips as they can't run in parallel.", icon="⚠️")
+        extra = "⚠️"
     num_colors= 4
     colors = [px.colors.sequential.RdBu[int(i*(len(px.colors.sequential.RdBu)-1)/(num_colors-1))] for i in range(num_colors)]
     fig = px.bar(_memory_table, x='Variable', y='Number of GPUs', color='dtype', barmode='group', color_discrete_sequence=colors)
-    fig.update_layout(title=dict(text=f"Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
+    fig.update_layout(title=dict(text=f"{extra} Number of GPUs required for<br> {get_name(gpu)}", font=dict(size=25))
                       , xaxis_tickfont_size=14, yaxis_tickfont_size=16, yaxis_dtick='1')
     st.plotly_chart(fig, use_container_width=True)
 
 
+
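
The per-dtype tabs ultimately come down to bytes per parameter, which is why each step from `float32` down to `int4` roughly halves the footprint; a back-of-the-envelope version with illustrative numbers:

```python
bytes_per_param = {"float32": 4, "float16/bfloat16": 2, "int8": 1, "int4": 0.5}
params_billions = 7  # hypothetical model size
for dtype, nbytes in bytes_per_param.items():
    print(f"{dtype:>18}: ~{params_billions * nbytes:.1f} GB of weights")
```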