bullerwins committed on
Commit
84c9ea6
·
1 Parent(s): 2b8e580

Add application file

Browse files
Files changed (1) hide show
  1. app.py +267 -0
app.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import math
3
+
4
def calculate_automatic_distribution(vram_list, total_layers, model_size_gb, start_layer):
    """Distribute model layers across GPUs according to their VRAM capacity.

    Args:
        vram_list: VRAM size in GB for each GPU, in device order.
        total_layers: Total number of layers to hand out.
        model_size_gb: Overall model size in GB; per-layer size is derived
            from this assuming uniform layers.
        start_layer: First layer index (kept for interface compatibility;
            the distribution math does not depend on it).

    Returns:
        A list with one entry per GPU giving its assigned layer count, or an
        empty list when any input is invalid.
    """
    if not vram_list or total_layers <= 0 or model_size_gb <= 0:
        return []

    # Approximate size of one layer, assuming all layers are equal.
    per_layer_gb = model_size_gb / total_layers

    distribution = []
    layers_left = total_layers
    last_index = len(vram_list) - 1

    for idx, capacity_gb in enumerate(vram_list):
        if layers_left <= 0:
            # Everything already assigned; remaining GPUs get nothing.
            distribution.append(0)
            continue

        # Whole layers that fit in this GPU's VRAM, capped by what's left.
        take = min(int(capacity_gb / per_layer_gb), layers_left)

        # The final GPU always receives at least one layer while any remain.
        if idx == last_index:
            take = max(1, take)

        distribution.append(take)
        layers_left -= take

    return distribution
32
+
33
def generate_layer_assignment(gpu_count, layers_per_gpu, start_layer, pattern):
    """Build the per-GPU ``-ot`` override arguments for llama.cpp.

    Layers are handed out consecutively starting at ``start_layer``; every GPU
    with a positive layer count yields one ``-ot`` argument mapping its layer
    indices (as a regex alternation) to ``CUDA<gpu_id>``.
    """
    commands = []
    next_layer = start_layer

    for device in range(gpu_count):
        count = layers_per_gpu[device] if device < len(layers_per_gpu) else 0
        if count <= 0:
            # No layers for this GPU: emit nothing and keep the cursor put.
            continue

        # Regex alternation of this GPU's consecutive layer indices.
        alternation = "|".join(str(next_layer + offset) for offset in range(count))
        commands.append(f'-ot "blk\\.({alternation})\\.{pattern}=CUDA{device}"')
        next_layer += count

    return commands
52
+
53
def format_output(assignments):
    """Join ``-ot`` arguments into one multi-line shell command fragment.

    Each argument ends up on its own line terminated by a backslash, ready to
    paste into a shell invocation. Returns an empty string for no arguments.
    """
    continuation = " \\\n"  # backslash + newline: shell line continuation
    return continuation.join(assignments) if assignments else ""
60
+
61
def generate_layer_config(num_gpus, vram_values, start_layer, total_layers, model_size_gb, mode, manual_layers, pattern):
    """Produce the formatted ``-ot`` argument string for the given settings.

    Picks per-GPU layer counts either automatically (from VRAM sizes) or from
    the user-supplied manual counts, then renders them as llama.cpp command
    arguments. Always returns a string — errors come back as an ``Error: ...``
    message rather than an exception, so the UI can display them directly.
    """
    try:
        # Reject nonsensical GPU or layer counts up front.
        if num_gpus <= 0 or total_layers <= 0:
            return "Error: Invalid number of GPUs or layers"

        if mode == "Automatic":
            # VRAM-proportional distribution.
            distribution = calculate_automatic_distribution(
                vram_values, total_layers, model_size_gb, start_layer
            )
        else:
            # User-specified counts, truncated to the active GPU count.
            distribution = manual_layers[:num_gpus]

        commands = generate_layer_assignment(num_gpus, distribution, start_layer, pattern)
        return format_output(commands)
    except Exception as e:
        # Surface any failure as text for the Gradio output box.
        return f"Error generating configuration: {str(e)}"
85
+
86
# Create Gradio interface.
# Layout: a left column with all configuration controls (GPU count, layer
# numbers, pattern, mode, plus 8 pre-created VRAM and 8 manual-layer inputs
# that are shown/hidden as needed) and a right column with the read-only
# output box. All callbacks below receive the full input list positionally.
with gr.Blocks(title="Llama.cpp Layer Assignment Tool") as app:
    gr.Markdown("# Llama.cpp GPU Layer Assignment Tool")
    gr.Markdown("Generate `-ot` arguments for distributing model layers across multiple GPUs")

    with gr.Row():
        with gr.Column(scale=1):
            # Basic configuration
            num_gpus = gr.Slider(1, 8, value=7, step=1, label="Number of GPUs")
            start_layer = gr.Number(value=3, label="Starting Layer Number", minimum=0)
            total_layers = gr.Number(value=30, label="Total Number of Layers", minimum=1)
            model_size_gb = gr.Number(value=70, label="Model Size (GB)", minimum=1)
            pattern = gr.Textbox(value="ffn_.*", label="Layer Pattern", placeholder="ffn_.*")

            # Mode selection
            mode = gr.Radio(["Automatic", "Manual"], value="Automatic", label="Distribution Mode")

            # VRAM inputs container (for automatic mode)
            with gr.Column() as vram_container:
                gr.Markdown("### GPU VRAM Configuration (Automatic Mode)")
                vram_inputs = []
                for i in range(8):  # Create max inputs, show/hide as needed
                    vram_inputs.append(gr.Number(
                        label=f"GPU {i} VRAM (GB)",
                        value=96 if i == 0 else (32 if i < 3 else 24),
                        minimum=1,
                        maximum=200,
                        visible=(i < 7)  # Show first 7 by default
                    ))

            # Manual layer inputs container (for manual mode)
            with gr.Column(visible=False) as manual_container:
                gr.Markdown("### Layer Assignment (Manual Mode)")
                gr.Markdown("Specify how many layers each GPU should handle:")
                manual_inputs = []
                for i in range(8):  # Create max inputs, show/hide as needed
                    manual_inputs.append(gr.Number(
                        label=f"GPU {i} - Number of Layers",
                        value=13 if i == 0 else (3 if i < 3 else 2),
                        minimum=0,
                        maximum=100,
                        visible=(i < 7)  # Show first 7 by default
                    ))

        with gr.Column(scale=2):
            # Output
            output_text = gr.Textbox(
                label="Generated Command Arguments",
                lines=15,
                max_lines=20,
                show_copy_button=True,
                interactive=False
            )

    def generate_config(*args):
        """Generate layer configuration based on all inputs"""
        # NOTE: the positional layout of *args mirrors `all_inputs` below:
        # args[0..5] = basic controls, args[6..13] = VRAM numbers,
        # args[14..21] = manual layer counts. Keep the two in sync.
        try:
            # Extract basic inputs; falsy values fall back to safe defaults.
            num_gpus_val = int(args[0])
            start_layer_val = int(args[1]) if args[1] else 0
            total_layers_val = int(args[2]) if args[2] else 1
            model_size_gb_val = float(args[3]) if args[3] else 1
            pattern_val = args[4] if args[4] else "ffn_.*"
            mode_val = args[5]

            # Extract VRAM values (args[6:14])
            vram_values = []
            for i in range(num_gpus_val):
                vram_val = args[6 + i] if args[6 + i] else 24
                vram_values.append(float(vram_val))

            # Extract manual layer values (args[14:22]); only needed in Manual mode.
            manual_values = []
            if mode_val == "Manual":
                for i in range(num_gpus_val):
                    manual_val = args[14 + i] if args[14 + i] else 4
                    manual_values.append(int(manual_val))

            return generate_layer_config(
                num_gpus_val, vram_values, start_layer_val, total_layers_val,
                model_size_gb_val, mode_val, manual_values, pattern_val
            )
        except Exception as e:
            # Surface the failure text directly in the output box.
            return f"Error: {str(e)}"

    def sync_auto_to_manual(*args):
        """Sync automatic distribution to manual inputs when switching modes"""
        try:
            # Extract basic inputs (same positional layout as generate_config).
            num_gpus_val = int(args[0])
            start_layer_val = int(args[1]) if args[1] else 0
            total_layers_val = int(args[2]) if args[2] else 1
            model_size_gb_val = float(args[3]) if args[3] else 1

            # Extract VRAM values (args[6:14])
            vram_values = []
            for i in range(num_gpus_val):
                vram_val = args[6 + i] if args[6 + i] else 24
                vram_values.append(float(vram_val))

            # Calculate automatic distribution
            auto_distribution = calculate_automatic_distribution(vram_values, total_layers_val, model_size_gb_val, start_layer_val)

            # Update manual inputs with automatic distribution; pad unused
            # slots (beyond the active GPU count) with zeros.
            manual_updates = []
            for i in range(8):
                if i < len(auto_distribution):
                    manual_updates.append(int(auto_distribution[i]))
                else:
                    manual_updates.append(0)

            return manual_updates
        except Exception as e:
            # Return default values if calculation fails
            return [4] * 8

    # Collect all inputs for the generation function
    # (order defines the *args layout consumed by the callbacks above).
    all_inputs = [num_gpus, start_layer, total_layers, model_size_gb, pattern, mode] + vram_inputs + manual_inputs

    # Update UI visibility when GPU count changes
    def update_gpu_count(num_gpus_val, mode_val):
        """Update visibility when GPU count changes"""
        # Returns 16 component updates: 8 for vram_inputs, then 8 for
        # manual_inputs — matching the outputs list of num_gpus.change.
        updates = []

        # Update VRAM inputs visibility (show in automatic mode)
        for i in range(8):
            updates.append(gr.Number(visible=(i < num_gpus_val and mode_val == "Automatic")))

        # Update manual inputs visibility (show in manual mode)
        for i in range(8):
            updates.append(gr.Number(visible=(i < num_gpus_val and mode_val == "Manual")))

        return updates

    num_gpus.change(
        fn=lambda n, m: update_gpu_count(n, m),
        inputs=[num_gpus, mode],
        outputs=vram_inputs + manual_inputs
    )

    # Handle mode change with sync from auto to manual
    def handle_mode_change(*args):
        """Handle mode change with sync from auto to manual"""
        num_gpus_val = int(args[0])
        mode_val = args[5]

        # Update container visibility
        container_updates = [
            gr.Column(visible=(mode_val == "Automatic")),  # vram_container
            gr.Column(visible=(mode_val == "Manual"))  # manual_container
        ]

        # Update input visibility
        input_updates = []
        for i in range(8):
            input_updates.append(gr.Number(visible=(i < num_gpus_val and mode_val == "Automatic")))
        for i in range(8):
            input_updates.append(gr.Number(visible=(i < num_gpus_val and mode_val == "Manual")))

        # If switching to manual mode, sync automatic distribution
        if mode_val == "Manual":
            manual_updates = sync_auto_to_manual(*args)
            return container_updates + input_updates + manual_updates
        else:
            return container_updates + input_updates + [0] * 8

    # NOTE(review): `manual_inputs` appears twice in outputs on purpose —
    # the first copy receives the 16 visibility updates (via input_updates),
    # the second copy receives the 8 synced values. Some Gradio versions may
    # reject or reorder duplicate output components — verify against the
    # pinned gradio version.
    mode.change(
        fn=handle_mode_change,
        inputs=all_inputs,
        outputs=[vram_container, manual_container] + vram_inputs + manual_inputs + manual_inputs
    )

    # Generate output on any input change
    for input_component in all_inputs:
        input_component.change(
            fn=generate_config,
            inputs=all_inputs,
            outputs=[output_text]
        )

if __name__ == "__main__":
    app.launch()