Spaces:
Runtime error
Runtime error
Commit
·
bbc8453
1
Parent(s):
4424c49
Update models.py
Browse files
models.py
CHANGED
|
@@ -112,11 +112,21 @@ class OpenSourceLlama2Model(BaseTCOModel):
|
|
| 112 |
|
| 113 |
def on_model_change(model):
|
| 114 |
if model == "Llama 2 7B":
|
| 115 |
-
return [gr.Dropdown.update(choices=vm_choices),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
else:
|
| 117 |
not_supported_vm = ["1x Nvidia A100 (Azure NC24ads A100 v4)", "2x Nvidia A100 (Azure NC48ads A100 v4)"]
|
| 118 |
choices = [x for x in vm_choices if x not in not_supported_vm]
|
| 119 |
-
return [gr.Dropdown.update(choices=choices
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
def on_vm_change(model, vm):
|
| 122 |
# TO DO: load info from CSV
|
|
@@ -144,10 +154,9 @@ class OpenSourceLlama2Model(BaseTCOModel):
|
|
| 144 |
)
|
| 145 |
self.input_length = gr.Number(233, label="Average number of input tokens", info="This is the number of input tokens used when the model was benchmarked to get the number of tokens/second it processes",
|
| 146 |
interactive=False, visible=False)
|
| 147 |
-
self.
|
| 148 |
-
self.info_70B = gr.Markdown("To see the benchmark results used for the Llama2-70B model, [click here](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper)", interactive=False, visible=False)
|
| 149 |
|
| 150 |
-
self.model.change(on_model_change, inputs=self.model, outputs=[self.vm, self.
|
| 151 |
self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=[self.vm_cost_per_hour, self.tokens_per_second])
|
| 152 |
self.maxed_out = gr.Slider(minimum=0.01, value=50., step=0.01, label="% maxed out",
|
| 153 |
info="How much the GPU is fully used",
|
|
|
|
| 112 |
|
| 113 |
def on_model_change(model):
    """Build the component updates emitted when the selected model changes.

    The returned list holds five Gradio update objects, in this order:
    a Dropdown update (VM choices), a Markdown update (benchmark note),
    two Number updates carrying values, and a Number update toggling
    visibility. The caller wires them to the matching output components.

    Args:
        model: Name of the selected model; "Llama 2 7B" selects the first
            configuration, any other value selects the 70B configuration.

    Returns:
        A list of five update objects as described above.
    """
    if model == "Llama 2 7B":
        # Every VM stays selectable for the 7B model.
        vm_update = gr.Dropdown.update(choices=vm_choices)
        info_text = "To see the script used to benchmark the Llama2-7B model, [click here](https://example.com/script)"
        first_number = 3.6730
        second_number = 694.38
        show_last_number = True
    else:
        # These VMs are filtered out of the dropdown for the other model.
        excluded_vms = [
            "1x Nvidia A100 (Azure NC24ads A100 v4)",
            "2x Nvidia A100 (Azure NC48ads A100 v4)",
        ]
        remaining_vms = [vm for vm in vm_choices if vm not in excluded_vms]
        vm_update = gr.Dropdown.update(
            choices=remaining_vms,
            value="4x Nvidia A100 (Azure NC48ads A100 v4)",
        )
        info_text = "To see the benchmark results used for the Llama2-70B model, [click here](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper)"
        first_number = 14.692
        second_number = 18.6
        show_last_number = False

    return [
        vm_update,
        gr.Markdown.update(value=info_text),
        gr.Number.update(value=first_number),
        gr.Number.update(value=second_number),
        gr.Number.update(visible=show_last_number),
    ]
|
| 130 |
|
| 131 |
def on_vm_change(model, vm):
|
| 132 |
# TO DO: load info from CSV
|
|
|
|
| 154 |
)
|
| 155 |
self.input_length = gr.Number(233, label="Average number of input tokens", info="This is the number of input tokens used when the model was benchmarked to get the number of tokens/second it processes",
|
| 156 |
interactive=False, visible=False)
|
| 157 |
+
self.info = gr.Markdown("To see the script used to benchmark the Llama2-7B model, [click here](https://example.com/script)", interactive=False, visible=False)
|
|
|
|
| 158 |
|
| 159 |
+
self.model.change(on_model_change, inputs=self.model, outputs=[self.vm, self.info, self.vm_cost_per_hour, self.tokens_per_second, self.input_length])
|
| 160 |
self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=[self.vm_cost_per_hour, self.tokens_per_second])
|
| 161 |
self.maxed_out = gr.Slider(minimum=0.01, value=50., step=0.01, label="% maxed out",
|
| 162 |
info="How much the GPU is fully used",
|