update generation type names
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ import pandas as pd
|
|
| 11 |
|
| 12 |
# benchmark order: pytorch, tf eager, tf xla; units = ms
|
| 13 |
BENCHMARK_DATA = {
|
| 14 |
-
"Greedy Search": {
|
| 15 |
"DistilGPT2": {
|
| 16 |
"T4": [336.22, 3976.23, 115.84],
|
| 17 |
"3090": [158.38, 1835.82, 46.56],
|
|
@@ -53,7 +53,7 @@ BENCHMARK_DATA = {
|
|
| 53 |
"A100": [1801.68, 16707.71, 411.93],
|
| 54 |
},
|
| 55 |
},
|
| 56 |
-
"Sample": {
|
| 57 |
"DistilGPT2": {
|
| 58 |
"T4": [617.40, 6078.81, 221.65],
|
| 59 |
"3090": [310.37, 2843.73, 85.44],
|
|
@@ -184,8 +184,8 @@ with demo:
|
|
| 184 |
"""
|
| 185 |
)
|
| 186 |
with gr.Tabs():
|
| 187 |
-
with gr.TabItem("Greedy Search"):
|
| 188 |
-
plot_fn = functools.partial(get_plot, generate_type="Greedy Search")
|
| 189 |
with gr.Row():
|
| 190 |
with gr.Column():
|
| 191 |
model_selector = gr.Dropdown(
|
|
@@ -202,7 +202,7 @@ with demo:
|
|
| 202 |
)
|
| 203 |
gr.Markdown(
|
| 204 |
"""
|
| 205 |
-
### Greedy Search benchmark parameters
|
| 206 |
- `max_new_tokens = 64`;
|
| 207 |
- `pad_to_multiple_of = 64` for Tensorflow XLA models. Others do not pad (input prompts between 2 and 33 tokens).
|
| 208 |
"""
|
|
@@ -210,8 +210,8 @@ with demo:
|
|
| 210 |
plot = gr.Image(value=plot_fn("T5 Small", "Yes")) # Show plot when the gradio app is initialized
|
| 211 |
model_selector.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)
|
| 212 |
eager_enabler.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)
|
| 213 |
-
with gr.TabItem("Sample"):
|
| 214 |
-
plot_fn = functools.partial(get_plot, generate_type="Sample")
|
| 215 |
with gr.Row():
|
| 216 |
with gr.Column():
|
| 217 |
model_selector = gr.Dropdown(
|
|
@@ -228,7 +228,7 @@ with demo:
|
|
| 228 |
)
|
| 229 |
gr.Markdown(
|
| 230 |
"""
|
| 231 |
-
### Sample benchmark parameters
|
| 232 |
- `max_new_tokens = 128`;
|
| 233 |
- `temperature = 2.0`;
|
| 234 |
- `top_k = 50`;
|
|
|
|
| 11 |
|
| 12 |
# benchmark order: pytorch, tf eager, tf xla; units = ms
|
| 13 |
BENCHMARK_DATA = {
|
| 14 |
+
"Greedy Decoding": {
|
| 15 |
"DistilGPT2": {
|
| 16 |
"T4": [336.22, 3976.23, 115.84],
|
| 17 |
"3090": [158.38, 1835.82, 46.56],
|
|
|
|
| 53 |
"A100": [1801.68, 16707.71, 411.93],
|
| 54 |
},
|
| 55 |
},
|
| 56 |
+
"Sampling": {
|
| 57 |
"DistilGPT2": {
|
| 58 |
"T4": [617.40, 6078.81, 221.65],
|
| 59 |
"3090": [310.37, 2843.73, 85.44],
|
|
|
|
| 184 |
"""
|
| 185 |
)
|
| 186 |
with gr.Tabs():
|
| 187 |
+
with gr.TabItem("Greedy Decoding"):
|
| 188 |
+
plot_fn = functools.partial(get_plot, generate_type="Greedy Decoding")
|
| 189 |
with gr.Row():
|
| 190 |
with gr.Column():
|
| 191 |
model_selector = gr.Dropdown(
|
|
|
|
| 202 |
)
|
| 203 |
gr.Markdown(
|
| 204 |
"""
|
| 205 |
+
### Greedy Decoding benchmark parameters
|
| 206 |
- `max_new_tokens = 64`;
|
| 207 |
- `pad_to_multiple_of = 64` for Tensorflow XLA models. Others do not pad (input prompts between 2 and 33 tokens).
|
| 208 |
"""
|
|
|
|
| 210 |
plot = gr.Image(value=plot_fn("T5 Small", "Yes")) # Show plot when the gradio app is initialized
|
| 211 |
model_selector.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)
|
| 212 |
eager_enabler.change(fn=plot_fn, inputs=[model_selector, eager_enabler], outputs=plot)
|
| 213 |
+
with gr.TabItem("Sampling"):
|
| 214 |
+
plot_fn = functools.partial(get_plot, generate_type="Sampling")
|
| 215 |
with gr.Row():
|
| 216 |
with gr.Column():
|
| 217 |
model_selector = gr.Dropdown(
|
|
|
|
| 228 |
)
|
| 229 |
gr.Markdown(
|
| 230 |
"""
|
| 231 |
+
### Sampling benchmark parameters
|
| 232 |
- `max_new_tokens = 128`;
|
| 233 |
- `temperature = 2.0`;
|
| 234 |
- `top_k = 50`;
|