Spaces:

optimum
/

auto-benchmark

Running

App Files Files Community

IlyasMoutawwakil HF Staff committed on Sep 14, 2023

Commit

eabde51

1 Parent(s): a533468

remove tgi

Browse files

Files changed (2) hide show

app.py +3 -6
config_store.py +204 -60

app.py CHANGED Viewed

@@ -8,14 +8,13 @@ from run import run_benchmark
 from config_store import (
     get_training_config,
     get_inference_config,
-    get_text_generation_inference_config,
     get_neural_compressor_config,
     get_onnxruntime_config,
     get_openvino_config,
     get_pytorch_config,
 )
-BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "text-generation-inference"]
 BENCHMARKS = ["inference", "training"]
 DEVICES = ["cpu", "cuda"]
@@ -25,14 +24,14 @@ with gr.Blocks() as demo:
     gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
     # explanation text
     gr.Markdown(
-        "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git):"
         "<br>A unified multi-backend utility for benchmarking `transformers`, `diffusers`, `peft` and `timm` models with "
         "Optimum's optimizations & quantization, for inference & training, on different backends & hardwares."
     )
     model = gr.Textbox(
         label="model",
-        value="bert-base-uncased",
         info="Model to run the benchmark on. In the particular case of this space, only models that are hosted on huggingface.co/models can be benchmarked.",
     )
     task = gr.Dropdown(
@@ -73,8 +72,6 @@ with gr.Blocks() as demo:
                     openvino_config = get_openvino_config()
                 with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
                     neural_compressor_config = get_neural_compressor_config()
-                with gr.Accordion(label="Text Generation Inference Config", open=False, visible=False):
-                    text_generation_inference_config = get_text_generation_inference_config()
         # hide backend configs based on backend
         backend.change(

 from config_store import (
     get_training_config,
     get_inference_config,
     get_neural_compressor_config,
     get_onnxruntime_config,
     get_openvino_config,
     get_pytorch_config,
 )
+BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
 BENCHMARKS = ["inference", "training"]
 DEVICES = ["cpu", "cuda"]
     gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
     # explanation text
     gr.Markdown(
+        "This is a demo space of [`optimum-Benchmark`](https://github.com/huggingface/optimum-benchmark.git):"
         "<br>A unified multi-backend utility for benchmarking `transformers`, `diffusers`, `peft` and `timm` models with "
         "Optimum's optimizations & quantization, for inference & training, on different backends & hardwares."
     )
     model = gr.Textbox(
         label="model",
+        value="optimum/distilbert-base-uncased-finetuned-sst-2-english",
         info="Model to run the benchmark on. In the particular case of this space, only models that are hosted on huggingface.co/models can be benchmarked.",
     )
     task = gr.Dropdown(
                     openvino_config = get_openvino_config()
                 with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
                     neural_compressor_config = get_neural_compressor_config()
         # hide backend configs based on backend
         backend.change(

config_store.py CHANGED Viewed

@@ -105,79 +105,223 @@ def get_pytorch_config():
         #     info="Uses DistributedDataParallel for multi-gpu training",
         # ),
         # peft_strategy
-        gr.Textbox(
             value="null",
             label="pytorch.peft_strategy",
         ),
     ]
 def get_onnxruntime_config():
-    return get_base_backend_config(backend_name="onnxruntime")
-    # no_weights
-    # no_weights: bool = False
-    # # export options
-    # export: bool = True
-    # use_cache: bool = True
-    # use_merged: bool = False
-    # torch_dtype: Optional[str] = None
-    # # provider options
-    # provider: str = "${infer_provider:${device}}"
-    # device_id: Optional[int] = "${oc.deprecated:backend.provider_options.device_id}"
-    # provider_options: Dict[str, Any] = field(default_factory=lambda: {"device_id": "${infer_device_id:${device}}"})
-    # # inference options
-    # use_io_binding: bool = "${is_gpu:${device}}"
-    # enable_profiling: bool = "${oc.deprecated:backend.session_options.enable_profiling}"
-    # session_options: Dict[str, Any] = field(
-    #     default_factory=lambda: {"enable_profiling": "${is_profiling:${benchmark.name}}"}
-    # )
-    # # optimization options
-    # optimization: bool = False
-    # optimization_config: Dict[str, Any] = field(default_factory=dict)
-    # # quantization options
-    # quantization: bool = False
-    # quantization_config: Dict[str, Any] = field(default_factory=dict)
-    # # calibration options
-    # calibration: bool = False
-    # calibration_config: Dict[str, Any] = field(default_factory=dict)
-    # # null, O1, O2, O3, O4
-    # auto_optimization: Optional[str] = None
-    # auto_optimization_config: Dict[str, Any] = field(default_factory=dict)
-    # # null, arm64, avx2, avx512, avx512_vnni, tensorrt
-    # auto_quantization: Optional[str] = None
-    # auto_quantization_config: Dict[str, Any] = field(default_factory=dict)
-    # # ort-training is basically a different package so we might need to separate these two backends in the future
-    # use_inference_session: bool = "${is_inference:${benchmark.name}}"
-    # # training options
-    # use_ddp: bool = False
-    # ddp_config: Dict[str, Any] = field(default_factory=dict)
-    # # peft options
-    # peft_strategy: Optional[str] = None
-    # peft_config: Dict[str, Any] = field(default_factory=dict)
 def get_openvino_config():
-    return get_base_backend_config(backend_name="openvino")
 def get_neural_compressor_config():
-    return get_base_backend_config(backend_name="neural-compressor")
-def get_text_generation_inference_config():
-    return get_base_backend_config(backend_name="text-generation-inference")
 def get_inference_config():

         #     info="Uses DistributedDataParallel for multi-gpu training",
         # ),
         # peft_strategy
+        gr.Dropdown(
             value="null",
+            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
             label="pytorch.peft_strategy",
+            info="Use null for no PEFT",
         ),
     ]
 def get_onnxruntime_config():
+    """Build the Gradio inputs for the ONNX Runtime backend.
+
+    Returns the components produced by ``get_base_backend_config`` for
+    ``"onnxruntime"`` followed by the ONNX Runtime specific options defined
+    below, in that order.
+    """
+    return get_base_backend_config(backend_name="onnxruntime") + [
+        # no_weights
+        gr.Checkbox(
+            value=False,
+            # prefixed with this backend's name, like every other option here
+            label="onnxruntime.no_weights",
+            info="Generates random weights instead of downloading pretrained ones",
+        ),
+        # export
+        gr.Checkbox(
+            value=True,
+            label="onnxruntime.export",
+            info="Exports the model to ONNX",
+        ),
+        # use_cache
+        gr.Checkbox(
+            value=True,
+            label="onnxruntime.use_cache",
+            info="Uses cached ONNX model if available",
+        ),
+        # use_merged
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.use_merged",
+            info="Uses merged ONNX model if available",
+        ),
+        # torch_dtype
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.torch_dtype",
+            choices=["null", "bfloat16", "float16", "float32", "auto"],
+            info="Use null for default and `auto` for automatic dtype selection",
+        ),
+        # use_io_binding
+        gr.Checkbox(
+            value=True,
+            label="onnxruntime.use_io_binding",
+            info="Uses IO binding for inference",
+        ),
+        # auto_optimization
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.auto_optimization",
+            choices=["null", "O1", "O2", "O3", "O4"],
+            info="Use null for default",
+        ),
+        # auto_quantization
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.auto_quantization",
+            choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
+            info="Use null for default",
+        ),
+        # optimization
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.optimization",
+            info="Enables manual optimization",
+        ),
+        # optimization_config
+        gr.Dataframe(
+            type="array",
+            # the header names the config key and the single row holds its
+            # value, matching calibration_config below
+            headers=["optimization_level"],
+            value=[["1"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="onnxruntime.optimization_config",
+        ),
+        # quantization
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.quantization",
+            info="Enables manual quantization",
+        ),
+        # quantization_config
+        gr.Dataframe(
+            type="array",
+            # header is the config key; the row carries its default value
+            headers=["is_static"],
+            value=[[False]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="onnxruntime.quantization_config",
+            info="Use null for default",
+        ),
+        # calibration
+        gr.Checkbox(
+            value=False,
+            label="onnxruntime.calibration",
+            info="Enables calibration",
+        ),
+        # calibration_config
+        gr.Dataframe(
+            type="array",
+            value=[["glue"]],
+            headers=["dataset_name"],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+            label="onnxruntime.calibration_config",
+            info="Use null for default",
+        ),
+        # peft_strategy
+        gr.Dropdown(
+            value="null",
+            label="onnxruntime.peft_strategy",
+            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
+            info="Use null for full parameters fine-tuning",
+        ),
+    ]
 def get_openvino_config():
+    """Assemble the Gradio inputs shown for the OpenVINO backend.
+
+    The shared components from ``get_base_backend_config`` come first,
+    followed by the OpenVINO-specific checkboxes and tables.
+    """
+    # Build the shared components before the backend-specific ones so the
+    # order in which components are constructed stays the same.
+    shared_inputs = get_base_backend_config(backend_name="openvino")
+
+    # export / use_cache / use_merged / reshape / half
+    model_toggles = [
+        gr.Checkbox(
+            label="openvino.export",
+            info="Exports the model to ONNX",
+            value=True,
+        ),
+        gr.Checkbox(
+            label="openvino.use_cache",
+            info="Uses cached ONNX model if available",
+            value=True,
+        ),
+        gr.Checkbox(
+            label="openvino.use_merged",
+            info="Uses merged ONNX model if available",
+            value=False,
+        ),
+        gr.Checkbox(
+            label="openvino.reshape",
+            info="Reshapes the model to the input shape",
+            value=False,
+        ),
+        gr.Checkbox(
+            label="openvino.half",
+            info="Converts model to half precision",
+            value=False,
+        ),
+    ]
+
+    # quantization switch and its one-row settings table
+    quantization_inputs = [
+        gr.Checkbox(
+            label="openvino.quantization",
+            info="Enables quantization",
+            value=False,
+        ),
+        gr.Dataframe(
+            label="openvino.quantization_config",
+            type="array",
+            headers=["compression", "input_info", "save_onnx_model"],
+            value=[[None, None, None]],
+            row_count=(1, "static"),
+            col_count=(3, "dynamic"),
+        ),
+    ]
+
+    # calibration switch and its one-row settings table
+    calibration_inputs = [
+        gr.Checkbox(
+            label="openvino.calibration",
+            info="Enables calibration",
+            value=False,
+        ),
+        gr.Dataframe(
+            label="openvino.calibration_config",
+            type="array",
+            headers=["dataset_name"],
+            value=[["glue"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+        ),
+    ]
+
+    return shared_inputs + [*model_toggles, *quantization_inputs, *calibration_inputs]
 def get_neural_compressor_config():
+    """Assemble the Gradio inputs shown for the Neural Compressor backend.
+
+    The shared components from ``get_base_backend_config`` come first,
+    followed by the post-training quantization and calibration inputs.
+    """
+    # Build the shared components before the backend-specific ones so the
+    # order in which components are constructed stays the same.
+    shared_inputs = get_base_backend_config(backend_name="neural-compressor")
+
+    # ptq_quantization switch and its one-row settings table
+    ptq_inputs = [
+        gr.Checkbox(
+            label="neural-compressor.ptq_quantization",
+            info="Enables post-training quantization",
+            value=False,
+        ),
+        gr.Dataframe(
+            label="neural-compressor.ptq_quantization_config",
+            type="array",
+            headers=["device"],
+            value=[["cpu"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+        ),
+    ]
+
+    # calibration switch and its one-row settings table
+    calibration_inputs = [
+        gr.Checkbox(
+            label="neural-compressor.calibration",
+            info="Enables calibration",
+            value=False,
+        ),
+        gr.Dataframe(
+            label="neural-compressor.calibration_config",
+            type="array",
+            headers=["dataset_name"],
+            value=[["glue"]],
+            row_count=(1, "static"),
+            col_count=(1, "dynamic"),
+        ),
+    ]
+
+    return shared_inputs + [*ptq_inputs, *calibration_inputs]
 def get_inference_config():