Spaces:
Running
Running
| import gradio as gr | |
def get_base_backend_config(backend_name="pytorch"):
    """Build the Gradio inputs shared by every backend.

    Args:
        backend_name: Prefix for each component label; labels have the
            form ``"<backend_name>.<option>"``.

    Returns:
        A list of six components, in this order: seed,
        inter_op_num_threads, intra_op_num_threads,
        initial_isolation_check, continous_isolation_check, delete_cache.
    """

    def _label(option):
        # Labels are namespaced by backend: "<backend_name>.<option>".
        return f"{backend_name}.{option}"

    # Both thread-count boxes carry the same hint.
    threads_hint = "Use null for default and -1 for cpu_count()"

    seed = gr.Textbox(
        label=_label("seed"),
        info="Sets seed for reproducibility",
        value=42,
    )
    inter_op_num_threads = gr.Textbox(
        label=_label("inter_op_num_threads"),
        info=threads_hint,
        value="null",
    )
    intra_op_num_threads = gr.Textbox(
        label=_label("intra_op_num_threads"),
        info=threads_hint,
        value="null",
    )
    initial_isolation_check = gr.Checkbox(
        label=_label("initial_isolation_check"),
        info="Makes sure that initially, no other process is running on the target device",
        value=True,
    )
    # NOTE(review): the "continous" spelling is kept verbatim because the
    # label is a runtime string; presumably it mirrors a config key --
    # confirm before renaming.
    continous_isolation_check = gr.Checkbox(
        label=_label("continous_isolation_check"),
        info="Makes sure that throughout the benchmark, no other process is running on the target device",
        value=True,
    )
    delete_cache = gr.Checkbox(
        label=_label("delete_cache"),
        info="Deletes model cache (weights & configs) after benchmark is done",
        value=False,
    )

    return [
        seed,
        inter_op_num_threads,
        intra_op_num_threads,
        initial_isolation_check,
        continous_isolation_check,
        delete_cache,
    ]
def get_pytorch_config():
    """Build the Gradio inputs for the PyTorch backend.

    Returns:
        The shared backend components (labelled with the ``pytorch.``
        prefix) followed by the PyTorch-specific ones: no_weights,
        torch_dtype, amp_autocast, amp_dtype, torch_compile,
        bettertransformer, quantization_scheme and peft_strategy.
    """
    return get_base_backend_config(backend_name="pytorch") + [
        # no_weights
        gr.Checkbox(
            value=False,
            label="pytorch.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # device_map (currently disabled)
        # gr.Dropdown(
        #     value="null",
        #     label="pytorch.device_map",
        #     choices=["null", "auto", "sequential"],
        #     info="Use null for default and `auto` or `sequential` the same way as in `from_pretrained`",
        # ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # amp_autocast
        gr.Checkbox(
            value=False,
            label="pytorch.amp_autocast",
            info="Enables Pytorch's native Automatic Mixed Precision",
        ),
        # amp_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.amp_dtype",
            info="Use null for default",
            choices=["null", "bfloat16", "float16"],
        ),
        # torch_compile
        gr.Checkbox(
            value=False,
            label="pytorch.torch_compile",
            info="Compiles the model with torch.compile",
        ),
        # bettertransformer
        gr.Checkbox(
            value=False,
            label="pytorch.bettertransformer",
            info="Applies optimum.BetterTransformer for fastpath and optimized attention",
        ),
        # quantization_scheme
        gr.Dropdown(
            value="null",
            choices=["null", "gptq", "bnb"],
            label="pytorch.quantization_scheme",
            info="Use null for no quantization",
        ),
        # use_ddp (currently disabled)
        # gr.Checkbox(
        #     value=False,
        #     label="pytorch.use_ddp",
        #     info="Uses DistributedDataParallel for multi-gpu training",
        # ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            label="pytorch.peft_strategy",
            info="Use null for no PEFT",
        ),
    ]
def get_onnxruntime_config():
    """Build the Gradio inputs for the ONNX Runtime backend.

    Returns:
        The shared backend components (labelled with the ``onnxruntime.``
        prefix) followed by the ONNX Runtime-specific ones. Every label
        uses the ``onnxruntime.`` prefix. The ``*_config`` entries are
        single-row tables whose headers are option names and whose row
        holds the corresponding values.
    """
    return get_base_backend_config(backend_name="onnxruntime") + [
        # no_weights
        gr.Checkbox(
            value=False,
            # Namespaced under this backend, like every other label here.
            label="onnxruntime.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # export
        gr.Checkbox(
            value=True,
            label="onnxruntime.export",
            info="Exports the model to ONNX",
        ),
        # use_cache
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_cache",
            info="Uses cached ONNX model if available",
        ),
        # use_merged
        gr.Checkbox(
            value=False,
            label="onnxruntime.use_merged",
            info="Uses merged ONNX model if available",
        ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="onnxruntime.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # use_io_binding
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_io_binding",
            info="Uses IO binding for inference",
        ),
        # auto_optimization
        gr.Dropdown(
            value="null",
            label="onnxruntime.auto_optimization",
            choices=["null", "O1", "O2", "O3", "O4"],
            info="Use null for default",
        ),
        # auto_quantization
        gr.Dropdown(
            value="null",
            label="onnxruntime.auto_quantization",
            choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
            info="Use null for default",
        ),
        # optimization
        gr.Checkbox(
            value=False,
            label="onnxruntime.optimization",
            info="Enables manual optimization",
        ),
        # optimization_config
        # The option name goes in the header and its value in the row,
        # matching the layout of calibration_config below.
        gr.Dataframe(
            type="array",
            headers=["optimization_level"],
            value=[["1"]],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.optimization_config",
        ),
        # quantization
        gr.Checkbox(
            value=False,
            label="onnxruntime.quantization",
            info="Enables manual quantization",
        ),
        # quantization_config
        # Headers must be strings (option names); the boolean is the value.
        gr.Dataframe(
            type="array",
            headers=["is_static"],
            value=[[False]],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.quantization_config",
            info="Use null for default",
        ),
        # calibration
        gr.Checkbox(
            value=False,
            label="onnxruntime.calibration",
            info="Enables calibration",
        ),
        # calibration_config
        gr.Dataframe(
            type="array",
            value=[["glue"]],
            headers=["dataset_name"],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.calibration_config",
            info="Use null for default",
        ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            label="onnxruntime.peft_strategy",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            info="Use null for full parameters fine-tuning",
        ),
    ]
def get_openvino_config():
    """Build the Gradio inputs for the OpenVINO backend.

    Returns:
        The shared backend components (labelled with the ``openvino.``
        prefix) followed by: export, use_cache, use_merged, reshape, half,
        quantization, quantization_config, calibration and
        calibration_config.
    """

    def _toggle(option, default, hint):
        # One checkbox per boolean option, labelled "openvino.<option>".
        return gr.Checkbox(label=f"openvino.{option}", info=hint, value=default)

    def _one_row_table(option, headers, row):
        # Single static row; the column count starts at len(headers) and
        # is declared "dynamic".
        return gr.Dataframe(
            label=f"openvino.{option}",
            headers=headers,
            value=[row],
            type="array",
            row_count=(1, "static"),
            col_count=(len(headers), "dynamic"),
        )

    # Shared components are created first so construction order is kept.
    components = get_base_backend_config(backend_name="openvino")

    export = _toggle("export", True, "Exports the model to ONNX")
    use_cache = _toggle("use_cache", True, "Uses cached ONNX model if available")
    use_merged = _toggle("use_merged", False, "Uses merged ONNX model if available")
    reshape = _toggle("reshape", False, "Reshapes the model to the input shape")
    half = _toggle("half", False, "Converts model to half precision")
    quantization = _toggle("quantization", False, "Enables quantization")
    quantization_config = _one_row_table(
        "quantization_config",
        ["compression", "input_info", "save_onnx_model"],
        [None, None, None],
    )
    calibration = _toggle("calibration", False, "Enables calibration")
    calibration_config = _one_row_table(
        "calibration_config",
        ["dataset_name"],
        ["glue"],
    )

    return components + [
        export,
        use_cache,
        use_merged,
        reshape,
        half,
        quantization,
        quantization_config,
        calibration,
        calibration_config,
    ]
def get_neural_compressor_config():
    """Build the Gradio inputs for the Neural Compressor backend.

    Returns:
        The shared backend components (labelled with the
        ``neural-compressor.`` prefix) followed by: ptq_quantization,
        ptq_quantization_config, calibration and calibration_config.
    """
    backend = "neural-compressor"

    def _one_column_table(option, header, cell):
        # Single static row with one initial column; columns are "dynamic".
        return gr.Dataframe(
            label=f"{backend}.{option}",
            headers=[header],
            value=[[cell]],
            type="array",
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
        )

    # Shared components are created first so construction order is kept.
    components = get_base_backend_config(backend_name=backend)

    ptq_quantization = gr.Checkbox(
        label=f"{backend}.ptq_quantization",
        info="Enables post-training quantization",
        value=False,
    )
    ptq_quantization_config = _one_column_table(
        "ptq_quantization_config", "device", "cpu"
    )
    calibration = gr.Checkbox(
        label=f"{backend}.calibration",
        info="Enables calibration",
        value=False,
    )
    calibration_config = _one_column_table(
        "calibration_config", "dataset_name", "glue"
    )

    return components + [
        ptq_quantization,
        ptq_quantization_config,
        calibration,
        calibration_config,
    ]
def get_inference_config():
    """Build the Gradio inputs for the inference benchmark.

    Returns:
        A list of six components, in this order: duration, warmup_runs,
        memory, energy, input_shapes, forward_kwargs.
    """

    def _one_row_table(option, headers, row, hint):
        # Single static row; the column count starts at len(headers) and
        # is declared "dynamic".
        return gr.Dataframe(
            label=f"inference.{option}",
            headers=headers,
            value=[row],
            info=hint,
            type="array",
            row_count=(1, "static"),
            col_count=(len(headers), "dynamic"),
        )

    duration = gr.Textbox(
        label="inference.duration",
        info="Minimum duration of benchmark in seconds",
        value=10,
    )
    warmup_runs = gr.Textbox(
        label="inference.warmup_runs",
        info="Number of warmup runs before measurements",
        value=10,
    )
    memory = gr.Checkbox(
        label="inference.memory",
        info="Measures the peak memory footprint",
        value=False,
    )
    energy = gr.Checkbox(
        label="inference.energy",
        info="Measures energy consumption and carbon emissions",
        value=False,
    )
    input_shapes = _one_row_table(
        "input_shapes",
        ["batch_size", "sequence_length"],
        [2, 16],
        "Controllable input shapes, add more columns for more inputs",
    )
    forward_kwargs = _one_row_table(
        "forward_kwargs",
        ["return_dict"],
        [False],
        "Keyword arguments for the forward pass, add more columns for more arguments",
    )

    return [duration, warmup_runs, memory, energy, input_shapes, forward_kwargs]
def get_training_config():
    """Build the Gradio inputs for the training benchmark.

    Returns:
        A list of three components, in this order: warmup_steps,
        dataset_shapes, training_arguments.
    """

    def _one_row_table(option, headers, row):
        # Single static row; the column count starts at len(headers) and
        # is declared "dynamic".
        return gr.Dataframe(
            label=f"training.{option}",
            headers=headers,
            value=[row],
            type="array",
            row_count=(1, "static"),
            col_count=(len(headers), "dynamic"),
        )

    warmup_steps = gr.Textbox(label="training.warmup_steps", value=40)
    dataset_shapes = _one_row_table(
        "dataset_shapes",
        ["dataset_size", "sequence_length"],
        [500, 16],
    )
    training_arguments = _one_row_table(
        "training_arguments",
        ["per_device_train_batch_size"],
        [2],
    )

    return [warmup_steps, dataset_shapes, training_arguments]