import os
import unittest
from pathlib import Path
from tempfile import NamedTemporaryFile
from unittest import TestCase
from unittest.mock import patch

import pytest
from parameterized import parameterized

from transformers import AutoConfig, PreTrainedTokenizerBase, is_tf_available, is_torch_available
from transformers.onnx import (
    EXTERNAL_DATA_FORMAT_SIZE_LIMIT,
    OnnxConfig,
    OnnxConfigWithPast,
    ParameterFormat,
    validate_model_outputs,
)
from transformers.onnx.utils import (
    compute_effective_axis_dimension,
    compute_serialized_parameters_size,
    get_preprocessor,
)
from transformers.testing_utils import require_onnx, require_rjieba, require_tf, require_torch, require_vision, slow

if is_torch_available() or is_tf_available():
    from transformers.onnx.features import FeaturesManager

if is_torch_available():
    import torch
    from transformers.models.deberta import modeling_deberta


@require_onnx
class OnnxUtilsTestCaseV2(TestCase):
    """
    Covers all the utilities involved in exporting ONNX models.
    """

    def test_compute_effective_axis_dimension(self):
        """
        When exporting an ONNX model with a dynamic axis (batch or sequence) we set batch_size and/or
        sequence_length = -1. We cannot generate an effective tensor with axis dim == -1, so we work around it by
        using some "fixed" values (> 1 to avoid ONNX squeezing the axis).

        This test ensures we correctly replace the generated batch / sequence tensor with an axis > 1.
        """
        # Dynamic axis (batch, no token added by the tokenizer)
        self.assertEqual(compute_effective_axis_dimension(-1, fixed_dimension=2, num_token_to_add=0), 2)

        # Static axis (batch, no token added by the tokenizer)
        self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=2, num_token_to_add=0), 2)
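
        # As the assertions below illustrate, a non-positive dimension is treated as dynamic: the fixed dimension is
        # substituted for it, then the tokens the tokenizer adds are subtracted (e.g. 8 - 2 = 6, 8 - 3 = 5).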
        # Dynamic axis (sequence, tokenizer adds 2 tokens (no pair))
        self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=8, num_token_to_add=2), 6)

        # Dynamic axis (sequence, tokenizer adds 3 tokens (pair))
        self.assertEqual(compute_effective_axis_dimension(0, fixed_dimension=8, num_token_to_add=3), 5)

    def test_compute_parameters_serialized_size(self):
        """
        This test ensures we compute a "correct" approximation of the underlying storage requirement (size) for all
        the parameters, given the specified parameter dtype.
        """
        self.assertEqual(compute_serialized_parameters_size(2, ParameterFormat.Float), 2 * ParameterFormat.Float.size)

    def test_flatten_output_collection_property(self):
        """
        This test ensures we correctly flatten nested collections such as the one used when returning past_keys
        (past_keys = Tuple[Tuple]).

        The ONNX exporter will export nested collections as ${collection_name}.${level_idx_0}.${level_idx_1}...${idx_n}
        """
        self.assertEqual(
            OnnxConfig.flatten_output_collection_property("past_key", [[0], [1], [2]]),
            {
                "past_key.0": 0,
                "past_key.1": 1,
                "past_key.2": 2,
            },
        )


class OnnxConfigTestCaseV2(TestCase):
    """
    Covers the tests for models' default behavior.

    Default means no specific feature is enabled on the model.
    """

    def test_use_external_data_format(self):
        """
        External data format is required only if the serialized size of the parameters is bigger than 2 GB.
        """
        TWO_GB_LIMIT = EXTERNAL_DATA_FORMAT_SIZE_LIMIT
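
        # With 4-byte float32 parameters, the 2 GB limit corresponds to roughly 536 million parameters
        # (2 * 1024**3 / 4 = 536_870_912), assuming EXTERNAL_DATA_FORMAT_SIZE_LIMIT is expressed in bytes.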
        # No parameters
        self.assertFalse(OnnxConfig.use_external_data_format(0))

        # Some parameters
        self.assertFalse(OnnxConfig.use_external_data_format(1))

        # Almost 2GB of parameters
        self.assertFalse(OnnxConfig.use_external_data_format((TWO_GB_LIMIT - 1) // ParameterFormat.Float.size))

        # Exactly 2GB of parameters
        self.assertTrue(OnnxConfig.use_external_data_format(TWO_GB_LIMIT // ParameterFormat.Float.size))

        # More than 2GB of parameters
        self.assertTrue(OnnxConfig.use_external_data_format(TWO_GB_LIMIT // ParameterFormat.Float.size + 1))


class OnnxConfigWithPastTestCaseV2(TestCase):
    """
    Covers the tests for models which have the use_cache feature (i.e. "with_past" for ONNX).
    """

    SUPPORTED_WITH_PAST_CONFIGS = {}
    # SUPPORTED_WITH_PAST_CONFIGS = {
    #     ("BART", BartConfig),
    #     ("GPT2", GPT2Config),
    #     # ("T5", T5Config)
    # }

    def test_use_past(self):
        """
        Ensure the use_past variable is correctly set.
        """
        for name, config in OnnxConfigWithPastTestCaseV2.SUPPORTED_WITH_PAST_CONFIGS:
            with self.subTest(name):
                self.assertFalse(
                    OnnxConfigWithPast.from_model_config(config()).use_past,
                    "OnnxConfigWithPast.from_model_config() should not use_past",
                )

                self.assertTrue(
                    OnnxConfigWithPast.with_past(config()).use_past,
                    "OnnxConfigWithPast.with_past() should use_past",
                )

    def test_values_override(self):
        """
        Ensure the use_past variable correctly sets the `use_cache` value in the model's configuration.
        """
        for name, config in OnnxConfigWithPastTestCaseV2.SUPPORTED_WITH_PAST_CONFIGS:
            with self.subTest(name):
                # Without past
                onnx_config_default = OnnxConfigWithPast.from_model_config(config())
                self.assertIsNotNone(onnx_config_default.values_override, "values_override should not be None")
                self.assertIn("use_cache", onnx_config_default.values_override, "use_cache should be present")
                self.assertFalse(
                    onnx_config_default.values_override["use_cache"], "use_cache should be False if not using past"
                )

                # With past
                onnx_config_with_past = OnnxConfigWithPast.with_past(config())
                self.assertIsNotNone(onnx_config_with_past.values_override, "values_override should not be None")
                self.assertIn("use_cache", onnx_config_with_past.values_override, "use_cache should be present")
                self.assertTrue(
                    onnx_config_with_past.values_override["use_cache"], "use_cache should be True if using past"
                )
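

# Each entry below maps a model type to a (usually tiny) checkpoint used by the export tests. An optional third
# element restricts the features to test for that checkpoint (see the perceiver entries); otherwise every feature
# supported for the model type is exercised (see _get_models_to_test below).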
PYTORCH_EXPORT_MODELS = {
    ("albert", "hf-internal-testing/tiny-random-AlbertModel"),
    ("bert", "hf-internal-testing/tiny-random-BertModel"),
    ("beit", "microsoft/beit-base-patch16-224"),
    ("big-bird", "hf-internal-testing/tiny-random-BigBirdModel"),
    ("camembert", "camembert-base"),
    ("clip", "hf-internal-testing/tiny-random-CLIPModel"),
    ("convbert", "hf-internal-testing/tiny-random-ConvBertModel"),
    ("codegen", "hf-internal-testing/tiny-random-CodeGenModel"),
    ("data2vec-text", "hf-internal-testing/tiny-random-Data2VecTextModel"),
    ("data2vec-vision", "facebook/data2vec-vision-base"),
    ("deberta", "hf-internal-testing/tiny-random-DebertaModel"),
    ("deberta-v2", "hf-internal-testing/tiny-random-DebertaV2Model"),
    ("deit", "facebook/deit-small-patch16-224"),
    ("convnext", "facebook/convnext-tiny-224"),
    ("detr", "facebook/detr-resnet-50"),
    ("distilbert", "hf-internal-testing/tiny-random-DistilBertModel"),
    ("electra", "hf-internal-testing/tiny-random-ElectraModel"),
    ("groupvit", "nvidia/groupvit-gcc-yfcc"),
    ("ibert", "kssteven/ibert-roberta-base"),
    ("imagegpt", "openai/imagegpt-small"),
    ("levit", "facebook/levit-128S"),
    ("layoutlm", "hf-internal-testing/tiny-random-LayoutLMModel"),
    ("layoutlmv3", "microsoft/layoutlmv3-base"),
    ("longformer", "allenai/longformer-base-4096"),
    ("mobilebert", "hf-internal-testing/tiny-random-MobileBertModel"),
    ("mobilenet_v1", "google/mobilenet_v1_0.75_192"),
    ("mobilenet_v2", "google/mobilenet_v2_0.35_96"),
    ("mobilevit", "apple/mobilevit-small"),
    ("owlvit", "google/owlvit-base-patch32"),
    ("perceiver", "hf-internal-testing/tiny-random-PerceiverModel", ("masked-lm", "sequence-classification")),
    ("perceiver", "hf-internal-testing/tiny-random-PerceiverModel", ("image-classification",)),
    ("poolformer", "sail/poolformer_s12"),
    ("rembert", "google/rembert"),
    ("resnet", "microsoft/resnet-50"),
    ("roberta", "hf-internal-testing/tiny-random-RobertaModel"),
    ("roformer", "hf-internal-testing/tiny-random-RoFormerModel"),
    ("segformer", "nvidia/segformer-b0-finetuned-ade-512-512"),
    ("squeezebert", "hf-internal-testing/tiny-random-SqueezeBertModel"),
    ("swin", "microsoft/swin-tiny-patch4-window7-224"),
    ("vit", "google/vit-base-patch16-224"),
    ("yolos", "hustvl/yolos-tiny"),
    ("whisper", "openai/whisper-tiny.en"),
    ("xlm", "hf-internal-testing/tiny-random-XLMModel"),
    ("xlm-roberta", "hf-internal-testing/tiny-random-XLMRobertaXLModel"),
}

PYTORCH_EXPORT_ENCODER_DECODER_MODELS = {
    ("vision-encoder-decoder", "nlpconnect/vit-gpt2-image-captioning"),
}

PYTORCH_EXPORT_WITH_PAST_MODELS = {
    ("bloom", "hf-internal-testing/tiny-random-BloomModel"),
    ("gpt2", "hf-internal-testing/tiny-random-GPT2Model"),
    ("gpt-neo", "hf-internal-testing/tiny-random-GPTNeoModel"),
}

PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
    ("bart", "hf-internal-testing/tiny-random-BartModel"),
    ("bigbird-pegasus", "hf-internal-testing/tiny-random-BigBirdPegasusModel"),
    ("blenderbot-small", "facebook/blenderbot_small-90M"),
    ("blenderbot", "hf-internal-testing/tiny-random-BlenderbotModel"),
    ("longt5", "hf-internal-testing/tiny-random-LongT5Model"),
    ("marian", "Helsinki-NLP/opus-mt-en-de"),
    ("mbart", "sshleifer/tiny-mbart"),
    ("mt5", "google/mt5-base"),
    ("m2m-100", "hf-internal-testing/tiny-random-M2M100Model"),
    ("t5", "hf-internal-testing/tiny-random-T5Model"),
}

# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch
# model implementations.
TENSORFLOW_EXPORT_DEFAULT_MODELS = {
    ("albert", "hf-internal-testing/tiny-albert"),
    ("bert", "hf-internal-testing/tiny-random-BertModel"),
    ("camembert", "camembert-base"),
    ("distilbert", "hf-internal-testing/tiny-random-DistilBertModel"),
    ("roberta", "hf-internal-testing/tiny-random-RobertaModel"),
}

# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_WITH_PAST_MODELS` once TensorFlow has parity with the
# PyTorch model implementations.
TENSORFLOW_EXPORT_WITH_PAST_MODELS = {}

# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS` once TensorFlow has parity
# with the PyTorch model implementations.
TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {}


def _get_models_to_test(export_models_list):
    models_to_test = []
    if is_torch_available() or is_tf_available():
        for name, model, *features in export_models_list:
            if features:
                # Only the explicitly listed feature groups are tested for this entry.
                feature_config_mapping = {
                    feature: FeaturesManager.get_config(name, feature)
                    for feature_group in features
                    for feature in feature_group
                }
            else:
                # Pre-process the model names
                model_type = name.replace("_", "-")
                model_name = getattr(model, "name", "")
                feature_config_mapping = FeaturesManager.get_supported_features_for_model_type(
                    model_type, model_name=model_name
                )

            for feature, onnx_config_class_constructor in feature_config_mapping.items():
                models_to_test.append((f"{name}_{feature}", name, model, feature, onnx_config_class_constructor))
        return sorted(models_to_test)
    else:
        # Return some dummy test that should never be called because of the @require_torch / @require_tf
        # decorators. The reason for not returning an empty list is because parameterized.expand complains
        # when it's empty.
        return [("dummy", "dummy", "dummy", "dummy", OnnxConfig.from_model_config)]


class OnnxExportTestCaseV2(TestCase):
    """
    Integration tests ensuring supported models are correctly exported.
    """

    def _onnx_export(
        self, test_name, name, model_name, feature, onnx_config_class_constructor, device="cpu", framework="pt"
    ):
        from transformers.onnx import export

        model_class = FeaturesManager.get_model_class_for_feature(feature, framework=framework)
        config = AutoConfig.from_pretrained(model_name)
        model = model_class.from_config(config)

        # Dynamic axes aren't supported for YOLO-like models. This means they cannot be exported to ONNX on CUDA
        # devices. See: https://github.com/ultralytics/yolov5/pull/8378
        if model.__class__.__name__.startswith("Yolos") and device != "cpu":
            return

        # ONNX inference fails with the following name, feature, framework parameterizations.
        # See: https://github.com/huggingface/transformers/issues/19357
        if (name, feature, framework) in {
            ("deberta-v2", "question-answering", "pt"),
            ("deberta-v2", "multiple-choice", "pt"),
            ("roformer", "multiple-choice", "pt"),
            ("groupvit", "default", "pt"),
            ("perceiver", "masked-lm", "pt"),
            ("perceiver", "sequence-classification", "pt"),
            ("perceiver", "image-classification", "pt"),
            ("bert", "multiple-choice", "tf"),
            ("camembert", "multiple-choice", "tf"),
            ("roberta", "multiple-choice", "tf"),
        }:
            return

        onnx_config = onnx_config_class_constructor(model.config)

        if is_torch_available():
            from transformers.utils import torch_version

            if torch_version < onnx_config.torch_onnx_minimum_version:
                pytest.skip(
                    "Skipping due to incompatible PyTorch version. Minimum required is"
                    f" {onnx_config.torch_onnx_minimum_version}, got: {torch_version}"
                )

        preprocessor = get_preprocessor(model_name)

        # Useful for causal lm models that do not use pad tokens.
        if isinstance(preprocessor, PreTrainedTokenizerBase) and not getattr(config, "pad_token_id", None):
            config.pad_token_id = preprocessor.eos_token_id

        with NamedTemporaryFile("w") as output:
            try:
                onnx_inputs, onnx_outputs = export(
                    preprocessor, model, onnx_config, onnx_config.default_onnx_opset, Path(output.name), device=device
                )
                validate_model_outputs(
                    onnx_config,
                    preprocessor,
                    model,
                    Path(output.name),
                    onnx_outputs,
                    onnx_config.atol_for_validation,
                )
            except (RuntimeError, ValueError) as e:
                self.fail(f"{name}, {feature} -> {e}")

    def _onnx_export_encoder_decoder_models(
        self, test_name, name, model_name, feature, onnx_config_class_constructor, device="cpu"
    ):
        from transformers import AutoFeatureExtractor, AutoTokenizer
        from transformers.onnx import export

        model_class = FeaturesManager.get_model_class_for_feature(feature)
        config = AutoConfig.from_pretrained(model_name)
        model = model_class.from_config(config)

        onnx_config = onnx_config_class_constructor(model.config)

        if is_torch_available():
            from transformers.utils import torch_version

            if torch_version < onnx_config.torch_onnx_minimum_version:
                pytest.skip(
                    "Skipping due to incompatible PyTorch version. Minimum required is"
                    f" {onnx_config.torch_onnx_minimum_version}, got: {torch_version}"
                )

        encoder_model = model.get_encoder()
        decoder_model = model.get_decoder()

        encoder_onnx_config = onnx_config.get_encoder_config(encoder_model.config)
        decoder_onnx_config = onnx_config.get_decoder_config(encoder_model.config, decoder_model.config, feature)

        preprocessor = AutoFeatureExtractor.from_pretrained(model_name)

        onnx_opset = max(encoder_onnx_config.default_onnx_opset, decoder_onnx_config.default_onnx_opset)

        with NamedTemporaryFile("w") as encoder_output:
            onnx_inputs, onnx_outputs = export(
                preprocessor, encoder_model, encoder_onnx_config, onnx_opset, Path(encoder_output.name), device=device
            )
            validate_model_outputs(
                encoder_onnx_config,
                preprocessor,
                encoder_model,
                Path(encoder_output.name),
                onnx_outputs,
                encoder_onnx_config.atol_for_validation,
            )
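
        # The vision-encoder-decoder checkpoints tested here pair an image encoder (fed by the feature extractor)
        # with a text decoder that consumes token ids, so the decoder is exported and validated with a tokenizer.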
        preprocessor = AutoTokenizer.from_pretrained(model_name)

        with NamedTemporaryFile("w") as decoder_output:
            _, onnx_outputs = export(
                preprocessor,
                decoder_model,
                decoder_onnx_config,
                onnx_opset,
                Path(decoder_output.name),
                device=device,
            )
            validate_model_outputs(
                decoder_onnx_config,
                preprocessor,
                decoder_model,
                Path(decoder_output.name),
                onnx_outputs,
                decoder_onnx_config.atol_for_validation,
            )

    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS))
    @slow
    @require_torch
    @require_vision
    @require_rjieba
    def test_pytorch_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)

    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS))
    @slow
    @require_torch
    @require_vision
    @require_rjieba
    def test_pytorch_export_on_cuda(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor, device="cuda")

    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_ENCODER_DECODER_MODELS))
    @slow
    @require_torch
    @require_vision
    def test_pytorch_export_encoder_decoder_models(
        self, test_name, name, model_name, feature, onnx_config_class_constructor
    ):
        self._onnx_export_encoder_decoder_models(test_name, name, model_name, feature, onnx_config_class_constructor)

    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_ENCODER_DECODER_MODELS))
    @slow
    @require_torch
    @require_vision
    def test_pytorch_export_encoder_decoder_models_on_cuda(
        self, test_name, name, model_name, feature, onnx_config_class_constructor
    ):
        self._onnx_export_encoder_decoder_models(
            test_name, name, model_name, feature, onnx_config_class_constructor, device="cuda"
        )

    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_WITH_PAST_MODELS))
    @slow
    @require_torch
    def test_pytorch_export_with_past(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)

    @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS))
    @slow
    @require_torch
    def test_pytorch_export_seq2seq_with_past(
        self, test_name, name, model_name, feature, onnx_config_class_constructor
    ):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor)

    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_DEFAULT_MODELS))
    @slow
    @require_tf
    @require_vision
    def test_tensorflow_export(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor, framework="tf")

    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_WITH_PAST_MODELS), skip_on_empty=True)
    @slow
    @require_tf
    def test_tensorflow_export_with_past(self, test_name, name, model_name, feature, onnx_config_class_constructor):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor, framework="tf")

    @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_SEQ2SEQ_WITH_PAST_MODELS), skip_on_empty=True)
    @slow
    @require_tf
    def test_tensorflow_export_seq2seq_with_past(
        self, test_name, name, model_name, feature, onnx_config_class_constructor
    ):
        self._onnx_export(test_name, name, model_name, feature, onnx_config_class_constructor, framework="tf")


class StableDropoutTestCase(TestCase):
    """Tests export of the StableDropout module."""

    @require_torch
    def test_training(self):
        """Tests export of StableDropout in training mode."""
        # torch.onnx is spammy, so route its output to /dev/null.
        devnull = open(os.devnull, "wb")
        # drop_prob must be > 0 for the test to be meaningful
        sd = modeling_deberta.StableDropout(0.1)
        # Avoid warnings in training mode
        do_constant_folding = False
        # Dropout is a no-op in inference mode, so preserve training mode to actually exercise it
        training = torch.onnx.TrainingMode.PRESERVE
        example_input = (torch.randn(2, 2),)

        torch.onnx.export(
            sd,
            example_input,
            devnull,
            opset_version=12,  # Minimum supported
            do_constant_folding=do_constant_folding,
            training=training,
        )

        # Expected to fail with opset_version < 12
        with self.assertRaises(Exception):
            torch.onnx.export(
                sd,
                example_input,
                devnull,
                opset_version=11,
                do_constant_folding=do_constant_folding,
                training=training,
            )

        devnull.close()