Commit 
							
							·
						
						2812032
	
1
								Parent(s):
							
							920220c
								
Upload Phi-4-mini-reasoning ONNX models
Browse files- .gitattributes +2 -1
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json +3 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/merges.txt +0 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json +3 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/tokenizer.json +0 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/vocab.json +3 -0
- gpu/gpu-int4-rtn-block-32/added_tokens.json +3 -0
- gpu/gpu-int4-rtn-block-32/genai_config.json +3 -0
- gpu/gpu-int4-rtn-block-32/merges.txt +0 -0
- gpu/gpu-int4-rtn-block-32/model.onnx +3 -0
- gpu/gpu-int4-rtn-block-32/model.onnx.data +3 -0
- gpu/gpu-int4-rtn-block-32/special_tokens_map.json +3 -0
- gpu/gpu-int4-rtn-block-32/tokenizer.json +3 -0
- gpu/gpu-int4-rtn-block-32/tokenizer_config.json +3 -0
- gpu/gpu-int4-rtn-block-32/vocab.json +3 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/LICENSE +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/README.md +0 -0
- npu/qnn-int4/added_tokens.json +3 -0
- npu/qnn-int4/config.json +3 -0
- npu/qnn-int4/genai_config.json +3 -0
- npu/qnn-int4/merges.txt +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_1.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_2.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_3.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_4.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_ctx.onnx_ctx.onnx +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_embeddings.all.quant.onnx +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_iter.onnx_ctx.onnx +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_lm_head.all.quant.onnx +0 -0
- npu/qnn-int4/special_tokens_map.json +3 -0
- npu/qnn-int4/tokenizer.json +3 -0
- npu/qnn-int4/tokenizer_config.json +3 -0
- npu/qnn-int4/vocab.json +3 -0
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/added_tokens.json +0 -12
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/config.json +0 -144
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/genai_config.json +0 -391
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/special_tokens_map.json +0 -30
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer_config.json +0 -116
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/vocab.json +0 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            -
             | 
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            *.onnx.data filter=lfs diff=lfs merge=lfs -text
         | 
| 37 | 
            +
            *.json filter=lfs diff=lfs merge=lfs -text
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
         | 
| 3 | 
            +
            size 249
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:0fcfa1e663f2bc867f8dc62fae65dd0924f0a4d68b43d1234df742dd19171470
         | 
| 3 | 
            +
            size 1520
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/merges.txt
    RENAMED
    
    | 
            File without changes
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:929ee60b9dfcca848a2c62c6533e18c6e74d9d657e01f577e5b54602c02e5ad5
         | 
| 3 | 
            +
            size 52119126
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:fd04a1bdfd475e5909a7a90a948b4fb425b4a565293ec36e34df7ac8c39fe8a4
         | 
| 3 | 
            +
            size 4856573952
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
         | 
| 3 | 
            +
            size 587
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/tokenizer.json
    RENAMED
    
    | 
            File without changes
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:16d7cf2abc1139ffc61368dbbacd521d1be29baa30e461382f3bea947cba16ec
         | 
| 3 | 
            +
            size 3216
         | 
    	
        cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/vocab.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
         | 
| 3 | 
            +
            size 3910310
         | 
    	
        gpu/gpu-int4-rtn-block-32/added_tokens.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
         | 
| 3 | 
            +
            size 249
         | 
    	
        gpu/gpu-int4-rtn-block-32/genai_config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:44539bd5e6a1a53e451698216f326b3f030f206ccea77b35ca5a4137df436835
         | 
| 3 | 
            +
            size 1569
         | 
    	
        gpu/gpu-int4-rtn-block-32/merges.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        gpu/gpu-int4-rtn-block-32/model.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:59ab70c351e50301c1cf4cca3f23176bba73e43f0d94cea26d471ba43b898f65
         | 
| 3 | 
            +
            size 287586
         | 
    	
        gpu/gpu-int4-rtn-block-32/model.onnx.data
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:5fc3832908b14538d2d53597507a10160e1307c45e153fa5822d82e6248471c3
         | 
| 3 | 
            +
            size 3413194752
         | 
    	
        gpu/gpu-int4-rtn-block-32/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
         | 
| 3 | 
            +
            size 587
         | 
    	
        gpu/gpu-int4-rtn-block-32/tokenizer.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f08ed885956f70d877a4d9078ec9e3119d8b68a8d579003e230be18cad66911c
         | 
| 3 | 
            +
            size 15524194
         | 
    	
        gpu/gpu-int4-rtn-block-32/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:16d7cf2abc1139ffc61368dbbacd521d1be29baa30e461382f3bea947cba16ec
         | 
| 3 | 
            +
            size 3216
         | 
    	
        gpu/gpu-int4-rtn-block-32/vocab.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
         | 
| 3 | 
            +
            size 3910310
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/LICENSE
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/README.md
    RENAMED
    
    | 
            File without changes
         | 
    	
        npu/qnn-int4/added_tokens.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
         | 
| 3 | 
            +
            size 249
         | 
    	
        npu/qnn-int4/config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ac65d86061d3d0d704ee2511fd0eb8713ef19eb6eedba17c3080a4165d5b933b
         | 
| 3 | 
            +
            size 2504
         | 
    	
        npu/qnn-int4/genai_config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9cf1863737bcd75cb59c56e8b505ed316132162a47689f85ff52556471dd6698
         | 
| 3 | 
            +
            size 16942
         | 
    	
        npu/qnn-int4/merges.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_1.bin
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_2.bin
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_3.bin
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_4.bin
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_ctx.onnx_ctx.onnx
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_embeddings.all.quant.onnx
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_iter.onnx_ctx.onnx
    RENAMED
    
    | 
            File without changes
         | 
    	
        {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_lm_head.all.quant.onnx
    RENAMED
    
    | 
            File without changes
         | 
    	
        npu/qnn-int4/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
         | 
| 3 | 
            +
            size 587
         | 
    	
        npu/qnn-int4/tokenizer.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f08ed885956f70d877a4d9078ec9e3119d8b68a8d579003e230be18cad66911c
         | 
| 3 | 
            +
            size 15524194
         | 
    	
        npu/qnn-int4/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6dcb238a84365a07e19dd4334f82da442eea64da30ecbfa3718c06557ea3a589
         | 
| 3 | 
            +
            size 3323
         | 
    	
        npu/qnn-int4/vocab.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
         | 
| 3 | 
            +
            size 3910310
         | 
    	
        phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/added_tokens.json
    DELETED
    
    | @@ -1,12 +0,0 @@ | |
| 1 | 
            -
            {
         | 
| 2 | 
            -
              "<|/tool_call|>": 200026,
         | 
| 3 | 
            -
              "<|/tool|>": 200024,
         | 
| 4 | 
            -
              "<|assistant|>": 200019,
         | 
| 5 | 
            -
              "<|end|>": 200020,
         | 
| 6 | 
            -
              "<|system|>": 200022,
         | 
| 7 | 
            -
              "<|tag|>": 200028,
         | 
| 8 | 
            -
              "<|tool_call|>": 200025,
         | 
| 9 | 
            -
              "<|tool_response|>": 200027,
         | 
| 10 | 
            -
              "<|tool|>": 200023,
         | 
| 11 | 
            -
              "<|user|>": 200021
         | 
| 12 | 
            -
            }
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/config.json
    DELETED
    
    | @@ -1,144 +0,0 @@ | |
| 1 | 
            -
            {
         | 
| 2 | 
            -
              "_name_or_path": "Phi-4-mini-instruct",
         | 
| 3 | 
            -
              "architectures": [
         | 
| 4 | 
            -
                "Phi3ForCausalLM"
         | 
| 5 | 
            -
              ],
         | 
| 6 | 
            -
              "attention_bias": false,
         | 
| 7 | 
            -
              "attention_dropout": 0.0,
         | 
| 8 | 
            -
              "auto_map": {
         | 
| 9 | 
            -
                "AutoConfig": "configuration_phi3.Phi3Config",
         | 
| 10 | 
            -
                "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
         | 
| 11 | 
            -
                "AutoTokenizer": "Xenova/gpt-4o"
         | 
| 12 | 
            -
              },
         | 
| 13 | 
            -
              "bos_token_id": 199999,
         | 
| 14 | 
            -
              "embd_pdrop": 0.0,
         | 
| 15 | 
            -
              "eos_token_id": 199999,
         | 
| 16 | 
            -
              "full_attn_mod": 1,
         | 
| 17 | 
            -
              "hidden_act": "silu",
         | 
| 18 | 
            -
              "hidden_size": 3072,
         | 
| 19 | 
            -
              "initializer_range": 0.02,
         | 
| 20 | 
            -
              "intermediate_size": 8192,
         | 
| 21 | 
            -
              "interpolate_factor": 1,
         | 
| 22 | 
            -
              "lm_head_bias": false,
         | 
| 23 | 
            -
              "max_position_embeddings": 131072,
         | 
| 24 | 
            -
              "mlp_bias": false,
         | 
| 25 | 
            -
              "model_type": "phi3",
         | 
| 26 | 
            -
              "num_attention_heads": 24,
         | 
| 27 | 
            -
              "num_hidden_layers": 32,
         | 
| 28 | 
            -
              "num_key_value_heads": 8,
         | 
| 29 | 
            -
              "original_max_position_embeddings": 4096,
         | 
| 30 | 
            -
              "pad_token_id": 199999,
         | 
| 31 | 
            -
              "partial_rotary_factor": 0.75,
         | 
| 32 | 
            -
              "resid_pdrop": 0.0,
         | 
| 33 | 
            -
              "rms_norm_eps": 1e-05,
         | 
| 34 | 
            -
              "rope_scaling": {
         | 
| 35 | 
            -
                "long_factor": [
         | 
| 36 | 
            -
                  1,
         | 
| 37 | 
            -
                  1.118320672,
         | 
| 38 | 
            -
                  1.250641126,
         | 
| 39 | 
            -
                  1.398617824,
         | 
| 40 | 
            -
                  1.564103225,
         | 
| 41 | 
            -
                  1.74916897,
         | 
| 42 | 
            -
                  1.956131817,
         | 
| 43 | 
            -
                  2.187582649,
         | 
| 44 | 
            -
                  2.446418898,
         | 
| 45 | 
            -
                  2.735880826,
         | 
| 46 | 
            -
                  3.059592084,
         | 
| 47 | 
            -
                  3.421605075,
         | 
| 48 | 
            -
                  3.826451687,
         | 
| 49 | 
            -
                  4.279200023,
         | 
| 50 | 
            -
                  4.785517845,
         | 
| 51 | 
            -
                  5.351743533,
         | 
| 52 | 
            -
                  5.984965424,
         | 
| 53 | 
            -
                  6.693110555,
         | 
| 54 | 
            -
                  7.485043894,
         | 
| 55 | 
            -
                  8.370679318,
         | 
| 56 | 
            -
                  9.36110372,
         | 
| 57 | 
            -
                  10.4687158,
         | 
| 58 | 
            -
                  11.70738129,
         | 
| 59 | 
            -
                  13.09260651,
         | 
| 60 | 
            -
                  14.64173252,
         | 
| 61 | 
            -
                  16.37415215,
         | 
| 62 | 
            -
                  18.31155283,
         | 
| 63 | 
            -
                  20.47818807,
         | 
| 64 | 
            -
                  22.90118105,
         | 
| 65 | 
            -
                  25.61086418,
         | 
| 66 | 
            -
                  28.64115884,
         | 
| 67 | 
            -
                  32.03,
         | 
| 68 | 
            -
                  32.1,
         | 
| 69 | 
            -
                  32.13,
         | 
| 70 | 
            -
                  32.23,
         | 
| 71 | 
            -
                  32.6,
         | 
| 72 | 
            -
                  32.61,
         | 
| 73 | 
            -
                  32.64,
         | 
| 74 | 
            -
                  32.66,
         | 
| 75 | 
            -
                  32.7,
         | 
| 76 | 
            -
                  32.71,
         | 
| 77 | 
            -
                  32.93,
         | 
| 78 | 
            -
                  32.97,
         | 
| 79 | 
            -
                  33.28,
         | 
| 80 | 
            -
                  33.49,
         | 
| 81 | 
            -
                  33.5,
         | 
| 82 | 
            -
                  44.16,
         | 
| 83 | 
            -
                  47.77
         | 
| 84 | 
            -
                ],
         | 
| 85 | 
            -
                "short_factor": [
         | 
| 86 | 
            -
                  1.0,
         | 
| 87 | 
            -
                  1.0,
         | 
| 88 | 
            -
                  1.0,
         | 
| 89 | 
            -
                  1.0,
         | 
| 90 | 
            -
                  1.0,
         | 
| 91 | 
            -
                  1.0,
         | 
| 92 | 
            -
                  1.0,
         | 
| 93 | 
            -
                  1.0,
         | 
| 94 | 
            -
                  1.0,
         | 
| 95 | 
            -
                  1.0,
         | 
| 96 | 
            -
                  1.0,
         | 
| 97 | 
            -
                  1.0,
         | 
| 98 | 
            -
                  1.0,
         | 
| 99 | 
            -
                  1.0,
         | 
| 100 | 
            -
                  1.0,
         | 
| 101 | 
            -
                  1.0,
         | 
| 102 | 
            -
                  1.0,
         | 
| 103 | 
            -
                  1.0,
         | 
| 104 | 
            -
                  1.0,
         | 
| 105 | 
            -
                  1.0,
         | 
| 106 | 
            -
                  1.0,
         | 
| 107 | 
            -
                  1.0,
         | 
| 108 | 
            -
                  1.0,
         | 
| 109 | 
            -
                  1.0,
         | 
| 110 | 
            -
                  1.0,
         | 
| 111 | 
            -
                  1.0,
         | 
| 112 | 
            -
                  1.0,
         | 
| 113 | 
            -
                  1.0,
         | 
| 114 | 
            -
                  1.0,
         | 
| 115 | 
            -
                  1.0,
         | 
| 116 | 
            -
                  1.0,
         | 
| 117 | 
            -
                  1.0,
         | 
| 118 | 
            -
                  1.0,
         | 
| 119 | 
            -
                  1.0,
         | 
| 120 | 
            -
                  1.0,
         | 
| 121 | 
            -
                  1.0,
         | 
| 122 | 
            -
                  1.0,
         | 
| 123 | 
            -
                  1.0,
         | 
| 124 | 
            -
                  1.0,
         | 
| 125 | 
            -
                  1.0,
         | 
| 126 | 
            -
                  1.0,
         | 
| 127 | 
            -
                  1.0,
         | 
| 128 | 
            -
                  1.0,
         | 
| 129 | 
            -
                  1.0,
         | 
| 130 | 
            -
                  1.0,
         | 
| 131 | 
            -
                  1.0,
         | 
| 132 | 
            -
                  1.0,
         | 
| 133 | 
            -
                  1.0
         | 
| 134 | 
            -
                ],
         | 
| 135 | 
            -
                "type": "longrope"
         | 
| 136 | 
            -
              },
         | 
| 137 | 
            -
              "rope_theta": 10000.0,
         | 
| 138 | 
            -
              "sliding_window": 262144,
         | 
| 139 | 
            -
              "tie_word_embeddings": true,
         | 
| 140 | 
            -
              "torch_dtype": "bfloat16",
         | 
| 141 | 
            -
              "transformers_version": "4.45.0",
         | 
| 142 | 
            -
              "use_cache": true,
         | 
| 143 | 
            -
              "vocab_size": 200064
         | 
| 144 | 
            -
            }
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/genai_config.json
    DELETED
    
    | @@ -1,391 +0,0 @@ | |
| 1 | 
            -
            {
         | 
| 2 | 
            -
                "model": {
         | 
| 3 | 
            -
                    "bos_token_id": 199999,
         | 
| 4 | 
            -
                    "context_length": 4096,
         | 
| 5 | 
            -
                    "decoder": {
         | 
| 6 | 
            -
                        "session_options": {
         | 
| 7 | 
            -
                            "log_id": "onnxruntime-genai",
         | 
| 8 | 
            -
                            "provider_options": []
         | 
| 9 | 
            -
                        },
         | 
| 10 | 
            -
                        "head_size": 128,
         | 
| 11 | 
            -
                        "hidden_size": 3072,
         | 
| 12 | 
            -
                        "inputs": {
         | 
| 13 | 
            -
                            "input_ids": "input_ids",
         | 
| 14 | 
            -
                            "attention_mask": "attention_mask",
         | 
| 15 | 
            -
                            "past_key_names": "past_keys_%d",
         | 
| 16 | 
            -
                            "past_value_names": "past_values_%d",
         | 
| 17 | 
            -
                            "past_sequence_length": "past_seq_len",
         | 
| 18 | 
            -
                            "total_sequence_length": "total_seq_len"
         | 
| 19 | 
            -
                        },
         | 
| 20 | 
            -
                        "outputs": {
         | 
| 21 | 
            -
                            "logits": "logits",
         | 
| 22 | 
            -
                            "present_key_names": "present_keys_%d",
         | 
| 23 | 
            -
                            "present_value_names": "present_values_%d"
         | 
| 24 | 
            -
                        },
         | 
| 25 | 
            -
                        "num_attention_heads": 24,
         | 
| 26 | 
            -
                        "num_hidden_layers": 32,
         | 
| 27 | 
            -
                        "num_key_value_heads": 8,
         | 
| 28 | 
            -
                        "sliding_window": {
         | 
| 29 | 
            -
                            "window_size": 64,
         | 
| 30 | 
            -
                            "pad_value": 0,
         | 
| 31 | 
            -
                            "alignment": "left",
         | 
| 32 | 
            -
                            "slide_key_value_cache": false
         | 
| 33 | 
            -
                        },
         | 
| 34 | 
            -
                        "pipeline": [
         | 
| 35 | 
            -
                            {
         | 
| 36 | 
            -
                                "embedding": {
         | 
| 37 | 
            -
                                    "filename": "phi_4_mini_embeddings.all.quant.onnx",
         | 
| 38 | 
            -
                                    "inputs": [
         | 
| 39 | 
            -
                                        "input_ids"
         | 
| 40 | 
            -
                                    ],
         | 
| 41 | 
            -
                                    "outputs": [
         | 
| 42 | 
            -
                                        "input_hidden_states"
         | 
| 43 | 
            -
                                    ]
         | 
| 44 | 
            -
                                },
         | 
| 45 | 
            -
                                "prompt-processor": {
         | 
| 46 | 
            -
                                    "filename": "phi_4_mini_ctx.onnx_ctx.onnx",
         | 
| 47 | 
            -
                                    "session_options": {
         | 
| 48 | 
            -
                                        "log_id": "onnxruntime-genai.prompt-processor",
         | 
| 49 | 
            -
                                        "provider_options": [
         | 
| 50 | 
            -
                                            {
         | 
| 51 | 
            -
                                                "qnn": {
         | 
| 52 | 
            -
                                                    "backend_path": "QnnHtp.dll",
         | 
| 53 | 
            -
                                                    "htp_performance_mode": "burst",
         | 
| 54 | 
            -
                                                    "htp_graph_finalization_optimization_mode": "3",
         | 
| 55 | 
            -
                                                    "soc_model": "60"
         | 
| 56 | 
            -
                                                }
         | 
| 57 | 
            -
                                            }
         | 
| 58 | 
            -
                                        ],
         | 
| 59 | 
            -
                                        "intra_op_num_threads": 12,
         | 
| 60 | 
            -
                                        "inter_op_num_threads": 1
         | 
| 61 | 
            -
                                    },
         | 
| 62 | 
            -
                                    "inputs": [
         | 
| 63 | 
            -
                                        "past_keys_0",
         | 
| 64 | 
            -
                                        "past_values_0",
         | 
| 65 | 
            -
                                        "past_keys_1",
         | 
| 66 | 
            -
                                        "past_values_1",
         | 
| 67 | 
            -
                                        "past_keys_2",
         | 
| 68 | 
            -
                                        "past_values_2",
         | 
| 69 | 
            -
                                        "past_keys_3",
         | 
| 70 | 
            -
                                        "past_values_3",
         | 
| 71 | 
            -
                                        "past_keys_4",
         | 
| 72 | 
            -
                                        "past_values_4",
         | 
| 73 | 
            -
                                        "past_keys_5",
         | 
| 74 | 
            -
                                        "past_values_5",
         | 
| 75 | 
            -
                                        "past_keys_6",
         | 
| 76 | 
            -
                                        "past_values_6",
         | 
| 77 | 
            -
                                        "past_keys_7",
         | 
| 78 | 
            -
                                        "past_values_7",
         | 
| 79 | 
            -
                                        "past_keys_8",
         | 
| 80 | 
            -
                                        "past_values_8",
         | 
| 81 | 
            -
                                        "past_keys_9",
         | 
| 82 | 
            -
                                        "past_values_9",
         | 
| 83 | 
            -
                                        "past_keys_10",
         | 
| 84 | 
            -
                                        "past_values_10",
         | 
| 85 | 
            -
                                        "past_keys_11",
         | 
| 86 | 
            -
                                        "past_values_11",
         | 
| 87 | 
            -
                                        "past_keys_12",
         | 
| 88 | 
            -
                                        "past_values_12",
         | 
| 89 | 
            -
                                        "past_keys_13",
         | 
| 90 | 
            -
                                        "past_values_13",
         | 
| 91 | 
            -
                                        "past_keys_14",
         | 
| 92 | 
            -
                                        "past_values_14",
         | 
| 93 | 
            -
                                        "past_keys_15",
         | 
| 94 | 
            -
                                        "past_values_15",
         | 
| 95 | 
            -
                                        "past_keys_16",
         | 
| 96 | 
            -
                                        "past_values_16",
         | 
| 97 | 
            -
                                        "past_keys_17",
         | 
| 98 | 
            -
                                        "past_values_17",
         | 
| 99 | 
            -
                                        "past_keys_18",
         | 
| 100 | 
            -
                                        "past_values_18",
         | 
| 101 | 
            -
                                        "past_keys_19",
         | 
| 102 | 
            -
                                        "past_values_19",
         | 
| 103 | 
            -
                                        "past_keys_20",
         | 
| 104 | 
            -
                                        "past_values_20",
         | 
| 105 | 
            -
                                        "past_keys_21",
         | 
| 106 | 
            -
                                        "past_values_21",
         | 
| 107 | 
            -
                                        "past_keys_22",
         | 
| 108 | 
            -
                                        "past_values_22",
         | 
| 109 | 
            -
                                        "past_keys_23",
         | 
| 110 | 
            -
                                        "past_values_23",
         | 
| 111 | 
            -
                                        "past_keys_24",
         | 
| 112 | 
            -
                                        "past_values_24",
         | 
| 113 | 
            -
                                        "past_keys_25",
         | 
| 114 | 
            -
                                        "past_values_25",
         | 
| 115 | 
            -
                                        "past_keys_26",
         | 
| 116 | 
            -
                                        "past_values_26",
         | 
| 117 | 
            -
                                        "past_keys_27",
         | 
| 118 | 
            -
                                        "past_values_27",
         | 
| 119 | 
            -
                                        "past_keys_28",
         | 
| 120 | 
            -
                                        "past_values_28",
         | 
| 121 | 
            -
                                        "past_keys_29",
         | 
| 122 | 
            -
                                        "past_values_29",
         | 
| 123 | 
            -
                                        "past_keys_30",
         | 
| 124 | 
            -
                                        "past_values_30",
         | 
| 125 | 
            -
                                        "past_keys_31",
         | 
| 126 | 
            -
                                        "past_values_31",
         | 
| 127 | 
            -
                                        "input_hidden_states",
         | 
| 128 | 
            -
                                        "past_seq_len",
         | 
| 129 | 
            -
                                        "total_seq_len"
         | 
| 130 | 
            -
                                    ],
         | 
| 131 | 
            -
                                    "outputs": [
         | 
| 132 | 
            -
                                        "output_hidden_states",
         | 
| 133 | 
            -
                                        "present_keys_0",
         | 
| 134 | 
            -
                                        "present_values_0",
         | 
| 135 | 
            -
                                        "present_keys_1",
         | 
| 136 | 
            -
                                        "present_values_1",
         | 
| 137 | 
            -
                                        "present_keys_2",
         | 
| 138 | 
            -
                                        "present_values_2",
         | 
| 139 | 
            -
                                        "present_keys_3",
         | 
| 140 | 
            -
                                        "present_values_3",
         | 
| 141 | 
            -
                                        "present_keys_4",
         | 
| 142 | 
            -
                                        "present_values_4",
         | 
| 143 | 
            -
                                        "present_keys_5",
         | 
| 144 | 
            -
                                        "present_values_5",
         | 
| 145 | 
            -
                                        "present_keys_6",
         | 
| 146 | 
            -
                                        "present_values_6",
         | 
| 147 | 
            -
                                        "present_keys_7",
         | 
| 148 | 
            -
                                        "present_values_7",
         | 
| 149 | 
            -
                                        "present_keys_8",
         | 
| 150 | 
            -
                                        "present_values_8",
         | 
| 151 | 
            -
                                        "present_keys_9",
         | 
| 152 | 
            -
                                        "present_values_9",
         | 
| 153 | 
            -
                                        "present_keys_10",
         | 
| 154 | 
            -
                                        "present_values_10",
         | 
| 155 | 
            -
                                        "present_keys_11",
         | 
| 156 | 
            -
                                        "present_values_11",
         | 
| 157 | 
            -
                                        "present_keys_12",
         | 
| 158 | 
            -
                                        "present_values_12",
         | 
| 159 | 
            -
                                        "present_keys_13",
         | 
| 160 | 
            -
                                        "present_values_13",
         | 
| 161 | 
            -
                                        "present_keys_14",
         | 
| 162 | 
            -
                                        "present_values_14",
         | 
| 163 | 
            -
                                        "present_keys_15",
         | 
| 164 | 
            -
                                        "present_values_15",
         | 
| 165 | 
            -
                                        "present_keys_16",
         | 
| 166 | 
            -
                                        "present_values_16",
         | 
| 167 | 
            -
                                        "present_keys_17",
         | 
| 168 | 
            -
                                        "present_values_17",
         | 
| 169 | 
            -
                                        "present_keys_18",
         | 
| 170 | 
            -
                                        "present_values_18",
         | 
| 171 | 
            -
                                        "present_keys_19",
         | 
| 172 | 
            -
                                        "present_values_19",
         | 
| 173 | 
            -
                                        "present_keys_20",
         | 
| 174 | 
            -
                                        "present_values_20",
         | 
| 175 | 
            -
                                        "present_keys_21",
         | 
| 176 | 
            -
                                        "present_values_21",
         | 
| 177 | 
            -
                                        "present_keys_22",
         | 
| 178 | 
            -
                                        "present_values_22",
         | 
| 179 | 
            -
                                        "present_keys_23",
         | 
| 180 | 
            -
                                        "present_values_23",
         | 
| 181 | 
            -
                                        "present_keys_24",
         | 
| 182 | 
            -
                                        "present_values_24",
         | 
| 183 | 
            -
                                        "present_keys_25",
         | 
| 184 | 
            -
                                        "present_values_25",
         | 
| 185 | 
            -
                                        "present_keys_26",
         | 
| 186 | 
            -
                                        "present_values_26",
         | 
| 187 | 
            -
                                        "present_keys_27",
         | 
| 188 | 
            -
                                        "present_values_27",
         | 
| 189 | 
            -
                                        "present_keys_28",
         | 
| 190 | 
            -
                                        "present_values_28",
         | 
| 191 | 
            -
                                        "present_keys_29",
         | 
| 192 | 
            -
                                        "present_values_29",
         | 
| 193 | 
            -
                                        "present_keys_30",
         | 
| 194 | 
            -
                                        "present_values_30",
         | 
| 195 | 
            -
                                        "present_keys_31",
         | 
| 196 | 
            -
                                        "present_values_31"
         | 
| 197 | 
            -
                                    ],
         | 
| 198 | 
            -
                                    "run_on_token_gen": false
         | 
| 199 | 
            -
                                },
         | 
| 200 | 
            -
                                "token-generator": {
         | 
| 201 | 
            -
                                    "filename": "phi_4_mini_iter.onnx_ctx.onnx",
         | 
| 202 | 
            -
                                    "session_options": {
         | 
| 203 | 
            -
                                        "log_id": "onnxruntime-genai.token-generator",
         | 
| 204 | 
            -
                                        "provider_options": [
         | 
| 205 | 
            -
                                            {
         | 
| 206 | 
            -
                                                "qnn": {
         | 
| 207 | 
            -
                                                    "backend_path": "QnnHtp.dll",
         | 
| 208 | 
            -
                                                    "htp_performance_mode": "burst",
         | 
| 209 | 
            -
                                                    "htp_graph_finalization_optimization_mode": "3",
         | 
| 210 | 
            -
                                                    "soc_model": "60"
         | 
| 211 | 
            -
                                                }
         | 
| 212 | 
            -
                                            }
         | 
| 213 | 
            -
                                        ],
         | 
| 214 | 
            -
                                        "intra_op_num_threads": 12,
         | 
| 215 | 
            -
                                        "inter_op_num_threads": 1
         | 
| 216 | 
            -
                                    },
         | 
| 217 | 
            -
                                    "inputs": [
         | 
| 218 | 
            -
                                        "past_keys_0",
         | 
| 219 | 
            -
                                        "past_values_0",
         | 
| 220 | 
            -
                                        "past_keys_1",
         | 
| 221 | 
            -
                                        "past_values_1",
         | 
| 222 | 
            -
                                        "past_keys_2",
         | 
| 223 | 
            -
                                        "past_values_2",
         | 
| 224 | 
            -
                                        "past_keys_3",
         | 
| 225 | 
            -
                                        "past_values_3",
         | 
| 226 | 
            -
                                        "past_keys_4",
         | 
| 227 | 
            -
                                        "past_values_4",
         | 
| 228 | 
            -
                                        "past_keys_5",
         | 
| 229 | 
            -
                                        "past_values_5",
         | 
| 230 | 
            -
                                        "past_keys_6",
         | 
| 231 | 
            -
                                        "past_values_6",
         | 
| 232 | 
            -
                                        "past_keys_7",
         | 
| 233 | 
            -
                                        "past_values_7",
         | 
| 234 | 
            -
                                        "past_keys_8",
         | 
| 235 | 
            -
                                        "past_values_8",
         | 
| 236 | 
            -
                                        "past_keys_9",
         | 
| 237 | 
            -
                                        "past_values_9",
         | 
| 238 | 
            -
                                        "past_keys_10",
         | 
| 239 | 
            -
                                        "past_values_10",
         | 
| 240 | 
            -
                                        "past_keys_11",
         | 
| 241 | 
            -
                                        "past_values_11",
         | 
| 242 | 
            -
                                        "past_keys_12",
         | 
| 243 | 
            -
                                        "past_values_12",
         | 
| 244 | 
            -
                                        "past_keys_13",
         | 
| 245 | 
            -
                                        "past_values_13",
         | 
| 246 | 
            -
                                        "past_keys_14",
         | 
| 247 | 
            -
                                        "past_values_14",
         | 
| 248 | 
            -
                                        "past_keys_15",
         | 
| 249 | 
            -
                                        "past_values_15",
         | 
| 250 | 
            -
                                        "past_keys_16",
         | 
| 251 | 
            -
                                        "past_values_16",
         | 
| 252 | 
            -
                                        "past_keys_17",
         | 
| 253 | 
            -
                                        "past_values_17",
         | 
| 254 | 
            -
                                        "past_keys_18",
         | 
| 255 | 
            -
                                        "past_values_18",
         | 
| 256 | 
            -
                                        "past_keys_19",
         | 
| 257 | 
            -
                                        "past_values_19",
         | 
| 258 | 
            -
                                        "past_keys_20",
         | 
| 259 | 
            -
                                        "past_values_20",
         | 
| 260 | 
            -
                                        "past_keys_21",
         | 
| 261 | 
            -
                                        "past_values_21",
         | 
| 262 | 
            -
                                        "past_keys_22",
         | 
| 263 | 
            -
                                        "past_values_22",
         | 
| 264 | 
            -
                                        "past_keys_23",
         | 
| 265 | 
            -
                                        "past_values_23",
         | 
| 266 | 
            -
                                        "past_keys_24",
         | 
| 267 | 
            -
                                        "past_values_24",
         | 
| 268 | 
            -
                                        "past_keys_25",
         | 
| 269 | 
            -
                                        "past_values_25",
         | 
| 270 | 
            -
                                        "past_keys_26",
         | 
| 271 | 
            -
                                        "past_values_26",
         | 
| 272 | 
            -
                                        "past_keys_27",
         | 
| 273 | 
            -
                                        "past_values_27",
         | 
| 274 | 
            -
                                        "past_keys_28",
         | 
| 275 | 
            -
                                        "past_values_28",
         | 
| 276 | 
            -
                                        "past_keys_29",
         | 
| 277 | 
            -
                                        "past_values_29",
         | 
| 278 | 
            -
                                        "past_keys_30",
         | 
| 279 | 
            -
                                        "past_values_30",
         | 
| 280 | 
            -
                                        "past_keys_31",
         | 
| 281 | 
            -
                                        "past_values_31",
         | 
| 282 | 
            -
                                        "input_hidden_states",
         | 
| 283 | 
            -
                                        "past_seq_len",
         | 
| 284 | 
            -
                                        "total_seq_len"
         | 
| 285 | 
            -
                                    ],
         | 
| 286 | 
            -
                                    "outputs": [
         | 
| 287 | 
            -
                                        "output_hidden_states",
         | 
| 288 | 
            -
                                        "present_keys_0",
         | 
| 289 | 
            -
                                        "present_values_0",
         | 
| 290 | 
            -
                                        "present_keys_1",
         | 
| 291 | 
            -
                                        "present_values_1",
         | 
| 292 | 
            -
                                        "present_keys_2",
         | 
| 293 | 
            -
                                        "present_values_2",
         | 
| 294 | 
            -
                                        "present_keys_3",
         | 
| 295 | 
            -
                                        "present_values_3",
         | 
| 296 | 
            -
                                        "present_keys_4",
         | 
| 297 | 
            -
                                        "present_values_4",
         | 
| 298 | 
            -
                                        "present_keys_5",
         | 
| 299 | 
            -
                                        "present_values_5",
         | 
| 300 | 
            -
                                        "present_keys_6",
         | 
| 301 | 
            -
                                        "present_values_6",
         | 
| 302 | 
            -
                                        "present_keys_7",
         | 
| 303 | 
            -
                                        "present_values_7",
         | 
| 304 | 
            -
                                        "present_keys_8",
         | 
| 305 | 
            -
                                        "present_values_8",
         | 
| 306 | 
            -
                                        "present_keys_9",
         | 
| 307 | 
            -
                                        "present_values_9",
         | 
| 308 | 
            -
                                        "present_keys_10",
         | 
| 309 | 
            -
                                        "present_values_10",
         | 
| 310 | 
            -
                                        "present_keys_11",
         | 
| 311 | 
            -
                                        "present_values_11",
         | 
| 312 | 
            -
                                        "present_keys_12",
         | 
| 313 | 
            -
                                        "present_values_12",
         | 
| 314 | 
            -
                                        "present_keys_13",
         | 
| 315 | 
            -
                                        "present_values_13",
         | 
| 316 | 
            -
                                        "present_keys_14",
         | 
| 317 | 
            -
                                        "present_values_14",
         | 
| 318 | 
            -
                                        "present_keys_15",
         | 
| 319 | 
            -
                                        "present_values_15",
         | 
| 320 | 
            -
                                        "present_keys_16",
         | 
| 321 | 
            -
                                        "present_values_16",
         | 
| 322 | 
            -
                                        "present_keys_17",
         | 
| 323 | 
            -
                                        "present_values_17",
         | 
| 324 | 
            -
                                        "present_keys_18",
         | 
| 325 | 
            -
                                        "present_values_18",
         | 
| 326 | 
            -
                                        "present_keys_19",
         | 
| 327 | 
            -
                                        "present_values_19",
         | 
| 328 | 
            -
                                        "present_keys_20",
         | 
| 329 | 
            -
                                        "present_values_20",
         | 
| 330 | 
            -
                                        "present_keys_21",
         | 
| 331 | 
            -
                                        "present_values_21",
         | 
| 332 | 
            -
                                        "present_keys_22",
         | 
| 333 | 
            -
                                        "present_values_22",
         | 
| 334 | 
            -
                                        "present_keys_23",
         | 
| 335 | 
            -
                                        "present_values_23",
         | 
| 336 | 
            -
                                        "present_keys_24",
         | 
| 337 | 
            -
                                        "present_values_24",
         | 
| 338 | 
            -
                                        "present_keys_25",
         | 
| 339 | 
            -
                                        "present_values_25",
         | 
| 340 | 
            -
                                        "present_keys_26",
         | 
| 341 | 
            -
                                        "present_values_26",
         | 
| 342 | 
            -
                                        "present_keys_27",
         | 
| 343 | 
            -
                                        "present_values_27",
         | 
| 344 | 
            -
                                        "present_keys_28",
         | 
| 345 | 
            -
                                        "present_values_28",
         | 
| 346 | 
            -
                                        "present_keys_29",
         | 
| 347 | 
            -
                                        "present_values_29",
         | 
| 348 | 
            -
                                        "present_keys_30",
         | 
| 349 | 
            -
                                        "present_values_30",
         | 
| 350 | 
            -
                                        "present_keys_31",
         | 
| 351 | 
            -
                                        "present_values_31"
         | 
| 352 | 
            -
                                    ],
         | 
| 353 | 
            -
                                    "run_on_prompt": false
         | 
| 354 | 
            -
                                },
         | 
| 355 | 
            -
                                "transformer-head": {
         | 
| 356 | 
            -
                                    "filename": "phi_4_mini_lm_head.all.quant.onnx",
         | 
| 357 | 
            -
                                    "inputs": [
         | 
| 358 | 
            -
                                        "output_hidden_states"
         | 
| 359 | 
            -
                                    ],
         | 
| 360 | 
            -
                                    "outputs": [
         | 
| 361 | 
            -
                                        "logits"
         | 
| 362 | 
            -
                                    ]
         | 
| 363 | 
            -
                                }
         | 
| 364 | 
            -
                            }
         | 
| 365 | 
            -
                        ]
         | 
| 366 | 
            -
                    },
         | 
| 367 | 
            -
                    "eos_token_id": [
         | 
| 368 | 
            -
                        200020,
         | 
| 369 | 
            -
                        199999
         | 
| 370 | 
            -
                    ],
         | 
| 371 | 
            -
                    "pad_token_id": 199999,
         | 
| 372 | 
            -
                    "type": "decoder-pipeline",
         | 
| 373 | 
            -
                    "vocab_size": 200064
         | 
| 374 | 
            -
                },
         | 
| 375 | 
            -
                "search": {
         | 
| 376 | 
            -
                    "diversity_penalty": 0.0,
         | 
| 377 | 
            -
                    "do_sample": true,
         | 
| 378 | 
            -
                    "early_stopping": true,
         | 
| 379 | 
            -
                    "length_penalty": 1.0,
         | 
| 380 | 
            -
                    "max_length": 4096,
         | 
| 381 | 
            -
                    "min_length": 0,
         | 
| 382 | 
            -
                    "no_repeat_ngram_size": 0,
         | 
| 383 | 
            -
                    "num_beams": 1,
         | 
| 384 | 
            -
                    "num_return_sequences": 1,
         | 
| 385 | 
            -
                    "past_present_share_buffer": true,
         | 
| 386 | 
            -
                    "repetition_penalty": 1.0,
         | 
| 387 | 
            -
                    "temperature": 0.6,
         | 
| 388 | 
            -
                    "top_k": 5,
         | 
| 389 | 
            -
                    "top_p": 0.95
         | 
| 390 | 
            -
                }
         | 
| 391 | 
            -
            }
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/special_tokens_map.json
    DELETED
    
    | @@ -1,30 +0,0 @@ | |
| 1 | 
            -
            {
         | 
| 2 | 
            -
              "bos_token": {
         | 
| 3 | 
            -
                "content": "<|endoftext|>",
         | 
| 4 | 
            -
                "lstrip": false,
         | 
| 5 | 
            -
                "normalized": false,
         | 
| 6 | 
            -
                "rstrip": false,
         | 
| 7 | 
            -
                "single_word": false
         | 
| 8 | 
            -
              },
         | 
| 9 | 
            -
              "eos_token": {
         | 
| 10 | 
            -
                "content": "<|endoftext|>",
         | 
| 11 | 
            -
                "lstrip": false,
         | 
| 12 | 
            -
                "normalized": false,
         | 
| 13 | 
            -
                "rstrip": false,
         | 
| 14 | 
            -
                "single_word": false
         | 
| 15 | 
            -
              },
         | 
| 16 | 
            -
              "pad_token": {
         | 
| 17 | 
            -
                "content": "<|endoftext|>",
         | 
| 18 | 
            -
                "lstrip": false,
         | 
| 19 | 
            -
                "normalized": false,
         | 
| 20 | 
            -
                "rstrip": false,
         | 
| 21 | 
            -
                "single_word": false
         | 
| 22 | 
            -
              },
         | 
| 23 | 
            -
              "unk_token": {
         | 
| 24 | 
            -
                "content": "<|endoftext|>",
         | 
| 25 | 
            -
                "lstrip": false,
         | 
| 26 | 
            -
                "normalized": false,
         | 
| 27 | 
            -
                "rstrip": false,
         | 
| 28 | 
            -
                "single_word": false
         | 
| 29 | 
            -
              }
         | 
| 30 | 
            -
            }
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer_config.json
    DELETED
    
    | @@ -1,116 +0,0 @@ | |
| 1 | 
            -
            {
         | 
| 2 | 
            -
              "add_bos_token": false,
         | 
| 3 | 
            -
              "add_eos_token": false,
         | 
| 4 | 
            -
              "add_prefix_space": false,
         | 
| 5 | 
            -
              "added_tokens_decoder": {
         | 
| 6 | 
            -
                "199999": {
         | 
| 7 | 
            -
                  "content": "<|endoftext|>",
         | 
| 8 | 
            -
                  "lstrip": false,
         | 
| 9 | 
            -
                  "normalized": false,
         | 
| 10 | 
            -
                  "rstrip": false,
         | 
| 11 | 
            -
                  "single_word": false,
         | 
| 12 | 
            -
                  "special": true
         | 
| 13 | 
            -
                },
         | 
| 14 | 
            -
                "200018": {
         | 
| 15 | 
            -
                  "content": "<|endofprompt|>",
         | 
| 16 | 
            -
                  "lstrip": false,
         | 
| 17 | 
            -
                  "normalized": false,
         | 
| 18 | 
            -
                  "rstrip": false,
         | 
| 19 | 
            -
                  "single_word": false,
         | 
| 20 | 
            -
                  "special": true
         | 
| 21 | 
            -
                },
         | 
| 22 | 
            -
                "200019": {
         | 
| 23 | 
            -
                  "content": "<|assistant|>",
         | 
| 24 | 
            -
                  "lstrip": false,
         | 
| 25 | 
            -
                  "normalized": false,
         | 
| 26 | 
            -
                  "rstrip": true,
         | 
| 27 | 
            -
                  "single_word": false,
         | 
| 28 | 
            -
                  "special": true
         | 
| 29 | 
            -
                },
         | 
| 30 | 
            -
                "200020": {
         | 
| 31 | 
            -
                  "content": "<|end|>",
         | 
| 32 | 
            -
                  "lstrip": false,
         | 
| 33 | 
            -
                  "normalized": false,
         | 
| 34 | 
            -
                  "rstrip": true,
         | 
| 35 | 
            -
                  "single_word": false,
         | 
| 36 | 
            -
                  "special": true
         | 
| 37 | 
            -
                },
         | 
| 38 | 
            -
                "200021": {
         | 
| 39 | 
            -
                  "content": "<|user|>",
         | 
| 40 | 
            -
                  "lstrip": false,
         | 
| 41 | 
            -
                  "normalized": false,
         | 
| 42 | 
            -
                  "rstrip": true,
         | 
| 43 | 
            -
                  "single_word": false,
         | 
| 44 | 
            -
                  "special": true
         | 
| 45 | 
            -
                },
         | 
| 46 | 
            -
                "200022": {
         | 
| 47 | 
            -
                  "content": "<|system|>",
         | 
| 48 | 
            -
                  "lstrip": false,
         | 
| 49 | 
            -
                  "normalized": false,
         | 
| 50 | 
            -
                  "rstrip": true,
         | 
| 51 | 
            -
                  "single_word": false,
         | 
| 52 | 
            -
                  "special": true
         | 
| 53 | 
            -
                },
         | 
| 54 | 
            -
                "200023": {
         | 
| 55 | 
            -
                  "content": "<|tool|>",
         | 
| 56 | 
            -
                  "lstrip": false,
         | 
| 57 | 
            -
                  "normalized": false,
         | 
| 58 | 
            -
                  "rstrip": true,
         | 
| 59 | 
            -
                  "single_word": false,
         | 
| 60 | 
            -
                  "special": false
         | 
| 61 | 
            -
                },
         | 
| 62 | 
            -
                "200024": {
         | 
| 63 | 
            -
                  "content": "<|/tool|>",
         | 
| 64 | 
            -
                  "lstrip": false,
         | 
| 65 | 
            -
                  "normalized": false,
         | 
| 66 | 
            -
                  "rstrip": true,
         | 
| 67 | 
            -
                  "single_word": false,
         | 
| 68 | 
            -
                  "special": false
         | 
| 69 | 
            -
                },
         | 
| 70 | 
            -
                "200025": {
         | 
| 71 | 
            -
                  "content": "<|tool_call|>",
         | 
| 72 | 
            -
                  "lstrip": false,
         | 
| 73 | 
            -
                  "normalized": false,
         | 
| 74 | 
            -
                  "rstrip": true,
         | 
| 75 | 
            -
                  "single_word": false,
         | 
| 76 | 
            -
                  "special": false
         | 
| 77 | 
            -
                },
         | 
| 78 | 
            -
                "200026": {
         | 
| 79 | 
            -
                  "content": "<|/tool_call|>",
         | 
| 80 | 
            -
                  "lstrip": false,
         | 
| 81 | 
            -
                  "normalized": false,
         | 
| 82 | 
            -
                  "rstrip": true,
         | 
| 83 | 
            -
                  "single_word": false,
         | 
| 84 | 
            -
                  "special": false
         | 
| 85 | 
            -
                },
         | 
| 86 | 
            -
                "200027": {
         | 
| 87 | 
            -
                  "content": "<|tool_response|>",
         | 
| 88 | 
            -
                  "lstrip": false,
         | 
| 89 | 
            -
                  "normalized": false,
         | 
| 90 | 
            -
                  "rstrip": true,
         | 
| 91 | 
            -
                  "single_word": false,
         | 
| 92 | 
            -
                  "special": false
         | 
| 93 | 
            -
                },
         | 
| 94 | 
            -
                "200028": {
         | 
| 95 | 
            -
                  "content": "<|tag|>",
         | 
| 96 | 
            -
                  "lstrip": false,
         | 
| 97 | 
            -
                  "normalized": false,
         | 
| 98 | 
            -
                  "rstrip": true,
         | 
| 99 | 
            -
                  "single_word": false,
         | 
| 100 | 
            -
                  "special": true
         | 
| 101 | 
            -
                }
         | 
| 102 | 
            -
              },
         | 
| 103 | 
            -
              "bos_token": "<|endoftext|>",
         | 
| 104 | 
            -
              "chat_template": "{{ '<|system|>Your name is Phi, an AI math expert developed by Microsoft.' }}{% for message in messages %}{% if message['role'] == 'system' %} {{ message['content'] }}{% endif %}{% endfor %}{{ '<|end|>' }}{% for message in messages %}{% if message['role'] != 'system' %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
         | 
| 105 | 
            -
              "clean_up_tokenization_spaces": false,
         | 
| 106 | 
            -
              "eos_token": "<|endoftext|>",
         | 
| 107 | 
            -
              "extra_special_tokens": {},
         | 
| 108 | 
            -
              "max_length": 1024,
         | 
| 109 | 
            -
              "model_max_length": 128000,
         | 
| 110 | 
            -
              "pad_token": "<|endoftext|>",
         | 
| 111 | 
            -
              "stride": 0,
         | 
| 112 | 
            -
              "tokenizer_class": "GPT2Tokenizer",
         | 
| 113 | 
            -
              "truncation_side": "right",
         | 
| 114 | 
            -
              "truncation_strategy": "longest_first",
         | 
| 115 | 
            -
              "unk_token": "<|endoftext|>"
         | 
| 116 | 
            -
            }
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/vocab.json
    DELETED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
