Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- README.md +40 -0
- chat_template.jinja +1 -0
- config.json +1 -0
- genai_config.json +56 -0
- model_jit.bin +3 -0
- model_jit.onnx +3 -0
- model_jit.onnx.data +3 -0
- model_jit.pb.bin +3 -0
- rai_config.json +11 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +43 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            model_jit.onnx.data filter=lfs diff=lfs merge=lfs -text
         | 
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,40 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            language:
         | 
| 3 | 
            +
            - zh
         | 
| 4 | 
            +
            - en
         | 
| 5 | 
            +
            tags:
         | 
| 6 | 
            +
            - glm
         | 
| 7 | 
            +
            - chatglm
         | 
| 8 | 
            +
            - thudm
         | 
| 9 | 
            +
            - ryzenai-npu
         | 
| 10 | 
            +
            base_model: THUDM/chatglm3-6b
         | 
| 11 | 
            +
            ---
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # chatglm3-6b
         | 
| 14 | 
            +
            - ## Introduction
         | 
| 15 | 
            +
              This model was created using Quark Quantization, followed by OGA Model Builder, and finalized with post-processing for NPU deployment.
         | 
| 16 | 
            +
            - ## Quantization Strategy
         | 
| 17 | 
            +
              - AWQ / Group 128 / Asymmetric / BF16 activations / UINT4 weights
         | 
| 18 | 
            +
                
         | 
| 19 | 
            +
            - ## Quick Start
         | 
| 20 | 
            +
            For quickstart, refer to [Ryzen AI documentation](https://ryzenai.docs.amd.com/en/latest/npu_oga.html)
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            #### Evaluation scores
         | 
| 23 | 
            +
            The perplexity measurement is run on the wikitext-2-raw-v1 (raw data) dataset provided by Hugging Face. The perplexity score measured for a prompt length of 2k is 29.81679.
         | 
| 24 | 
            +
             | 
| 25 | 
            +
             | 
| 26 | 
            +
             | 
| 27 | 
            +
            #### License
         | 
| 28 | 
            +
            Modifications copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 31 | 
            +
            you may not use this file except in compliance with the License.
         | 
| 32 | 
            +
            You may obtain a copy of the License at
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                http://www.apache.org/licenses/LICENSE-2.0
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            Unless required by applicable law or agreed to in writing, software
         | 
| 37 | 
            +
            distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 38 | 
            +
            WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 39 | 
            +
            See the License for the specific language governing permissions and
         | 
| 40 | 
            +
            limitations under the License.
         | 
    	
        chat_template.jinja
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            {% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}
         | 
    	
        config.json
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            {}
         | 
    	
        genai_config.json
    ADDED
    
    | @@ -0,0 +1,56 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "model": {
         | 
| 3 | 
            +
                    "bos_token_id": 1,
         | 
| 4 | 
            +
                    "context_length": 4096,
         | 
| 5 | 
            +
                    "decoder": {
         | 
| 6 | 
            +
                        "session_options": {
         | 
| 7 | 
            +
                            "log_id": "onnxruntime-genai",
         | 
| 8 | 
            +
                            "custom_ops_library": "onnx_custom_ops.dll",
         | 
| 9 | 
            +
                            "custom_allocator": "shared_d3d_xrt",
         | 
| 10 | 
            +
                            "external_data_file": "model_jit.pb.bin",
         | 
| 11 | 
            +
                            "config_entries": {
         | 
| 12 | 
            +
                                "hybrid_opt_max_seq_length": "4096"
         | 
| 13 | 
            +
                            },
         | 
| 14 | 
            +
                            "provider_options": []
         | 
| 15 | 
            +
                        },
         | 
| 16 | 
            +
                        "filename": "model_jit.onnx",
         | 
| 17 | 
            +
                        "head_size": 128,
         | 
| 18 | 
            +
                        "hidden_size": 4096,
         | 
| 19 | 
            +
                        "inputs": {
         | 
| 20 | 
            +
                            "input_ids": "input_ids",
         | 
| 21 | 
            +
                            "attention_mask": "attention_mask",
         | 
| 22 | 
            +
                            "position_ids": "position_ids",
         | 
| 23 | 
            +
                            "past_key_names": "past_key_values.%d.key",
         | 
| 24 | 
            +
                            "past_value_names": "past_key_values.%d.value"
         | 
| 25 | 
            +
                        },
         | 
| 26 | 
            +
                        "outputs": {
         | 
| 27 | 
            +
                            "logits": "logits",
         | 
| 28 | 
            +
                            "present_key_names": "present.%d.key",
         | 
| 29 | 
            +
                            "present_value_names": "present.%d.value"
         | 
| 30 | 
            +
                        },
         | 
| 31 | 
            +
                        "num_attention_heads": 32,
         | 
| 32 | 
            +
                        "num_hidden_layers": 32,
         | 
| 33 | 
            +
                        "num_key_value_heads": 32
         | 
| 34 | 
            +
                    },
         | 
| 35 | 
            +
                    "eos_token_id": 2,
         | 
| 36 | 
            +
                    "pad_token_id": 0,
         | 
| 37 | 
            +
                    "type": "llama",
         | 
| 38 | 
            +
                    "vocab_size": 32000
         | 
| 39 | 
            +
                },
         | 
| 40 | 
            +
                "search": {
         | 
| 41 | 
            +
                    "diversity_penalty": 0.0,
         | 
| 42 | 
            +
                    "do_sample": true,
         | 
| 43 | 
            +
                    "early_stopping": true,
         | 
| 44 | 
            +
                    "length_penalty": 1.0,
         | 
| 45 | 
            +
                    "max_length": 4096,
         | 
| 46 | 
            +
                    "min_length": 0,
         | 
| 47 | 
            +
                    "no_repeat_ngram_size": 0,
         | 
| 48 | 
            +
                    "num_beams": 1,
         | 
| 49 | 
            +
                    "num_return_sequences": 1,
         | 
| 50 | 
            +
                    "past_present_share_buffer": true,
         | 
| 51 | 
            +
                    "repetition_penalty": 1.0,
         | 
| 52 | 
            +
                    "temperature": 0.6,
         | 
| 53 | 
            +
                    "top_k": 50,
         | 
| 54 | 
            +
                    "top_p": 0.9
         | 
| 55 | 
            +
                }
         | 
| 56 | 
            +
            }
         | 
    	
        model_jit.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:2759285c33fe9a1c013ba1322f7f10f00d7cc9c3e7c26eee674d2685ba85089f
         | 
| 3 | 
            +
            size 3604657152
         | 
    	
        model_jit.onnx
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:197bd1512101627ed982f1f45a822eec7f1c25a4b96d90af4ba6e83f633f79f1
         | 
| 3 | 
            +
            size 348704
         | 
    	
        model_jit.onnx.data
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:93b20dc14dfa96bd545a5ab219d413d5498058931e980adf189c01552329b582
         | 
| 3 | 
            +
            size 3697420800
         | 
    	
        model_jit.pb.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:499a0bcbc008b9c32160adfa56343b4fc4a51fb05023de99486d6cbc80cfc939
         | 
| 3 | 
            +
            size 7918
         | 
    	
        rai_config.json
    ADDED
    
    | @@ -0,0 +1,11 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "max_prompt_length": {
         | 
| 3 | 
            +
                    "1.3.1": 2048,
         | 
| 4 | 
            +
                    "1.4.0": 2048,
         | 
| 5 | 
            +
                    "1.4.1": 2048,
         | 
| 6 | 
            +
                    "1.5.0": 2048,
         | 
| 7 | 
            +
                    "1.5.1": 2048,
         | 
| 8 | 
            +
                    "1.6.0": 4096
         | 
| 9 | 
            +
                }
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            }
         | 
    	
        special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,30 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "bos_token": {
         | 
| 3 | 
            +
                "content": "<s>",
         | 
| 4 | 
            +
                "lstrip": false,
         | 
| 5 | 
            +
                "normalized": false,
         | 
| 6 | 
            +
                "rstrip": false,
         | 
| 7 | 
            +
                "single_word": false
         | 
| 8 | 
            +
              },
         | 
| 9 | 
            +
              "eos_token": {
         | 
| 10 | 
            +
                "content": "</s>",
         | 
| 11 | 
            +
                "lstrip": false,
         | 
| 12 | 
            +
                "normalized": false,
         | 
| 13 | 
            +
                "rstrip": false,
         | 
| 14 | 
            +
                "single_word": false
         | 
| 15 | 
            +
              },
         | 
| 16 | 
            +
              "pad_token": {
         | 
| 17 | 
            +
                "content": "</s>",
         | 
| 18 | 
            +
                "lstrip": false,
         | 
| 19 | 
            +
                "normalized": false,
         | 
| 20 | 
            +
                "rstrip": false,
         | 
| 21 | 
            +
                "single_word": false
         | 
| 22 | 
            +
              },
         | 
| 23 | 
            +
              "unk_token": {
         | 
| 24 | 
            +
                "content": "<unk>",
         | 
| 25 | 
            +
                "lstrip": false,
         | 
| 26 | 
            +
                "normalized": false,
         | 
| 27 | 
            +
                "rstrip": false,
         | 
| 28 | 
            +
                "single_word": false
         | 
| 29 | 
            +
              }
         | 
| 30 | 
            +
            }
         | 
    	
        tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        tokenizer.model
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
         | 
| 3 | 
            +
            size 499723
         | 
    	
        tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,43 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "add_bos_token": true,
         | 
| 3 | 
            +
              "add_eos_token": false,
         | 
| 4 | 
            +
              "add_prefix_space": null,
         | 
| 5 | 
            +
              "added_tokens_decoder": {
         | 
| 6 | 
            +
                "0": {
         | 
| 7 | 
            +
                  "content": "<unk>",
         | 
| 8 | 
            +
                  "lstrip": false,
         | 
| 9 | 
            +
                  "normalized": false,
         | 
| 10 | 
            +
                  "rstrip": false,
         | 
| 11 | 
            +
                  "single_word": false,
         | 
| 12 | 
            +
                  "special": true
         | 
| 13 | 
            +
                },
         | 
| 14 | 
            +
                "1": {
         | 
| 15 | 
            +
                  "content": "<s>",
         | 
| 16 | 
            +
                  "lstrip": false,
         | 
| 17 | 
            +
                  "normalized": false,
         | 
| 18 | 
            +
                  "rstrip": false,
         | 
| 19 | 
            +
                  "single_word": false,
         | 
| 20 | 
            +
                  "special": true
         | 
| 21 | 
            +
                },
         | 
| 22 | 
            +
                "2": {
         | 
| 23 | 
            +
                  "content": "</s>",
         | 
| 24 | 
            +
                  "lstrip": false,
         | 
| 25 | 
            +
                  "normalized": false,
         | 
| 26 | 
            +
                  "rstrip": false,
         | 
| 27 | 
            +
                  "single_word": false,
         | 
| 28 | 
            +
                  "special": true
         | 
| 29 | 
            +
                }
         | 
| 30 | 
            +
              },
         | 
| 31 | 
            +
              "bos_token": "<s>",
         | 
| 32 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 33 | 
            +
              "eos_token": "</s>",
         | 
| 34 | 
            +
              "extra_special_tokens": {},
         | 
| 35 | 
            +
              "legacy": false,
         | 
| 36 | 
            +
              "model_max_length": 1000000000000000019884624838656,
         | 
| 37 | 
            +
              "pad_token": "</s>",
         | 
| 38 | 
            +
              "padding_side": "left",
         | 
| 39 | 
            +
              "sp_model_kwargs": {},
         | 
| 40 | 
            +
              "tokenizer_class": "LlamaTokenizer",
         | 
| 41 | 
            +
              "unk_token": "<unk>",
         | 
| 42 | 
            +
              "use_default_system_prompt": false
         | 
| 43 | 
            +
            }
         | 

