Upload folder using huggingface_hub
- .gitattributes +1 -0
- added_tokens.json +428 -0
- config.json +33 -0
- config_molmo.py +60 -0
- generation_config.json +6 -0
- global_step3600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- global_step3600/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- global_step3600/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- global_step3600/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- image_preprocessing_molmo.py +546 -0
- latest +1 -0
- merges.txt +0 -0
- model-00001-of-00004.safetensors +3 -0
- model-00002-of-00004.safetensors +3 -0
- model-00003-of-00004.safetensors +3 -0
- model-00004-of-00004.safetensors +3 -0
- model.safetensors.index.json +592 -0
- modeling_molmo.py +2367 -0
- preprocessing_molmo.py +192 -0
- preprocessor_config.json +32 -0
- processor_config.json +6 -0
- rng_state_0.pth +3 -0
- rng_state_1.pth +3 -0
- rng_state_2.pth +3 -0
- rng_state_3.pth +3 -0
- rng_state_4.pth +3 -0
- rng_state_5.pth +3 -0
- rng_state_6.pth +3 -0
- rng_state_7.pth +3 -0
- sft_args.json +302 -0
- special_tokens_map.json +435 -0
- tokenizer.json +3 -0
- tokenizer_config.json +3853 -0
- trainer_state.json +0 -0
- training_args.bin +3 -0
- vocab.json +0 -0
- zero_to_fp32.py +760 -0
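
A commit with this title is what huggingface_hub produces when a local checkpoint directory is pushed in one shot. A minimal sketch of that call (the paths and repo id below are placeholders, not taken from this commit):

    from huggingface_hub import HfApi

    api = HfApi()
    # One commit for the whole folder; LFS-tracked patterns (*.pt,
    # *.safetensors, tokenizer.json) are uploaded as LFS objects.
    api.upload_folder(
        folder_path="./checkpoint-3600",             # placeholder local path
        repo_id="your-username/molmo-7b-d-sft",      # placeholder repo id
        repo_type="model",
        commit_message="Upload folder using huggingface_hub",
    )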
    	
        .gitattributes
    CHANGED
    
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
    	
        added_tokens.json
    ADDED
    
@@ -0,0 +1,428 @@
+{
+  "<im_col>": 152067,
+  "<im_end>": 152065,
+  "<im_patch>": 152066,
+  "<im_start>": 152064,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image|>": 152068,
+  "|<EXTRA_TOKENS_0>|": 151646,
+  "|<EXTRA_TOKENS_1>|": 151647,
 (… 414 further entries: "|<EXTRA_TOKENS_n>|" maps to id 151646 + n, through n = 417 …)
+  "|<EXTRA_TOKENS_416>|": 152062,
+  "|<EXTRA_TOKENS_417>|": 152063
+}
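
The 418 numbered entries follow a fixed offset from the base vocabulary, so the mapping can be spot-checked without loading the tokenizer. A minimal sketch of that pattern:

    # |<EXTRA_TOKENS_n>| occupies id 151646 + n for n = 0..417, filling the
    # range between <|im_end|> (151645) and the Molmo image tokens (152064+).
    extra = {f"|<EXTRA_TOKENS_{n}>|": 151646 + n for n in range(418)}
    assert extra["|<EXTRA_TOKENS_0>|"] == 151646
    assert extra["|<EXTRA_TOKENS_9>|"] == 151655
    assert extra["|<EXTRA_TOKENS_417>|"] == 152063   # last id before <im_start>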
    	
        config.json
    ADDED
    
@@ -0,0 +1,33 @@
+{
+  "_name_or_path": "/root/.cache/huggingface/hub/models--allenai--Molmo-7B-D-0924/snapshots/1721478b71306fb7dc671176d5c204dc7a4d27d7",
+  "architectures": [
+    "MolmoForCausalLM"
+  ],
+  "attention_layer_norm": false,
+  "auto_map": {
+    "AutoConfig": "config_molmo.MolmoConfig",
+    "AutoModelForCausalLM": "modeling_molmo.MolmoForCausalLM"
+  },
+  "clip_qkv": null,
+  "embedding_size": 152064,
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 37888,
+  "layer_norm_eps": 1e-06,
+  "layer_norm_type": "rms",
+  "max_position_embeddings": 4096,
+  "model_type": "molmo",
+  "norm_after": false,
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "qkv_bias": true,
+  "rope_theta": 1000000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "use_position_ids": true,
+  "vocab_size": 152064,
+  "weight_tying": false
+}
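
Because auto_map routes AutoConfig and AutoModelForCausalLM to the bundled config_molmo.py and modeling_molmo.py, loading this checkpoint requires trust_remote_code. A minimal loading sketch (the local path is a placeholder):

    import torch
    from transformers import AutoModelForCausalLM, AutoProcessor

    path = "./molmo-7b-d-sft"   # placeholder: local clone of this repo
    model = AutoModelForCausalLM.from_pretrained(
        path,
        trust_remote_code=True,       # needed for the custom MolmoForCausalLM
        torch_dtype=torch.bfloat16,   # matches "torch_dtype" in config.json
    )
    processor = AutoProcessor.from_pretrained(path, trust_remote_code=True)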
    	
        config_molmo.py
    ADDED
    
@@ -0,0 +1,60 @@
+from typing import List
+
+from transformers import PretrainedConfig, AutoTokenizer
+
+
+class MolmoConfig(PretrainedConfig):
+    model_type = "molmo"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        vocab_size=50304,
+        embedding_size=50304,
+        hidden_size=4096,
+        intermediate_size=11008,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=None,
+        max_position_embeddings=2048,
+        initializer_range=0.02,
+        use_cache=True,
+        layer_norm_eps: float = 1e-5,
+        rope_theta=10000.0,
+        clip_qkv=None,
+        qkv_bias: bool = False,
+        weight_tying: bool = False,
+        use_position_ids: bool = True,
+        tie_word_embeddings: bool = True,
+        attention_layer_norm: bool = False,
+        norm_after: bool = False,
+        layer_norm_type: str = "rms",
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.embedding_size = embedding_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.layer_norm_eps = layer_norm_eps
+        self.weight_tying = weight_tying
+        self.use_position_ids = use_position_ids
+        self.attention_layer_norm = attention_layer_norm
+        self.num_key_value_heads = num_key_value_heads
+        self.initializer_range = initializer_range
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.clip_qkv = clip_qkv
+        self.qkv_bias = qkv_bias
+        self.norm_after = norm_after
+        self.tie_word_embeddings = tie_word_embeddings
+        self.layer_norm_type = layer_norm_type
+
+        super().__init__(
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+MolmoConfig.register_for_auto_class()
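
The defaults in __init__ describe a generic model; the shipped config.json overrides them for this 7B checkpoint. A sketch of building the same configuration directly, assuming config_molmo.py is on the import path:

    from config_molmo import MolmoConfig

    # Non-default values taken from the config.json added in this commit.
    cfg = MolmoConfig(
        vocab_size=152064,
        embedding_size=152064,
        hidden_size=3584,
        intermediate_size=37888,
        num_hidden_layers=28,
        num_attention_heads=28,
        num_key_value_heads=4,     # grouped-query attention: 28 Q heads, 4 KV heads
        max_position_embeddings=4096,
        layer_norm_eps=1e-6,
        rope_theta=1000000.0,
        qkv_bias=True,
        tie_word_embeddings=False,
    )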
    	
        generation_config.json
    ADDED
    
@@ -0,0 +1,6 @@
+{
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048,
+  "pad_token_id": 151643,
+  "transformers_version": "4.47.1"
+}
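
model.generate() picks these defaults up automatically; eos and pad share the <|endoftext|> id from added_tokens.json. A minimal sketch with the defaults spelled out explicitly (inputs is a placeholder for processor output):

    output_ids = model.generate(
        **inputs,
        max_new_tokens=2048,    # generation_config.json default
        eos_token_id=151643,    # <|endoftext|>
        pad_token_id=151643,
    )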
    	
    global_step3600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36b999b3b6fa4bad506a8177cc4cde11f55a443e746da493c95198db0f60f67f
+size 12031542784

    global_step3600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39c490d794e5900f741c14a171dd8a288710c6aec3557ac6cf6719eaa4d4efea
+size 12031542784

    global_step3600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e203030134561f67c9a81b9dc61687350bca993344e0ea51db36ebaed65a3b8a
+size 12031542784

    global_step3600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eedf3a97c5aea35ac97bff7273f549380b5a8b6b21770be53efc04011f6313c
+size 12031542784

    global_step3600/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8395293107619b123aa884be6330530b80cfba712abb5e29454b7be8a80d643c
+size 12031542784

    global_step3600/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bdefdb59a311d6f2b5a65e880223d8b07460b58a7b430b881842febd2d6822c
+size 12031542784

    global_step3600/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d06d2440aa6ea813c9581e1a4806bd6e40801e72d4fe57177689bf54f67ae398
+size 12031542784

    global_step3600/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd264ecb952c8d52e64bacdc29b694f4f194cb24bd47f55a983f7ff24b067e97
+size 12031542784

    global_step3600/zero_pp_rank_0_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3627260958f781d57f9d90462fdaa94fd889060adff4be916fde5bc17a75fdb
+size 328563

    global_step3600/zero_pp_rank_1_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:962a8c723da6b70f9c735d81083b2ccc41f25175bb9536cbd871c89b2b8ad453
+size 328563

    global_step3600/zero_pp_rank_2_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:195eb23cb1f4e7121502fca9a61902fd405e1378abab6cead6119db3c5dc3649
+size 328563

    global_step3600/zero_pp_rank_3_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56b48a9b30b96dfeafe647cae29e48e4d56be332c63b887fa1efbdc5df505706
+size 328563

    global_step3600/zero_pp_rank_4_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b815a800688069ca9a6b0a1ebbec67a2b9765d21e12501c9806a917549a7a05
+size 328563

    global_step3600/zero_pp_rank_5_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bcc01a11c1f1b13f835dfab865d067769c9168d99db633aba130e1515b00d3d
+size 328563

    global_step3600/zero_pp_rank_6_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc30e0d1616dc796141635c128d1446d17ae5a0a293dae3a666d6ae36d80a9d0
+size 328563

    global_step3600/zero_pp_rank_7_mp_rank_00_model_states.pt
    ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd12d252be64558fe2d89e275de36d8ef322c08b00a22e5dc3b943a205a901e5
+size 328563
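
The global_step3600/ shards are a DeepSpeed ZeRO checkpoint: per-rank bf16 optimizer states (about 12 GB each) plus small per-rank model-state files for 8 data-parallel ranks, with the latest file naming the tag. The bundled zero_to_fp32.py consolidates them into full fp32 weights; a sketch, assuming a recent DeepSpeed-generated script (check its --help first, since the CLI has changed across versions):

    # Run from the repository root, where "latest" points at global_step3600.
    from zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

    state_dict = get_fp32_state_dict_from_zero_checkpoint(".", tag="global_step3600")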
    	
image_preprocessing_molmo.py
ADDED
@@ -0,0 +1,546 @@
+"""Image processor class for Molmo"""
+from typing import List, Optional, Union, Mapping
+
+import numpy as np
+import einops
+import torch
+import torchvision.transforms
+from torchvision.transforms import InterpolationMode
+from torchvision.transforms.functional import convert_image_dtype
+
+from transformers.image_utils import (
+    OPENAI_CLIP_MEAN,
+    OPENAI_CLIP_STD,
+    ImageInput,
+    is_valid_image,
+)
+from transformers.processing_utils import ImagesKwargs
+from transformers.image_processing_utils import BaseImageProcessor
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+
+def pad_to_bounding_box(
+    image, offset_height, offset_width, target_height,
+    target_width, value=0
+):
+    height, width = image.shape[:2]
+    after_padding_width = target_width - offset_width - width
+    after_padding_height = target_height - offset_height - height
+    return np.pad(image, [
+        [offset_height, after_padding_height],
+        [offset_width, after_padding_width],
+        [0, 0]
+    ], constant_values=value)
+
+
+def normalize_image(image, offset, scale):
+    image -= np.array(offset, dtype=np.float32)[None, None, :]
+    image /= np.array(scale, dtype=np.float32)[None, None, :]
+    return image
+
+
+def resize_and_pad(
+    image,
+    desired_output_size,
+    resize_method="torch-bilinear",
+    pad_value=0,
+    normalize=True,
+    image_mean=OPENAI_CLIP_MEAN,
+    image_std=OPENAI_CLIP_STD,
+):
+    desired_height, desired_width = desired_output_size
+    height, width = image.shape[:2]
+
+    # Cast into float32 since the training code did this in float32 and it (very rarely) affects
+    # the results after rounding.
+    image_scale_y = np.array(desired_height, np.float32) / np.array(height, np.float32)
+    image_scale_x = np.array(desired_width, np.float32) / np.array(width, np.float32)
+    image_scale = min(image_scale_x, image_scale_y)
+    scaled_height = int(np.array(height, np.float32) * image_scale)
+    scaled_width = int(np.array(width, np.float32) * image_scale)
+
+    if resize_method == "tensorflow":
+        # This is how the original training code did resizing; it can produce slightly different
+        # results than torch resize, so we keep it just in case
+        import tensorflow as tf
+        image = tf.image.convert_image_dtype(tf.constant(image), dtype=tf.float32)
+        image = tf.image.resize(
+            image,
+            [scaled_height, scaled_width],
+            method=tf.image.ResizeMethod.BILINEAR,
+            antialias=True,
+        )
+        image = tf.clip_by_value(image, 0.0, 1.0)
+        image = image.numpy()
+    elif resize_method == "torch-bilinear":
+        image = torch.permute(torch.from_numpy(image), [2, 0, 1])
+        image = convert_image_dtype(image)  # resize in float32 to match the training code
+        image = torchvision.transforms.Resize(
+            [scaled_height, scaled_width], InterpolationMode.BILINEAR, antialias=True
+        )(image)
+        image = torch.clip(image, 0.0, 1.0)
+        image = torch.permute(image, [1, 2, 0]).numpy()
+    else:
+        raise NotImplementedError(resize_method)
+
+    top_pad = (desired_height - scaled_height) // 2
+    left_pad = (desired_width - scaled_width) // 2
+    padding = [
+        [top_pad, desired_height - scaled_height - top_pad],
+        [left_pad, desired_width - scaled_width - left_pad],
+        [0, 0]
+    ]
+    image_mask = np.pad(np.ones_like(image[:, :, 0], dtype=bool), padding[:2])
+    image = np.pad(image, padding, constant_values=pad_value)
+    if normalize:
+        image = normalize_image(image, offset=image_mean, scale=image_std)
+    return image, image_mask
+
+
+def select_tiling(h, w, patch_size, max_num_patches):
+    """Decide how best to divide an image of size [h, w] into up to max_num_patches crops of size patch_size"""
+    original_size = np.stack([h, w])  # [2]
+    original_res = h * w
+    tilings = []
+    for i in range(1, max_num_patches+1):
+        for j in range(1, max_num_patches+1):
+            if i*j <= max_num_patches:
+                tilings.append((i, j))
+    # sort so argmin and argmax favour smaller tilings in the event of a tie
+    tilings.sort(key=lambda x: (x[0]*x[1], x[0]))
+    candidate_tilings = np.array(tilings, dtype=np.int32)  # [n_resolutions, 2]
+    candidate_resolutions = candidate_tilings * patch_size  # [n_resolutions, 2]
+
+    # How much we would need to scale the image to fit exactly in each tiling
+    original_size = np.stack([h, w], dtype=np.float32)  # [2]
+    required_scale_d = candidate_resolutions.astype(np.float32) / original_size
+    required_scale = np.min(required_scale_d, axis=-1, keepdims=True)  # [n_resolutions, 1]
+    if np.all(required_scale < 1):
+        # We are forced to downscale, so try to minimize the amount of downscaling
+        ix = np.argmax(required_scale)
+    else:
+        # Pick the resolution that requires the least upscaling so that it most closely fits the image
+        required_scale = np.where(required_scale < 1.0, 10e9, required_scale)
+        ix = np.argmin(required_scale)
+    return candidate_tilings[ix]
+
+
+class MolmoImagesKwargs(ImagesKwargs, total=False):
+    max_crops: Optional[int]
+    overlap_margins: Optional[List[int]]
+    base_image_input_size: Optional[List[int]]
+    image_token_length_w: Optional[int]
+    image_token_length_h: Optional[int]
+    image_patch_size: Optional[int]
+    image_padding_mask: Optional[bool]
+
+
+class MolmoImageProcessor(BaseImageProcessor):
+    """Preprocess images and multi-modal inputs"""
+
+    def __init__(
+        self,
+        max_crops: int = 12,
+        overlap_margins: List[int] = (4, 4),
+        base_image_input_size: List[int] = (336, 336),
+        image_token_length_w: int = 12,
+        image_token_length_h: int = 12,
+        image_patch_size: int = 14,
+        image_padding_mask: bool = True,
+        do_normalize: bool = True,
+        image_mean: Optional[Union[float, List[float]]] = None,
+        image_std: Optional[Union[float, List[float]]] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.max_crops = max_crops
+        self.overlap_margins = overlap_margins
+        self.base_image_input_size = base_image_input_size
+        self.image_token_length_w = image_token_length_w
+        self.image_token_length_h = image_token_length_h
+        self.image_patch_size = image_patch_size
+        self.image_padding_mask = image_padding_mask
+        self.do_normalize = do_normalize
+        self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
+        self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
+
+    def image_to_patches_and_tokens(
+        self,
+        image: ImageInput,
+        image_patch_token_id: int,
+        image_col_token_id: int,
+        image_start_token_id: int,
+        image_end_token_id: int,
+        max_crops: Optional[int] = None,
+        overlap_margins: Optional[List[int]] = None,
+        base_image_input_size: Optional[Union[int, List[int]]] = None,
+        image_token_length_w: Optional[int] = None,
+        image_token_length_h: Optional[int] = None,
+        image_patch_size: Optional[int] = None,
+    ):
+        if isinstance(base_image_input_size, int):
+            base_image_input_size = (base_image_input_size, base_image_input_size)
+
+        base_image_input_d = image_patch_size
+        tokens_per_image = image_token_length_w * image_token_length_h
+        image_base_patch_w = base_image_input_size[1] // base_image_input_d
+        image_base_patch_h = base_image_input_size[0] // base_image_input_d
+
+        original_image_h, original_image_w = image.shape[:2]
+        crop_size = base_image_input_size[0]
+
+        # Discard this many patches from the (left/top, right/bottom) of crops
+        left_margin, right_margin = overlap_margins
+        # left_margin, right_margin = 2, 2
+        assert left_margin % 2 == 0  # Required for compatibility with 2x2 pooling
+        total_margin_pixels = base_image_input_d*(right_margin + left_margin)  # pixels removed per dim
+        crop_patches = base_image_input_size[0] // base_image_input_d  # patches per crop dim
+        crop_window_patches = crop_patches - (right_margin + left_margin)  # usable patches
+        crop_window_size = crop_window_patches * base_image_input_d
+        tiling = select_tiling(
+            original_image_h - total_margin_pixels,
+            original_image_w - total_margin_pixels,
+            crop_window_size,
+            max_crops
+        )
+        src, img_mask = resize_and_pad(
+            image,
+            [tiling[0]*crop_window_size+total_margin_pixels, tiling[1]*crop_window_size+total_margin_pixels]
+        )
+
+        # Now we have to split the image into crops, while keeping track of how each patch in
+        # each crop should be ordered in the global image; this requires a lot of tricky bookkeeping
+        n_crops = tiling[0] * tiling[1]
+        patches_arr = []
+        mask_arr = []
+        patch_ordering_arr = []
+
+        # We assume 2x2 pooling, but can allow padding the right/bottom with extra
+        # patches if the number of patches per side is not even
+        assert (crop_patches+1)//2 == image_token_length_h
+        assert (crop_patches+1)//2 == image_token_length_w
+        on = 0
+        on_patch = 0
+        for i in range(tiling[0]):
+            y0 = i*crop_window_size
+            if i == 0:
+                crop_y0 = 0
+            else:
+                crop_y0 = left_margin // 2
+
+            crop_h = image_base_patch_h - (right_margin + left_margin)
+            if i == 0:
+                crop_h += left_margin
+            if i == (tiling[0]-1):
+                crop_h += right_margin
+            for j in range(tiling[1]):
+                x0 = j*crop_window_size
+                if j == 0:
+                    crop_x0 = 0
+                else:
+                    crop_x0 = left_margin // 2
+
+                crop_w = image_base_patch_w - (right_margin + left_margin)
+                if j == 0:
+                    crop_w += left_margin
+                if j == (tiling[1]-1):
+                    crop_w += right_margin
+
+                pooled_w = (crop_w + 1) // 2
+                pooled_h = (crop_h + 1) // 2
+                patch_ordering_arr.append(
+                    pad_to_bounding_box(
+                        np.reshape(np.arange(on, on+pooled_h*pooled_w, dtype=np.int32), (pooled_h, pooled_w, 1)),
+                        crop_y0, crop_x0, image_token_length_h, image_token_length_w, value=-1
+                    )[:, :, 0]
+                )
+                patches_arr.append(src[y0:y0+crop_size, x0:x0+crop_size])
+                mask_arr.append(img_mask[y0:y0+crop_size, x0:x0+crop_size])
+
+                on += pooled_h*pooled_w
+                on_patch += 1
+        patches = np.stack(patches_arr)
+        patch_ordering = np.stack(patch_ordering_arr)
+        img_mask = np.stack(mask_arr)
+
+        # Switch to [n_crops, n_patches, pixels_per_patch] format
+        image_layout_impatch_w, image_layout_impatch_h = tiling[0], tiling[1]
+        patches = einops.rearrange(
+            patches, 'p (h dh) (w dw) c -> p (h w) (dh dw c)',
+            dh=base_image_input_d,
+            dw=base_image_input_d,
+            h=image_base_patch_h,
+            w=image_base_patch_w
+        )
+        img_mask = einops.rearrange(
+            img_mask, 'p (h dh) (w dw) -> p (h w) (dh dw)',
+            dh=base_image_input_d,
+            dw=base_image_input_d,
+            h=image_base_patch_h,
+            w=image_base_patch_w
+        )
+
+        img_mask = img_mask.astype(np.float32).mean(axis=-1)
+        patch_ordering = np.reshape(patch_ordering, [-1])
+        valid = patch_ordering >= 0
+
+        # Transpose order, to get left-to-right order instead of crop-by-crop order
+        patch_ordering_rh = np.reshape(
+            patch_ordering,
+            [tiling[0], tiling[1], image_token_length_h, image_token_length_w]
+        )
+        patch_ordering_rh = np.transpose(patch_ordering_rh, [0, 2, 1, 3])
+        patch_ordering_rh = np.reshape(patch_ordering_rh, [-1])
+
+        # The transpose will screw up which patches are masked; project the
+        # new order into the sparse structure of `patch_ordering` to fix this
+        patch_ordering[valid] = patch_ordering_rh[patch_ordering_rh >= 0]
+
+        # Now build the output tokens
+        h = tiling[0] * crop_window_patches + (right_margin+left_margin)
+        w = tiling[1] * crop_window_patches + (right_margin+left_margin)
+        per_row = np.full(
+            ((w+1)//2,),
+            image_patch_token_id,
+        )
+        per_row = np.concatenate([per_row, [image_col_token_id]], 0)
+
+        joint = np.tile(per_row, [(h+1)//2])
+        joint = [
+            [image_start_token_id],
+            joint,
+            [image_end_token_id]
+        ]
+
+        # Finally do the same for the global image
+        resized, _ = resize_and_pad(image, base_image_input_size)
+        resized = einops.rearrange(
+            resized, '(h dh) (w dw) c -> (h w) (dh dw c)',
+            dh=base_image_input_d,
+            dw=base_image_input_d,
+            h=image_base_patch_h,
+            w=image_base_patch_w
+        )
+        patches = np.concatenate([np.expand_dims(resized, 0), patches], 0)
+
+        # Global image goes first, so the order of patches in previous crops gets increased
+        patch_ordering = np.where(
+            patch_ordering >= 0,
+            patch_ordering + tokens_per_image,
+            -1
+        )
+        patch_ordering = np.concatenate([np.arange(0, tokens_per_image), patch_ordering], 0)
+        per_row = np.full(
+            (image_token_length_w,),
+            image_patch_token_id,
+        )
+        per_row = np.concatenate([per_row, [image_col_token_id]], 0)
+        extra_tokens = np.tile(per_row, [image_token_length_h])
+        joint = [
+                    [image_start_token_id],
+                    extra_tokens,
+                    [image_end_token_id],
+                ] + joint
+
+        joint = np.concatenate(joint, 0)
+        img_mask = np.pad(img_mask, [[0, 1], [0, 0]], constant_values=-1)
+        return patches, joint, patch_ordering, img_mask
+
+    def build_image_input_idx(
+        self,
+        image_tokens: np.ndarray,
+        patch_order: np.ndarray,
+        image_patch_token_id: int,
+        no_image: Optional[bool] = None,
+        image_token_length_w: Optional[int] = None,
+        image_token_length_h: Optional[int] = None,
+    ):
+        """Converts `patch_order` into a mapping of token_id -> patch_id"""
+
+        tokens_per_image = image_token_length_w * image_token_length_h
+        if no_image is not None and no_image:
+            return np.zeros((0, tokens_per_image), np.int32)
+
+        # Indices to insert the patches
+        image_input_idx = image_tokens == image_patch_token_id
+        image_input_idx = np.nonzero(image_input_idx)[0].astype(np.int32)
+
+        if patch_order is not None:
+            n_tokens = image_input_idx.shape[0]
+            patch_order = np.reshape(patch_order, [-1])
+            n_patches = patch_order.shape[0]
+
+            valid = patch_order >= 0
+            n_valid_patches = valid.sum()
+            assert len(image_input_idx) == n_valid_patches
+
+            sorted_patch_ixs = np.zeros([n_tokens], np.int32)
+            sorted_patch_ixs[patch_order[valid]] = np.arange(n_valid_patches, dtype=np.int32)
+
+            # Project the inverted mapping into the same sparse structure
+            sorted_patch_ixs_ex = np.full(np.shape(patch_order), -1)
+            sorted_patch_ixs_ex[valid] = sorted_patch_ixs
+
+            # Do the gather and then re-mask outputs that were masked in `sorted_patch_ixs`
+            valid = (sorted_patch_ixs_ex >= 0).astype(np.int32)
+            image_input_idx = image_input_idx[sorted_patch_ixs_ex*valid]
+            image_input_idx = image_input_idx*valid - 100*(1 - valid)
+            image_input_idx = np.reshape(image_input_idx, [-1, tokens_per_image])
+        return image_input_idx
+
+    def preprocess(
+        self,
+        image: np.ndarray,
+        image_patch_token_id: int,
+        image_col_token_id: int,
+        image_start_token_id: int,
+        image_end_token_id: int,
+        max_crops: Optional[int] = None,
+        overlap_margins: Optional[List[int]] = None,
+        base_image_input_size: Optional[Union[int, List[int]]] = None,
+        image_token_length_w: Optional[int] = None,
+        image_token_length_h: Optional[int] = None,
+        image_patch_size: Optional[int] = None,
+        **kwargs,
+    ):
+        """Preprocesses an image
+
+        Returns:
+            crops: (n_crops, n_patches, patch_dim) individual crops, `n_crops` might
+                   change between images but the other dimensions are fixed
+            tokens: (n_tokens,) int32 tokens, pad tokens indicate where to insert the
+                                patch features, might include other special tokens as well
+            image_idx: (n_crops, n_patches) index in `tokens` to put the patch features from the
+                       crops after pooling, negative values indicate patch features to exclude
+            padding_mask: (n_crops, n_patches) what percent of each crop is padding, can be None
+                          if the image mask is not being used.
+        """
+
+        max_crops = max_crops or self.max_crops
+        overlap_margins = overlap_margins or self.overlap_margins
+        base_image_input_size = base_image_input_size or self.base_image_input_size
+        image_token_length_w = image_token_length_w or self.image_token_length_w
+        image_token_length_h = image_token_length_h or self.image_token_length_h
+        image_patch_size = image_patch_size or self.image_patch_size
+
+        crops, image_tokens, patch_ordering, img_mask = self.image_to_patches_and_tokens(
+            image,
+            image_patch_token_id,
+            image_col_token_id,
+            image_start_token_id,
+            image_end_token_id,
+            max_crops,
+            overlap_margins,
+            base_image_input_size,
+            image_token_length_w,
+            image_token_length_h,
+            image_patch_size,
+        )
+        patch_idx = self.build_image_input_idx(
+            image_tokens,
+            patch_ordering,
+            image_patch_token_id,
+            image_token_length_w=image_token_length_w,
+            image_token_length_h=image_token_length_h,
+        )
+        return crops, image_tokens, patch_idx, img_mask
+
+    def multimodal_preprocess(
+        self,
+        images: np.ndarray,
+        tokens: List[int],
+        image_idx: np.ndarray,
+        sequence_length: int,
+        image_patch_token_id: int,
+        image_col_token_id: int,
+        image_start_token_id: int,
+        image_end_token_id: int,
+        **kwargs,
+    ):
+        """Merge images and text tokens into multi-modal features for the model
+
+        :param images: images to use as input
+        :param tokens: input text tokens
+        :param image_idx: where to insert the images into `tokens`
+        :param image_patch_token_id: token id for tokens that will contain image features
+        :param image_col_token_id: token id for image column special tokens
+        :param image_start_token_id: token id for image start special tokens
+        :param image_end_token_id: token id for image end special tokens
+        :param kwargs: override preprocessor default args
+        """
+        max_total_crops = kwargs.get("max_crops") or self.max_crops
+        image_token_length_w = kwargs.get("image_token_length_w") or self.image_token_length_w
+        image_token_length_h = kwargs.get("image_token_length_h") or self.image_token_length_h
+        image_patch_size = kwargs.get("image_patch_size") or self.image_patch_size
+        base_image_input_size = kwargs.get("base_image_input_size") or self.base_image_input_size
+        image_num_patch = (
+            base_image_input_size[0] // image_patch_size,
+            base_image_input_size[1] // image_patch_size,
+        )
+        image_padding_mask = kwargs.get("image_padding_mask") or self.image_padding_mask
+
+        tokens_per_image = image_token_length_w * image_token_length_h
+        n_pixels = image_patch_size * image_patch_size * 3
+        n_patches = image_num_patch[0] * image_num_patch[1]
+
+        if images is None:
+            return {
+                "input_ids": tokens,
+            }
+        else:
+            n = len(images)
+            all_crops = []
+            all_image_idx = []
+            out_tokens = []
+            all_crop_masks = []
+
+            for ix in range(n):
+                token_ix = image_idx[ix]
+                crops, image_tokens, patch_idx, img_mask = self.preprocess(
+                    images[ix],
+                    image_patch_token_id,
+                    image_col_token_id,
+                    image_start_token_id,
+                    image_end_token_id,
+                    **kwargs,
+                )
+
+                if token_ix == -1:  # -1 is an image inserted at the very start
+                    start = 0
+                    token_ix = 0
+                    end = 0
+                else:
+                    start = 0 if ix == 0 else image_idx[ix-1] + 1
+                    end = token_ix + 1
+
+                all_image_idx.append(patch_idx + token_ix)
+                all_crops.append(crops)
+                out_tokens.append(tokens[start:token_ix])
+                out_tokens.append(image_tokens)
+                if ix == (n - 1):
+                    out_tokens.append(tokens[end:])
+                if image_padding_mask:
+                    all_crop_masks.append(img_mask)
+
+            input_ids = np.concatenate(out_tokens, 0)
+            images = np.concatenate(all_crops, 0)
+            image_input_idx = np.concatenate(all_image_idx, 0)
+            if image_padding_mask:
+                image_masks = np.concatenate(all_crop_masks, 0)
+            else:
+                image_masks = None
+
+        out = {
+            "input_ids": input_ids,
+            "images": images,
+            "image_input_idx": image_input_idx
+        }
+        if image_masks is not None:
+            out["image_masks"] = image_masks
+        return out
+
+
+MolmoImageProcessor.register_for_auto_class()
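For orientation, here is a minimal sketch of driving the processor above by hand. The special-token ids and the image are placeholders; in the full pipeline the ids come from the tokenizer's special tokens.

# Illustrative sketch only: made-up special-token ids stand in for the
# tokenizer's real ones.
import numpy as np
from image_preprocessing_molmo import MolmoImageProcessor

processor = MolmoImageProcessor()  # defaults: max 12 crops, 336x336 base crops, 14px patches
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # HWC uint8 image

out = processor.multimodal_preprocess(
    images=[image],
    tokens=np.array([10, 11, 12], dtype=np.int32),  # placeholder text tokens
    image_idx=np.array([-1]),   # -1 inserts the image at the very start
    sequence_length=1536,
    image_patch_token_id=1,     # placeholder ids for the four special tokens
    image_col_token_id=2,
    image_start_token_id=3,
    image_end_token_id=4,
)
print(out["input_ids"].shape, out["images"].shape, out["image_input_idx"].shape)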
    	
latest
ADDED
@@ -0,0 +1 @@
+global_step3600
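`latest` is the standard DeepSpeed checkpoint marker: it records the tag (global_step3600) of the most recent ZeRO checkpoint so conversion tools know which global_step directory to consolidate. A minimal sketch, assuming DeepSpeed is installed and the usual checkpoint layout; the bundled zero_to_fp32.py wraps the same logic behind a CLI:

# Illustrative sketch only: merge the per-rank ZeRO shards under
# global_step3600/ into a single fp32 state dict.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Reads the tag (or the `latest` file) and merges the per-rank shard files.
state_dict = get_fp32_state_dict_from_zero_checkpoint(".", tag="global_step3600")
print(sum(v.numel() for v in state_dict.values()), "fp32 parameters recovered")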
    	
merges.txt
ADDED
(The diff for this file is too large to render; see the raw diff.)

model-00001-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62f7d62f4713918048f18acfd5b227ba5844e00b6810214e74e25aae1ef1d2a9
+size 4981346544

model-00002-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:404d0c3c2b4cd9b779b9cf63374dab018e50d2d54795d43e37b75beea90c1f1a
+size 4991475304

model-00003-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d206eb27e99e7147c5202c8ae8e5fc4303ecb4e937a46abc9e171abae278578
+size 4169357528

model-00004-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83c551d02f5e280d5fb1e32d0b0b88ba5d0f54dbfd36ed3681d35eee598458ad
+size 1899952568
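The four shards above are tied together by model.safetensors.index.json, whose weight_map (below) sends each parameter name to the shard that stores it. A sketch, assuming the safetensors package, of resolving one tensor by hand; transformers' from_pretrained does this automatically:

# Illustrative sketch only: look up a single tensor through the shard index.
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.transformer.blocks.0.att_proj.weight"
shard = index["weight_map"][name]  # -> "model-00001-of-00004.safetensors"
with safe_open(shard, framework="pt") as fp:
    tensor = fp.get_tensor(name)   # loads only this tensor, not the whole shard
print(name, tuple(tensor.shape))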
    	
model.safetensors.index.json
ADDED
@@ -0,0 +1,592 @@
+{
+  "metadata": {
+    "total_size": 16042050560
+  },
+  "weight_map": {
+    "model.transformer.blocks.0.att_proj.bias": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.0.att_proj.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.0.attn_norm.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.0.attn_out.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.0.ff_norm.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.0.ff_out.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.0.ff_proj.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.att_proj.bias": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.att_proj.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.attn_norm.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.attn_out.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.ff_norm.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.ff_out.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.1.ff_proj.weight": "model-00001-of-00004.safetensors",
+    "model.transformer.blocks.10.att_proj.bias": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.10.att_proj.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.10.attn_norm.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.10.attn_out.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.10.ff_norm.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.10.ff_out.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.10.ff_proj.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.att_proj.bias": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.att_proj.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.attn_norm.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.attn_out.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.ff_norm.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.ff_out.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.11.ff_proj.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.att_proj.bias": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.att_proj.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.attn_norm.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.attn_out.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.ff_norm.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.ff_out.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.12.ff_proj.weight": "model-00002-of-00004.safetensors",
+    "model.transformer.blocks.13.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 42 | 
            +
                "model.transformer.blocks.13.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 43 | 
            +
                "model.transformer.blocks.13.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 44 | 
            +
                "model.transformer.blocks.13.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 45 | 
            +
                "model.transformer.blocks.13.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 46 | 
            +
                "model.transformer.blocks.13.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 47 | 
            +
                "model.transformer.blocks.13.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 48 | 
            +
                "model.transformer.blocks.14.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 49 | 
            +
                "model.transformer.blocks.14.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 50 | 
            +
                "model.transformer.blocks.14.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 51 | 
            +
                "model.transformer.blocks.14.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 52 | 
            +
                "model.transformer.blocks.14.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 53 | 
            +
                "model.transformer.blocks.14.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 54 | 
            +
                "model.transformer.blocks.14.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 55 | 
            +
                "model.transformer.blocks.15.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 56 | 
            +
                "model.transformer.blocks.15.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 57 | 
            +
                "model.transformer.blocks.15.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 58 | 
            +
                "model.transformer.blocks.15.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 59 | 
            +
                "model.transformer.blocks.15.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 60 | 
            +
                "model.transformer.blocks.15.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 61 | 
            +
                "model.transformer.blocks.15.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 62 | 
            +
                "model.transformer.blocks.16.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 63 | 
            +
                "model.transformer.blocks.16.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 64 | 
            +
                "model.transformer.blocks.16.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 65 | 
            +
                "model.transformer.blocks.16.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 66 | 
            +
                "model.transformer.blocks.16.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 67 | 
            +
                "model.transformer.blocks.16.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 68 | 
            +
                "model.transformer.blocks.16.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 69 | 
            +
                "model.transformer.blocks.17.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 70 | 
            +
                "model.transformer.blocks.17.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 71 | 
            +
                "model.transformer.blocks.17.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 72 | 
            +
                "model.transformer.blocks.17.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 73 | 
            +
                "model.transformer.blocks.17.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 74 | 
            +
                "model.transformer.blocks.17.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 75 | 
            +
                "model.transformer.blocks.17.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 76 | 
            +
                "model.transformer.blocks.18.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 77 | 
            +
                "model.transformer.blocks.18.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 78 | 
            +
                "model.transformer.blocks.18.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 79 | 
            +
                "model.transformer.blocks.18.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 80 | 
            +
                "model.transformer.blocks.18.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 81 | 
            +
                "model.transformer.blocks.18.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 82 | 
            +
                "model.transformer.blocks.18.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 83 | 
            +
                "model.transformer.blocks.19.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 84 | 
            +
                "model.transformer.blocks.19.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 85 | 
            +
                "model.transformer.blocks.19.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 86 | 
            +
                "model.transformer.blocks.19.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 87 | 
            +
                "model.transformer.blocks.19.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 88 | 
            +
                "model.transformer.blocks.19.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 89 | 
            +
                "model.transformer.blocks.19.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 90 | 
            +
                "model.transformer.blocks.2.att_proj.bias": "model-00001-of-00004.safetensors",
         | 
| 91 | 
            +
                "model.transformer.blocks.2.att_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 92 | 
            +
                "model.transformer.blocks.2.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 93 | 
            +
                "model.transformer.blocks.2.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 94 | 
            +
                "model.transformer.blocks.2.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 95 | 
            +
                "model.transformer.blocks.2.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 96 | 
            +
                "model.transformer.blocks.2.ff_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 97 | 
            +
                "model.transformer.blocks.20.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 98 | 
            +
                "model.transformer.blocks.20.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 99 | 
            +
                "model.transformer.blocks.20.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 100 | 
            +
                "model.transformer.blocks.20.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 101 | 
            +
                "model.transformer.blocks.20.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 102 | 
            +
                "model.transformer.blocks.20.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 103 | 
            +
                "model.transformer.blocks.20.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 104 | 
            +
                "model.transformer.blocks.21.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 105 | 
            +
                "model.transformer.blocks.21.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 106 | 
            +
                "model.transformer.blocks.21.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 107 | 
            +
                "model.transformer.blocks.21.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 108 | 
            +
                "model.transformer.blocks.21.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 109 | 
            +
                "model.transformer.blocks.21.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 110 | 
            +
                "model.transformer.blocks.21.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 111 | 
            +
                "model.transformer.blocks.22.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 112 | 
            +
                "model.transformer.blocks.22.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 113 | 
            +
                "model.transformer.blocks.22.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 114 | 
            +
                "model.transformer.blocks.22.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 115 | 
            +
                "model.transformer.blocks.22.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 116 | 
            +
                "model.transformer.blocks.22.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 117 | 
            +
                "model.transformer.blocks.22.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 118 | 
            +
                "model.transformer.blocks.23.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 119 | 
            +
                "model.transformer.blocks.23.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 120 | 
            +
                "model.transformer.blocks.23.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 121 | 
            +
                "model.transformer.blocks.23.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 122 | 
            +
                "model.transformer.blocks.23.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 123 | 
            +
                "model.transformer.blocks.23.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 124 | 
            +
                "model.transformer.blocks.23.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 125 | 
            +
                "model.transformer.blocks.24.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 126 | 
            +
                "model.transformer.blocks.24.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 127 | 
            +
                "model.transformer.blocks.24.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 128 | 
            +
                "model.transformer.blocks.24.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 129 | 
            +
                "model.transformer.blocks.24.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 130 | 
            +
                "model.transformer.blocks.24.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 131 | 
            +
                "model.transformer.blocks.24.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 132 | 
            +
                "model.transformer.blocks.25.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 133 | 
            +
                "model.transformer.blocks.25.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 134 | 
            +
                "model.transformer.blocks.25.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 135 | 
            +
                "model.transformer.blocks.25.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 136 | 
            +
                "model.transformer.blocks.25.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 137 | 
            +
                "model.transformer.blocks.25.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 138 | 
            +
                "model.transformer.blocks.25.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 139 | 
            +
                "model.transformer.blocks.26.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 140 | 
            +
                "model.transformer.blocks.26.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 141 | 
            +
                "model.transformer.blocks.26.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 142 | 
            +
                "model.transformer.blocks.26.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 143 | 
            +
                "model.transformer.blocks.26.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 144 | 
            +
                "model.transformer.blocks.26.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 145 | 
            +
                "model.transformer.blocks.26.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 146 | 
            +
                "model.transformer.blocks.27.att_proj.bias": "model-00003-of-00004.safetensors",
         | 
| 147 | 
            +
                "model.transformer.blocks.27.att_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 148 | 
            +
                "model.transformer.blocks.27.attn_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 149 | 
            +
                "model.transformer.blocks.27.attn_out.weight": "model-00003-of-00004.safetensors",
         | 
| 150 | 
            +
                "model.transformer.blocks.27.ff_norm.weight": "model-00003-of-00004.safetensors",
         | 
| 151 | 
            +
                "model.transformer.blocks.27.ff_out.weight": "model-00003-of-00004.safetensors",
         | 
| 152 | 
            +
                "model.transformer.blocks.27.ff_proj.weight": "model-00003-of-00004.safetensors",
         | 
| 153 | 
            +
                "model.transformer.blocks.3.att_proj.bias": "model-00001-of-00004.safetensors",
         | 
| 154 | 
            +
                "model.transformer.blocks.3.att_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 155 | 
            +
                "model.transformer.blocks.3.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 156 | 
            +
                "model.transformer.blocks.3.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 157 | 
            +
                "model.transformer.blocks.3.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 158 | 
            +
                "model.transformer.blocks.3.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 159 | 
            +
                "model.transformer.blocks.3.ff_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 160 | 
            +
                "model.transformer.blocks.4.att_proj.bias": "model-00001-of-00004.safetensors",
         | 
| 161 | 
            +
                "model.transformer.blocks.4.att_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 162 | 
            +
                "model.transformer.blocks.4.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 163 | 
            +
                "model.transformer.blocks.4.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 164 | 
            +
                "model.transformer.blocks.4.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 165 | 
            +
                "model.transformer.blocks.4.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 166 | 
            +
                "model.transformer.blocks.4.ff_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 167 | 
            +
                "model.transformer.blocks.5.att_proj.bias": "model-00001-of-00004.safetensors",
         | 
| 168 | 
            +
                "model.transformer.blocks.5.att_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 169 | 
            +
                "model.transformer.blocks.5.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 170 | 
            +
                "model.transformer.blocks.5.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 171 | 
            +
                "model.transformer.blocks.5.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 172 | 
            +
                "model.transformer.blocks.5.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 173 | 
            +
                "model.transformer.blocks.5.ff_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 174 | 
            +
                "model.transformer.blocks.6.att_proj.bias": "model-00001-of-00004.safetensors",
         | 
| 175 | 
            +
                "model.transformer.blocks.6.att_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 176 | 
            +
                "model.transformer.blocks.6.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 177 | 
            +
                "model.transformer.blocks.6.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 178 | 
            +
                "model.transformer.blocks.6.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 179 | 
            +
                "model.transformer.blocks.6.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 180 | 
            +
                "model.transformer.blocks.6.ff_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 181 | 
            +
                "model.transformer.blocks.7.att_proj.bias": "model-00001-of-00004.safetensors",
         | 
| 182 | 
            +
                "model.transformer.blocks.7.att_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 183 | 
            +
                "model.transformer.blocks.7.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 184 | 
            +
                "model.transformer.blocks.7.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 185 | 
            +
                "model.transformer.blocks.7.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 186 | 
            +
                "model.transformer.blocks.7.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 187 | 
            +
                "model.transformer.blocks.7.ff_proj.weight": "model-00001-of-00004.safetensors",
         | 
| 188 | 
            +
                "model.transformer.blocks.8.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 189 | 
            +
                "model.transformer.blocks.8.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 190 | 
            +
                "model.transformer.blocks.8.attn_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 191 | 
            +
                "model.transformer.blocks.8.attn_out.weight": "model-00001-of-00004.safetensors",
         | 
| 192 | 
            +
                "model.transformer.blocks.8.ff_norm.weight": "model-00001-of-00004.safetensors",
         | 
| 193 | 
            +
                "model.transformer.blocks.8.ff_out.weight": "model-00001-of-00004.safetensors",
         | 
| 194 | 
            +
                "model.transformer.blocks.8.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 195 | 
            +
                "model.transformer.blocks.9.att_proj.bias": "model-00002-of-00004.safetensors",
         | 
| 196 | 
            +
                "model.transformer.blocks.9.att_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 197 | 
            +
                "model.transformer.blocks.9.attn_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 198 | 
            +
                "model.transformer.blocks.9.attn_out.weight": "model-00002-of-00004.safetensors",
         | 
| 199 | 
            +
                "model.transformer.blocks.9.ff_norm.weight": "model-00002-of-00004.safetensors",
         | 
| 200 | 
            +
                "model.transformer.blocks.9.ff_out.weight": "model-00002-of-00004.safetensors",
         | 
| 201 | 
            +
                "model.transformer.blocks.9.ff_proj.weight": "model-00002-of-00004.safetensors",
         | 
| 202 | 
            +
                "model.transformer.ff_out.weight": "model-00004-of-00004.safetensors",
         | 
| 203 | 
            +
                "model.transformer.ln_f.weight": "model-00001-of-00004.safetensors",
         | 
| 204 | 
            +
                "model.transformer.wte.embedding": "model-00001-of-00004.safetensors",
         | 
| 205 | 
            +
                "model.transformer.wte.new_embedding": "model-00001-of-00004.safetensors",
         | 
| 206 | 
            +
                "model.vision_backbone.image_pooling_2d.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 207 | 
            +
                "model.vision_backbone.image_pooling_2d.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 208 | 
            +
                "model.vision_backbone.image_pooling_2d.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 209 | 
            +
                "model.vision_backbone.image_pooling_2d.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 210 | 
            +
                "model.vision_backbone.image_pooling_2d.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 211 | 
            +
                "model.vision_backbone.image_pooling_2d.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 212 | 
            +
                "model.vision_backbone.image_pooling_2d.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 213 | 
            +
                "model.vision_backbone.image_pooling_2d.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 214 | 
            +
                "model.vision_backbone.image_projector.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 215 | 
            +
                "model.vision_backbone.image_projector.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 216 | 
            +
                "model.vision_backbone.image_projector.w3.weight": "model-00004-of-00004.safetensors",
         | 
| 217 | 
            +
                "model.vision_backbone.image_vit.class_embedding": "model-00004-of-00004.safetensors",
         | 
| 218 | 
            +
                "model.vision_backbone.image_vit.patch_embedding.weight": "model-00004-of-00004.safetensors",
         | 
| 219 | 
            +
                "model.vision_backbone.image_vit.positional_embedding": "model-00004-of-00004.safetensors",
         | 
| 220 | 
            +
                "model.vision_backbone.image_vit.pre_ln.bias": "model-00004-of-00004.safetensors",
         | 
| 221 | 
            +
                "model.vision_backbone.image_vit.pre_ln.weight": "model-00004-of-00004.safetensors",
         | 
| 222 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 223 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 224 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 225 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 226 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 227 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 228 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 229 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 230 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 231 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 232 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 233 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 234 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 235 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 236 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 237 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 238 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 239 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 240 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 241 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 242 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 243 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 244 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 245 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 246 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 247 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 248 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 249 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 250 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 251 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 252 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 253 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 254 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 255 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 256 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 257 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 258 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 259 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 260 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 261 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 262 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 263 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 264 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 265 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 266 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 267 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 268 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 269 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 270 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 271 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 272 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 273 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 274 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 275 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 276 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 277 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 278 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 279 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 280 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 281 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 282 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 283 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 284 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 285 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 286 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 287 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 288 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 289 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 290 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 291 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 292 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 293 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 294 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 295 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 296 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 297 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 298 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 299 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 300 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 301 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 302 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 303 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 304 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 305 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 306 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 307 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 308 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 309 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 310 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 311 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 312 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 313 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 314 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 315 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 316 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 317 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 318 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 319 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 320 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 321 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 322 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 323 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 324 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 325 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 326 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 327 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 328 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 329 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 330 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 331 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 332 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 333 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 334 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 335 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 336 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 337 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 338 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 339 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 340 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 341 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 342 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 343 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 344 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 345 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 346 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 347 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 348 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 349 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 350 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 351 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 352 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 353 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 354 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 355 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 356 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 357 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 358 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 359 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 360 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 361 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 362 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 363 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 364 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 365 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 366 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 367 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 368 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 369 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 370 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 371 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 372 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 373 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 374 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 375 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 376 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 377 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 378 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 379 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 380 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 381 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 382 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 383 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 384 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 385 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 386 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 387 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 388 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 389 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 390 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 391 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 392 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 393 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 394 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 395 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 396 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 397 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 398 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 399 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 400 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 401 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 402 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 403 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 404 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 405 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 406 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 407 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 408 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 409 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 410 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 411 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 412 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 413 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 414 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 415 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 416 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 417 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 418 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 419 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 420 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 421 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 422 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 423 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 424 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 425 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 426 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 427 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 428 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 429 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 430 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 431 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 432 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 433 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 434 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 435 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 436 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 437 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 438 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 439 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 440 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 441 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 442 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 443 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 444 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 445 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 446 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 447 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 448 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 449 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 450 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 451 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 452 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 453 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 454 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 455 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 456 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 457 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 458 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 459 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 460 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 461 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 462 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 463 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 464 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 465 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 466 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 467 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 468 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 469 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 470 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 471 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 472 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 473 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 474 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 475 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 476 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 477 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 478 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 479 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 480 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 481 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 482 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 483 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 484 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 485 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 486 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 487 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 488 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 489 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 490 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 491 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 492 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 493 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 494 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 495 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 496 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 497 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 498 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 499 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 500 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 501 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 502 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 503 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 504 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 505 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 506 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 507 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 508 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 509 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 510 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 511 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 512 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 513 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 514 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 515 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 516 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 517 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 518 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 519 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 520 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 521 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 522 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 523 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 524 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 525 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 526 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 527 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 528 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 529 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 530 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 531 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 532 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 533 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 534 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 535 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 536 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 537 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 538 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 539 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 540 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 541 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 542 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 543 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 544 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 545 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 546 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 547 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 548 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 549 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 550 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 551 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 552 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 553 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 554 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 555 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 556 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 557 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 558 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 559 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 560 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 561 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 562 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 563 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 564 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 565 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 566 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 567 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 568 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 569 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 570 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 571 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 572 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 573 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 574 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk.bias": "model-00004-of-00004.safetensors",
         | 
| 575 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk.weight": "model-00004-of-00004.safetensors",
         | 
| 576 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo.bias": "model-00004-of-00004.safetensors",
         | 
| 577 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo.weight": "model-00004-of-00004.safetensors",
         | 
| 578 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq.bias": "model-00004-of-00004.safetensors",
         | 
| 579 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq.weight": "model-00004-of-00004.safetensors",
         | 
| 580 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv.bias": "model-00004-of-00004.safetensors",
         | 
| 581 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv.weight": "model-00004-of-00004.safetensors",
         | 
| 582 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 583 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 584 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1.bias": "model-00004-of-00004.safetensors",
         | 
| 585 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1.weight": "model-00004-of-00004.safetensors",
         | 
| 586 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2.bias": "model-00004-of-00004.safetensors",
         | 
| 587 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2.weight": "model-00004-of-00004.safetensors",
         | 
| 588 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm.bias": "model-00004-of-00004.safetensors",
         | 
| 589 | 
            +
                "model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm.weight": "model-00004-of-00004.safetensors",
         | 
| 590 | 
            +
                "model.vision_backbone.pad_embed": "model-00004-of-00004.safetensors"
         | 
| 591 | 
            +
              }
         | 
| 592 | 
            +
            }
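
The index above follows the standard sharded-safetensors layout: "weight_map" maps every parameter name to the shard file that stores it (the vision-backbone ViT blocks all resolve to model-00004-of-00004.safetensors). A minimal, illustrative sketch of how such an index is typically consumed; the snippet is not part of this upload, and the local paths are assumptions:

    import json
    from safetensors import safe_open

    # Load the index and look up which shard holds a given parameter.
    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    name = "model.vision_backbone.pad_embed"
    shard = index["weight_map"][name]  # "model-00004-of-00004.safetensors"

    # safe_open reads only this tensor from the shard, not the whole file.
    with safe_open(shard, framework="pt") as handle:
        tensor = handle.get_tensor(name)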
        modeling_molmo.py
    ADDED
    
@@ -0,0 +1,2367 @@
import logging
import math
from copy import deepcopy
from dataclasses import fields, dataclass, replace
from enum import Enum
from typing import List, Optional, Tuple, Union, Dict, Any, Sequence, Callable, cast, MutableMapping

import torch
from einops import einsum, einops
from transformers import PreTrainedModel, GenerationConfig
from transformers.cache_utils import Cache
from transformers.modeling_outputs import CausalLMOutputWithPast, ModelOutput
from transformers.models.auto import AutoModelForCausalLM
from torch import nn

from .config_molmo import MolmoConfig
from torch.nn import functional as F


log = logging.getLogger(__name__)


class BufferCache(dict, MutableMapping[str, torch.Tensor]):
    """
    Cache for attention biases and other things that would normally be stored as buffers.
    We avoid using buffers because we've run into various issues doing so with FSDP.
    In general it appears the way FSDP handles buffers is not well-defined.
    It doesn't shard them but apparently it does synchronize them across processes, which we want to avoid
    since (A) it isn't necessary, and (B) we sometimes have `-inf` in these biases which might get turned into
    NaNs when they're synchronized due to casting or some other issue.
    """
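
# Illustrative note (not in the original file): BufferCache is used as a plain
# string-keyed dict of tensors, e.g. cache["rope_pos_sin"] = pos_sin in
# RotaryEmbedding below, so cached tensors never become registered module
# buffers and FSDP does not shard or synchronize them.
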
class StrEnum(str, Enum):
    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"'{str(self)}'"


class ImageProjectType(StrEnum):
    mlp = "mlp"
    mlpx2 = "2mlp"
    linear = "linear"


class ImagePooling2DType(StrEnum):
    attention = "attention"
    attention_meanq = "attention-meanq"
    attention_2wide = "attention_2wide"
    attention_v2 = "attention-v2"
    none = "none"
    stack = "stack"


class ActivationType(StrEnum):
    quick_gelu = "quick_gelu"
    gelu = "gelu"
    gelu_tanh = "gelu_tanh"
    relu = "relu"
    silu = "silu"
    llama_geglu = "llama_geglu"
    llama_geglu_tanh = "llama_geglu_tanh"
    llama_swiglu = "llama_swiglu"
    swiglu = "swiglu"


def ensure_finite_(x: torch.Tensor, check_neg_inf: bool = True, check_pos_inf: bool = False):
    """
    Modify ``x`` in place to replace ``float("-inf")`` with the minimum value of the dtype when ``check_neg_inf``
    is ``True`` and to replace ``float("inf")`` with the maximum value of the dtype when ``check_pos_inf`` is ``True``.
    """
    if check_neg_inf:
        x.masked_fill_(x == float("-inf"), torch.finfo(x.dtype).min)
    if check_pos_inf:
        x.masked_fill_(x == float("inf"), torch.finfo(x.dtype).max)
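
# Usage sketch (illustrative, not part of the original file):
#   bias = torch.tensor([0.0, float("-inf")])
#   ensure_finite_(bias)  # in place: -inf becomes torch.finfo(bias.dtype).min
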
class MolmoConfigurationError(Exception):
    pass


def _non_meta_init_device(config) -> torch.device:
    if config.init_device is not None and config.init_device != "meta":
        return torch.device(config.init_device)
    else:
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")


class RotaryEmbedding(nn.Module):
    """
    [Rotary positional embeddings (RoPE)](https://arxiv.org/abs/2104.09864).
    """

    def __init__(self, config: MolmoConfig, cache: BufferCache):
        super().__init__()
        self.config = config
        self.__cache = cache
        # Warm up cache.
        self.get_rotary_embedding(
            config.max_position_embeddings or config.max_sequence_length,
            _non_meta_init_device(config)
        )

    def get_rotary_embedding(self, seq_len: int, device: torch.device) -> Tuple[torch.Tensor, torch.Tensor]:
        if (
            (pos_sin := self.__cache.get("rope_pos_sin")) is not None
            and (pos_cos := self.__cache.get("rope_pos_cos")) is not None
            and pos_sin.shape[-2] >= seq_len
            and pos_cos.shape[-2] >= seq_len
        ):
            if pos_sin.device != device:
                pos_sin = pos_sin.to(device)
                self.__cache["rope_pos_sin"] = pos_sin
            if pos_cos.device != device:
                pos_cos = pos_cos.to(device)
                self.__cache["rope_pos_cos"] = pos_cos
            return pos_sin[:, :, :seq_len, :], pos_cos[:, :, :seq_len, :]

        with torch.autocast(device.type, enabled=False):
            dim = self.config.d_model // self.config.n_heads
            inv_freq = 1.0 / (self.config.rope_theta ** (torch.arange(0, dim, 2, device=device, dtype=torch.float) / dim))
            seq = torch.arange(seq_len, device=device, dtype=torch.float)
            freqs = torch.einsum("i , j -> i j", seq, inv_freq)
            if self.config.rope_impl == "interleave":
                positions = freqs.repeat_interleave(2, dim=-1)
            else:
                positions = torch.cat((freqs, freqs), dim=-1)
            pos_sin, pos_cos = positions.sin()[None, None, :, :], positions.cos()[None, None, :, :]
        self.__cache["rope_pos_sin"] = pos_sin
        self.__cache["rope_pos_cos"] = pos_cos
        return pos_sin, pos_cos
| 135 | 
            +
                def rotate_half(self, x: torch.Tensor) -> torch.Tensor:
         | 
| 136 | 
            +
                    B, nh, T, hs = x.size()
         | 
| 137 | 
            +
                    x = x.view(B, nh, T, 2, hs // 2)
         | 
| 138 | 
            +
                    x1, x2 = x.unbind(dim=-2)
         | 
| 139 | 
            +
                    return torch.cat((-x2, x1), dim=-1)
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                def rotate_every_two(self, x: torch.Tensor) -> torch.Tensor:
         | 
| 142 | 
            +
                    B, nh, T, hs = x.size()
         | 
| 143 | 
            +
                    x = x.view(B, nh, T, hs // 2, 2)
         | 
| 144 | 
            +
                    x1, x2 = x.unbind(dim=-1)
         | 
| 145 | 
            +
                    x = torch.stack((-x2, x1), dim=-1)
         | 
| 146 | 
            +
                    return x.view(B, nh, T, hs)
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                def apply_rotary_pos_emb(self, pos_sin: torch.Tensor, pos_cos: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
         | 
| 149 | 
            +
                    if self.config.rope_impl == "interleave":
         | 
| 150 | 
            +
                        return ((t * pos_cos) + (self.rotate_every_two(t) * pos_sin)).to(t.dtype)
         | 
| 151 | 
            +
                    else:
         | 
| 152 | 
            +
                        return ((t * pos_cos) + (self.rotate_half(t) * pos_sin)).to(t.dtype)
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                def forward(
         | 
| 155 | 
            +
                    self,
         | 
| 156 | 
            +
                    q: torch.Tensor,
         | 
| 157 | 
            +
                    k: torch.Tensor,
         | 
| 158 | 
            +
                    position_ids: Optional[torch.Tensor] = None
         | 
| 159 | 
            +
                ) -> Tuple[torch.Tensor, torch.Tensor]:
         | 
| 160 | 
            +
                    if self.config.rope_full_precision:
         | 
| 161 | 
            +
                        q_, k_ = q.float(), k.float()
         | 
| 162 | 
            +
                    else:
         | 
| 163 | 
            +
                        q_, k_ = q, k
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                    with torch.autocast(q.device.type, enabled=False):
         | 
| 166 | 
            +
                        batch_size = q_.shape[0]
         | 
| 167 | 
            +
                        query_len, key_len = q_.shape[-2], k_.shape[-2]  # could be different if layer_past not None
         | 
| 168 | 
            +
                        if position_ids is not None:
         | 
| 169 | 
            +
                            freqs_cis_len = (self.config.max_position_embeddings or self.config.max_sequence_length)
         | 
| 170 | 
            +
                        else:
         | 
| 171 | 
            +
                            freqs_cis_len = key_len
         | 
| 172 | 
            +
                        pos_sin, pos_cos = self.get_rotary_embedding(freqs_cis_len, q_.device)
         | 
| 173 | 
            +
                        pos_sin = pos_sin.type_as(q_)
         | 
| 174 | 
            +
                        pos_cos = pos_cos.type_as(q_)
         | 
| 175 | 
            +
                        if position_ids is not None:
         | 
| 176 | 
            +
                            assert query_len == key_len, "Query and key lengths must be equal when using position IDs."
         | 
| 177 | 
            +
                            pos_sin = pos_sin[0, 0][position_ids].view(
         | 
| 178 | 
            +
                                (batch_size, 1, key_len, pos_sin.shape[-1])
         | 
| 179 | 
            +
                            )
         | 
| 180 | 
            +
                            pos_cos = pos_cos[0, 0][position_ids].view(
         | 
| 181 | 
            +
                                (batch_size, 1, key_len, pos_cos.shape[-1])
         | 
| 182 | 
            +
                            )
         | 
| 183 | 
            +
                        q_ = self.apply_rotary_pos_emb(
         | 
| 184 | 
            +
                            pos_sin[:, :, key_len - query_len : key_len, :],
         | 
| 185 | 
            +
                            pos_cos[:, :, key_len - query_len : key_len, :],
         | 
| 186 | 
            +
                            q_,
         | 
| 187 | 
            +
                        )
         | 
| 188 | 
            +
                        k_ = self.apply_rotary_pos_emb(pos_sin, pos_cos, k_)
         | 
| 189 | 
            +
                    return q_.type_as(q), k_.type_as(k)
         | 
| 190 | 
            +
             | 
| 191 | 
            +
             | 
| 192 | 
            +
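# Illustrative sketch (comment only, not executed): the two RoPE layouts on a
# single head vector [x0, x1, x2, x3].
#
#     rotate_every_two -> [-x1,  x0, -x3,  x2]   # "interleave": rotates pairs (x0,x1), (x2,x3)
#     rotate_half      -> [-x2, -x3,  x0,  x1]   # rotates the two halves as blocks
#
# Matching this pairing, ``get_rotary_embedding`` builds the angle table with
# ``repeat_interleave`` for "interleave" and ``torch.cat`` otherwise, so
# ``t * cos + rotate(t) * sin`` applies the same complex rotation in both layouts.

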
class MolmoBlock(nn.Module):
    """
    A base class for transformer block implementations.
    """

    def __init__(self, layer_id: int, config: MolmoConfig, cache: BufferCache):
        super().__init__()
        self.layer_id = layer_id
        self.config = config
        self.hidden_size = (
            config.mlp_hidden_size if config.mlp_hidden_size is not None else config.mlp_ratio * config.d_model
        )
        self.__cache = cache
        self._activation_checkpoint_fn = None

        # Dropout.
        self.dropout = Dropout(config.residual_dropout)

        # Layer norms.
        self.k_norm: Optional[LayerNormBase] = None
        self.q_norm: Optional[LayerNormBase] = None
        if config.attention_layer_norm:
            assert config.effective_n_kv_heads is not None
            self.k_norm = LayerNormBase.build(
                config,
                size=(config.d_model // config.n_heads) * config.effective_n_kv_heads,
                elementwise_affine=config.attention_layer_norm_with_affine,
            )
            self.q_norm = LayerNormBase.build(config, elementwise_affine=config.attention_layer_norm_with_affine)

        # Make sure the QKV clip coefficient is positive, otherwise clipping is not well-defined.
        if config.clip_qkv is not None:
            assert config.clip_qkv > 0

        # Activation function.
        self.act = Activation.build(config)
        assert (self.act.output_multiplier * self.hidden_size) % 1 == 0

        # Attention output projection.
        input_dim = config.d_model
        self.attn_out = nn.Linear(
            input_dim, config.d_model,
            bias=config.include_bias,
            device=config.init_device
        )

        # Feed-forward output projection.
        self.ff_out = nn.Linear(
            int(self.act.output_multiplier * self.hidden_size),
            config.d_model,
            bias=config.include_bias,
            device=config.init_device,
        )
        self.ff_out._is_residual = True  # type: ignore

        # Rotary embeddings.
        if self.config.rope:
            self.rotary_emb = RotaryEmbedding(config, self.__cache)

        # Optional flash attention; falls back to torch SDPA when flash-attn is not installed.
        self.flash_attn_func = None
        if config.attention_type == "flash":
            try:
                from flash_attn import flash_attn_func  # type: ignore

                self.flash_attn_func = flash_attn_func
            except ModuleNotFoundError:
                pass

    def reset_parameters(self):
        if self.k_norm is not None:
            self.k_norm.reset_parameters()
        if self.q_norm is not None:
            self.q_norm.reset_parameters()
        init_weights(
            self.config,
            self.attn_out,
            d=self.config.d_model,
            layer_id=self.layer_id,
            type_of_module=ModuleType.out_module,
        )
        init_weights(
            self.config,
            self.ff_out,
            d=self.ff_out.in_features,
            layer_id=self.layer_id,
            type_of_module=ModuleType.out_module,
        )

    @classmethod
    def _cast_attn_bias(cls, bias: torch.Tensor, input_dtype: torch.dtype) -> torch.Tensor:
        target_dtype = input_dtype
        # NOTE: `is_autocast_enabled()` only checks for CUDA autocast, so we use the separate function
        # `is_autocast_cpu_enabled()` for CPU autocast.
        # See https://github.com/pytorch/pytorch/issues/110966.
        if bias.device.type == "cuda" and torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        elif bias.device.type == "cpu" and torch.is_autocast_cpu_enabled():
            target_dtype = torch.get_autocast_cpu_dtype()
        if bias.dtype != target_dtype:
            bias = bias.to(target_dtype)
            ensure_finite_(bias, check_neg_inf=True, check_pos_inf=False)
        return bias

    def _scaled_dot_product_attention(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        attn_mask: Optional[torch.Tensor] = None,
        dropout_p: float = 0.0,
        response_dropout_p: float = 0.0,
        is_causal: bool = False,
    ) -> torch.Tensor:
        """
        Computes scaled dot-product attention on the query, key and value tensors, using the optional
        attention mask if passed, and applying dropout if a probability greater than 0.0 is specified.
        """
        if attn_mask is not None:
            attn_mask = attn_mask.to(q.device)

        if self.flash_attn_func is not None and attn_mask is None:
            # flash_attn expects (B, T, nh, hs), so transpose in and back out.
            r = self.flash_attn_func(
                q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), dropout_p=dropout_p, causal=is_causal
            )
            return r.transpose(1, 2)
        else:
            # torch's SDPA doesn't support GQA, so expand the KV heads to match the query heads.
            assert k.size(1) == v.size(1)
            num_kv_heads = k.size(1)
            num_q_heads = q.size(1)
            if num_q_heads != num_kv_heads:
                assert num_q_heads % num_kv_heads == 0
                k = k.repeat_interleave(num_q_heads // num_kv_heads, dim=1, output_size=num_q_heads)
                v = v.repeat_interleave(num_q_heads // num_kv_heads, dim=1, output_size=num_q_heads)

            return F.scaled_dot_product_attention(
                q,
                k,
                v,
                attn_mask=attn_mask,
                dropout_p=dropout_p,
                is_causal=is_causal,
            )

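    # A small shape sketch (comment only) of the GQA expansion above, assuming
    # 8 query heads and 2 KV heads:
    #
    #     q: (B, 8, T, hs);  k, v: (B, 2, T, hs)
    #     k.repeat_interleave(4, dim=1)  ->  (B, 8, T, hs)   # head order [0,0,0,0,1,1,1,1]
    #
    # so every group of 4 consecutive query heads attends with the same KV head.
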
    def attention(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        attention_bias: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        B, T, C = q.size()  # batch size, sequence length, d_model
        dtype = k.dtype

        # Optionally apply layer norm to keys and queries.
        if self.q_norm is not None and self.k_norm is not None:
            q = self.q_norm(q).to(dtype=dtype)
            k = self.k_norm(k).to(dtype=dtype)

        # Move the head dimension forward, next to the batch dim.
        # shape: (B, nh, T, hs)
        q = q.view(B, T, self.config.n_heads, C // self.config.n_heads).transpose(1, 2)
        # shape: (B, n_kv_h, T, hs)
        k = k.view(B, T, self.config.effective_n_kv_heads, C // self.config.n_heads).transpose(1, 2)
        # shape: (B, n_kv_h, T, hs)
        v = v.view(B, T, self.config.effective_n_kv_heads, C // self.config.n_heads).transpose(1, 2)

        if self.config.use_position_ids and self.config.rope:
            # Apply rotary embeddings.
            q, k = self.rotary_emb(q, k, position_ids=position_ids)

        if layer_past is not None:
            past_key, past_value = layer_past
            k = torch.cat((past_key.to(k.device), k), dim=-2)
            v = torch.cat((past_value.to(v.device), v), dim=-2)

        present = (k, v) if use_cache else None
        query_len, key_len = q.shape[-2], k.shape[-2]  # could be different if layer_past is not None

        if not self.config.use_position_ids and self.config.rope:
            # Apply rotary embeddings.
            q, k = self.rotary_emb(q, k)

        if attention_bias is not None:
            # Resize and cast the attention bias.
            # The current dtype of the attention bias might not match the dtype that the SDP attn function will
            # run in if AMP is enabled, and this can be a problem if some tokens are masked out due to padding,
            # as down-casting the attention bias to the autocast precision will result in -infs, which will
            # cause the SDP attn function to produce NaNs.
            attention_bias = self._cast_attn_bias(
                attention_bias[:, :, key_len - query_len : key_len, :key_len], dtype
            )

        # Compute the attention output.
        # shape: (B, nh, T, hs)
        att = self._scaled_dot_product_attention(
            q,
            k,
            v,
            attn_mask=attention_bias,
            dropout_p=0.0 if not self.training else self.config.attention_dropout,
            response_dropout_p=0.0 if not self.training else self.config.response_attention_dropout,
            is_causal=attention_bias is None,
        )

        # Re-assemble all head outputs side by side.
        att = att.transpose(1, 2).contiguous().view(B, T, C)

        # Apply the output projection.
        return self.attn_out(att), present

    def forward(
        self,
        x: torch.Tensor,
        attention_bias: Optional[torch.FloatTensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        raise NotImplementedError

    @classmethod
    def build(cls, layer_id: int, config: MolmoConfig, cache: BufferCache):
        return MolmoSequentialBlock(layer_id, config, cache)


class MolmoSequentialBlock(MolmoBlock):
    """
    This is a typical transformer block where the output is computed as ``MLP(LN(x + Attention(LN(x))))``
    (plus another skip connection).
    """

    def __init__(self, layer_id: int, config: MolmoConfig, cache: BufferCache):
        super().__init__(layer_id, config, cache)
        # Layer norms.
        self.attn_norm = LayerNorm.build(config)
        self.ff_norm = LayerNorm.build(config)

        # Attention input projection. Projects x -> (q, k, v).
        head_dim = config.d_model // config.n_heads
        self.fused_dims = (
            config.d_model,
            config.effective_n_kv_heads * head_dim,
            config.effective_n_kv_heads * head_dim,
        )
        self.att_proj = nn.Linear(
            config.d_model, sum(self.fused_dims),
            bias=config.include_bias or config.qkv_bias,
            device=config.init_device
        )
        # Feed-forward input projection.
        self.ff_proj = nn.Linear(
            config.d_model, self.hidden_size, bias=config.include_bias, device=config.init_device
        )

    def reset_parameters(self):
        super().reset_parameters()
        self.attn_norm.reset_parameters()
        self.ff_norm.reset_parameters()
        # NOTE: the standard deviation for these weights does not depend on the layer.
        init_weights(
            self.config, self.att_proj, d=self.config.d_model, layer_id=None, type_of_module=ModuleType.in_module
        )
        init_weights(
            self.config, self.ff_proj, d=self.config.d_model, layer_id=None, type_of_module=ModuleType.in_module
        )

         | 
| 463 | 
            +
                    self,
         | 
| 464 | 
            +
                    x: torch.Tensor,
         | 
| 465 | 
            +
                    attention_bias: Optional[torch.Tensor] = None,
         | 
| 466 | 
            +
                    position_ids: Optional[torch.Tensor] = None,
         | 
| 467 | 
            +
                    layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         | 
| 468 | 
            +
                    use_cache: bool = False,
         | 
| 469 | 
            +
                ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
         | 
| 470 | 
            +
                    # Get query, key, value projections.
         | 
| 471 | 
            +
                    # shape:
         | 
| 472 | 
            +
                    #  - for regular attn q, k, v: (batch_size, seq_len, d_model)
         | 
| 473 | 
            +
                    #  - for multi-query attn q: (batch_size, seq_len, d_model)
         | 
| 474 | 
            +
                    #                      k, v: (batch_size, seq_len, d_model // n_heads)
         | 
| 475 | 
            +
                    #  - for group query attn q: (batch_size, seq_len, d_model)
         | 
| 476 | 
            +
                    #                      k, v: (batch_size, seq_len, d_model // n_kv_heads)
         | 
| 477 | 
            +
             | 
| 478 | 
            +
                    if not self.config.norm_after:
         | 
| 479 | 
            +
                        if self._activation_checkpoint_fn is not None:
         | 
| 480 | 
            +
                            atten_in = self._activation_checkpoint_fn(self.attn_norm, x)
         | 
| 481 | 
            +
                        else:
         | 
| 482 | 
            +
                            atten_in = self.attn_norm(x)
         | 
| 483 | 
            +
                    else:
         | 
| 484 | 
            +
                        atten_in = x
         | 
| 485 | 
            +
                    qkv = self.att_proj(atten_in)
         | 
| 486 | 
            +
             | 
| 487 | 
            +
                    if self.config.clip_qkv is not None:
         | 
| 488 | 
            +
                        qkv.clamp_(min=-self.config.clip_qkv, max=self.config.clip_qkv)
         | 
| 489 | 
            +
             | 
| 490 | 
            +
                    q, k, v = qkv.split(self.fused_dims, dim=-1)
         | 
| 491 | 
            +
             | 
| 492 | 
            +
                    # Get attention scores.
         | 
| 493 | 
            +
                    if self._activation_checkpoint_fn is not None:
         | 
| 494 | 
            +
                        att, cache = self._activation_checkpoint_fn(  # type: ignore
         | 
| 495 | 
            +
                            self.attention, q, k, v, attention_bias, position_ids=position_ids, layer_past=layer_past, use_cache=use_cache
         | 
| 496 | 
            +
                        )
         | 
| 497 | 
            +
                    else:
         | 
| 498 | 
            +
                        att, cache = self.attention(q, k, v, attention_bias, position_ids=position_ids, layer_past=layer_past, use_cache=use_cache)
         | 
| 499 | 
            +
             | 
| 500 | 
            +
                    if self.config.norm_after:
         | 
| 501 | 
            +
                        if self._activation_checkpoint_fn is not None:
         | 
| 502 | 
            +
                            att = self._activation_checkpoint_fn(self.attn_norm, att)
         | 
| 503 | 
            +
                        else:
         | 
| 504 | 
            +
                            att = self.attn_norm(att)
         | 
| 505 | 
            +
             | 
| 506 | 
            +
                    # Add attention scores.
         | 
| 507 | 
            +
                    # shape: (B, T, C)
         | 
| 508 | 
            +
                    x = x + self.dropout(att)
         | 
| 509 | 
            +
             | 
| 510 | 
            +
                    # Add feed-forward projection.
         | 
| 511 | 
            +
                    # shape: (batch_size, seq_len, d_model)
         | 
| 512 | 
            +
                    og_x = x
         | 
| 513 | 
            +
             | 
| 514 | 
            +
                    if not self.config.norm_after:
         | 
| 515 | 
            +
                        if self._activation_checkpoint_fn is not None:
         | 
| 516 | 
            +
                            x = self._activation_checkpoint_fn(self.ff_norm, x)  # type: ignore
         | 
| 517 | 
            +
                        else:
         | 
| 518 | 
            +
                            x = self.ff_norm(x)
         | 
| 519 | 
            +
             | 
| 520 | 
            +
                    x = self.ff_proj(x)
         | 
| 521 | 
            +
                    if self._activation_checkpoint_fn is not None:
         | 
| 522 | 
            +
                        x = self._activation_checkpoint_fn(self.act, x)  # type: ignore
         | 
| 523 | 
            +
                    else:
         | 
| 524 | 
            +
                        x = self.act(x)
         | 
| 525 | 
            +
                    x = self.ff_out(x)
         | 
| 526 | 
            +
             | 
| 527 | 
            +
                    if self.config.norm_after:
         | 
| 528 | 
            +
                        if self._activation_checkpoint_fn is not None:
         | 
| 529 | 
            +
                            x = self._activation_checkpoint_fn(self.ff_norm, x)  # type: ignore
         | 
| 530 | 
            +
                        else:
         | 
| 531 | 
            +
                            x = self.ff_norm(x)
         | 
| 532 | 
            +
             | 
| 533 | 
            +
                    x = self.dropout(x)
         | 
| 534 | 
            +
                    x = og_x + x
         | 
| 535 | 
            +
             | 
| 536 | 
            +
                    return x, cache
         | 
| 537 | 
            +
             | 
| 538 | 
            +
             | 
| 539 | 
            +
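# A shape sketch (comment only) of the fused QKV projection used above, with
# assumed values d_model=3584, n_heads=28, n_kv_heads=4 (so head_dim=128):
#
#     fused_dims = (3584, 4 * 128, 4 * 128) = (3584, 512, 512)
#     qkv = att_proj(x)                      # (B, T, 4608)
#     q, k, v = qkv.split(fused_dims, -1)    # (B, T, 3584), (B, T, 512), (B, T, 512)
#
# i.e. one matmul produces q, k and v; ``split`` just slices the last dimension.

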
class Embedding(nn.Module):
    def __init__(
        self,
        num_embeddings: int,
        num_new_embeddings: int,
        features: int,
        device: Union[str, torch.device],
        initializer_range: float = 0.02,
        new_embed_initializer_range: float = 0.02,
    ):
        super().__init__()
        self.initializer_range = initializer_range
        self.new_embed_initializer_range = new_embed_initializer_range
        self.embedding = nn.Parameter(
            torch.zeros(num_embeddings, features, device=device),
        )
        self.new_embedding = nn.Parameter(
            torch.zeros(num_new_embeddings, features, device=device),
        )

    def reset_parameters(self):
        nn.init.normal_(self.embedding, std=self.initializer_range)
        nn.init.normal_(self.new_embedding, std=self.new_embed_initializer_range)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Token ids >= num_embeddings index into the separately initialized new embeddings.
        return F.embedding(x, torch.cat([self.embedding, self.new_embedding], dim=0))


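# A minimal usage sketch (comment only; sizes are hypothetical):
#
#     emb = Embedding(num_embeddings=152064, num_new_embeddings=128,
#                     features=3584, device="cpu")
#     ids = torch.tensor([0, 152064])   # a base-vocab token and the first *new* token
#     out = emb(ids)                    # (2, 3584)
#
# Keeping the new rows (e.g. added special tokens) as a separate parameter lets
# them use their own initializer range without touching the pretrained vocabulary.

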
class Dropout(nn.Dropout):
    def __init__(
        self,
        p: float = 0.5,
        inplace: bool = False,
        mask_p: float = 0,
        broadcast_dims: Sequence[int] = (),
    ):
        super().__init__(p, inplace)
        self.mask_p = mask_p
        self.broadcast_dims = broadcast_dims

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """
        :param input: A tensor of shape `(batch_size, seq_len, embed_dim)`
        """
        if self.p == 0.0 and (self.mask_p is None or self.mask_p == 0.0):
            return input
        elif self.p > 0. and len(self.broadcast_dims) > 0 and self.training:
            # Shared-mask dropout: sample the Bernoulli mask with the broadcast
            # dims collapsed to size 1, so the same mask is reused along them.
            keep_prob = 1.0 - self.p
            dropout_shape = list(input.shape)
            for dim in self.broadcast_dims:
                dropout_shape[dim] = 1
            keep = input.new_empty(dropout_shape).bernoulli_(keep_prob)
            # Scale the mask so the expected activation is unchanged; the
            # multiply broadcasts it across the collapsed dims.
            keep.div_(keep_prob)
            return input * keep
        else:
            return F.dropout(input, self.p, self.training, self.inplace)


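# A comment-only sketch of ``broadcast_dims``: with input (B, T, C) and
# broadcast_dims=(-1,), the mask is sampled with shape (B, T, 1), so each token
# vector is kept or dropped as a whole instead of zeroing individual channels.

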
@dataclass
class VisionBackboneConfig:
    image_default_input_size: Tuple[int, int] = (336, 336)
    image_patch_size: int = 14
    image_pos_patch_size: int = 14
    image_emb_dim: int = 1024
    image_num_heads: int = 16
    image_num_key_value_heads: int = 16
    image_num_layers: int = 24
    image_head_dim: int = 64
    image_mlp_dim: int = 4096
    image_mlp_activations: str = "gelu"
    image_dropout_rate: float = 0.0
    image_num_pos: int = 577
    image_norm_eps: float = 1e-5
    attention_dropout: float = 0.0
    residual_dropout: float = 0.0
    initializer_range: float = 0.02
    fsdp_wrap: bool = False
    resize_mode: str = "default"

    def __post_init__(self):
        self.image_default_input_size = tuple(self.image_default_input_size)  # type: ignore[assignment]

    @property
    def image_num_patch(self):
        h, w = self.image_default_input_size
        return h // self.image_patch_size, w // self.image_patch_size


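# Worked numbers for the defaults above (comment only): a 336x336 crop with
# 14x14-pixel patches gives image_num_patch = (336 // 14, 336 // 14) = (24, 24),
# i.e. 576 patches; image_num_pos = 577 presumably accounts for one extra
# class-token position.

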
@dataclass
class FullMolmoConfig:
    d_model: int = 768
    n_heads: int = 12
    n_kv_heads: Optional[int] = None
    qkv_bias: bool = False
    clip_qkv: Optional[float] = None
    n_layers: int = 12
    mlp_ratio: int = 4
    mlp_hidden_size: Optional[int] = None
    activation_type: str = "swiglu"
    block_group_size: int = 1
    rope: bool = True
    rope_full_precision: bool = True
    rope_theta: float = 10000.
    rope_impl: str = "interleave"
    vision_backbone: Optional[VisionBackboneConfig] = None
    attention_type: str = "sdpa"
    float32_attention: bool = True
    attention_dropout: float = 0.1
    response_attention_dropout: float = 0.0
    multi_query_attention: Optional[bool] = None
    attention_layer_norm: bool = False
    residual_dropout: float = 0.1
    embedding_dropout: float = 0.1
    layer_norm_type: str = "default"
    layer_norm_with_affine: bool = True
    layer_norm_eps: Optional[float] = None
    attention_layer_norm_with_affine: bool = True
    max_sequence_length: int = 1024
    max_position_embeddings: Optional[int] = None
    include_bias: bool = True
    bias_for_layer_norm: Optional[bool] = None
    scale_logits: bool = False
    vocab_size: int = 50257
    embedding_size: Optional[int] = 50304
    additional_vocab_size: Optional[int] = None
    new_embedding_init_range: float = 0.02
    weight_tying: bool = True
    pad_token_id: int = -1
    init_device: Optional[str] = None
    init_std: float = 0.02
    init_cutoff_factor: Optional[float] = None
    norm_after: bool = False
    precision: Optional[str] = None
    image_padding_embed: Optional[str] = None
    vit_layers: Tuple = (-1,)
    image_pooling_h: int = 2
    image_pooling_w: int = 2
    image_pooling_2d: str = "attention"
    image_projector: str = "mlp"
    image_feature_dropout: float = 0.0
    initializer_range: float = 0.02
    normalize_input_embeds: bool = False
    use_position_ids: bool = True

    @property
    def effective_n_kv_heads(self) -> int:
        if self.n_kv_heads is None:
            if self.multi_query_attention is True:
                return 1
            else:
                return self.n_heads
        else:
            if self.multi_query_attention is None:
                return self.n_kv_heads
            if self.multi_query_attention:
                n_kv_heads_should_be = 1
            else:
                n_kv_heads_should_be = self.n_heads
            if self.n_kv_heads == n_kv_heads_should_be:
                return n_kv_heads_should_be
            else:
                raise MolmoConfigurationError(
                    "You can't set `multi_query_attention` and `n_kv_heads` at the same time."
                )

    @property
    def image_num_patch(self):
        assert self.vision_backbone is not None
        return self.vision_backbone.image_num_patch

    @property
    def image_patch_size(self):
        assert self.vision_backbone is not None
        return self.vision_backbone.image_patch_size

    def llm_patches_per_crop(self):
        h, w = self.image_num_patch
        # Round up in case we need to pad the image features for pooling.
        h = (h + self.image_pooling_h - 1) // self.image_pooling_h
        w = (w + self.image_pooling_w - 1) // self.image_pooling_w
        return h, w


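# Worked examples for ``effective_n_kv_heads`` and ``llm_patches_per_crop``
# (comment only; the head counts are assumptions): with n_heads=28 and
# n_kv_heads=4, attention runs as grouped-query attention with 4 KV heads;
# with n_kv_heads=None and multi_query_attention=True it degenerates to a
# single KV head. With a (24, 24) patch grid and 2x2 pooling,
# llm_patches_per_crop() = (12, 12), i.e. 144 image tokens per crop.

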
def _expand_token(token, batch_size: int):
    return token.view(1, 1, -1).expand(batch_size, -1, -1)


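# Comment-only sketch: ``_expand_token`` turns a single learned vector (e.g. a
# class-token embedding of shape (D,)) into a (batch_size, 1, D) tensor so it
# can be concatenated in front of a (batch_size, N, D) patch sequence.

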
class ViTMLP(nn.Module):
    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        v_cfg = config.vision_backbone

        self.w1 = nn.Linear(
            v_cfg.image_emb_dim,
            v_cfg.image_mlp_dim,
            bias=True,
            device=config.init_device,
        )
        # Activation function: built from a copy of the config so the vision MLP
        # can use its own activation type (e.g. GELU) without mutating the LLM config.
        cfg = deepcopy(config)
        cfg.activation_type = v_cfg.image_mlp_activations
        self.act = Activation.build(cfg)
        self.w2 = nn.Linear(
            v_cfg.image_mlp_dim,
            v_cfg.image_emb_dim,
            bias=True,
            device=config.init_device,
        )

    def reset_parameters(self):
        v_cfg = self.config.vision_backbone
        nn.init.trunc_normal_(self.w1.weight, std=math.sqrt(1 / v_cfg.image_emb_dim), a=-2.0, b=2.0)
        nn.init.trunc_normal_(self.w2.weight, std=math.sqrt(1 / v_cfg.image_mlp_dim), a=-2.0, b=2.0)
        nn.init.zeros_(self.w1.bias)
        nn.init.zeros_(self.w2.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.w1(x)
        x = self.act(x)
        x = self.w2(x)
        return x


class ResidualAttentionBlock(nn.Module):

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config

        v_cfg = config.vision_backbone
        self.attention = MultiHeadDotProductAttention(config)
        self.feed_forward = ViTMLP(config)
        self.attention_norm = nn.LayerNorm(
            v_cfg.image_emb_dim,
            eps=v_cfg.image_norm_eps,
            device=config.init_device,
        )
        self.ffn_norm = nn.LayerNorm(
            v_cfg.image_emb_dim,
            eps=v_cfg.image_norm_eps,
            device=config.init_device,
        )

    def reset_parameters(self):
        self.attention.reset_parameters()
        self.feed_forward.reset_parameters()
        self.attention_norm.reset_parameters()
        self.ffn_norm.reset_parameters()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.attention(self.attention_norm(x))
        x = x + self.feed_forward(self.ffn_norm(x))
        return x

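# Editor's note (not in the original upload): `ResidualAttentionBlock` is a
# standard pre-norm transformer block -- LayerNorm runs *before* each sublayer
# and the sublayer output is added back to the residual stream. A minimal usage
# sketch; `_demo_block` and the token count are hypothetical, for illustration:
#
#     def _demo_block(config: FullMolmoConfig) -> torch.Tensor:
#         block = ResidualAttentionBlock(config)
#         x = torch.randn(2, 577, config.vision_backbone.image_emb_dim)
#         return block(x)  # residual connections preserve the input shape
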
class BlockCollection(nn.Module):

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        self.grad_checkpointing: bool = False

        v_cfg = config.vision_backbone
        self.resblocks = nn.ModuleList([
            ResidualAttentionBlock(config) for _ in range(v_cfg.image_num_layers)
        ])

    def reset_parameters(self):
        for r in self.resblocks:
            r.reset_parameters()

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        hidden_states = []
        for r in self.resblocks:
            x = r(x)
            hidden_states.append(x)
        return hidden_states

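# Editor's note (not in the original upload): `BlockCollection.forward` returns
# the hidden state after *every* block rather than only the last one.
# `OLMoPretrainedVisionBackbone.encode_image` below relies on this to tap
# intermediate ViT layers, roughly:
#
#     hidden_states = self.image_vit(images)    # list, one tensor per layer
#     features = [hidden_states[layer] for layer in cfg.vit_layers]
#     image_features = torch.cat(features, dim=-1)
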
class LayerNormFp32(nn.LayerNorm):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        orig_type = x.dtype
        x = F.layer_norm(x.to(torch.float32), self.normalized_shape, self.weight.to(torch.float32),
                         self.bias.to(torch.float32), self.eps)
        return x.to(orig_type)

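# Editor's note (not in the original upload): `LayerNormFp32` computes the
# normalization in float32 and casts back, keeping LayerNorm numerically stable
# when the surrounding model runs in bf16/fp16. Sketch of the dtype round-trip
# (hypothetical sizes, illustration only):
#
#     ln = LayerNormFp32(1024, eps=1e-5)
#     y = ln(torch.randn(2, 577, 1024, dtype=torch.bfloat16))
#     assert y.dtype == torch.bfloat16   # fp32 internally, bf16 back out
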
class VisionTransformer(nn.Module):

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config

        v_cfg = config.vision_backbone
        # class embeddings and positional embeddings
        self.scale = v_cfg.image_emb_dim ** -0.5
        self.class_embedding = nn.Parameter(
            torch.zeros(v_cfg.image_emb_dim, device=config.init_device),
        )
        self.num_prefix_tokens: int = 1
        self.positional_embedding = nn.Parameter(
            torch.zeros(v_cfg.image_num_pos, v_cfg.image_emb_dim, device=config.init_device),
        )

        image_patch_size = v_cfg.image_patch_size
        self.patch_embedding = nn.Linear(
            image_patch_size * image_patch_size * 3,
            v_cfg.image_emb_dim,
            bias=False,
            device=config.init_device,
        )

        self.pre_ln = LayerNormFp32(
            v_cfg.image_emb_dim,
            eps=v_cfg.image_norm_eps,
        )

        self.transformer = BlockCollection(config)

    @torch.jit.ignore
    def set_grad_checkpointing(self, enable=True):
        self.transformer.grad_checkpointing = enable

    def reset_parameters(self):
        nn.init.normal_(self.class_embedding, std=self.scale)
        nn.init.normal_(self.positional_embedding, std=self.scale)
        nn.init.normal_(self.patch_embedding.weight, std=0.02)
        self.pre_ln.reset_parameters()
        self.transformer.reset_parameters()

    def add_pos_emb(self, x: torch.Tensor, patch_num: Tuple[int, int]) -> torch.Tensor:
        cls_emb = self.positional_embedding[0:1]
        pos_emb = self.positional_embedding[1:]

        pos_emb = pos_emb.reshape(
            (int(math.sqrt(pos_emb.shape[0])), int(math.sqrt(pos_emb.shape[0])), pos_emb.shape[1])
        )

        (patch_num_0, patch_num_1) = patch_num

        if pos_emb.shape[0] != patch_num_0 or pos_emb.shape[1] != patch_num_1:
            # Derived from https://github.com/facebookresearch/mae/blob/main/util/pos_embed.py
            # antialias: default True in jax.image.resize
            pos_emb = pos_emb.unsqueeze(0).permute(0, 3, 1, 2)
            pos_emb = F.interpolate(
                pos_emb, size=(patch_num_0, patch_num_1), mode="bicubic", align_corners=False, antialias=True,
            )
            pos_emb = pos_emb.permute(0, 2, 3, 1).squeeze(0)

        pos_emb = pos_emb.reshape(-1, pos_emb.shape[-1])
        x = x + torch.cat([cls_emb[None, :, :], pos_emb[None, :, :]], dim=1).to(x.dtype)
        return x

    def forward(self, x: torch.Tensor, patch_num: Optional[Tuple[int, int]] = None) -> List[torch.Tensor]:
        """
        :param x: (batch_size, num_patch, n_pixels)
        """
        if patch_num is None:
            patch_num = self.config.vision_backbone.image_num_patch
        B, N, D = x.shape

        x = self.patch_embedding(x)

        # class embeddings and positional embeddings
        x = torch.cat([_expand_token(self.class_embedding, x.shape[0]).to(x.dtype), x], dim=1)
        x = self.add_pos_emb(x, patch_num)

        x = self.pre_ln(x)

        hidden_states = self.transformer(x)
        return hidden_states

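# Editor's note (not in the original upload): `add_pos_emb` keeps one learned
# square grid of positional embeddings and bicubically resizes it whenever a
# crop uses a different patch grid. The resize step in isolation, assuming a
# 24x24 learned grid rescaled to 16x12 (hypothetical sizes):
#
#     pos = torch.randn(24, 24, 1024).unsqueeze(0).permute(0, 3, 1, 2)  # (1, C, H, W)
#     pos = F.interpolate(pos, size=(16, 12), mode="bicubic",
#                         align_corners=False, antialias=True)
#     pos = pos.permute(0, 2, 3, 1).squeeze(0)                          # (16, 12, 1024)
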
class MultiHeadDotProductAttention(nn.Module):
    def __init__(self, config: FullMolmoConfig, use_bias: bool = True, is_vit_layer: Optional[bool] = True):
        super().__init__()
        self.config = config
        self.use_bias = use_bias

        v_cfg = config.vision_backbone
        self.embed_dim = v_cfg.image_emb_dim
        self.num_heads = v_cfg.image_num_heads
        self.head_dim = v_cfg.image_head_dim
        self.num_key_value_heads = v_cfg.image_num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.initializer_range = v_cfg.initializer_range
        self.is_vit_layer = is_vit_layer

        nlayers = 1 if (is_vit_layer or config.vit_layers is None) else len(config.vit_layers)

        self.wq = nn.Linear(
            nlayers * self.embed_dim,
            self.num_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wk = nn.Linear(
            nlayers * self.embed_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wv = nn.Linear(
            nlayers * self.embed_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wo = nn.Linear(
            self.num_heads * self.head_dim,
            self.embed_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.attention_dropout: Optional[Dropout] = None
        if v_cfg.attention_dropout > 0:
            self.attention_dropout = Dropout(v_cfg.attention_dropout, broadcast_dims=(0, 1))
        self.residual_dropout = Dropout(v_cfg.residual_dropout)

    def reset_parameters(self):
        nn.init.normal_(self.wq.weight, std=self.initializer_range)
        nn.init.normal_(self.wk.weight, std=self.initializer_range)
        nn.init.normal_(self.wv.weight, std=self.initializer_range)
        nn.init.normal_(self.wo.weight, std=self.initializer_range)
        if self.use_bias:
            nn.init.constant_(self.wq.bias, 0)
            nn.init.constant_(self.wk.bias, 0)
            nn.init.constant_(self.wv.bias, 0)
            nn.init.constant_(self.wo.bias, 0)

    def _split_heads(self, hidden_states, num_heads) -> torch.Tensor:
        return hidden_states.reshape(hidden_states.shape[:2] + (num_heads, self.head_dim))

    def _merge_heads(self, hidden_states) -> torch.Tensor:
        return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,))

    def forward(self, inputs_q: torch.Tensor, inputs_kv: Optional[torch.Tensor] = None) -> torch.Tensor:
        if inputs_kv is not None:
            inputs_k = inputs_kv
            inputs_v = inputs_kv
        else:
            inputs_k = inputs_q
            inputs_v = inputs_q

        xq, xk, xv = self.wq(inputs_q), self.wk(inputs_k), self.wv(inputs_v)

        xq = self._split_heads(xq, self.num_heads)
        xk = self._split_heads(xk, self.num_key_value_heads)
        xv = self._split_heads(xv, self.num_key_value_heads)

        if self.num_heads != self.num_key_value_heads:
            xk = xk.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)
            xv = xv.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)

        og_dtype = xq.dtype

        if self.config.float32_attention:
            xq = xq.to(torch.float)
            xk = xk.to(torch.float)

        if self.config.attention_type == "direct":
            attn_weights = torch.einsum("...qhd,...khd->...hqk", xq / math.sqrt(xq.size(-1)), xk)
            attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(xq.dtype)
            if self.attention_dropout is not None:
                attn_weights = self.attention_dropout(attn_weights)
            attn_output = torch.einsum("...hqk,...khd->...qhd", attn_weights.to(xv.dtype), xv)
        elif self.config.attention_type == "sdpa":
            if self.config.float32_attention and not torch.is_autocast_enabled():
                xv = xv.to(torch.float32)
            attn_output = F.scaled_dot_product_attention(
                xq.transpose(1, 2).contiguous(),
                xk.transpose(1, 2).contiguous(),
                xv.transpose(1, 2).contiguous(),
                is_causal=False,
                dropout_p=self.config.vision_backbone.attention_dropout,
            ).transpose(1, 2)
        else:
            raise NotImplementedError(self.config.attention_type)
        attn_output = attn_output.to(og_dtype)
        attn_output = self._merge_heads(attn_output)
        attn_output = self.wo(attn_output)
        attn_output = self.residual_dropout(attn_output)

        return attn_output

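# Editor's note (not in the original upload): when there are more query heads
# than key/value heads (grouped-query attention), the forward pass above simply
# repeats each KV head across its query group before attending. Sketch with
# 8 query heads over 2 KV heads (hypothetical sizes):
#
#     xk = torch.randn(1, 577, 2, 64)                      # (B, T, kv_heads, head_dim)
#     xk = xk.repeat_interleave(4, dim=2, output_size=8)   # -> (1, 577, 8, 64)
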
class MultiHeadAttentionPool(nn.Module):
    def __init__(
        self,
        config: FullMolmoConfig,
        factor: int = 1,
        use_bias: bool = True,
        dropout: bool = True,
        output_layer: bool = True,
        mean_residual: bool = False,
        query: str = "mean",
        is_vit_layer: Optional[bool] = True,
    ):
        super().__init__()
        self.config = config
        self.factor = factor
        self.use_bias = use_bias
        self.dropout = dropout
        self.output_layer = output_layer
        self.mean_residual = mean_residual
        self.query = query

        v_cfg = config.vision_backbone
        input_dim = v_cfg.image_emb_dim
        self.embed_dim = v_cfg.image_emb_dim * factor
        self.num_heads = v_cfg.image_num_heads
        self.head_dim = v_cfg.image_head_dim * factor
        self.num_key_value_heads = v_cfg.image_num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.initializer_range = v_cfg.initializer_range

        nlayers = 1 if (is_vit_layer or config.vit_layers is None) else len(config.vit_layers)

        if query != "vector":
            self.wq = nn.Linear(
                nlayers * input_dim,
                self.num_heads * self.head_dim,
                bias=use_bias,
                device=config.init_device,
            )
        self.wk = nn.Linear(
            nlayers * input_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wv = nn.Linear(
            nlayers * input_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )

        if query == "vector":
            self.attention_query = nn.Parameter(
                torch.zeros(
                    1, self.num_key_value_heads * self.head_dim, device=config.init_device,
                ),
            )

        if output_layer:
            self.wo = nn.Linear(
                self.num_heads * self.head_dim,
                self.embed_dim,
                bias=use_bias,
                device=config.init_device,
            )
        self.attention_dropout = Dropout(v_cfg.attention_dropout, broadcast_dims=(0, 1))
        if dropout:
            self.residual_dropout = Dropout(v_cfg.residual_dropout)

    def reset_parameters(self):
        if self.query != "vector":
            nn.init.normal_(self.wq.weight, std=self.initializer_range)
        nn.init.normal_(self.wk.weight, std=self.initializer_range)
        nn.init.normal_(self.wv.weight, std=self.initializer_range)
        if self.output_layer:
            nn.init.normal_(self.wo.weight, std=self.initializer_range)
        if self.use_bias:
            if self.query != "vector":
                nn.init.constant_(self.wq.bias, 0)
            nn.init.constant_(self.wk.bias, 0)
            nn.init.constant_(self.wv.bias, 0)
            if self.output_layer:
                nn.init.constant_(self.wo.bias, 0)
        if self.query == "vector":
            nn.init.normal_(self.attention_query, std=self.initializer_range)

    def _split_heads(self, hidden_states, num_heads):
        return hidden_states.reshape(hidden_states.shape[:2] + (num_heads, self.head_dim))

    def _merge_heads(self, hidden_states):
        return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,))

    def forward(self, inputs_kv: torch.Tensor) -> torch.Tensor:
        xk, xv = self.wk(inputs_kv), self.wv(inputs_kv)

        if self.query == "mean":
            inputs_q = inputs_kv.mean(dim=1, keepdim=True)
            xq = self.wq(inputs_q)
        elif self.query == "first":
            inputs_q = inputs_kv[:, :1]
            xq = self.wq(inputs_q)
        elif self.query == "vector":
            xq = self.attention_query.expand(inputs_kv.size(0), -1, -1)
        elif self.query == "constant":
            inputs_q = torch.ones_like(inputs_kv[:, :1]) / math.sqrt(inputs_kv.shape[-1])
            xq = self.wq(inputs_q)
        else:
            raise ValueError(f"Unknown query type: {self.query}")

        xq = self._split_heads(xq, self.num_heads)
        xk = self._split_heads(xk, self.num_key_value_heads)
        xv = self._split_heads(xv, self.num_key_value_heads)

        if self.num_heads != self.num_key_value_heads:
            xk = xk.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)
            xv = xv.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)

        xq = xq.to(torch.float)
        xk = xk.to(torch.float)

        xq = xq / math.sqrt(xq.size(-1))
        attn_weights = torch.einsum("...qhd,...khd->...hqk", xq, xk)

        attn_weights = F.softmax(attn_weights, dim=-1).to(xq.dtype)

        attn_weights = self.attention_dropout(attn_weights).to(xv.dtype)

        attn_output = torch.einsum("...hqk,...khd->...qhd", attn_weights, xv)
        attn_output = self._merge_heads(attn_output)
        if self.output_layer:
            attn_output = self.wo(attn_output)
        if self.dropout:
            attn_output = self.residual_dropout(attn_output)
        if self.mean_residual:
            attn_output += inputs_kv.mean(dim=1, keepdim=True)

        return attn_output

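# Editor's note (not in the original upload): `MultiHeadAttentionPool`
# summarizes a token sequence into a single vector by attending with one query,
# chosen by `query`: the mean token ("mean"), the first token ("first"), a
# learned vector ("vector"), or a constant vector ("constant"). The "mean"
# case in isolation (hypothetical sizes, illustration only):
#
#     kv = torch.randn(2, 144, 1024)       # (batch, tokens, dim)
#     q = kv.mean(dim=1, keepdim=True)     # (2, 1, 1024) -> one pooled token out
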
class MLP(nn.Module):
    def __init__(self, config: FullMolmoConfig, input_dim: int, dropout: float = 0.0):
        super().__init__()
        self.config = config
        self.hidden_size = (
            config.mlp_hidden_size if config.mlp_hidden_size is not None else config.mlp_ratio * config.d_model
        )
        self.initializer_range = config.initializer_range

        self.w1 = nn.Linear(
            input_dim,
            self.hidden_size // 2,
            bias=False,
            device=config.init_device,
        )
        self.w2 = nn.Linear(
            self.hidden_size // 2,
            config.d_model,
            bias=False,
            device=config.init_device,
        )
        self.w3 = nn.Linear(
            input_dim,
            self.hidden_size // 2,
            bias=False,
            device=config.init_device,
        )
        # Activation function.
        self.act = Activation.build(config)
        self.dropout = Dropout(dropout)

    def reset_parameters(self):
        nn.init.normal_(self.w1.weight, std=self.initializer_range)
        nn.init.normal_(self.w2.weight, std=self.initializer_range)
        nn.init.normal_(self.w3.weight, std=self.initializer_range)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.w2(self.act(self.w1(x), self.w3(x)))
        x = self.dropout(x)
        return x

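# Editor's note (not in the original upload): `MLP` is a llama-style gated MLP:
# `w1` feeds the gate branch, `w3` the linear branch, and the two-input
# activation (e.g. `LlamaSwiGLU` below) multiplies them before the `w2`
# down-projection. A functionally equivalent sketch (hypothetical sizes):
#
#     x = torch.randn(2, 144, 1024)
#     w1 = nn.Linear(1024, 2816, bias=False)
#     w3 = nn.Linear(1024, 2816, bias=False)
#     w2 = nn.Linear(2816, 1024, bias=False)
#     y = w2(F.silu(w1(x)) * w3(x))   # same as w2(act(w1(x), w3(x)))
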
class Residual(nn.Module):
    def __init__(self, submodule: nn.Module):
        super().__init__()
        self.submodule = submodule

    def reset_parameters(self):
        self.submodule.reset_parameters()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.submodule(x)

class OLMoVisionBackbone(nn.Module):
    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        self.image_vit = VisionTransformer(config)

        input_dim: Optional[int] = None
        self.image_pooling_2d: Optional[nn.Module] = None
        if config.image_pooling_2d in {ImagePooling2DType.attention, ImagePooling2DType.attention_meanq}:
            self.image_pooling_2d = MultiHeadDotProductAttention(config, is_vit_layer=False)
            input_dim = config.vision_backbone.image_emb_dim
        elif config.image_pooling_2d == ImagePooling2DType.attention_2wide:
            cfg = deepcopy(config)
            cfg.vision_backbone.image_emb_dim *= 2
            cfg.vision_backbone.image_head_dim *= 2
            self.image_pooling_2d = MultiHeadDotProductAttention(cfg, is_vit_layer=False)
            input_dim = cfg.vision_backbone.image_emb_dim
        elif config.image_pooling_2d == ImagePooling2DType.attention_v2:
            assert config.vit_layers is not None
            use_bias = True
            dropout = True
            output_layer = True
            query = "mean"
            mean_residual = False
            factor = len(config.vit_layers)
            self.image_pooling_2d = MultiHeadAttentionPool(
                config,
                factor=factor,
                use_bias=use_bias,
                dropout=dropout,
                output_layer=output_layer,
                mean_residual=mean_residual,
                query=query,
                is_vit_layer=False,
            )
            input_dim = config.vision_backbone.image_emb_dim * factor
        elif config.image_pooling_2d in [ImagePooling2DType.none, ImagePooling2DType.stack]:
            self.image_pooling_2d = None
            nlayers = 1 if config.vit_layers is None else len(config.vit_layers)
            input_dim = nlayers * config.vision_backbone.image_emb_dim
        else:
            raise NotImplementedError(f"Unknown image pooling 2D method: {config.image_pooling_2d}")

        self.input_dim = input_dim

        # `MLP` assumes the activation takes two inputs, so it must be a 'llama' variant.
        if config.activation_type == ActivationType.swiglu:
            mlp_config = replace(config, activation_type=ActivationType.llama_swiglu)
        elif config.activation_type == ActivationType.gelu:
            mlp_config = replace(config, activation_type=ActivationType.llama_geglu)
        else:
            mlp_config = config
        if config.image_projector == ImageProjectType.mlpx2:
            self.image_projector = nn.ModuleList(
                [MLP(mlp_config, input_dim), Residual(MLP(config, input_dim))]
            )
        elif config.image_projector == ImageProjectType.mlp:
            self.image_projector = MLP(mlp_config, input_dim)
        elif config.image_projector == ImageProjectType.linear:
            self.image_projector = nn.Linear(
                input_dim,
                config.d_model,
                bias=False,
                device=config.init_device,
            )
        else:
            raise NotImplementedError(f"Unknown image projector: {config.image_projector}")

        self.image_feature_dropout = Dropout(config.image_feature_dropout)

    def reset_parameters(self):
        if self.image_pooling_2d is not None:
            self.image_pooling_2d.reset_parameters()
        if self.config.image_projector == "2mlp":
            for module in self.image_projector:
                module.reset_parameters()
        elif self.config.image_projector == "linear":
            nn.init.xavier_uniform_(self.image_projector.weight)
        else:
            self.image_projector.reset_parameters()

    def forward(self, images: torch.Tensor, image_masks: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        raise NotImplementedError

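# Editor's note (not in the original upload): the projector setup above is what
# lets the shared `MLP` class be reused here -- `swiglu`/`gelu` are single-input
# activations, so `replace(config, activation_type=...)` swaps in the two-input
# `llama_swiglu`/`llama_geglu` variants that `MLP.forward`
# (`self.act(self.w1(x), self.w3(x))`) expects.
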
class OLMoPretrainedVisionBackbone(OLMoVisionBackbone):
    def __init__(self, config: FullMolmoConfig):
        super().__init__(config)
        v_cfg = self.config.vision_backbone
        self.grad_checkpointing = False

        self.num_prefix_tokens = self.image_vit.num_prefix_tokens
        assert self.num_prefix_tokens in {0, 1}, "Only 0 or 1 prefix tokens are supported"

        self.pad_embed = None
        if config.image_padding_embed:
            image_dim = v_cfg.image_emb_dim * len(self.config.vit_layers)
            if config.image_padding_embed in ["pad_embed", "regress"]:
                self.pad_embed = nn.Parameter(
                    torch.zeros((image_dim,), device=config.init_device))
            elif config.image_padding_embed == "pad_and_partial_pad":
                self.pad_embed = nn.Parameter(
                    torch.zeros((2, image_dim), device=config.init_device))
            else:
                raise ValueError(config.image_padding_embed)

    def reset_parameters(self):
        super().reset_parameters()
        self.image_vit.reset_parameters()

    def encode_image(self, images: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """
        :param images: (batch_size, num_crops, num_patch, n_pixels)
        """
        cfg = self.config
        v_cfg = self.config.vision_backbone
        B, T, N, D = images.shape

        # Crops filled entirely with -1 are padding; zero out their features below.
        mask = ~torch.all(images.view(B * T, N, D) == -1, dim=(1, 2), keepdim=True)

        # Output all hidden states
        # n_layers x (batch_num_crops, (1+)n_tokens, image_emb_dim)
        images = images.view(B * T, N, D)
        image_features = self.image_vit(images)

        if cfg.vit_layers is not None:
            features = []
            for layer in cfg.vit_layers:
                features.append(image_features[layer])
            image_features = torch.cat(features, dim=-1)
        else:
            image_features = image_features[-1]

        cls_embed: Optional[torch.Tensor] = None
        if self.num_prefix_tokens > 0:
            cls_embed = image_features[:, 0]
            image_features = image_features[:, 1:]

        image_features = image_features * mask
        image_features = image_features.view(B, T, N, -1)

        cls_embed = cls_embed.view(B, T, -1) if cls_embed is not None else None

        return image_features, cls_embed

    def forward(self, images: torch.Tensor, image_masks: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        cfg = self.config

        # image_features: (batch_size, num_crops(=num_image), num_patch, n * image_emb_dim)
        batch_size, num_image = images.shape[:2]
        image_features, cls_embed = self.encode_image(images)

        if cfg.image_padding_embed:
            assert image_masks is not None
            if cfg.image_padding_embed == "pad_embed":
                all_pad = (image_masks == 0).to(dtype=torch.float32)
                pad_embed = self.pad_embed[None, None, None, :]
                image_features = image_features + pad_embed * torch.unsqueeze(all_pad, -1)
            elif cfg.image_padding_embed == "regress":
                pad_embed = self.pad_embed[None, None, None, :]
                image_features = image_features + pad_embed * torch.unsqueeze(
                    torch.maximum(image_masks, torch.zeros_like(image_masks)), -1)
            elif cfg.image_padding_embed == "pad_and_partial_pad":
                pad_embed = self.pad_embed[:, None, None, None, :]
                all_pad = image_masks == 0
                partial_pad = torch.logical_and(image_masks < 1, torch.logical_not(all_pad)).to(dtype=image_features.dtype)
                all_pad = all_pad.to(dtype=image_features.dtype)
                image_features = image_features + pad_embed[0] * torch.unsqueeze(all_pad, -1)
                image_features = image_features + pad_embed[1] * torch.unsqueeze(partial_pad, -1)
            else:
                raise ValueError(cfg.image_padding_embed)

        image_features = self.image_feature_dropout(image_features)
        if cls_embed is not None:
            cls_embed = self.image_feature_dropout(cls_embed)

        image_features = image_features.reshape(
            (batch_size, num_image) + cfg.image_num_patch + (-1,),
        )

        if cfg.image_num_patch[0] % cfg.image_pooling_h == 1:
            # Pad so we can still pool 2x2 patches
            image_features = F.pad(
                image_features,
                (0, 0, 0, 1, 0, 1, 0, 0, 0, 0),
            )

        # image pooling
        image_features = einops.rearrange(
            image_features,
            'b n (h dh) (w dw) c -> (b n h w) (dh dw) c',
            dh=cfg.image_pooling_h,
            dw=cfg.image_pooling_w,
        )

        if cfg.image_pooling_2d == ImagePooling2DType.attention_meanq:
            query = image_features.mean(-2, keepdim=True)
            image_features = self.image_pooling_2d(query, image_features)
        elif cfg.image_pooling_2d not in {ImagePooling2DType.none, ImagePooling2DType.stack}:
            if self.grad_checkpointing:
                from torch.utils.checkpoint import checkpoint
                image_features = checkpoint(self.image_pooling_2d, image_features[:, :1, :], image_features, use_reentrant=False)
            else:
                image_features = self.image_pooling_2d(image_features[:, :1, :], image_features)

        h, w = cfg.llm_patches_per_crop()
        image_features = image_features.reshape(batch_size, num_image, h * w, -1)

        # MLP layer to map the feature.
        if self.grad_checkpointing:
            from torch.utils.checkpoint import checkpoint
            image_features = checkpoint(self.image_projector, image_features, use_reentrant=False)
        else:
            image_features = self.image_projector(image_features)

        # image_features: (batch_size, num_image, num_patch, d_model)
        # cls_embed: (batch_size, num_image, d_model)
        return image_features, cls_embed

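# Editor's note (not in the original upload): the einops rearrange in `forward`
# turns every (image_pooling_h x image_pooling_w) window of the patch grid into
# its own short sequence, so the pooling attention can reduce each window to a
# single token. Sketch with 2x2 pooling over a 24x24 grid (hypothetical sizes):
#
#     feats = torch.randn(1, 5, 24, 24, 1024)   # (b, n_crops, h, w, c)
#     seqs = einops.rearrange(
#         feats, 'b n (h dh) (w dw) c -> (b n h w) (dh dw) c', dh=2, dw=2)
#     # seqs.shape == (1 * 5 * 12 * 12, 4, 1024): 720 windows of 4 patches each
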
class ModuleType(str, Enum):
    in_module = "in"
    out_module = "out"
    emb = "emb"
    final_out = "final_out"

            def init_weights(
         | 
| 1452 | 
            +
                config: FullMolmoConfig,
         | 
| 1453 | 
            +
                module: Union[nn.Linear, nn.Embedding],
         | 
| 1454 | 
            +
                d: Optional[int] = None,
         | 
| 1455 | 
            +
                layer_id: Optional[int] = None,
         | 
| 1456 | 
            +
                std_factor: float = 1.0,
         | 
| 1457 | 
            +
                type_of_module: Optional[ModuleType] = None,
         | 
| 1458 | 
            +
            ) -> None:
         | 
| 1459 | 
            +
                d = d if d is not None else config.d_model
         | 
| 1460 | 
            +
                std = config.init_std * std_factor
         | 
| 1461 | 
            +
                if config.init_cutoff_factor is not None:
         | 
| 1462 | 
            +
                    cutoff_value = config.init_cutoff_factor * std
         | 
| 1463 | 
            +
                    nn.init.trunc_normal_(module.weight, mean=0.0, std=std, a=-cutoff_value, b=cutoff_value)
         | 
| 1464 | 
            +
                else:
         | 
| 1465 | 
            +
                    nn.init.normal_(module.weight, mean=0.0, std=std)
         | 
| 1466 | 
            +
             | 
| 1467 | 
            +
             | 
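# Illustrative sketch (added for exposition; not part of the original upload):
# with `init_cutoff_factor` set, `init_weights` draws from N(0, std^2)
# truncated to +/- cutoff_factor * std. The layer size and the numbers here
# are assumptions for the example.
def _sketch_truncated_init() -> None:
    lin = nn.Linear(16, 16)
    std, cutoff_factor = 0.02, 3.0
    nn.init.trunc_normal_(
        lin.weight, mean=0.0, std=std, a=-cutoff_factor * std, b=cutoff_factor * std
    )
    assert float(lin.weight.abs().max()) <= cutoff_factor * std
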
class LlamaSwiGLU(nn.Module):
    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        return F.silu(x1) * x2

    @property
    def output_multiplier(self) -> float:
        return 0.5


class SwiGLU(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x, gate = x.chunk(2, dim=-1)
        return F.silu(gate) * x

    @property
    def output_multiplier(self) -> float:
        return 0.5

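# Illustrative sketch (added for exposition; not part of the original upload):
# SwiGLU splits its input in half along the last dim and gates one half with
# SiLU of the other, which is why `output_multiplier` is 0.5: the MLP's
# up-projection has to be twice as wide as the activation's output.
def _sketch_swiglu_halves() -> None:
    act = SwiGLU()
    x = torch.randn(2, 8)       # stand-in for an up-projection output
    out = act(x)
    assert out.shape == (2, 4)  # half the width survives the gate
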
class Activation(nn.Module):
    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError

    @property
    def output_multiplier(self) -> float:
        raise NotImplementedError

    @classmethod
    def build(cls, config: FullMolmoConfig) -> 'Activation':
        if config.activation_type == "quick_gelu":
            return QuickGELU(config)
        elif config.activation_type == "gelu":
            return cast(Activation, GELU(approximate="none"))
        elif config.activation_type == "gelu_tanh":
            return cast(Activation, GELU(approximate="tanh"))
        elif config.activation_type == "relu":
            return cast(Activation, ReLU(inplace=False))
        elif config.activation_type == "silu":
            return cast(Activation, SiLU(inplace=False))
        # elif config.activation_type == "llama_geglu":
        #     return LlamaGEGLU(config)
        # elif config.activation_type == "llama_geglu_tanh":
        #     return LlamaGEGLUTanh(config)
        elif config.activation_type == "llama_swiglu":
            return LlamaSwiGLU()
        elif config.activation_type == "swiglu":
            return SwiGLU()
        else:
            raise NotImplementedError(f"Unknown activation: '{config.activation_type}'")

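# Illustrative sketch (added for exposition; not part of the original upload):
# `Activation.build` is a small factory keyed on `config.activation_type`.
# Constructing FullMolmoConfig with only this field is an assumption here;
# every other field falls back to its default.
def _sketch_activation_factory() -> None:
    cfg = FullMolmoConfig(activation_type="swiglu")
    act = Activation.build(cfg)
    assert isinstance(act, SwiGLU) and act.output_multiplier == 0.5
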
class QuickGELU(Activation):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * torch.sigmoid(1.702 * x)

    @property
    def output_multiplier(self) -> float:
        return 1.0


class GELU(nn.GELU):
    @property
    def output_multiplier(self) -> float:
        return 1.0


class ReLU(nn.ReLU):
    @property
    def output_multiplier(self) -> float:
        return 1.0


class SiLU(nn.SiLU):
    @property
    def output_multiplier(self) -> float:
        return 1.0

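# Illustrative sketch (added for exposition; not part of the original upload):
# QuickGELU is the sigmoid approximation x * sigmoid(1.702 * x) used by
# CLIP-style ViTs; it stays within a few hundredths of exact GELU over
# typical activation ranges.
def _sketch_quick_gelu_accuracy() -> None:
    x = torch.linspace(-3.0, 3.0, steps=101)
    approx = x * torch.sigmoid(1.702 * x)
    exact = F.gelu(x)
    assert float((approx - exact).abs().max()) < 0.05
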
def causal_attention_bias(seq_len: int, device: torch.device) -> torch.FloatTensor:
    att_bias = torch.triu(
        torch.ones(seq_len, seq_len, device=device, dtype=torch.float),
        diagonal=1,
    )
    att_bias.masked_fill_(att_bias == 1, torch.finfo(att_bias.dtype).min)
    return att_bias.view(1, 1, seq_len, seq_len)  # type: ignore

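# Illustrative sketch (added for exposition; not part of the original upload):
# the bias is 0 on and below the diagonal and dtype-min above it, so after
# softmax each token attends only to itself and earlier positions.
def _sketch_causal_bias_values() -> None:
    bias = causal_attention_bias(3, torch.device("cpu"))
    assert bias.shape == (1, 1, 3, 3)
    assert bias[0, 0, 1, 0] == 0.0                           # past is visible
    assert bias[0, 0, 0, 1] == torch.finfo(torch.float).min  # future is masked
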
def get_causal_attention_bias(cache: BufferCache, seq_len: int, device: torch.device) -> torch.Tensor:
    if (causal_bias := cache.get("causal_attention_bias")) is not None and causal_bias.shape[-1] >= seq_len:
        if causal_bias.device != device:
            causal_bias = causal_bias.to(device)
            cache["causal_attention_bias"] = causal_bias
        return causal_bias
    with torch.autocast(device.type, enabled=False):
        causal_bias = causal_attention_bias(seq_len, device)
    cache["causal_attention_bias"] = causal_bias
    return causal_bias

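# Illustrative sketch (added for exposition; not part of the original upload):
# the cache keeps the largest bias built so far and hands it back unchanged
# for shorter sequences (callers slice it to size). This assumes BufferCache
# behaves like a mutable mapping, as it is used above.
def _sketch_bias_caching() -> None:
    cache = BufferCache()
    first = get_causal_attention_bias(cache, 8, torch.device("cpu"))
    second = get_causal_attention_bias(cache, 4, torch.device("cpu"))
    assert second is first  # 8 >= 4, so the cached buffer is reused
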
class LayerNormBase(nn.Module):
    def __init__(
        self,
        config: FullMolmoConfig,
        *,
        size: Optional[int] = None,
        elementwise_affine: Optional[bool] = True,
        eps: float = 1e-05,
        weight_initializer: Optional[Callable] = torch.ones,
        bias_initializer: Optional[Callable] = torch.zeros,
    ):
        super().__init__()
        self.config = config
        self.eps = self.config.layer_norm_eps or eps
        self.normalized_shape = (size or config.d_model,)
        if elementwise_affine or (elementwise_affine is None and self.config.layer_norm_with_affine):
            self.weight = nn.Parameter(weight_initializer(self.normalized_shape, device=config.init_device))
            use_bias = self.config.bias_for_layer_norm
            if use_bias is None:
                use_bias = self.config.include_bias
            if use_bias:
                self.bias = nn.Parameter(bias_initializer(self.normalized_shape, device=config.init_device))
            else:
                self.register_parameter("bias", None)
        else:
            self.register_parameter("bias", None)
            self.register_parameter("weight", None)

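    def _cast_if_autocast_enabled(self, tensor: torch.Tensor) -> torch.Tensor:
        # Helper referenced by the low-precision LayerNorm below but not
        # defined in this slice of the file; restored here following the OLMo
        # implementation this code derives from (an assumption).
        if tensor.device.type == "cuda" and torch.is_autocast_enabled():
            return tensor.to(dtype=torch.get_autocast_gpu_dtype())
        elif tensor.device.type == "cpu" and torch.is_autocast_cpu_enabled():
            return tensor.to(dtype=torch.get_autocast_cpu_dtype())
        else:
            return tensor
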
    @classmethod
    def build(cls, config: FullMolmoConfig, size: Optional[int] = None, **kwargs):
        if config.layer_norm_type == "default":
            return LayerNorm(config, size=size, low_precision=False, **kwargs)
        elif config.layer_norm_type == "low_precision":
            return LayerNorm(config, size=size, low_precision=True, **kwargs)
        elif config.layer_norm_type == "rms":
            return RMSLayerNorm(config, size=size, **kwargs)
        else:
            raise NotImplementedError(f"Unknown LayerNorm type: '{config.layer_norm_type}'")

class RMSLayerNorm(LayerNormBase):
    """
    RMS layer norm, a simplified :class:`LayerNorm` implementation.
    """

    def __init__(
        self,
        config: FullMolmoConfig,
        size: Optional[int] = None,
        elementwise_affine: Optional[bool] = None,
        eps: float = 1e-5,
    ):
        super().__init__(config, size=size, elementwise_affine=elementwise_affine, eps=eps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        with torch.autocast(enabled=False, device_type=x.device.type):
            og_dtype = x.dtype
            x = x.to(torch.float32)
            variance = x.pow(2).mean(-1, keepdim=True)
            x = x * torch.rsqrt(variance + self.eps)
            x = x.to(og_dtype)

        if self.weight is not None:
            if self.bias is not None:
                return self.weight * x + self.bias
            else:
                return self.weight * x
        else:
            return x

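# Illustrative sketch (added for exposition; not part of the original upload):
# RMSNorm only rescales by the root-mean-square (no mean subtraction), and
# the forward above does that math in fp32 before casting back.
def _sketch_rms_normalization() -> None:
    x = torch.randn(2, 5)
    eps = 1e-5
    normed = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
    rms = normed.pow(2).mean(-1).sqrt()
    assert torch.allclose(rms, torch.ones(2), atol=1e-3)
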
class LayerNorm(LayerNormBase):
    """
    The default :class:`LayerNorm` implementation which can optionally run in low precision.
    """

    def __init__(
        self,
        config: FullMolmoConfig,
        size: Optional[int] = None,
        low_precision: bool = False,
        elementwise_affine: Optional[bool] = None,
        eps: float = 1e-05,
    ):
        super().__init__(config, size=size, elementwise_affine=elementwise_affine, eps=eps)
        self.low_precision = low_precision

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.low_precision:
            module_device = x.device
            downcast_x = self._cast_if_autocast_enabled(x)
            downcast_weight = (
                self._cast_if_autocast_enabled(self.weight) if self.weight is not None else self.weight
            )
            downcast_bias = self._cast_if_autocast_enabled(self.bias) if self.bias is not None else self.bias
            with torch.autocast(enabled=False, device_type=module_device.type):
                return F.layer_norm(
                    downcast_x, self.normalized_shape, weight=downcast_weight, bias=downcast_bias, eps=self.eps
                )
        else:
            return F.layer_norm(x, self.normalized_shape, weight=self.weight, bias=self.bias, eps=self.eps)

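# Illustrative sketch (added for exposition; not part of the original upload):
# with no affine parameters (weight=None above), F.layer_norm just centers
# and scales each feature vector to zero mean and unit variance.
def _sketch_layer_norm_stats() -> None:
    x = torch.randn(2, 4, 16)
    out = F.layer_norm(x, (16,))
    assert out.shape == x.shape
    assert torch.allclose(out.mean(-1), torch.zeros(2, 4), atol=1e-5)
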
class Molmo(nn.Module):
    def __init__(self, config: FullMolmoConfig, init_params: bool = True):
        super().__init__()
        self.config = config
        self.__cache = BufferCache()

        # Validate config.
        if self.config.embedding_size is not None and self.config.embedding_size != self.config.vocab_size:
            if self.config.embedding_size < self.config.vocab_size:
                raise MolmoConfigurationError("embedding size should be at least as big as vocab size")
            elif self.config.embedding_size % 128 != 0:
                import warnings

                warnings.warn(
                    "Embedding size is not a multiple of 128! This could hurt throughput performance.", UserWarning
                )
        torch.backends.cuda.enable_flash_sdp(True)
        torch.backends.cuda.enable_mem_efficient_sdp(False)  # this is super slow so make sure torch won't use it
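        # Note (added for clarity; not part of the original upload): the two
        # calls above are process-wide toggles for
        # F.scaled_dot_product_attention, so they affect every model in the
        # process, not just this instance.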
        wte = None
        if self.config.additional_vocab_size is not None:
            wte = Embedding(
                config.embedding_size or config.vocab_size,
                config.additional_vocab_size,
                config.d_model,
                device=config.init_device,
                initializer_range=config.initializer_range,
                new_embed_initializer_range=config.new_embedding_init_range,
            )
        else:
            wte = nn.Embedding(
                config.embedding_size or config.vocab_size, config.d_model, device=config.init_device
            )

        self.transformer = nn.ModuleDict(
            dict(
                wte=wte,
                emb_drop=Dropout(config.embedding_dropout),
                ln_f=LayerNorm.build(config),
            )
        )

        blocks = [MolmoBlock.build(i, config, self.__cache) for i in range(config.n_layers)]
        if self.config.block_group_size > 1:
            raise NotImplementedError()
        else:
            self.transformer.update({"blocks": nn.ModuleList(blocks)})

        if not self.config.rope:
            self.transformer.update(
                {"wpe": nn.Embedding(config.max_sequence_length, config.d_model, device=config.init_device)}
            )
        if not config.weight_tying:
            self.transformer.update(
                {
                    "ff_out": nn.Linear(
                        config.d_model,
                        config.embedding_size or config.vocab_size,
                        bias=config.include_bias,
                        device=config.init_device,
                    )
                }
            )

        self.vision_backbone: Optional[OLMoVisionBackbone] = None
        if config.vision_backbone is not None:
            self.vision_backbone = OLMoPretrainedVisionBackbone(config)

        self.__num_fwd_flops: Optional[int] = None

    def reset_parameters(self):
        if self.vision_backbone is not None:
            self.vision_backbone.reset_parameters()
        self.reset_non_vision_parameters()

    def reset_non_vision_parameters(self):
        self.transformer.wte.reset_parameters()
        if hasattr(self.transformer.wte, "new_embedding"):
            nn.init.normal_(self.transformer.wte.new_embedding, std=self.config.new_embedding_init_range)

        if hasattr(self.transformer, "wpe"):
            nn.init.normal_(self.transformer.wpe.weight, mean=0.0, std=1.0)

        self.transformer.ln_f.reset_parameters()  # type: ignore

        if hasattr(self.transformer, "ff_out"):
            nn.init.normal_(self.transformer.ff_out.weight, mean=0.0, std=0.02)

        if self.config.block_group_size == 1:
            for block in self.transformer.blocks:
                block.reset_parameters()
        else:
            for block_group in self.transformer.block_groups:
                block_group.reset_parameters()

    def forward(
        self,
        input_ids: torch.LongTensor,
        input_embeddings: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        attention_bias: Optional[torch.Tensor] = None,
        response_mask: Optional[torch.Tensor] = None,
        images: Optional[torch.Tensor] = None,
        image_masks: Optional[torch.Tensor] = None,
        image_input_idx: Optional[torch.Tensor] = None,
        subsegment_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        past_key_values: Optional[Sequence[Tuple[torch.Tensor, torch.Tensor]]] = None,
        use_cache: bool = False,
        last_logits_only: bool = False,
        output_hidden_states: Optional[bool] = None,
        append_last_valid_logits: Optional[torch.Tensor] = None,
    ) -> ModelOutput:
        """
        :param input_ids: A tensor of shape `(batch_size, seq_len)`.
        :param input_embeddings: A tensor of shape `(batch_size, seq_len, d_model)` with input
            embeddings. When provided, it is treated as the output of the input embedding layer.
        :param attention_mask: A tensor of shape `(batch_size, seq_len)` that indicates
            which input IDs are masked. A `1` value in the mask means that
            the corresponding input ID should *not* be ignored. A `0` means
            that the corresponding input ID is masked.

            This has the same meaning as the `attention_mask` in HuggingFace's `transformers`
            library.
        :param attention_bias: A tensor of shape `(batch_size, 1, seq_len, seq_len)`,
            `(1, 1, seq_len, seq_len)`, or `(seq_len, seq_len)`. This is used
            to introduce causal or other biases.

            If the tensor is a bool or byte tensor, a `True` or `1` at `attention_bias[:, :, i, j]`
            indicates that the i-th element in the sequence is allowed to attend to the j-th
            element in the sequence.

            If the tensor is a float tensor, it will just be added to the attention
            scores before the softmax.

            The default is causal, which corresponds to a lower-diagonal byte matrix of ones.
        :param response_mask: A tensor of shape `(batch_size, seq_len)` that indicates
            the response mask. A `1` value in the mask means that the corresponding token
            is a response token. A `0` means that the corresponding token is not
            a response token.
        :param past_key_values: Pre-computed keys and values for each attention block.
            Can be used to speed up sequential decoding. The `input_ids` which have
            their past given to this model should not be passed as `input_ids` as they have already been computed.
        :param use_cache: If `True`, return key and value tensors for each block.
        :param last_logits_only: If `True`, only compute the logits for the last token of each sequence.
            This can speed up decoding when you only care about the next token.
        """
        output_hidden_states = output_hidden_states if output_hidden_states is not None else False

        if past_key_values:
            assert len(past_key_values) == self.config.n_layers

        has_image = images is not None

        assert not (has_image and input_embeddings is not None), "Cannot provide both images and input embeddings."
        assert not (has_image and past_key_values is not None), "Cached key and values should not be used with images."

        batch_size, seq_len = input_ids.size() if input_embeddings is None else input_embeddings.size()[:2]
        if past_key_values is None:
            past_length = 0
        else:
            past_length = past_key_values[0][0].size(-2)

        if self.config.use_position_ids and attention_mask is None:
            attention_mask = input_ids != -1

        if subsegment_ids is not None:
            assert not use_cache, "Subsegment_ids cannot be used with cache."
            subsegment_mask = subsegment_ids.unsqueeze(2) <= subsegment_ids.unsqueeze(1)
            attention_mask = (
                subsegment_mask.to(attention_mask.dtype) *
                attention_mask.unsqueeze(2) *
                attention_mask.unsqueeze(1))
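            # Note (added for clarity; not part of the original upload): position
            # i may attend to position j when subsegment_ids[:, i] <= subsegment_ids[:, j];
            # the two unsqueezed copies of attention_mask zero out padded rows and columns.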
            if position_ids is None:
                raise ValueError("position_ids must be given if subsegment_ids are used")
        else:
            if self.config.use_position_ids and position_ids is None:
                position_ids = torch.clamp(
                    torch.cumsum(attention_mask.to(torch.int32), dim=-1) - 1,
                    min=0,
                ).broadcast_to((batch_size, attention_mask.shape[-1]))
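                # Note (added for clarity; not part of the original upload):
                # cumsum(mask) - 1 numbers the real tokens 0, 1, 2, ... while
                # clamp(min=0) pins any left padding at position 0.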

        # Get embeddings of input.
        # shape: (batch_size, seq_len, d_model)
        if input_ids is not None:
            input_ids = input_ids * (input_ids != -1).to(input_ids.dtype)
        x = self.transformer.wte(input_ids) if input_embeddings is None else input_embeddings  # type: ignore

        num_image: Optional[int] = None
        if images is not None:
            # shape: (batch_size, num_image, num_patch, d_model)
            # cls_embed: (batch_size, num_image, d_model)
            image_features, cls_embed = self.vision_backbone(images, image_masks)
            num_image, num_patch = image_features.shape[1:3]
            assert image_input_idx.shape == (batch_size, num_image, num_patch)

            # Insert the image features into the input embeddings.
            image_features = image_features.view(batch_size, num_image * num_patch, -1)
            image_input_idx = image_input_idx.view(batch_size, num_image * num_patch)

            valid = image_input_idx >= 0
            batch_idx = torch.arange(batch_size, device=x.device)
            batch_idx = torch.tile(batch_idx[:, None], [1, image_features.shape[1]])

            # For the HF demo/endpoint.
            image_features = image_features.to(x.device)

            x[batch_idx[valid], image_input_idx[valid]] += image_features[valid]
        if not self.config.rope:
            # Get positional embeddings.
            # shape: (1, seq_len)
            pos = torch.arange(past_length, past_length + seq_len, dtype=torch.long, device=x.device).unsqueeze(0)
            # shape: (1, seq_len, d_model)
            pos_emb = self.transformer.wpe(pos)  # type: ignore
            x = pos_emb + x

        # Add input + positional embeddings and apply dropout.
        # shape: (batch_size, seq_len, d_model)
        x = self.transformer.emb_drop(x)  # type: ignore

        # Scale up the input embeddings when configured.
        if self.config.normalize_input_embeds:
            x = x * (self.config.d_model ** 0.5)

        # Transform the attention mask into what the blocks expect.
        if attention_mask is not None:
            # shape: (batch_size, 1, 1, seq_len)
            if len(attention_mask.shape) == 2:
                attention_mask = attention_mask[:, :past_length + seq_len]
                attention_mask = attention_mask.to(dtype=torch.float).view(batch_size, -1)[:, None, None, :]
            else:
                attention_mask = attention_mask.unsqueeze(1).to(dtype=torch.float)
            attention_mask = (1.0 - attention_mask) * torch.finfo(attention_mask.dtype).min
        # Merge attention mask with attention bias.
        if (
            attention_bias is not None
            or attention_mask is not None
            # NOTE (epwalsh): we need to initialize the attn bias in order for attn to work properly
            # with key+value cache. Otherwise `F.scaled_dot_product_attention()` doesn't seem to compute
            # scores correctly.
            or past_key_values is not None
        ):
            if attention_bias is None:
                attention_bias = get_causal_attention_bias(self.__cache, past_length + seq_len, x.device)
            elif attention_bias.dtype in (torch.int8, torch.bool):
                attention_bias = attention_bias.to(dtype=torch.float)
                attention_bias.masked_fill_(attention_bias == 0.0, torch.finfo(attention_bias.dtype).min)

            # Transform to the right shape and data type.
            mask_len = seq_len
            if attention_mask is not None:
                mask_len = attention_mask.shape[-1]
            elif past_key_values is not None:
                mask_len = past_key_values[0][0].shape[-2] + seq_len
            attention_bias = attention_bias[:, :, :mask_len, :mask_len].to(dtype=torch.float)

            # Add in the masking bias.
            if attention_mask is not None:
                attention_bias = attention_bias + attention_mask
                # Might get -infs after adding attention mask, since dtype.min + dtype.min = -inf.
                # `F.scaled_dot_product_attention()` doesn't handle -inf like you'd expect; instead
                # it can produce NaNs.
                ensure_finite_(attention_bias, check_neg_inf=True, check_pos_inf=False)
        attn_key_values: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = [] if use_cache else None

        # Decoder layers.
        all_hidden_states = []

        # Apply blocks one-by-one.
        if self.config.block_group_size == 1:
            for block_idx, block in enumerate(self.transformer.blocks):
                if output_hidden_states:
                    # Add hidden states.
                    all_hidden_states.append(x)

                layer_past = None if past_key_values is None else past_key_values[block_idx]
                x, cache = block(x, attention_bias=attention_bias, position_ids=position_ids, layer_past=layer_past, use_cache=use_cache)

                if attn_key_values is not None:
                    assert cache is not None
                    attn_key_values.append(cache)
        else:
            for group_idx, block_group in enumerate(self.transformer.block_groups):
                if output_hidden_states:
                    # Add hidden states.
                    all_hidden_states.append(x)

                layers_past = (
                    None
                    if past_key_values is None
                    else past_key_values[
                        group_idx * self.config.block_group_size : (group_idx + 1) * self.config.block_group_size
                    ]
                )
                x, cache = block_group(
                    x, attention_bias=attention_bias, position_ids=position_ids, layers_past=layers_past, use_cache=use_cache
                )
                if attn_key_values is not None:
                    assert cache is not None
                    attn_key_values.extend(cache)
        if last_logits_only:
            # shape: (batch_size, 1, d_model)
            if append_last_valid_logits is not None:
                last_valid_output = x[
                    torch.arange(x.shape[0], device=x.device), append_last_valid_logits.to(x.device)]
                x = last_valid_output.unsqueeze(1)
            else:
                x = x[:, -1, :].unsqueeze(1)

        # Apply final layer norm.
        # shape: (batch_size, seq_len or 1, d_model)
        x = self.transformer.ln_f(x)  # type: ignore
        if output_hidden_states:
            # Add the final hidden state post-final-layernorm, following HuggingFace's convention.
            all_hidden_states.append(x)

        # Get logits.
        # shape: (batch_size, seq_len or 1, vocab_size)
        if self.config.weight_tying:
            logits = F.linear(x, self.transformer.wte.weight, None)  # type: ignore
        else:
            logits = self.transformer.ff_out(x)  # type: ignore
        if self.config.scale_logits:
            logits.mul_(1 / math.sqrt(self.config.d_model))

        if not last_logits_only and append_last_valid_logits is not None:
            last_valid_logit = logits[
                torch.arange(logits.shape[0], device=logits.device), append_last_valid_logits]
            logits = torch.cat([logits[:, :-1], last_valid_logit[:, None]], dim=1)

        return ModelOutput(logits=logits, attn_key_values=attn_key_values, hidden_states=tuple(all_hidden_states) if output_hidden_states else None)  # type: ignore[arg-type]

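# Illustrative sketch (added for exposition; not part of the original upload):
# the forward above turns a 0/1 attention_mask into an additive bias:
# 1 -> 0.0 (keep) and 0 -> dtype-min (drop), which is then summed with the
# causal bias before scaled dot-product attention.
def _sketch_additive_attention_mask() -> None:
    mask = torch.tensor([[1.0, 1.0, 0.0]])[:, None, None, :]
    bias = (1.0 - mask) * torch.finfo(mask.dtype).min
    assert bias[0, 0, 0, 0] == 0.0
    assert bias[0, 0, 0, 2] == torch.finfo(torch.float).min
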
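# Illustrative sketch (added for exposition; not part of the original upload):
# image features are scattered into the token embedding stream with
# (batch_idx, token_idx) advanced indexing; indices < 0 mark padded patches
# that must not be written anywhere. Shapes here are made up for the example.
def _sketch_image_feature_scatter() -> None:
    x = torch.zeros(1, 6, 4)          # (batch, seq_len, d_model)
    feats = torch.ones(1, 3, 4)       # (batch, num_patches, d_model)
    idx = torch.tensor([[2, 3, -1]])  # last patch is padding
    valid = idx >= 0
    batch_idx = torch.tile(torch.arange(1)[:, None], [1, feats.shape[1]])
    x[batch_idx[valid], idx[valid]] += feats[valid]
    assert x[0, 2].sum() == 4 and x[0, 4].sum() == 0
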
class MolmoForCausalLM(PreTrainedModel):
    config_class = MolmoConfig
    base_model_prefix = "model"
    _no_split_modules = ["MolmoBlock"]

    def __init__(self, config: MolmoConfig, model: Optional[Molmo] = None, init_params: bool = False):
        super().__init__(config)

        if not model:
            full_config = FullMolmoConfig(
                image_padding_embed="pad_and_partial_pad",
                image_pooling_2d="attention-meanq",
                attention_layer_norm=config.attention_layer_norm,
                rope_impl="llama",
                vocab_size=config.vocab_size,
                max_sequence_length=config.max_position_embeddings,
                qkv_bias=config.qkv_bias,
                norm_after=config.norm_after,
                embedding_size=config.embedding_size,
                attention_type="sdpa",
                embedding_dropout=0,
                attention_dropout=0,
                residual_dropout=0,
                rope=True,
                weight_tying=False,
                include_bias=False,
                d_model=config.hidden_size,
                mlp_hidden_size=config.intermediate_size,
                n_layers=config.num_hidden_layers,
                additional_vocab_size=128,
                n_heads=config.num_attention_heads,
                n_kv_heads=config.num_key_value_heads,
                rope_theta=config.rope_theta,
                layer_norm_eps=config.layer_norm_eps,
                layer_norm_type=config.layer_norm_type,
                vit_layers=[-2, -9],
                vision_backbone=VisionBackboneConfig(
                    image_default_input_size=(336, 336),
                    image_patch_size=14,
                    image_pos_patch_size=14,
                    image_emb_dim=1024,
                    image_num_heads=16,
                    image_num_key_value_heads=16,
                    image_num_layers=23,
                    image_head_dim=64,
                    image_mlp_dim=4096,
                    image_mlp_activations="quick_gelu",
                    image_dropout_rate=0.0,
                    image_num_pos=577,
                    image_norm_eps=1e-5,
                    attention_dropout=0.0,
                    residual_dropout=0.0,
                    initializer_range=0.02,
                ),
            )
            self.model = Molmo(full_config, init_params=init_params)
        else:
            self.model = model

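    # Usage sketch (comment only; added for exposition, not part of the
    # original upload). This class is normally loaded through the Auto
    # classes with remote code enabled, e.g.:
    #
    #     from transformers import AutoModelForCausalLM, AutoProcessor
    #     model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
    #     processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
    #
    # where `repo_id` stands in for this repository's id.
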
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        attention_bias: Optional[torch.Tensor] = None,
        response_mask: Optional[torch.Tensor] = None,
        images: Optional[torch.Tensor] = None,
        image_masks: Optional[torch.Tensor] = None,
        image_input_idx: Optional[torch.Tensor] = None,
        subsegment_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        labels: Optional[torch.LongTensor] = None,
        loss_masks: Optional[torch.Tensor] = None,
        use_cache: Optional[bool] = None,
        last_logits_only: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        append_last_valid_logits: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[
            Cache
        ] = None,  # This is a hack to mitigate an issue in transformers `4.39.x`: https://github.com/huggingface/transformers/issues/29426
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        if use_cache is None:
            use_cache = self.config.use_cache

        if output_attentions:
            raise ValueError("output_attentions is not yet supported in Molmo")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Decoder outputs consist of (dec_features, layer_state, dec_hidden, dec_attn).
        outputs = self.model.forward(
            input_ids=input_ids,
            input_embeddings=inputs_embeds,
            attention_mask=attention_mask,
            attention_bias=attention_bias,
            response_mask=response_mask,
            images=images,
            image_masks=image_masks,
            image_input_idx=image_input_idx,
            subsegment_ids=subsegment_ids,
            position_ids=position_ids,
            past_key_values=past_key_values,
            use_cache=use_cache,
            last_logits_only=last_logits_only,
            output_hidden_states=output_hidden_states,
            append_last_valid_logits=append_last_valid_logits,
        )

        logits = outputs.logits
        hidden_states = outputs.hidden_states

        loss = None
        if labels is not None:
            if loss_masks is not None:
                loss_masks = loss_masks * (loss_masks > 0)
         | 
| 2131 | 
            +
                            batch_size_in_tokens = max(loss_masks.sum().item(), 1)
         | 
| 2132 | 
            +
                            labels = labels.long()
         | 
| 2133 | 
            +
                            labels.masked_fill_(~(loss_masks > 0), -100)
         | 
| 2134 | 
            +
                            labels = labels.view(-1)
         | 
| 2135 | 
            +
                            logits_for_loss = logits.to(torch.float32).view(-1, logits.size(-1))
         | 
| 2136 | 
            +
                            loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100, reduction='none')
         | 
| 2137 | 
            +
                            loss = loss_fct(logits_for_loss, labels)
         | 
| 2138 | 
            +
                            loss = loss.view(input_ids.shape[0], -1)
         | 
| 2139 | 
            +
                            loss = loss * loss_masks
         | 
| 2140 | 
            +
                            loss = loss.sum() / batch_size_in_tokens
         | 
| 2141 | 
            +
                            use_zloss = getattr(self.config, "softmax_auxiliary_loss", False)
         | 
| 2142 | 
            +
                            if use_zloss:
         | 
| 2143 | 
            +
                                z_squared = logits_for_loss.logsumexp(-1).pow(2)
         | 
| 2144 | 
            +
                                z_loss = self.config.softmax_auxiliary_loss_scale * z_squared
         | 
| 2145 | 
            +
                                z_loss = z_loss.view(input_ids.shape[0], -1)
         | 
| 2146 | 
            +
                                z_loss = z_loss * loss_masks
         | 
| 2147 | 
            +
                                z_loss = z_loss.sum() / batch_size_in_tokens
         | 
| 2148 | 
            +
                                loss += z_loss
         | 
| 2149 | 
            +
                        else:
         | 
| 2150 | 
            +
                            # Shift so that tokens < n predict n
         | 
| 2151 | 
            +
                            shift_logits = logits[..., :-1, :].contiguous()
         | 
| 2152 | 
            +
                            shift_labels = labels[..., 1:].contiguous()
         | 
| 2153 | 
            +
                            # Flatten the tokens
         | 
| 2154 | 
            +
                            loss_fct = torch.nn.CrossEntropyLoss()
         | 
| 2155 | 
            +
                            shift_logits = shift_logits.view(-1, self.config.embedding_size)
         | 
| 2156 | 
            +
                            shift_labels = shift_labels.view(-1)
         | 
| 2157 | 
            +
                            # Enable model parallelism
         | 
| 2158 | 
            +
                            shift_labels = shift_labels.to(shift_logits.device)
         | 
| 2159 | 
            +
                            loss = loss_fct(shift_logits, shift_labels)
         | 
| 2160 | 
            +
             | 
| 2161 | 
            +
                    if not return_dict:
         | 
| 2162 | 
            +
                        output = (logits,) + outputs[1:]
         | 
| 2163 | 
            +
                        return (loss,) + output if loss is not None else output
         | 
| 2164 | 
            +
             | 
| 2165 | 
            +
                    return CausalLMOutputWithPast(
         | 
| 2166 | 
            +
                        loss=loss,
         | 
| 2167 | 
            +
                        logits=logits,
         | 
| 2168 | 
            +
                        past_key_values=outputs.attn_key_values,
         | 
| 2169 | 
            +
                        hidden_states=hidden_states,
         | 
| 2170 | 
            +
                    )
         | 
| 2171 | 
            +
             | 
| 2172 | 
            +
                def can_generate(self) -> bool:
         | 
| 2173 | 
            +
                    return True
         | 
| 2174 | 
            +
             | 
| 2175 | 
            +
                @torch.no_grad()
         | 
| 2176 | 
            +
                def generate_from_batch(
         | 
| 2177 | 
            +
                    self,
         | 
| 2178 | 
            +
                    batch: Dict[str, Any],
         | 
| 2179 | 
            +
                    generation_config: Optional[GenerationConfig] = None,
         | 
| 2180 | 
            +
                    **kwargs,
         | 
| 2181 | 
            +
                ):
         | 
| 2182 | 
            +
                    if generation_config is not None:
         | 
| 2183 | 
            +
                        assert generation_config.use_cache
         | 
| 2184 | 
            +
             | 
| 2185 | 
            +
                    images = batch.get("images")
         | 
| 2186 | 
            +
                    image_masks = batch.get("image_masks")
         | 
| 2187 | 
            +
                    image_input_idx = batch.get("image_input_idx")
         | 
| 2188 | 
            +
             | 
| 2189 | 
            +
                    # Validate inputs.
         | 
| 2190 | 
            +
                    input_ids = batch["input_ids"]
         | 
| 2191 | 
            +
                    batch_size, seq_len = input_ids.shape
         | 
| 2192 | 
            +
                    attention_mask = batch.get("attention_mask", None)
         | 
| 2193 | 
            +
                    max_new_tokens = generation_config.max_new_tokens
         | 
| 2194 | 
            +
                    assert max_new_tokens is not None
         | 
| 2195 | 
            +
                    mask_len = seq_len + max_new_tokens if self.config.use_position_ids else seq_len
         | 
| 2196 | 
            +
                    position_ids: Optional[torch.Tensor] = None
         | 
| 2197 | 
            +
                    append_last_valid_logits: Optional[torch.Tensor] = None
         | 
| 2198 | 
            +
                    if self.config.use_position_ids and attention_mask is None:
         | 
| 2199 | 
            +
                        attention_mask = input_ids != -1
         | 
| 2200 | 
            +
                        position_ids = torch.clamp(
         | 
| 2201 | 
            +
                            torch.cumsum(attention_mask.to(torch.int32), dim=-1) - 1,
         | 
| 2202 | 
            +
                            min=0
         | 
| 2203 | 
            +
                        )
         | 
| 2204 | 
            +
                        append_last_valid_logits = attention_mask.long().sum(dim=-1) - 1
         | 
| 2205 | 
            +
                        attention_mask = torch.cat(
         | 
| 2206 | 
            +
                            [attention_mask, attention_mask.new_ones((batch_size, max_new_tokens))],
         | 
| 2207 | 
            +
                            dim=1,
         | 
| 2208 | 
            +
                        )
         | 
| 2209 | 
            +
                    if attention_mask is not None:
         | 
| 2210 | 
            +
                        assert attention_mask.shape == (batch_size, mask_len)
         | 
| 2211 | 
            +
             | 
| 2212 | 
            +
                    out = super().generate(
         | 
| 2213 | 
            +
                        batch["input_ids"],
         | 
| 2214 | 
            +
                        generation_config,
         | 
| 2215 | 
            +
                        attention_mask=attention_mask,
         | 
| 2216 | 
            +
                        images=images,
         | 
| 2217 | 
            +
                        image_masks=image_masks,
         | 
| 2218 | 
            +
                        image_input_idx=image_input_idx,
         | 
| 2219 | 
            +
                        position_ids=position_ids,
         | 
| 2220 | 
            +
                        append_last_valid_logits=append_last_valid_logits,
         | 
| 2221 | 
            +
                        **kwargs,
         | 
| 2222 | 
            +
                    )
         | 
| 2223 | 
            +
             | 
| 2224 | 
            +
                    return out
         | 
| 2225 | 
            +
             | 
| 2226 | 
            +
                def prepare_inputs_for_generation(
         | 
| 2227 | 
            +
                    self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple]] = None, **kwargs
         | 
| 2228 | 
            +
                ):
         | 
| 2229 | 
            +
                    if past_key_values:
         | 
| 2230 | 
            +
                        # This is because we want the model to only process the last generated token.
         | 
| 2231 | 
            +
                        input_ids = input_ids[:, -1:]
         | 
| 2232 | 
            +
             | 
| 2233 | 
            +
                    if self.config.use_position_ids:
         | 
| 2234 | 
            +
                        attention_mask = kwargs.get("attention_mask")
         | 
| 2235 | 
            +
                        images = kwargs.get("images")
         | 
| 2236 | 
            +
                        image_masks = kwargs.get("image_masks")
         | 
| 2237 | 
            +
                        image_input_idx = kwargs.get("image_input_idx")
         | 
| 2238 | 
            +
                        position_ids = kwargs.get("position_ids")
         | 
| 2239 | 
            +
                        append_last_valid_logits = kwargs.get("append_last_valid_logits")
         | 
| 2240 | 
            +
                        model_inputs = {
         | 
| 2241 | 
            +
                            "input_ids": input_ids,
         | 
| 2242 | 
            +
                            "attention_mask": attention_mask,
         | 
| 2243 | 
            +
                            "position_ids": position_ids,
         | 
| 2244 | 
            +
                            "past_key_values": past_key_values,
         | 
| 2245 | 
            +
                            "use_cache": True,
         | 
| 2246 | 
            +
                            "last_logits_only": True,
         | 
| 2247 | 
            +
                        }
         | 
| 2248 | 
            +
                        if past_key_values is None:
         | 
| 2249 | 
            +
                            model_inputs["images"] = images
         | 
| 2250 | 
            +
                            model_inputs["image_masks"] = image_masks
         | 
| 2251 | 
            +
                            model_inputs["image_input_idx"] = image_input_idx
         | 
| 2252 | 
            +
                            model_inputs["append_last_valid_logits"] = append_last_valid_logits
         | 
| 2253 | 
            +
                    else:
         | 
| 2254 | 
            +
                        model_inputs = {"input_ids": input_ids, "past_key_values": past_key_values}
         | 
| 2255 | 
            +
             | 
| 2256 | 
            +
                        model_inputs.update(kwargs)
         | 
| 2257 | 
            +
                        model_inputs["use_cache"] = kwargs.pop("use_cache", self.config.use_cache)
         | 
| 2258 | 
            +
                    return model_inputs
         | 
| 2259 | 
            +
             | 
| 2260 | 
            +
                def _update_model_kwargs_for_generation(
         | 
| 2261 | 
            +
                    self,
         | 
| 2262 | 
            +
                    outputs: ModelOutput,
         | 
| 2263 | 
            +
                    model_kwargs: Dict[str, Any],
         | 
| 2264 | 
            +
                    is_encoder_decoder: bool = False,
         | 
| 2265 | 
            +
                    num_new_tokens: int = 1,
         | 
| 2266 | 
            +
                ) -> Dict[str, Any]:
         | 
| 2267 | 
            +
                    if self.config.use_position_ids:
         | 
| 2268 | 
            +
                        model_kwargs["position_ids"] = model_kwargs["position_ids"][:, -1:] + 1
         | 
| 2269 | 
            +
                        if "append_last_valid_logits" in model_kwargs:
         | 
| 2270 | 
            +
                            del model_kwargs["append_last_valid_logits"]
         | 
| 2271 | 
            +
                        if "images" in model_kwargs:
         | 
| 2272 | 
            +
                            del model_kwargs["images"]
         | 
| 2273 | 
            +
                            del model_kwargs["image_masks"]
         | 
| 2274 | 
            +
                            del model_kwargs["image_input_idx"]
         | 
| 2275 | 
            +
                    cache_name, cache = super()._extract_past_from_model_output(outputs)
         | 
| 2276 | 
            +
                    model_kwargs[cache_name] = cache
         | 
| 2277 | 
            +
                    model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens
         | 
| 2278 | 
            +
                    return model_kwargs
         | 
| 2279 | 
            +
             | 
| 2280 | 
            +
                def get_input_embeddings(self) -> torch.nn.Module:
         | 
| 2281 | 
            +
                    return self.model.transformer.wte
         | 
| 2282 | 
            +
             | 
| 2283 | 
            +
                def set_input_embeddings(self, value: torch.nn.Module):
         | 
| 2284 | 
            +
                    self.model.transformer.wte = value
         | 
| 2285 | 
            +
             | 
| 2286 | 
            +
                def get_output_embeddings(self):
         | 
| 2287 | 
            +
                    if self.config.weight_tying:
         | 
| 2288 | 
            +
                        return self.model.transformer.wte
         | 
| 2289 | 
            +
                    else:
         | 
| 2290 | 
            +
                        return self.model.transformer.ff_out
         | 
| 2291 | 
            +
             | 
| 2292 | 
            +
                def set_output_embeddings(self, value: torch.nn.Module):
         | 
| 2293 | 
            +
                    if self.config.weight_tying:
         | 
| 2294 | 
            +
                        self.model.transformer.wte = value
         | 
| 2295 | 
            +
                    else:
         | 
| 2296 | 
            +
                        self.model.transformer.ff_out = value
         | 
| 2297 | 
            +
             | 
| 2298 | 
            +
                def tie_weights(self):
         | 
| 2299 | 
            +
                    """
         | 
| 2300 | 
            +
                    This function is intentionally left as a no-op.
         | 
| 2301 | 
            +
             | 
| 2302 | 
            +
                    Weight tying is handled as follows:
         | 
| 2303 | 
            +
                    - When the model is initialized, the `ff_out` layer is conditionally defined based on the `weight_tying` configuration.
         | 
| 2304 | 
            +
                    See: `if not config.weight_tying: self.transformer.update(...)` in `olmo/model.py`.
         | 
| 2305 | 
            +
                    - When computing logits, the `wte` weights are used directly if `weight_tying` is enabled.
         | 
| 2306 | 
            +
                    See: `if self.config.weight_tying: logits = F.linear(x, self.transformer.wte.weight, None)` in the `forward` method.
         | 
| 2307 | 
            +
             | 
| 2308 | 
            +
                    Therefore, there is no need to explicitly tie the weights in this function.
         | 
| 2309 | 
            +
                    """
         | 
| 2310 | 
            +
                    pass
         | 
| 2311 | 
            +
             | 
| 2312 | 
            +
                def resize_token_embeddings(
         | 
| 2313 | 
            +
                    self, new_num_tokens: Optional[int] = None, pad_to_multiple_of: Optional[int] = None
         | 
| 2314 | 
            +
                ) -> torch.nn.Embedding:
         | 
| 2315 | 
            +
                    """
         | 
| 2316 | 
            +
                    Resizes input token embeddings matrix of the model if `new_num_tokens != config.embedding_size`.
         | 
| 2317 | 
            +
             | 
| 2318 | 
            +
                    Takes care of tying weights embeddings afterwards if the model class has a `tie_weights()` method.
         | 
| 2319 | 
            +
             | 
| 2320 | 
            +
                    Arguments:
         | 
| 2321 | 
            +
                        new_num_tokens (`int`, *optional*):
         | 
| 2322 | 
            +
                            The new number of tokens in the embedding matrix. Increasing the size will add newly initialized
         | 
| 2323 | 
            +
                            vectors at the end. Reducing the size will remove vectors from the end. If not provided or `None`, just
         | 
| 2324 | 
            +
                            returns a pointer to the input tokens `torch.nn.Embedding` module of the model without doing anything.
         | 
| 2325 | 
            +
                        pad_to_multiple_of (`int`, *optional*):
         | 
| 2326 | 
            +
                            If set will pad the embedding matrix to a multiple of the provided value. If `new_num_tokens` is set to
         | 
| 2327 | 
            +
                            `None` will just pad the embedding to a multiple of `pad_to_multiple_of`.
         | 
| 2328 | 
            +
             | 
| 2329 | 
            +
                            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
         | 
| 2330 | 
            +
                            `>= 7.5` (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128. For more
         | 
| 2331 | 
            +
                            details about this, or help on choosing the correct value for resizing, refer to this guide:
         | 
| 2332 | 
            +
                            https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc
         | 
| 2333 | 
            +
             | 
| 2334 | 
            +
                    Return:
         | 
| 2335 | 
            +
                        `torch.nn.Embedding`: Pointer to the input tokens Embeddings Module of the model.
         | 
| 2336 | 
            +
             | 
| 2337 | 
            +
                    Note:
         | 
| 2338 | 
            +
                        This method differs from the base class implementation by resizing the `embedding_size` attribute of the
         | 
| 2339 | 
            +
                        model configuration instead of the `vocab_size`. It also includes a warning if the resized `embedding_size`
         | 
| 2340 | 
            +
                        is less than the `vocab_size`. In OLMo, `embedding_size` refers to the dimensionality of the model's token
         | 
| 2341 | 
            +
                        embeddings, while `vocab_size` refers to the number of unique tokens in the vocabulary.
         | 
| 2342 | 
            +
                    """
         | 
| 2343 | 
            +
                    model_embeds = self._resize_token_embeddings(new_num_tokens, pad_to_multiple_of)
         | 
| 2344 | 
            +
                    if new_num_tokens is None and pad_to_multiple_of is None:
         | 
| 2345 | 
            +
                        return model_embeds
         | 
| 2346 | 
            +
             | 
| 2347 | 
            +
                    # Update base model and current model config
         | 
| 2348 | 
            +
                    self.config.embedding_size = model_embeds.weight.shape[0]
         | 
| 2349 | 
            +
                    self.model.config.embedding_size = model_embeds.weight.shape[0]
         | 
| 2350 | 
            +
             | 
| 2351 | 
            +
                    # Check if the embedding size is less than the vocab size
         | 
| 2352 | 
            +
                    if self.config.embedding_size < self.config.vocab_size:
         | 
| 2353 | 
            +
                        warning_message = (
         | 
| 2354 | 
            +
                            f"Resizing token embeddings to size {self.config.embedding_size}, which is less than the vocab size "
         | 
| 2355 | 
            +
                            f"{self.config.vocab_size} defined in the model configuration. Make sure your tokenizer's vocabulary "
         | 
| 2356 | 
            +
                            "size is less than or equal to the new token embedding size."
         | 
| 2357 | 
            +
                        )
         | 
| 2358 | 
            +
                        log.warning(warning_message)
         | 
| 2359 | 
            +
             | 
| 2360 | 
            +
                    # Tie weights again if needed
         | 
| 2361 | 
            +
                    self.tie_weights()
         | 
| 2362 | 
            +
             | 
| 2363 | 
            +
                    return model_embeds
         | 
| 2364 | 
            +
             | 
| 2365 | 
            +
             | 
| 2366 | 
            +
            # Always register for multi-modal features
         | 
| 2367 | 
            +
            AutoModelForCausalLM.register(MolmoConfig, MolmoForCausalLM)
         | 
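Taken together: `MolmoProcessor.process` (next file) returns unbatched tensors, and generation is routed through `generate_from_batch`, which builds `position_ids` and pre-extends the attention mask by `max_new_tokens`. A minimal inference sketch under those assumptions (the image path and prompt are illustrative; the repo id comes from `sft_args.json` below, and loading depends on the `auto_map` entries, so `trust_remote_code=True` is required):

    import torch
    from PIL import Image
    from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig

    # Repo id taken from sft_args.json below; substitute this checkpoint's own path.
    repo = "allenai/Molmo-7B-D-0924"
    processor = AutoProcessor.from_pretrained(repo, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        repo, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map="auto"
    )

    # MolmoProcessor.process returns unbatched tensors, so add a batch dimension.
    inputs = processor.process(images=[Image.open("example.jpg")], text="Describe this image.")
    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

    # generate_from_batch asserts use_cache and requires max_new_tokens;
    # the tokenizer kwarg is needed for stop_strings to take effect.
    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )

    # Decode only the newly generated tokens.
    generated = output[0, inputs["input_ids"].size(1):]
    print(processor.tokenizer.decode(generated, skip_special_tokens=True))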
    	
preprocessing_molmo.py
ADDED

@@ -0,0 +1,192 @@
+"""
+Processor class for Molmo.
+"""
+
+from typing import Optional
+
+import PIL
+from PIL import ImageOps
+from PIL.Image import Image
+
+try:
+    from typing import Unpack
+except ImportError:
+    from typing_extensions import Unpack
+
+import numpy as np
+import torch
+
+from transformers.image_utils import ImageInput
+from transformers.processing_utils import (
+    TextKwargs,
+    ProcessingKwargs,
+    ProcessorMixin,
+)
+
+from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
+from transformers.utils import logging
+
+from transformers import AutoTokenizer
+from .image_preprocessing_molmo import MolmoImagesKwargs, MolmoImageProcessor
+
+
+logger = logging.get_logger(__name__)
+
+
+DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
+DEFAULT_IM_START_TOKEN = "<im_start>"
+DEFAULT_IM_END_TOKEN = "<im_end>"
+DEFAULT_IM_COL_TOKEN = "<im_col>"
+IMAGE_PROMPT = "<|image|>"
+
+EXTRA_TOKENS = (DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_COL_TOKEN, IMAGE_PROMPT)
+
+
+def get_special_token_ids(tokenizer):
+    ids = tokenizer.encode("".join(EXTRA_TOKENS), add_special_tokens=False)
+    assert len(ids) == len(EXTRA_TOKENS)
+    return {k: i for k, i in zip(EXTRA_TOKENS, ids)}
+
+
+class MolmoTextKwargs(TextKwargs, total=False):
+    style: Optional[str]
+    system_prompt: Optional[str]
+    message_format: Optional[str]
+    always_start_with_space: Optional[bool]
+    sequence_length: Optional[int]
+
+
+class MolmoProcessorKwargs(ProcessingKwargs, total=False):
+    text_kwargs: MolmoTextKwargs
+    images_kwargs: MolmoImagesKwargs
+    _defaults = {
+        "images_kwargs": {
+            "max_crops": 12,
+            "overlap_margins": [4, 4],
+            "base_image_input_size": [336, 336],
+            "image_token_length_w": 12,
+            "image_token_length_h": 12,
+            "image_patch_size": 14,
+            "image_padding_mask": True,
+        },
+        "text_kwargs": {
+            "style": "long_caption",
+            "system_prompt": "none",
+            "message_format": "role",
+            "always_start_with_space": True,
+            "sequence_length": 1536,
+            "padding": False,
+        },
+    }
+
+
+class MolmoProcessor(ProcessorMixin):
+    attributes = ["image_processor", "tokenizer"]
+    image_processor_class = "AutoImageProcessor"
+    tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
+
+    def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer: AutoTokenizer = None, **kwargs):
+        # ProcessorMixin assigns self.image_processor and self.tokenizer.
+        super().__init__(image_processor, tokenizer)
+        self._special_tokens = None
+
+    @property
+    def special_token_ids(self):
+        if self._special_tokens is None:
+            self._special_tokens = get_special_token_ids(self.tokenizer)
+        return self._special_tokens
+
+    def get_tokens_input(self, prompt, message_format, always_start_with_space):
+        if message_format == "none" or message_format is None:
+            pass
+        elif message_format == "role":
+            prompt = "User: " + prompt + " Assistant:"
+        else:
+            raise NotImplementedError(f"Message format {message_format} not implemented")
+
+        if always_start_with_space:
+            prompt = " " + prompt
+
+        tokens = self.tokenizer.encode(prompt, add_special_tokens=False)
+
+        return tokens
+
+    def process(
+        self,
+        text: TextInput = None,
+        images: ImageInput = None,
+        *,
+        tokens: Optional[PreTokenizedInput] = None,
+        **kwargs: Unpack[MolmoProcessorKwargs],
+    ):
+        output_kwargs = self._merge_kwargs(
+            MolmoProcessorKwargs,
+            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
+            **kwargs,
+        )
+
+        if tokens is None:
+            tokens = self.get_tokens_input(
+                text,
+                output_kwargs["text_kwargs"]["message_format"],
+                output_kwargs["text_kwargs"]["always_start_with_space"],
+            )
+
+        image_token_id = self.special_token_ids[IMAGE_PROMPT]
+
+        if images is not None:
+            if not isinstance(images, (list, tuple)):
+                images = [images]
+            image_arrays = []
+            for image in images:
+                if isinstance(image, Image):
+                    image = image.convert("RGB")
+                    # Handle images with EXIF orientation tags, which PIL will ignore by default
+                    # https://github.com/python-pillow/Pillow/issues/4703
+                    image = ImageOps.exif_transpose(image)
+                    image_arrays.append(np.array(image))
+                else:
+                    assert len(image.shape) == 3 and image.shape[-1] == 3
+                    image_arrays.append(image.astype(np.uint8))
+            images = image_arrays
+            # For now only support inserting images at the start
+            image_idx = [-1] * len(images)
+        else:
+            image_idx = None
+
+        sequence_length = output_kwargs["text_kwargs"]["sequence_length"]
+
+        image_patch_token_id = self.special_token_ids[DEFAULT_IMAGE_PATCH_TOKEN]
+        image_col_token_id = self.special_token_ids[DEFAULT_IM_COL_TOKEN]
+        image_start_token_id = self.special_token_ids[DEFAULT_IM_START_TOKEN]
+        image_end_token_id = self.special_token_ids[DEFAULT_IM_END_TOKEN]
+        out = self.image_processor.multimodal_preprocess(
+            images=images,
+            image_idx=image_idx,
+            tokens=np.asarray(tokens).astype(np.int32),
+            sequence_length=sequence_length,
+            image_patch_token_id=image_patch_token_id,
+            image_col_token_id=image_col_token_id,
+            image_start_token_id=image_start_token_id,
+            image_end_token_id=image_end_token_id,
+            **output_kwargs["images_kwargs"]
+        )
+
+        # Prepend BOS
+        # qwen2 and olmo do not have a BOS, and instead use EOS as a generic separator token.
+        bos = self.tokenizer.bos_token_id or self.tokenizer.eos_token_id
+        decoder_input_tokens = np.pad(out["input_ids"], [[1, 0]], constant_values=bos)
+        out["input_ids"] = decoder_input_tokens
+        if "image_input_idx" in out:
+            # Shift patch mapping up by one since we added BOS
+            image_input_idx = out["image_input_idx"]
+            out["image_input_idx"] = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)
+
+        for k, v in out.items():
+            out[k] = torch.from_numpy(v)
+
+        return out
+
+
+MolmoProcessor.register_for_auto_class()
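The tail of `process` prepends a BOS token (falling back to EOS, since Qwen2 and OLMo define no BOS) and shifts every non-negative entry of `image_input_idx` by one so patch positions still point at the right tokens. A toy sketch of just that shift (all ids here are illustrative):

    import numpy as np

    # Toy ids; real values come from the tokenizer and multimodal_preprocess.
    input_ids = np.array([100, 101, 102], dtype=np.int32)
    image_input_idx = np.array([[-1, 0, 1]], dtype=np.int32)  # -1 marks unused patch slots

    bos = 151643  # Qwen2's <|endoftext|> id, used as the separator fallback
    input_ids = np.pad(input_ids, [[1, 0]], constant_values=bos)
    # Every valid patch position moves right by one; -1 entries are left alone.
    image_input_idx = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)

    print(input_ids)        # [151643    100    101    102]
    print(image_input_idx)  # [[-1  1  2]]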
    	
preprocessor_config.json
ADDED

@@ -0,0 +1,32 @@
+{
+  "auto_map": {
+    "AutoImageProcessor": "image_preprocessing_molmo.MolmoImageProcessor",
+    "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
+  },
+  "base_image_input_size": [
+    336,
+    336
+  ],
+  "do_normalize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_padding_mask": true,
+  "image_patch_size": 14,
+  "image_processor_type": "MolmoImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "image_token_length_h": 12,
+  "image_token_length_w": 12,
+  "max_crops": 12,
+  "overlap_margins": [
+    4,
+    4
+  ],
+  "processor_class": "MolmoProcessor"
+}
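The `image_mean`/`image_std` values above are the standard OpenAI CLIP normalization constants, consistent with the CLIP-style vision settings in the model config earlier in this diff (`base_image_input_size` 336 with `image_patch_size` 14 gives 24x24 patches, matching `image_num_pos=577`, i.e. 576 patches plus one class embedding). A sketch of what `do_normalize` amounts to per pixel (the helper name is hypothetical; the real work happens in `MolmoImageProcessor`):

    import numpy as np

    image_mean = np.array([0.48145466, 0.4578275, 0.40821073])   # OpenAI CLIP mean
    image_std = np.array([0.26862954, 0.26130258, 0.27577711])   # OpenAI CLIP std

    def normalize(image_uint8: np.ndarray) -> np.ndarray:
        """Scale an (H, W, 3) uint8 image to [0, 1], then standardize each RGB channel."""
        return (image_uint8.astype(np.float32) / 255.0 - image_mean) / image_std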
    	
processor_config.json
ADDED

@@ -0,0 +1,6 @@
+{
+  "auto_map": {
+    "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
+  },
+  "processor_class": "MolmoProcessor"
+}
    	
rng_state_0.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:982bd0c16d66c3eece7beee8bddaa14b1c90bf8f69dc67202bcca3b4b9a47099
+size 15920

rng_state_1.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83524844d92e9feac04f6aecfa5db5c81a81d9dd5372c0920e79335c77d682d2
+size 15984

rng_state_2.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b06a6c6d5a48ebb0153ad37758d2b03ad29fe4301cfc26478c192c7ed04c307
+size 15984

rng_state_3.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88d74037aa582128d227eb27cb2d038952cccdf99e8e8e8219de21446998ab84
+size 15984

rng_state_4.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69215d1dfcdd2160c9e8935122d3736bdb9208c4f4e5cdb64010ac9f930b6f85
+size 15984

rng_state_5.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dc6cbe198a406c6be157f6aad5cd9ebe4769a6af9021fec774c7ab74dc307bb
+size 16048

rng_state_6.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9116f6b7b82a60cdbd043bc8057decfa76d0d733c9d501f0d1fe4d6cd6a85f9c
+size 15920

rng_state_7.pth
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba0d0c71b643a2e6f949ade56d2178689b3639c840dc953d1d1223cf4eb00839
+size 15920
    	
sft_args.json
ADDED

@@ -0,0 +1,302 @@
+{
+  "model_type": "molmo-7b-d",
+  "model_id_or_path": "allenai/Molmo-7B-D-0924",
+  "model_revision": "main",
+  "full_determinism": false,
+  "sft_type": "full",
+  "freeze_parameters": [],
+  "freeze_vit": false,
+  "freeze_parameters_ratio": 0.0,
+  "additional_trainable_parameters": [],
+  "tuner_backend": "peft",
+  "template_type": "molmo",
+  "output_dir": "/workspace/output/molmo-7b-d/v0-20250103-184047",
+  "add_output_dir_suffix": true,
+  "ddp_backend": "nccl",
+  "ddp_find_unused_parameters": null,
+  "ddp_broadcast_buffers": null,
+  "ddp_timeout": 1800,
+  "seed": 42,
+  "resume_from_checkpoint": null,
+  "resume_only_model": false,
+  "ignore_data_skip": false,
+  "dtype": "bf16",
+  "packing": false,
+  "train_backend": "transformers",
+  "tp": 1,
+  "pp": 1,
+  "min_lr": null,
+  "sequence_parallel": false,
+  "model_kwargs": {},
+  "loss_name": null,
+  "dataset": [
+    "/workspace/train.jsonl"
+  ],
+  "val_dataset": [
+    "/workspace/val.jsonl"
+  ],
+  "dataset_seed": 42,
+  "dataset_test_ratio": 0.0,
+  "use_loss_scale": false,
+  "loss_scale_config_path": "/workspace/miniconda/lib/python3.12/site-packages/swift/llm/agent/default_loss_scale_config.json",
+  "system": null,
+  "tools_prompt": "react_en",
+  "max_length": 4096,
+  "truncation_strategy": "delete",
+  "check_dataset_strategy": "none",
+  "streaming": false,
+  "streaming_val_size": 0,
+  "streaming_buffer_size": 16384,
+  "model_name": [
+    null,
+    null
+  ],
+  "model_author": [
+    null,
+    null
+  ],
+  "quant_method": null,
+  "quantization_bit": 0,
+  "hqq_axis": 0,
+  "hqq_dynamic_config_path": null,
+  "bnb_4bit_comp_dtype": "bf16",
+  "bnb_4bit_quant_type": "nf4",
+  "bnb_4bit_use_double_quant": true,
+  "bnb_4bit_quant_storage": null,
+  "rescale_image": -1,
+  "target_modules": "^(model.transformer)(?!.*(lm_head|output|emb|wte|shared)).*",
+  "target_regex": null,
+  "modules_to_save": [],
+  "lora_rank": 8,
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "lora_bias_trainable": "none",
+  "lora_dtype": "AUTO",
+  "lora_lr_ratio": null,
+  "use_rslora": false,
+  "use_dora": false,
+  "init_lora_weights": "true",
+  "fourier_n_frequency": 2000,
+  "fourier_scaling": 300.0,
+  "rope_scaling": null,
+  "boft_block_size": 4,
+  "boft_block_num": 0,
+  "boft_n_butterfly_factor": 1,
+  "boft_dropout": 0.0,
+  "vera_rank": 256,
+  "vera_projection_prng_key": 0,
+  "vera_dropout": 0.0,
+  "vera_d_initial": 0.1,
+  "adapter_act": "gelu",
+  "adapter_length": 128,
+  "use_galore": false,
+  "galore_target_modules": null,
+  "galore_rank": 128,
+  "galore_update_proj_gap": 50,
+  "galore_scale": 1.0,
+  "galore_proj_type": "std",
+  "galore_optim_per_parameter": false,
+  "galore_with_embedding": false,
+  "galore_quantization": false,
+  "galore_proj_quant": false,
+  "galore_proj_bits": 4,
+  "galore_proj_group_size": 256,
+  "galore_cos_threshold": 0.4,
+  "galore_gamma_proj": 2,
+  "galore_queue_size": 5,
+  "adalora_target_r": 8,
+  "adalora_init_r": 12,
+  "adalora_tinit": 0,
+  "adalora_tfinal": 0,
+  "adalora_deltaT": 1,
+  "adalora_beta1": 0.85,
+  "adalora_beta2": 0.85,
+  "adalora_orth_reg_weight": 0.5,
+  "ia3_feedforward_modules": [],
+  "llamapro_num_new_blocks": 4,
+  "llamapro_num_groups": null,
+  "neftune_noise_alpha": null,
+  "neftune_backend": "transformers",
+  "lisa_activated_layers": 0,
+  "lisa_step_interval": 20,
+  "reft_layer_key": null,
+  "reft_layers": null,
+  "reft_rank": 4,
+  "reft_intervention_type": "LoreftIntervention",
+  "reft_args": null,
+  "use_liger": false,
+  "gradient_checkpointing": false,
+  "vit_use_gc": true,
+  "deepspeed": {
+    "fp16": {
+      "enabled": "auto",
+      "loss_scale": 0,
+      "loss_scale_window": 1000,
+      "initial_scale_power": 16,
+      "hysteresis": 2,
+      "min_loss_scale": 1
+    },
+    "bf16": {
+      "enabled": "auto"
| 141 | 
            +
                },
         | 
| 142 | 
            +
                "optimizer": {
         | 
| 143 | 
            +
                  "type": "AdamW",
         | 
| 144 | 
            +
                  "params": {
         | 
| 145 | 
            +
                    "lr": "auto",
         | 
| 146 | 
            +
                    "betas": "auto",
         | 
| 147 | 
            +
                    "eps": "auto",
         | 
| 148 | 
            +
                    "weight_decay": "auto"
         | 
| 149 | 
            +
                  }
         | 
| 150 | 
            +
                },
         | 
| 151 | 
            +
                "scheduler": {
         | 
| 152 | 
            +
                  "type": "WarmupCosineLR",
         | 
| 153 | 
            +
                  "params": {
         | 
| 154 | 
            +
                    "total_num_steps": "auto",
         | 
| 155 | 
            +
                    "warmup_num_steps": "auto"
         | 
| 156 | 
            +
                  }
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                "zero_optimization": {
         | 
| 159 | 
            +
                  "stage": 3,
         | 
| 160 | 
            +
                  "offload_optimizer": {
         | 
| 161 | 
            +
                    "device": "none",
         | 
| 162 | 
            +
                    "pin_memory": true
         | 
| 163 | 
            +
                  },
         | 
| 164 | 
            +
                  "offload_param": {
         | 
| 165 | 
            +
                    "device": "none",
         | 
| 166 | 
            +
                    "pin_memory": true
         | 
| 167 | 
            +
                  },
         | 
| 168 | 
            +
                  "overlap_comm": true,
         | 
| 169 | 
            +
                  "contiguous_gradients": true,
         | 
| 170 | 
            +
                  "sub_group_size": 1000000000.0,
         | 
| 171 | 
            +
                  "reduce_bucket_size": "auto",
         | 
| 172 | 
            +
                  "stage3_prefetch_bucket_size": "auto",
         | 
| 173 | 
            +
                  "stage3_param_persistence_threshold": "auto",
         | 
| 174 | 
            +
                  "stage3_max_live_parameters": 1000000000.0,
         | 
| 175 | 
            +
                  "stage3_max_reuse_distance": 1000000000.0,
         | 
| 176 | 
            +
                  "stage3_gather_16bit_weights_on_model_save": true
         | 
| 177 | 
            +
                },
         | 
| 178 | 
            +
                "gradient_accumulation_steps": "auto",
         | 
| 179 | 
            +
                "gradient_clipping": "auto",
         | 
| 180 | 
            +
                "steps_per_print": 2000,
         | 
| 181 | 
            +
                "train_batch_size": "auto",
         | 
| 182 | 
            +
                "train_micro_batch_size_per_gpu": "auto",
         | 
| 183 | 
            +
                "wall_clock_breakdown": false
         | 
| 184 | 
            +
              },
         | 
| 185 | 
            +
              "batch_size": 1,
         | 
| 186 | 
            +
              "eval_batch_size": 1,
         | 
| 187 | 
            +
              "auto_find_batch_size": false,
         | 
| 188 | 
            +
              "num_train_epochs": 6,
         | 
| 189 | 
            +
              "max_steps": -1,
         | 
| 190 | 
            +
              "optim": "adamw_torch",
         | 
| 191 | 
            +
              "adam_beta1": 0.9,
         | 
| 192 | 
            +
              "adam_beta2": 0.95,
         | 
| 193 | 
            +
              "adam_epsilon": 1e-08,
         | 
| 194 | 
            +
              "learning_rate": 1e-05,
         | 
| 195 | 
            +
              "weight_decay": 0.1,
         | 
| 196 | 
            +
              "gradient_accumulation_steps": 2,
         | 
| 197 | 
            +
              "max_grad_norm": 1,
         | 
| 198 | 
            +
              "predict_with_generate": false,
         | 
| 199 | 
            +
              "lr_scheduler_type": "cosine",
         | 
| 200 | 
            +
              "lr_scheduler_kwargs": {},
         | 
| 201 | 
            +
              "warmup_ratio": 0.05,
         | 
| 202 | 
            +
              "warmup_steps": 0,
         | 
| 203 | 
            +
              "eval_steps": 200,
         | 
| 204 | 
            +
              "save_steps": 200,
         | 
| 205 | 
            +
              "save_only_model": false,
         | 
| 206 | 
            +
              "save_total_limit": 5,
         | 
| 207 | 
            +
              "logging_steps": 5,
         | 
| 208 | 
            +
              "acc_steps": 1,
         | 
| 209 | 
            +
              "dataloader_num_workers": 1,
         | 
| 210 | 
            +
              "dataloader_pin_memory": true,
         | 
| 211 | 
            +
              "dataloader_drop_last": false,
         | 
| 212 | 
            +
              "push_to_hub": false,
         | 
| 213 | 
            +
              "hub_model_id": null,
         | 
| 214 | 
            +
              "hub_token": null,
         | 
| 215 | 
            +
              "hub_private_repo": false,
         | 
| 216 | 
            +
              "hub_strategy": "every_save",
         | 
| 217 | 
            +
              "test_oom_error": false,
         | 
| 218 | 
            +
              "disable_tqdm": false,
         | 
| 219 | 
            +
              "lazy_tokenize": true,
         | 
| 220 | 
            +
              "preprocess_num_proc": 1,
         | 
| 221 | 
            +
              "use_flash_attn": null,
         | 
| 222 | 
            +
              "ignore_args_error": false,
         | 
| 223 | 
            +
              "check_model_is_latest": true,
         | 
| 224 | 
            +
              "logging_dir": "/workspace/output/molmo-7b-d/v0-20250103-184047/runs",
         | 
| 225 | 
            +
              "report_to": [
         | 
| 226 | 
            +
                "wandb"
         | 
| 227 | 
            +
              ],
         | 
| 228 | 
            +
              "acc_strategy": "token",
         | 
| 229 | 
            +
              "save_on_each_node": false,
         | 
| 230 | 
            +
              "evaluation_strategy": "epoch",
         | 
| 231 | 
            +
              "save_strategy": "epoch",
         | 
| 232 | 
            +
              "save_safetensors": true,
         | 
| 233 | 
            +
              "gpu_memory_fraction": null,
         | 
| 234 | 
            +
              "include_num_input_tokens_seen": false,
         | 
| 235 | 
            +
              "local_repo_path": null,
         | 
| 236 | 
            +
              "custom_register_path": null,
         | 
| 237 | 
            +
              "custom_dataset_info": null,
         | 
| 238 | 
            +
              "device_map_config": null,
         | 
| 239 | 
            +
              "device_max_memory": [],
         | 
| 240 | 
            +
              "max_new_tokens": 2048,
         | 
| 241 | 
            +
              "do_sample": null,
         | 
| 242 | 
            +
              "temperature": null,
         | 
| 243 | 
            +
              "top_k": null,
         | 
| 244 | 
            +
              "top_p": null,
         | 
| 245 | 
            +
              "repetition_penalty": null,
         | 
| 246 | 
            +
              "num_beams": 1,
         | 
| 247 | 
            +
              "fsdp": "",
         | 
| 248 | 
            +
              "fsdp_config": null,
         | 
| 249 | 
            +
              "sequence_parallel_size": 1,
         | 
| 250 | 
            +
              "model_layer_cls_name": null,
         | 
| 251 | 
            +
              "metric_warmup_step": 0,
         | 
| 252 | 
            +
              "fsdp_num": 1,
         | 
| 253 | 
            +
              "per_device_train_batch_size": null,
         | 
| 254 | 
            +
              "per_device_eval_batch_size": null,
         | 
| 255 | 
            +
              "eval_strategy": "epoch",
         | 
| 256 | 
            +
              "self_cognition_sample": 0,
         | 
| 257 | 
            +
              "train_dataset_mix_ratio": 0.0,
         | 
| 258 | 
            +
              "train_dataset_mix_ds": [
         | 
| 259 | 
            +
                "ms-bench"
         | 
| 260 | 
            +
              ],
         | 
| 261 | 
            +
              "train_dataset_sample": -1,
         | 
| 262 | 
            +
              "val_dataset_sample": null,
         | 
| 263 | 
            +
              "safe_serialization": null,
         | 
| 264 | 
            +
              "only_save_model": null,
         | 
| 265 | 
            +
              "neftune_alpha": null,
         | 
| 266 | 
            +
              "deepspeed_config_path": null,
         | 
| 267 | 
            +
              "model_cache_dir": null,
         | 
| 268 | 
            +
              "lora_dropout_p": null,
         | 
| 269 | 
            +
              "lora_target_modules": [],
         | 
| 270 | 
            +
              "lora_target_regex": null,
         | 
| 271 | 
            +
              "lora_modules_to_save": [],
         | 
| 272 | 
            +
              "boft_target_modules": [],
         | 
| 273 | 
            +
              "boft_modules_to_save": [],
         | 
| 274 | 
            +
              "vera_target_modules": [],
         | 
| 275 | 
            +
              "vera_modules_to_save": [],
         | 
| 276 | 
            +
              "ia3_target_modules": [],
         | 
| 277 | 
            +
              "ia3_modules_to_save": [],
         | 
| 278 | 
            +
              "custom_train_dataset_path": [],
         | 
| 279 | 
            +
              "custom_val_dataset_path": [],
         | 
| 280 | 
            +
              "device_map_config_path": null,
         | 
| 281 | 
            +
              "push_hub_strategy": null,
         | 
| 282 | 
            +
              "use_self_cognition": false,
         | 
| 283 | 
            +
              "is_multimodal": true,
         | 
| 284 | 
            +
              "is_vision": true,
         | 
| 285 | 
            +
              "lora_use_embedding": false,
         | 
| 286 | 
            +
              "lora_use_all": false,
         | 
| 287 | 
            +
              "lora_m2s_use_embedding": false,
         | 
| 288 | 
            +
              "lora_m2s_use_ln": false,
         | 
| 289 | 
            +
              "torch_dtype": "torch.bfloat16",
         | 
| 290 | 
            +
              "fp16": false,
         | 
| 291 | 
            +
              "bf16": true,
         | 
| 292 | 
            +
              "rank": 0,
         | 
| 293 | 
            +
              "local_rank": 0,
         | 
| 294 | 
            +
              "world_size": 8,
         | 
| 295 | 
            +
              "local_world_size": 8,
         | 
| 296 | 
            +
              "bnb_4bit_compute_dtype": "torch.bfloat16",
         | 
| 297 | 
            +
              "load_in_4bit": false,
         | 
| 298 | 
            +
              "load_in_8bit": false,
         | 
| 299 | 
            +
              "train_sampler_random": true,
         | 
| 300 | 
            +
              "train_type": "sft",
         | 
| 301 | 
            +
              "training_args": "Seq2SeqTrainingArguments(output_dir='/workspace/output/molmo-7b-d/v0-20250103-184047', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.EPOCH: 'epoch'>, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=2, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=1e-05, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1, num_train_epochs=6, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, lr_scheduler_kwargs={}, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/workspace/output/molmo-7b-d/v0-20250103-184047/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.EPOCH: 'epoch'>, save_steps=200, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend='nccl', tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=200, dataloader_num_workers=1, dataloader_prefetch_factor=None, past_index=-1, run_name='/workspace/output/molmo-7b-d/v0-20250103-184047', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'optimizer': {'type': 'AdamW', 'params': {'lr': 'auto', 'betas': 'auto', 'eps': 'auto', 'weight_decay': 'auto'}}, 'scheduler': {'type': 'WarmupCosineLR', 'params': {'total_num_steps': 'auto', 'warmup_num_steps': 'auto'}}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', 
report_to=['wandb'], ddp_find_unused_parameters=True, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=True, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=False, hub_always_push=False, gradient_checkpointing=False, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy=None, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=False, include_num_input_tokens_seen=False, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=False, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=GenerationConfig {\n  \"eos_token_id\": 151643,\n  \"max_new_tokens\": 2048,\n  \"pad_token_id\": 151643\n}\n, acc_strategy='token', loss_name=None, additional_saved_files=[], train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1)"
         | 
| 302 | 
            +
            }
         | 
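Two details of sft_args.json are worth unpacking. First, "target_modules" is a regular expression over parameter names rather than a fixed module list: it keeps everything under model.transformer while a negative lookahead excludes embeddings and output heads. A minimal sketch of the selection logic (the module names below are hypothetical illustrations, not read from this checkpoint):

import re

# Same pattern as "target_modules" in sft_args.json above.
pattern = re.compile(r"^(model.transformer)(?!.*(lm_head|output|emb|wte|shared)).*")

candidates = [
    "model.transformer.blocks.0.att_proj",    # selected: transformer weight
    "model.transformer.blocks.0.ff_proj",     # selected: transformer weight
    "model.transformer.wte",                  # skipped: embedding table ("wte")
    "model.vision_backbone.image_projector",  # skipped: not under model.transformer
]
for name in candidates:
    print(name, "->", "tune" if pattern.match(name) else "skip")

Second, the embedded DeepSpeed ZeRO-3 config uses the "auto" convention: when such a dict is passed to the Hugging Face Trainer, each "auto" placeholder is resolved from the TrainingArguments, keeping the two configs consistent. A minimal sketch assuming standard transformers behavior (requires deepspeed installed; the output path is a placeholder):

from transformers import TrainingArguments

ds_config = {
    "bf16": {"enabled": "auto"},
    "zero_optimization": {"stage": 3},
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
}

args = TrainingArguments(
    output_dir="out",                  # placeholder path
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    learning_rate=1e-5,
    bf16=True,
    deepspeed=ds_config,               # "auto" fields resolved from these args
)
# train_batch_size resolves to micro_batch x grad_accum x world_size,
# i.e. 1 x 2 x 8 = 16 for the 8-GPU run recorded in this file.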
    	
special_tokens_map.json
ADDED
@@ -0,0 +1,435 @@
{
  "additional_special_tokens": [
    "|<EXTRA_TOKENS_0>|", "|<EXTRA_TOKENS_1>|", "|<EXTRA_TOKENS_2>|", "|<EXTRA_TOKENS_3>|", "|<EXTRA_TOKENS_4>|", "|<EXTRA_TOKENS_5>|", "|<EXTRA_TOKENS_6>|", "|<EXTRA_TOKENS_7>|", "|<EXTRA_TOKENS_8>|", "|<EXTRA_TOKENS_9>|",
    "|<EXTRA_TOKENS_10>|", "|<EXTRA_TOKENS_11>|", "|<EXTRA_TOKENS_12>|", "|<EXTRA_TOKENS_13>|", "|<EXTRA_TOKENS_14>|", "|<EXTRA_TOKENS_15>|", "|<EXTRA_TOKENS_16>|", "|<EXTRA_TOKENS_17>|", "|<EXTRA_TOKENS_18>|", "|<EXTRA_TOKENS_19>|",
    "|<EXTRA_TOKENS_20>|", "|<EXTRA_TOKENS_21>|", "|<EXTRA_TOKENS_22>|", "|<EXTRA_TOKENS_23>|", "|<EXTRA_TOKENS_24>|", "|<EXTRA_TOKENS_25>|", "|<EXTRA_TOKENS_26>|", "|<EXTRA_TOKENS_27>|", "|<EXTRA_TOKENS_28>|", "|<EXTRA_TOKENS_29>|",
    "|<EXTRA_TOKENS_30>|", "|<EXTRA_TOKENS_31>|", "|<EXTRA_TOKENS_32>|", "|<EXTRA_TOKENS_33>|", "|<EXTRA_TOKENS_34>|", "|<EXTRA_TOKENS_35>|", "|<EXTRA_TOKENS_36>|", "|<EXTRA_TOKENS_37>|", "|<EXTRA_TOKENS_38>|", "|<EXTRA_TOKENS_39>|",
    "|<EXTRA_TOKENS_40>|", "|<EXTRA_TOKENS_41>|", "|<EXTRA_TOKENS_42>|", "|<EXTRA_TOKENS_43>|", "|<EXTRA_TOKENS_44>|", "|<EXTRA_TOKENS_45>|", "|<EXTRA_TOKENS_46>|", "|<EXTRA_TOKENS_47>|", "|<EXTRA_TOKENS_48>|", "|<EXTRA_TOKENS_49>|",
    "|<EXTRA_TOKENS_50>|", "|<EXTRA_TOKENS_51>|", "|<EXTRA_TOKENS_52>|", "|<EXTRA_TOKENS_53>|", "|<EXTRA_TOKENS_54>|", "|<EXTRA_TOKENS_55>|", "|<EXTRA_TOKENS_56>|", "|<EXTRA_TOKENS_57>|", "|<EXTRA_TOKENS_58>|", "|<EXTRA_TOKENS_59>|",
    "|<EXTRA_TOKENS_60>|", "|<EXTRA_TOKENS_61>|", "|<EXTRA_TOKENS_62>|", "|<EXTRA_TOKENS_63>|", "|<EXTRA_TOKENS_64>|", "|<EXTRA_TOKENS_65>|", "|<EXTRA_TOKENS_66>|", "|<EXTRA_TOKENS_67>|", "|<EXTRA_TOKENS_68>|", "|<EXTRA_TOKENS_69>|",
    "|<EXTRA_TOKENS_70>|", "|<EXTRA_TOKENS_71>|", "|<EXTRA_TOKENS_72>|", "|<EXTRA_TOKENS_73>|", "|<EXTRA_TOKENS_74>|", "|<EXTRA_TOKENS_75>|", "|<EXTRA_TOKENS_76>|", "|<EXTRA_TOKENS_77>|", "|<EXTRA_TOKENS_78>|", "|<EXTRA_TOKENS_79>|",
    "|<EXTRA_TOKENS_80>|", "|<EXTRA_TOKENS_81>|", "|<EXTRA_TOKENS_82>|", "|<EXTRA_TOKENS_83>|", "|<EXTRA_TOKENS_84>|", "|<EXTRA_TOKENS_85>|", "|<EXTRA_TOKENS_86>|", "|<EXTRA_TOKENS_87>|", "|<EXTRA_TOKENS_88>|", "|<EXTRA_TOKENS_89>|",
    "|<EXTRA_TOKENS_90>|", "|<EXTRA_TOKENS_91>|", "|<EXTRA_TOKENS_92>|", "|<EXTRA_TOKENS_93>|", "|<EXTRA_TOKENS_94>|", "|<EXTRA_TOKENS_95>|", "|<EXTRA_TOKENS_96>|", "|<EXTRA_TOKENS_97>|", "|<EXTRA_TOKENS_98>|", "|<EXTRA_TOKENS_99>|",
    "|<EXTRA_TOKENS_100>|", "|<EXTRA_TOKENS_101>|", "|<EXTRA_TOKENS_102>|", "|<EXTRA_TOKENS_103>|", "|<EXTRA_TOKENS_104>|", "|<EXTRA_TOKENS_105>|", "|<EXTRA_TOKENS_106>|", "|<EXTRA_TOKENS_107>|", "|<EXTRA_TOKENS_108>|", "|<EXTRA_TOKENS_109>|",
    "|<EXTRA_TOKENS_110>|", "|<EXTRA_TOKENS_111>|", "|<EXTRA_TOKENS_112>|", "|<EXTRA_TOKENS_113>|", "|<EXTRA_TOKENS_114>|", "|<EXTRA_TOKENS_115>|", "|<EXTRA_TOKENS_116>|", "|<EXTRA_TOKENS_117>|", "|<EXTRA_TOKENS_118>|", "|<EXTRA_TOKENS_119>|",
    "|<EXTRA_TOKENS_120>|", "|<EXTRA_TOKENS_121>|", "|<EXTRA_TOKENS_122>|", "|<EXTRA_TOKENS_123>|", "|<EXTRA_TOKENS_124>|", "|<EXTRA_TOKENS_125>|", "|<EXTRA_TOKENS_126>|", "|<EXTRA_TOKENS_127>|", "|<EXTRA_TOKENS_128>|", "|<EXTRA_TOKENS_129>|",
    "|<EXTRA_TOKENS_130>|", "|<EXTRA_TOKENS_131>|", "|<EXTRA_TOKENS_132>|", "|<EXTRA_TOKENS_133>|", "|<EXTRA_TOKENS_134>|", "|<EXTRA_TOKENS_135>|", "|<EXTRA_TOKENS_136>|", "|<EXTRA_TOKENS_137>|", "|<EXTRA_TOKENS_138>|", "|<EXTRA_TOKENS_139>|",
    "|<EXTRA_TOKENS_140>|", "|<EXTRA_TOKENS_141>|", "|<EXTRA_TOKENS_142>|", "|<EXTRA_TOKENS_143>|", "|<EXTRA_TOKENS_144>|", "|<EXTRA_TOKENS_145>|", "|<EXTRA_TOKENS_146>|", "|<EXTRA_TOKENS_147>|", "|<EXTRA_TOKENS_148>|", "|<EXTRA_TOKENS_149>|",
    "|<EXTRA_TOKENS_150>|", "|<EXTRA_TOKENS_151>|", "|<EXTRA_TOKENS_152>|", "|<EXTRA_TOKENS_153>|", "|<EXTRA_TOKENS_154>|", "|<EXTRA_TOKENS_155>|", "|<EXTRA_TOKENS_156>|", "|<EXTRA_TOKENS_157>|", "|<EXTRA_TOKENS_158>|", "|<EXTRA_TOKENS_159>|",
    "|<EXTRA_TOKENS_160>|", "|<EXTRA_TOKENS_161>|", "|<EXTRA_TOKENS_162>|", "|<EXTRA_TOKENS_163>|", "|<EXTRA_TOKENS_164>|", "|<EXTRA_TOKENS_165>|", "|<EXTRA_TOKENS_166>|", "|<EXTRA_TOKENS_167>|", "|<EXTRA_TOKENS_168>|", "|<EXTRA_TOKENS_169>|",
    "|<EXTRA_TOKENS_170>|", "|<EXTRA_TOKENS_171>|", "|<EXTRA_TOKENS_172>|", "|<EXTRA_TOKENS_173>|", "|<EXTRA_TOKENS_174>|", "|<EXTRA_TOKENS_175>|", "|<EXTRA_TOKENS_176>|", "|<EXTRA_TOKENS_177>|", "|<EXTRA_TOKENS_178>|", "|<EXTRA_TOKENS_179>|",
    "|<EXTRA_TOKENS_180>|", "|<EXTRA_TOKENS_181>|", "|<EXTRA_TOKENS_182>|", "|<EXTRA_TOKENS_183>|", "|<EXTRA_TOKENS_184>|", "|<EXTRA_TOKENS_185>|", "|<EXTRA_TOKENS_186>|", "|<EXTRA_TOKENS_187>|", "|<EXTRA_TOKENS_188>|", "|<EXTRA_TOKENS_189>|",
    "|<EXTRA_TOKENS_190>|", "|<EXTRA_TOKENS_191>|", "|<EXTRA_TOKENS_192>|", "|<EXTRA_TOKENS_193>|", "|<EXTRA_TOKENS_194>|", "|<EXTRA_TOKENS_195>|", "|<EXTRA_TOKENS_196>|", "|<EXTRA_TOKENS_197>|", "|<EXTRA_TOKENS_198>|", "|<EXTRA_TOKENS_199>|",
    "|<EXTRA_TOKENS_200>|", "|<EXTRA_TOKENS_201>|", "|<EXTRA_TOKENS_202>|", "|<EXTRA_TOKENS_203>|", "|<EXTRA_TOKENS_204>|", "|<EXTRA_TOKENS_205>|", "|<EXTRA_TOKENS_206>|", "|<EXTRA_TOKENS_207>|", "|<EXTRA_TOKENS_208>|", "|<EXTRA_TOKENS_209>|",
    "|<EXTRA_TOKENS_210>|", "|<EXTRA_TOKENS_211>|", "|<EXTRA_TOKENS_212>|", "|<EXTRA_TOKENS_213>|", "|<EXTRA_TOKENS_214>|", "|<EXTRA_TOKENS_215>|", "|<EXTRA_TOKENS_216>|", "|<EXTRA_TOKENS_217>|", "|<EXTRA_TOKENS_218>|", "|<EXTRA_TOKENS_219>|",
    "|<EXTRA_TOKENS_220>|", "|<EXTRA_TOKENS_221>|", "|<EXTRA_TOKENS_222>|", "|<EXTRA_TOKENS_223>|", "|<EXTRA_TOKENS_224>|", "|<EXTRA_TOKENS_225>|", "|<EXTRA_TOKENS_226>|", "|<EXTRA_TOKENS_227>|", "|<EXTRA_TOKENS_228>|", "|<EXTRA_TOKENS_229>|",
    "|<EXTRA_TOKENS_230>|", "|<EXTRA_TOKENS_231>|", "|<EXTRA_TOKENS_232>|", "|<EXTRA_TOKENS_233>|", "|<EXTRA_TOKENS_234>|", "|<EXTRA_TOKENS_235>|", "|<EXTRA_TOKENS_236>|", "|<EXTRA_TOKENS_237>|", "|<EXTRA_TOKENS_238>|", "|<EXTRA_TOKENS_239>|",
    "|<EXTRA_TOKENS_240>|", "|<EXTRA_TOKENS_241>|", "|<EXTRA_TOKENS_242>|", "|<EXTRA_TOKENS_243>|", "|<EXTRA_TOKENS_244>|", "|<EXTRA_TOKENS_245>|", "|<EXTRA_TOKENS_246>|", "|<EXTRA_TOKENS_247>|", "|<EXTRA_TOKENS_248>|", "|<EXTRA_TOKENS_249>|",
    "|<EXTRA_TOKENS_250>|", "|<EXTRA_TOKENS_251>|", "|<EXTRA_TOKENS_252>|", "|<EXTRA_TOKENS_253>|", "|<EXTRA_TOKENS_254>|", "|<EXTRA_TOKENS_255>|", "|<EXTRA_TOKENS_256>|", "|<EXTRA_TOKENS_257>|", "|<EXTRA_TOKENS_258>|", "|<EXTRA_TOKENS_259>|",
    "|<EXTRA_TOKENS_260>|", "|<EXTRA_TOKENS_261>|", "|<EXTRA_TOKENS_262>|", "|<EXTRA_TOKENS_263>|", "|<EXTRA_TOKENS_264>|", "|<EXTRA_TOKENS_265>|", "|<EXTRA_TOKENS_266>|", "|<EXTRA_TOKENS_267>|", "|<EXTRA_TOKENS_268>|", "|<EXTRA_TOKENS_269>|",
    "|<EXTRA_TOKENS_270>|", "|<EXTRA_TOKENS_271>|", "|<EXTRA_TOKENS_272>|", "|<EXTRA_TOKENS_273>|", "|<EXTRA_TOKENS_274>|", "|<EXTRA_TOKENS_275>|", "|<EXTRA_TOKENS_276>|", "|<EXTRA_TOKENS_277>|", "|<EXTRA_TOKENS_278>|", "|<EXTRA_TOKENS_279>|",
    "|<EXTRA_TOKENS_280>|", "|<EXTRA_TOKENS_281>|", "|<EXTRA_TOKENS_282>|", "|<EXTRA_TOKENS_283>|", "|<EXTRA_TOKENS_284>|", "|<EXTRA_TOKENS_285>|", "|<EXTRA_TOKENS_286>|", "|<EXTRA_TOKENS_287>|", "|<EXTRA_TOKENS_288>|", "|<EXTRA_TOKENS_289>|",
    "|<EXTRA_TOKENS_290>|", "|<EXTRA_TOKENS_291>|", "|<EXTRA_TOKENS_292>|", "|<EXTRA_TOKENS_293>|", "|<EXTRA_TOKENS_294>|", "|<EXTRA_TOKENS_295>|", "|<EXTRA_TOKENS_296>|", "|<EXTRA_TOKENS_297>|", "|<EXTRA_TOKENS_298>|", "|<EXTRA_TOKENS_299>|",
    "|<EXTRA_TOKENS_300>|", "|<EXTRA_TOKENS_301>|", "|<EXTRA_TOKENS_302>|", "|<EXTRA_TOKENS_303>|", "|<EXTRA_TOKENS_304>|", "|<EXTRA_TOKENS_305>|", "|<EXTRA_TOKENS_306>|", "|<EXTRA_TOKENS_307>|", "|<EXTRA_TOKENS_308>|", "|<EXTRA_TOKENS_309>|",
    "|<EXTRA_TOKENS_310>|", "|<EXTRA_TOKENS_311>|", "|<EXTRA_TOKENS_312>|", "|<EXTRA_TOKENS_313>|", "|<EXTRA_TOKENS_314>|", "|<EXTRA_TOKENS_315>|", "|<EXTRA_TOKENS_316>|", "|<EXTRA_TOKENS_317>|", "|<EXTRA_TOKENS_318>|", "|<EXTRA_TOKENS_319>|",
    "|<EXTRA_TOKENS_320>|", "|<EXTRA_TOKENS_321>|", "|<EXTRA_TOKENS_322>|", "|<EXTRA_TOKENS_323>|", "|<EXTRA_TOKENS_324>|", "|<EXTRA_TOKENS_325>|", "|<EXTRA_TOKENS_326>|", "|<EXTRA_TOKENS_327>|", "|<EXTRA_TOKENS_328>|", "|<EXTRA_TOKENS_329>|",
    "|<EXTRA_TOKENS_330>|", "|<EXTRA_TOKENS_331>|", "|<EXTRA_TOKENS_332>|", "|<EXTRA_TOKENS_333>|", "|<EXTRA_TOKENS_334>|", "|<EXTRA_TOKENS_335>|", "|<EXTRA_TOKENS_336>|", "|<EXTRA_TOKENS_337>|", "|<EXTRA_TOKENS_338>|", "|<EXTRA_TOKENS_339>|",
    "|<EXTRA_TOKENS_340>|", "|<EXTRA_TOKENS_341>|", "|<EXTRA_TOKENS_342>|", "|<EXTRA_TOKENS_343>|", "|<EXTRA_TOKENS_344>|", "|<EXTRA_TOKENS_345>|", "|<EXTRA_TOKENS_346>|", "|<EXTRA_TOKENS_347>|", "|<EXTRA_TOKENS_348>|", "|<EXTRA_TOKENS_349>|",
    "|<EXTRA_TOKENS_350>|", "|<EXTRA_TOKENS_351>|", "|<EXTRA_TOKENS_352>|", "|<EXTRA_TOKENS_353>|", "|<EXTRA_TOKENS_354>|", "|<EXTRA_TOKENS_355>|", "|<EXTRA_TOKENS_356>|", "|<EXTRA_TOKENS_357>|", "|<EXTRA_TOKENS_358>|", "|<EXTRA_TOKENS_359>|",
    "|<EXTRA_TOKENS_360>|", "|<EXTRA_TOKENS_361>|", "|<EXTRA_TOKENS_362>|", "|<EXTRA_TOKENS_363>|", "|<EXTRA_TOKENS_364>|", "|<EXTRA_TOKENS_365>|", "|<EXTRA_TOKENS_366>|", "|<EXTRA_TOKENS_367>|", "|<EXTRA_TOKENS_368>|", "|<EXTRA_TOKENS_369>|",
    "|<EXTRA_TOKENS_370>|", "|<EXTRA_TOKENS_371>|", "|<EXTRA_TOKENS_372>|", "|<EXTRA_TOKENS_373>|", "|<EXTRA_TOKENS_374>|", "|<EXTRA_TOKENS_375>|", "|<EXTRA_TOKENS_376>|", "|<EXTRA_TOKENS_377>|", "|<EXTRA_TOKENS_378>|", "|<EXTRA_TOKENS_379>|",
    "|<EXTRA_TOKENS_380>|", "|<EXTRA_TOKENS_381>|", "|<EXTRA_TOKENS_382>|", "|<EXTRA_TOKENS_383>|", "|<EXTRA_TOKENS_384>|", "|<EXTRA_TOKENS_385>|", "|<EXTRA_TOKENS_386>|", "|<EXTRA_TOKENS_387>|", "|<EXTRA_TOKENS_388>|", "|<EXTRA_TOKENS_389>|",
    "|<EXTRA_TOKENS_390>|", "|<EXTRA_TOKENS_391>|", "|<EXTRA_TOKENS_392>|", "|<EXTRA_TOKENS_393>|", "|<EXTRA_TOKENS_394>|", "|<EXTRA_TOKENS_395>|", "|<EXTRA_TOKENS_396>|", "|<EXTRA_TOKENS_397>|", "|<EXTRA_TOKENS_398>|", "|<EXTRA_TOKENS_399>|",
    "|<EXTRA_TOKENS_400>|", "|<EXTRA_TOKENS_401>|", "|<EXTRA_TOKENS_402>|", "|<EXTRA_TOKENS_403>|", "|<EXTRA_TOKENS_404>|", "|<EXTRA_TOKENS_405>|", "|<EXTRA_TOKENS_406>|", "|<EXTRA_TOKENS_407>|", "|<EXTRA_TOKENS_408>|", "|<EXTRA_TOKENS_409>|",
    "|<EXTRA_TOKENS_410>|", "|<EXTRA_TOKENS_411>|", "|<EXTRA_TOKENS_412>|", "|<EXTRA_TOKENS_413>|", "|<EXTRA_TOKENS_414>|", "|<EXTRA_TOKENS_415>|", "|<EXTRA_TOKENS_416>|", "|<EXTRA_TOKENS_417>|",
    "<im_start>", "<im_end>", "<im_patch>", "<im_col>", "<|image|>"
  ],
  "eos_token": "<|endoftext|>",
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
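Since special_tokens_map.json registers the Molmo image-marker tokens alongside the 418 EXTRA_TOKENS placeholders, a quick way to sanity-check a download is to load the tokenizer and confirm they all resolve to ids. A minimal sketch (the repo id is a placeholder, not taken from this commit):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/molmo-7b-d-sft",  # placeholder repo id
                                    trust_remote_code=True)

# Image-marker tokens declared at the end of additional_special_tokens.
for t in ("<im_start>", "<im_end>", "<im_patch>", "<im_col>", "<|image|>"):
    print(t, "->", tok.convert_tokens_to_ids(t))

# Per this file, eos and pad both map to "<|endoftext|>".
print(tok.eos_token, tok.pad_token)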
    	
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6248048a83152ce87663c799492fe7e60c8086f3ae51ce7bd255ccc445746fc0
size 11501432
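tokenizer.json is checked in as a Git LFS pointer (the version/oid/size triple above), so a plain git clone without LFS yields this 3-line stub rather than the ~11.5 MB tokenizer itself. huggingface_hub resolves LFS files transparently; a minimal sketch (repo id is a placeholder):

from huggingface_hub import hf_hub_download

# Fetches the real 11,501,432-byte file, not the pointer stub.
path = hf_hub_download(repo_id="your-org/molmo-7b-d-sft",  # placeholder repo id
                       filename="tokenizer.json")
print(path)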
    	
tokenizer_config.json
ADDED
@@ -0,0 +1,3853 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "|<EXTRA_TOKENS_0>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "|<EXTRA_TOKENS_1>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "|<EXTRA_TOKENS_2>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "|<EXTRA_TOKENS_3>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "|<EXTRA_TOKENS_4>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "|<EXTRA_TOKENS_5>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "|<EXTRA_TOKENS_6>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "|<EXTRA_TOKENS_7>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "|<EXTRA_TOKENS_8>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "|<EXTRA_TOKENS_9>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "|<EXTRA_TOKENS_10>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "|<EXTRA_TOKENS_11>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151658": {
+      "content": "|<EXTRA_TOKENS_12>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151659": {
+      "content": "|<EXTRA_TOKENS_13>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151660": {
+      "content": "|<EXTRA_TOKENS_14>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151661": {
+      "content": "|<EXTRA_TOKENS_15>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151662": {
+      "content": "|<EXTRA_TOKENS_16>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151663": {
+      "content": "|<EXTRA_TOKENS_17>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151664": {
+      "content": "|<EXTRA_TOKENS_18>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151665": {
+      "content": "|<EXTRA_TOKENS_19>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151666": {
+      "content": "|<EXTRA_TOKENS_20>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151667": {
+      "content": "|<EXTRA_TOKENS_21>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151668": {
+      "content": "|<EXTRA_TOKENS_22>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151669": {
+      "content": "|<EXTRA_TOKENS_23>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151670": {
+      "content": "|<EXTRA_TOKENS_24>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151671": {
+      "content": "|<EXTRA_TOKENS_25>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151672": {
+      "content": "|<EXTRA_TOKENS_26>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151673": {
+      "content": "|<EXTRA_TOKENS_27>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151674": {
+      "content": "|<EXTRA_TOKENS_28>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151675": {
+      "content": "|<EXTRA_TOKENS_29>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151676": {
+      "content": "|<EXTRA_TOKENS_30>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151677": {
+      "content": "|<EXTRA_TOKENS_31>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151678": {
+      "content": "|<EXTRA_TOKENS_32>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151679": {
+      "content": "|<EXTRA_TOKENS_33>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151680": {
+      "content": "|<EXTRA_TOKENS_34>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151681": {
+      "content": "|<EXTRA_TOKENS_35>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151682": {
+      "content": "|<EXTRA_TOKENS_36>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151683": {
+      "content": "|<EXTRA_TOKENS_37>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151684": {
+      "content": "|<EXTRA_TOKENS_38>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151685": {
+      "content": "|<EXTRA_TOKENS_39>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151686": {
+      "content": "|<EXTRA_TOKENS_40>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151687": {
+      "content": "|<EXTRA_TOKENS_41>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151688": {
+      "content": "|<EXTRA_TOKENS_42>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151689": {
+      "content": "|<EXTRA_TOKENS_43>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151690": {
+      "content": "|<EXTRA_TOKENS_44>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151691": {
+      "content": "|<EXTRA_TOKENS_45>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151692": {
+      "content": "|<EXTRA_TOKENS_46>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151693": {
+      "content": "|<EXTRA_TOKENS_47>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151694": {
+      "content": "|<EXTRA_TOKENS_48>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151695": {
+      "content": "|<EXTRA_TOKENS_49>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151696": {
+      "content": "|<EXTRA_TOKENS_50>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151697": {
+      "content": "|<EXTRA_TOKENS_51>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151698": {
+      "content": "|<EXTRA_TOKENS_52>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151699": {
+      "content": "|<EXTRA_TOKENS_53>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151700": {
+      "content": "|<EXTRA_TOKENS_54>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151701": {
+      "content": "|<EXTRA_TOKENS_55>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151702": {
+      "content": "|<EXTRA_TOKENS_56>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151703": {
+      "content": "|<EXTRA_TOKENS_57>|",
+      "lstrip": false,
         | 
| 487 | 
            +
                  "normalized": false,
         | 
| 488 | 
            +
                  "rstrip": false,
         | 
| 489 | 
            +
                  "single_word": false,
         | 
| 490 | 
            +
                  "special": true
         | 
| 491 | 
            +
                },
         | 
| 492 | 
            +
                "151704": {
         | 
| 493 | 
            +
                  "content": "|<EXTRA_TOKENS_58>|",
         | 
| 494 | 
            +
                  "lstrip": false,
         | 
| 495 | 
            +
                  "normalized": false,
         | 
| 496 | 
            +
                  "rstrip": false,
         | 
| 497 | 
            +
                  "single_word": false,
         | 
| 498 | 
            +
                  "special": true
         | 
| 499 | 
            +
                },
         | 
| 500 | 
            +
                "151705": {
         | 
| 501 | 
            +
                  "content": "|<EXTRA_TOKENS_59>|",
         | 
| 502 | 
            +
                  "lstrip": false,
         | 
| 503 | 
            +
                  "normalized": false,
         | 
| 504 | 
            +
                  "rstrip": false,
         | 
| 505 | 
            +
                  "single_word": false,
         | 
| 506 | 
            +
                  "special": true
         | 
| 507 | 
            +
                },
         | 
| 508 | 
            +
                "151706": {
         | 
| 509 | 
            +
                  "content": "|<EXTRA_TOKENS_60>|",
         | 
| 510 | 
            +
                  "lstrip": false,
         | 
| 511 | 
            +
                  "normalized": false,
         | 
| 512 | 
            +
                  "rstrip": false,
         | 
| 513 | 
            +
                  "single_word": false,
         | 
| 514 | 
            +
                  "special": true
         | 
| 515 | 
            +
                },
         | 
| 516 | 
            +
                "151707": {
         | 
| 517 | 
            +
                  "content": "|<EXTRA_TOKENS_61>|",
         | 
| 518 | 
            +
                  "lstrip": false,
         | 
| 519 | 
            +
                  "normalized": false,
         | 
| 520 | 
            +
                  "rstrip": false,
         | 
| 521 | 
            +
                  "single_word": false,
         | 
| 522 | 
            +
                  "special": true
         | 
| 523 | 
            +
                },
         | 
| 524 | 
            +
                "151708": {
         | 
| 525 | 
            +
                  "content": "|<EXTRA_TOKENS_62>|",
         | 
| 526 | 
            +
                  "lstrip": false,
         | 
| 527 | 
            +
                  "normalized": false,
         | 
| 528 | 
            +
                  "rstrip": false,
         | 
| 529 | 
            +
                  "single_word": false,
         | 
| 530 | 
            +
                  "special": true
         | 
| 531 | 
            +
                },
         | 
| 532 | 
            +
                "151709": {
         | 
| 533 | 
            +
                  "content": "|<EXTRA_TOKENS_63>|",
         | 
| 534 | 
            +
                  "lstrip": false,
         | 
| 535 | 
            +
                  "normalized": false,
         | 
| 536 | 
            +
                  "rstrip": false,
         | 
| 537 | 
            +
                  "single_word": false,
         | 
| 538 | 
            +
                  "special": true
         | 
| 539 | 
            +
                },
         | 
| 540 | 
            +
                "151710": {
         | 
| 541 | 
            +
                  "content": "|<EXTRA_TOKENS_64>|",
         | 
| 542 | 
            +
                  "lstrip": false,
         | 
| 543 | 
            +
                  "normalized": false,
         | 
| 544 | 
            +
                  "rstrip": false,
         | 
| 545 | 
            +
                  "single_word": false,
         | 
| 546 | 
            +
                  "special": true
         | 
| 547 | 
            +
                },
         | 
| 548 | 
            +
                "151711": {
         | 
| 549 | 
            +
                  "content": "|<EXTRA_TOKENS_65>|",
         | 
| 550 | 
            +
                  "lstrip": false,
         | 
| 551 | 
            +
                  "normalized": false,
         | 
| 552 | 
            +
                  "rstrip": false,
         | 
| 553 | 
            +
                  "single_word": false,
         | 
| 554 | 
            +
                  "special": true
         | 
| 555 | 
            +
                },
         | 
| 556 | 
            +
                "151712": {
         | 
| 557 | 
            +
                  "content": "|<EXTRA_TOKENS_66>|",
         | 
| 558 | 
            +
                  "lstrip": false,
         | 
| 559 | 
            +
                  "normalized": false,
         | 
| 560 | 
            +
                  "rstrip": false,
         | 
| 561 | 
            +
                  "single_word": false,
         | 
| 562 | 
            +
                  "special": true
         | 
| 563 | 
            +
                },
         | 
| 564 | 
            +
                "151713": {
         | 
| 565 | 
            +
                  "content": "|<EXTRA_TOKENS_67>|",
         | 
| 566 | 
            +
                  "lstrip": false,
         | 
| 567 | 
            +
                  "normalized": false,
         | 
| 568 | 
            +
                  "rstrip": false,
         | 
| 569 | 
            +
                  "single_word": false,
         | 
| 570 | 
            +
                  "special": true
         | 
| 571 | 
            +
                },
         | 
| 572 | 
            +
                "151714": {
         | 
| 573 | 
            +
                  "content": "|<EXTRA_TOKENS_68>|",
         | 
| 574 | 
            +
                  "lstrip": false,
         | 
| 575 | 
            +
                  "normalized": false,
         | 
| 576 | 
            +
                  "rstrip": false,
         | 
| 577 | 
            +
                  "single_word": false,
         | 
| 578 | 
            +
                  "special": true
         | 
| 579 | 
            +
                },
         | 
| 580 | 
            +
                "151715": {
         | 
| 581 | 
            +
                  "content": "|<EXTRA_TOKENS_69>|",
         | 
| 582 | 
            +
                  "lstrip": false,
         | 
| 583 | 
            +
                  "normalized": false,
         | 
| 584 | 
            +
                  "rstrip": false,
         | 
| 585 | 
            +
                  "single_word": false,
         | 
| 586 | 
            +
                  "special": true
         | 
| 587 | 
            +
                },
         | 
| 588 | 
            +
                "151716": {
         | 
| 589 | 
            +
                  "content": "|<EXTRA_TOKENS_70>|",
         | 
| 590 | 
            +
                  "lstrip": false,
         | 
| 591 | 
            +
                  "normalized": false,
         | 
| 592 | 
            +
                  "rstrip": false,
         | 
| 593 | 
            +
                  "single_word": false,
         | 
| 594 | 
            +
                  "special": true
         | 
| 595 | 
            +
                },
         | 
| 596 | 
            +
                "151717": {
         | 
| 597 | 
            +
                  "content": "|<EXTRA_TOKENS_71>|",
         | 
| 598 | 
            +
                  "lstrip": false,
         | 
| 599 | 
            +
                  "normalized": false,
         | 
| 600 | 
            +
                  "rstrip": false,
         | 
| 601 | 
            +
                  "single_word": false,
         | 
| 602 | 
            +
                  "special": true
         | 
| 603 | 
            +
                },
         | 
| 604 | 
            +
                "151718": {
         | 
| 605 | 
            +
                  "content": "|<EXTRA_TOKENS_72>|",
         | 
| 606 | 
            +
                  "lstrip": false,
         | 
| 607 | 
            +
                  "normalized": false,
         | 
| 608 | 
            +
                  "rstrip": false,
         | 
| 609 | 
            +
                  "single_word": false,
         | 
| 610 | 
            +
                  "special": true
         | 
| 611 | 
            +
                },
         | 
| 612 | 
            +
                "151719": {
         | 
| 613 | 
            +
                  "content": "|<EXTRA_TOKENS_73>|",
         | 
| 614 | 
            +
                  "lstrip": false,
         | 
| 615 | 
            +
                  "normalized": false,
         | 
| 616 | 
            +
                  "rstrip": false,
         | 
| 617 | 
            +
                  "single_word": false,
         | 
| 618 | 
            +
                  "special": true
         | 
| 619 | 
            +
                },
         | 
| 620 | 
            +
                "151720": {
         | 
| 621 | 
            +
                  "content": "|<EXTRA_TOKENS_74>|",
         | 
| 622 | 
            +
                  "lstrip": false,
         | 
| 623 | 
            +
                  "normalized": false,
         | 
| 624 | 
            +
                  "rstrip": false,
         | 
| 625 | 
            +
                  "single_word": false,
         | 
| 626 | 
            +
                  "special": true
         | 
| 627 | 
            +
                },
         | 
| 628 | 
            +
                "151721": {
         | 
| 629 | 
            +
                  "content": "|<EXTRA_TOKENS_75>|",
         | 
| 630 | 
            +
                  "lstrip": false,
         | 
| 631 | 
            +
                  "normalized": false,
         | 
| 632 | 
            +
                  "rstrip": false,
         | 
| 633 | 
            +
                  "single_word": false,
         | 
| 634 | 
            +
                  "special": true
         | 
| 635 | 
            +
                },
         | 
| 636 | 
            +
                "151722": {
         | 
| 637 | 
            +
                  "content": "|<EXTRA_TOKENS_76>|",
         | 
| 638 | 
            +
                  "lstrip": false,
         | 
| 639 | 
            +
                  "normalized": false,
         | 
| 640 | 
            +
                  "rstrip": false,
         | 
| 641 | 
            +
                  "single_word": false,
         | 
| 642 | 
            +
                  "special": true
         | 
| 643 | 
            +
                },
         | 
| 644 | 
            +
                "151723": {
         | 
| 645 | 
            +
                  "content": "|<EXTRA_TOKENS_77>|",
         | 
| 646 | 
            +
                  "lstrip": false,
         | 
| 647 | 
            +
                  "normalized": false,
         | 
| 648 | 
            +
                  "rstrip": false,
         | 
| 649 | 
            +
                  "single_word": false,
         | 
| 650 | 
            +
                  "special": true
         | 
| 651 | 
            +
                },
         | 
| 652 | 
            +
                "151724": {
         | 
| 653 | 
            +
                  "content": "|<EXTRA_TOKENS_78>|",
         | 
| 654 | 
            +
                  "lstrip": false,
         | 
| 655 | 
            +
                  "normalized": false,
         | 
| 656 | 
            +
                  "rstrip": false,
         | 
| 657 | 
            +
                  "single_word": false,
         | 
| 658 | 
            +
                  "special": true
         | 
| 659 | 
            +
                },
         | 
| 660 | 
            +
                "151725": {
         | 
| 661 | 
            +
                  "content": "|<EXTRA_TOKENS_79>|",
         | 
| 662 | 
            +
                  "lstrip": false,
         | 
| 663 | 
            +
                  "normalized": false,
         | 
| 664 | 
            +
                  "rstrip": false,
         | 
| 665 | 
            +
                  "single_word": false,
         | 
| 666 | 
            +
                  "special": true
         | 
| 667 | 
            +
                },
         | 
| 668 | 
            +
                "151726": {
         | 
| 669 | 
            +
                  "content": "|<EXTRA_TOKENS_80>|",
         | 
| 670 | 
            +
                  "lstrip": false,
         | 
| 671 | 
            +
                  "normalized": false,
         | 
| 672 | 
            +
                  "rstrip": false,
         | 
| 673 | 
            +
                  "single_word": false,
         | 
| 674 | 
            +
                  "special": true
         | 
| 675 | 
            +
                },
         | 
| 676 | 
            +
                "151727": {
         | 
| 677 | 
            +
                  "content": "|<EXTRA_TOKENS_81>|",
         | 
| 678 | 
            +
                  "lstrip": false,
         | 
| 679 | 
            +
                  "normalized": false,
         | 
| 680 | 
            +
                  "rstrip": false,
         | 
| 681 | 
            +
                  "single_word": false,
         | 
| 682 | 
            +
                  "special": true
         | 
| 683 | 
            +
                },
         | 
| 684 | 
            +
                "151728": {
         | 
| 685 | 
            +
                  "content": "|<EXTRA_TOKENS_82>|",
         | 
| 686 | 
            +
                  "lstrip": false,
         | 
| 687 | 
            +
                  "normalized": false,
         | 
| 688 | 
            +
                  "rstrip": false,
         | 
| 689 | 
            +
                  "single_word": false,
         | 
| 690 | 
            +
                  "special": true
         | 
| 691 | 
            +
                },
         | 
| 692 | 
            +
                "151729": {
         | 
| 693 | 
            +
                  "content": "|<EXTRA_TOKENS_83>|",
         | 
| 694 | 
            +
                  "lstrip": false,
         | 
| 695 | 
            +
                  "normalized": false,
         | 
| 696 | 
            +
                  "rstrip": false,
         | 
| 697 | 
            +
                  "single_word": false,
         | 
| 698 | 
            +
                  "special": true
         | 
| 699 | 
            +
                },
         | 
| 700 | 
            +
                "151730": {
         | 
| 701 | 
            +
                  "content": "|<EXTRA_TOKENS_84>|",
         | 
| 702 | 
            +
                  "lstrip": false,
         | 
| 703 | 
            +
                  "normalized": false,
         | 
| 704 | 
            +
                  "rstrip": false,
         | 
| 705 | 
            +
                  "single_word": false,
         | 
| 706 | 
            +
                  "special": true
         | 
| 707 | 
            +
                },
         | 
| 708 | 
            +
                "151731": {
         | 
| 709 | 
            +
                  "content": "|<EXTRA_TOKENS_85>|",
         | 
| 710 | 
            +
                  "lstrip": false,
         | 
| 711 | 
            +
                  "normalized": false,
         | 
| 712 | 
            +
                  "rstrip": false,
         | 
| 713 | 
            +
                  "single_word": false,
         | 
| 714 | 
            +
                  "special": true
         | 
| 715 | 
            +
                },
         | 
| 716 | 
            +
                "151732": {
         | 
| 717 | 
            +
                  "content": "|<EXTRA_TOKENS_86>|",
         | 
| 718 | 
            +
                  "lstrip": false,
         | 
| 719 | 
            +
                  "normalized": false,
         | 
| 720 | 
            +
                  "rstrip": false,
         | 
| 721 | 
            +
                  "single_word": false,
         | 
| 722 | 
            +
                  "special": true
         | 
| 723 | 
            +
                },
         | 
| 724 | 
            +
                "151733": {
         | 
| 725 | 
            +
                  "content": "|<EXTRA_TOKENS_87>|",
         | 
| 726 | 
            +
                  "lstrip": false,
         | 
| 727 | 
            +
                  "normalized": false,
         | 
| 728 | 
            +
                  "rstrip": false,
         | 
| 729 | 
            +
                  "single_word": false,
         | 
| 730 | 
            +
                  "special": true
         | 
| 731 | 
            +
                },
         | 
| 732 | 
            +
                "151734": {
         | 
| 733 | 
            +
                  "content": "|<EXTRA_TOKENS_88>|",
         | 
| 734 | 
            +
                  "lstrip": false,
         | 
| 735 | 
            +
                  "normalized": false,
         | 
| 736 | 
            +
                  "rstrip": false,
         | 
| 737 | 
            +
                  "single_word": false,
         | 
| 738 | 
            +
                  "special": true
         | 
| 739 | 
            +
                },
         | 
| 740 | 
            +
                "151735": {
         | 
| 741 | 
            +
                  "content": "|<EXTRA_TOKENS_89>|",
         | 
| 742 | 
            +
                  "lstrip": false,
         | 
| 743 | 
            +
                  "normalized": false,
         | 
| 744 | 
            +
                  "rstrip": false,
         | 
| 745 | 
            +
                  "single_word": false,
         | 
| 746 | 
            +
                  "special": true
         | 
| 747 | 
            +
                },
         | 
| 748 | 
            +
                "151736": {
         | 
| 749 | 
            +
                  "content": "|<EXTRA_TOKENS_90>|",
         | 
| 750 | 
            +
                  "lstrip": false,
         | 
| 751 | 
            +
                  "normalized": false,
         | 
| 752 | 
            +
                  "rstrip": false,
         | 
| 753 | 
            +
                  "single_word": false,
         | 
| 754 | 
            +
                  "special": true
         | 
| 755 | 
            +
                },
         | 
| 756 | 
            +
                "151737": {
         | 
| 757 | 
            +
                  "content": "|<EXTRA_TOKENS_91>|",
         | 
| 758 | 
            +
                  "lstrip": false,
         | 
| 759 | 
            +
                  "normalized": false,
         | 
| 760 | 
            +
                  "rstrip": false,
         | 
| 761 | 
            +
                  "single_word": false,
         | 
| 762 | 
            +
                  "special": true
         | 
| 763 | 
            +
                },
         | 
| 764 | 
            +
                "151738": {
         | 
| 765 | 
            +
                  "content": "|<EXTRA_TOKENS_92>|",
         | 
| 766 | 
            +
                  "lstrip": false,
         | 
| 767 | 
            +
                  "normalized": false,
         | 
| 768 | 
            +
                  "rstrip": false,
         | 
| 769 | 
            +
                  "single_word": false,
         | 
| 770 | 
            +
                  "special": true
         | 
| 771 | 
            +
                },
         | 
| 772 | 
            +
                "151739": {
         | 
| 773 | 
            +
                  "content": "|<EXTRA_TOKENS_93>|",
         | 
| 774 | 
            +
                  "lstrip": false,
         | 
| 775 | 
            +
                  "normalized": false,
         | 
| 776 | 
            +
                  "rstrip": false,
         | 
| 777 | 
            +
                  "single_word": false,
         | 
| 778 | 
            +
                  "special": true
         | 
| 779 | 
            +
                },
         | 
| 780 | 
            +
                "151740": {
         | 
| 781 | 
            +
                  "content": "|<EXTRA_TOKENS_94>|",
         | 
| 782 | 
            +
                  "lstrip": false,
         | 
| 783 | 
            +
                  "normalized": false,
         | 
| 784 | 
            +
                  "rstrip": false,
         | 
| 785 | 
            +
                  "single_word": false,
         | 
| 786 | 
            +
                  "special": true
         | 
| 787 | 
            +
                },
         | 
| 788 | 
            +
                "151741": {
         | 
| 789 | 
            +
                  "content": "|<EXTRA_TOKENS_95>|",
         | 
| 790 | 
            +
                  "lstrip": false,
         | 
| 791 | 
            +
                  "normalized": false,
         | 
| 792 | 
            +
                  "rstrip": false,
         | 
| 793 | 
            +
                  "single_word": false,
         | 
| 794 | 
            +
                  "special": true
         | 
| 795 | 
            +
                },
         | 
| 796 | 
            +
                "151742": {
         | 
| 797 | 
            +
                  "content": "|<EXTRA_TOKENS_96>|",
         | 
| 798 | 
            +
                  "lstrip": false,
         | 
| 799 | 
            +
                  "normalized": false,
         | 
| 800 | 
            +
                  "rstrip": false,
         | 
| 801 | 
            +
                  "single_word": false,
         | 
| 802 | 
            +
                  "special": true
         | 
| 803 | 
            +
                },
         | 
| 804 | 
            +
                "151743": {
         | 
| 805 | 
            +
                  "content": "|<EXTRA_TOKENS_97>|",
         | 
| 806 | 
            +
                  "lstrip": false,
         | 
| 807 | 
            +
                  "normalized": false,
         | 
| 808 | 
            +
                  "rstrip": false,
         | 
| 809 | 
            +
                  "single_word": false,
         | 
| 810 | 
            +
                  "special": true
         | 
| 811 | 
            +
                },
         | 
| 812 | 
            +
                "151744": {
         | 
| 813 | 
            +
                  "content": "|<EXTRA_TOKENS_98>|",
         | 
| 814 | 
            +
                  "lstrip": false,
         | 
| 815 | 
            +
                  "normalized": false,
         | 
| 816 | 
            +
                  "rstrip": false,
         | 
| 817 | 
            +
                  "single_word": false,
         | 
| 818 | 
            +
                  "special": true
         | 
| 819 | 
            +
                },
         | 
| 820 | 
            +
                "151745": {
         | 
| 821 | 
            +
                  "content": "|<EXTRA_TOKENS_99>|",
         | 
| 822 | 
            +
                  "lstrip": false,
         | 
| 823 | 
            +
                  "normalized": false,
         | 
| 824 | 
            +
                  "rstrip": false,
         | 
| 825 | 
            +
                  "single_word": false,
         | 
| 826 | 
            +
                  "special": true
         | 
| 827 | 
            +
                },
         | 
| 828 | 
            +
                "151746": {
         | 
| 829 | 
            +
                  "content": "|<EXTRA_TOKENS_100>|",
         | 
| 830 | 
            +
                  "lstrip": false,
         | 
| 831 | 
            +
                  "normalized": false,
         | 
| 832 | 
            +
                  "rstrip": false,
         | 
| 833 | 
            +
                  "single_word": false,
         | 
| 834 | 
            +
                  "special": true
         | 
| 835 | 
            +
                },
         | 
| 836 | 
            +
                "151747": {
         | 
| 837 | 
            +
                  "content": "|<EXTRA_TOKENS_101>|",
         | 
| 838 | 
            +
                  "lstrip": false,
         | 
| 839 | 
            +
                  "normalized": false,
         | 
| 840 | 
            +
                  "rstrip": false,
         | 
| 841 | 
            +
                  "single_word": false,
         | 
| 842 | 
            +
                  "special": true
         | 
| 843 | 
            +
                },
         | 
| 844 | 
            +
                "151748": {
         | 
| 845 | 
            +
                  "content": "|<EXTRA_TOKENS_102>|",
         | 
| 846 | 
            +
                  "lstrip": false,
         | 
| 847 | 
            +
                  "normalized": false,
         | 
| 848 | 
            +
                  "rstrip": false,
         | 
| 849 | 
            +
                  "single_word": false,
         | 
| 850 | 
            +
                  "special": true
         | 
| 851 | 
            +
                },
         | 
| 852 | 
            +
                "151749": {
         | 
| 853 | 
            +
                  "content": "|<EXTRA_TOKENS_103>|",
         | 
| 854 | 
            +
                  "lstrip": false,
         | 
| 855 | 
            +
                  "normalized": false,
         | 
| 856 | 
            +
                  "rstrip": false,
         | 
| 857 | 
            +
                  "single_word": false,
         | 
| 858 | 
            +
                  "special": true
         | 
| 859 | 
            +
                },
         | 
| 860 | 
            +
                "151750": {
         | 
| 861 | 
            +
                  "content": "|<EXTRA_TOKENS_104>|",
         | 
| 862 | 
            +
                  "lstrip": false,
         | 
| 863 | 
            +
                  "normalized": false,
         | 
| 864 | 
            +
                  "rstrip": false,
         | 
| 865 | 
            +
                  "single_word": false,
         | 
| 866 | 
            +
                  "special": true
         | 
| 867 | 
            +
                },
         | 
| 868 | 
            +
                "151751": {
         | 
| 869 | 
            +
                  "content": "|<EXTRA_TOKENS_105>|",
         | 
| 870 | 
            +
                  "lstrip": false,
         | 
| 871 | 
            +
                  "normalized": false,
         | 
| 872 | 
            +
                  "rstrip": false,
         | 
| 873 | 
            +
                  "single_word": false,
         | 
| 874 | 
            +
                  "special": true
         | 
| 875 | 
            +
                },
         | 
| 876 | 
            +
                "151752": {
         | 
| 877 | 
            +
                  "content": "|<EXTRA_TOKENS_106>|",
         | 
| 878 | 
            +
                  "lstrip": false,
         | 
| 879 | 
            +
                  "normalized": false,
         | 
| 880 | 
            +
                  "rstrip": false,
         | 
| 881 | 
            +
                  "single_word": false,
         | 
| 882 | 
            +
                  "special": true
         | 
| 883 | 
            +
                },
         | 
| 884 | 
            +
                "151753": {
         | 
| 885 | 
            +
                  "content": "|<EXTRA_TOKENS_107>|",
         | 
| 886 | 
            +
                  "lstrip": false,
         | 
| 887 | 
            +
                  "normalized": false,
         | 
| 888 | 
            +
                  "rstrip": false,
         | 
| 889 | 
            +
                  "single_word": false,
         | 
| 890 | 
            +
                  "special": true
         | 
| 891 | 
            +
                },
         | 
| 892 | 
            +
                "151754": {
         | 
| 893 | 
            +
                  "content": "|<EXTRA_TOKENS_108>|",
         | 
| 894 | 
            +
                  "lstrip": false,
         | 
| 895 | 
            +
                  "normalized": false,
         | 
| 896 | 
            +
                  "rstrip": false,
         | 
| 897 | 
            +
                  "single_word": false,
         | 
| 898 | 
            +
                  "special": true
         | 
| 899 | 
            +
                },
         | 
| 900 | 
            +
                "151755": {
         | 
| 901 | 
            +
                  "content": "|<EXTRA_TOKENS_109>|",
         | 
| 902 | 
            +
                  "lstrip": false,
         | 
| 903 | 
            +
                  "normalized": false,
         | 
| 904 | 
            +
                  "rstrip": false,
         | 
| 905 | 
            +
                  "single_word": false,
         | 
| 906 | 
            +
                  "special": true
         | 
| 907 | 
            +
                },
         | 
| 908 | 
            +
                "151756": {
         | 
| 909 | 
            +
                  "content": "|<EXTRA_TOKENS_110>|",
         | 
| 910 | 
            +
                  "lstrip": false,
         | 
| 911 | 
            +
                  "normalized": false,
         | 
| 912 | 
            +
                  "rstrip": false,
         | 
| 913 | 
            +
                  "single_word": false,
         | 
| 914 | 
            +
                  "special": true
         | 
| 915 | 
            +
                },
         | 
| 916 | 
            +
                "151757": {
         | 
| 917 | 
            +
                  "content": "|<EXTRA_TOKENS_111>|",
         | 
| 918 | 
            +
                  "lstrip": false,
         | 
| 919 | 
            +
                  "normalized": false,
         | 
| 920 | 
            +
                  "rstrip": false,
         | 
| 921 | 
            +
                  "single_word": false,
         | 
| 922 | 
            +
                  "special": true
         | 
| 923 | 
            +
                },
         | 
| 924 | 
            +
                "151758": {
         | 
| 925 | 
            +
                  "content": "|<EXTRA_TOKENS_112>|",
         | 
| 926 | 
            +
                  "lstrip": false,
         | 
| 927 | 
            +
                  "normalized": false,
         | 
| 928 | 
            +
                  "rstrip": false,
         | 
| 929 | 
            +
                  "single_word": false,
         | 
| 930 | 
            +
                  "special": true
         | 
| 931 | 
            +
                },
         | 
| 932 | 
            +
                "151759": {
         | 
| 933 | 
            +
                  "content": "|<EXTRA_TOKENS_113>|",
         | 
| 934 | 
            +
                  "lstrip": false,
         | 
| 935 | 
            +
                  "normalized": false,
         | 
| 936 | 
            +
                  "rstrip": false,
         | 
| 937 | 
            +
                  "single_word": false,
         | 
| 938 | 
            +
                  "special": true
         | 
| 939 | 
            +
                },
         | 
| 940 | 
            +
                "151760": {
         | 
| 941 | 
            +
                  "content": "|<EXTRA_TOKENS_114>|",
         | 
| 942 | 
            +
                  "lstrip": false,
         | 
| 943 | 
            +
                  "normalized": false,
         | 
| 944 | 
            +
                  "rstrip": false,
         | 
| 945 | 
            +
                  "single_word": false,
         | 
| 946 | 
            +
                  "special": true
         | 
| 947 | 
            +
                },
         | 
| 948 | 
            +
                "151761": {
         | 
| 949 | 
            +
                  "content": "|<EXTRA_TOKENS_115>|",
         | 
| 950 | 
            +
                  "lstrip": false,
         | 
| 951 | 
            +
                  "normalized": false,
         | 
| 952 | 
            +
                  "rstrip": false,
         | 
| 953 | 
            +
                  "single_word": false,
         | 
| 954 | 
            +
                  "special": true
         | 
| 955 | 
            +
                },
         | 
| 956 | 
            +
                "151762": {
         | 
| 957 | 
            +
                  "content": "|<EXTRA_TOKENS_116>|",
         | 
| 958 | 
            +
                  "lstrip": false,
         | 
| 959 | 
            +
                  "normalized": false,
         | 
| 960 | 
            +
                  "rstrip": false,
         | 
| 961 | 
            +
                  "single_word": false,
         | 
| 962 | 
            +
                  "special": true
         | 
| 963 | 
            +
                },
         | 
| 964 | 
            +
                "151763": {
         | 
| 965 | 
            +
                  "content": "|<EXTRA_TOKENS_117>|",
         | 
| 966 | 
            +
                  "lstrip": false,
         | 
| 967 | 
            +
                  "normalized": false,
         | 
| 968 | 
            +
                  "rstrip": false,
         | 
| 969 | 
            +
                  "single_word": false,
         | 
| 970 | 
            +
                  "special": true
         | 
| 971 | 
            +
                },
         | 
| 972 | 
            +
                "151764": {
         | 
| 973 | 
            +
                  "content": "|<EXTRA_TOKENS_118>|",
         | 
| 974 | 
            +
                  "lstrip": false,
         | 
| 975 | 
            +
                  "normalized": false,
         | 
| 976 | 
            +
                  "rstrip": false,
         | 
| 977 | 
            +
                  "single_word": false,
         | 
| 978 | 
            +
                  "special": true
         | 
| 979 | 
            +
                },
         | 
| 980 | 
            +
                "151765": {
         | 
| 981 | 
            +
                  "content": "|<EXTRA_TOKENS_119>|",
         | 
| 982 | 
            +
                  "lstrip": false,
         | 
| 983 | 
            +
                  "normalized": false,
         | 
| 984 | 
            +
                  "rstrip": false,
         | 
| 985 | 
            +
                  "single_word": false,
         | 
| 986 | 
            +
                  "special": true
         | 
| 987 | 
            +
                },
         | 
| 988 | 
            +
                "151766": {
         | 
| 989 | 
            +
                  "content": "|<EXTRA_TOKENS_120>|",
         | 
| 990 | 
            +
                  "lstrip": false,
         | 
| 991 | 
            +
                  "normalized": false,
         | 
| 992 | 
            +
                  "rstrip": false,
         | 
| 993 | 
            +
                  "single_word": false,
         | 
| 994 | 
            +
                  "special": true
         | 
| 995 | 
            +
                },
         | 
| 996 | 
            +
                "151767": {
         | 
| 997 | 
            +
                  "content": "|<EXTRA_TOKENS_121>|",
         | 
| 998 | 
            +
                  "lstrip": false,
         | 
| 999 | 
            +
                  "normalized": false,
         | 
| 1000 | 
            +
                  "rstrip": false,
         | 
| 1001 | 
            +
                  "single_word": false,
         | 
| 1002 | 
            +
                  "special": true
         | 
| 1003 | 
            +
                },
         | 
| 1004 | 
            +
                "151768": {
         | 
| 1005 | 
            +
                  "content": "|<EXTRA_TOKENS_122>|",
         | 
| 1006 | 
            +
                  "lstrip": false,
         | 
| 1007 | 
            +
                  "normalized": false,
         | 
| 1008 | 
            +
                  "rstrip": false,
         | 
| 1009 | 
            +
                  "single_word": false,
         | 
| 1010 | 
            +
                  "special": true
         | 
| 1011 | 
            +
                },
         | 
| 1012 | 
            +
                "151769": {
         | 
| 1013 | 
            +
                  "content": "|<EXTRA_TOKENS_123>|",
         | 
| 1014 | 
            +
                  "lstrip": false,
         | 
| 1015 | 
            +
                  "normalized": false,
         | 
| 1016 | 
            +
                  "rstrip": false,
         | 
| 1017 | 
            +
                  "single_word": false,
         | 
| 1018 | 
            +
                  "special": true
         | 
| 1019 | 
            +
                },
         | 
| 1020 | 
            +
                "151770": {
         | 
| 1021 | 
            +
                  "content": "|<EXTRA_TOKENS_124>|",
         | 
| 1022 | 
            +
                  "lstrip": false,
         | 
| 1023 | 
            +
                  "normalized": false,
         | 
| 1024 | 
            +
                  "rstrip": false,
         | 
| 1025 | 
            +
                  "single_word": false,
         | 
| 1026 | 
            +
                  "special": true
         | 
| 1027 | 
            +
                },
         | 
| 1028 | 
            +
                "151771": {
         | 
| 1029 | 
            +
                  "content": "|<EXTRA_TOKENS_125>|",
         | 
| 1030 | 
            +
                  "lstrip": false,
         | 
| 1031 | 
            +
                  "normalized": false,
         | 
| 1032 | 
            +
                  "rstrip": false,
         | 
| 1033 | 
            +
                  "single_word": false,
         | 
| 1034 | 
            +
                  "special": true
         | 
| 1035 | 
            +
                },
         | 
| 1036 | 
            +
                "151772": {
         | 
| 1037 | 
            +
                  "content": "|<EXTRA_TOKENS_126>|",
         | 
| 1038 | 
            +
                  "lstrip": false,
         | 
| 1039 | 
            +
                  "normalized": false,
         | 
| 1040 | 
            +
                  "rstrip": false,
         | 
| 1041 | 
            +
                  "single_word": false,
         | 
| 1042 | 
            +
                  "special": true
         | 
| 1043 | 
            +
                },
         | 
| 1044 | 
            +
                "151773": {
         | 
| 1045 | 
            +
                  "content": "|<EXTRA_TOKENS_127>|",
         | 
| 1046 | 
            +
                  "lstrip": false,
         | 
| 1047 | 
            +
                  "normalized": false,
         | 
| 1048 | 
            +
                  "rstrip": false,
         | 
| 1049 | 
            +
                  "single_word": false,
         | 
| 1050 | 
            +
                  "special": true
         | 
| 1051 | 
            +
                },
         | 
| 1052 | 
            +
                "151774": {
         | 
| 1053 | 
            +
                  "content": "|<EXTRA_TOKENS_128>|",
         | 
| 1054 | 
            +
                  "lstrip": false,
         | 
| 1055 | 
            +
                  "normalized": false,
         | 
| 1056 | 
            +
                  "rstrip": false,
         | 
| 1057 | 
            +
                  "single_word": false,
         | 
| 1058 | 
            +
                  "special": true
         | 
| 1059 | 
            +
                },
         | 
| 1060 | 
            +
                "151775": {
         | 
| 1061 | 
            +
                  "content": "|<EXTRA_TOKENS_129>|",
         | 
| 1062 | 
            +
                  "lstrip": false,
         | 
| 1063 | 
            +
                  "normalized": false,
         | 
| 1064 | 
            +
                  "rstrip": false,
         | 
| 1065 | 
            +
                  "single_word": false,
         | 
| 1066 | 
            +
                  "special": true
         | 
| 1067 | 
            +
                },
         | 
| 1068 | 
            +
                "151776": {
         | 
| 1069 | 
            +
                  "content": "|<EXTRA_TOKENS_130>|",
         | 
| 1070 | 
            +
                  "lstrip": false,
         | 
| 1071 | 
            +
                  "normalized": false,
         | 
| 1072 | 
            +
                  "rstrip": false,
         | 
| 1073 | 
            +
                  "single_word": false,
         | 
| 1074 | 
            +
                  "special": true
         | 
| 1075 | 
            +
                },
         | 
| 1076 | 
            +
                "151777": {
         | 
| 1077 | 
            +
                  "content": "|<EXTRA_TOKENS_131>|",
         | 
| 1078 | 
            +
                  "lstrip": false,
         | 
| 1079 | 
            +
                  "normalized": false,
         | 
| 1080 | 
            +
                  "rstrip": false,
         | 
| 1081 | 
            +
                  "single_word": false,
         | 
| 1082 | 
            +
                  "special": true
         | 
| 1083 | 
            +
                },
         | 
| 1084 | 
            +
                "151778": {
         | 
| 1085 | 
            +
                  "content": "|<EXTRA_TOKENS_132>|",
         | 
| 1086 | 
            +
                  "lstrip": false,
         | 
| 1087 | 
            +
                  "normalized": false,
         | 
| 1088 | 
            +
                  "rstrip": false,
         | 
| 1089 | 
            +
                  "single_word": false,
         | 
| 1090 | 
            +
                  "special": true
         | 
| 1091 | 
            +
                },
         | 
| 1092 | 
            +
                "151779": {
         | 
| 1093 | 
            +
                  "content": "|<EXTRA_TOKENS_133>|",
         | 
| 1094 | 
            +
                  "lstrip": false,
         | 
| 1095 | 
            +
                  "normalized": false,
         | 
| 1096 | 
            +
                  "rstrip": false,
         | 
| 1097 | 
            +
                  "single_word": false,
         | 
| 1098 | 
            +
                  "special": true
         | 
| 1099 | 
            +
                },
         | 
| 1100 | 
            +
                "151780": {
         | 
| 1101 | 
            +
                  "content": "|<EXTRA_TOKENS_134>|",
         | 
| 1102 | 
            +
                  "lstrip": false,
         | 
| 1103 | 
            +
                  "normalized": false,
         | 
| 1104 | 
            +
                  "rstrip": false,
         | 
| 1105 | 
            +
                  "single_word": false,
         | 
| 1106 | 
            +
                  "special": true
         | 
| 1107 | 
            +
                },
         | 
| 1108 | 
            +
                "151781": {
         | 
| 1109 | 
            +
                  "content": "|<EXTRA_TOKENS_135>|",
         | 
| 1110 | 
            +
                  "lstrip": false,
         | 
| 1111 | 
            +
                  "normalized": false,
         | 
| 1112 | 
            +
                  "rstrip": false,
         | 
| 1113 | 
            +
                  "single_word": false,
         | 
| 1114 | 
            +
                  "special": true
         | 
| 1115 | 
            +
                },
         | 
| 1116 | 
            +
                "151782": {
         | 
| 1117 | 
            +
                  "content": "|<EXTRA_TOKENS_136>|",
         | 
| 1118 | 
            +
                  "lstrip": false,
         | 
| 1119 | 
            +
                  "normalized": false,
         | 
| 1120 | 
            +
                  "rstrip": false,
         | 
| 1121 | 
            +
                  "single_word": false,
         | 
| 1122 | 
            +
                  "special": true
         | 
| 1123 | 
            +
                },
         | 
| 1124 | 
            +
                "151783": {
         | 
| 1125 | 
            +
                  "content": "|<EXTRA_TOKENS_137>|",
         | 
| 1126 | 
            +
                  "lstrip": false,
         | 
| 1127 | 
            +
                  "normalized": false,
         | 
| 1128 | 
            +
                  "rstrip": false,
         | 
| 1129 | 
            +
                  "single_word": false,
         | 
| 1130 | 
            +
                  "special": true
         | 
| 1131 | 
            +
                },
         | 
| 1132 | 
            +
                "151784": {
         | 
| 1133 | 
            +
                  "content": "|<EXTRA_TOKENS_138>|",
         | 
| 1134 | 
            +
                  "lstrip": false,
         | 
| 1135 | 
            +
                  "normalized": false,
         | 
| 1136 | 
            +
                  "rstrip": false,
         | 
| 1137 | 
            +
                  "single_word": false,
         | 
| 1138 | 
            +
                  "special": true
         | 
| 1139 | 
            +
                },
         | 
| 1140 | 
            +
                "151785": {
         | 
| 1141 | 
            +
                  "content": "|<EXTRA_TOKENS_139>|",
         | 
| 1142 | 
            +
                  "lstrip": false,
         | 
| 1143 | 
            +
                  "normalized": false,
         | 
| 1144 | 
            +
                  "rstrip": false,
         | 
| 1145 | 
            +
                  "single_word": false,
         | 
| 1146 | 
            +
                  "special": true
         | 
| 1147 | 
            +
                },
         | 
| 1148 | 
            +
                "151786": {
         | 
| 1149 | 
            +
                  "content": "|<EXTRA_TOKENS_140>|",
         | 
| 1150 | 
            +
                  "lstrip": false,
         | 
| 1151 | 
            +
                  "normalized": false,
         | 
| 1152 | 
            +
                  "rstrip": false,
         | 
| 1153 | 
            +
                  "single_word": false,
         | 
| 1154 | 
            +
                  "special": true
         | 
| 1155 | 
            +
                },
         | 
| 1156 | 
            +
                "151787": {
         | 
| 1157 | 
            +
                  "content": "|<EXTRA_TOKENS_141>|",
         | 
| 1158 | 
            +
                  "lstrip": false,
         | 
| 1159 | 
            +
                  "normalized": false,
         | 
| 1160 | 
            +
                  "rstrip": false,
         | 
| 1161 | 
            +
                  "single_word": false,
         | 
| 1162 | 
            +
                  "special": true
         | 
| 1163 | 
            +
                },
         | 
| 1164 | 
            +
                "151788": {
         | 
| 1165 | 
            +
                  "content": "|<EXTRA_TOKENS_142>|",
         | 
| 1166 | 
            +
                  "lstrip": false,
         | 
| 1167 | 
            +
                  "normalized": false,
         | 
| 1168 | 
            +
                  "rstrip": false,
         | 
| 1169 | 
            +
                  "single_word": false,
         | 
| 1170 | 
            +
                  "special": true
         | 
| 1171 | 
            +
                },
         | 
| 1172 | 
            +
                "151789": {
         | 
| 1173 | 
            +
                  "content": "|<EXTRA_TOKENS_143>|",
         | 
| 1174 | 
            +
                  "lstrip": false,
         | 
| 1175 | 
            +
                  "normalized": false,
         | 
| 1176 | 
            +
                  "rstrip": false,
         | 
| 1177 | 
            +
                  "single_word": false,
         | 
| 1178 | 
            +
                  "special": true
         | 
| 1179 | 
            +
                },
         | 
| 1180 | 
            +
                "151790": {
         | 
| 1181 | 
            +
                  "content": "|<EXTRA_TOKENS_144>|",
         | 
| 1182 | 
            +
                  "lstrip": false,
         | 
| 1183 | 
            +
                  "normalized": false,
         | 
| 1184 | 
            +
                  "rstrip": false,
         | 
| 1185 | 
            +
                  "single_word": false,
         | 
| 1186 | 
            +
                  "special": true
         | 
| 1187 | 
            +
                },
         | 
| 1188 | 
            +
                "151791": {
         | 
| 1189 | 
            +
                  "content": "|<EXTRA_TOKENS_145>|",
         | 
| 1190 | 
            +
                  "lstrip": false,
         | 
| 1191 | 
            +
                  "normalized": false,
         | 
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151792": {
+      "content": "|<EXTRA_TOKENS_146>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151793": {
+      "content": "|<EXTRA_TOKENS_147>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151794": {
+      "content": "|<EXTRA_TOKENS_148>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151795": {
+      "content": "|<EXTRA_TOKENS_149>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151796": {
+      "content": "|<EXTRA_TOKENS_150>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151797": {
+      "content": "|<EXTRA_TOKENS_151>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151798": {
+      "content": "|<EXTRA_TOKENS_152>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151799": {
+      "content": "|<EXTRA_TOKENS_153>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151800": {
+      "content": "|<EXTRA_TOKENS_154>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151801": {
+      "content": "|<EXTRA_TOKENS_155>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151802": {
+      "content": "|<EXTRA_TOKENS_156>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151803": {
+      "content": "|<EXTRA_TOKENS_157>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151804": {
+      "content": "|<EXTRA_TOKENS_158>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151805": {
+      "content": "|<EXTRA_TOKENS_159>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151806": {
+      "content": "|<EXTRA_TOKENS_160>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151807": {
+      "content": "|<EXTRA_TOKENS_161>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151808": {
+      "content": "|<EXTRA_TOKENS_162>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151809": {
+      "content": "|<EXTRA_TOKENS_163>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151810": {
+      "content": "|<EXTRA_TOKENS_164>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151811": {
+      "content": "|<EXTRA_TOKENS_165>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151812": {
+      "content": "|<EXTRA_TOKENS_166>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151813": {
+      "content": "|<EXTRA_TOKENS_167>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151814": {
+      "content": "|<EXTRA_TOKENS_168>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151815": {
+      "content": "|<EXTRA_TOKENS_169>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151816": {
+      "content": "|<EXTRA_TOKENS_170>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151817": {
+      "content": "|<EXTRA_TOKENS_171>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151818": {
+      "content": "|<EXTRA_TOKENS_172>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151819": {
+      "content": "|<EXTRA_TOKENS_173>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151820": {
+      "content": "|<EXTRA_TOKENS_174>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151821": {
+      "content": "|<EXTRA_TOKENS_175>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151822": {
+      "content": "|<EXTRA_TOKENS_176>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151823": {
+      "content": "|<EXTRA_TOKENS_177>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151824": {
+      "content": "|<EXTRA_TOKENS_178>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151825": {
+      "content": "|<EXTRA_TOKENS_179>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151826": {
+      "content": "|<EXTRA_TOKENS_180>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151827": {
+      "content": "|<EXTRA_TOKENS_181>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151828": {
+      "content": "|<EXTRA_TOKENS_182>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151829": {
+      "content": "|<EXTRA_TOKENS_183>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151830": {
+      "content": "|<EXTRA_TOKENS_184>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151831": {
+      "content": "|<EXTRA_TOKENS_185>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151832": {
+      "content": "|<EXTRA_TOKENS_186>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151833": {
+      "content": "|<EXTRA_TOKENS_187>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151834": {
+      "content": "|<EXTRA_TOKENS_188>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151835": {
+      "content": "|<EXTRA_TOKENS_189>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151836": {
+      "content": "|<EXTRA_TOKENS_190>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151837": {
+      "content": "|<EXTRA_TOKENS_191>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151838": {
+      "content": "|<EXTRA_TOKENS_192>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151839": {
+      "content": "|<EXTRA_TOKENS_193>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151840": {
+      "content": "|<EXTRA_TOKENS_194>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151841": {
+      "content": "|<EXTRA_TOKENS_195>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151842": {
+      "content": "|<EXTRA_TOKENS_196>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151843": {
+      "content": "|<EXTRA_TOKENS_197>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151844": {
+      "content": "|<EXTRA_TOKENS_198>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151845": {
+      "content": "|<EXTRA_TOKENS_199>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151846": {
+      "content": "|<EXTRA_TOKENS_200>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151847": {
+      "content": "|<EXTRA_TOKENS_201>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151848": {
+      "content": "|<EXTRA_TOKENS_202>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151849": {
+      "content": "|<EXTRA_TOKENS_203>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151850": {
+      "content": "|<EXTRA_TOKENS_204>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151851": {
+      "content": "|<EXTRA_TOKENS_205>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151852": {
+      "content": "|<EXTRA_TOKENS_206>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151853": {
+      "content": "|<EXTRA_TOKENS_207>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151854": {
+      "content": "|<EXTRA_TOKENS_208>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151855": {
+      "content": "|<EXTRA_TOKENS_209>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151856": {
+      "content": "|<EXTRA_TOKENS_210>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151857": {
+      "content": "|<EXTRA_TOKENS_211>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151858": {
+      "content": "|<EXTRA_TOKENS_212>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151859": {
+      "content": "|<EXTRA_TOKENS_213>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151860": {
+      "content": "|<EXTRA_TOKENS_214>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151861": {
+      "content": "|<EXTRA_TOKENS_215>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151862": {
+      "content": "|<EXTRA_TOKENS_216>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151863": {
+      "content": "|<EXTRA_TOKENS_217>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151864": {
+      "content": "|<EXTRA_TOKENS_218>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151865": {
+      "content": "|<EXTRA_TOKENS_219>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151866": {
+      "content": "|<EXTRA_TOKENS_220>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151867": {
+      "content": "|<EXTRA_TOKENS_221>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151868": {
+      "content": "|<EXTRA_TOKENS_222>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151869": {
+      "content": "|<EXTRA_TOKENS_223>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151870": {
+      "content": "|<EXTRA_TOKENS_224>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151871": {
+      "content": "|<EXTRA_TOKENS_225>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151872": {
+      "content": "|<EXTRA_TOKENS_226>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151873": {
+      "content": "|<EXTRA_TOKENS_227>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151874": {
+      "content": "|<EXTRA_TOKENS_228>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151875": {
+      "content": "|<EXTRA_TOKENS_229>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151876": {
+      "content": "|<EXTRA_TOKENS_230>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151877": {
+      "content": "|<EXTRA_TOKENS_231>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151878": {
+      "content": "|<EXTRA_TOKENS_232>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151879": {
+      "content": "|<EXTRA_TOKENS_233>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151880": {
+      "content": "|<EXTRA_TOKENS_234>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151881": {
+      "content": "|<EXTRA_TOKENS_235>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151882": {
+      "content": "|<EXTRA_TOKENS_236>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151883": {
+      "content": "|<EXTRA_TOKENS_237>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151884": {
+      "content": "|<EXTRA_TOKENS_238>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151885": {
+      "content": "|<EXTRA_TOKENS_239>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151886": {
+      "content": "|<EXTRA_TOKENS_240>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151887": {
+      "content": "|<EXTRA_TOKENS_241>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151888": {
+      "content": "|<EXTRA_TOKENS_242>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151889": {
+      "content": "|<EXTRA_TOKENS_243>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151890": {
+      "content": "|<EXTRA_TOKENS_244>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151891": {
+      "content": "|<EXTRA_TOKENS_245>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151892": {
+      "content": "|<EXTRA_TOKENS_246>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151893": {
+      "content": "|<EXTRA_TOKENS_247>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151894": {
+      "content": "|<EXTRA_TOKENS_248>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151895": {
+      "content": "|<EXTRA_TOKENS_249>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151896": {
+      "content": "|<EXTRA_TOKENS_250>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151897": {
+      "content": "|<EXTRA_TOKENS_251>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151898": {
+      "content": "|<EXTRA_TOKENS_252>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151899": {
+      "content": "|<EXTRA_TOKENS_253>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151900": {
+      "content": "|<EXTRA_TOKENS_254>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151901": {
+      "content": "|<EXTRA_TOKENS_255>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151902": {
+      "content": "|<EXTRA_TOKENS_256>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151903": {
+      "content": "|<EXTRA_TOKENS_257>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
         | 
| 2089 | 
            +
                  "single_word": false,
         | 
| 2090 | 
            +
                  "special": true
         | 
| 2091 | 
            +
                },
         | 
| 2092 | 
            +
                "151904": {
         | 
| 2093 | 
            +
                  "content": "|<EXTRA_TOKENS_258>|",
         | 
| 2094 | 
            +
                  "lstrip": false,
         | 
| 2095 | 
            +
                  "normalized": false,
         | 
| 2096 | 
            +
                  "rstrip": false,
         | 
| 2097 | 
            +
                  "single_word": false,
         | 
| 2098 | 
            +
                  "special": true
         | 
| 2099 | 
            +
                },
         | 
| 2100 | 
            +
                "151905": {
         | 
| 2101 | 
            +
                  "content": "|<EXTRA_TOKENS_259>|",
         | 
| 2102 | 
            +
                  "lstrip": false,
         | 
| 2103 | 
            +
                  "normalized": false,
         | 
| 2104 | 
            +
                  "rstrip": false,
         | 
| 2105 | 
            +
                  "single_word": false,
         | 
| 2106 | 
            +
                  "special": true
         | 
| 2107 | 
            +
                },
         | 
| 2108 | 
            +
                "151906": {
         | 
| 2109 | 
            +
                  "content": "|<EXTRA_TOKENS_260>|",
         | 
| 2110 | 
            +
                  "lstrip": false,
         | 
| 2111 | 
            +
                  "normalized": false,
         | 
| 2112 | 
            +
                  "rstrip": false,
         | 
| 2113 | 
            +
                  "single_word": false,
         | 
| 2114 | 
            +
                  "special": true
         | 
| 2115 | 
            +
                },
         | 
| 2116 | 
            +
                "151907": {
         | 
| 2117 | 
            +
                  "content": "|<EXTRA_TOKENS_261>|",
         | 
| 2118 | 
            +
                  "lstrip": false,
         | 
| 2119 | 
            +
                  "normalized": false,
         | 
| 2120 | 
            +
                  "rstrip": false,
         | 
| 2121 | 
            +
                  "single_word": false,
         | 
| 2122 | 
            +
                  "special": true
         | 
| 2123 | 
            +
                },
         | 
| 2124 | 
            +
                "151908": {
         | 
| 2125 | 
            +
                  "content": "|<EXTRA_TOKENS_262>|",
         | 
| 2126 | 
            +
                  "lstrip": false,
         | 
| 2127 | 
            +
                  "normalized": false,
         | 
| 2128 | 
            +
                  "rstrip": false,
         | 
| 2129 | 
            +
                  "single_word": false,
         | 
| 2130 | 
            +
                  "special": true
         | 
| 2131 | 
            +
                },
         | 
| 2132 | 
            +
                "151909": {
         | 
| 2133 | 
            +
                  "content": "|<EXTRA_TOKENS_263>|",
         | 
| 2134 | 
            +
                  "lstrip": false,
         | 
| 2135 | 
            +
                  "normalized": false,
         | 
| 2136 | 
            +
                  "rstrip": false,
         | 
| 2137 | 
            +
                  "single_word": false,
         | 
| 2138 | 
            +
                  "special": true
         | 
| 2139 | 
            +
                },
         | 
| 2140 | 
            +
                "151910": {
         | 
| 2141 | 
            +
                  "content": "|<EXTRA_TOKENS_264>|",
         | 
| 2142 | 
            +
                  "lstrip": false,
         | 
| 2143 | 
            +
                  "normalized": false,
         | 
| 2144 | 
            +
                  "rstrip": false,
         | 
| 2145 | 
            +
                  "single_word": false,
         | 
| 2146 | 
            +
                  "special": true
         | 
| 2147 | 
            +
                },
         | 
| 2148 | 
            +
                "151911": {
         | 
| 2149 | 
            +
                  "content": "|<EXTRA_TOKENS_265>|",
         | 
| 2150 | 
            +
                  "lstrip": false,
         | 
| 2151 | 
            +
                  "normalized": false,
         | 
| 2152 | 
            +
                  "rstrip": false,
         | 
| 2153 | 
            +
                  "single_word": false,
         | 
| 2154 | 
            +
                  "special": true
         | 
| 2155 | 
            +
                },
         | 
| 2156 | 
            +
                "151912": {
         | 
| 2157 | 
            +
                  "content": "|<EXTRA_TOKENS_266>|",
         | 
| 2158 | 
            +
                  "lstrip": false,
         | 
| 2159 | 
            +
                  "normalized": false,
         | 
| 2160 | 
            +
                  "rstrip": false,
         | 
| 2161 | 
            +
                  "single_word": false,
         | 
| 2162 | 
            +
                  "special": true
         | 
| 2163 | 
            +
                },
         | 
| 2164 | 
            +
                "151913": {
         | 
| 2165 | 
            +
                  "content": "|<EXTRA_TOKENS_267>|",
         | 
| 2166 | 
            +
                  "lstrip": false,
         | 
| 2167 | 
            +
                  "normalized": false,
         | 
| 2168 | 
            +
                  "rstrip": false,
         | 
| 2169 | 
            +
                  "single_word": false,
         | 
| 2170 | 
            +
                  "special": true
         | 
| 2171 | 
            +
                },
         | 
| 2172 | 
            +
                "151914": {
         | 
| 2173 | 
            +
                  "content": "|<EXTRA_TOKENS_268>|",
         | 
| 2174 | 
            +
                  "lstrip": false,
         | 
| 2175 | 
            +
                  "normalized": false,
         | 
| 2176 | 
            +
                  "rstrip": false,
         | 
| 2177 | 
            +
                  "single_word": false,
         | 
| 2178 | 
            +
                  "special": true
         | 
| 2179 | 
            +
                },
         | 
| 2180 | 
            +
                "151915": {
         | 
| 2181 | 
            +
                  "content": "|<EXTRA_TOKENS_269>|",
         | 
| 2182 | 
            +
                  "lstrip": false,
         | 
| 2183 | 
            +
                  "normalized": false,
         | 
| 2184 | 
            +
                  "rstrip": false,
         | 
| 2185 | 
            +
                  "single_word": false,
         | 
| 2186 | 
            +
                  "special": true
         | 
| 2187 | 
            +
                },
         | 
| 2188 | 
            +
                "151916": {
         | 
| 2189 | 
            +
                  "content": "|<EXTRA_TOKENS_270>|",
         | 
| 2190 | 
            +
                  "lstrip": false,
         | 
| 2191 | 
            +
                  "normalized": false,
         | 
| 2192 | 
            +
                  "rstrip": false,
         | 
| 2193 | 
            +
                  "single_word": false,
         | 
| 2194 | 
            +
                  "special": true
         | 
| 2195 | 
            +
                },
         | 
| 2196 | 
            +
                "151917": {
         | 
| 2197 | 
            +
                  "content": "|<EXTRA_TOKENS_271>|",
         | 
| 2198 | 
            +
                  "lstrip": false,
         | 
| 2199 | 
            +
                  "normalized": false,
         | 
| 2200 | 
            +
                  "rstrip": false,
         | 
| 2201 | 
            +
                  "single_word": false,
         | 
| 2202 | 
            +
                  "special": true
         | 
| 2203 | 
            +
                },
         | 
| 2204 | 
            +
                "151918": {
         | 
| 2205 | 
            +
                  "content": "|<EXTRA_TOKENS_272>|",
         | 
| 2206 | 
            +
                  "lstrip": false,
         | 
| 2207 | 
            +
                  "normalized": false,
         | 
| 2208 | 
            +
                  "rstrip": false,
         | 
| 2209 | 
            +
                  "single_word": false,
         | 
| 2210 | 
            +
                  "special": true
         | 
| 2211 | 
            +
                },
         | 
| 2212 | 
            +
                "151919": {
         | 
| 2213 | 
            +
                  "content": "|<EXTRA_TOKENS_273>|",
         | 
| 2214 | 
            +
                  "lstrip": false,
         | 
| 2215 | 
            +
                  "normalized": false,
         | 
| 2216 | 
            +
                  "rstrip": false,
         | 
| 2217 | 
            +
                  "single_word": false,
         | 
| 2218 | 
            +
                  "special": true
         | 
| 2219 | 
            +
                },
         | 
| 2220 | 
            +
                "151920": {
         | 
| 2221 | 
            +
                  "content": "|<EXTRA_TOKENS_274>|",
         | 
| 2222 | 
            +
                  "lstrip": false,
         | 
| 2223 | 
            +
                  "normalized": false,
         | 
| 2224 | 
            +
                  "rstrip": false,
         | 
| 2225 | 
            +
                  "single_word": false,
         | 
| 2226 | 
            +
                  "special": true
         | 
| 2227 | 
            +
                },
         | 
| 2228 | 
            +
                "151921": {
         | 
| 2229 | 
            +
                  "content": "|<EXTRA_TOKENS_275>|",
         | 
| 2230 | 
            +
                  "lstrip": false,
         | 
| 2231 | 
            +
                  "normalized": false,
         | 
| 2232 | 
            +
                  "rstrip": false,
         | 
| 2233 | 
            +
                  "single_word": false,
         | 
| 2234 | 
            +
                  "special": true
         | 
| 2235 | 
            +
                },
         | 
| 2236 | 
            +
                "151922": {
         | 
| 2237 | 
            +
                  "content": "|<EXTRA_TOKENS_276>|",
         | 
| 2238 | 
            +
                  "lstrip": false,
         | 
| 2239 | 
            +
                  "normalized": false,
         | 
| 2240 | 
            +
                  "rstrip": false,
         | 
| 2241 | 
            +
                  "single_word": false,
         | 
| 2242 | 
            +
                  "special": true
         | 
| 2243 | 
            +
                },
         | 
| 2244 | 
            +
                "151923": {
         | 
| 2245 | 
            +
                  "content": "|<EXTRA_TOKENS_277>|",
         | 
| 2246 | 
            +
                  "lstrip": false,
         | 
| 2247 | 
            +
                  "normalized": false,
         | 
| 2248 | 
            +
                  "rstrip": false,
         | 
| 2249 | 
            +
                  "single_word": false,
         | 
| 2250 | 
            +
                  "special": true
         | 
| 2251 | 
            +
                },
         | 
| 2252 | 
            +
                "151924": {
         | 
| 2253 | 
            +
                  "content": "|<EXTRA_TOKENS_278>|",
         | 
| 2254 | 
            +
                  "lstrip": false,
         | 
| 2255 | 
            +
                  "normalized": false,
         | 
| 2256 | 
            +
                  "rstrip": false,
         | 
| 2257 | 
            +
                  "single_word": false,
         | 
| 2258 | 
            +
                  "special": true
         | 
| 2259 | 
            +
                },
         | 
| 2260 | 
            +
                "151925": {
         | 
| 2261 | 
            +
                  "content": "|<EXTRA_TOKENS_279>|",
         | 
| 2262 | 
            +
                  "lstrip": false,
         | 
| 2263 | 
            +
                  "normalized": false,
         | 
| 2264 | 
            +
                  "rstrip": false,
         | 
| 2265 | 
            +
                  "single_word": false,
         | 
| 2266 | 
            +
                  "special": true
         | 
| 2267 | 
            +
                },
         | 
| 2268 | 
            +
                "151926": {
         | 
| 2269 | 
            +
                  "content": "|<EXTRA_TOKENS_280>|",
         | 
| 2270 | 
            +
                  "lstrip": false,
         | 
| 2271 | 
            +
                  "normalized": false,
         | 
| 2272 | 
            +
                  "rstrip": false,
         | 
| 2273 | 
            +
                  "single_word": false,
         | 
| 2274 | 
            +
                  "special": true
         | 
| 2275 | 
            +
                },
         | 
| 2276 | 
            +
                "151927": {
         | 
| 2277 | 
            +
                  "content": "|<EXTRA_TOKENS_281>|",
         | 
| 2278 | 
            +
                  "lstrip": false,
         | 
| 2279 | 
            +
                  "normalized": false,
         | 
| 2280 | 
            +
                  "rstrip": false,
         | 
| 2281 | 
            +
                  "single_word": false,
         | 
| 2282 | 
            +
                  "special": true
         | 
| 2283 | 
            +
                },
         | 
| 2284 | 
            +
                "151928": {
         | 
| 2285 | 
            +
                  "content": "|<EXTRA_TOKENS_282>|",
         | 
| 2286 | 
            +
                  "lstrip": false,
         | 
| 2287 | 
            +
                  "normalized": false,
         | 
| 2288 | 
            +
                  "rstrip": false,
         | 
| 2289 | 
            +
                  "single_word": false,
         | 
| 2290 | 
            +
                  "special": true
         | 
| 2291 | 
            +
                },
         | 
| 2292 | 
            +
                "151929": {
         | 
| 2293 | 
            +
                  "content": "|<EXTRA_TOKENS_283>|",
         | 
| 2294 | 
            +
                  "lstrip": false,
         | 
| 2295 | 
            +
                  "normalized": false,
         | 
| 2296 | 
            +
                  "rstrip": false,
         | 
| 2297 | 
            +
                  "single_word": false,
         | 
| 2298 | 
            +
                  "special": true
         | 
| 2299 | 
            +
                },
         | 
| 2300 | 
            +
                "151930": {
         | 
| 2301 | 
            +
                  "content": "|<EXTRA_TOKENS_284>|",
         | 
| 2302 | 
            +
                  "lstrip": false,
         | 
| 2303 | 
            +
                  "normalized": false,
         | 
| 2304 | 
            +
                  "rstrip": false,
         | 
| 2305 | 
            +
                  "single_word": false,
         | 
| 2306 | 
            +
                  "special": true
         | 
| 2307 | 
            +
                },
         | 
| 2308 | 
            +
                "151931": {
         | 
| 2309 | 
            +
                  "content": "|<EXTRA_TOKENS_285>|",
         | 
| 2310 | 
            +
                  "lstrip": false,
         | 
| 2311 | 
            +
                  "normalized": false,
         | 
| 2312 | 
            +
                  "rstrip": false,
         | 
| 2313 | 
            +
                  "single_word": false,
         | 
| 2314 | 
            +
                  "special": true
         | 
| 2315 | 
            +
                },
         | 
| 2316 | 
            +
                "151932": {
         | 
| 2317 | 
            +
                  "content": "|<EXTRA_TOKENS_286>|",
         | 
| 2318 | 
            +
                  "lstrip": false,
         | 
| 2319 | 
            +
                  "normalized": false,
         | 
| 2320 | 
            +
                  "rstrip": false,
         | 
| 2321 | 
            +
                  "single_word": false,
         | 
| 2322 | 
            +
                  "special": true
         | 
| 2323 | 
            +
                },
         | 
| 2324 | 
            +
                "151933": {
         | 
| 2325 | 
            +
                  "content": "|<EXTRA_TOKENS_287>|",
         | 
| 2326 | 
            +
                  "lstrip": false,
         | 
| 2327 | 
            +
                  "normalized": false,
         | 
| 2328 | 
            +
                  "rstrip": false,
         | 
| 2329 | 
            +
                  "single_word": false,
         | 
| 2330 | 
            +
                  "special": true
         | 
| 2331 | 
            +
                },
         | 
| 2332 | 
            +
                "151934": {
         | 
| 2333 | 
            +
                  "content": "|<EXTRA_TOKENS_288>|",
         | 
| 2334 | 
            +
                  "lstrip": false,
         | 
| 2335 | 
            +
                  "normalized": false,
         | 
| 2336 | 
            +
                  "rstrip": false,
         | 
| 2337 | 
            +
                  "single_word": false,
         | 
| 2338 | 
            +
                  "special": true
         | 
| 2339 | 
            +
                },
         | 
| 2340 | 
            +
                "151935": {
         | 
| 2341 | 
            +
                  "content": "|<EXTRA_TOKENS_289>|",
         | 
| 2342 | 
            +
                  "lstrip": false,
         | 
| 2343 | 
            +
                  "normalized": false,
         | 
| 2344 | 
            +
                  "rstrip": false,
         | 
| 2345 | 
            +
                  "single_word": false,
         | 
| 2346 | 
            +
                  "special": true
         | 
| 2347 | 
            +
                },
         | 
| 2348 | 
            +
                "151936": {
         | 
| 2349 | 
            +
                  "content": "|<EXTRA_TOKENS_290>|",
         | 
| 2350 | 
            +
                  "lstrip": false,
         | 
| 2351 | 
            +
                  "normalized": false,
         | 
| 2352 | 
            +
                  "rstrip": false,
         | 
| 2353 | 
            +
                  "single_word": false,
         | 
| 2354 | 
            +
                  "special": true
         | 
| 2355 | 
            +
                },
         | 
| 2356 | 
            +
                "151937": {
         | 
| 2357 | 
            +
                  "content": "|<EXTRA_TOKENS_291>|",
         | 
| 2358 | 
            +
                  "lstrip": false,
         | 
| 2359 | 
            +
                  "normalized": false,
         | 
| 2360 | 
            +
                  "rstrip": false,
         | 
| 2361 | 
            +
                  "single_word": false,
         | 
| 2362 | 
            +
                  "special": true
         | 
| 2363 | 
            +
                },
         | 
| 2364 | 
            +
                "151938": {
         | 
| 2365 | 
            +
                  "content": "|<EXTRA_TOKENS_292>|",
         | 
| 2366 | 
            +
                  "lstrip": false,
         | 
| 2367 | 
            +
                  "normalized": false,
         | 
| 2368 | 
            +
                  "rstrip": false,
         | 
| 2369 | 
            +
                  "single_word": false,
         | 
| 2370 | 
            +
                  "special": true
         | 
| 2371 | 
            +
                },
         | 
| 2372 | 
            +
                "151939": {
         | 
| 2373 | 
            +
                  "content": "|<EXTRA_TOKENS_293>|",
         | 
| 2374 | 
            +
                  "lstrip": false,
         | 
| 2375 | 
            +
                  "normalized": false,
         | 
| 2376 | 
            +
                  "rstrip": false,
         | 
| 2377 | 
            +
                  "single_word": false,
         | 
| 2378 | 
            +
                  "special": true
         | 
| 2379 | 
            +
                },
         | 
| 2380 | 
            +
                "151940": {
         | 
| 2381 | 
            +
                  "content": "|<EXTRA_TOKENS_294>|",
         | 
| 2382 | 
            +
                  "lstrip": false,
         | 
| 2383 | 
            +
                  "normalized": false,
         | 
| 2384 | 
            +
                  "rstrip": false,
         | 
| 2385 | 
            +
                  "single_word": false,
         | 
| 2386 | 
            +
                  "special": true
         | 
| 2387 | 
            +
                },
         | 
| 2388 | 
            +
                "151941": {
         | 
| 2389 | 
            +
                  "content": "|<EXTRA_TOKENS_295>|",
         | 
| 2390 | 
            +
                  "lstrip": false,
         | 
| 2391 | 
            +
                  "normalized": false,
         | 
| 2392 | 
            +
                  "rstrip": false,
         | 
| 2393 | 
            +
                  "single_word": false,
         | 
| 2394 | 
            +
                  "special": true
         | 
| 2395 | 
            +
                },
         | 
| 2396 | 
            +
                "151942": {
         | 
| 2397 | 
            +
                  "content": "|<EXTRA_TOKENS_296>|",
         | 
| 2398 | 
            +
                  "lstrip": false,
         | 
| 2399 | 
            +
                  "normalized": false,
         | 
| 2400 | 
            +
                  "rstrip": false,
         | 
| 2401 | 
            +
                  "single_word": false,
         | 
| 2402 | 
            +
                  "special": true
         | 
| 2403 | 
            +
                },
         | 
| 2404 | 
            +
                "151943": {
         | 
| 2405 | 
            +
                  "content": "|<EXTRA_TOKENS_297>|",
         | 
| 2406 | 
            +
                  "lstrip": false,
         | 
| 2407 | 
            +
                  "normalized": false,
         | 
| 2408 | 
            +
                  "rstrip": false,
         | 
| 2409 | 
            +
                  "single_word": false,
         | 
| 2410 | 
            +
                  "special": true
         | 
| 2411 | 
            +
                },
         | 
| 2412 | 
            +
                "151944": {
         | 
| 2413 | 
            +
                  "content": "|<EXTRA_TOKENS_298>|",
         | 
| 2414 | 
            +
                  "lstrip": false,
         | 
| 2415 | 
            +
                  "normalized": false,
         | 
| 2416 | 
            +
                  "rstrip": false,
         | 
| 2417 | 
            +
                  "single_word": false,
         | 
| 2418 | 
            +
                  "special": true
         | 
| 2419 | 
            +
                },
         | 
| 2420 | 
            +
                "151945": {
         | 
| 2421 | 
            +
                  "content": "|<EXTRA_TOKENS_299>|",
         | 
| 2422 | 
            +
                  "lstrip": false,
         | 
| 2423 | 
            +
                  "normalized": false,
         | 
| 2424 | 
            +
                  "rstrip": false,
         | 
| 2425 | 
            +
                  "single_word": false,
         | 
| 2426 | 
            +
                  "special": true
         | 
| 2427 | 
            +
                },
         | 
| 2428 | 
            +
                "151946": {
         | 
| 2429 | 
            +
                  "content": "|<EXTRA_TOKENS_300>|",
         | 
| 2430 | 
            +
                  "lstrip": false,
         | 
| 2431 | 
            +
                  "normalized": false,
         | 
| 2432 | 
            +
                  "rstrip": false,
         | 
| 2433 | 
            +
                  "single_word": false,
         | 
| 2434 | 
            +
                  "special": true
         | 
| 2435 | 
            +
                },
         | 
| 2436 | 
            +
                "151947": {
         | 
| 2437 | 
            +
                  "content": "|<EXTRA_TOKENS_301>|",
         | 
| 2438 | 
            +
                  "lstrip": false,
         | 
| 2439 | 
            +
                  "normalized": false,
         | 
| 2440 | 
            +
                  "rstrip": false,
         | 
| 2441 | 
            +
                  "single_word": false,
         | 
| 2442 | 
            +
                  "special": true
         | 
| 2443 | 
            +
                },
         | 
| 2444 | 
            +
                "151948": {
         | 
| 2445 | 
            +
                  "content": "|<EXTRA_TOKENS_302>|",
         | 
| 2446 | 
            +
                  "lstrip": false,
         | 
| 2447 | 
            +
                  "normalized": false,
         | 
| 2448 | 
            +
                  "rstrip": false,
         | 
| 2449 | 
            +
                  "single_word": false,
         | 
| 2450 | 
            +
                  "special": true
         | 
| 2451 | 
            +
                },
         | 
| 2452 | 
            +
                "151949": {
         | 
| 2453 | 
            +
                  "content": "|<EXTRA_TOKENS_303>|",
         | 
| 2454 | 
            +
                  "lstrip": false,
         | 
| 2455 | 
            +
                  "normalized": false,
         | 
| 2456 | 
            +
                  "rstrip": false,
         | 
| 2457 | 
            +
                  "single_word": false,
         | 
| 2458 | 
            +
                  "special": true
         | 
| 2459 | 
            +
                },
         | 
| 2460 | 
            +
                "151950": {
         | 
| 2461 | 
            +
                  "content": "|<EXTRA_TOKENS_304>|",
         | 
| 2462 | 
            +
                  "lstrip": false,
         | 
| 2463 | 
            +
                  "normalized": false,
         | 
| 2464 | 
            +
                  "rstrip": false,
         | 
| 2465 | 
            +
                  "single_word": false,
         | 
| 2466 | 
            +
                  "special": true
         | 
| 2467 | 
            +
                },
         | 
| 2468 | 
            +
                "151951": {
         | 
| 2469 | 
            +
                  "content": "|<EXTRA_TOKENS_305>|",
         | 
| 2470 | 
            +
                  "lstrip": false,
         | 
| 2471 | 
            +
                  "normalized": false,
         | 
| 2472 | 
            +
                  "rstrip": false,
         | 
| 2473 | 
            +
                  "single_word": false,
         | 
| 2474 | 
            +
                  "special": true
         | 
| 2475 | 
            +
                },
         | 
| 2476 | 
            +
                "151952": {
         | 
| 2477 | 
            +
                  "content": "|<EXTRA_TOKENS_306>|",
         | 
| 2478 | 
            +
                  "lstrip": false,
         | 
| 2479 | 
            +
                  "normalized": false,
         | 
| 2480 | 
            +
                  "rstrip": false,
         | 
| 2481 | 
            +
                  "single_word": false,
         | 
| 2482 | 
            +
                  "special": true
         | 
| 2483 | 
            +
                },
         | 
| 2484 | 
            +
                "151953": {
         | 
| 2485 | 
            +
                  "content": "|<EXTRA_TOKENS_307>|",
         | 
| 2486 | 
            +
                  "lstrip": false,
         | 
| 2487 | 
            +
                  "normalized": false,
         | 
| 2488 | 
            +
                  "rstrip": false,
         | 
| 2489 | 
            +
                  "single_word": false,
         | 
| 2490 | 
            +
                  "special": true
         | 
| 2491 | 
            +
                },
         | 
| 2492 | 
            +
                "151954": {
         | 
| 2493 | 
            +
                  "content": "|<EXTRA_TOKENS_308>|",
         | 
| 2494 | 
            +
                  "lstrip": false,
         | 
| 2495 | 
            +
                  "normalized": false,
         | 
| 2496 | 
            +
                  "rstrip": false,
         | 
| 2497 | 
            +
                  "single_word": false,
         | 
| 2498 | 
            +
                  "special": true
         | 
| 2499 | 
            +
                },
         | 
| 2500 | 
            +
                "151955": {
         | 
| 2501 | 
            +
                  "content": "|<EXTRA_TOKENS_309>|",
         | 
| 2502 | 
            +
                  "lstrip": false,
         | 
| 2503 | 
            +
                  "normalized": false,
         | 
| 2504 | 
            +
                  "rstrip": false,
         | 
| 2505 | 
            +
                  "single_word": false,
         | 
| 2506 | 
            +
                  "special": true
         | 
| 2507 | 
            +
                },
         | 
| 2508 | 
            +
                "151956": {
         | 
| 2509 | 
            +
                  "content": "|<EXTRA_TOKENS_310>|",
         | 
| 2510 | 
            +
                  "lstrip": false,
         | 
| 2511 | 
            +
                  "normalized": false,
         | 
| 2512 | 
            +
                  "rstrip": false,
         | 
| 2513 | 
            +
                  "single_word": false,
         | 
| 2514 | 
            +
                  "special": true
         | 
| 2515 | 
            +
                },
         | 
| 2516 | 
            +
                "151957": {
         | 
| 2517 | 
            +
                  "content": "|<EXTRA_TOKENS_311>|",
         | 
| 2518 | 
            +
                  "lstrip": false,
         | 
| 2519 | 
            +
                  "normalized": false,
         | 
| 2520 | 
            +
                  "rstrip": false,
         | 
| 2521 | 
            +
                  "single_word": false,
         | 
| 2522 | 
            +
                  "special": true
         | 
| 2523 | 
            +
                },
         | 
| 2524 | 
            +
                "151958": {
         | 
| 2525 | 
            +
                  "content": "|<EXTRA_TOKENS_312>|",
         | 
| 2526 | 
            +
                  "lstrip": false,
         | 
| 2527 | 
            +
                  "normalized": false,
         | 
| 2528 | 
            +
                  "rstrip": false,
         | 
| 2529 | 
            +
                  "single_word": false,
         | 
| 2530 | 
            +
                  "special": true
         | 
| 2531 | 
            +
                },
         | 
| 2532 | 
            +
                "151959": {
         | 
| 2533 | 
            +
                  "content": "|<EXTRA_TOKENS_313>|",
         | 
| 2534 | 
            +
                  "lstrip": false,
         | 
| 2535 | 
            +
                  "normalized": false,
         | 
| 2536 | 
            +
                  "rstrip": false,
         | 
| 2537 | 
            +
                  "single_word": false,
         | 
| 2538 | 
            +
                  "special": true
         | 
| 2539 | 
            +
                },
         | 
| 2540 | 
            +
                "151960": {
         | 
| 2541 | 
            +
                  "content": "|<EXTRA_TOKENS_314>|",
         | 
| 2542 | 
            +
                  "lstrip": false,
         | 
| 2543 | 
            +
                  "normalized": false,
         | 
| 2544 | 
            +
                  "rstrip": false,
         | 
| 2545 | 
            +
                  "single_word": false,
         | 
| 2546 | 
            +
                  "special": true
         | 
| 2547 | 
            +
                },
         | 
| 2548 | 
            +
                "151961": {
         | 
| 2549 | 
            +
                  "content": "|<EXTRA_TOKENS_315>|",
         | 
| 2550 | 
            +
                  "lstrip": false,
         | 
| 2551 | 
            +
                  "normalized": false,
         | 
| 2552 | 
            +
                  "rstrip": false,
         | 
| 2553 | 
            +
                  "single_word": false,
         | 
| 2554 | 
            +
                  "special": true
         | 
| 2555 | 
            +
                },
         | 
| 2556 | 
            +
                "151962": {
         | 
| 2557 | 
            +
                  "content": "|<EXTRA_TOKENS_316>|",
         | 
| 2558 | 
            +
                  "lstrip": false,
         | 
| 2559 | 
            +
                  "normalized": false,
         | 
| 2560 | 
            +
                  "rstrip": false,
         | 
| 2561 | 
            +
                  "single_word": false,
         | 
| 2562 | 
            +
                  "special": true
         | 
| 2563 | 
            +
                },
         | 
| 2564 | 
            +
                "151963": {
         | 
| 2565 | 
            +
                  "content": "|<EXTRA_TOKENS_317>|",
         | 
| 2566 | 
            +
                  "lstrip": false,
         | 
| 2567 | 
            +
                  "normalized": false,
         | 
| 2568 | 
            +
                  "rstrip": false,
         | 
| 2569 | 
            +
                  "single_word": false,
         | 
| 2570 | 
            +
                  "special": true
         | 
| 2571 | 
            +
                },
         | 
| 2572 | 
            +
                "151964": {
         | 
| 2573 | 
            +
                  "content": "|<EXTRA_TOKENS_318>|",
         | 
| 2574 | 
            +
                  "lstrip": false,
         | 
| 2575 | 
            +
                  "normalized": false,
         | 
| 2576 | 
            +
                  "rstrip": false,
         | 
| 2577 | 
            +
                  "single_word": false,
         | 
| 2578 | 
            +
                  "special": true
         | 
| 2579 | 
            +
                },
         | 
| 2580 | 
            +
                "151965": {
         | 
| 2581 | 
            +
                  "content": "|<EXTRA_TOKENS_319>|",
         | 
| 2582 | 
            +
                  "lstrip": false,
         | 
| 2583 | 
            +
                  "normalized": false,
         | 
| 2584 | 
            +
                  "rstrip": false,
         | 
| 2585 | 
            +
                  "single_word": false,
         | 
| 2586 | 
            +
                  "special": true
         | 
| 2587 | 
            +
                },
         | 
| 2588 | 
            +
                "151966": {
         | 
| 2589 | 
            +
                  "content": "|<EXTRA_TOKENS_320>|",
         | 
| 2590 | 
            +
                  "lstrip": false,
         | 
| 2591 | 
            +
                  "normalized": false,
         | 
| 2592 | 
            +
                  "rstrip": false,
         | 
| 2593 | 
            +
                  "single_word": false,
         | 
| 2594 | 
            +
                  "special": true
         | 
| 2595 | 
            +
                },
         | 
| 2596 | 
            +
                "151967": {
         | 
| 2597 | 
            +
                  "content": "|<EXTRA_TOKENS_321>|",
         | 
| 2598 | 
            +
                  "lstrip": false,
         | 
| 2599 | 
            +
                  "normalized": false,
         | 
| 2600 | 
            +
                  "rstrip": false,
         | 
| 2601 | 
            +
                  "single_word": false,
         | 
| 2602 | 
            +
                  "special": true
         | 
| 2603 | 
            +
                },
         | 
| 2604 | 
            +
                "151968": {
         | 
| 2605 | 
            +
                  "content": "|<EXTRA_TOKENS_322>|",
         | 
| 2606 | 
            +
                  "lstrip": false,
         | 
| 2607 | 
            +
                  "normalized": false,
         | 
| 2608 | 
            +
                  "rstrip": false,
         | 
| 2609 | 
            +
                  "single_word": false,
         | 
| 2610 | 
            +
                  "special": true
         | 
| 2611 | 
            +
                },
         | 
| 2612 | 
            +
                "151969": {
         | 
| 2613 | 
            +
                  "content": "|<EXTRA_TOKENS_323>|",
         | 
| 2614 | 
            +
                  "lstrip": false,
         | 
| 2615 | 
            +
                  "normalized": false,
         | 
| 2616 | 
            +
                  "rstrip": false,
         | 
| 2617 | 
            +
                  "single_word": false,
         | 
| 2618 | 
            +
                  "special": true
         | 
| 2619 | 
            +
                },
         | 
| 2620 | 
            +
                "151970": {
         | 
| 2621 | 
            +
                  "content": "|<EXTRA_TOKENS_324>|",
         | 
| 2622 | 
            +
                  "lstrip": false,
         | 
| 2623 | 
            +
                  "normalized": false,
         | 
| 2624 | 
            +
                  "rstrip": false,
         | 
| 2625 | 
            +
                  "single_word": false,
         | 
| 2626 | 
            +
                  "special": true
         | 
| 2627 | 
            +
                },
         | 
| 2628 | 
            +
                "151971": {
         | 
| 2629 | 
            +
                  "content": "|<EXTRA_TOKENS_325>|",
         | 
| 2630 | 
            +
                  "lstrip": false,
         | 
| 2631 | 
            +
                  "normalized": false,
         | 
| 2632 | 
            +
                  "rstrip": false,
         | 
| 2633 | 
            +
                  "single_word": false,
         | 
| 2634 | 
            +
                  "special": true
         | 
| 2635 | 
            +
                },
         | 
| 2636 | 
            +
                "151972": {
         | 
| 2637 | 
            +
                  "content": "|<EXTRA_TOKENS_326>|",
         | 
| 2638 | 
            +
                  "lstrip": false,
         | 
| 2639 | 
            +
                  "normalized": false,
         | 
| 2640 | 
            +
                  "rstrip": false,
         | 
| 2641 | 
            +
                  "single_word": false,
         | 
| 2642 | 
            +
                  "special": true
         | 
| 2643 | 
            +
                },
         | 
| 2644 | 
            +
                "151973": {
         | 
| 2645 | 
            +
                  "content": "|<EXTRA_TOKENS_327>|",
         | 
| 2646 | 
            +
                  "lstrip": false,
         | 
| 2647 | 
            +
                  "normalized": false,
         | 
| 2648 | 
            +
                  "rstrip": false,
         | 
| 2649 | 
            +
                  "single_word": false,
         | 
| 2650 | 
            +
                  "special": true
         | 
| 2651 | 
            +
                },
         | 
| 2652 | 
            +
                "151974": {
         | 
| 2653 | 
            +
                  "content": "|<EXTRA_TOKENS_328>|",
         | 
| 2654 | 
            +
                  "lstrip": false,
         | 
| 2655 | 
            +
                  "normalized": false,
         | 
| 2656 | 
            +
                  "rstrip": false,
         | 
| 2657 | 
            +
                  "single_word": false,
         | 
| 2658 | 
            +
                  "special": true
         | 
| 2659 | 
            +
                },
         | 
| 2660 | 
            +
                "151975": {
         | 
| 2661 | 
            +
                  "content": "|<EXTRA_TOKENS_329>|",
         | 
| 2662 | 
            +
                  "lstrip": false,
         | 
| 2663 | 
            +
                  "normalized": false,
         | 
| 2664 | 
            +
                  "rstrip": false,
         | 
| 2665 | 
            +
                  "single_word": false,
         | 
| 2666 | 
            +
                  "special": true
         | 
| 2667 | 
            +
                },
         | 
| 2668 | 
            +
                "151976": {
         | 
| 2669 | 
            +
                  "content": "|<EXTRA_TOKENS_330>|",
         | 
| 2670 | 
            +
                  "lstrip": false,
         | 
| 2671 | 
            +
                  "normalized": false,
         | 
| 2672 | 
            +
                  "rstrip": false,
         | 
| 2673 | 
            +
                  "single_word": false,
         | 
| 2674 | 
            +
                  "special": true
         | 
| 2675 | 
            +
                },
         | 
| 2676 | 
            +
                "151977": {
         | 
| 2677 | 
            +
                  "content": "|<EXTRA_TOKENS_331>|",
         | 
| 2678 | 
            +
                  "lstrip": false,
         | 
| 2679 | 
            +
                  "normalized": false,
         | 
| 2680 | 
            +
                  "rstrip": false,
         | 
| 2681 | 
            +
                  "single_word": false,
         | 
| 2682 | 
            +
                  "special": true
         | 
| 2683 | 
            +
                },
         | 
| 2684 | 
            +
                "151978": {
         | 
| 2685 | 
            +
                  "content": "|<EXTRA_TOKENS_332>|",
         | 
| 2686 | 
            +
                  "lstrip": false,
         | 
| 2687 | 
            +
                  "normalized": false,
         | 
| 2688 | 
            +
                  "rstrip": false,
         | 
| 2689 | 
            +
                  "single_word": false,
         | 
| 2690 | 
            +
                  "special": true
         | 
| 2691 | 
            +
                },
         | 
| 2692 | 
            +
                "151979": {
         | 
| 2693 | 
            +
                  "content": "|<EXTRA_TOKENS_333>|",
         | 
| 2694 | 
            +
                  "lstrip": false,
         | 
| 2695 | 
            +
                  "normalized": false,
         | 
| 2696 | 
            +
                  "rstrip": false,
         | 
| 2697 | 
            +
                  "single_word": false,
         | 
| 2698 | 
            +
                  "special": true
         | 
| 2699 | 
            +
                },
         | 
| 2700 | 
            +
                "151980": {
         | 
| 2701 | 
            +
                  "content": "|<EXTRA_TOKENS_334>|",
         | 
| 2702 | 
            +
                  "lstrip": false,
         | 
| 2703 | 
            +
                  "normalized": false,
         | 
| 2704 | 
            +
                  "rstrip": false,
         | 
| 2705 | 
            +
                  "single_word": false,
         | 
| 2706 | 
            +
                  "special": true
         | 
| 2707 | 
            +
                },
         | 
| 2708 | 
            +
                "151981": {
         | 
| 2709 | 
            +
                  "content": "|<EXTRA_TOKENS_335>|",
         | 
| 2710 | 
            +
                  "lstrip": false,
         | 
| 2711 | 
            +
                  "normalized": false,
         | 
| 2712 | 
            +
                  "rstrip": false,
         | 
| 2713 | 
            +
                  "single_word": false,
         | 
| 2714 | 
            +
                  "special": true
         | 
| 2715 | 
            +
                },
         | 
| 2716 | 
            +
                "151982": {
         | 
| 2717 | 
            +
                  "content": "|<EXTRA_TOKENS_336>|",
         | 
| 2718 | 
            +
                  "lstrip": false,
         | 
| 2719 | 
            +
                  "normalized": false,
         | 
| 2720 | 
            +
                  "rstrip": false,
         | 
| 2721 | 
            +
                  "single_word": false,
         | 
| 2722 | 
            +
                  "special": true
         | 
| 2723 | 
            +
                },
         | 
| 2724 | 
            +
                "151983": {
         | 
| 2725 | 
            +
                  "content": "|<EXTRA_TOKENS_337>|",
         | 
| 2726 | 
            +
                  "lstrip": false,
         | 
| 2727 | 
            +
                  "normalized": false,
         | 
| 2728 | 
            +
                  "rstrip": false,
         | 
| 2729 | 
            +
                  "single_word": false,
         | 
| 2730 | 
            +
                  "special": true
         | 
| 2731 | 
            +
                },
         | 
| 2732 | 
            +
                "151984": {
         | 
| 2733 | 
            +
                  "content": "|<EXTRA_TOKENS_338>|",
         | 
| 2734 | 
            +
                  "lstrip": false,
         | 
| 2735 | 
            +
                  "normalized": false,
         | 
| 2736 | 
            +
                  "rstrip": false,
         | 
| 2737 | 
            +
                  "single_word": false,
         | 
| 2738 | 
            +
                  "special": true
         | 
| 2739 | 
            +
                },
         | 
| 2740 | 
            +
                "151985": {
         | 
| 2741 | 
            +
                  "content": "|<EXTRA_TOKENS_339>|",
         | 
| 2742 | 
            +
                  "lstrip": false,
         | 
| 2743 | 
            +
                  "normalized": false,
         | 
| 2744 | 
            +
                  "rstrip": false,
         | 
| 2745 | 
            +
                  "single_word": false,
         | 
| 2746 | 
            +
                  "special": true
         | 
| 2747 | 
            +
                },
         | 
| 2748 | 
            +
                "151986": {
         | 
| 2749 | 
            +
                  "content": "|<EXTRA_TOKENS_340>|",
         | 
| 2750 | 
            +
                  "lstrip": false,
         | 
| 2751 | 
            +
                  "normalized": false,
         | 
| 2752 | 
            +
                  "rstrip": false,
         | 
| 2753 | 
            +
                  "single_word": false,
         | 
| 2754 | 
            +
                  "special": true
         | 
| 2755 | 
            +
                },
         | 
| 2756 | 
            +
                "151987": {
         | 
| 2757 | 
            +
                  "content": "|<EXTRA_TOKENS_341>|",
         | 
| 2758 | 
            +
                  "lstrip": false,
         | 
| 2759 | 
            +
                  "normalized": false,
         | 
| 2760 | 
            +
                  "rstrip": false,
         | 
| 2761 | 
            +
                  "single_word": false,
         | 
| 2762 | 
            +
                  "special": true
         | 
| 2763 | 
            +
                },
         | 
| 2764 | 
            +
                "151988": {
         | 
| 2765 | 
            +
                  "content": "|<EXTRA_TOKENS_342>|",
         | 
| 2766 | 
            +
                  "lstrip": false,
         | 
| 2767 | 
            +
                  "normalized": false,
         | 
| 2768 | 
            +
                  "rstrip": false,
         | 
| 2769 | 
            +
                  "single_word": false,
         | 
| 2770 | 
            +
                  "special": true
         | 
| 2771 | 
            +
                },
         | 
| 2772 | 
            +
                "151989": {
         | 
| 2773 | 
            +
                  "content": "|<EXTRA_TOKENS_343>|",
         | 
| 2774 | 
            +
                  "lstrip": false,
         | 
| 2775 | 
            +
                  "normalized": false,
         | 
| 2776 | 
            +
                  "rstrip": false,
         | 
| 2777 | 
            +
                  "single_word": false,
         | 
| 2778 | 
            +
                  "special": true
         | 
| 2779 | 
            +
                },
         | 
| 2780 | 
            +
                "151990": {
         | 
| 2781 | 
            +
                  "content": "|<EXTRA_TOKENS_344>|",
         | 
| 2782 | 
            +
                  "lstrip": false,
         | 
| 2783 | 
            +
                  "normalized": false,
         | 
| 2784 | 
            +
                  "rstrip": false,
         | 
| 2785 | 
            +
                  "single_word": false,
         | 
| 2786 | 
+      "special": true
+    },
+    "151991": {
+      "content": "|<EXTRA_TOKENS_345>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151992": {
+      "content": "|<EXTRA_TOKENS_346>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151993": {
+      "content": "|<EXTRA_TOKENS_347>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151994": {
+      "content": "|<EXTRA_TOKENS_348>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151995": {
+      "content": "|<EXTRA_TOKENS_349>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151996": {
+      "content": "|<EXTRA_TOKENS_350>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151997": {
+      "content": "|<EXTRA_TOKENS_351>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151998": {
+      "content": "|<EXTRA_TOKENS_352>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151999": {
+      "content": "|<EXTRA_TOKENS_353>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152000": {
+      "content": "|<EXTRA_TOKENS_354>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152001": {
+      "content": "|<EXTRA_TOKENS_355>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152002": {
+      "content": "|<EXTRA_TOKENS_356>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152003": {
+      "content": "|<EXTRA_TOKENS_357>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152004": {
+      "content": "|<EXTRA_TOKENS_358>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152005": {
+      "content": "|<EXTRA_TOKENS_359>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152006": {
+      "content": "|<EXTRA_TOKENS_360>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152007": {
+      "content": "|<EXTRA_TOKENS_361>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152008": {
+      "content": "|<EXTRA_TOKENS_362>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152009": {
+      "content": "|<EXTRA_TOKENS_363>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152010": {
+      "content": "|<EXTRA_TOKENS_364>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152011": {
+      "content": "|<EXTRA_TOKENS_365>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152012": {
+      "content": "|<EXTRA_TOKENS_366>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152013": {
+      "content": "|<EXTRA_TOKENS_367>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152014": {
+      "content": "|<EXTRA_TOKENS_368>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152015": {
+      "content": "|<EXTRA_TOKENS_369>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152016": {
+      "content": "|<EXTRA_TOKENS_370>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152017": {
+      "content": "|<EXTRA_TOKENS_371>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152018": {
+      "content": "|<EXTRA_TOKENS_372>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152019": {
+      "content": "|<EXTRA_TOKENS_373>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152020": {
+      "content": "|<EXTRA_TOKENS_374>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152021": {
+      "content": "|<EXTRA_TOKENS_375>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152022": {
+      "content": "|<EXTRA_TOKENS_376>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152023": {
+      "content": "|<EXTRA_TOKENS_377>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152024": {
+      "content": "|<EXTRA_TOKENS_378>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152025": {
+      "content": "|<EXTRA_TOKENS_379>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152026": {
+      "content": "|<EXTRA_TOKENS_380>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152027": {
+      "content": "|<EXTRA_TOKENS_381>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152028": {
+      "content": "|<EXTRA_TOKENS_382>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152029": {
+      "content": "|<EXTRA_TOKENS_383>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152030": {
+      "content": "|<EXTRA_TOKENS_384>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152031": {
+      "content": "|<EXTRA_TOKENS_385>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152032": {
+      "content": "|<EXTRA_TOKENS_386>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152033": {
+      "content": "|<EXTRA_TOKENS_387>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152034": {
+      "content": "|<EXTRA_TOKENS_388>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152035": {
+      "content": "|<EXTRA_TOKENS_389>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152036": {
+      "content": "|<EXTRA_TOKENS_390>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152037": {
+      "content": "|<EXTRA_TOKENS_391>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152038": {
+      "content": "|<EXTRA_TOKENS_392>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152039": {
+      "content": "|<EXTRA_TOKENS_393>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152040": {
+      "content": "|<EXTRA_TOKENS_394>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152041": {
+      "content": "|<EXTRA_TOKENS_395>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152042": {
+      "content": "|<EXTRA_TOKENS_396>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152043": {
+      "content": "|<EXTRA_TOKENS_397>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152044": {
+      "content": "|<EXTRA_TOKENS_398>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152045": {
+      "content": "|<EXTRA_TOKENS_399>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152046": {
+      "content": "|<EXTRA_TOKENS_400>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152047": {
+      "content": "|<EXTRA_TOKENS_401>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152048": {
+      "content": "|<EXTRA_TOKENS_402>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152049": {
+      "content": "|<EXTRA_TOKENS_403>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152050": {
+      "content": "|<EXTRA_TOKENS_404>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152051": {
+      "content": "|<EXTRA_TOKENS_405>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152052": {
+      "content": "|<EXTRA_TOKENS_406>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152053": {
+      "content": "|<EXTRA_TOKENS_407>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152054": {
+      "content": "|<EXTRA_TOKENS_408>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152055": {
+      "content": "|<EXTRA_TOKENS_409>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152056": {
+      "content": "|<EXTRA_TOKENS_410>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152057": {
+      "content": "|<EXTRA_TOKENS_411>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152058": {
+      "content": "|<EXTRA_TOKENS_412>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152059": {
+      "content": "|<EXTRA_TOKENS_413>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152060": {
+      "content": "|<EXTRA_TOKENS_414>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152061": {
+      "content": "|<EXTRA_TOKENS_415>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152062": {
+      "content": "|<EXTRA_TOKENS_416>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152063": {
+      "content": "|<EXTRA_TOKENS_417>|",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152064": {
+      "content": "<im_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152065": {
+      "content": "<im_end>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152066": {
+      "content": "<im_patch>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152067": {
+      "content": "<im_col>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "152068": {
+      "content": "<|image|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "|<EXTRA_TOKENS_0>|",
+    "|<EXTRA_TOKENS_1>|",
+    "|<EXTRA_TOKENS_2>|",
+    "|<EXTRA_TOKENS_3>|",
+    "|<EXTRA_TOKENS_4>|",
+    "|<EXTRA_TOKENS_5>|",
+    "|<EXTRA_TOKENS_6>|",
+    "|<EXTRA_TOKENS_7>|",
+    "|<EXTRA_TOKENS_8>|",
+    "|<EXTRA_TOKENS_9>|",
+    "|<EXTRA_TOKENS_10>|",
+    "|<EXTRA_TOKENS_11>|",
+    "|<EXTRA_TOKENS_12>|",
+    "|<EXTRA_TOKENS_13>|",
+    "|<EXTRA_TOKENS_14>|",
+    "|<EXTRA_TOKENS_15>|",
+    "|<EXTRA_TOKENS_16>|",
+    "|<EXTRA_TOKENS_17>|",
+    "|<EXTRA_TOKENS_18>|",
+    "|<EXTRA_TOKENS_19>|",
+    "|<EXTRA_TOKENS_20>|",
+    "|<EXTRA_TOKENS_21>|",
+    "|<EXTRA_TOKENS_22>|",
+    "|<EXTRA_TOKENS_23>|",
+    "|<EXTRA_TOKENS_24>|",
+    "|<EXTRA_TOKENS_25>|",
+    "|<EXTRA_TOKENS_26>|",
+    "|<EXTRA_TOKENS_27>|",
+    "|<EXTRA_TOKENS_28>|",
+    "|<EXTRA_TOKENS_29>|",
+    "|<EXTRA_TOKENS_30>|",
+    "|<EXTRA_TOKENS_31>|",
+    "|<EXTRA_TOKENS_32>|",
+    "|<EXTRA_TOKENS_33>|",
+    "|<EXTRA_TOKENS_34>|",
+    "|<EXTRA_TOKENS_35>|",
+    "|<EXTRA_TOKENS_36>|",
+    "|<EXTRA_TOKENS_37>|",
+    "|<EXTRA_TOKENS_38>|",
+    "|<EXTRA_TOKENS_39>|",
+    "|<EXTRA_TOKENS_40>|",
+    "|<EXTRA_TOKENS_41>|",
+    "|<EXTRA_TOKENS_42>|",
+    "|<EXTRA_TOKENS_43>|",
+    "|<EXTRA_TOKENS_44>|",
+    "|<EXTRA_TOKENS_45>|",
+    "|<EXTRA_TOKENS_46>|",
+    "|<EXTRA_TOKENS_47>|",
+    "|<EXTRA_TOKENS_48>|",
+    "|<EXTRA_TOKENS_49>|",
+    "|<EXTRA_TOKENS_50>|",
+    "|<EXTRA_TOKENS_51>|",
+    "|<EXTRA_TOKENS_52>|",
+    "|<EXTRA_TOKENS_53>|",
+    "|<EXTRA_TOKENS_54>|",
+    "|<EXTRA_TOKENS_55>|",
+    "|<EXTRA_TOKENS_56>|",
+    "|<EXTRA_TOKENS_57>|",
+    "|<EXTRA_TOKENS_58>|",
+    "|<EXTRA_TOKENS_59>|",
+    "|<EXTRA_TOKENS_60>|",
+    "|<EXTRA_TOKENS_61>|",
+    "|<EXTRA_TOKENS_62>|",
+    "|<EXTRA_TOKENS_63>|",
+    "|<EXTRA_TOKENS_64>|",
+    "|<EXTRA_TOKENS_65>|",
+    "|<EXTRA_TOKENS_66>|",
+    "|<EXTRA_TOKENS_67>|",
+    "|<EXTRA_TOKENS_68>|",
+    "|<EXTRA_TOKENS_69>|",
+    "|<EXTRA_TOKENS_70>|",
+    "|<EXTRA_TOKENS_71>|",
+    "|<EXTRA_TOKENS_72>|",
+    "|<EXTRA_TOKENS_73>|",
+    "|<EXTRA_TOKENS_74>|",
+    "|<EXTRA_TOKENS_75>|",
+    "|<EXTRA_TOKENS_76>|",
+    "|<EXTRA_TOKENS_77>|",
+    "|<EXTRA_TOKENS_78>|",
+    "|<EXTRA_TOKENS_79>|",
+    "|<EXTRA_TOKENS_80>|",
+    "|<EXTRA_TOKENS_81>|",
+    "|<EXTRA_TOKENS_82>|",
+    "|<EXTRA_TOKENS_83>|",
+    "|<EXTRA_TOKENS_84>|",
+    "|<EXTRA_TOKENS_85>|",
+    "|<EXTRA_TOKENS_86>|",
+    "|<EXTRA_TOKENS_87>|",
+    "|<EXTRA_TOKENS_88>|",
+    "|<EXTRA_TOKENS_89>|",
+    "|<EXTRA_TOKENS_90>|",
+    "|<EXTRA_TOKENS_91>|",
+    "|<EXTRA_TOKENS_92>|",
+    "|<EXTRA_TOKENS_93>|",
+    "|<EXTRA_TOKENS_94>|",
+    "|<EXTRA_TOKENS_95>|",
+    "|<EXTRA_TOKENS_96>|",
+    "|<EXTRA_TOKENS_97>|",
+    "|<EXTRA_TOKENS_98>|",
+    "|<EXTRA_TOKENS_99>|",
+    "|<EXTRA_TOKENS_100>|",
+    "|<EXTRA_TOKENS_101>|",
+    "|<EXTRA_TOKENS_102>|",
+    "|<EXTRA_TOKENS_103>|",
+    "|<EXTRA_TOKENS_104>|",
+    "|<EXTRA_TOKENS_105>|",
+    "|<EXTRA_TOKENS_106>|",
+    "|<EXTRA_TOKENS_107>|",
+    "|<EXTRA_TOKENS_108>|",
+    "|<EXTRA_TOKENS_109>|",
+    "|<EXTRA_TOKENS_110>|",
+    "|<EXTRA_TOKENS_111>|",
+    "|<EXTRA_TOKENS_112>|",
+    "|<EXTRA_TOKENS_113>|",
+    "|<EXTRA_TOKENS_114>|",
+    "|<EXTRA_TOKENS_115>|",
+    "|<EXTRA_TOKENS_116>|",
+    "|<EXTRA_TOKENS_117>|",
+    "|<EXTRA_TOKENS_118>|",
+    "|<EXTRA_TOKENS_119>|",
+    "|<EXTRA_TOKENS_120>|",
+    "|<EXTRA_TOKENS_121>|",
+    "|<EXTRA_TOKENS_122>|",
+    "|<EXTRA_TOKENS_123>|",
+    "|<EXTRA_TOKENS_124>|",
+    "|<EXTRA_TOKENS_125>|",
+    "|<EXTRA_TOKENS_126>|",
+    "|<EXTRA_TOKENS_127>|",
+    "|<EXTRA_TOKENS_128>|",
+    "|<EXTRA_TOKENS_129>|",
+    "|<EXTRA_TOKENS_130>|",
+    "|<EXTRA_TOKENS_131>|",
+    "|<EXTRA_TOKENS_132>|",
+    "|<EXTRA_TOKENS_133>|",
+    "|<EXTRA_TOKENS_134>|",
+    "|<EXTRA_TOKENS_135>|",
+    "|<EXTRA_TOKENS_136>|",
+    "|<EXTRA_TOKENS_137>|",
+    "|<EXTRA_TOKENS_138>|",
+    "|<EXTRA_TOKENS_139>|",
+    "|<EXTRA_TOKENS_140>|",
+    "|<EXTRA_TOKENS_141>|",
+    "|<EXTRA_TOKENS_142>|",
+    "|<EXTRA_TOKENS_143>|",
+    "|<EXTRA_TOKENS_144>|",
+    "|<EXTRA_TOKENS_145>|",
+    "|<EXTRA_TOKENS_146>|",
+    "|<EXTRA_TOKENS_147>|",
+    "|<EXTRA_TOKENS_148>|",
+    "|<EXTRA_TOKENS_149>|",
+    "|<EXTRA_TOKENS_150>|",
+    "|<EXTRA_TOKENS_151>|",
+    "|<EXTRA_TOKENS_152>|",
+    "|<EXTRA_TOKENS_153>|",
+    "|<EXTRA_TOKENS_154>|",
+    "|<EXTRA_TOKENS_155>|",
+    "|<EXTRA_TOKENS_156>|",
+    "|<EXTRA_TOKENS_157>|",
+    "|<EXTRA_TOKENS_158>|",
+    "|<EXTRA_TOKENS_159>|",
+    "|<EXTRA_TOKENS_160>|",
+    "|<EXTRA_TOKENS_161>|",
+    "|<EXTRA_TOKENS_162>|",
+    "|<EXTRA_TOKENS_163>|",
+    "|<EXTRA_TOKENS_164>|",
+    "|<EXTRA_TOKENS_165>|",
+    "|<EXTRA_TOKENS_166>|",
+    "|<EXTRA_TOKENS_167>|",
+    "|<EXTRA_TOKENS_168>|",
+    "|<EXTRA_TOKENS_169>|",
+    "|<EXTRA_TOKENS_170>|",
+    "|<EXTRA_TOKENS_171>|",
+    "|<EXTRA_TOKENS_172>|",
+    "|<EXTRA_TOKENS_173>|",
+    "|<EXTRA_TOKENS_174>|",
+    "|<EXTRA_TOKENS_175>|",
+    "|<EXTRA_TOKENS_176>|",
+    "|<EXTRA_TOKENS_177>|",
+    "|<EXTRA_TOKENS_178>|",
+    "|<EXTRA_TOKENS_179>|",
+    "|<EXTRA_TOKENS_180>|",
+    "|<EXTRA_TOKENS_181>|",
+    "|<EXTRA_TOKENS_182>|",
+    "|<EXTRA_TOKENS_183>|",
+    "|<EXTRA_TOKENS_184>|",
+    "|<EXTRA_TOKENS_185>|",
+    "|<EXTRA_TOKENS_186>|",
+    "|<EXTRA_TOKENS_187>|",
+    "|<EXTRA_TOKENS_188>|",
+    "|<EXTRA_TOKENS_189>|",
+    "|<EXTRA_TOKENS_190>|",
+    "|<EXTRA_TOKENS_191>|",
+    "|<EXTRA_TOKENS_192>|",
+    "|<EXTRA_TOKENS_193>|",
+    "|<EXTRA_TOKENS_194>|",
+    "|<EXTRA_TOKENS_195>|",
+    "|<EXTRA_TOKENS_196>|",
+    "|<EXTRA_TOKENS_197>|",
+    "|<EXTRA_TOKENS_198>|",
+    "|<EXTRA_TOKENS_199>|",
+    "|<EXTRA_TOKENS_200>|",
+    "|<EXTRA_TOKENS_201>|",
+    "|<EXTRA_TOKENS_202>|",
+    "|<EXTRA_TOKENS_203>|",
+    "|<EXTRA_TOKENS_204>|",
+    "|<EXTRA_TOKENS_205>|",
+    "|<EXTRA_TOKENS_206>|",
+    "|<EXTRA_TOKENS_207>|",
+    "|<EXTRA_TOKENS_208>|",
+    "|<EXTRA_TOKENS_209>|",
+    "|<EXTRA_TOKENS_210>|",
+    "|<EXTRA_TOKENS_211>|",
+    "|<EXTRA_TOKENS_212>|",
+    "|<EXTRA_TOKENS_213>|",
+    "|<EXTRA_TOKENS_214>|",
+    "|<EXTRA_TOKENS_215>|",
+    "|<EXTRA_TOKENS_216>|",
+    "|<EXTRA_TOKENS_217>|",
+    "|<EXTRA_TOKENS_218>|",
+    "|<EXTRA_TOKENS_219>|",
+    "|<EXTRA_TOKENS_220>|",
+    "|<EXTRA_TOKENS_221>|",
+    "|<EXTRA_TOKENS_222>|",
+    "|<EXTRA_TOKENS_223>|",
+    "|<EXTRA_TOKENS_224>|",
+    "|<EXTRA_TOKENS_225>|",
+    "|<EXTRA_TOKENS_226>|",
+    "|<EXTRA_TOKENS_227>|",
+    "|<EXTRA_TOKENS_228>|",
+    "|<EXTRA_TOKENS_229>|",
+    "|<EXTRA_TOKENS_230>|",
+    "|<EXTRA_TOKENS_231>|",
+    "|<EXTRA_TOKENS_232>|",
+    "|<EXTRA_TOKENS_233>|",
+    "|<EXTRA_TOKENS_234>|",
+    "|<EXTRA_TOKENS_235>|",
+    "|<EXTRA_TOKENS_236>|",
+    "|<EXTRA_TOKENS_237>|",
+    "|<EXTRA_TOKENS_238>|",
+    "|<EXTRA_TOKENS_239>|",
+    "|<EXTRA_TOKENS_240>|",
+    "|<EXTRA_TOKENS_241>|",
+    "|<EXTRA_TOKENS_242>|",
+    "|<EXTRA_TOKENS_243>|",
+    "|<EXTRA_TOKENS_244>|",
+    "|<EXTRA_TOKENS_245>|",
+    "|<EXTRA_TOKENS_246>|",
+    "|<EXTRA_TOKENS_247>|",
+    "|<EXTRA_TOKENS_248>|",
+    "|<EXTRA_TOKENS_249>|",
+    "|<EXTRA_TOKENS_250>|",
+    "|<EXTRA_TOKENS_251>|",
+    "|<EXTRA_TOKENS_252>|",
+    "|<EXTRA_TOKENS_253>|",
+    "|<EXTRA_TOKENS_254>|",
            +
                "|<EXTRA_TOKENS_255>|",
         | 
| 3670 | 
            +
                "|<EXTRA_TOKENS_256>|",
         | 
| 3671 | 
            +
                "|<EXTRA_TOKENS_257>|",
         | 
| 3672 | 
            +
                "|<EXTRA_TOKENS_258>|",
         | 
| 3673 | 
            +
                "|<EXTRA_TOKENS_259>|",
         | 
| 3674 | 
            +
                "|<EXTRA_TOKENS_260>|",
         | 
| 3675 | 
            +
                "|<EXTRA_TOKENS_261>|",
         | 
| 3676 | 
            +
                "|<EXTRA_TOKENS_262>|",
         | 
| 3677 | 
            +
                "|<EXTRA_TOKENS_263>|",
         | 
| 3678 | 
            +
                "|<EXTRA_TOKENS_264>|",
         | 
| 3679 | 
            +
                "|<EXTRA_TOKENS_265>|",
         | 
| 3680 | 
            +
                "|<EXTRA_TOKENS_266>|",
         | 
| 3681 | 
            +
                "|<EXTRA_TOKENS_267>|",
         | 
| 3682 | 
            +
                "|<EXTRA_TOKENS_268>|",
         | 
| 3683 | 
            +
                "|<EXTRA_TOKENS_269>|",
         | 
| 3684 | 
            +
                "|<EXTRA_TOKENS_270>|",
         | 
| 3685 | 
            +
                "|<EXTRA_TOKENS_271>|",
         | 
| 3686 | 
            +
                "|<EXTRA_TOKENS_272>|",
         | 
| 3687 | 
            +
                "|<EXTRA_TOKENS_273>|",
         | 
| 3688 | 
            +
                "|<EXTRA_TOKENS_274>|",
         | 
| 3689 | 
            +
                "|<EXTRA_TOKENS_275>|",
         | 
| 3690 | 
            +
                "|<EXTRA_TOKENS_276>|",
         | 
| 3691 | 
            +
                "|<EXTRA_TOKENS_277>|",
         | 
| 3692 | 
            +
                "|<EXTRA_TOKENS_278>|",
         | 
| 3693 | 
            +
                "|<EXTRA_TOKENS_279>|",
         | 
| 3694 | 
            +
                "|<EXTRA_TOKENS_280>|",
         | 
| 3695 | 
            +
                "|<EXTRA_TOKENS_281>|",
         | 
| 3696 | 
            +
                "|<EXTRA_TOKENS_282>|",
         | 
| 3697 | 
            +
                "|<EXTRA_TOKENS_283>|",
         | 
| 3698 | 
            +
                "|<EXTRA_TOKENS_284>|",
         | 
| 3699 | 
            +
                "|<EXTRA_TOKENS_285>|",
         | 
| 3700 | 
            +
                "|<EXTRA_TOKENS_286>|",
         | 
| 3701 | 
            +
                "|<EXTRA_TOKENS_287>|",
         | 
| 3702 | 
            +
                "|<EXTRA_TOKENS_288>|",
         | 
| 3703 | 
            +
                "|<EXTRA_TOKENS_289>|",
         | 
| 3704 | 
            +
                "|<EXTRA_TOKENS_290>|",
         | 
| 3705 | 
            +
                "|<EXTRA_TOKENS_291>|",
         | 
| 3706 | 
            +
                "|<EXTRA_TOKENS_292>|",
         | 
| 3707 | 
            +
                "|<EXTRA_TOKENS_293>|",
         | 
| 3708 | 
            +
                "|<EXTRA_TOKENS_294>|",
         | 
| 3709 | 
            +
                "|<EXTRA_TOKENS_295>|",
         | 
| 3710 | 
            +
                "|<EXTRA_TOKENS_296>|",
         | 
| 3711 | 
            +
                "|<EXTRA_TOKENS_297>|",
         | 
| 3712 | 
            +
                "|<EXTRA_TOKENS_298>|",
         | 
| 3713 | 
            +
                "|<EXTRA_TOKENS_299>|",
         | 
| 3714 | 
            +
                "|<EXTRA_TOKENS_300>|",
         | 
| 3715 | 
            +
                "|<EXTRA_TOKENS_301>|",
         | 
| 3716 | 
            +
                "|<EXTRA_TOKENS_302>|",
         | 
| 3717 | 
            +
                "|<EXTRA_TOKENS_303>|",
         | 
| 3718 | 
            +
                "|<EXTRA_TOKENS_304>|",
         | 
| 3719 | 
            +
                "|<EXTRA_TOKENS_305>|",
         | 
| 3720 | 
            +
                "|<EXTRA_TOKENS_306>|",
         | 
| 3721 | 
            +
                "|<EXTRA_TOKENS_307>|",
         | 
| 3722 | 
            +
                "|<EXTRA_TOKENS_308>|",
         | 
| 3723 | 
            +
                "|<EXTRA_TOKENS_309>|",
         | 
| 3724 | 
            +
                "|<EXTRA_TOKENS_310>|",
         | 
| 3725 | 
            +
                "|<EXTRA_TOKENS_311>|",
         | 
| 3726 | 
            +
                "|<EXTRA_TOKENS_312>|",
         | 
| 3727 | 
            +
                "|<EXTRA_TOKENS_313>|",
         | 
| 3728 | 
            +
                "|<EXTRA_TOKENS_314>|",
         | 
| 3729 | 
            +
                "|<EXTRA_TOKENS_315>|",
         | 
| 3730 | 
            +
                "|<EXTRA_TOKENS_316>|",
         | 
| 3731 | 
            +
                "|<EXTRA_TOKENS_317>|",
         | 
| 3732 | 
            +
                "|<EXTRA_TOKENS_318>|",
         | 
| 3733 | 
            +
                "|<EXTRA_TOKENS_319>|",
         | 
| 3734 | 
            +
                "|<EXTRA_TOKENS_320>|",
         | 
| 3735 | 
            +
                "|<EXTRA_TOKENS_321>|",
         | 
| 3736 | 
            +
                "|<EXTRA_TOKENS_322>|",
         | 
| 3737 | 
            +
                "|<EXTRA_TOKENS_323>|",
         | 
| 3738 | 
            +
                "|<EXTRA_TOKENS_324>|",
         | 
| 3739 | 
            +
                "|<EXTRA_TOKENS_325>|",
         | 
| 3740 | 
            +
                "|<EXTRA_TOKENS_326>|",
         | 
| 3741 | 
            +
                "|<EXTRA_TOKENS_327>|",
         | 
| 3742 | 
            +
                "|<EXTRA_TOKENS_328>|",
         | 
| 3743 | 
            +
                "|<EXTRA_TOKENS_329>|",
         | 
| 3744 | 
            +
                "|<EXTRA_TOKENS_330>|",
         | 
| 3745 | 
            +
                "|<EXTRA_TOKENS_331>|",
         | 
| 3746 | 
            +
                "|<EXTRA_TOKENS_332>|",
         | 
| 3747 | 
            +
                "|<EXTRA_TOKENS_333>|",
         | 
| 3748 | 
            +
                "|<EXTRA_TOKENS_334>|",
         | 
| 3749 | 
            +
                "|<EXTRA_TOKENS_335>|",
         | 
| 3750 | 
            +
                "|<EXTRA_TOKENS_336>|",
         | 
| 3751 | 
            +
                "|<EXTRA_TOKENS_337>|",
         | 
| 3752 | 
            +
                "|<EXTRA_TOKENS_338>|",
         | 
| 3753 | 
            +
                "|<EXTRA_TOKENS_339>|",
         | 
| 3754 | 
            +
                "|<EXTRA_TOKENS_340>|",
         | 
| 3755 | 
            +
                "|<EXTRA_TOKENS_341>|",
         | 
| 3756 | 
            +
                "|<EXTRA_TOKENS_342>|",
         | 
| 3757 | 
            +
                "|<EXTRA_TOKENS_343>|",
         | 
| 3758 | 
            +
                "|<EXTRA_TOKENS_344>|",
         | 
| 3759 | 
            +
                "|<EXTRA_TOKENS_345>|",
         | 
| 3760 | 
            +
                "|<EXTRA_TOKENS_346>|",
         | 
| 3761 | 
            +
                "|<EXTRA_TOKENS_347>|",
         | 
| 3762 | 
            +
                "|<EXTRA_TOKENS_348>|",
         | 
| 3763 | 
            +
                "|<EXTRA_TOKENS_349>|",
         | 
| 3764 | 
            +
                "|<EXTRA_TOKENS_350>|",
         | 
| 3765 | 
            +
                "|<EXTRA_TOKENS_351>|",
         | 
| 3766 | 
            +
                "|<EXTRA_TOKENS_352>|",
         | 
| 3767 | 
            +
                "|<EXTRA_TOKENS_353>|",
         | 
| 3768 | 
            +
                "|<EXTRA_TOKENS_354>|",
         | 
| 3769 | 
            +
                "|<EXTRA_TOKENS_355>|",
         | 
| 3770 | 
            +
                "|<EXTRA_TOKENS_356>|",
         | 
| 3771 | 
            +
                "|<EXTRA_TOKENS_357>|",
         | 
| 3772 | 
            +
                "|<EXTRA_TOKENS_358>|",
         | 
| 3773 | 
            +
                "|<EXTRA_TOKENS_359>|",
         | 
| 3774 | 
            +
                "|<EXTRA_TOKENS_360>|",
         | 
| 3775 | 
            +
                "|<EXTRA_TOKENS_361>|",
         | 
| 3776 | 
            +
                "|<EXTRA_TOKENS_362>|",
         | 
| 3777 | 
            +
                "|<EXTRA_TOKENS_363>|",
         | 
| 3778 | 
            +
                "|<EXTRA_TOKENS_364>|",
         | 
| 3779 | 
            +
                "|<EXTRA_TOKENS_365>|",
         | 
| 3780 | 
            +
                "|<EXTRA_TOKENS_366>|",
         | 
| 3781 | 
            +
                "|<EXTRA_TOKENS_367>|",
         | 
| 3782 | 
            +
                "|<EXTRA_TOKENS_368>|",
         | 
| 3783 | 
            +
                "|<EXTRA_TOKENS_369>|",
         | 
| 3784 | 
            +
                "|<EXTRA_TOKENS_370>|",
         | 
| 3785 | 
            +
                "|<EXTRA_TOKENS_371>|",
         | 
| 3786 | 
            +
                "|<EXTRA_TOKENS_372>|",
         | 
| 3787 | 
            +
                "|<EXTRA_TOKENS_373>|",
         | 
| 3788 | 
            +
                "|<EXTRA_TOKENS_374>|",
         | 
| 3789 | 
            +
                "|<EXTRA_TOKENS_375>|",
         | 
| 3790 | 
            +
                "|<EXTRA_TOKENS_376>|",
         | 
| 3791 | 
            +
                "|<EXTRA_TOKENS_377>|",
         | 
| 3792 | 
            +
                "|<EXTRA_TOKENS_378>|",
         | 
| 3793 | 
            +
                "|<EXTRA_TOKENS_379>|",
         | 
| 3794 | 
            +
                "|<EXTRA_TOKENS_380>|",
         | 
| 3795 | 
            +
                "|<EXTRA_TOKENS_381>|",
         | 
| 3796 | 
            +
                "|<EXTRA_TOKENS_382>|",
         | 
| 3797 | 
            +
                "|<EXTRA_TOKENS_383>|",
         | 
| 3798 | 
            +
                "|<EXTRA_TOKENS_384>|",
         | 
| 3799 | 
            +
                "|<EXTRA_TOKENS_385>|",
         | 
| 3800 | 
            +
                "|<EXTRA_TOKENS_386>|",
         | 
| 3801 | 
            +
                "|<EXTRA_TOKENS_387>|",
         | 
| 3802 | 
            +
                "|<EXTRA_TOKENS_388>|",
         | 
| 3803 | 
            +
                "|<EXTRA_TOKENS_389>|",
         | 
| 3804 | 
            +
                "|<EXTRA_TOKENS_390>|",
         | 
| 3805 | 
            +
                "|<EXTRA_TOKENS_391>|",
         | 
| 3806 | 
            +
                "|<EXTRA_TOKENS_392>|",
         | 
| 3807 | 
            +
                "|<EXTRA_TOKENS_393>|",
         | 
| 3808 | 
            +
                "|<EXTRA_TOKENS_394>|",
         | 
| 3809 | 
            +
                "|<EXTRA_TOKENS_395>|",
         | 
| 3810 | 
            +
                "|<EXTRA_TOKENS_396>|",
         | 
| 3811 | 
            +
                "|<EXTRA_TOKENS_397>|",
         | 
| 3812 | 
            +
                "|<EXTRA_TOKENS_398>|",
         | 
| 3813 | 
            +
                "|<EXTRA_TOKENS_399>|",
         | 
| 3814 | 
            +
                "|<EXTRA_TOKENS_400>|",
         | 
| 3815 | 
            +
                "|<EXTRA_TOKENS_401>|",
         | 
| 3816 | 
            +
                "|<EXTRA_TOKENS_402>|",
         | 
| 3817 | 
            +
                "|<EXTRA_TOKENS_403>|",
         | 
| 3818 | 
            +
                "|<EXTRA_TOKENS_404>|",
         | 
| 3819 | 
            +
                "|<EXTRA_TOKENS_405>|",
         | 
| 3820 | 
            +
                "|<EXTRA_TOKENS_406>|",
         | 
| 3821 | 
            +
                "|<EXTRA_TOKENS_407>|",
         | 
| 3822 | 
            +
                "|<EXTRA_TOKENS_408>|",
         | 
| 3823 | 
            +
                "|<EXTRA_TOKENS_409>|",
         | 
| 3824 | 
            +
                "|<EXTRA_TOKENS_410>|",
         | 
| 3825 | 
            +
                "|<EXTRA_TOKENS_411>|",
         | 
| 3826 | 
            +
                "|<EXTRA_TOKENS_412>|",
         | 
| 3827 | 
            +
                "|<EXTRA_TOKENS_413>|",
         | 
| 3828 | 
            +
                "|<EXTRA_TOKENS_414>|",
         | 
| 3829 | 
            +
                "|<EXTRA_TOKENS_415>|",
         | 
| 3830 | 
            +
                "|<EXTRA_TOKENS_416>|",
         | 
| 3831 | 
            +
                "|<EXTRA_TOKENS_417>|",
         | 
| 3832 | 
            +
                "<im_start>",
         | 
| 3833 | 
            +
                "<im_end>",
         | 
| 3834 | 
            +
                "<im_patch>",
         | 
| 3835 | 
            +
                "<im_col>",
         | 
| 3836 | 
            +
                "<|image|>"
         | 
| 3837 | 
            +
              ],
         | 
| 3838 | 
            +
              "auto_map": {
         | 
| 3839 | 
            +
                "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
         | 
| 3840 | 
            +
              },
         | 
| 3841 | 
            +
              "bos_token": null,
         | 
| 3842 | 
            +
              "chat_template": "{% for message in messages -%}\n        {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n          (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n        {%- endif -%}\n        {{ message['role'].capitalize() + ': ' + message['content'] }}\n        {%- if not loop.last -%}\n        {{ ' ' }}\n        {%- endif %}\n        {%- endfor -%}\n        {%- if add_generation_prompt -%}\n        {{ ' Assistant:' }}\n        {%- endif %}",
         | 
| 3843 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 3844 | 
            +
              "eos_token": "<|endoftext|>",
         | 
| 3845 | 
            +
              "errors": "replace",
         | 
| 3846 | 
            +
              "extra_special_tokens": {},
         | 
| 3847 | 
            +
              "model_max_length": 32768,
         | 
| 3848 | 
            +
              "pad_token": "<|endoftext|>",
         | 
| 3849 | 
            +
              "processor_class": "MolmoProcessor",
         | 
| 3850 | 
            +
              "split_special_tokens": false,
         | 
| 3851 | 
            +
              "tokenizer_class": "Qwen2Tokenizer",
         | 
| 3852 | 
            +
              "unk_token": null
         | 
| 3853 | 
            +
            }
         | 
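The chat_template above renders conversations as alternating "User:"/"Assistant:" turns and appends " Assistant:" as the generation prompt. A minimal sketch of rendering a prompt with it (the checkpoint path is a placeholder; the declared tokenizer_class is the stock Qwen2Tokenizer, so text-only use needs no remote code, while AutoProcessor would need trust_remote_code=True to pick up the bundled preprocessing_molmo.MolmoProcessor):

    from transformers import AutoTokenizer

    # "path/to/checkpoint" is a placeholder for a local clone of this repo.
    tok = AutoTokenizer.from_pretrained("path/to/checkpoint")

    messages = [{"role": "user", "content": "Describe the image."}]
    prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    print(prompt)  # -> "User: Describe the image. Assistant:"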
    	
    trainer_state.json
    ADDED

    The diff for this file is too large to render; see the raw diff.
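trainer_state.json records the Trainer's running state. A sketch for pulling the logged loss curve out of it, assuming the standard Hugging Face Trainer layout with a "log_history" list of per-logging-step dicts:

    import json

    with open("trainer_state.json") as f:
        state = json.load(f)

    # Training log entries carry a "loss" key; eval entries use other keys.
    points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
    print(f"{len(points)} logged points, final loss {points[-1][1]:.4f} at step {points[-1][0]}")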
    	
    training_args.bin
    ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:e2902de978a145646d4fc04bc669ce69e49fd7ff0cfa86e3899f29824355cbdf
size 10680
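Only a Git LFS pointer is committed for training_args.bin: the binary itself lives in LFS storage and is identified by the SHA-256 and size above. A sketch for verifying a downloaded copy against the pointer:

    import hashlib

    # Hash the local file in 1 MiB chunks and compare against the pointer's oid.
    h = hashlib.sha256()
    with open("training_args.bin", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == "e2902de978a145646d4fc04bc669ce69e49fd7ff0cfa86e3899f29824355cbdf"
    print("training_args.bin matches the LFS pointer")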
    	
    vocab.json
    ADDED

    The diff for this file is too large to render; see the raw diff.
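vocab.json holds the base BPE vocabulary consumed by the Qwen2Tokenizer declared in tokenizer_config.json. Assuming the usual token-string-to-id JSON mapping, a quick sanity check looks like:

    import json

    with open("vocab.json", encoding="utf-8") as f:
        vocab = json.load(f)  # token string -> integer id
    print(len(vocab))  # base vocabulary size, before the added special tokens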
    	
    zero_to_fp32.py
    ADDED (760 lines)

#!/usr/bin/env python

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# This script extracts fp32 consolidated weights from ZeRO 1, 2 and 3 DeepSpeed checkpoints. It gets
# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
# application.
#
# example:
#   python zero_to_fp32.py . output_dir/
#   or
#   python zero_to_fp32.py . output_dir/ --safe_serialization

import argparse
import torch
import glob
import math
import os
import re
import gc
import json
import numpy as np
from tqdm import tqdm
from collections import OrderedDict
from dataclasses import dataclass

# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
# DeepSpeed data structures it has to be available in the current python environment.
from deepspeed.utils import logger
from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)


@dataclass
class zero_model_state:
    buffers: dict()
    param_shapes: dict()
    shared_params: list
    ds_version: int
    frozen_param_shapes: dict()
    frozen_param_fragments: dict()


debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]
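# (e.g. sorted(["rank_10", "rank_2"], key=natural_keys) yields ["rank_2", "rank_10"],
# which plain lexicographic sorting would not.)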


def get_model_state_file(checkpoint_dir, zero_stage):
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if len(ckpt_files) == 0:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")


def parse_model_states(files):
    zero_model_states = []
    for file in files:
        state_dict = torch.load(file, map_location=device, weights_only=False)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
        param_shapes = state_dict[PARAM_SHAPES]

        # collect parameters that are included in param_shapes
        param_names = []
        for s in param_shapes:
            for name in s.keys():
                param_names.append(name)

        # update with frozen parameters
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None:
            if debug:
                print(f"Found frozen_param_shapes: {frozen_param_shapes}")
            param_names += list(frozen_param_shapes.keys())

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)

        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        z_model_state = zero_model_state(buffers=buffers,
                                         param_shapes=param_shapes,
                                         shared_params=shared_params,
                                         ds_version=ds_version,
                                         frozen_param_shapes=frozen_param_shapes,
                                         frozen_param_fragments=frozen_param_fragments)
        zero_model_states.append(z_model_state)

    return zero_model_states
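# Note: the *_model_states.pt files carry everything except the trainable fp32
# weights themselves -- buffers, parameter shapes, shared-parameter aliases and
# frozen-parameter fragments. The fp32 master weights are recovered from the
# *_optim_states.pt files by parse_optim_states() below.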


def parse_optim_states(files, ds_checkpoint_dir):
    total_files = len(files)
    state_dicts = []
    for f in tqdm(files, desc='Loading checkpoint shards'):
        state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(state_dict)

    if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.

    if type(world_size) is list:
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
    elif zero_stage == 3:
        fp32_groups_key = FP32_FLAT_GROUPS
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))]
    return zero_stage, world_size, fp32_flat_groups
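# The returned fp32_flat_groups is indexed [rank][param_group] and holds, for
# each rank, one flat 1-D fp32 tensor per parameter group -- that rank's
# partition of the group's master weights.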


def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
    """
    Returns fp32 state_dict reconstructed from ds checkpoint

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)

    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    optim_files = get_optim_files(ds_checkpoint_dir)
    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    model_files = get_model_state_files(ds_checkpoint_dir)

    zero_model_states = parse_model_states(model_files)
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    if zero_stage <= 2:
        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                                          exclude_frozen_parameters)
    elif zero_stage == 3:
        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                                          exclude_frozen_parameters)
         | 
| 213 | 
            +
             | 
| 214 | 
            +
             | 
| 215 | 
            +
            def _zero2_merge_frozen_params(state_dict, zero_model_states):
         | 
| 216 | 
            +
                if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
         | 
| 217 | 
            +
                    return
         | 
| 218 | 
            +
             | 
| 219 | 
            +
                frozen_param_shapes = zero_model_states[0].frozen_param_shapes
         | 
| 220 | 
            +
                frozen_param_fragments = zero_model_states[0].frozen_param_fragments
         | 
| 221 | 
            +
             | 
| 222 | 
            +
                if debug:
         | 
| 223 | 
            +
                    num_elem = sum(s.numel() for s in frozen_param_shapes.values())
         | 
| 224 | 
            +
                    print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
         | 
| 225 | 
            +
             | 
| 226 | 
            +
                    wanted_params = len(frozen_param_shapes)
         | 
| 227 | 
            +
                    wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
         | 
| 228 | 
            +
                    avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
         | 
| 229 | 
            +
                    print(f'Frozen params: Have {avail_numel} numels to process.')
         | 
| 230 | 
            +
                    print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
         | 
| 231 | 
            +
             | 
| 232 | 
            +
                total_params = 0
         | 
| 233 | 
            +
                total_numel = 0
         | 
| 234 | 
            +
                for name, shape in frozen_param_shapes.items():
         | 
| 235 | 
            +
                    total_params += 1
         | 
| 236 | 
            +
                    unpartitioned_numel = shape.numel()
         | 
| 237 | 
            +
                    total_numel += unpartitioned_numel
         | 
| 238 | 
            +
             | 
| 239 | 
            +
                    state_dict[name] = frozen_param_fragments[name]
         | 
| 240 | 
            +
             | 
| 241 | 
            +
                    if debug:
         | 
| 242 | 
            +
                        print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
         | 
| 243 | 
            +
             | 
| 244 | 
            +
                print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
         | 
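# Under ZeRO-1/2 frozen parameters are not partitioned across ranks, so rank
# 0's frozen_param_fragments are already complete tensors and are copied into
# the state dict as-is.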


def _has_callable(obj, fn):
    attr = getattr(obj, fn, None)
    return callable(attr)


def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    param_shapes = zero_model_states[0].param_shapes

    # Reconstruction protocol:
    #
    # XXX: document this
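    #
    # In brief, what the code below does: each rank saved one flat fp32 tensor
    # per param group (its contiguous slice of that group's master weights).
    # Concatenating the world_size slices in rank order restores the full flat
    # group, and the individual parameters are then carved out sequentially
    # using the shapes recorded in param_shapes; up to 2*world_size elements of
    # alignment padding may remain at the end of each group.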

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = []
    for i in range(num_param_groups):
        merged_partitions = [sd[i] for sd in fp32_flat_groups]
        full_single_fp32_vector = torch.cat(merged_partitions, 0)
        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
    avail_numel = sum(
        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])

    if debug:
        wanted_params = sum([len(shapes) for shapes in param_shapes])
        wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():

            unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
        # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
        # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
        # live optimizer object, so we are checking that the numbers are within the right range
        align_to = 2 * world_size

        def zero2_align(x):
            return align_to * math.ceil(x / align_to)

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned  offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")


def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict


def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    remainder = unpartitioned_numel % world_size
    padding_numel = (world_size - remainder) if remainder else 0
    partitioned_numel = math.ceil(unpartitioned_numel / world_size)
    return partitioned_numel, padding_numel
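# e.g. unpartitioned_numel=10, world_size=4 -> partitioned_numel=3 and
# padding_numel=2: each rank stores 3 elements, 4*3 = 12 = 10 real + 2 padding.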


def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
        return

    if debug:
        for i in range(world_size):
            num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
            print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        frozen_param_shapes = zero_model_states[0].frozen_param_shapes
        wanted_params = len(frozen_param_shapes)
        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
        avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in zero_model_states[0].frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
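# Unlike ZeRO-1/2, ZeRO-3 partitions even frozen parameters across ranks, so
# each tensor is rebuilt by concatenating all ranks' fragments and trimming the
# trailing partition padding before the reshape.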
| 389 | 
            +
             | 
| 390 | 
            +
             | 
| 391 | 
            +
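
# Shape walk-through for the merge above (editor's illustrative comment,
# hypothetical values): a frozen param of shape (3, 4) on world_size=2 is
# stored as two flat fragments of numel 6 each; torch.cat joins them into a
# flat tensor of numel 12, narrow(0, 0, 12) trims nothing here (it would trim
# padding if numel were not divisible by world_size), and view((3, 4))
# restores the original shape.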


class GatheredTensor:
    """
    A pseudo tensor that collects partitioned weights.
    It is more memory efficient when there are multiple groups.
    """

    def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape):
        self.flat_groups = flat_groups
        self.flat_groups_offset = flat_groups_offset
        self.offset = offset
        self.partitioned_numel = partitioned_numel
        self.shape = shape
        self.dtype = self.flat_groups[0][0].dtype

    def contiguous(self):
        """
        Merge partitioned weights from flat_groups into a single tensor.
        """
        end_idx = self.offset + self.partitioned_numel
        world_size = len(self.flat_groups)
        pad_flat_param_chunks = []

        for rank_i in range(world_size):
            # for each rank, we need to collect weights from related group/groups
            flat_groups_at_rank_i = self.flat_groups[rank_i]
            start_group_id = None
            end_group_id = None
            for group_id in range(len(self.flat_groups_offset)):
                if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]:
                    start_group_id = group_id
                if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]:
                    end_group_id = group_id
                    break
            # collect weights from related group/groups
            for group_id in range(start_group_id, end_group_id + 1):
                flat_tensor = flat_groups_at_rank_i[group_id]
                start_offset = self.offset - self.flat_groups_offset[group_id]
                end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id]
                pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset])

        # collect weights from all ranks
        pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0)
        param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous()
        return param
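
# Illustrative sketch of lazy gathering (editor's comment, hypothetical
# values): a GatheredTensor is just a recipe; no data is copied until
# .contiguous() is called, e.g.
#   lazy = GatheredTensor(fp32_flat_groups, flat_groups_offset,
#                         offset=0, partitioned_numel=3, shape=torch.Size([10]))
#   param = lazy.contiguous()  # concatenates per-rank slices, trims padding
# so peak memory stays near the size of one materialized parameter at a time.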


def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    param_shapes = zero_model_states[0].param_shapes
    avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size

    # Reconstruction protocol: for zero3 we need to zip the partitions together at the boundary of each
    # param, re-consolidating each param while dealing with padding, if any

    # merge the list of dicts, preserving order
    param_shapes = {k: v for d in param_shapes for k, v in d.items()}

    if debug:
        for i in range(world_size):
            # fp32_flat_groups[i] is a per-rank list of group tensors, so report each group's shape
            print(f"{FP32_FLAT_GROUPS}[{i}] shapes: {[g.shape for g in fp32_flat_groups[i]]}")

        wanted_params = len(param_shapes)
        wanted_numel = sum(shape.numel() for shape in param_shapes.values())
        # not asserting if there is a mismatch due to possible padding
        avail_numel = sum([g.numel() for g in fp32_flat_groups[0]]) * world_size
        print(f"Trainable params: Have {avail_numel} numels to process.")
        print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # an out-of-core computing solution
    offset = 0
    total_numel = 0
    total_params = 0
    flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]]))
    for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'):
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel
        total_params += 1
        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

        # memory-efficient pseudo tensor: gathering is deferred until .contiguous()
        tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape)
        state_dict[name] = tensor
        offset += partitioned_numel

    offset *= world_size

    # Sanity check
    if offset != avail_numel:
        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
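
# Worked example of the offset bookkeeping above (editor's illustrative
# comment, hypothetical values): two params of numel 10 and 6 on world_size=4
# get per-rank partitions of ceil(10/4) = 3 and ceil(6/4) = 2, so the final
# offset is (3 + 2) * 4 = 20, which must match the padded flat-group total.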


def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)

    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict
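
# Illustrative note on shared params (editor's comment, hypothetical names):
# shared_params holds alias pairs for tied weights, e.g.
# ('lm_head.weight', 'model.embed_tokens.weight'), so the alias is re-pointed
# at the reconstructed tensor instead of being stored twice.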


def to_torch_tensor(state_dict, return_empty_tensor=False):
    """
    Convert a state_dict of GatheredTensor objects into real torch tensors.
    """
    torch_state_dict = {}
    converted_tensors = {}
    for name, tensor in state_dict.items():
        tensor_id = id(tensor)
        if tensor_id in converted_tensors:  # shared tensors
            shared_tensor = torch_state_dict[converted_tensors[tensor_id]]
            torch_state_dict[name] = shared_tensor
        else:
            converted_tensors[tensor_id] = name
            if return_empty_tensor:
                torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype)
            else:
                torch_state_dict[name] = tensor.contiguous()
    return torch_state_dict
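
# Editor's note (illustrative): with return_empty_tensor=True the function
# yields tensors that have the right shapes and dtypes but hold no gathered
# data; the sharding step below uses this to plan shard boundaries without
# materializing any weights.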


def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir,
                                             tag=None,
                                             exclude_frozen_parameters=False,
                                             lazy_mode=False):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
    ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
    via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for the checkpoint. If not provided, will attempt to load the tag from the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
        - ``exclude_frozen_parameters``: exclude frozen parameters
        - ``lazy_mode``: get the state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient.
          Convert a pseudo tensor to a torch tensor with ``.contiguous()``

    Returns:
        - pytorch ``state_dict``

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application, i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    Note: the above usage may not work if your application doesn't have sufficient free CPU memory.
    You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint. Or you can load the state_dict in lazy mode ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu
        for name, lazy_tensor in state_dict.items():
            tensor = lazy_tensor.contiguous()  # to cpu
            print(name, tensor)
            # del tensor to release the memory if it is no longer in use
    """
    if tag is None:
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if os.path.isfile(latest_path):
            with open(latest_path, 'r') as fd:
                tag = fd.read().strip()
        else:
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)

    if not os.path.isdir(ds_checkpoint_dir):
        raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")

    state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)
    if lazy_mode:
        return state_dict
    else:
        return to_torch_tensor(state_dict)


def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir,
                                               output_dir,
                                               max_shard_size="5GB",
                                               safe_serialization=False,
                                               tag=None,
                                               exclude_frozen_parameters=False):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder (one that contains the tag-folder, like ``global_step14``)
        - ``output_dir``: directory for the pytorch fp32 state_dict output files
        - ``max_shard_size``: the maximum size for a checkpoint before being sharded; the default value is 5GB
        - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (which uses `pickle`)
        - ``tag``: checkpoint tag used as a unique identifier for the checkpoint. If not provided, will attempt to load the tag from the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
        - ``exclude_frozen_parameters``: exclude frozen parameters
    """

    # Dependency pre-check
    if safe_serialization:
        try:
            from safetensors.torch import save_file
        except ImportError:
            print('If you want to use `safe_serialization`, please `pip install safetensors`')
            raise
    if max_shard_size is not None:
        try:
            from huggingface_hub import split_torch_state_dict_into_shards
        except ImportError:
            print('If you want to use `max_shard_size`, please `pip install huggingface_hub`')
            raise

    # Convert the zero checkpoint to a lazy state_dict
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir,
                                                          tag,
                                                          exclude_frozen_parameters,
                                                          lazy_mode=True)

    # Shard the model if it is too big.
    weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin"
    if max_shard_size is not None:
        filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
        # a memory-efficient approach for sharding: plan the shards with empty
        # tensors so no weights need to be gathered yet
        empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True)
        state_dict_split = split_torch_state_dict_into_shards(empty_state_dict,
                                                              filename_pattern=filename_pattern,
                                                              max_shard_size=max_shard_size)
    else:
        from collections import namedtuple
        StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"])
        state_dict_split = StateDictSplit(is_sharded=False,
                                          filename_to_tensors={weights_name: list(state_dict.keys())})

    # Save the model shard by shard
    os.makedirs(output_dir, exist_ok=True)
    filename_to_tensors = state_dict_split.filename_to_tensors.items()
    for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"):
        shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors}
        # materialize only the tensors belonging to the current shard
        shard_state_dict = to_torch_tensor(shard_state_dict)
        output_path = os.path.join(output_dir, shard_file)
        if safe_serialization:
            save_file(shard_state_dict, output_path, metadata={"format": "pt"})
        else:
            torch.save(shard_state_dict, output_path)
        # release the memory of the current shard
        for tensor_name in list(shard_state_dict.keys()):
            del state_dict[tensor_name]
            del shard_state_dict[tensor_name]
        del shard_state_dict
        gc.collect()

    # Save the index if sharded
    if state_dict_split.is_sharded:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json"
        save_index_file = os.path.join(output_dir, save_index_file)
        with open(save_index_file, "w", encoding="utf-8") as f:
            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
            f.write(content)
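
# Illustrative usage of the converter above (editor's comment, hypothetical
# paths):
#   convert_zero_checkpoint_to_fp32_state_dict("path/checkpoint-12",
#                                              "path/checkpoint-12-output",
#                                              safe_serialization=True)
# With the default max_shard_size="5GB", large models are written as several
# shard files plus an index json that maps each weight to its shard.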


def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    1. Put the provided model on the cpu
    2. Convert a ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
    3. Load it into the provided model

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder (one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for the checkpoint. If not provided, will attempt to load the tag from the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``

    Returns:
        - ``model``: the modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough, use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note that once this has run, the ``model`` will no longer be usable in the deepspeed context
    of the same application, i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
    """
    logger.info("Extracting fp32 weights")
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    model = model.cpu()
    model.load_state_dict(state_dict, strict=False)

    return model


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint_dir",
                        type=str,
                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
    parser.add_argument("output_dir",
                        type=str,
                        help="directory for the pytorch fp32 state_dict output files "
                        "(e.g. path/checkpoint-12-output/)")
    parser.add_argument(
        "--max_shard_size",
        type=str,
        default="5GB",
        help="The maximum size for a checkpoint before being sharded. Each resulting shard will be "
        "smaller than this size. If expressed as a string, it needs to be digits followed by a unit "
        "(like `5MB`). We default to 5GB so that models can run easily on free-tier Google Colab "
        "instances without CPU OOM issues.")
    parser.add_argument(
        "--safe_serialization",
        default=False,
        action='store_true',
        help="Whether to save the model using `safetensors` or the traditional PyTorch way (which uses `pickle`).")
    parser.add_argument("-t",
                        "--tag",
                        type=str,
                        default=None,
                        help="checkpoint tag used as a unique identifier for the checkpoint, e.g., global_step1")
    parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
                                               args.output_dir,
                                               max_shard_size=args.max_shard_size,
                                               safe_serialization=args.safe_serialization,
                                               tag=args.tag,
                                               exclude_frozen_parameters=args.exclude_frozen_parameters)
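
# Example invocation of this script (editor's comment, hypothetical paths):
#   python zero_to_fp32.py path/checkpoint-12 path/checkpoint-12-output \
#       --safe_serialization --max_shard_size 2GB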