ankandrew commited on
Commit
72e690a
·
verified ·
1 Parent(s): 6b1d813

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +119 -1
README.md CHANGED
@@ -4,4 +4,122 @@ base_model:
4
  library_name: transformers
5
  ---
6
 
7
- Quantized with GPTQModel 4.0.0 dev.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  library_name: transformers
5
  ---
6
 
7
+ Quantized with GPTQModel 4.0.0 dev with the following code:
8
+
9
+ <details>
10
+ <summary>quantization code</summary>
11
+
12
+
13
+ ```python
14
import base64
from io import BytesIO
from random import seed, shuffle

from datasets import concatenate_datasets, load_dataset
from gptqmodel import GPTQModel, QuantizeConfig
from transformers import AutoTokenizer

# Fix the stdlib RNG so calibration-sample selection is reproducible.
seed(0)

# Source checkpoint to quantize and the output directory for the result.
MODEL_ID = "XiaomiMiMo/MiMo-VL-7B-RL-2508"
SAVE_DIR = "MiMo-VL-7B-RL-2508-gptq-q4"

# Calibration-set sizing: number of text-only samples, number of image+text
# samples, and the per-sample token cap applied to text prompts.
NUM_TEXT_SAMPLES = 128
NUM_IMAGE_SAMPLES = 128
MAX_TOKENS = 1024
32
def encode_pil_to_data_uri(pil_image) -> str:
    """Serialize a PIL image to a base64 ``data:`` URI.

    The image is encoded as PNG into an in-memory buffer and embedded as
    ``data:image/png;base64,<payload>``.

    Fix: the original returned ``data:image;base64,...``, which omits the
    media subtype required by RFC 2397; since the payload written above is
    always PNG, the correct MIME type is ``image/png``. Consumers that
    match on the ``data:image`` prefix accept both forms.
    """
    buff = BytesIO()
    pil_image.save(buff, format="PNG")
    encoded = base64.b64encode(buff.getvalue()).decode("utf-8")
    # Include the concrete subtype: we always serialize as PNG above.
    return f"data:image/png;base64,{encoded}"
37
+
38
+
39
def make_text_conversations(texts, tok, max_tokens=1024):
    """Build single-turn user conversations from raw text samples.

    Non-string and blank entries are skipped. Each surviving text is
    tokenized without special tokens, truncated to at most *max_tokens*
    ids, and decoded back so the prompt fits the calibration budget.
    """
    conversations = []
    for raw in texts:
        if not isinstance(raw, str):
            continue
        stripped = raw.strip()
        if not stripped:
            continue
        token_ids = tok.encode(stripped, add_special_tokens=False)[:max_tokens]
        if not token_ids:
            continue
        truncated = tok.decode(token_ids, skip_special_tokens=True)
        message = {"role": "user", "content": [{"type": "text", "text": truncated}]}
        conversations.append([message])
    return conversations
60
+
61
+
62
def make_image_conversations(hf_dataset, num_samples=64):
    """Build single-turn image+question conversations for calibration.

    Takes up to *num_samples* examples from *hf_dataset* (expects an
    ``image`` column holding PIL images), embeds each image as a base64
    data URI, and pairs it with a fixed caption-style question.
    """
    take = min(num_samples, len(hf_dataset))
    conversations = []
    for example in hf_dataset.select(range(take)):
        uri = encode_pil_to_data_uri(example["image"])
        content = [
            {"type": "image", "image": uri},
            {"type": "text", "text": "What does the image show?"},
        ]
        conversations.append([{"role": "user", "content": content}])
    return conversations
78
+
79
+
80
# --- Text calibration data: English + Spanish C4 shards ----------------------
en_ds = load_dataset(
    "allenai/c4", data_files="en/c4-train.00001-of-01024.json.gz", split="train"
).shuffle(seed=0)
es_ds = load_dataset(
    "allenai/c4", data_files="multilingual/c4-es.tfrecord-00001-of-02048.json.gz", split="train"
).shuffle(seed=0)

# Pool both languages, drop non-string/blank rows, then keep a fixed-size
# random sample (shuffle is seeded at module top, so this is reproducible).
texts = [x["text"] for x in concatenate_datasets([en_ds, es_ds])]
texts = [t for t in texts if isinstance(t, str) and t.strip()]
shuffle(texts)
texts = texts[:NUM_TEXT_SAMPLES]

# use_fast=False: load the slow (Python) tokenizer for this model.
tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
text_conversations = make_text_conversations(texts, tok, max_tokens=MAX_TOKENS)

# --- Image calibration data: Flickr30k test split ----------------------------
img_ds = load_dataset("lmms-lab/flickr30k", split="test[:512]").shuffle(seed=42)
image_conversations = make_image_conversations(img_ds, num_samples=NUM_IMAGE_SAMPLES)

# Mix the two modalities so calibration interleaves text-only and image+text.
calibration_conversations = text_conversations + image_conversations
shuffle(calibration_conversations)

print(
    f"Prepared {len(text_conversations)} text-only and "
    f"{len(image_conversations)} image+text conversations "
    f"(total {len(calibration_conversations)})."
)

# --- Quantization ------------------------------------------------------------
# 4-bit GPTQ, group size 128, on the first CUDA device.
qconf = QuantizeConfig(
    bits=4,
    group_size=128,
    device="cuda:0",
    v2=False,  # v2 is giving much worse results
)

model = GPTQModel.load(MODEL_ID, qconf)

# batch_size=1 keeps peak memory low for the 7B vision-language model.
model.quantize(
    calibration_conversations,
    batch_size=1,
)

model.save(SAVE_DIR)
print(f"Saved quantized model to: {SAVE_DIR}")
123
+ ```
124
+
125
+ </details>