---
base_model:
- XiaomiMiMo/MiMo-VL-7B-RL-2508
library_name: transformers
---

Quantized with GPTQModel 4.0.0 (dev) using the following code:
```python
import base64
from io import BytesIO
from random import seed, shuffle

from datasets import concatenate_datasets, load_dataset
from gptqmodel import GPTQModel, QuantizeConfig
from transformers import AutoTokenizer

seed(0)

MODEL_ID = "XiaomiMiMo/MiMo-VL-7B-RL-2508"
SAVE_DIR = "MiMo-VL-7B-RL-2508-gptq-q4"
NUM_TEXT_SAMPLES = 128
NUM_IMAGE_SAMPLES = 128
MAX_TOKENS = 1024


def encode_pil_to_data_uri(pil_image) -> str:
    """Encode a PIL image as a base64 PNG data URI for use in chat messages."""
    buff = BytesIO()
    pil_image.save(buff, format="PNG")
    encoded = base64.b64encode(buff.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"


def make_text_conversations(texts, tok, max_tokens=1024):
    """Build text-only calibration conversations, truncating each sample to max_tokens."""
    convs = []
    for t in texts:
        if not isinstance(t, str):
            continue
        tt = t.strip()
        if not tt:
            continue
        ids = tok.encode(tt, add_special_tokens=False)[:max_tokens]
        if not ids:
            continue
        trunc = tok.decode(ids, skip_special_tokens=True)
        convs.append(
            [
                {
                    "role": "user",
                    "content": [{"type": "text", "text": trunc}],
                }
            ]
        )
    return convs


def make_image_conversations(hf_dataset, num_samples=64):
    """Build image+text calibration conversations from a dataset with an "image" column."""
    convs = []
    for ex in hf_dataset.select(range(min(num_samples, len(hf_dataset)))):
        data_uri = encode_pil_to_data_uri(ex["image"])
        convs.append(
            [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": data_uri},
                        {"type": "text", "text": "What does the image show?"},
                    ],
                }
            ]
        )
    return convs


# Text calibration data: one English and one Spanish C4 shard.
en_ds = load_dataset(
    "allenai/c4", data_files="en/c4-train.00001-of-01024.json.gz", split="train"
).shuffle(seed=0)
es_ds = load_dataset(
    "allenai/c4",
    data_files="multilingual/c4-es.tfrecord-00001-of-02048.json.gz",
    split="train",
).shuffle(seed=0)
texts = [x["text"] for x in concatenate_datasets([en_ds, es_ds])]
texts = [t for t in texts if isinstance(t, str) and t.strip()]
shuffle(texts)
texts = texts[:NUM_TEXT_SAMPLES]

tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
text_conversations = make_text_conversations(texts, tok, max_tokens=MAX_TOKENS)

# Image calibration data: Flickr30k test images paired with a generic question.
img_ds = load_dataset("lmms-lab/flickr30k", split="test[:512]").shuffle(seed=42)
image_conversations = make_image_conversations(img_ds, num_samples=NUM_IMAGE_SAMPLES)

calibration_conversations = text_conversations + image_conversations
shuffle(calibration_conversations)

print(
    f"Prepared {len(text_conversations)} text-only and "
    f"{len(image_conversations)} image+text conversations "
    f"(total {len(calibration_conversations)})."
)

qconf = QuantizeConfig(
    bits=4,
    group_size=128,
    device="cuda:0",
    v2=False,  # v2 is giving much worse results
)

model = GPTQModel.load(MODEL_ID, qconf)
model.quantize(
    calibration_conversations,
    batch_size=1,
)
model.save(SAVE_DIR)
print(f"Saved quantized model to: {SAVE_DIR}")
```
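
For loading the quantized checkpoint, a minimal text-only smoke test could look like the sketch below. The device string, prompt, and generation settings are illustrative assumptions rather than part of the quantization script; for chat- or image-formatted inputs you would apply the model's chat template / processor instead of raw tokenization.

```python
from gptqmodel import GPTQModel
from transformers import AutoTokenizer

SAVE_DIR = "MiMo-VL-7B-RL-2508-gptq-q4"      # directory written by model.save() above
MODEL_ID = "XiaomiMiMo/MiMo-VL-7B-RL-2508"   # tokenizer loaded from the original repo

# Load the quantized weights; assumes a CUDA device is available,
# matching device="cuda:0" in the quantization script.
model = GPTQModel.load(SAVE_DIR)
tok = AutoTokenizer.from_pretrained(MODEL_ID)

prompt = "Describe GPTQ quantization in one sentence."  # illustrative prompt
input_ids = tok(prompt, return_tensors="pt").input_ids.to("cuda:0")

# generate() on the GPTQModel wrapper is forwarded to the underlying transformers model.
out = model.generate(input_ids, max_new_tokens=64)
print(tok.decode(out[0], skip_special_tokens=True))
```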