24 GB (~22918 MiB to be exact) via torchao on 4090
#9 opened by C0nsumption
| Input | Output | Controlnet |
|---|---|---|
| *(image)* | *(image)* | *(image)* |
This will get you going. I currently have it in CLI format, so this was just a quick and dirty test.
Remember to reference the Qwen team / Diffusers team code for the main weights even when using this.
Everything seems to be working fine; I will drop some more examples soon.
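If you don't already have the checkpoint on disk, something like the sketch below will pull it into the `./Qwen-Image-Edit-2509` folder the script expects (assuming the weights are hosted under the `Qwen/Qwen-Image-Edit-2509` repo id; adjust if yours lives elsewhere):

```python
# Minimal sketch: download the full checkpoint into the folder the script points at.
# The repo id "Qwen/Qwen-Image-Edit-2509" is an assumption here; change it if needed.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="Qwen/Qwen-Image-Edit-2509",
    local_dir="./Qwen-Image-Edit-2509",
)
```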
```python
#!/usr/bin/env python3
# qwen_edit_test.py
import sys
import time
from pathlib import Path

import torch
from PIL import Image
from diffusers import AutoModel, DiffusionPipeline, TorchAoConfig

# 🚑 TorchAO hotfix: patch missing operator for quantized tensors
from torchao.dtypes.affine_quantized_tensor import AffineQuantizedTensor


def _safe_has_compatible_shallow_copy_type(t1, t2):
    # Always return True to bypass crash inside .to() / Accelerate offload
    return True


torch._has_compatible_shallow_copy_type = _safe_has_compatible_shallow_copy_type
AffineQuantizedTensor.__torch_function__ = torch._C._disabled_torch_function_impl  # disable torch_function dispatch


def main():
    # 🔧 Hardcoded config (this is where your actual settings go)
    model_dir = Path("./Qwen-Image-Edit-2509")
    input_paths = [Path("./sub.jpeg"), Path("./pose1.png")]
    out_path = Path("qwen_edit_test.png")
    lora_path = None  # set Path(".../weights.safetensors") if needed
    prompt = """Convert subject in image 1 to match the pose in image 2, keep the character the same and the
background consistent with the inside of a fast food store. ensure to keep the face, clothing, and hair
consistent. She should be sitting on the counter with the cash registers and advertisements to her back"""
    negative_prompt = "ugly"
    steps = 40
    seed = 1234

    # Sanity checks
    if not model_dir.exists():
        print(f"[ERR] model_dir not found: {model_dir}", file=sys.stderr)
        sys.exit(2)
    for pth in input_paths:
        if not pth.is_file():
            print(f"[ERR] input image not found: {pth}", file=sys.stderr)
            sys.exit(2)
    if lora_path and not lora_path.is_file():
        print(f"[ERR] lora_path not found: {lora_path}", file=sys.stderr)
        sys.exit(2)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    print(f"[INFO] model_dir={model_dir} lora={lora_path} inputs={input_paths}")
    t0 = time.time()

    # TorchAO quantization config
    torch_dtype = torch.bfloat16
    quantization_config = TorchAoConfig("int8wo")

    # Quantized transformer backbone
    transformer = AutoModel.from_pretrained(
        str(model_dir),
        subfolder="transformer",
        quantization_config=quantization_config,
        torch_dtype=torch_dtype,
    )

    # Build pipeline
    pipe = DiffusionPipeline.from_pretrained(
        str(model_dir),
        transformer=transformer,
        torch_dtype=torch_dtype,
    )
    if lora_path:
        pipe.load_lora_weights(str(lora_path))

    # Save VRAM
    pipe.enable_model_cpu_offload()

    generator = torch.manual_seed(seed) if seed is not None else None

    # Load input images
    images = [Image.open(str(p)).convert("RGB") for p in input_paths]

    print("[INFO] editing…")
    kwargs = dict(
        image=images if len(images) > 1 else images[0],
        prompt=prompt,
        negative_prompt=negative_prompt,
        generator=generator,
        num_inference_steps=steps,
        num_images_per_prompt=1,
    )
    with torch.inference_mode():
        result = pipe(**kwargs)
    img = result.images[0]
    img.save(out_path)

    dt = time.time() - t0
    print(f"[OK] saved → {out_path} ({dt:.2f}s)")


if __name__ == "__main__":
    main()
```
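If you want to sanity-check the ~22918 MiB number on your own card, one rough way (not part of the script above) is to read the CUDA allocator's peak stats around the `pipe(**kwargs)` call; note that `nvidia-smi` will report somewhat more because of the CUDA context and non-PyTorch allocations:

```python
# Rough sketch: wrap the existing `result = pipe(**kwargs)` call inside main()
# with the allocator's peak-memory counters. `pipe` and `kwargs` come from the
# script above; this is a drop-in measurement, not a standalone program.
torch.cuda.reset_peak_memory_stats()
with torch.inference_mode():
    result = pipe(**kwargs)
peak_mib = torch.cuda.max_memory_allocated() / (1024 ** 2)
print(f"[INFO] peak CUDA allocation: {peak_mib:.0f} MiB")
```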
| Setting | Subject 1 | Subject 2 |
|---|---|---|
| *(image)* | *(image)* | *(image)* |
OUTPUT

> Place the girl from image1 and the girl from image2 on the sofa in image3, drinking coffee. ensure to keep the face, clothing, and hair consistent.
I'd say, considering that it's quantized and running on 24 GB of VRAM, it did pretty darn good 🧍🏽♂️
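For reference, that three-image composite only needs the hardcoded config block of the script changed; a sketch is below (the file names are hypothetical placeholders, substitute your own subject and scene images):

```python
# Hypothetical file names: two subject images plus the sofa/scene image.
from pathlib import Path  # already imported in the script above

input_paths = [Path("./girl1.png"), Path("./girl2.png"), Path("./sofa.png")]
prompt = (
    "Place the girl from image1 and the girl from image2 on the sofa in image3, "
    "drinking coffee. ensure to keep the face, clothing, and hair consistent."
)
```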
Hey there, good work. I was wondering how long this process usually takes to complete?





