import base64
import json
import os
import random

import gradio as gr
import numpy as np
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
from huggingface_hub import InferenceClient

# Upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max

# Inclusive code-point ranges treated as "Chinese" by get_caption_language.
_CHINESE_RANGES = (
    ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
)


def get_caption_language(prompt):
    """Return 'zh' if *prompt* contains a CJK Unified Ideograph, else 'en'."""
    for char in prompt:
        if any(start <= char <= end for start, end in _CHINESE_RANGES):
            return 'zh'
    return 'en'


def _extract_rewritten(response_text):
    """Pull the "Rewritten" field out of a JSON-formatted model reply.

    SYSTEM_PROMPT_EDIT asks the model to answer as ``{"Rewritten": "..."}``,
    possibly wrapped in a Markdown code fence. If the reply cannot be parsed
    that way, the reply is returned unchanged so callers still get usable text.
    """
    candidate = response_text.replace("```json", "").replace("```", "").strip()
    start, end = candidate.find("{"), candidate.rfind("}")
    if start == -1 or end <= start:
        return response_text
    try:
        payload = json.loads(candidate[start:end + 1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return response_text
    rewritten = payload.get("Rewritten") if isinstance(payload, dict) else None
    if isinstance(rewritten, str) and rewritten.strip():
        return rewritten
    return response_text


def polish_prompt(original_prompt, system_prompt):
    """Rewrite *original_prompt* using a Hugging Face InferenceClient.

    Args:
        original_prompt: The user's edit instruction.
        system_prompt: System message that tells the model how to rewrite.

    Returns:
        The rewritten instruction on a single line. If the API call or the
        post-processing fails, the error is printed and *original_prompt*
        is returned unchanged.

    Raises:
        EnvironmentError: If the ``HF_TOKEN`` environment variable is not set.
    """
    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        raise EnvironmentError("HF_TOKEN is not set. Please set it in your environment.")

    client = InferenceClient(
        provider="cerebras",
        api_key=api_key,
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt},
    ]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=2000,
        )
        raw_reply = completion.choices[0].message.content
        # The system prompt requests a JSON envelope; unwrap it so the image
        # pipeline receives the instruction itself rather than the JSON text.
        polished_prompt = _extract_rewritten(raw_reply)
        return polished_prompt.strip().replace("\n", " ")
    except Exception as e:
        # Best-effort: any failure falls back to the user's own prompt.
        print(f"Error during Hugging Face API call: {e}")
        return original_prompt


SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.

## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.

## 2. Task Types

### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.

### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.

### Human Editing (e.g., change a person’s face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.

### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:
  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`
  if applicable.

## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
    "Rewritten": "..."
}
'''

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)

# Load LoRA weights for acceleration
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
)
pipe.fuse_lora()


@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit *image* with Qwen-Image-Edit, optionally rewriting the prompt first.

    Args:
        image: Input image from the ``gr.Image`` component (configured as PIL).
        prompt: Edit instruction text.
        seed: Seed for the torch generator; ignored when *randomize_seed* is set.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: When true, pass *prompt* through ``polish_prompt``
            (which raises ``EnvironmentError`` if ``HF_TOKEN`` is unset).
        num_images_per_prompt: Number of images to generate.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        A ``(images, seed)`` tuple: the pipeline's output images and the seed
        that was actually used.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        # Fail with a readable UI message instead of an opaque pipeline error.
        raise gr.Error("Please upload an input image before editing.")

    negative_prompt = " "

    # Slider values may arrive as floats; the generator and pipeline need ints.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    num_inference_steps = int(num_inference_steps)
    num_images_per_prompt = int(num_images_per_prompt)

    generator = torch.Generator(device=device).manual_seed(seed)

    # Rewrite before logging so the log shows the prompt the pipeline receives.
    if rewrite_prompt:
        polished_prompt = polish_prompt(prompt, SYSTEM_PROMPT_EDIT)
        print(f"Rewritten Prompt: {polished_prompt}")
        prompt = polished_prompt

    print(f"Calling pipeline with prompt: '{prompt}'")
    print(f"Negative Prompt: '{negative_prompt}'")
    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")

    edited_images = pipe(
        image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images

    return edited_images, seed


# Sample edit instructions. NOTE: not currently wired into the UI below.
examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.""",
]

with gr.Blocks() as demo:
    gr.Markdown("# Qwen-Image-Edit with Prompt Enhancement and Fast Inference")
    gr.Markdown("Try editing images with multi-modal instruction polishing.")

    with gr.Column():
        input_image = gr.Image(label="Input Image", type="pil")
        prompt = gr.Text(label="Edit Instruction", placeholder="e.g. Add a dog to the right side.")
        run_button = gr.Button("Edit", variant="primary")

    result = gr.Gallery(label="Output Images", show_label=False)

    with gr.Accordion("Advanced Settings", open=False):
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=MAX_SEED,
            step=1,
            value=0
        )
        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

        with gr.Row():
            true_guidance_scale = gr.Slider(
                label="True Guidance Scale",
                minimum=1.0,
                maximum=5.0,
                step=0.1,
                value=4.0
            )
            num_inference_steps = gr.Slider(
                label="Inference Steps (Fast 8-step mode)",
                minimum=4,
                maximum=8,
                step=1,
                value=8
            )
            num_images_per_prompt = gr.Slider(
                label="Images per Prompt",
                minimum=1,
                maximum=4,
                step=1,
                value=1
            )

        rewrite_prompt = gr.Checkbox(label="Use Prompt Rewriter", value=False, visible=True)

    # Run the edit on button click or when Enter is pressed in the prompt box.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_prompt,
            num_images_per_prompt
        ],
        outputs=[result, seed],
    )

if __name__ == "__main__":
    demo.launch()