import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
from diffusers.utils import is_xformers_available
import os
import json
from huggingface_hub import InferenceClient
import logging

#############################
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
#############################
def get_caption_language(prompt):
    """Detects if the prompt contains Chinese characters."""
    ranges = [
        ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
    ]
    for char in prompt:
        if any(start <= char <= end for start, end in ranges):
            return 'zh'
    return 'en'
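# Illustrative behavior (hypothetical example inputs):
#   get_caption_language("把猫换成金毛犬")               -> 'zh'
#   get_caption_language("Replace the cat with a dog")  -> 'en'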
def polish_prompt(original_prompt, system_prompt, hf_token):
    """
    Rewrites the prompt using a Hugging Face InferenceClient.
    Requires a user-provided HF token for API access.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt

    client = InferenceClient(
        provider="cerebras",
        api_key=hf_token,
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt}
    ]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=512,
        )
        polished_prompt = completion.choices[0].message.content.strip()
        # The system prompt asks the model to answer with {"Rewritten": "..."};
        # extract that field when possible and fall back to the raw text otherwise.
        try:
            parsed = json.loads(polished_prompt)
            if isinstance(parsed, dict) and isinstance(parsed.get("Rewritten"), str):
                polished_prompt = parsed["Rewritten"]
        except json.JSONDecodeError:
            pass
        return polished_prompt.strip().replace("\n", " ")
    except Exception as e:
        logger.error(f"Error during Hugging Face API call: {e}")
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.

## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure newly added elements or modifications align with the image's original context and art style.

## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.

### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserve the original structure and language; **do not translate** or alter style.

### Human Editing (e.g., change a person's face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.

### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- For **coloring/restoration**, use this fixed template if applicable:
  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`

## 3. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
    "Rewritten": "..."
}
'''
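#############################
# Model setup: load the Qwen-Image-Edit pipeline in bfloat16, apply the
# lightx2v Lightning LoRA for fast 8-step inference, and move the pipeline
# to the GPU when one is available.
#############################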
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)

# Load LoRA weights for acceleration
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning", weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
)
pipe.fuse_lora()

if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()
else:
    print("xformers is not available; using the default attention implementation.")
# Assumption: this Space runs on ZeroGPU ("Running on Zero"), so `infer` needs
# the `spaces.GPU` decorator to be granted a GPU for the duration of each call.
@spaces.GPU
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Edits the input image according to the prompt.
    Prompt rewriting requires a user-provided HF token.
    """
    original_prompt = prompt  # Save original prompt for display
    negative_prompt = " "
    prompt_info = ""  # Initialize info text
    # Handle prompt rewriting with status messages
    if rewrite_prompt:
        if not hf_token.strip():
            gr.Warning("HF Token is required for prompt rewriting but was not provided!")
            prompt_info = f"""## ⚠️ Prompt Rewriting Skipped (No HF Token)
**Original Prompt:**
{original_prompt}"""
            rewritten_prompt = original_prompt
        else:
            try:
                rewritten_prompt = polish_prompt(original_prompt, SYSTEM_PROMPT_EDIT, hf_token)
                prompt_info = f"""## ✅ Prompt Rewrite Successful
**Original Prompt:**
{original_prompt}
**Enhanced Prompt:**
{rewritten_prompt}"""
            except Exception as e:
                gr.Warning(f"Prompt rewriting failed: {str(e)}")
                rewritten_prompt = original_prompt
                prompt_info = f"""## ❌ Prompt Rewrite Failed
**Original Prompt:**
{original_prompt}
**Error:**
{str(e)}"""
    else:
        rewritten_prompt = original_prompt
        prompt_info = f"""## Original Prompt (No Rewrite)
**User Input:**
{original_prompt}"""
    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images

    return edited_images, seed, prompt_info


MAX_SEED = np.iinfo(np.int32).max
examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    """Replace "Qwen" with "通义" in the image. Ensure a Chinese font is used for "通义" and position it at the top left with a light heading-style font.""",
]
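#############################
# Gradio UI: input image and edit instruction on the left; output gallery,
# prompt details, and the optional prompt-rewriter token box on the right.
#############################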
with gr.Blocks() as demo:
    gr.Markdown("# Qwen-Image-Edit [FAST] with HF Prompt Enhancement")
    gr.Markdown("✨ **8-step Lightning inference using lightx2v's LoRA.**")
    gr.Markdown("⚠️ **Prompt rewriting requires your own [Hugging Face token](https://huggingface.co/settings/tokens)**")
    gr.Markdown("🚧 **Work in progress; further improvements coming soon.**")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="pil")
            prompt = gr.Text(label="Edit Instruction", placeholder="e.g. Add a dog to the right side.")

            with gr.Accordion("Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

                with gr.Row():
                    true_guidance_scale = gr.Slider(
                        label="True Guidance Scale",
                        minimum=1.0,
                        maximum=5.0,
                        step=0.1,
                        value=4.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps (Fast 8-step mode)",
                        minimum=4,
                        maximum=16,
                        step=1,
                        value=8,
                    )
                    num_images_per_prompt = gr.Slider(
                        label="Images per Prompt",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=1,
                    )

            run_button = gr.Button("Edit", variant="primary")
        with gr.Column():
            result = gr.Gallery(label="Output Images", show_label=False, columns=1)
            # New prompt display component
            prompt_info = gr.Markdown("## Prompt Details", visible=False)

            with gr.Group():
                rewrite_toggle = gr.Checkbox(label="Use Prompt Rewriter (Requires HF Token)", value=False, interactive=True)
                hf_token_input = gr.Textbox(
                    label="Your Hugging Face Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Required for prompt rewriting - get yours from [Hugging Face settings](https://huggingface.co/settings/tokens). The token is only used for the rewrite API call and is not stored, but as good practice please double-check the source code yourself. If the token is invalid or missing, the original prompt is used.",
                )
    def toggle_token_visibility(checked):
        return gr.update(visible=checked)

    rewrite_toggle.change(
        toggle_token_visibility,
        inputs=[rewrite_toggle],
        outputs=[hf_token_input],
    )
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt,
        ],
        outputs=[result, seed, prompt_info],
    )
    # Show prompt info box after processing
    def set_prompt_visible():
        return gr.update(visible=True)

    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False,
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False,
    )
if __name__ == "__main__":
    demo.launch()