Qwen-Image-ControlNet-Inpainting

Running on Zero

App Files Files Community

Qwen-Image-ControlNet-Inpainting / app.py

linoyts HF Staff

Update app.py

dbfd737 verified 3 months ago

raw

history blame

12.6 kB

	import gradio as gr
	import numpy as np
	import spaces
	import torch
	import random
	import os
	import json


	from diffusers.utils import load_image

	from diffusers import QwenImageControlNetModel, QwenImageControlNetInpaintPipeline

	import math
	from huggingface_hub import InferenceClient

	from PIL import Image

	# Set environment variable for parallel loading
	# os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"

	# --- Prompt Enhancement using Hugging Face InferenceClient ---
	def _extract_rewritten_text(raw_text):
	    """Pull the rewritten prompt out of a model reply.

	    A reply that contains a ``{"Rewritten": ...}`` JSON object (optionally
	    wrapped in Markdown code fences) yields the string stored under
	    ``"Rewritten"``. A reply that carries the marker but cannot be parsed
	    yields the text with the fences stripped. Any other reply is returned
	    unchanged.
	    """
	    if '{"Rewritten"' not in raw_text:
	        return raw_text

	    # Models often wrap JSON in Markdown fences; drop them before parsing.
	    unfenced = raw_text.replace('```json', '').replace('```', '')
	    try:
	        payload = json.loads(unfenced)
	    except json.JSONDecodeError:
	        return unfenced

	    if isinstance(payload, dict):
	        rewritten = payload.get('Rewritten')
	        if isinstance(rewritten, str):
	            return rewritten
	    return unfenced


	def polish_prompt_hf(original_prompt, system_prompt):
	    """Rewrite a prompt through the Hugging Face InferenceClient chat API.

	    This is best-effort: it never raises for API problems.

	    Args:
	        original_prompt: Text sent as the user message.
	        system_prompt: Text sent as the system message.

	    Returns:
	        The rewritten prompt, stripped and with newlines replaced by spaces.
	        ``original_prompt`` is returned unchanged when ``HF_TOKEN`` is not
	        set, when the API call fails, or when the reply is empty or not text.
	    """
	    api_key = os.environ.get("HF_TOKEN")
	    if not api_key:
	        print("Warning: HF_TOKEN not set. Falling back to original prompt.")
	        return original_prompt

	    try:
	        client = InferenceClient(
	            provider="cerebras",
	            api_key=api_key,
	        )
	        completion = client.chat.completions.create(
	            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": original_prompt},
	            ],
	        )
	        reply = completion.choices[0].message.content
	    except Exception as e:
	        # Deliberate catch-all: prompt enhancement is optional, so any client
	        # or network failure degrades to the unmodified prompt.
	        print(f"Error during API call to Hugging Face: {e}")
	        return original_prompt

	    if not isinstance(reply, str):
	        print("Error during API call to Hugging Face: reply had no text content")
	        return original_prompt

	    polished_prompt = _extract_rewritten_text(reply).strip().replace("\n", " ")
	    # An empty rewrite would leave the caller with no prompt at all.
	    return polished_prompt or original_prompt


	def polish_prompt(prompt, img):
	    """Polish an image-editing instruction using HF inference.

	    Args:
	        prompt: The user's raw edit instruction.
	        img: The image being edited. It is not used by this HF-based
	            implementation and is accepted only to keep the interface
	            consistent.

	    Returns:
	        The rewritten instruction, or ``prompt`` unchanged when the rewrite
	        could not be performed.
	    """
	    SYSTEM_PROMPT = '''
	# Edit Instruction Rewriter
	You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
	Please strictly follow the rewriting rules below:
	## 1. General Principles
	- Keep the rewritten prompt concise. Avoid overly long sentences and reduce unnecessary descriptive language.
	- If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
	- Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
	- All added objects or modifications must align with the logic and style of the edited input image's overall scene.
	## 2. Task Type Handling Rules
	### 1. Add, Delete, Replace Tasks
	- If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
	- If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
	> Original: "Add an animal"
	> Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
	- Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
	- For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
	### 2. Text Editing Tasks
	- All text content must be enclosed in English double quotes " ". Do not translate or alter the original language of the text, and do not change the capitalization.
	- For text replacement tasks, always use the fixed template:
	- Replace "xx" to "yy".
	- Replace the xx bounding box to "yy".
	- If the user does not specify text content, infer and add concise text based on the instruction and the input image's context. For example:
	> Original: "Add a line of text" (poster)
	> Rewritten: "Add text "LIMITED EDITION" at the top center with slight shadow"
	- Specify text position, color, and layout in a concise way.
	### 3. Human Editing Tasks
	- Maintain the person's core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
	- If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
	- For expression changes, they must be natural and subtle, never exaggerated.
	- If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
	- For background change tasks, emphasize maintaining subject consistency at first.
	- Example:
	> Original: "Change the person's hat"
	> Rewritten: "Replace the man's hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"
	### 4. Style Transformation or Enhancement Tasks
	- If a style is specified, describe it concisely with key visual traits. For example:
	> Original: "Disco style"
	> Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
	- If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
	- For coloring tasks, including restoring old photos, always use the fixed template: "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
	- If there are other changes, place the style description at the end.
	## 3. Rationality and Logic Checks
	- Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
	- Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).
	# Output Format
	Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
	'''

	    # The rules travel in the system message only; repeating them inside the
	    # user message would send the whole rule set twice.
	    user_message = f"User Input: {prompt}\n\nRewritten Prompt:"

	    rewritten = polish_prompt_hf(user_message, SYSTEM_PROMPT)

	    # polish_prompt_hf returns its first argument untouched when it cannot
	    # rewrite (no HF_TOKEN, API error). A successful rewrite has its newlines
	    # replaced, so it can never equal user_message. In the fallback case,
	    # hand back the user's own words rather than the request scaffolding.
	    if rewritten == user_message:
	        return prompt
	    return rewritten


	# Largest value a signed 32-bit integer can hold; used as the upper bound
	# for both the seed slider and randomly drawn seeds.
	_INT32_LIMITS = np.iinfo(np.int32)
	MAX_SEED = _INT32_LIMITS.max
	# Image-size cap in pixels (not referenced elsewhere in the visible code).
	MAX_IMAGE_SIZE = 2048

	# --- Helper functions for reuse feature ---
	def clear_result():
	    """Return a Gradio update that empties the result component."""
	    cleared = gr.update(value=None)
	    return cleared

	def use_output_as_input(output_image):
	    """Feed the generated image back into the editor as the next input.

	    ``output_image`` is the value of the result slider. Its second element
	    (index 1) is the generated image. When there is no result yet, a no-op
	    update is returned so the editor is left untouched.
	    """
	    if output_image is None:
	        return gr.update()
	    generated = output_image[1]
	    return gr.update(value=generated)


	# Hub repository ids for the base checkpoint and the inpainting ControlNet.
	base_model = "Qwen/Qwen-Image"
	controlnet_model = "InstantX/Qwen-Image-ControlNet-Inpainting"

	# Load the ControlNet weights in bfloat16.
	controlnet = QwenImageControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)

	# Build the inpainting pipeline from the base checkpoint, wiring in the
	# ControlNet loaded above, also in bfloat16.
	pipe = QwenImageControlNetInpaintPipeline.from_pretrained(
	base_model, controlnet=controlnet, torch_dtype=torch.bfloat16
	)
	# Executed at import time, before any request is served; `infer` reuses
	# this module-level `pipe` for every generation.
	pipe.to("cuda")


	@spaces.GPU(duration=120)
	def infer(edit_images,
	          prompt,
	          negative_prompt=" ",
	          seed=42,
	          randomize_seed=False,
	          strength=1.0,
	          num_inference_steps=30,
	          true_cfg_scale=4.0,
	          rewrite_prompt=True,
	          progress=gr.Progress(track_tqdm=True)):
	    """Inpaint the masked region of the uploaded image.

	    Args:
	        edit_images: Value of the image editor; a mapping whose
	            ``"background"`` entry is the uploaded image and whose first
	            ``"layers"`` entry is the drawn mask.
	        prompt: Edit instruction.
	        negative_prompt: Negative prompt forwarded to the pipeline.
	        seed: Seed for the CUDA generator; replaced by a random value in
	            ``[0, MAX_SEED]`` when ``randomize_seed`` is true.
	        randomize_seed: Whether to draw a fresh seed.
	        strength: Forwarded as ``controlnet_conditioning_scale``.
	        num_inference_steps: Number of denoising steps.
	        true_cfg_scale: Forwarded as ``true_cfg_scale``.
	        rewrite_prompt: Whether to run ``polish_prompt`` on the prompt first.
	        progress: Gradio progress tracker (tracks tqdm output).

	    Returns:
	        ``([input_image, result_image], seed)``: the before/after pair for
	        the result slider, and the seed actually used.

	    Raises:
	        gr.Error: If no image was uploaded or no mask was drawn.
	    """
	    # Fail with a readable UI message instead of a TypeError/IndexError when
	    # the editor holds no image or no mask layer.
	    if not edit_images or edit_images.get("background") is None:
	        raise gr.Error("Please upload an image before running.")
	    layers = edit_images.get("layers")
	    if not layers:
	        raise gr.Error("Please draw a mask over the area you want to inpaint.")

	    image = edit_images["background"]
	    mask = layers[0]

	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    # UI sliders may deliver floats; manual_seed and the step count need ints.
	    seed = int(seed)
	    num_inference_steps = int(num_inference_steps)

	    if rewrite_prompt:
	        prompt = polish_prompt(prompt, image)
	        print(f"Rewritten Prompt: {prompt}")

	    # Generate image using Qwen pipeline
	    result_image = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        control_image=image,
	        control_mask=mask,
	        controlnet_conditioning_scale=strength,
	        num_inference_steps=num_inference_steps,
	        true_cfg_scale=true_cfg_scale,
	        generator=torch.Generator(device="cuda").manual_seed(seed),
	    ).images[0]

	    return [image, result_image], seed

	# Sample edit instructions. Not wired to any component in the visible code.
	examples = [
	"change the hat to red",
	"make the background a beautiful sunset",
	"replace the object with a flower vase",
	]

	# Custom stylesheet passed to gr.Blocks(css=...): centres and caps the width
	# of #col-container, centres #logo-title and sizes its image, and pulls
	# #edit_text upward.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 1024px;
	}
	#logo-title {
	text-align: center;
	}
	#logo-title img {
	width: 400px;
	}
	#edit_text{margin-top: -62px !important}
	"""


	# Gradio UI: image editor + prompt on the left, before/after slider on the
	# right, advanced generation settings in a collapsed accordion.
	# NOTE(review): the source this was recovered from had lost its indentation,
	# so the container nesting below (which widgets sit in which Row/Column, and
	# where the accordion lives) is reconstructed -- confirm against the
	# intended layout.
	with gr.Blocks(css=css, theme=gr.themes.Citrus()) as demo:
	    # The heading is closed with </h1> (it was previously closed with </style>).
	    gr.HTML("<h1 style='text-align: center'>Qwen-Image with InstantX Inpainting ControlNet</h1>")
	    # Describes inpainting, which is what this ControlNet does (the previous
	    # text advertised depth/pose/canny conditioning).
	    gr.Markdown(
	        "Inpaint images with the [InstantX/Qwen-Image-ControlNet-Inpainting](https://huggingface.co/InstantX/Qwen-Image-ControlNet-Inpainting) ControlNet: upload an image, paint a mask over the region to change, and describe the edit"
	    )
	    with gr.Row():
	        with gr.Column():
	            edit_image = gr.ImageEditor(
	                label='Upload and draw mask for inpainting',
	                type='pil',
	                sources=["upload", "webcam"],
	                image_mode='RGB',
	                layers=False,
	                brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"),
	                height=600
	            )
	            prompt = gr.Text(
	                label="Prompt",
	                show_label=False,
	                max_lines=1,
	                placeholder="Enter your prompt (e.g., 'change the hat to red')",
	                container=False,
	            )
	            # Hidden field: always submits an empty negative prompt.
	            negative_prompt = gr.Text(
	                label="Negative Prompt",
	                show_label=True,
	                max_lines=1,
	                placeholder="Enter what you don't want (optional)",
	                container=False,
	                value="",
	                visible=False
	            )
	            run_button = gr.Button("Run")

	        with gr.Column():
	            result = gr.ImageSlider(label="Result", show_label=False, interactive=False)
	            # Revealed only after a generation has succeeded.
	            use_as_input_button = gr.Button("🔄 Use as Input Image", visible=False, variant="secondary")

	    with gr.Accordion("Advanced Settings", open=False):

	        seed = gr.Slider(
	            label="Seed",
	            minimum=0,
	            maximum=MAX_SEED,
	            step=1,
	            value=42,
	        )

	        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	        with gr.Row():
	            strength = gr.Slider(
	                label="Conditioning Scale",
	                minimum=0.0,
	                maximum=1.0,
	                step=0.1,
	                value=1.0,
	                info="Controls how much the inpainted region should change"
	            )

	            true_cfg_scale = gr.Slider(
	                label="True CFG Scale",
	                minimum=1.0,
	                maximum=10.0,
	                step=0.5,
	                value=4.0,
	                info="Classifier-free guidance scale"
	            )

	            num_inference_steps = gr.Slider(
	                label="Number of inference steps",
	                minimum=1,
	                maximum=50,
	                step=1,
	                value=30,
	            )

	        rewrite_prompt = gr.Checkbox(
	            label="Enhance prompt (using HF Inference)",
	            value=True
	        )

	    # Event handlers for reuse functionality
	    use_as_input_button.click(
	        fn=use_output_as_input,
	        inputs=[result],
	        outputs=[edit_image],
	        show_api=False
	    )

	    # Main generation pipeline: clear the old result, run inference, then
	    # reveal the reuse button.
	    gr.on(
	        triggers=[run_button.click, prompt.submit],
	        fn=clear_result,
	        inputs=None,
	        outputs=result,
	        show_api=False
	    ).then(
	        fn=infer,
	        inputs=[edit_image, prompt, negative_prompt, seed, randomize_seed, strength, num_inference_steps, true_cfg_scale, rewrite_prompt],
	        outputs=[result, seed]
	    ).success(
	        # .success (not .then): only show the button when infer produced an
	        # image; a failed run leaves nothing to reuse.
	        fn=lambda: gr.update(visible=True),
	        inputs=None,
	        outputs=use_as_input_button,
	        show_api=False
	    )

	demo.launch()