import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
import os
import base64
import json
from huggingface_hub import InferenceClient

def get_caption_language(prompt):
    """Classify *prompt* as Chinese ('zh') or English ('en').

    The prompt counts as Chinese as soon as one of its characters lies in
    the CJK Unified Ideographs range (U+4E00 through U+9FFF); every other
    prompt, including the empty string, is reported as 'en'.
    """
    cjk_first, cjk_last = '\u4e00', '\u9fff'
    contains_cjk = any(cjk_first <= ch <= cjk_last for ch in prompt)
    return 'zh' if contains_cjk else 'en'

def _extract_rewritten(raw_text):
    """Return the instruction held in a ``{"Rewritten": "..."}`` model reply.

    The reply may wrap the JSON object in a Markdown code fence or extra
    prose, so only the span from the first ``{`` to the last ``}`` is
    decoded. When no such object can be decoded, the raw text is used
    instead. The result is stripped and flattened to a single line.
    """
    text = raw_text.strip()
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        try:
            payload = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            payload = None
        if isinstance(payload, dict) and isinstance(payload.get("Rewritten"), str):
            text = payload["Rewritten"]
    return text.strip().replace("\n", " ")


def polish_prompt(original_prompt, system_prompt):
    """
    Rewrites the prompt using a Hugging Face InferenceClient.

    Args:
        original_prompt: The user's edit instruction, sent as the user message.
        system_prompt: Instructions for the rewriter, sent as the system message.

    Returns:
        The rewritten instruction as a single-line string. If the model
        answers with a ``{"Rewritten": "..."}`` JSON object, only the value
        of that field is returned. ``original_prompt`` is returned unchanged
        when the API call fails or the reply is empty.

    Raises:
        EnvironmentError: If the ``HF_TOKEN`` environment variable is not set.
    """
    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        raise EnvironmentError("HF_TOKEN is not set. Please set it in your environment.")
    client = InferenceClient(
        provider="cerebras",
        api_key=api_key,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt}
    ]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=2000,
        )
        content = completion.choices[0].message.content
    except Exception as e:
        # Rewriting is best-effort: any API failure falls back to the
        # user's own wording instead of aborting the edit.
        print(f"Error during Hugging Face API call: {e}")
        return original_prompt
    polished_prompt = _extract_rewritten(content or "")
    # An empty reply would otherwise hand the pipeline an empty instruction.
    return polished_prompt or original_prompt

# System prompt for the prompt rewriter: infer() passes it to polish_prompt()
# as the system message. It asks the model to answer with a JSON object whose
# "Rewritten" field holds the polished edit instruction.
# NOTE(review): the section headings below jump from "## 2." to "## 4.".
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.
## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.
### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.
### Human Editing (e.g., change a person’s face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.
### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:  
  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`  
  if applicable.
## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
  "Rewritten": "..."
}
'''

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Weights are loaded in bfloat16 on either device.
dtype = torch.bfloat16

# Build the editing pipeline once at import time and move it to the target device.
pipe = QwenImageEditPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Load LoRA weights for acceleration (8-step Lightning checkpoint), then
# fuse them into the pipeline.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors",
)
pipe.fuse_lora()

@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Uses Qwen-Image-Edit with optional prompt rewriting before execution.

    Args:
        image: The image to edit (the UI supplies a PIL image).
        prompt: The edit instruction.
        seed: Seed for the torch generator; ignored when ``randomize_seed`` is set.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` instead of using ``seed``.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: Rewrite ``prompt`` with ``polish_prompt`` before editing.
        num_images_per_prompt: How many images to generate.
        progress: Gradio progress tracker; not read directly here.

    Returns:
        A tuple ``(edited_images, seed)``: the pipeline's ``.images`` output
        and the integer seed that was actually used.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Fail with a readable UI message instead of an error from inside the pipeline.
    if image is None:
        raise gr.Error("Please upload an input image before editing.")

    negative_prompt = " "
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Slider values can arrive as floats; manual_seed and the pipeline's
    # counts need integers.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)
    num_images_per_prompt = int(num_images_per_prompt)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Rewrite first so the log lines below show the prompt the pipeline receives.
    if rewrite_prompt:
        print(f"Original Prompt: '{prompt}'")
        prompt = polish_prompt(prompt, SYSTEM_PROMPT_EDIT)
        print(f"Rewritten Prompt: {prompt}")

    print(f"Calling pipeline with prompt: '{prompt}'")
    print(f"Negative Prompt: '{negative_prompt}'")
    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
    edited_images = pipe(
        image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images
    return edited_images, seed

# Upper bound for seeds: the largest value of a signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Sample edit instructions (long literals are wrapped with implicit concatenation).
examples = [
    (
        "Replace the cat with a friendly golden retriever. "
        "Make it look happier, and add more background details."
    ),
    (
        "Add text 'Qwen - AI for image editing' in Chinese "
        "at the bottom center with a small shadow."
    ),
    (
        "Change the style to 1970s vintage, add old photo effect, "
        "restore any scratches on the wall or window."
    ),
    "Remove the blue sky and replace it with a dark night cityscape.",
    (
        'Replace "Qwen" with "通义" in the Image. '
        'Ensure Chinese font is used for "通义" and position it to the top left '
        'with a light heading-style font.'
    ),
]

# Gradio UI: image + instruction inputs, an output gallery, and advanced
# generation settings, all wired to infer().
with gr.Blocks() as demo:
    gr.Markdown("# Qwen-Image-Edit with Prompt Enhancement and Fast Inference")
    gr.Markdown("Try editing images with multi-modal instruction polishing.")
    with gr.Column():
        input_image = gr.Image(label="Input Image", type="pil")
        prompt = gr.Text(label="Edit Instruction", placeholder="e.g. Add a dog to the right side.")
        run_button = gr.Button("Edit", variant="primary")
        result = gr.Gallery(label="Output Images", show_label=False)
    with gr.Accordion("Advanced Settings", open=False):
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=MAX_SEED,
            step=1,
            value=0
        )
        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
        with gr.Row():
            # Default of 1.0 matches infer()'s own default for this argument.
            # NOTE(review): the fused 8-step Lightning LoRA is presumably
            # meant to run at a guidance scale of 1.0 — confirm against the
            # LoRA's model card.
            true_guidance_scale = gr.Slider(
                label="True Guidance Scale",
                minimum=1.0,
                maximum=5.0,
                step=0.1,
                value=1.0
            )
            num_inference_steps = gr.Slider(
                label="Inference Steps (Fast 8-step mode)",
                minimum=4,
                maximum=8,
                step=1,
                value=8
            )
            num_images_per_prompt = gr.Slider(
                label="Images per Prompt",
                minimum=1,
                maximum=4,
                step=1,
                value=1
            )
            rewrite_prompt = gr.Checkbox(label="Use Prompt Rewriter", value=False, visible=True)

    # Run the edit from either the button or pressing Enter in the prompt box.
    # The input order must match infer()'s positional parameters; the seed
    # slider is also an output so it shows the seed that was actually used.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_prompt,
            num_images_per_prompt
        ],
        outputs=[result, seed],
    )

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()