Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
import torch
|
| 6 |
-
from PIL import Image
|
| 7 |
from tqdm import tqdm
|
| 8 |
import gradio as gr
|
| 9 |
|
|
@@ -55,6 +55,79 @@ def single_condition_generate_image(user_prompt, spatial_img, height, width, see
|
|
| 55 |
clear_cache(pipe.transformer)
|
| 56 |
return image
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# Load example images
|
| 59 |
def load_examples():
|
| 60 |
examples = []
|
|
@@ -88,6 +161,64 @@ def load_examples():
|
|
| 88 |
|
| 89 |
return examples
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# CSS for improved UI
|
| 92 |
css = """
|
| 93 |
:root {
|
|
@@ -236,6 +367,13 @@ body {
|
|
| 236 |
.gr-accent-3 {
|
| 237 |
background-color: #f9c06b;
|
| 238 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
"""
|
| 240 |
|
| 241 |
# Create the Gradio Blocks interface
|
|
@@ -247,64 +385,168 @@ with gr.Blocks(css=css) as demo:
|
|
| 247 |
</div>
|
| 248 |
""")
|
| 249 |
|
| 250 |
-
with gr.
|
| 251 |
-
with gr.
|
| 252 |
-
with gr.
|
| 253 |
-
gr.
|
| 254 |
-
|
| 255 |
-
<
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
user_prompt = gr.Textbox(
|
| 261 |
-
label="Your description",
|
| 262 |
-
placeholder="Describe what you want to see (e.g., a cat sitting by the window)",
|
| 263 |
-
lines=2
|
| 264 |
-
)
|
| 265 |
-
|
| 266 |
-
spatial_img = gr.Image(
|
| 267 |
-
label="Reference Image (Optional)",
|
| 268 |
-
type="pil",
|
| 269 |
-
elem_classes="gr-image-upload"
|
| 270 |
-
)
|
| 271 |
-
|
| 272 |
-
with gr.Group():
|
| 273 |
-
with gr.Row():
|
| 274 |
-
height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
|
| 275 |
-
width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
|
| 276 |
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
<
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
-
#
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
|
| 309 |
gr.HTML("""
|
| 310 |
<div class="gr-footer">
|
|
@@ -312,12 +554,5 @@ with gr.Blocks(css=css) as demo:
|
|
| 312 |
</div>
|
| 313 |
""")
|
| 314 |
|
| 315 |
-
# Link the button to the function
|
| 316 |
-
generate_btn.click(
|
| 317 |
-
single_condition_generate_image,
|
| 318 |
-
inputs=[user_prompt, spatial_img, height, width, seed],
|
| 319 |
-
outputs=output_image
|
| 320 |
-
)
|
| 321 |
-
|
| 322 |
# Launch the Gradio app
|
| 323 |
demo.queue().launch()
|
|
|
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
import torch
|
| 6 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 7 |
from tqdm import tqdm
|
| 8 |
import gradio as gr
|
| 9 |
|
|
|
|
| 55 |
clear_cache(pipe.transformer)
|
| 56 |
return image
|
| 57 |
|
| 58 |
+
# New function for multilingual text rendering
|
| 59 |
+
# Font files tried in order when overlaying text. The first entry is the
# original choice; the others are common file names on Linux hosts.
# TODO(review): confirm which of these the deployment image actually ships,
# and add a font with the scripts you need (e.g. CJK) if none is present.
_OVERLAY_FONT_CANDIDATES = (
    "Arial Unicode.ttf",
    "NotoSansCJK-Regular.ttc",
    "DejaVuSans.ttf",
)


def _load_overlay_font(size):
    """Return the first loadable candidate font at ``size``, else Pillow's default."""
    for candidate in _OVERLAY_FONT_CANDIDATES:
        try:
            return ImageFont.truetype(candidate, size)
        except OSError:
            # Font file not found or unreadable: try the next candidate.
            continue
    try:
        # Newer Pillow releases accept a size for the built-in font, which
        # keeps the requested text size instead of a fixed tiny bitmap font.
        return ImageFont.load_default(size=size)
    except (TypeError, OSError):
        # Older Pillow: load_default() takes no size argument.
        return ImageFont.load_default()


def _draw_overlay_text(image, text, color, size, placement):
    """Draw ``text`` onto ``image`` in place, with a 1px black outline."""
    draw = ImageDraw.Draw(image)
    font = _load_overlay_font(size)

    # Use the real image dimensions rather than the requested ones, in case
    # the pipeline returned a different size than was asked for.
    img_width, img_height = image.size
    center_x = img_width // 2
    if placement == "top":
        center_y = size + 10
    elif placement == "bottom":
        center_y = img_height - size - 10
    else:  # "center" and any unrecognised value
        center_y = img_height // 2

    # Outline: draw the text in black at the four diagonal 1px offsets first,
    # then the coloured text on top. anchor="mm" centres the text on the point.
    for dx, dy in ((1, 1), (-1, -1), (1, -1), (-1, 1)):
        draw.text(
            (center_x + dx, center_y + dy),
            text,
            fill="black",
            font=font,
            anchor="mm",
        )
    draw.text((center_x, center_y), text, fill=color, font=font, anchor="mm")


@spaces.GPU()
def text_rendering_generate_image(user_prompt, input_text, text_color, text_size, text_position, spatial_img, height, width, seed):
    """Generate an image with the Ghibli LoRA and optionally overlay text on it.

    Args:
        user_prompt: Free-text description; appended to SYSTEM_PROMPT when
            non-empty, otherwise SYSTEM_PROMPT is used alone.
        input_text: Text to draw on the result. Nothing is drawn when empty.
        text_color: Fill colour passed to Pillow for the main text.
        text_size: Font size; coerced to int before use.
        text_position: "top", "bottom", or anything else for centre.
        spatial_img: Optional reference image forwarded as the single entry
            of ``spatial_images``; ``None`` means no reference image.
        height: Requested output height; coerced to int.
        width: Requested output width; coerced to int.
        seed: Seed for the CPU torch generator; coerced to int.

    Returns:
        The first image produced by the pipeline, with the overlay drawn on
        it when ``input_text`` is non-empty.
    """
    # Combine the system prompt with the user prompt.
    full_prompt = f"{SYSTEM_PROMPT}, {user_prompt}" if user_prompt else SYSTEM_PROMPT

    # Select the Ghibli LoRA weights on the transformer.
    lora_path = os.path.join(lora_base_path, "Ghibli.safetensors")
    set_single_lora(pipe.transformer, lora_path, lora_weights=[1], cond_size=512)

    spatial_imgs = [spatial_img] if spatial_img is not None else []
    try:
        image = pipe(
            full_prompt,
            height=int(height),
            width=int(width),
            guidance_scale=3.5,
            num_inference_steps=25,
            max_sequence_length=512,
            # UI sliders may deliver floats; manual_seed needs an integer.
            generator=torch.Generator("cpu").manual_seed(int(seed)),
            subject_images=[],
            spatial_images=spatial_imgs,
            cond_size=512,
        ).images[0]
    finally:
        # Run clear_cache even when generation raises, so a failed request
        # does not skip the cleanup that a successful one performs.
        clear_cache(pipe.transformer)

    # Add text to the generated image if text is provided.
    if input_text:
        # Convert to a PIL image if the pipeline returned something else.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        _draw_overlay_text(image, input_text, text_color, int(text_size), text_position)

    return image
|
| 130 |
+
|
| 131 |
# Load example images
|
| 132 |
def load_examples():
|
| 133 |
examples = []
|
|
|
|
| 161 |
|
| 162 |
return examples
|
| 163 |
|
| 164 |
+
# Load examples for text rendering tab
|
| 165 |
+
def load_text_examples():
    """Build the example rows for the text-rendering tab.

    Each row is a list in the order: prompt, overlay text, text colour,
    text size, text position, reference image, height, width, seed.
    Entries whose image file is missing under ``./test_imgs`` are skipped.

    Returns:
        A list of example rows; empty when none of the image files exist.
    """
    examples = []
    test_img_dir = "./test_imgs"

    example_data = [
        {
            "prompt": "cute character with speech bubble",
            "text": "Hello World!",
            "color": "#ffffff",
            "size": 36,
            "position": "center",
            "filename": "00.jpg",
            "height": 680,
            "width": 1024,
            "seed": 123,
        },
        {
            "prompt": "landscape with message",
            "text": "안녕하세요!",
            "color": "#ffff00",
            "size": 48,
            "position": "top",
            "filename": "03.jpg",
            "height": 1024,
            "width": 768,
            "seed": 456,
        },
        {
            "prompt": "character with subtitles",
            "text": "こんにちは世界!",
            "color": "#00ffff",
            "size": 42,
            "position": "bottom",
            "filename": "02.jpg",
            "height": 560,
            "width": 1024,
            "seed": 789,
        },
    ]

    for example in example_data:
        img_path = os.path.join(test_img_dir, example["filename"])
        if not os.path.exists(img_path):
            continue

        # Image.open is lazy and keeps the file open; copy the pixels inside
        # the context manager so the file handle is closed straight away.
        with Image.open(img_path) as opened:
            reference_img = opened.copy()

        examples.append([
            example["prompt"],
            example["text"],
            example["color"],
            example["size"],
            example["position"],
            reference_img,
            example["height"],
            example["width"],
            example["seed"],
        ])

    return examples
|
| 221 |
+
|
| 222 |
# CSS for improved UI
|
| 223 |
css = """
|
| 224 |
:root {
|
|
|
|
| 367 |
.gr-accent-3 {
|
| 368 |
background-color: #f9c06b;
|
| 369 |
}
|
| 370 |
+
|
| 371 |
+
.text-rendering-options {
|
| 372 |
+
background-color: #f0f8ff;
|
| 373 |
+
padding: 16px;
|
| 374 |
+
border-radius: var(--border-radius);
|
| 375 |
+
margin-top: 16px;
|
| 376 |
+
}
|
| 377 |
"""
|
| 378 |
|
| 379 |
# Create the Gradio Blocks interface
|
|
|
|
| 385 |
</div>
|
| 386 |
""")
|
| 387 |
|
| 388 |
+
with gr.Tabs():
|
| 389 |
+
with gr.Tab("Create Ghibli Art"):
|
| 390 |
+
with gr.Row():
|
| 391 |
+
with gr.Column(scale=1):
|
| 392 |
+
gr.HTML("""
|
| 393 |
+
<div class="gr-box">
|
| 394 |
+
<h3>🎨 Your Creative Input</h3>
|
| 395 |
+
<p>Describe what you want to see in your Ghibli-inspired image</p>
|
| 396 |
+
</div>
|
| 397 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
|
| 399 |
+
user_prompt = gr.Textbox(
|
| 400 |
+
label="Your description",
|
| 401 |
+
placeholder="Describe what you want to see (e.g., a cat sitting by the window)",
|
| 402 |
+
lines=2
|
| 403 |
+
)
|
| 404 |
+
|
| 405 |
+
spatial_img = gr.Image(
|
| 406 |
+
label="Reference Image (Optional)",
|
| 407 |
+
type="pil",
|
| 408 |
+
elem_classes="gr-image-upload"
|
| 409 |
+
)
|
| 410 |
+
|
| 411 |
+
with gr.Group():
|
| 412 |
+
with gr.Row():
|
| 413 |
+
height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
|
| 414 |
+
width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
|
| 415 |
+
|
| 416 |
+
seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
|
| 417 |
+
info="Change for different variations")
|
| 418 |
+
|
| 419 |
+
generate_btn = gr.Button("✨ Generate Ghibli Art", elem_classes="gr-button")
|
| 420 |
|
| 421 |
+
with gr.Column(scale=1):
|
| 422 |
+
gr.HTML("""
|
| 423 |
+
<div class="gr-box">
|
| 424 |
+
<h3>✨ Your Magical Creation</h3>
|
| 425 |
+
<p>Your Ghibli-inspired artwork will appear here</p>
|
| 426 |
+
</div>
|
| 427 |
+
""")
|
| 428 |
+
output_image = gr.Image(label="Generated Image", elem_classes="gr-output-image")
|
| 429 |
|
| 430 |
+
gr.HTML("""
|
| 431 |
+
<div class="gr-box gr-examples-gallery">
|
| 432 |
+
<h3>✨ Inspiration Gallery</h3>
|
| 433 |
+
<p>Click on any example to try it out</p>
|
| 434 |
+
</div>
|
| 435 |
+
""")
|
| 436 |
+
|
| 437 |
+
# Add examples
|
| 438 |
+
examples = load_examples()
|
| 439 |
+
gr.Examples(
|
| 440 |
+
examples=examples,
|
| 441 |
+
inputs=[user_prompt, spatial_img, height, width, seed],
|
| 442 |
+
outputs=output_image,
|
| 443 |
+
fn=single_condition_generate_image,
|
| 444 |
+
cache_examples=False,
|
| 445 |
+
examples_per_page=4
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
# Link the button to the function
|
| 449 |
+
generate_btn.click(
|
| 450 |
+
single_condition_generate_image,
|
| 451 |
+
inputs=[user_prompt, spatial_img, height, width, seed],
|
| 452 |
+
outputs=output_image
|
| 453 |
+
)
|
| 454 |
|
| 455 |
+
# Second tab for Image & Multilingual Text Rendering
|
| 456 |
+
with gr.Tab("Image & Multilingual Text Rendering"):
|
| 457 |
+
with gr.Row():
|
| 458 |
+
with gr.Column(scale=1):
|
| 459 |
+
gr.HTML("""
|
| 460 |
+
<div class="gr-box">
|
| 461 |
+
<h3>🌈 Art with Text</h3>
|
| 462 |
+
<p>Create Ghibli-style images with beautiful text in any language</p>
|
| 463 |
+
</div>
|
| 464 |
+
""")
|
| 465 |
+
|
| 466 |
+
text_user_prompt = gr.Textbox(
|
| 467 |
+
label="Image Description",
|
| 468 |
+
placeholder="Describe what you want to see (e.g., a character with speech bubble)",
|
| 469 |
+
lines=2
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
+
with gr.Group(elem_classes="text-rendering-options"):
|
| 473 |
+
input_text = gr.Textbox(
|
| 474 |
+
label="Overlay Text",
|
| 475 |
+
placeholder="Enter text in any language",
|
| 476 |
+
lines=1
|
| 477 |
+
)
|
| 478 |
+
|
| 479 |
+
with gr.Row():
|
| 480 |
+
text_color = gr.ColorPicker(
|
| 481 |
+
label="Text Color",
|
| 482 |
+
value="#FFFFFF"
|
| 483 |
+
)
|
| 484 |
+
|
| 485 |
+
text_size = gr.Slider(
|
| 486 |
+
minimum=12,
|
| 487 |
+
maximum=72,
|
| 488 |
+
step=2,
|
| 489 |
+
label="Text Size",
|
| 490 |
+
value=36
|
| 491 |
+
)
|
| 492 |
+
|
| 493 |
+
text_position = gr.Radio(
|
| 494 |
+
["top", "center", "bottom"],
|
| 495 |
+
label="Text Position",
|
| 496 |
+
value="center"
|
| 497 |
+
)
|
| 498 |
+
|
| 499 |
+
text_spatial_img = gr.Image(
|
| 500 |
+
label="Reference Image (Optional)",
|
| 501 |
+
type="pil",
|
| 502 |
+
elem_classes="gr-image-upload"
|
| 503 |
+
)
|
| 504 |
+
|
| 505 |
+
with gr.Group():
|
| 506 |
+
with gr.Row():
|
| 507 |
+
text_height = gr.Slider(minimum=256, maximum=1024, step=64, label="Height", value=768)
|
| 508 |
+
text_width = gr.Slider(minimum=256, maximum=1024, step=64, label="Width", value=768)
|
| 509 |
+
|
| 510 |
+
text_seed = gr.Slider(minimum=1, maximum=9999, step=1, label="Seed", value=42,
|
| 511 |
+
info="Change for different variations")
|
| 512 |
+
|
| 513 |
+
text_generate_btn = gr.Button("✨ Generate Art with Text", elem_classes="gr-button")
|
| 514 |
+
|
| 515 |
+
with gr.Column(scale=1):
|
| 516 |
+
gr.HTML("""
|
| 517 |
+
<div class="gr-box">
|
| 518 |
+
<h3>✨ Your Text Creation</h3>
|
| 519 |
+
<p>Your Ghibli-inspired artwork with text will appear here</p>
|
| 520 |
+
</div>
|
| 521 |
+
""")
|
| 522 |
+
text_output_image = gr.Image(label="Generated Image with Text", elem_classes="gr-output-image")
|
| 523 |
+
|
| 524 |
+
gr.HTML("""
|
| 525 |
+
<div class="gr-box gr-examples-gallery">
|
| 526 |
+
<h3>✨ Text Rendering Examples</h3>
|
| 527 |
+
<p>Click on any example to try it out</p>
|
| 528 |
+
</div>
|
| 529 |
+
""")
|
| 530 |
+
|
| 531 |
+
# Add text rendering examples
|
| 532 |
+
text_examples = load_text_examples()
|
| 533 |
+
gr.Examples(
|
| 534 |
+
examples=text_examples,
|
| 535 |
+
inputs=[text_user_prompt, input_text, text_color, text_size, text_position,
|
| 536 |
+
text_spatial_img, text_height, text_width, text_seed],
|
| 537 |
+
outputs=text_output_image,
|
| 538 |
+
fn=text_rendering_generate_image,
|
| 539 |
+
cache_examples=False,
|
| 540 |
+
examples_per_page=3
|
| 541 |
+
)
|
| 542 |
+
|
| 543 |
+
# Link the text render button to the function
|
| 544 |
+
text_generate_btn.click(
|
| 545 |
+
text_rendering_generate_image,
|
| 546 |
+
inputs=[text_user_prompt, input_text, text_color, text_size, text_position,
|
| 547 |
+
text_spatial_img, text_height, text_width, text_seed],
|
| 548 |
+
outputs=text_output_image
|
| 549 |
+
)
|
| 550 |
|
| 551 |
gr.HTML("""
|
| 552 |
<div class="gr-footer">
|
|
|
|
| 554 |
</div>
|
| 555 |
""")
|
| 556 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
# Launch the Gradio app
|
| 558 |
demo.queue().launch()
|