Spaces:
Sleeping
Sleeping
new image generator (drops tier designs)
Browse files
app.py
CHANGED
|
@@ -253,7 +253,7 @@ class PicletGeneratorService:
|
|
| 253 |
# Space endpoints
|
| 254 |
JOY_CAPTION_SPACE = "fancyfeast/joy-caption-alpha-two"
|
| 255 |
GPT_OSS_SPACE = "amd/gpt-oss-120b-chatbot"
|
| 256 |
-
|
| 257 |
|
| 258 |
@staticmethod
|
| 259 |
def generate_enhanced_caption(image_path: str, hf_token: str) -> str:
|
|
@@ -304,7 +304,7 @@ class PicletGeneratorService:
|
|
| 304 |
print(f"Generating text...")
|
| 305 |
result = client.predict(
|
| 306 |
prompt, # message (positional)
|
| 307 |
-
"You are a helpful assistant that creates
|
| 308 |
0.7, # temperature (positional)
|
| 309 |
api_name="/chat"
|
| 310 |
)
|
|
@@ -335,7 +335,7 @@ class PicletGeneratorService:
|
|
| 335 |
Generate complete Piclet concept from image caption
|
| 336 |
Returns parsed concept with object name, variation, stats, etc.
|
| 337 |
"""
|
| 338 |
-
concept_prompt = f"""You are analyzing an image to create a
|
| 339 |
|
| 340 |
"{caption}"
|
| 341 |
|
|
@@ -388,7 +388,7 @@ Weight: {{e.g., "15kg" or "33 lbs"}}
|
|
| 388 |
{{2-3 paragraphs describing the creature's behavior, habitat, abilities, and role in its ecosystem. What does it DO? Where does it live? How does it interact with its environment? What are its natural behaviors and powers that reflect the object's real-world function? This is the creature's background story and behavioral profile.}}
|
| 389 |
|
| 390 |
# Monster Image Prompt
|
| 391 |
-
{{Detailed 3-4 sentence visual description for image generation.
|
| 392 |
```
|
| 393 |
|
| 394 |
CRITICAL RULES:
|
|
@@ -397,8 +397,8 @@ CRITICAL RULES:
|
|
| 397 |
- Variation should be meaningful and distinctive (material, style, color, context, or model variant)
|
| 398 |
- Physical Appearance must describe the CREATURE'S BODY with references to the specific object's visual features
|
| 399 |
- Lore & Behavior must describe WHAT THE CREATURE DOES, not how it looks
|
| 400 |
-
- Monster Image Prompt must
|
| 401 |
-
- Monster Image Prompt must NOT include the monster's name or style prefixes like "Anime-style" or "
|
| 402 |
- Primary Type must match the object category (machina for electronics/vehicles, structure for buildings, etc.)"""
|
| 403 |
|
| 404 |
response_text = PicletGeneratorService.generate_text_with_gpt(concept_prompt, hf_token)
|
|
@@ -434,15 +434,10 @@ CRITICAL RULES:
|
|
| 434 |
monster_name = extract_section(concept_text, 'Monster Name')
|
| 435 |
primary_type = extract_section(concept_text, 'Primary Type').lower()
|
| 436 |
|
| 437 |
-
# Extract both appearance and lore sections
|
| 438 |
physical_appearance = extract_section(concept_text, 'Physical Appearance')
|
| 439 |
lore_behavior = extract_section(concept_text, 'Lore & Behavior')
|
| 440 |
|
| 441 |
-
# Combine both sections with a separator for complete description
|
| 442 |
-
description = physical_appearance
|
| 443 |
-
if lore_behavior:
|
| 444 |
-
description = f"{physical_appearance}\n\n{lore_behavior}" if physical_appearance else lore_behavior
|
| 445 |
-
|
| 446 |
image_prompt = extract_section(concept_text, 'Monster Image Prompt')
|
| 447 |
|
| 448 |
# Parse physical stats
|
|
@@ -484,7 +479,8 @@ CRITICAL RULES:
|
|
| 484 |
'concept': concept_text,
|
| 485 |
'stats': {
|
| 486 |
'name': monster_name or 'Unknown',
|
| 487 |
-
'
|
|
|
|
| 488 |
'tier': tier,
|
| 489 |
'primaryType': primary_type or 'beast',
|
| 490 |
'height': height,
|
|
@@ -496,55 +492,48 @@ CRITICAL RULES:
|
|
| 496 |
|
| 497 |
@staticmethod
|
| 498 |
def generate_piclet_image(image_prompt: str, tier: str, hf_token: str) -> dict:
|
| 499 |
-
"""Generate Piclet image using
|
| 500 |
try:
|
| 501 |
-
print(f"Connecting to
|
| 502 |
client = Client(
|
| 503 |
-
PicletGeneratorService.
|
| 504 |
hf_token=hf_token
|
| 505 |
)
|
| 506 |
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
'medium': 'detailed and well-crafted design',
|
| 510 |
-
'high': 'highly detailed and impressive design with special effects',
|
| 511 |
-
'legendary': 'highly detailed and majestic design with dramatic lighting and aura effects'
|
| 512 |
-
}
|
| 513 |
-
|
| 514 |
-
background_styles = {
|
| 515 |
-
'low': 'simple natural environment',
|
| 516 |
-
'medium': 'detailed thematic natural environment',
|
| 517 |
-
'high': 'atmospheric thematic environment with special lighting effects',
|
| 518 |
-
'legendary': 'epic thematic environment with dramatic atmospheric lighting and mystical aura'
|
| 519 |
-
}
|
| 520 |
|
| 521 |
-
|
|
|
|
| 522 |
|
| 523 |
-
|
| 524 |
result = client.predict(
|
| 525 |
-
full_prompt,
|
| 526 |
-
0,
|
| 527 |
-
True,
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
|
|
|
| 531 |
api_name="/infer"
|
| 532 |
)
|
| 533 |
|
| 534 |
-
#
|
| 535 |
result_data = result.data if hasattr(result, 'data') else result
|
| 536 |
image_data = result_data[0] if isinstance(result_data, (list, tuple)) else result_data
|
| 537 |
seed = result_data[1] if isinstance(result_data, (list, tuple)) and len(result_data) > 1 else 0
|
| 538 |
|
| 539 |
-
# Handle different return formats
|
| 540 |
image_url = None
|
| 541 |
if isinstance(image_data, str):
|
| 542 |
image_url = image_data
|
| 543 |
elif isinstance(image_data, dict):
|
| 544 |
image_url = image_data.get('url') or image_data.get('path')
|
|
|
|
|
|
|
| 545 |
|
| 546 |
if not image_url:
|
| 547 |
-
raise Exception("Failed to extract image URL from
|
| 548 |
|
| 549 |
return {
|
| 550 |
'imageUrl': image_url,
|
|
|
|
| 253 |
# Space endpoints
|
| 254 |
JOY_CAPTION_SPACE = "fancyfeast/joy-caption-alpha-two"
|
| 255 |
GPT_OSS_SPACE = "amd/gpt-oss-120b-chatbot"
|
| 256 |
+
QWEN_IMAGE_SPACE = "multimodalart/Qwen-Image-Fast"
|
| 257 |
|
| 258 |
@staticmethod
|
| 259 |
def generate_enhanced_caption(image_path: str, hf_token: str) -> str:
|
|
|
|
| 304 |
print(f"Generating text...")
|
| 305 |
result = client.predict(
|
| 306 |
prompt, # message (positional)
|
| 307 |
+
"You are a helpful assistant that creates Pokémon-style monster concepts based on real-world objects.", # system_prompt (positional)
|
| 308 |
0.7, # temperature (positional)
|
| 309 |
api_name="/chat"
|
| 310 |
)
|
|
|
|
| 335 |
Generate complete Piclet concept from image caption
|
| 336 |
Returns parsed concept with object name, variation, stats, etc.
|
| 337 |
"""
|
| 338 |
+
concept_prompt = f"""You are analyzing an image to create a Pokémon-style creature. Here's the image description:
|
| 339 |
|
| 340 |
"{caption}"
|
| 341 |
|
|
|
|
| 388 |
{{2-3 paragraphs describing the creature's behavior, habitat, abilities, and role in its ecosystem. What does it DO? Where does it live? How does it interact with its environment? What are its natural behaviors and powers that reflect the object's real-world function? This is the creature's background story and behavioral profile.}}
|
| 389 |
|
| 390 |
# Monster Image Prompt
|
| 391 |
+
{{Detailed 3-4 sentence visual description for anime-style image generation. Describe body structure, colors, textures, materials, distinctive features, personality-driven pose/expression, dynamic action or stance, environment/background setting, and atmospheric lighting. Be specific and detailed about visual elements. DO NOT mention the source object name or include phrases like "Inspired by [object]".}}
|
| 392 |
```
|
| 393 |
|
| 394 |
CRITICAL RULES:
|
|
|
|
| 397 |
- Variation should be meaningful and distinctive (material, style, color, context, or model variant)
|
| 398 |
- Physical Appearance must describe the CREATURE'S BODY with references to the specific object's visual features
|
| 399 |
- Lore & Behavior must describe WHAT THE CREATURE DOES, not how it looks
|
| 400 |
+
- Monster Image Prompt must be a detailed (3-4 sentences) pure visual description without mentioning the source object name
|
| 401 |
+
- Monster Image Prompt must NOT include the monster's name or style prefixes like "Anime-style" or "Pokémon-style"
|
| 402 |
- Primary Type must match the object category (machina for electronics/vehicles, structure for buildings, etc.)"""
|
| 403 |
|
| 404 |
response_text = PicletGeneratorService.generate_text_with_gpt(concept_prompt, hf_token)
|
|
|
|
| 434 |
monster_name = extract_section(concept_text, 'Monster Name')
|
| 435 |
primary_type = extract_section(concept_text, 'Primary Type').lower()
|
| 436 |
|
| 437 |
+
# Extract both appearance and lore sections separately (keep them separate!)
|
| 438 |
physical_appearance = extract_section(concept_text, 'Physical Appearance')
|
| 439 |
lore_behavior = extract_section(concept_text, 'Lore & Behavior')
|
| 440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
image_prompt = extract_section(concept_text, 'Monster Image Prompt')
|
| 442 |
|
| 443 |
# Parse physical stats
|
|
|
|
| 479 |
'concept': concept_text,
|
| 480 |
'stats': {
|
| 481 |
'name': monster_name or 'Unknown',
|
| 482 |
+
'physicalAppearance': physical_appearance,
|
| 483 |
+
'lore': lore_behavior,
|
| 484 |
'tier': tier,
|
| 485 |
'primaryType': primary_type or 'beast',
|
| 486 |
'height': height,
|
|
|
|
| 492 |
|
| 493 |
@staticmethod
|
| 494 |
def generate_piclet_image(image_prompt: str, tier: str, hf_token: str) -> dict:
|
| 495 |
+
"""Generate Piclet image using Qwen-Image-Fast"""
|
| 496 |
try:
|
| 497 |
+
print(f"Connecting to Qwen-Image-Fast space...")
|
| 498 |
client = Client(
|
| 499 |
+
PicletGeneratorService.QWEN_IMAGE_SPACE,
|
| 500 |
hf_token=hf_token
|
| 501 |
)
|
| 502 |
|
| 503 |
+
# Build the final prompt: append fixed Pokémon-anime style and framing hints to the LLM-written description
|
| 504 |
+
full_prompt = f"{image_prompt} Pokémon anime art style, idle pose, centered, full body visible in frame."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
|
| 506 |
+
print(f"Generating image with Qwen-Image-Fast...")
|
| 507 |
+
print(f"Prompt: {full_prompt[:100]}...")
|
| 508 |
|
| 509 |
+
# Qwen-Image-Fast API: infer(prompt, seed, randomize_seed, aspect_ratio, guidance_scale, num_inference_steps, prompt_enhance)
|
| 510 |
result = client.predict(
|
| 511 |
+
full_prompt, # prompt
|
| 512 |
+
0, # seed (will be randomized)
|
| 513 |
+
True, # randomize_seed
|
| 514 |
+
"3:4", # aspect_ratio (768x1024 - portrait)
|
| 515 |
+
1.0, # guidance_scale (default)
|
| 516 |
+
8, # num_inference_steps (default, optimized with Lightning LoRA)
|
| 517 |
+
True, # prompt_enhance (uses LLM to enhance prompt)
|
| 518 |
api_name="/infer"
|
| 519 |
)
|
| 520 |
|
| 521 |
+
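# The /infer call yields an (image, seed) pair; via gradio_client the image is expected to arrive as a file path/URL string or a dict rather than an in-memory PIL.Image (TODO: confirm against the Space's API page)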
|
| 522 |
result_data = result.data if hasattr(result, 'data') else result
|
| 523 |
image_data = result_data[0] if isinstance(result_data, (list, tuple)) else result_data
|
| 524 |
seed = result_data[1] if isinstance(result_data, (list, tuple)) and len(result_data) > 1 else 0
|
| 525 |
|
| 526 |
+
# Handle different return formats (path/URL string, dict with 'url' or 'path', or an object exposing a .url attribute)
|
| 527 |
image_url = None
|
| 528 |
if isinstance(image_data, str):
|
| 529 |
image_url = image_data
|
| 530 |
elif isinstance(image_data, dict):
|
| 531 |
image_url = image_data.get('url') or image_data.get('path')
|
| 532 |
+
elif hasattr(image_data, 'url'):
|
| 533 |
+
image_url = image_data.url
|
| 534 |
|
| 535 |
if not image_url:
|
| 536 |
+
raise Exception("Failed to extract image URL from Qwen response")
|
| 537 |
|
| 538 |
return {
|
| 539 |
'imageUrl': image_url,
|