Fraser committed on
Commit
17077fb
·
1 Parent(s): a44fa5c

new image generator (drops tier designs)

Browse files
Files changed (1) hide show
  1. app.py +29 -40
app.py CHANGED
@@ -253,7 +253,7 @@ class PicletGeneratorService:
253
  # Space endpoints
254
  JOY_CAPTION_SPACE = "fancyfeast/joy-caption-alpha-two"
255
  GPT_OSS_SPACE = "amd/gpt-oss-120b-chatbot"
256
- FLUX_SPACE = "black-forest-labs/FLUX.1-schnell"
257
 
258
  @staticmethod
259
  def generate_enhanced_caption(image_path: str, hf_token: str) -> str:
@@ -304,7 +304,7 @@ class PicletGeneratorService:
304
  print(f"Generating text...")
305
  result = client.predict(
306
  prompt, # message (positional)
307
- "You are a helpful assistant that creates Pokemon-style monster concepts based on real-world objects.", # system_prompt (positional)
308
  0.7, # temperature (positional)
309
  api_name="/chat"
310
  )
@@ -335,7 +335,7 @@ class PicletGeneratorService:
335
  Generate complete Piclet concept from image caption
336
  Returns parsed concept with object name, variation, stats, etc.
337
  """
338
- concept_prompt = f"""You are analyzing an image to create a Pokemon-style creature. Here's the image description:
339
 
340
  "{caption}"
341
 
@@ -388,7 +388,7 @@ Weight: {{e.g., "15kg" or "33 lbs"}}
388
  {{2-3 paragraphs describing the creature's behavior, habitat, abilities, and role in its ecosystem. What does it DO? Where does it live? How does it interact with its environment? What are its natural behaviors and powers that reflect the object's real-world function? This is the creature's background story and behavioral profile.}}
389
 
390
  # Monster Image Prompt
391
- {{Detailed 3-4 sentence visual description for image generation. Must include: "Inspired by [specific object name]" at the start. Describe body structure, colors, textures, materials, distinctive features, personality-driven pose/expression, dynamic action or stance, environment/background setting, and atmospheric lighting. Be specific and detailed about visual elements.}}
392
  ```
393
 
394
  CRITICAL RULES:
@@ -397,8 +397,8 @@ CRITICAL RULES:
397
  - Variation should be meaningful and distinctive (material, style, color, context, or model variant)
398
  - Physical Appearance must describe the CREATURE'S BODY with references to the specific object's visual features
399
  - Lore & Behavior must describe WHAT THE CREATURE DOES, not how it looks
400
- - Monster Image Prompt must START with "Inspired by [object name]" and be detailed (3-4 sentences)
401
- - Monster Image Prompt must NOT include the monster's name or style prefixes like "Anime-style" or "Pokemon-style"
402
  - Primary Type must match the object category (machina for electronics/vehicles, structure for buildings, etc.)"""
403
 
404
  response_text = PicletGeneratorService.generate_text_with_gpt(concept_prompt, hf_token)
@@ -434,15 +434,10 @@ CRITICAL RULES:
434
  monster_name = extract_section(concept_text, 'Monster Name')
435
  primary_type = extract_section(concept_text, 'Primary Type').lower()
436
 
437
- # Extract both appearance and lore sections, combine them
438
  physical_appearance = extract_section(concept_text, 'Physical Appearance')
439
  lore_behavior = extract_section(concept_text, 'Lore & Behavior')
440
 
441
- # Combine both sections with a separator for complete description
442
- description = physical_appearance
443
- if lore_behavior:
444
- description = f"{physical_appearance}\n\n{lore_behavior}" if physical_appearance else lore_behavior
445
-
446
  image_prompt = extract_section(concept_text, 'Monster Image Prompt')
447
 
448
  # Parse physical stats
@@ -484,7 +479,8 @@ CRITICAL RULES:
484
  'concept': concept_text,
485
  'stats': {
486
  'name': monster_name or 'Unknown',
487
- 'description': description,
 
488
  'tier': tier,
489
  'primaryType': primary_type or 'beast',
490
  'height': height,
@@ -496,55 +492,48 @@ CRITICAL RULES:
496
 
497
  @staticmethod
498
  def generate_piclet_image(image_prompt: str, tier: str, hf_token: str) -> dict:
499
- """Generate Piclet image using Flux"""
500
  try:
501
- print(f"Connecting to Flux space...")
502
  client = Client(
503
- PicletGeneratorService.FLUX_SPACE,
504
  hf_token=hf_token
505
  )
506
 
507
- tier_descriptions = {
508
- 'low': 'simple and iconic design',
509
- 'medium': 'detailed and well-crafted design',
510
- 'high': 'highly detailed and impressive design with special effects',
511
- 'legendary': 'highly detailed and majestic design with dramatic lighting and aura effects'
512
- }
513
-
514
- background_styles = {
515
- 'low': 'simple natural environment',
516
- 'medium': 'detailed thematic natural environment',
517
- 'high': 'atmospheric thematic environment with special lighting effects',
518
- 'legendary': 'epic thematic environment with dramatic atmospheric lighting and mystical aura'
519
- }
520
 
521
- full_prompt = f"{image_prompt}\nNow generate a Pokémon Anime image of the monster in an idle pose with a {background_styles.get(tier, background_styles['medium'])} background that complements the creature's type and characteristics. This is a {tier} tier monster with a {tier_descriptions.get(tier, tier_descriptions['medium'])}. The monster should not be attacking or in motion. The full monster must be visible and centered in the frame."
 
522
 
523
- print(f"Generating image with prompt: {full_prompt[:100]}...")
524
  result = client.predict(
525
- full_prompt, # prompt
526
- 0, # seed
527
- True, # randomize_seed
528
- 1024, # width
529
- 1024, # height
530
- 4, # num_inference_steps
 
531
  api_name="/infer"
532
  )
533
 
534
- # Extract image URL and seed
535
  result_data = result.data if hasattr(result, 'data') else result
536
  image_data = result_data[0] if isinstance(result_data, (list, tuple)) else result_data
537
  seed = result_data[1] if isinstance(result_data, (list, tuple)) and len(result_data) > 1 else 0
538
 
539
- # Handle different return formats
540
  image_url = None
541
  if isinstance(image_data, str):
542
  image_url = image_data
543
  elif isinstance(image_data, dict):
544
  image_url = image_data.get('url') or image_data.get('path')
 
 
545
 
546
  if not image_url:
547
- raise Exception("Failed to extract image URL from Flux response")
548
 
549
  return {
550
  'imageUrl': image_url,
 
253
  # Space endpoints
254
  JOY_CAPTION_SPACE = "fancyfeast/joy-caption-alpha-two"
255
  GPT_OSS_SPACE = "amd/gpt-oss-120b-chatbot"
256
+ QWEN_IMAGE_SPACE = "multimodalart/Qwen-Image-Fast"
257
 
258
  @staticmethod
259
  def generate_enhanced_caption(image_path: str, hf_token: str) -> str:
 
304
  print(f"Generating text...")
305
  result = client.predict(
306
  prompt, # message (positional)
307
+ "You are a helpful assistant that creates Pokémon-style monster concepts based on real-world objects.", # system_prompt (positional)
308
  0.7, # temperature (positional)
309
  api_name="/chat"
310
  )
 
335
  Generate complete Piclet concept from image caption
336
  Returns parsed concept with object name, variation, stats, etc.
337
  """
338
+ concept_prompt = f"""You are analyzing an image to create a Pokémon-style creature. Here's the image description:
339
 
340
  "{caption}"
341
 
 
388
  {{2-3 paragraphs describing the creature's behavior, habitat, abilities, and role in its ecosystem. What does it DO? Where does it live? How does it interact with its environment? What are its natural behaviors and powers that reflect the object's real-world function? This is the creature's background story and behavioral profile.}}
389
 
390
  # Monster Image Prompt
391
+ {{Detailed 3-4 sentence visual description for anime-style image generation. Describe body structure, colors, textures, materials, distinctive features, personality-driven pose/expression, dynamic action or stance, environment/background setting, and atmospheric lighting. Be specific and detailed about visual elements. DO NOT mention the source object name or include phrases like "Inspired by [object]".}}
392
  ```
393
 
394
  CRITICAL RULES:
 
397
  - Variation should be meaningful and distinctive (material, style, color, context, or model variant)
398
  - Physical Appearance must describe the CREATURE'S BODY with references to the specific object's visual features
399
  - Lore & Behavior must describe WHAT THE CREATURE DOES, not how it looks
400
+ - Monster Image Prompt must be a detailed (3-4 sentences) pure visual description without mentioning the source object name
401
+ - Monster Image Prompt must NOT include the monster's name or style prefixes like "Anime-style" or "Pokémon-style"
402
  - Primary Type must match the object category (machina for electronics/vehicles, structure for buildings, etc.)"""
403
 
404
  response_text = PicletGeneratorService.generate_text_with_gpt(concept_prompt, hf_token)
 
434
  monster_name = extract_section(concept_text, 'Monster Name')
435
  primary_type = extract_section(concept_text, 'Primary Type').lower()
436
 
437
+ # Extract both appearance and lore sections separately (keep them separate!)
438
  physical_appearance = extract_section(concept_text, 'Physical Appearance')
439
  lore_behavior = extract_section(concept_text, 'Lore & Behavior')
440
 
 
 
 
 
 
441
  image_prompt = extract_section(concept_text, 'Monster Image Prompt')
442
 
443
  # Parse physical stats
 
479
  'concept': concept_text,
480
  'stats': {
481
  'name': monster_name or 'Unknown',
482
+ 'physicalAppearance': physical_appearance,
483
+ 'lore': lore_behavior,
484
  'tier': tier,
485
  'primaryType': primary_type or 'beast',
486
  'height': height,
 
492
 
493
  @staticmethod
494
  def generate_piclet_image(image_prompt: str, tier: str, hf_token: str) -> dict:
495
+ """Generate Piclet image using Qwen-Image-Fast"""
496
  try:
497
+ print(f"Connecting to Qwen-Image-Fast space...")
498
  client = Client(
499
+ PicletGeneratorService.QWEN_IMAGE_SPACE,
500
  hf_token=hf_token
501
  )
502
 
503
+ # Build enhanced prompt for Pokemon-style anime art
504
+ full_prompt = f"{image_prompt} Pokémon anime art style, idle pose, centered, full body visible in frame."
 
 
 
 
 
 
 
 
 
 
 
505
 
506
+ print(f"Generating image with Qwen-Image-Fast...")
507
+ print(f"Prompt: {full_prompt[:100]}...")
508
 
509
+ # Qwen-Image-Fast API: infer(prompt, seed, randomize_seed, aspect_ratio, guidance_scale, num_inference_steps, prompt_enhance)
510
  result = client.predict(
511
+ full_prompt, # prompt
512
+ 0, # seed (will be randomized)
513
+ True, # randomize_seed
514
+ "3:4", # aspect_ratio (768x1024 - portrait)
515
+ 1.0, # guidance_scale (default)
516
+ 8, # num_inference_steps (default, optimized with Lightning LoRA)
517
+ True, # prompt_enhance (uses LLM to enhance prompt)
518
  api_name="/infer"
519
  )
520
 
521
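+ # Result is expected to be an (image, seed) pair; the image may arrive as a path/URL string, a dict, or an object with .url rather than a PIL.Image (TODO confirm against the Space's API)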
522
  result_data = result.data if hasattr(result, 'data') else result
523
  image_data = result_data[0] if isinstance(result_data, (list, tuple)) else result_data
524
  seed = result_data[1] if isinstance(result_data, (list, tuple)) and len(result_data) > 1 else 0
525
 
526
+ # Handle different return formats (URL/path string, dict with 'url' or 'path', or object exposing .url)
527
  image_url = None
528
  if isinstance(image_data, str):
529
  image_url = image_data
530
  elif isinstance(image_data, dict):
531
  image_url = image_data.get('url') or image_data.get('path')
532
+ elif hasattr(image_data, 'url'):
533
+ image_url = image_data.url
534
 
535
  if not image_url:
536
+ raise Exception("Failed to extract image URL from Qwen response")
537
 
538
  return {
539
  'imageUrl': image_url,