Spaces:

Fraser
/

piclets-server

Sleeping

App Files Files Community

Fraser committed on Oct 5

Commit

17077fb

1 Parent(s): a44fa5c

new image generator (drops tier designs)

Browse files

Files changed (1) hide show

app.py +29 -40

app.py CHANGED Viewed

@@ -253,7 +253,7 @@ class PicletGeneratorService:
     # Space endpoints
     JOY_CAPTION_SPACE = "fancyfeast/joy-caption-alpha-two"
     GPT_OSS_SPACE = "amd/gpt-oss-120b-chatbot"
-    FLUX_SPACE = "black-forest-labs/FLUX.1-schnell"
     @staticmethod
     def generate_enhanced_caption(image_path: str, hf_token: str) -> str:
@@ -304,7 +304,7 @@ class PicletGeneratorService:
             print(f"Generating text...")
             result = client.predict(
                 prompt,  # message (positional)
-                "You are a helpful assistant that creates Pokemon-style monster concepts based on real-world objects.",  # system_prompt (positional)
                 0.7,  # temperature (positional)
                 api_name="/chat"
             )
@@ -335,7 +335,7 @@ class PicletGeneratorService:
         Generate complete Piclet concept from image caption
         Returns parsed concept with object name, variation, stats, etc.
         """
-        concept_prompt = f"""You are analyzing an image to create a Pokemon-style creature. Here's the image description:
 "{caption}"
@@ -388,7 +388,7 @@ Weight: {{e.g., "15kg" or "33 lbs"}}
 {{2-3 paragraphs describing the creature's behavior, habitat, abilities, and role in its ecosystem. What does it DO? Where does it live? How does it interact with its environment? What are its natural behaviors and powers that reflect the object's real-world function? This is the creature's background story and behavioral profile.}}
 # Monster Image Prompt
-{{Detailed 3-4 sentence visual description for image generation. Must include: "Inspired by [specific object name]" at the start. Describe body structure, colors, textures, materials, distinctive features, personality-driven pose/expression, dynamic action or stance, environment/background setting, and atmospheric lighting. Be specific and detailed about visual elements.}}
 ```
 CRITICAL RULES:
@@ -397,8 +397,8 @@ CRITICAL RULES:
 - Variation should be meaningful and distinctive (material, style, color, context, or model variant)
 - Physical Appearance must describe the CREATURE'S BODY with references to the specific object's visual features
 - Lore & Behavior must describe WHAT THE CREATURE DOES, not how it looks
-- Monster Image Prompt must START with "Inspired by [object name]" and be detailed (3-4 sentences)
-- Monster Image Prompt must NOT include the monster's name or style prefixes like "Anime-style" or "Pokemon-style"
 - Primary Type must match the object category (machina for electronics/vehicles, structure for buildings, etc.)"""
         response_text = PicletGeneratorService.generate_text_with_gpt(concept_prompt, hf_token)
@@ -434,15 +434,10 @@ CRITICAL RULES:
         monster_name = extract_section(concept_text, 'Monster Name')
         primary_type = extract_section(concept_text, 'Primary Type').lower()
-        # Extract both appearance and lore sections, combine them
         physical_appearance = extract_section(concept_text, 'Physical Appearance')
         lore_behavior = extract_section(concept_text, 'Lore & Behavior')
-        # Combine both sections with a separator for complete description
-        description = physical_appearance
-        if lore_behavior:
-            description = f"{physical_appearance}\n\n{lore_behavior}" if physical_appearance else lore_behavior
         image_prompt = extract_section(concept_text, 'Monster Image Prompt')
         # Parse physical stats
@@ -484,7 +479,8 @@ CRITICAL RULES:
             'concept': concept_text,
             'stats': {
                 'name': monster_name or 'Unknown',
-                'description': description,
                 'tier': tier,
                 'primaryType': primary_type or 'beast',
                 'height': height,
@@ -496,55 +492,48 @@ CRITICAL RULES:
     @staticmethod
     def generate_piclet_image(image_prompt: str, tier: str, hf_token: str) -> dict:
-        """Generate Piclet image using Flux"""
         try:
-            print(f"Connecting to Flux space...")
             client = Client(
-                PicletGeneratorService.FLUX_SPACE,
                 hf_token=hf_token
             )
-            tier_descriptions = {
-                'low': 'simple and iconic design',
-                'medium': 'detailed and well-crafted design',
-                'high': 'highly detailed and impressive design with special effects',
-                'legendary': 'highly detailed and majestic design with dramatic lighting and aura effects'
-            }
-            background_styles = {
-                'low': 'simple natural environment',
-                'medium': 'detailed thematic natural environment',
-                'high': 'atmospheric thematic environment with special lighting effects',
-                'legendary': 'epic thematic environment with dramatic atmospheric lighting and mystical aura'
-            }
-            full_prompt = f"{image_prompt}\nNow generate a Pokémon Anime image of the monster in an idle pose with a {background_styles.get(tier, background_styles['medium'])} background that complements the creature's type and characteristics. This is a {tier} tier monster with a {tier_descriptions.get(tier, tier_descriptions['medium'])}. The monster should not be attacking or in motion. The full monster must be visible and centered in the frame."
-            print(f"Generating image with prompt: {full_prompt[:100]}...")
             result = client.predict(
-                full_prompt,  # prompt
-                0,  # seed
-                True,  # randomize_seed
-                1024,  # width
-                1024,  # height
-                4,  # num_inference_steps
                 api_name="/infer"
             )
-            # Extract image URL and seed
             result_data = result.data if hasattr(result, 'data') else result
             image_data = result_data[0] if isinstance(result_data, (list, tuple)) else result_data
             seed = result_data[1] if isinstance(result_data, (list, tuple)) and len(result_data) > 1 else 0
-            # Handle different return formats
             image_url = None
             if isinstance(image_data, str):
                 image_url = image_data
             elif isinstance(image_data, dict):
                 image_url = image_data.get('url') or image_data.get('path')
             if not image_url:
-                raise Exception("Failed to extract image URL from Flux response")
             return {
                 'imageUrl': image_url,

     # Space endpoints
     JOY_CAPTION_SPACE = "fancyfeast/joy-caption-alpha-two"
     GPT_OSS_SPACE = "amd/gpt-oss-120b-chatbot"
+    QWEN_IMAGE_SPACE = "multimodalart/Qwen-Image-Fast"
     @staticmethod
     def generate_enhanced_caption(image_path: str, hf_token: str) -> str:
             print(f"Generating text...")
             result = client.predict(
                 prompt,  # message (positional)
+                "You are a helpful assistant that creates Pokémon-style monster concepts based on real-world objects.",  # system_prompt (positional)
                 0.7,  # temperature (positional)
                 api_name="/chat"
             )
         Generate complete Piclet concept from image caption
         Returns parsed concept with object name, variation, stats, etc.
         """
+        concept_prompt = f"""You are analyzing an image to create a Pokémon-style creature. Here's the image description:
 "{caption}"
 {{2-3 paragraphs describing the creature's behavior, habitat, abilities, and role in its ecosystem. What does it DO? Where does it live? How does it interact with its environment? What are its natural behaviors and powers that reflect the object's real-world function? This is the creature's background story and behavioral profile.}}
 # Monster Image Prompt
+{{Detailed 3-4 sentence visual description for anime-style image generation. Describe body structure, colors, textures, materials, distinctive features, personality-driven pose/expression, dynamic action or stance, environment/background setting, and atmospheric lighting. Be specific and detailed about visual elements. DO NOT mention the source object name or include phrases like "Inspired by [object]".}}
 ```
 CRITICAL RULES:
 - Variation should be meaningful and distinctive (material, style, color, context, or model variant)
 - Physical Appearance must describe the CREATURE'S BODY with references to the specific object's visual features
 - Lore & Behavior must describe WHAT THE CREATURE DOES, not how it looks
+- Monster Image Prompt must be a detailed (3-4 sentences) pure visual description without mentioning the source object name
+- Monster Image Prompt must NOT include the monster's name or style prefixes like "Anime-style" or "Pokémon-style"
 - Primary Type must match the object category (machina for electronics/vehicles, structure for buildings, etc.)"""
         response_text = PicletGeneratorService.generate_text_with_gpt(concept_prompt, hf_token)
         monster_name = extract_section(concept_text, 'Monster Name')
         primary_type = extract_section(concept_text, 'Primary Type').lower()
+        # Extract both appearance and lore sections separately (keep them separate!)
         physical_appearance = extract_section(concept_text, 'Physical Appearance')
         lore_behavior = extract_section(concept_text, 'Lore & Behavior')
         image_prompt = extract_section(concept_text, 'Monster Image Prompt')
         # Parse physical stats
             'concept': concept_text,
             'stats': {
                 'name': monster_name or 'Unknown',
+                'physicalAppearance': physical_appearance,
+                'lore': lore_behavior,
                 'tier': tier,
                 'primaryType': primary_type or 'beast',
                 'height': height,
     @staticmethod
     def generate_piclet_image(image_prompt: str, tier: str, hf_token: str) -> dict:
+        """Generate Piclet image using Qwen-Image-Fast"""
         try:
+            print(f"Connecting to Qwen-Image-Fast space...")
             client = Client(
+                PicletGeneratorService.QWEN_IMAGE_SPACE,
                 hf_token=hf_token
             )
+            # Build enhanced prompt for Pokemon-style anime art
+            full_prompt = f"{image_prompt} Pokémon anime art style, idle pose, centered, full body visible in frame."
+            print(f"Generating image with Qwen-Image-Fast...")
+            print(f"Prompt: {full_prompt[:100]}...")
+            # Qwen-Image-Fast API: infer(prompt, seed, randomize_seed, aspect_ratio, guidance_scale, num_inference_steps, prompt_enhance)
             result = client.predict(
+                full_prompt,     # prompt
+                0,               # seed (will be randomized)
+                True,            # randomize_seed
+                "3:4",           # aspect_ratio (768x1024 - portrait)
+                1.0,             # guidance_scale (default)
+                8,               # num_inference_steps (default, optimized with Lightning LoRA)
+                True,            # prompt_enhance (uses LLM to enhance prompt)
                 api_name="/infer"
             )
+            # Qwen returns: (PIL.Image, seed) tuple
             result_data = result.data if hasattr(result, 'data') else result
             image_data = result_data[0] if isinstance(result_data, (list, tuple)) else result_data
             seed = result_data[1] if isinstance(result_data, (list, tuple)) and len(result_data) > 1 else 0
+            # Handle different return formats (URL or PIL Image object)
             image_url = None
             if isinstance(image_data, str):
                 image_url = image_data
             elif isinstance(image_data, dict):
                 image_url = image_data.get('url') or image_data.get('path')
+            elif hasattr(image_data, 'url'):
+                image_url = image_data.url
             if not image_url:
+                raise Exception("Failed to extract image URL from Qwen response")
             return {
                 'imageUrl': image_url,