Spaces:

Tonic
/

l-operator-demo

Running on Zero

App Files Community

Joseph Pollack committed on Aug 29

Commit

c8e9e6f

unverified ·

1 Parent(s): d3f57e1

use gradio blocks

Browse files

Files changed (1) hide show

app.py +117 -96

app.py CHANGED Viewed

@@ -129,58 +129,50 @@ class LOperatorDemo:
             logger.error(f"Error generating action: {str(e)}")
             return f"❌ Error generating action: {str(e)}"
-    @spaces.GPU(duration=90)  # 1.5 minutes for chat responses
-    def chat_with_model(self, message: str, history: List[Dict[str, str]], image=None) -> List[Dict[str, str]]:
-        """Chat interface function for Gradio"""
-        if not self.is_loaded:
-            return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Model not loaded. Please load the model first."}]
-        if image is None:
-            return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please upload an Android screenshot image."}]
-        try:
-            # Handle different image formats
-            pil_image = None
-            if hasattr(image, 'mode'):  # PIL Image object
-                pil_image = image
-            elif isinstance(image, str) and os.path.exists(image):
-                # Handle file path (from examples)
-                pil_image = Image.open(image)
-            elif hasattr(image, 'name') and os.path.exists(image.name):
-                # Handle Gradio file object
-                pil_image = Image.open(image.name)
-            else:
-                return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Invalid image format. Please upload a valid image."}]
-            if pil_image is None:
-                return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Failed to process image. Please try again."}]
-            # Use the message as the goal/instruction
-            goal = "Complete the requested action"
-            instruction = message
-            # Generate action
-            response = self.generate_action(pil_image, goal, instruction)
-            return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}]
-        except Exception as e:
-            logger.error(f"Error in chat: {str(e)}")
-            return history + [{"role": "user", "content": message}, {"role": "assistant", "content": f"❌ Error: {str(e)}"}]
 # Initialize demo
 demo_instance = LOperatorDemo()
-def load_model():
-    """Load model normally"""
     try:
-        logger.info("Loading L-Operator model...")
-        result = demo_instance.load_model()
-        logger.info(f"Model loading result: {result}")
-        return result
     except Exception as e:
-        logger.error(f"Error loading model: {str(e)}")
-        return f"❌ Error loading model: {str(e)}"
 def load_example_episodes():
@@ -227,7 +219,7 @@ def load_example_episodes():
 # Create Gradio interface
 def create_demo():
-    """Create the Gradio demo interface"""
     with gr.Blocks(
         title="L-Operator: Android Device Control Demo",
@@ -236,8 +228,8 @@ def create_demo():
         .gradio-container {
             max-width: 1200px !important;
         }
-        .chat-container {
-            height: 600px;
         }
         """
     ) as demo:
@@ -252,10 +244,9 @@ def create_demo():
         ## 🚀 How to Use
-        1. **Model Loading**: The L-Operator model loads automatically on startup
-        2. **Upload Screenshot**: Upload an Android device screenshot
-        3. **Provide Instructions**: Enter your goal and step instructions
-        4. **Get Actions**: The model will generate JSON actions for Android device control
         ## 📋 Expected Output Format
@@ -276,55 +267,85 @@ def create_demo():
         with gr.Row():
             with gr.Column(scale=1):
-                gr.Markdown("### 🤖 Model Status")
-                model_status = gr.Textbox(
-                    label="L-Operator Model",
-                    value="🔄 Loading model on startup...",
-                    interactive=False
                 )
-            with gr.Column(scale=3):
-                gr.Markdown("### 💬 L-Operator Chat Interface")
-                # Load examples with error handling
-                try:
-                    examples = load_example_episodes()
-                except Exception as e:
-                    logger.warning(f"Failed to load examples: {str(e)}")
-                    examples = []
-                chat_interface = gr.ChatInterface(
-                    fn=demo_instance.chat_with_model,
-                    title="L-Operator: Android Device Control",
-                    description="Upload an Android screenshot and describe your goal. The model will generate JSON actions for device control.",
-                    examples=examples,
-                    type="messages",
-                    cache_examples=False,
-                    textbox=gr.Textbox(
-                        label="Goal",
-                        placeholder="e.g., Open the Settings app and navigate to Display settings",
-                        lines=2,
-                        show_label=True
-                    )
                 )
-        # Update model status on page load
-        def update_model_status():
             if not demo_instance.is_loaded:
-                logger.info("Loading model on Gradio startup...")
-                result = load_model()
-                logger.info(f"Model loading result: {result}")
-                return result
-            if demo_instance.is_loaded:
-                return "✅ L-Operator model loaded and ready!"
-            else:
-                return "❌ Model failed to load. Please check logs."
-        # Load model and update status on page load
-        demo.load(
-            fn=update_model_status,
-            outputs=model_status
-        )
         gr.Markdown("""
         ---
@@ -345,7 +366,7 @@ def create_demo():
         - **Remote Support**: Remote device troubleshooting
         - **Development Workflows**: UI/UX testing automation
-                            ---
         **Made with ❤️ by Tonic** | [Model on Hugging Face](https://huggingface.co/Tonic/l-android-control)
         """)

             logger.error(f"Error generating action: {str(e)}")
             return f"❌ Error generating action: {str(e)}"
 # Initialize demo
 demo_instance = LOperatorDemo()
+def process_input(image, goal):
+    """Process the input and generate action"""
+    if image is None:
+        return "❌ Please upload an Android screenshot image."
+    if not goal.strip():
+        return "❌ Please provide a goal."
+    if not demo_instance.is_loaded:
+        return "❌ Model not loaded. Please wait for it to load automatically."
     try:
+        # Handle different image formats
+        pil_image = None
+        if hasattr(image, 'mode'):  # PIL Image object
+            pil_image = image
+        elif isinstance(image, str) and os.path.exists(image):
+            # Handle file path (from examples)
+            pil_image = Image.open(image)
+        elif hasattr(image, 'name') and os.path.exists(image.name):
+            # Handle Gradio file object
+            pil_image = Image.open(image.name)
+        else:
+            return "❌ Invalid image format. Please upload a valid image."
+        if pil_image is None:
+            return "❌ Failed to process image. Please try again."
+        # Convert image to RGB if needed
+        if pil_image.mode != "RGB":
+            pil_image = pil_image.convert("RGB")
+        # Generate action using goal as both goal and instruction
+        response = demo_instance.generate_action(pil_image, goal, goal)
+        return response
     except Exception as e:
+        logger.error(f"Error processing input: {str(e)}")
+        return f"❌ Error: {str(e)}"
 def load_example_episodes():
 # Create Gradio interface
 def create_demo():
+    """Create the Gradio demo interface using Blocks"""
     with gr.Blocks(
         title="L-Operator: Android Device Control Demo",
         .gradio-container {
             max-width: 1200px !important;
         }
+        .output-container {
+            min-height: 200px;
         }
         """
     ) as demo:
         ## 🚀 How to Use
+        1. **Upload Screenshot**: Upload an Android device screenshot
+        2. **Describe Goal**: Enter what you want to accomplish
+        3. **Get Actions**: The model will generate JSON actions for Android device control
         ## 📋 Expected Output Format
         with gr.Row():
             with gr.Column(scale=1):
+                gr.Markdown("### 📱 Upload Screenshot")
+                image_input = gr.Image(
+                    label="Android Screenshot",
+                    type="pil",
+                    height=400
                 )
+                gr.Markdown("### 🎯 Goal")
+                goal_input = gr.Textbox(
+                    label="What would you like to accomplish?",
+                    placeholder="e.g., Open the Settings app and navigate to Display settings",
+                    lines=3
+                )
+                # Process button
+                process_btn = gr.Button("🚀 Generate Action", variant="primary", size="lg")
+            with gr.Column(scale=1):
+                gr.Markdown("### 📊 Generated Action")
+                output_text = gr.Textbox(
+                    label="JSON Action Output",
+                    lines=15,
+                    max_lines=20,
+                    interactive=False,
+                    elem_classes=["output-container"]
                 )
+        # Connect the process button
+        process_btn.click(
+            fn=process_input,
+            inputs=[image_input, goal_input],
+            outputs=output_text
+        )
+        # Load examples
+        gr.Markdown("### 📚 Example Episodes")
+        try:
+            examples = load_example_episodes()
+            if examples:
+                with gr.Row():
+                    for i, (image, goal) in enumerate(examples):
+                        with gr.Column(scale=1):
+                            gr.Markdown(f"**Episode {i+1}**")
+                            example_image = gr.Image(
+                                value=image,
+                                label=f"Example {i+1}",
+                                height=200,
+                                interactive=False
+                            )
+                            example_goal = gr.Textbox(
+                                value=goal,
+                                label="Goal",
+                                lines=2,
+                                interactive=False
+                            )
+                            # Create a button to load this example
+                            load_example_btn = gr.Button(f"Load Example {i+1}", size="sm")
+                            load_example_btn.click(
+                                fn=lambda img, g: (img, g),
+                                inputs=[example_image, example_goal],
+                                outputs=[image_input, goal_input]
+                            )
+        except Exception as e:
+            logger.warning(f"Failed to load examples: {str(e)}")
+            gr.Markdown("❌ Failed to load examples. Please upload your own screenshot.")
+        # Load model automatically on startup
+        def load_model_on_startup():
+            """Load model automatically without user feedback"""
             if not demo_instance.is_loaded:
+                logger.info("Loading L-Operator model automatically...")
+                try:
+                    demo_instance.load_model()
+                    logger.info("Model loaded successfully in background")
+                except Exception as e:
+                    logger.error(f"Failed to load model: {str(e)}")
+        # Load model automatically on page load
+        demo.load(fn=load_model_on_startup)
         gr.Markdown("""
         ---
         - **Remote Support**: Remote device troubleshooting
         - **Development Workflows**: UI/UX testing automation
+        ---
         **Made with ❤️ by Tonic** | [Model on Hugging Face](https://huggingface.co/Tonic/l-android-control)
         """)