Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| import subprocess # For calling generate.py | |
| import tempfile # For handling temporary image files | |
| from typing import Optional | |
| from PIL import Image as PILImage | |
| import gradio as gr | |
# Fixed locations inside the Docker container, plus the model that
# generate.py is asked to load.
NANOVLM_REPO_PATH = "/app/nanoVLM"
GENERATE_SCRIPT_PATH = "/app/nanoVLM/generate.py"
MODEL_REPO_ID = "lusxvr/nanoVLM-222M"

# Put the cloned nanoVLM checkout at the front of the import path
# (generate.py might need this too if it imports from 'models').
if NANOVLM_REPO_PATH not in sys.path:
    print(f"DEBUG: Adding {NANOVLM_REPO_PATH} to sys.path")
    sys.path[:0] = [NANOVLM_REPO_PATH]

# Startup diagnostics: emitted once, at import time.
print(f"DEBUG: Python sys.path: {sys.path}")
print(f"DEBUG: Using generate.py script at: {GENERATE_SCRIPT_PATH}")
print(f"DEBUG: Using model repo ID: {MODEL_REPO_ID}")
def _parse_generation_output(stdout: str) -> str:
    """Extract the text of the first generation from generate.py's stdout.

    The expected layout is an ``Outputs:`` header line followed by a line
    starting with ``>> Generation 1:``.  Everything after that marker is
    returned, including any following lines up to the next
    ``>> Generation`` marker, so generations that contain newlines are not
    truncated.

    NOTE(review): this assumes generate.py prints nothing unrelated after the
    generations; confirm against the script.

    Returns:
        The stripped generation text, or a fixed error message when the
        expected markers are not found.
    """
    first_marker = ">> Generation 1:"
    any_marker = ">> Generation "

    seen_header = False
    collected = None  # becomes a list once the first generation starts
    for line in stdout.splitlines():
        stripped = line.strip()
        if collected is not None:
            # Already inside generation 1: stop at the next generation.
            if stripped.startswith(any_marker):
                break
            collected.append(line)
        elif not seen_header:
            seen_header = "Outputs:" in line
        elif stripped.startswith(first_marker):
            # Slice by the marker itself so a missing trailing space (empty
            # generation) cannot leak the marker into the result.
            collected = [stripped[len(first_marker):]]

    if collected is None:
        return "Error: Could not parse output from generate.py script."
    return "\n".join(collected).strip()


def call_generate_script(image_path: str, prompt_text: str) -> str:
    """Run generate.py as a subprocess and return the generated text.

    Args:
        image_path: Path of the image file passed via ``--image``.
        prompt_text: Prompt passed via ``--prompt``.

    Returns:
        The parsed text of the first generation, or a human-readable error
        message if the script fails, times out (120 s), or its output cannot
        be parsed.  This function does not raise; all failures are logged to
        stdout and reported through the return value.
    """
    print(f"DEBUG (call_generate_script): Calling with image_path='{image_path}', prompt='{prompt_text}'")

    # Use the interpreter that is running this app so the child process sees
    # the same environment and installed packages; fall back to "python" only
    # if the interpreter path is unavailable.
    cmd_args = [
        sys.executable or "python", "-u", GENERATE_SCRIPT_PATH,
        "--hf_model", MODEL_REPO_ID,
        "--image", image_path,
        "--prompt", prompt_text,
        "--generations", "1",  # One generation is enough for the UI
        "--max_new_tokens", "70",  # Adjust as needed
        # --device is not passed; the original author notes generate.py
        # handles it internally.
    ]
    print(f"DEBUG (call_generate_script): Executing command: {' '.join(cmd_args)}")

    try:
        process = subprocess.run(
            cmd_args,
            capture_output=True,
            text=True,
            check=True,  # Non-zero exit codes raise CalledProcessError
            timeout=120,  # Give up after two minutes
        )
        stdout = process.stdout
        stderr = process.stderr
        print(f"DEBUG (call_generate_script): generate.py STDOUT:\n{stdout}")
        if stderr:
            print(f"DEBUG (call_generate_script): generate.py STDERR:\n{stderr}")

        generated_text = _parse_generation_output(stdout)
        print(f"DEBUG (call_generate_script): Parsed generated text: '{generated_text}'")
        return generated_text
    except subprocess.CalledProcessError as e:
        print(f"ERROR (call_generate_script): generate.py exited with error code {e.returncode}")
        print(f"ERROR (call_generate_script): STDOUT: {e.stdout}")
        print(f"ERROR (call_generate_script): STDERR: {e.stderr}")
        return f"Error executing generation script (Code {e.returncode}). Check logs."
    except subprocess.TimeoutExpired:
        print("ERROR (call_generate_script): generate.py timed out.")
        return "Error: Generation script timed out."
    except Exception as e:
        # Last-resort boundary: the UI should show a message, not crash.
        print(f"ERROR (call_generate_script): An unexpected error occurred: {e}")
        import traceback
        traceback.print_exc()
        return f"An unexpected error occurred while calling generation script: {str(e)}"
# The PIL annotation is quoted so it is not evaluated at definition time.
def gradio_interface_fn(image_input_pil: "Optional[PILImage.Image]", prompt_input_str: Optional[str]) -> str:
    """Gradio callback: run generate.py on the uploaded image and prompt.

    The image is written to a temporary ``.jpg`` file, whose path is handed
    to ``call_generate_script``; the file is removed afterwards regardless of
    the outcome.

    Args:
        image_input_pil: The uploaded image, or ``None`` if nothing was
            uploaded.
        prompt_input_str: The user's prompt; ``None``, empty, or
            whitespace-only prompts are rejected.

    Returns:
        The generated text, or a message describing what went wrong.  This
        function does not raise.
    """
    print(f"DEBUG (gradio_interface_fn): Received prompt: '{prompt_input_str}'")
    if image_input_pil is None:
        return "Please upload an image."
    if not prompt_input_str or not prompt_input_str.strip():
        return "Please provide a prompt."

    tmp_image_path = None
    try:
        # JPEG cannot store alpha or palette data; saving an RGBA/P/LA image
        # (typical for PNG uploads) raises, so normalise to RGB first.
        image_to_save = image_input_pil
        if image_to_save.mode not in ("RGB", "L"):
            image_to_save = image_to_save.convert("RGB")

        # delete=False lets us close the file, pass its name to the
        # subprocess, and delete it ourselves in the finally block.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_image_file:
            # Record the path before saving so a failed save still gets
            # cleaned up instead of leaking the file.
            tmp_image_path = tmp_image_file.name
            image_to_save.save(tmp_image_file, format="JPEG")
        print(f"DEBUG (gradio_interface_fn): Temporary image saved to: {tmp_image_path}")

        return call_generate_script(tmp_image_path, prompt_input_str)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing.
        print(f"ERROR (gradio_interface_fn): Error processing image or calling script: {e}")
        import traceback
        traceback.print_exc()
        return f"An error occurred: {str(e)}"
    finally:
        # Best-effort cleanup of the temporary image file.
        if tmp_image_path is not None and os.path.exists(tmp_image_path):
            try:
                os.remove(tmp_image_path)
                print(f"DEBUG (gradio_interface_fn): Temporary image {tmp_image_path} removed.")
            except Exception as e_remove:
                print(f"WARN (gradio_interface_fn): Could not remove temporary image {tmp_image_path}: {e_remove}")
# --- Gradio Interface Definition ---
# Markdown shown under the title of the demo page.
description_md = (
    "\n"
    "## nanoVLM-222M Interactive Demo (via generate.py)\n"
    "Upload an image and type a prompt. This interface calls the `generate.py` script from\n"
    "`huggingface/nanoVLM` under the hood to perform inference.\n"
)

print("DEBUG: Defining Gradio interface...")
# Stays None if construction fails; the launch code checks for that.
iface = None
try:
    # Components are created inside the try so that a failure in any of them
    # is reported below instead of aborting the import.
    _image_input = gr.Image(type="pil", label="Upload Image")
    _prompt_input = gr.Textbox(
        label="Your Prompt / Question",
        info="e.g., 'describe this image in detail'",
    )
    _text_output = gr.Textbox(label="Generated Text", show_copy_button=True)
    iface = gr.Interface(
        fn=gradio_interface_fn,
        inputs=[_image_input, _prompt_input],
        outputs=_text_output,
        title="nanoVLM-222M Demo (via Script)",
        description=description_md,
        allow_flagging="never",
    )
    print("DEBUG: Gradio interface defined successfully.")
except Exception as e:
    print(f"CRITICAL ERROR defining Gradio interface: {e}")
    import traceback
    traceback.print_exc()
# --- Launch Gradio App ---
if __name__ == "__main__":
    print("DEBUG: Entered __main__ block for Gradio launch.")

    # Without generate.py nothing can work, so drop the interface to block
    # the launch below.
    if not os.path.exists(GENERATE_SCRIPT_PATH):
        print(f"CRITICAL ERROR: The script {GENERATE_SCRIPT_PATH} was not found. Cannot launch app.")
        iface = None

    if iface is None:
        print("CRITICAL ERROR: Gradio interface (iface) is None or not defined. Cannot launch.")
    else:
        print("DEBUG: Attempting to launch Gradio interface...")
        try:
            # Listen on all interfaces, port 7860.
            iface.launch(server_name="0.0.0.0", server_port=7860)
            print("DEBUG: Gradio launch command issued.")
        except Exception as launch_error:
            print(f"CRITICAL ERROR launching Gradio interface: {launch_error}")
            import traceback
            traceback.print_exc()