# FLUX VisionReply - Hugging Face Space (running on ZeroGPU)
# Dependencies should normally come from requirements.txt; the checks below only
# install missing extras at runtime as a fallback.
import subprocess
import sys

# Optional: use flash-attn if it is installed (but don't fail if it isn't)
try:
    import flash_attn
    print("Flash attention is available")
except ImportError:
    print("Flash attention not available - using standard attention (this is fine)")

# Ensure PEFT is available for LoRA
try:
    import peft
    print("PEFT library is available")
except ImportError:
    print("Installing PEFT for LoRA support...")
    subprocess.run([sys.executable, "-m", "pip", "install", "peft>=0.7.0"], check=True)
    import peft
    print("PEFT installed successfully")
import spaces
import os
import time
from os import path
from huggingface_hub import hf_hub_download
import gradio as gr
import torch
from diffusers import FluxPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# No special attention mechanisms needed - the model works fine without them

# Setup and initialization code
cache_path = path.join(path.dirname(path.abspath(__file__)), "models")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", ".")
os.environ["TRANSFORMERS_CACHE"] = cache_path
os.environ["HF_HUB_CACHE"] = cache_path
os.environ["HF_HOME"] = cache_path

torch.backends.cuda.matmul.allow_tf32 = True
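# Allowing TF32 trades a tiny amount of matmul precision for speed on Ampere and
# newer GPUs; harmless here since generation runs in bfloat16 anyway.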

# Initialize Florence caption models
print("Initializing Florence models...")
florence_models = {}
florence_processors = {}

# Check that timm (required by Florence-2) is available
try:
    import timm
    print("timm library is available")
except ImportError:
    print("ERROR: timm library not found. Installing...")
    subprocess.run([sys.executable, "-m", "pip", "install", "timm"], check=True)
    import timm
    print("timm installed successfully")

# Initialize Florence models with better error handling
model_configs = [
    ('gokaygokay/Florence-2-Flux-Large', True),  # Primary model
    ('gokaygokay/Florence-2-Flux', False),       # Fallback model
]

for model_name, is_primary in model_configs:
    try:
        print(f"Loading {model_name}...")
        florence_models[model_name] = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            low_cpu_mem_usage=True,  # Memory optimization
        ).eval()
        florence_processors[model_name] = AutoProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        print(f"Successfully loaded {model_name}")
        # If the primary model loaded, skip the fallback
        if is_primary:
            break
    except Exception as e:
        print(f"Warning: Could not load {model_name}: {e}")
        if is_primary:
            print("Attempting to load fallback model...")
            continue

if not florence_models:
    print("WARNING: No Florence models could be loaded. Caption generation will not be available.")
    print("Users will need to manually enter image descriptions.")
else:
    print(f"Successfully loaded {len(florence_models)} Florence model(s)")

def filter_prompt(prompt):
    inappropriate_keywords = [
        "sex"
    ]
    prompt_lower = prompt.lower()
    for keyword in inappropriate_keywords:
        if keyword in prompt_lower:
            return False, "The prompt contains inappropriate content."
    return True, prompt
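# Caveat: this is a plain substring check, so it also flags harmless words that
# contain a keyword (e.g. "Sussex"); a word-boundary regex would be stricter.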

class timer:
    def __init__(self, method_name="timed process"):
        self.method = method_name

    def __enter__(self):
        self.start = time.time()
        print(f"{self.method} starts")

    def __exit__(self, exc_type, exc_val, exc_tb):
        end = time.time()
        print(f"{self.method} took {round(end - self.start, 2)}s")

# Model initialization
if not path.exists(cache_path):
    os.makedirs(cache_path, exist_ok=True)

print("Loading FLUX pipeline...")
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16
)

# Track whether LoRA was loaded successfully
LORA_LOADED = False
print("Loading LoRA weights...")
try:
    # Method 1: download the file, then load from the local path
    lora_path = hf_hub_download(
        "ByteDance/Hyper-SD",
        "Hyper-FLUX.1-dev-8steps-lora.safetensors"
    )
    pipe.load_lora_weights(lora_path)
    pipe.fuse_lora(lora_scale=0.125)
    LORA_LOADED = True
    print("LoRA weights loaded and fused successfully (Method 1)")
except Exception as e1:
    print(f"Method 1 failed: {e1}")
    try:
        # Method 2: load directly from the Hub repo
        pipe.load_lora_weights("ByteDance/Hyper-SD", weight_name="Hyper-FLUX.1-dev-8steps-lora.safetensors")
        pipe.fuse_lora(lora_scale=0.125)
        LORA_LOADED = True
        print("LoRA weights loaded and fused successfully (Method 2)")
    except Exception as e2:
        print(f"Method 2 failed: {e2}")
        print("WARNING: Could not load LoRA weights. Continuing without LoRA.")
        print("The model will still work but may need more inference steps for good quality.")
        print("Recommended: use 20-30 inference steps instead of 8.")

pipe.to(device="cuda", dtype=torch.bfloat16)

# Safety checker initialization
try:
    pipe.safety_checker = safety_checker.StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    )
except Exception as e:
    print(f"Warning: Could not load safety checker: {e}")
    pipe.safety_checker = None
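# Note: FluxPipeline does not invoke a safety checker during generation; the
# assignment above only attaches the module, so outputs are not filtered unless
# the checker is called explicitly.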

def generate_caption(image, model_name='gokaygokay/Florence-2-Flux-Large'):
    if image is None:
        gr.Warning("Please upload an image first.")
        return ""
    if not florence_models:
        gr.Warning("Caption models are not loaded. Please refresh the page.")
        return "Caption generation unavailable - please describe your image manually"
    # Fall back to any loaded model if the requested one isn't available
    if model_name not in florence_models:
        model_name = list(florence_models.keys())[0]
        print(f"Using fallback model: {model_name}")

    image = Image.fromarray(image)
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."
    if image.mode != "RGB":
        image = image.convert("RGB")

    model = florence_models[model_name]
    processor = florence_processors[model_name]
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )
    return parsed_answer["<DESCRIPTION>"]
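# Note: the Florence models above stay on CPU (they are never moved to CUDA), so
# captioning is slower than image generation but needs no GPU allocation.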

@spaces.GPU  # ZeroGPU: allocate a GPU for the duration of this call
def process_and_save_image(height, width, steps, scales, prompt, seed):
    is_safe, filtered_prompt = filter_prompt(prompt)
    if not is_safe:
        gr.Warning("The prompt contains inappropriate content.")
        return None

    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
        try:
            generated_image = pipe(
                prompt=[filtered_prompt],
                generator=torch.Generator().manual_seed(int(seed)),
                num_inference_steps=int(steps),
                guidance_scale=float(scales),
                height=int(height),
                width=int(width),
                max_sequence_length=256  # Caps T5 prompt tokens; longer prompts are truncated
            ).images[0]
            return generated_image
        except Exception as e:
            print(f"Error in image generation: {str(e)}")
            gr.Warning(f"Error generating image: {str(e)}")
            return None

def get_random_seed():
    return torch.randint(0, 1000000, (1,)).item()

def update_seed():
    return get_random_seed()

# CSS styles
css = """
footer {display: none !important}

.gradio-container {
    max-width: 1200px;
    margin: auto;
}

.contain {
    background: rgba(255, 255, 255, 0.05);
    border-radius: 12px;
    padding: 20px;
}

.generate-btn {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
    border: none !important;
    color: white !important;
}

.generate-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}

.title {
    text-align: center;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 1em;
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}

.tabs {
    margin-top: 20px;
    border-radius: 10px;
    overflow: hidden;
}

.tab-nav {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%);
    padding: 10px;
}

.tab-nav button {
    color: white;
    border: none;
    padding: 10px 20px;
    margin: 0 5px;
    border-radius: 5px;
    transition: all 0.3s ease;
}

.tab-nav button.selected {
    background: rgba(255, 255, 255, 0.2);
}

.image-upload-container {
    border: 2px dashed #4B79A1;
    border-radius: 10px;
    padding: 20px;
    text-align: center;
    transition: all 0.3s ease;
}

.image-upload-container:hover {
    border-color: #283E51;
    background: rgba(75, 121, 161, 0.1);
}

.primary-btn {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
    font-size: 1.2em !important;
    padding: 12px 20px !important;
    margin-top: 20px !important;
}

hr {
    border: none;
    border-top: 1px solid rgba(75, 121, 161, 0.2);
    margin: 20px 0;
}

.input-section {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 12px;
    padding: 20px;
    margin-bottom: 20px;
}

.output-section {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 12px;
    padding: 20px;
}

.example-images {
    display: grid;
    grid-template-columns: repeat(4, 1fr);
    gap: 10px;
    margin-bottom: 20px;
}

.example-images img {
    width: 100%;
    height: 150px;
    object-fit: cover;
    border-radius: 8px;
    cursor: pointer;
    transition: transform 0.2s;
}

.example-images img:hover {
    transform: scale(1.05);
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.HTML('<div class="title">FLUX VisionReply</div>')
    gr.HTML('<div style="text-align: center; margin-bottom: 2em;">Upload an image (Image2Text2Image)</div>')

    with gr.Row():
        # Left column: input section
        with gr.Column(scale=3):
            # Image upload section
            input_image = gr.Image(
                label="Upload Image (Optional)",
                type="numpy",
                elem_classes=["image-upload-container"]
            )

            # Example image gallery
            example_images = [
                "5.jpg",
                "6.jpg",
                "2.jpg",
                "3.jpg",
                "1.jpg",
                "4.jpg",
            ]
            gr.Examples(
                examples=example_images,
                inputs=input_image,
                label="Example Images",
                examples_per_page=4
            )

            # Florence model selection (hidden from the UI)
            available_models = list(florence_models.keys()) if florence_models else []
            florence_model = gr.Dropdown(
                choices=available_models,
                label="Caption Model",
                value=available_models[0] if available_models else None,
                visible=False
            )

            # Caption button - only interactive if models are available
            if florence_models:
                caption_button = gr.Button(
                    "🔍 Generate Caption from Image",
                    elem_classes=["generate-btn"]
                )
            else:
                caption_button = gr.Button(
                    "⚠️ Caption Generation Unavailable - Enter Description Manually",
                    elem_classes=["generate-btn"],
                    interactive=False
                )

            # Divider
            gr.HTML('<hr style="margin: 20px 0;">')

            # Text prompt section
            prompt = gr.Textbox(
                label="Image Description",
                placeholder="Enter text description or use generated caption above...",
                lines=3
            )

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=1152,
                        step=64,
                        value=1024
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=1152,
                        step=64,
                        value=1024
                    )
                with gr.Row():
                    # Adjust default steps based on whether LoRA is loaded
                    default_steps = 8 if LORA_LOADED else 20
                    steps = gr.Slider(
                        label="Inference Steps" + (" (LoRA Enabled)" if LORA_LOADED else " (No LoRA - More Steps Recommended)"),
                        minimum=6,
                        maximum=50,
                        step=1,
                        value=default_steps
                    )
                    scales = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.0,
                        maximum=5.0,
                        step=0.1,
                        value=3.5
                    )
                seed = gr.Number(
                    label="Seed",
                    value=get_random_seed(),
                    precision=0
                )
                randomize_seed = gr.Button(
                    "🎲 Randomize Seed",
                    elem_classes=["generate-btn"]
                )

            generate_btn = gr.Button(
                "✨ Generate Image",
                elem_classes=["generate-btn", "primary-btn"]
            )

        # Right column: output section
        with gr.Column(scale=4):
            output = gr.Image(
                label="Generated Image",
                elem_classes=["output-image"]
            )

    # Event handlers
    if florence_models:
        caption_button.click(
            generate_caption,
            inputs=[input_image, florence_model],
            outputs=[prompt]
        )

    generate_btn.click(
        process_and_save_image,
        inputs=[height, width, steps, scales, prompt, seed],
        outputs=[output]
    )

    randomize_seed.click(
        update_seed,
        outputs=[seed]
    )

    # Also refresh the seed on every generation so the next run differs by default
    generate_btn.click(
        update_seed,
        outputs=[seed]
    )

if __name__ == "__main__":
    demo.launch(allowed_paths=[PERSISTENT_DIR])
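    # allowed_paths lets Gradio serve files from PERSISTENT_DIR (default "."),
    # which is where the example images referenced above are expected to live.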