# FLUX VisionReply - Hugging Face Space (running on ZeroGPU)
# Dependencies should normally come from requirements.txt; the checks below only
# install missing extras at runtime as a fallback.
import subprocess
import sys

# Optional: use flash-attn if it is installed (but don't fail if it isn't)
try:
    import flash_attn
    print("Flash attention is available")
except ImportError:
    print("Flash attention not available - using standard attention (this is fine)")

# Ensure PEFT is available for LoRA
try:
    import peft
    print("PEFT library is available")
except ImportError:
    print("Installing PEFT for LoRA support...")
    subprocess.run([sys.executable, "-m", "pip", "install", "peft>=0.7.0"], check=True)
    import peft
    print("PEFT installed successfully")
import spaces
import os
import time
from os import path
from huggingface_hub import hf_hub_download
import gradio as gr
import torch
from diffusers import FluxPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# No special attention mechanisms needed - the model works fine without them

# Setup and initialization code
cache_path = path.join(path.dirname(path.abspath(__file__)), "models")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", ".")
os.environ["TRANSFORMERS_CACHE"] = cache_path
os.environ["HF_HUB_CACHE"] = cache_path
os.environ["HF_HOME"] = cache_path

torch.backends.cuda.matmul.allow_tf32 = True
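# Allowing TF32 trades a tiny amount of matmul precision for speed on Ampere and
# newer GPUs; harmless here since generation runs in bfloat16 anyway.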

# Initialize Florence caption models
print("Initializing Florence models...")
florence_models = {}
florence_processors = {}

# Check that timm (required by Florence-2) is available
try:
    import timm
    print("timm library is available")
except ImportError:
    print("ERROR: timm library not found. Installing...")
    subprocess.run([sys.executable, "-m", "pip", "install", "timm"], check=True)
    import timm
    print("timm installed successfully")

# Initialize Florence models with better error handling
model_configs = [
    ('gokaygokay/Florence-2-Flux-Large', True),  # Primary model
    ('gokaygokay/Florence-2-Flux', False),       # Fallback model
]

for model_name, is_primary in model_configs:
    try:
        print(f"Loading {model_name}...")
        florence_models[model_name] = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            low_cpu_mem_usage=True,  # Memory optimization
        ).eval()
        florence_processors[model_name] = AutoProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        print(f"Successfully loaded {model_name}")
        # If the primary model loaded, skip the fallback
        if is_primary:
            break
    except Exception as e:
        print(f"Warning: Could not load {model_name}: {e}")
        if is_primary:
            print("Attempting to load fallback model...")
            continue

if not florence_models:
    print("WARNING: No Florence models could be loaded. Caption generation will not be available.")
    print("Users will need to manually enter image descriptions.")
else:
    print(f"Successfully loaded {len(florence_models)} Florence model(s)")

def filter_prompt(prompt):
    inappropriate_keywords = [
        "sex"
    ]
    prompt_lower = prompt.lower()
    for keyword in inappropriate_keywords:
        if keyword in prompt_lower:
            return False, "The prompt contains inappropriate content."
    return True, prompt
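# Caveat: this is a plain substring check, so it also flags harmless words that
# contain a keyword (e.g. "Sussex"); a word-boundary regex would be stricter.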

class timer:
    def __init__(self, method_name="timed process"):
        self.method = method_name

    def __enter__(self):
        self.start = time.time()
        print(f"{self.method} starts")

    def __exit__(self, exc_type, exc_val, exc_tb):
        end = time.time()
        print(f"{self.method} took {round(end - self.start, 2)}s")

# Model initialization
if not path.exists(cache_path):
    os.makedirs(cache_path, exist_ok=True)

print("Loading FLUX pipeline...")
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16
)

# Track whether LoRA was loaded successfully
LORA_LOADED = False
print("Loading LoRA weights...")
try:
    # Method 1: download the file, then load from the local path
    lora_path = hf_hub_download(
        "ByteDance/Hyper-SD",
        "Hyper-FLUX.1-dev-8steps-lora.safetensors"
    )
    pipe.load_lora_weights(lora_path)
    pipe.fuse_lora(lora_scale=0.125)
    LORA_LOADED = True
    print("LoRA weights loaded and fused successfully (Method 1)")
except Exception as e1:
    print(f"Method 1 failed: {e1}")
    try:
        # Method 2: load directly from the Hub repo
        pipe.load_lora_weights("ByteDance/Hyper-SD", weight_name="Hyper-FLUX.1-dev-8steps-lora.safetensors")
        pipe.fuse_lora(lora_scale=0.125)
        LORA_LOADED = True
        print("LoRA weights loaded and fused successfully (Method 2)")
    except Exception as e2:
        print(f"Method 2 failed: {e2}")
        print("WARNING: Could not load LoRA weights. Continuing without LoRA.")
        print("The model will still work but may need more inference steps for good quality.")
        print("Recommended: use 20-30 inference steps instead of 8.")

pipe.to(device="cuda", dtype=torch.bfloat16)

# Safety checker initialization
try:
    pipe.safety_checker = safety_checker.StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    )
except Exception as e:
    print(f"Warning: Could not load safety checker: {e}")
    pipe.safety_checker = None
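# Note: FluxPipeline does not invoke a safety checker during generation; the
# assignment above only attaches the module, so outputs are not filtered unless
# the checker is called explicitly.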

def generate_caption(image, model_name='gokaygokay/Florence-2-Flux-Large'):
    if image is None:
        gr.Warning("Please upload an image first.")
        return ""
    if not florence_models:
        gr.Warning("Caption models are not loaded. Please refresh the page.")
        return "Caption generation unavailable - please describe your image manually"
    # Fall back to any loaded model if the requested one isn't available
    if model_name not in florence_models:
        model_name = list(florence_models.keys())[0]
        print(f"Using fallback model: {model_name}")

    image = Image.fromarray(image)
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."
    if image.mode != "RGB":
        image = image.convert("RGB")

    model = florence_models[model_name]
    processor = florence_processors[model_name]
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )
    return parsed_answer["<DESCRIPTION>"]
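# Note: the Florence models above stay on CPU (they are never moved to CUDA), so
# captioning is slower than image generation but needs no GPU allocation.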

@spaces.GPU  # ZeroGPU: allocate a GPU for the duration of this call
def process_and_save_image(height, width, steps, scales, prompt, seed):
    is_safe, filtered_prompt = filter_prompt(prompt)
    if not is_safe:
        gr.Warning("The prompt contains inappropriate content.")
        return None

    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
        try:
            generated_image = pipe(
                prompt=[filtered_prompt],
                generator=torch.Generator().manual_seed(int(seed)),
                num_inference_steps=int(steps),
                guidance_scale=float(scales),
                height=int(height),
                width=int(width),
                max_sequence_length=256  # Caps T5 prompt tokens; longer prompts are truncated
            ).images[0]
            return generated_image
        except Exception as e:
            print(f"Error in image generation: {str(e)}")
            gr.Warning(f"Error generating image: {str(e)}")
            return None

def get_random_seed():
    return torch.randint(0, 1000000, (1,)).item()

def update_seed():
    return get_random_seed()

# CSS styles
css = """
footer {display: none !important}

.gradio-container {
    max-width: 1200px;
    margin: auto;
}

.contain {
    background: rgba(255, 255, 255, 0.05);
    border-radius: 12px;
    padding: 20px;
}

.generate-btn {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
    border: none !important;
    color: white !important;
}

.generate-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}

.title {
    text-align: center;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 1em;
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}

.tabs {
    margin-top: 20px;
    border-radius: 10px;
    overflow: hidden;
}

.tab-nav {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%);
    padding: 10px;
}

.tab-nav button {
    color: white;
    border: none;
    padding: 10px 20px;
    margin: 0 5px;
    border-radius: 5px;
    transition: all 0.3s ease;
}

.tab-nav button.selected {
    background: rgba(255, 255, 255, 0.2);
}

.image-upload-container {
    border: 2px dashed #4B79A1;
    border-radius: 10px;
    padding: 20px;
    text-align: center;
    transition: all 0.3s ease;
}

.image-upload-container:hover {
    border-color: #283E51;
    background: rgba(75, 121, 161, 0.1);
}

.primary-btn {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
    font-size: 1.2em !important;
    padding: 12px 20px !important;
    margin-top: 20px !important;
}

hr {
    border: none;
    border-top: 1px solid rgba(75, 121, 161, 0.2);
    margin: 20px 0;
}

.input-section {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 12px;
    padding: 20px;
    margin-bottom: 20px;
}

.output-section {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 12px;
    padding: 20px;
}

.example-images {
    display: grid;
    grid-template-columns: repeat(4, 1fr);
    gap: 10px;
    margin-bottom: 20px;
}

.example-images img {
    width: 100%;
    height: 150px;
    object-fit: cover;
    border-radius: 8px;
    cursor: pointer;
    transition: transform 0.2s;
}

.example-images img:hover {
    transform: scale(1.05);
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.HTML('<div class="title">FLUX VisionReply</div>')
    gr.HTML('<div style="text-align: center; margin-bottom: 2em;">Upload an image (Image2Text2Image)</div>')

    with gr.Row():
        # Left column: input section
        with gr.Column(scale=3):
            # Image upload section
            input_image = gr.Image(
                label="Upload Image (Optional)",
                type="numpy",
                elem_classes=["image-upload-container"]
            )

            # Example image gallery
            example_images = [
                "5.jpg",
                "6.jpg",
                "2.jpg",
                "3.jpg",
                "1.jpg",
                "4.jpg",
            ]
            gr.Examples(
                examples=example_images,
                inputs=input_image,
                label="Example Images",
                examples_per_page=4
            )

            # Florence model selection (hidden from the UI)
            available_models = list(florence_models.keys()) if florence_models else []
            florence_model = gr.Dropdown(
                choices=available_models,
                label="Caption Model",
                value=available_models[0] if available_models else None,
                visible=False
            )

            # Caption button - only interactive if models are available
            if florence_models:
                caption_button = gr.Button(
                    "🔍 Generate Caption from Image",
                    elem_classes=["generate-btn"]
                )
            else:
                caption_button = gr.Button(
                    "⚠️ Caption Generation Unavailable - Enter Description Manually",
                    elem_classes=["generate-btn"],
                    interactive=False
                )

            # Divider
            gr.HTML('<hr style="margin: 20px 0;">')

            # Text prompt section
            prompt = gr.Textbox(
                label="Image Description",
                placeholder="Enter text description or use generated caption above...",
                lines=3
            )

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=1152,
                        step=64,
                        value=1024
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=1152,
                        step=64,
                        value=1024
                    )
                with gr.Row():
                    # Adjust default steps based on whether LoRA is loaded
                    default_steps = 8 if LORA_LOADED else 20
                    steps = gr.Slider(
                        label="Inference Steps" + (" (LoRA Enabled)" if LORA_LOADED else " (No LoRA - More Steps Recommended)"),
                        minimum=6,
                        maximum=50,
                        step=1,
                        value=default_steps
                    )
                    scales = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.0,
                        maximum=5.0,
                        step=0.1,
                        value=3.5
                    )
                seed = gr.Number(
                    label="Seed",
                    value=get_random_seed(),
                    precision=0
                )
                randomize_seed = gr.Button(
                    "🎲 Randomize Seed",
                    elem_classes=["generate-btn"]
                )

            generate_btn = gr.Button(
                "✨ Generate Image",
                elem_classes=["generate-btn", "primary-btn"]
            )

        # Right column: output section
        with gr.Column(scale=4):
            output = gr.Image(
                label="Generated Image",
                elem_classes=["output-image"]
            )

    # Event handlers
    if florence_models:
        caption_button.click(
            generate_caption,
            inputs=[input_image, florence_model],
            outputs=[prompt]
        )

    generate_btn.click(
        process_and_save_image,
        inputs=[height, width, steps, scales, prompt, seed],
        outputs=[output]
    )

    randomize_seed.click(
        update_seed,
        outputs=[seed]
    )

    # Also refresh the seed on every generation so the next run differs by default
    generate_btn.click(
        update_seed,
        outputs=[seed]
    )

if __name__ == "__main__":
    demo.launch(allowed_paths=[PERSISTENT_DIR])
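    # allowed_paths lets Gradio serve files from PERSISTENT_DIR (default "."),
    # which is where the example images referenced above are expected to live.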