Update app.py

app.py CHANGED
@@ -20,25 +20,19 @@ from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
 from diffusers import FluxPipeline
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import gc
-import base64
 
-
-# GPU setup
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # explicitly select cuda:0
-
-###-------------- ZeroGPU essentials / shared memory management --------------------###
 def clear_memory():
     """Memory cleanup function"""
     gc.collect()
-
-
-    with torch.cuda.device(
+    try:
+        if torch.cuda.is_available():
+            with torch.cuda.device(0):  # explicitly use device 0
                 torch.cuda.empty_cache()
-
-
-        print(f"Warning: Could not clear CUDA memory: {e}")
+    except:
+        pass
 
-
+# GPU setup
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # explicitly select cuda:0
+
 # Wrap the GPU setup in try-except
 if torch.cuda.is_available():
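A note on the reworked `clear_memory` helper: `torch.cuda.empty_cache()` only releases blocks held by PyTorch's caching allocator; tensors that are still referenced stay resident, which is why `gc.collect()` runs first. A minimal sketch of how a helper like this is typically wrapped around a GPU step (the `run_gpu_step` name is illustrative, not from this Space):

```python
import gc
import torch

def run_gpu_step(fn, *args):
    """Run one GPU task and always clean up afterwards (sketch, not the app's code)."""
    try:
        return fn(*args)
    finally:
        gc.collect()                  # drop unreachable Python objects first
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # then release cached CUDA blocks back to the driver
```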
@@ -94,35 +88,14 @@ gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_
 gd_model = gd_model.to(device=device)
 assert isinstance(gd_model, GroundingDinoForObjectDetection)
 
-# Pipeline initialization
+# Initialize the FLUX pipeline
 pipe = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev",
     torch_dtype=torch.float16,
     use_auth_token=HF_TOKEN
 )
-
-# Memory optimization settings - use only methods FluxPipeline supports
 pipe.enable_attention_slicing(slice_size="auto")
 
-# xformers optimization (only if installed)
-try:
-    import xformers
-    pipe.enable_xformers_memory_efficient_attention()
-except ImportError:
-    print("xformers is not installed. Skipping memory efficient attention.")
-
-# GPU setup
-if torch.cuda.is_available():
-    try:
-        pipe = pipe.to("cuda:0")
-        # Enable CPU offloading only when it is supported
-        if hasattr(pipe, 'enable_model_cpu_offload'):
-            pipe.enable_model_cpu_offload()
-    except Exception as e:
-        print(f"Warning: Could not move pipeline to CUDA: {str(e)}")
-
-
-
 # Load the LoRA weights
 pipe.load_lora_weights(
     hf_hub_download(
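The hunk is cut off inside the `load_lora_weights` call, so the LoRA repo and filename are not visible here. For orientation only, a call fed by `hf_hub_download` usually has the shape below; the repo id and filename are placeholders, not the Space's actual values:

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id and filename - the real values are truncated in this diff.
lora_path = hf_hub_download(
    repo_id="some-user/flux-style-lora",
    filename="pytorch_lora_weights.safetensors",
)
pipe.load_lora_weights(lora_path)  # `pipe` is the FluxPipeline built above
```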
@@ -194,66 +167,56 @@ def apply_mask(img: Image.Image, mask_img: Image.Image, defringe: bool = True) -
     return result
 
 
+def adjust_size_to_multiple_of_8(width: int, height: int) -> tuple[int, int]:
+    """Adjust an image size to a multiple of 8"""
+    new_width = ((width + 7) // 8) * 8
+    new_height = ((height + 7) // 8) * 8
+    return new_width, new_height
+
 def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int, int]:
     """Compute the image size for the selected aspect ratio"""
-    # Use safe sizes supported by the FLUX pipeline
     if aspect_ratio == "1:1":
-
+        return base_size, base_size
     elif aspect_ratio == "16:9":
-
+        return base_size * 16 // 9, base_size
     elif aspect_ratio == "9:16":
-
+        return base_size, base_size * 16 // 9
     elif aspect_ratio == "4:3":
-
+        return base_size * 4 // 3, base_size
-
-    width = height = 512
-
-    # Adjust to a multiple of 8
-    width = (width // 8) * 8
-    height = (height // 8) * 8
-
-    return width, height
+    return base_size, base_size
 
+@spaces.GPU(duration=20)  # reduced from 40 s to 20 s
 def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
     try:
-        # Compute a safe size
         width, height = calculate_dimensions(aspect_ratio)
+        width, height = adjust_size_to_multiple_of_8(width, height)
 
-
-
+        max_size = 768
+        if width > max_size or height > max_size:
+            ratio = max_size / max(width, height)
+            width = int(width * ratio)
+            height = int(height * ratio)
+            width, height = adjust_size_to_multiple_of_8(width, height)
+
         with timer("Background generation"):
             try:
-                # Generate at 512x512 first
                 with torch.inference_mode():
                     image = pipe(
                         prompt=prompt,
-                        width=512,
-                        height=512,
+                        width=width,
+                        height=height,
                         num_inference_steps=8,
                         guidance_scale=4.0
                     ).images[0]
-
-                # Resize to the desired size
-                if width != 512 or height != 512:
-                    image = image.resize((width, height), Image.LANCZOS)
-                return image
-
             except Exception as e:
                 print(f"Pipeline error: {str(e)}")
-                # Return a white background on error
                 return Image.new('RGB', (width, height), 'white')
 
+        return image
     except Exception as e:
         print(f"Background generation error: {str(e)}")
         return Image.new('RGB', (512, 512), 'white')
 
-
-def adjust_size_to_multiple_of_8(width: int, height: int) -> tuple[int, int]:
-    """Adjust an image size to a multiple of 8"""
-    new_width = max(8, ((width + 7) // 8) * 8)  # guarantee at least 8 pixels
-    new_height = max(8, ((height + 7) // 8) * 8)  # guarantee at least 8 pixels
-    return new_width, new_height
-
 def create_position_grid():
     return """
     <div class="position-grid" style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; width: 150px; margin: auto;">
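Tracing the new sizing path for "16:9" makes the intent clear: stretch the base size, round to multiples of 8, cap the long edge at 768, then round again. A condensed, self-contained restatement of the two helpers from this hunk, with the 16:9 case worked through:

```python
def adjust_size_to_multiple_of_8(width: int, height: int) -> tuple[int, int]:
    return ((width + 7) // 8) * 8, ((height + 7) // 8) * 8

def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int, int]:
    if aspect_ratio == "16:9":
        return base_size * 16 // 9, base_size
    return base_size, base_size

w, h = calculate_dimensions("16:9")            # 512 * 16 // 9 = 910, 512
w, h = adjust_size_to_multiple_of_8(w, h)      # -> 912, 512
max_size = 768
if w > max_size or h > max_size:
    ratio = max_size / max(w, h)               # 768 / 912
    w, h = int(w * ratio), int(h * ratio)      # -> 768 (767 with float truncation), 431
    w, h = adjust_size_to_multiple_of_8(w, h)  # -> 768, 432 either way
print(w, h)                                    # 768 432
```

Both dimensions end up as FLUX-friendly multiples of 8 with the long edge capped at 768, which helps the 8-step generation fit inside the shorter ZeroGPU window.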
@@ -310,24 +273,26 @@ def combine_with_background(foreground: Image.Image, background: Image.Image,
     result.paste(scaled_foreground, (x, y), scaled_foreground)
     return result
 
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=30)  # reduced from 120 s to 30 s
 def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
+    time_log: list[str] = []
     try:
-
-
-
-
-
-
-
-
-
-
+        if isinstance(prompt, str):
+            t0 = time.time()
+            bbox = gd_detect(img, prompt)
+            time_log.append(f"detect: {time.time() - t0}")
+            if not bbox:
+                print(time_log[0])
+                raise gr.Error("No object detected")
+        else:
+            bbox = prompt
+        t0 = time.time()
+        mask = segmenter(img, bbox)
+        time_log.append(f"segment: {time.time() - t0}")
+        return mask, bbox, time_log
     except Exception as e:
         print(f"GPU process error: {str(e)}")
         raise
-    finally:
-        clear_memory()
 
 def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
     try:
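Context for the `duration` change: on ZeroGPU Spaces, `@spaces.GPU(duration=...)` declares the maximum time a call may hold the allocated GPU, and shorter declared durations improve queue priority, which is the point of dropping 120 s to 30 s here. The decorator's shape, as a minimal sketch:

```python
import spaces
import torch

@spaces.GPU(duration=30)  # request the GPU for at most ~30 s per call
def gpu_step(x: torch.Tensor) -> torch.Tensor:
    # On ZeroGPU, CUDA is only available inside the decorated function.
    return (x.cuda() * 2).cpu() if torch.cuda.is_available() else x * 2
```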
@@ -338,12 +303,16 @@ def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str
         new_size = (int(img.width * ratio), int(img.height * ratio))
         img = img.resize(new_size, Image.LANCZOS)
 
-        # CUDA memory management
-
-        torch.cuda.
+        # Revised CUDA memory management
+        try:
+            if torch.cuda.is_available():
+                current_device = torch.cuda.current_device()
+                with torch.cuda.device(current_device):
+                    torch.cuda.empty_cache()
+        except Exception as e:
+            print(f"CUDA memory management failed: {e}")
 
-
-        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
+        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            mask, bbox, time_log = _gpu_process(img, prompt)
            masked_alpha = apply_mask(img, mask, defringe=True)
 
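One nit on this hunk: it swaps the device-agnostic `torch.amp.autocast('cuda', ...)` for `torch.cuda.amp.autocast(...)`. The two behave the same, but recent PyTorch releases deprecate the `torch.cuda.amp` spelling in favor of the form the old line already used:

```python
import torch

x = torch.randn(64, 64)
# Preferred modern spelling; effectively a no-op on CPU-only hosts since enabled=False there.
with torch.amp.autocast("cuda", enabled=torch.cuda.is_available()):
    y = (x.cuda() @ x.cuda()) if torch.cuda.is_available() else x @ x
```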
@@ -376,42 +345,37 @@ def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
                    aspect_ratio: str = "1:1", position: str = "bottom-center",
                    scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
     try:
-        if img is None or
+        if img is None or prompt.strip() == "":
             raise gr.Error("Please provide both image and prompt")
 
-        print(f"Processing with position: {position}, scale: {scale_percent}")
+        print(f"Processing with position: {position}, scale: {scale_percent}")
 
-
-
-
-
-
 
-
-        translated_prompt = translate_to_english(prompt)
-        translated_bg_prompt = translate_to_english(bg_prompt) if bg_prompt else None
+        try:
+            prompt = translate_to_english(prompt)
+            if bg_prompt:
+                bg_prompt = translate_to_english(bg_prompt)
+        except Exception as e:
+            print(f"Translation error (continuing with original text): {str(e)}")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return results[1], results[2]
-
-        return results[1], results[2]
-
+        results, _ = _process(img, prompt, bg_prompt, aspect_ratio)
 
+        if bg_prompt:
+            try:
+                combined = combine_with_background(
+                    foreground=results[2],
+                    background=results[1],
+                    position=position,
+                    scale_percent=scale_percent
+                )
+                print(f"Combined image created with position: {position}")
+                return combined, results[2]
+            except Exception as e:
+                print(f"Combination error: {str(e)}")
+                return results[1], results[2]
+
+        return results[1], results[2]
     except Exception as e:
-        print(f"
+        print(f"Error in process_prompt: {str(e)}")
         raise gr.Error(str(e))
     finally:
         clear_memory()
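For readers without the full file: `combine_with_background` (unchanged in this commit; see the `result.paste(...)` context at the top of the previous hunk) pastes the extracted RGBA foreground onto the generated background, using the foreground's own alpha channel as the paste mask. A self-contained sketch of that pattern for the bottom-center position; the helper name and details are illustrative, not the Space's exact code:

```python
from PIL import Image

def paste_bottom_center(background: Image.Image, foreground: Image.Image,
                        scale_percent: float = 100) -> Image.Image:
    scale = scale_percent / 100
    fg = foreground.resize(
        (int(foreground.width * scale), int(foreground.height * scale)),
        Image.LANCZOS,
    )
    out = background.convert("RGB").copy()
    x = (out.width - fg.width) // 2   # centered horizontally
    y = out.height - fg.height        # flush with the bottom edge
    out.paste(fg, (x, y), fg)         # the RGBA alpha channel doubles as the mask
    return out
```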
@@ -518,61 +482,9 @@ button.primary:hover {
 }
 """
 
+# UI composition
+# In the UI section, move process_btn up and drop the position_grid.click handler
 
-def get_image_base64(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode()
-
-# Convert the example images to Base64
-try:
-    example_img1 = get_image_base64("aa1.png")
-    example_img2 = get_image_base64("aa2.png")
-    example_img3 = get_image_base64("aa3.png")
-except Exception as e:
-    print(f"Error loading example images: {e}")
-    example_img1 = example_img2 = example_img3 = ""
-
-# Revised HTML template
-example_html = f"""
-<div style="margin-top: 50px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
-    <h2 style="text-align: center; color: #2196F3; margin-bottom: 30px;">How It Works: Step by Step Guide</h2>
-
-    <div style="display: flex; justify-content: space-around; align-items: center; flex-wrap: wrap; gap: 20px;">
-        <div style="text-align: center; flex: 1; min-width: 250px; max-width: 300px;">
-            <img src="data:image/png;base64,{example_img1}"
-                 style="width: 100%; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-            <h3 style="color: #333; margin: 15px 0;">Step 1: Original Image</h3>
-            <p style="color: #666;">Upload your original image containing the object you want to extract.</p>
-        </div>
-
-        <div style="text-align: center; flex: 1; min-width: 250px; max-width: 300px;">
-            <img src="data:image/png;base64,{example_img2}"
-                 style="width: 100%; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-            <h3 style="color: #333; margin: 15px 0;">Step 2: Object Extraction</h3>
-            <p style="color: #666;">AI automatically detects and extracts the specified object.</p>
-        </div>
-
-        <div style="text-align: center; flex: 1; min-width: 250px; max-width: 300px;">
-            <img src="data:image/png;base64,{example_img3}"
-                 style="width: 100%; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-            <h3 style="color: #333; margin: 15px 0;">Step 3: Final Result</h3>
-            <p style="color: #666;">The extracted object is placed on an AI-generated background.</p>
-        </div>
-    </div>
-
-    <div style="margin-top: 30px; text-align: center; padding: 20px; background-color: #e3f2fd; border-radius: 8px;">
-        <h4 style="color: #1976D2; margin-bottom: 10px;">Key Features:</h4>
-        <ul style="list-style: none; padding: 0;">
-            <li style="margin: 5px 0;">✨ Advanced AI-powered object detection and extraction</li>
-            <li style="margin: 5px 0;">🎨 Custom background generation with text prompts</li>
-            <li style="margin: 5px 0;">Flexible object positioning and sizing options</li>
-            <li style="margin: 5px 0;">Multiple aspect ratio support for various use cases</li>
-        </ul>
-    </div>
-</div>
-"""
-
-
 # UI composition
 with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     gr.HTML("""
@@ -582,9 +494,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     </div>
     """)
 
-    # Add the examples section
-    gr.HTML(example_html)
-
     with gr.Row():
         with gr.Column(scale=1):
             input_image = gr.Image(
@@ -632,7 +541,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
                 scale_slider = gr.Slider(
                     minimum=10,
                     maximum=200,
-                    value=
+                    value=50,
                     step=5,
                     label="Object Size (%)"
                 )
@@ -689,13 +598,12 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     )
 
     def update_controls(bg_prompt):
+        """Show or hide the extra controls depending on whether a background prompt was entered"""
         is_visible = bool(bg_prompt)
         return [
-            gr.update(visible=is_visible
+            gr.update(visible=is_visible),  # aspect_ratio
             gr.update(visible=is_visible),  # object_controls
         ]
-
-
 
     bg_prompt.change(
         fn=update_controls,
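The `update_controls` fix matters because Gradio matches the returned list of `gr.update(...)` objects positionally against `outputs`, so each entry must line up with its component. A self-contained sketch of this wiring (the component choices here are illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    bg_prompt = gr.Textbox(label="Background Prompt")
    aspect_ratio = gr.Dropdown(["1:1", "16:9", "9:16", "4:3"], visible=False)
    with gr.Group(visible=False) as object_controls:
        scale_slider = gr.Slider(10, 200, value=50, step=5, label="Object Size (%)")

    def update_controls(bg_prompt):
        is_visible = bool(bg_prompt)
        return [
            gr.update(visible=is_visible),  # aspect_ratio
            gr.update(visible=is_visible),  # object_controls
        ]

    bg_prompt.change(fn=update_controls, inputs=bg_prompt,
                     outputs=[aspect_ratio, object_controls])
```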
@@ -724,4 +632,5 @@ demo.launch(
     server_name="0.0.0.0",
     server_port=7860,
     share=False,
-    max_threads=2
+    max_threads=2  # limit the thread count
+)