Spaces:

Ntdeseb
/

ntia

Running

App Files Community

Ntdeseb committed on Jul 28

Commit

cea89ef

1 Parent(s): 250115f

fix: FLUX model configuration and add sentencepiece dependency

Browse files

Files changed (2) hide show

app.py +81 -54
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -113,12 +113,12 @@ def load_text_model(model_name):
         print(f"Cargando modelo de texto: {model_name}")
         try:
-            # Detectar tipo de modelo
-            if "opus-mt" in model_name.lower():
-                # Modelo de traducción
-                from transformers import MarianMTModel, MarianTokenizer
-                tokenizer = MarianTokenizer.from_pretrained(model_name)
-                model = MarianMTModel.from_pretrained(model_name)
             elif "flan-t5" in model_name.lower():
                 # Modelo Flan-T5
@@ -144,13 +144,13 @@ def load_text_model(model_name):
                 if tokenizer.pad_token is None:
                     tokenizer.pad_token = tokenizer.eos_token
-            else:
                 # Modelo de generación de texto estándar
-                tokenizer = AutoTokenizer.from_pretrained(model_name)
-                model = AutoModelForCausalLM.from_pretrained(model_name)
-                # Configurar para chat si es DialoGPT
-                if "dialogpt" in model_name.lower():
                     tokenizer.pad_token = tokenizer.eos_token
                     model.config.pad_token_id = model.config.eos_token_id
@@ -165,14 +165,14 @@ def load_text_model(model_name):
             # Fallback a un modelo básico
             tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
             model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
-            tokenizer.pad_token = tokenizer.eos_token
-            model.config.pad_token_id = model.config.eos_token_id
-            model_cache[model_name] = {
-                "tokenizer": tokenizer,
-                "model": model,
-                "type": "text"
-            }
     return model_cache[model_name]
@@ -186,15 +186,23 @@ def load_image_model(model_name):
             if "flux" in model_name.lower():
                 try:
                     from diffusers import FluxPipeline
                     pipe = FluxPipeline.from_pretrained(
                         model_name,
                         torch_dtype=torch.bfloat16,
                         use_auth_token=HF_TOKEN if HF_TOKEN else None
                     )
-                    pipe.enable_model_cpu_offload()
                 except Exception as e:
-                    print(f"Error cargando FLUX: {e}")
                     # Fallback a Stable Diffusion
                     pipe = StableDiffusionPipeline.from_pretrained(
                         "CompVis/stable-diffusion-v1-4",
                         torch_dtype=torch.float32,
@@ -248,13 +256,16 @@ def load_image_model(model_name):
             # Optimizaciones básicas de memoria
             pipe.enable_attention_slicing()
             if hasattr(pipe, 'enable_model_cpu_offload'):
-                pipe.enable_model_cpu_offload()
             model_cache[model_name] = {
                 "pipeline": pipe,
                 "type": "image"
             }
         except Exception as e:
             print(f"Error general cargando modelo de imagen {model_name}: {e}")
             # Fallback final a SD 1.4
@@ -287,16 +298,16 @@ def load_video_model(model_name):
                 # Modelos de texto a video
                 from diffusers import DiffusionPipeline
                 pipe = DiffusionPipeline.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float32,
-                    variant="fp16"
-                )
             elif "modelscope" in model_name.lower():
                 # ModelScope models
-                from diffusers import DiffusionPipeline
-                pipe = DiffusionPipeline.from_pretrained(
                     model_name,
-                    torch_dtype=torch.float32
                 )
             elif "zeroscope" in model_name.lower():
                 # Zeroscope models
@@ -307,25 +318,25 @@ def load_video_model(model_name):
                 )
             elif "animatediff" in model_name.lower():
                 # AnimateDiff models
-                from diffusers import DiffusionPipeline
-                pipe = DiffusionPipeline.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float32
-                )
             elif "cogvideo" in model_name.lower():
                 # CogVideo models
-                from diffusers import DiffusionPipeline
-                pipe = DiffusionPipeline.from_pretrained(
                     model_name,
-                    torch_dtype=torch.float32
-                )
             elif "pyramid-flow" in model_name.lower():
                 # Pyramid Flow models
-                from diffusers import DiffusionPipeline
-                pipe = DiffusionPipeline.from_pretrained(
                     model_name,
-                    torch_dtype=torch.float32
-                )
             else:
                 # Fallback a text-to-video genérico
                 from diffusers import DiffusionPipeline
@@ -402,7 +413,7 @@ def generate_text(prompt, model_name, max_length=100):
                 response = response.replace(prompt, "").strip()
         return response
     except Exception as e:
         return f"Error generando texto: {str(e)}"
@@ -413,17 +424,33 @@ def generate_image(prompt, model_name, num_inference_steps=20):
         print(f"Prompt: {prompt}")
         print(f"Pasos: {num_inference_steps}")
         model_data = load_image_model(model_name)
         pipeline = model_data["pipeline"]
         # Configuración específica para FLUX
         if "flux" in model_name.lower():
             image = pipeline(
                 prompt,
-                guidance_scale=0.0,
-                num_inference_steps=4,  # FLUX usa solo 4 pasos
-                max_sequence_length=256,
-                generator=torch.Generator("cpu").manual_seed(0)
             ).images[0]
         else:
             # Configuración básica para otros modelos
@@ -431,11 +458,11 @@ def generate_image(prompt, model_name, num_inference_steps=20):
                 prompt,
                 num_inference_steps=num_inference_steps,
                 guidance_scale=7.5
-            ).images[0]
         print("Imagen generada exitosamente")
         return image
     except Exception as e:
         print(f"Error generando imagen: {str(e)}")
         return f"Error generando imagen: {str(e)}"
@@ -478,7 +505,7 @@ def generate_video(prompt, model_name, num_frames=16, num_inference_steps=20):
         print("Video generado exitosamente")
         return video_frames
     except Exception as e:
         print(f"Error generando video: {str(e)}")
         return f"Error generando video: {str(e)}"
@@ -522,7 +549,7 @@ def chat_with_model(message, history, model_name):
         history.append({"role": "assistant", "content": response})
         return history
     except Exception as e:
         error_msg = f"Error en el chat: {str(e)}"
         history.append({"role": "user", "content": message})
@@ -705,7 +732,7 @@ with gr.Blocks(title="Modelos Libres de IA", theme=gr.themes.Soft()) as demo:
                         label="Video Generado",
                         format="mp4"
                     )
             video_btn.click(
                 generate_video,
                 inputs=[video_prompt, video_model, num_frames, video_steps],

         print(f"Cargando modelo de texto: {model_name}")
         try:
+        # Detectar tipo de modelo
+        if "opus-mt" in model_name.lower():
+            # Modelo de traducción
+            from transformers import MarianMTModel, MarianTokenizer
+            tokenizer = MarianTokenizer.from_pretrained(model_name)
+            model = MarianMTModel.from_pretrained(model_name)
             elif "flan-t5" in model_name.lower():
                 # Modelo Flan-T5
                 if tokenizer.pad_token is None:
                     tokenizer.pad_token = tokenizer.eos_token
+        else:
                 # Modelo de generación de texto estándar
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForCausalLM.from_pretrained(model_name)
+            # Configurar para chat si es DialoGPT
+            if "dialogpt" in model_name.lower():
                     tokenizer.pad_token = tokenizer.eos_token
                     model.config.pad_token_id = model.config.eos_token_id
             # Fallback a un modelo básico
             tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
             model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
+                tokenizer.pad_token = tokenizer.eos_token
+                model.config.pad_token_id = model.config.eos_token_id
+        model_cache[model_name] = {
+            "tokenizer": tokenizer,
+            "model": model,
+            "type": "text"
+        }
     return model_cache[model_name]
             if "flux" in model_name.lower():
                 try:
                     from diffusers import FluxPipeline
+                    print("🚀 Cargando FLUX Pipeline...")
                     pipe = FluxPipeline.from_pretrained(
                         model_name,
                         torch_dtype=torch.bfloat16,
                         use_auth_token=HF_TOKEN if HF_TOKEN else None
                     )
+                    # Solo usar enable_model_cpu_offload si hay acelerador disponible
+                    try:
+                        pipe.enable_model_cpu_offload()
+                        print("✅ FLUX con CPU offload habilitado")
+                    except Exception as offload_error:
+                        print(f"⚠️ No se pudo habilitar CPU offload: {offload_error}")
+                        print("✅ FLUX cargado sin CPU offload")
                 except Exception as e:
+                    print(f"❌ Error cargando FLUX: {e}")
                     # Fallback a Stable Diffusion
+                    print("🔄 Fallback a Stable Diffusion...")
                     pipe = StableDiffusionPipeline.from_pretrained(
                         "CompVis/stable-diffusion-v1-4",
                         torch_dtype=torch.float32,
             # Optimizaciones básicas de memoria
             pipe.enable_attention_slicing()
             if hasattr(pipe, 'enable_model_cpu_offload'):
+                try:
+                    pipe.enable_model_cpu_offload()
+                except Exception as e:
+                    print(f"⚠️ No se pudo habilitar CPU offload: {e}")
             model_cache[model_name] = {
                 "pipeline": pipe,
                 "type": "image"
             }
         except Exception as e:
             print(f"Error general cargando modelo de imagen {model_name}: {e}")
             # Fallback final a SD 1.4
                 # Modelos de texto a video
                 from diffusers import DiffusionPipeline
                 pipe = DiffusionPipeline.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float32,
+                        variant="fp16"
+                    )
             elif "modelscope" in model_name.lower():
                 # ModelScope models
+                    from diffusers import DiffusionPipeline
+                    pipe = DiffusionPipeline.from_pretrained(
                     model_name,
+                        torch_dtype=torch.float32
                 )
             elif "zeroscope" in model_name.lower():
                 # Zeroscope models
                 )
             elif "animatediff" in model_name.lower():
                 # AnimateDiff models
+                    from diffusers import DiffusionPipeline
+                    pipe = DiffusionPipeline.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float32
+                    )
             elif "cogvideo" in model_name.lower():
                 # CogVideo models
+                    from diffusers import DiffusionPipeline
+                    pipe = DiffusionPipeline.from_pretrained(
                     model_name,
+                        torch_dtype=torch.float32
+                    )
             elif "pyramid-flow" in model_name.lower():
                 # Pyramid Flow models
+                    from diffusers import DiffusionPipeline
+                    pipe = DiffusionPipeline.from_pretrained(
                     model_name,
+                        torch_dtype=torch.float32
+                    )
             else:
                 # Fallback a text-to-video genérico
                 from diffusers import DiffusionPipeline
                 response = response.replace(prompt, "").strip()
         return response
     except Exception as e:
         return f"Error generando texto: {str(e)}"
         print(f"Prompt: {prompt}")
         print(f"Pasos: {num_inference_steps}")
+        # Convertir num_inference_steps a entero si es string
+        if isinstance(num_inference_steps, str):
+            try:
+                num_inference_steps = int(num_inference_steps)
+            except ValueError:
+                num_inference_steps = 20
+                print(f"⚠️ No se pudo convertir '{num_inference_steps}' a entero, usando 20")
         model_data = load_image_model(model_name)
         pipeline = model_data["pipeline"]
         # Configuración específica para FLUX
         if "flux" in model_name.lower():
+            import random
+            # Generar un seed aleatorio para cada imagen
+            random_seed = random.randint(0, 999999)
+            print(f"🎲 Usando seed aleatorio para FLUX: {random_seed}")
+            print(f"🔧 Parámetros FLUX: guidance_scale=3.5, steps=50, max_seq=512")
             image = pipeline(
                 prompt,
+                height=1024,
+                width=1024,
+                guidance_scale=3.5,  # ✅ Valor recomendado por la documentación
+                num_inference_steps=50,  # ✅ Valor recomendado por la documentación
+                max_sequence_length=512,  # ✅ Valor recomendado por la documentación
+                generator=torch.Generator("cpu").manual_seed(random_seed)  # ✅ Seed aleatorio
             ).images[0]
         else:
             # Configuración básica para otros modelos
                 prompt,
                 num_inference_steps=num_inference_steps,
                 guidance_scale=7.5
+        ).images[0]
         print("Imagen generada exitosamente")
         return image
     except Exception as e:
         print(f"Error generando imagen: {str(e)}")
         return f"Error generando imagen: {str(e)}"
         print("Video generado exitosamente")
         return video_frames
     except Exception as e:
         print(f"Error generando video: {str(e)}")
         return f"Error generando video: {str(e)}"
         history.append({"role": "assistant", "content": response})
         return history
     except Exception as e:
         error_msg = f"Error en el chat: {str(e)}"
         history.append({"role": "user", "content": message})
                         label="Video Generado",
                         format="mp4"
                     )
             video_btn.click(
                 generate_video,
                 inputs=[video_prompt, video_model, num_frames, video_steps],

requirements.txt CHANGED Viewed

@@ -14,4 +14,5 @@ imageio>=2.31.0
 imageio-ffmpeg>=0.4.8
 fastapi>=0.104.0
 uvicorn>=0.24.0
-pydantic>=2.0.0

 imageio-ffmpeg>=0.4.8
 fastapi>=0.104.0
 uvicorn>=0.24.0
+pydantic>=2.0.0
+sentencepiece>=0.1.99