Spaces:

ILLUME-MLLM
/

ILLUME_plus-3b

Sleeping

huangrh9 committed on May 30

Commit

7378375

verified ·

1 Parent(s): d59404b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,8 +21,7 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
 import gradio as gr
-from illume.conversation import default_conversation, conv_templates, SeparatorStyle
-# from conversation import default_conversation, conv_templates, SeparatorStyle
 # --- Global Variables and Model Loading ---
 model = None  # Global variable to hold the loaded ILLUME model
@@ -936,10 +935,10 @@ if __name__ == "__main__":
     # prepare models and processors
     model = AutoModel.from_pretrained(
         args.model_name,
-        # torch_dtype=torch.bfloat16,
-        # attn_implementation='flash_attention_2',  # OR 'sdpa' for Ascend NPUs
-        torch_dtype=args.torch_dtype,
-        attn_implementation='sdpa',  # OR 'sdpa' for Ascend NPUs
         low_cpu_mem_usage=True,
         trust_remote_code=True).eval().cuda()
     processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)

 import gradio as gr
+from conversation import default_conversation, conv_templates, SeparatorStyle
 # --- Global Variables and Model Loading ---
 model = None  # Global variable to hold the loaded ILLUME model
     # prepare models and processors
     model = AutoModel.from_pretrained(
         args.model_name,
+        torch_dtype=torch.bfloat16,
+        attn_implementation='flash_attention_2',  # OR 'sdpa' for Ascend NPUs
+        # torch_dtype=args.torch_dtype,
+        # attn_implementation='sdpa',  # OR 'sdpa' for Ascend NPUs
         low_cpu_mem_usage=True,
         trust_remote_code=True).eval().cuda()
     processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)