Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,8 +21,7 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
| 21 |
|
| 22 |
import gradio as gr
|
| 23 |
|
| 24 |
-
from
|
| 25 |
-
# from conversation import default_conversation, conv_templates, SeparatorStyle
|
| 26 |
|
| 27 |
# --- Global Variables and Model Loading ---
|
| 28 |
model = None # Global variable to hold the loaded ILLUME model
|
|
@@ -936,10 +935,10 @@ if __name__ == "__main__":
|
|
| 936 |
# prepare models and processors
|
| 937 |
model = AutoModel.from_pretrained(
|
| 938 |
args.model_name,
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
torch_dtype=args.torch_dtype,
|
| 942 |
-
attn_implementation='sdpa', # OR 'sdpa' for Ascend NPUs
|
| 943 |
low_cpu_mem_usage=True,
|
| 944 |
trust_remote_code=True).eval().cuda()
|
| 945 |
processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)
|
|
|
|
| 21 |
|
| 22 |
import gradio as gr
|
| 23 |
|
| 24 |
+
from conversation import default_conversation, conv_templates, SeparatorStyle
|
|
|
|
| 25 |
|
| 26 |
# --- Global Variables and Model Loading ---
|
| 27 |
model = None # Global variable to hold the loaded ILLUME model
|
|
|
|
| 935 |
# prepare models and processors
|
| 936 |
model = AutoModel.from_pretrained(
|
| 937 |
args.model_name,
|
| 938 |
+
torch_dtype=torch.bfloat16,
|
| 939 |
+
attn_implementation='flash_attention_2', # OR 'sdpa' for Ascend NPUs
|
| 940 |
+
# torch_dtype=args.torch_dtype,
|
| 941 |
+
# attn_implementation='sdpa', # OR 'sdpa' for Ascend NPUs
|
| 942 |
low_cpu_mem_usage=True,
|
| 943 |
trust_remote_code=True).eval().cuda()
|
| 944 |
processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)
|