Spaces:
Running
on
Zero
Running
on
Zero
Joseph Pollack
committed on
bumpt transformers and fix examples
Browse files- app.py +44 -22
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -37,21 +37,43 @@ class LOperatorDemo:
|
|
| 37 |
if not HF_TOKEN:
|
| 38 |
return "β HF_TOKEN not found. Please set HF_TOKEN in Spaces secrets."
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if DEVICE == "cpu":
|
| 57 |
self.model = self.model.to(DEVICE)
|
|
@@ -62,7 +84,7 @@ class LOperatorDemo:
|
|
| 62 |
|
| 63 |
except Exception as e:
|
| 64 |
logger.error(f"Error loading model: {str(e)}")
|
| 65 |
-
return f"β Error loading model: {str(e)}"
|
| 66 |
|
| 67 |
@spaces.GPU(duration=120) # 2 minutes for action generation
|
| 68 |
def generate_action(self, image: Image.Image, goal: str, instruction: str) -> str:
|
|
@@ -193,7 +215,7 @@ def load_example_episodes():
|
|
| 193 |
with open("extracted_episodes_duckdb/episode_13/metadata.json", "r") as f:
|
| 194 |
episode_13 = json.load(f)
|
| 195 |
|
| 196 |
-
# Load episode 53
|
| 197 |
with open("extracted_episodes_duckdb/episode_53/metadata.json", "r") as f:
|
| 198 |
episode_53 = json.load(f)
|
| 199 |
|
|
@@ -201,19 +223,19 @@ def load_example_episodes():
|
|
| 201 |
with open("extracted_episodes_duckdb/episode_73/metadata.json", "r") as f:
|
| 202 |
episode_73 = json.load(f)
|
| 203 |
|
| 204 |
-
# Create examples
|
| 205 |
examples = [
|
| 206 |
[
|
| 207 |
"extracted_episodes_duckdb/episode_13/screenshots/screenshot_1.png",
|
| 208 |
-
|
| 209 |
],
|
| 210 |
[
|
| 211 |
-
"extracted_episodes_duckdb/episode_53/screenshots/screenshot_1.png",
|
| 212 |
-
|
| 213 |
],
|
| 214 |
[
|
| 215 |
"extracted_episodes_duckdb/episode_73/screenshots/screenshot_1.png",
|
| 216 |
-
|
| 217 |
]
|
| 218 |
]
|
| 219 |
|
|
|
|
| 37 |
if not HF_TOKEN:
|
| 38 |
return "β HF_TOKEN not found. Please set HF_TOKEN in Spaces secrets."
|
| 39 |
|
| 40 |
+
try:
|
| 41 |
+
# Try loading with standard approach
|
| 42 |
+
self.processor = AutoProcessor.from_pretrained(
|
| 43 |
+
MODEL_ID,
|
| 44 |
+
trust_remote_code=True,
|
| 45 |
+
token=HF_TOKEN
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
self.model = AutoModelForImageTextToText.from_pretrained(
|
| 49 |
+
MODEL_ID,
|
| 50 |
+
torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
|
| 51 |
+
trust_remote_code=True,
|
| 52 |
+
device_map="auto" if DEVICE == "cuda" else None,
|
| 53 |
+
token=HF_TOKEN
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
except Exception as e:
|
| 57 |
+
logger.warning(f"Standard loading failed: {str(e)}")
|
| 58 |
+
logger.info("Attempting fallback loading approach...")
|
| 59 |
+
|
| 60 |
+
# Fallback: try loading with explicit model type
|
| 61 |
+
self.processor = AutoProcessor.from_pretrained(
|
| 62 |
+
MODEL_ID,
|
| 63 |
+
trust_remote_code=True,
|
| 64 |
+
token=HF_TOKEN,
|
| 65 |
+
revision="main"
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
self.model = AutoModelForImageTextToText.from_pretrained(
|
| 69 |
+
MODEL_ID,
|
| 70 |
+
torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
|
| 71 |
+
trust_remote_code=True,
|
| 72 |
+
device_map="auto" if DEVICE == "cuda" else None,
|
| 73 |
+
token=HF_TOKEN,
|
| 74 |
+
revision="main",
|
| 75 |
+
ignore_mismatched_sizes=True
|
| 76 |
+
)
|
| 77 |
|
| 78 |
if DEVICE == "cpu":
|
| 79 |
self.model = self.model.to(DEVICE)
|
|
|
|
| 84 |
|
| 85 |
except Exception as e:
|
| 86 |
logger.error(f"Error loading model: {str(e)}")
|
| 87 |
+
return f"β Error loading model: {str(e)} - This may be a custom model requiring special handling"
|
| 88 |
|
| 89 |
@spaces.GPU(duration=120) # 2 minutes for action generation
|
| 90 |
def generate_action(self, image: Image.Image, goal: str, instruction: str) -> str:
|
|
|
|
| 215 |
with open("extracted_episodes_duckdb/episode_13/metadata.json", "r") as f:
|
| 216 |
episode_13 = json.load(f)
|
| 217 |
|
| 218 |
+
# Load episode 53
|
| 219 |
with open("extracted_episodes_duckdb/episode_53/metadata.json", "r") as f:
|
| 220 |
episode_53 = json.load(f)
|
| 221 |
|
|
|
|
| 223 |
with open("extracted_episodes_duckdb/episode_73/metadata.json", "r") as f:
|
| 224 |
episode_73 = json.load(f)
|
| 225 |
|
| 226 |
+
# Create examples with simple identifiers
|
| 227 |
examples = [
|
| 228 |
[
|
| 229 |
"extracted_episodes_duckdb/episode_13/screenshots/screenshot_1.png",
|
| 230 |
+
"Episode 13: Navigate app interface"
|
| 231 |
],
|
| 232 |
[
|
| 233 |
+
"extracted_episodes_duckdb/episode_53/screenshots/screenshot_1.png",
|
| 234 |
+
"Episode 53: App interaction example"
|
| 235 |
],
|
| 236 |
[
|
| 237 |
"extracted_episodes_duckdb/episode_73/screenshots/screenshot_1.png",
|
| 238 |
+
"Episode 73: Device control task"
|
| 239 |
]
|
| 240 |
]
|
| 241 |
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
torch>=2.0.0
|
| 3 |
-
transformers>=4.
|
| 4 |
Pillow>=10.0.0
|
| 5 |
accelerate>=0.20.0
|
| 6 |
huggingface-hub>=0.17.0
|
|
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
torch>=2.0.0
|
| 3 |
+
transformers>=4.55.0
|
| 4 |
Pillow>=10.0.0
|
| 5 |
accelerate>=0.20.0
|
| 6 |
huggingface-hub>=0.17.0
|