prithivMLmods committed
Commit be8b851 · verified · 1 Parent(s): 16e37bd

Update app.py

Files changed (1): app.py +7 -7
app.py CHANGED
@@ -87,7 +87,7 @@ model_md3 = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
     device_map={"": "cuda"},
 )
-# FIXED: Added trust_remote_code=True to the tokenizer loading
+# FIX: Added trust_remote_code=True to resolve the loading error
 tokenizer_md3 = AutoTokenizer.from_pretrained(MODEL_ID_MD3, trust_remote_code=True)
 
 
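For context, the tokenizer fix follows the usual transformers pattern: a repository that ships custom modeling or tokenizer code must be loaded with trust_remote_code=True on both the model and the tokenizer, otherwise from_pretrained raises a ValueError asking for that flag. A minimal sketch of the full loading step, assuming the MODEL_ID_MD3 constant defined earlier in app.py (the repo id shown here is an assumption):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID_MD3 = "moondream/moondream3-preview"  # assumed value; defined earlier in app.py

# Both the model and the tokenizer need trust_remote_code=True, since the
# repository ships its own modeling/tokenizer code.
model_md3 = AutoModelForCausalLM.from_pretrained(
    MODEL_ID_MD3,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
)
tokenizer_md3 = AutoTokenizer.from_pretrained(MODEL_ID_MD3, trust_remote_code=True)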
 
@@ -183,11 +183,11 @@ def process_document_stream(
 
     # --- Special Handling for Moondream3 ---
     if model_name == "Moondream3":
-        # Moondream3 has a different inference method
-        enc_image = model_md3.encode_image(image)
+        # Moondream3 uses a different prompt structure and doesn't stream by default in this implementation
+        prompt_full = f"<image>\n\nQuestion: {prompt_input}\n\nAnswer:"
         answer = model_md3.answer_question(
-            enc_image,
-            prompt_input,
+            model_md3.encode_image(image),
+            prompt_full,
             tokenizer=tokenizer_md3
         )
         yield answer, answer
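The rewritten branch builds the Question/Answer prompt explicitly and passes the encoded image inline, yielding the whole answer at once instead of streaming token by token. A sketch of the resulting call path, using the same encode_image/answer_question interface the hunk above relies on (the wrapper function name is hypothetical):

def ask_moondream3(image, prompt_input):
    # The model expects an <image> placeholder followed by a
    # Question/Answer scaffold rather than a chat template.
    prompt_full = f"<image>\n\nQuestion: {prompt_input}\n\nAnswer:"
    # encode_image() computes the image features once; answer_question()
    # decodes the full answer in one call (no streaming here).
    return model_md3.answer_question(
        model_md3.encode_image(image),
        prompt_full,
        tokenizer=tokenizer_md3,
    )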
@@ -287,8 +287,8 @@ def create_gradio_interface():
         raw_output_stream = gr.Textbox(label="Raw Model Output Stream", interactive=False, lines=15, show_copy_button=True)
         with gr.Row():
             examples = gr.Examples(
-                examples=[["examples/1.png"], ["examples/2.png"], ["examples/3.png"],
-                          ["examples/4.png"], ["examples/5.png"], ["examples/6.png"]],
+                examples=["examples/1.png", "examples/2.png", "examples/3.png",
+                          "examples/4.png", "examples/5.png", "examples/6.png"],
                 inputs=image_input, label="Examples"
             )
             gr.Markdown("[Report-Bug💻](https://huggingface.co/spaces/prithivMLmods/Multimodal-VLM-v1.0/discussions) | [prithivMLmods🤗](https://huggingface.co/prithivMLmods)")
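The examples change is a simplification rather than a bug fix: gr.Examples accepts a flat list of values when there is exactly one input component, while the nested list-of-lists form is only required when each example spans multiple inputs. A minimal sketch, assuming image_input is a single gr.Image component as in app.py (the component definition shown is an assumption):

import gradio as gr

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil", label="Image")  # assumed component definition
    # With a single input component, a flat list of example values works;
    # [["examples/1.png"], ...] would be equivalent.
    gr.Examples(
        examples=["examples/1.png", "examples/2.png", "examples/3.png"],
        inputs=image_input,
        label="Examples",
    )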
 