prithivMLmods committed on
Commit
a8cca74
·
verified ·
1 Parent(s): 7a892bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -21,6 +21,7 @@ import supervision as sv
21
 
22
  from transformers import (
23
  Qwen2_5_VLForConditionalGeneration,
 
24
  Qwen2VLForConditionalGeneration,
25
  AutoModelForCausalLM,
26
  AutoProcessor,
@@ -57,16 +58,16 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
57
  ).to(device).eval()
58
  print("Camel-Doc-OCR-062825 loaded.")
59
 
60
- # MinerU2.5-2509
61
- print("Loading MinerU2.5-2509...")
62
- MODEL_ID_T = "opendatalab/MinerU2.5-2509-1.2B"
63
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
64
- model_t = Qwen2VLForConditionalGeneration.from_pretrained(
65
  MODEL_ID_T,
66
  trust_remote_code=True,
67
  torch_dtype=torch.float16
68
  ).to(device).eval()
69
- print("MinerU2.5-2509 loaded.")
70
 
71
  # Load moondream3
72
  print("Loading moondream3-preview...")
@@ -231,7 +232,7 @@ def process_document_stream(
231
  # Select processor and model based on dropdown choice
232
  if model_name == "Camel-Doc-OCR-062825 (OCR)":
233
  processor, model = processor_m, model_m
234
- elif model_name == "MinerU2.5-2509 (General)":
235
  processor, model = processor_t, model_t
236
  else:
237
  yield "Invalid model selected."
@@ -286,7 +287,7 @@ def create_gradio_interface():
286
  with gr.Column(scale=1):
287
  #gr.Markdown("### 1. Configure Inputs")
288
  model_choice = gr.Dropdown(
289
- choices=["Camel-Doc-OCR-062825 (OCR)", "MinerU2.5-2509 (General)"],
290
  label="Select Model", value= "Camel-Doc-OCR-062825 (OCR)"
291
  )
292
  image_input_doc = gr.Image(label="Upload Image", type="pil", sources=['upload'], height=280)
 
21
 
22
  from transformers import (
23
  Qwen2_5_VLForConditionalGeneration,
24
+ Glm4vForConditionalGeneration,
25
  Qwen2VLForConditionalGeneration,
26
  AutoModelForCausalLM,
27
  AutoProcessor,
 
58
  ).to(device).eval()
59
  print("Camel-Doc-OCR-062825 loaded.")
60
 
61
+ # GLM-4.1V-9B-Thinking
62
+ print("Loading GLM-4.1V-9B-Thinking")
63
+ MODEL_ID_T = "zai-org/GLM-4.1V-9B-Thinking"
64
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
65
+ model_t = Glm4vForConditionalGeneration.from_pretrained(
66
  MODEL_ID_T,
67
  trust_remote_code=True,
68
  torch_dtype=torch.float16
69
  ).to(device).eval()
70
+ print("GLM-4.1V-9B-Thinking loaded.")
71
 
72
  # Load moondream3
73
  print("Loading moondream3-preview...")
 
232
  # Select processor and model based on dropdown choice
233
  if model_name == "Camel-Doc-OCR-062825 (OCR)":
234
  processor, model = processor_m, model_m
235
+ elif model_name == "GLM-4.1V-9B (Thinking)":
236
  processor, model = processor_t, model_t
237
  else:
238
  yield "Invalid model selected."
 
287
  with gr.Column(scale=1):
288
  #gr.Markdown("### 1. Configure Inputs")
289
  model_choice = gr.Dropdown(
290
+ choices=["Camel-Doc-OCR-062825 (OCR)", "GLM-4.1V-9B (Thinking)"],
291
  label="Select Model", value= "Camel-Doc-OCR-062825 (OCR)"
292
  )
293
  image_input_doc = gr.Image(label="Upload Image", type="pil", sources=['upload'], height=280)