Spaces:

prithivMLmods
/

Multimodal-VLM-v1.0

Running on Zero

App Files Community

prithivMLmods committed on Oct 3

Commit

a8cca74

verified ·

1 Parent(s): 7a892bd

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -7

app.py CHANGED Viewed

@@ -21,6 +21,7 @@ import supervision as sv
 from transformers import (
     Qwen2_5_VLForConditionalGeneration,
     Qwen2VLForConditionalGeneration,
     AutoModelForCausalLM,
     AutoProcessor,
@@ -57,16 +58,16 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 ).to(device).eval()
 print("Camel-Doc-OCR-062825 loaded.")
-# MinerU2.5-2509
-print("Loading MinerU2.5-2509...")
-MODEL_ID_T = "opendatalab/MinerU2.5-2509-1.2B"
 processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
-model_t = Qwen2VLForConditionalGeneration.from_pretrained(
     MODEL_ID_T,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
-print("MinerU2.5-2509 loaded.")
 # Load moondream3
 print("Loading moondream3-preview...")
@@ -231,7 +232,7 @@ def process_document_stream(
     # Select processor and model based on dropdown choice
     if model_name == "Camel-Doc-OCR-062825 (OCR)":
         processor, model = processor_m, model_m
-    elif model_name == "MinerU2.5-2509 (General)":
         processor, model = processor_t, model_t
     else:
         yield "Invalid model selected."
@@ -286,7 +287,7 @@ def create_gradio_interface():
                     with gr.Column(scale=1):
                         #gr.Markdown("### 1. Configure Inputs")
                         model_choice = gr.Dropdown(
-                            choices=["Camel-Doc-OCR-062825 (OCR)", "MinerU2.5-2509 (General)"],
                             label="Select Model", value= "Camel-Doc-OCR-062825 (OCR)"
                         )
                         image_input_doc = gr.Image(label="Upload Image", type="pil", sources=['upload'], height=280)

 from transformers import (
     Qwen2_5_VLForConditionalGeneration,
+    Glm4vForConditionalGeneration,
     Qwen2VLForConditionalGeneration,
     AutoModelForCausalLM,
     AutoProcessor,
 ).to(device).eval()
 print("Camel-Doc-OCR-062825 loaded.")
+# GLM-4.1V-9B-Thinking
+print("Loading GLM-4.1V-9B-Thinking")
+MODEL_ID_T = "zai-org/GLM-4.1V-9B-Thinking"
 processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
+model_t = Glm4vForConditionalGeneration.from_pretrained(
     MODEL_ID_T,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
+print("GLM-4.1V-9B-Thinking loaded.")
 # Load moondream3
 print("Loading moondream3-preview...")
     # Select processor and model based on dropdown choice
     if model_name == "Camel-Doc-OCR-062825 (OCR)":
         processor, model = processor_m, model_m
+    elif model_name == "GLM-4.1V-9B (Thinking)":
         processor, model = processor_t, model_t
     else:
         yield "Invalid model selected."
                     with gr.Column(scale=1):
                         #gr.Markdown("### 1. Configure Inputs")
                         model_choice = gr.Dropdown(
+                            choices=["Camel-Doc-OCR-062825 (OCR)", "GLM-4.1V-9B (Thinking)"],
                             label="Select Model", value= "Camel-Doc-OCR-062825 (OCR)"
                         )
                         image_input_doc = gr.Image(label="Upload Image", type="pil", sources=['upload'], height=280)