Update app.py
app.py CHANGED
@@ -3,14 +3,17 @@ from PIL import Image
 from transformers import AutoModel, CLIPImageProcessor
 import gradio as gr
 
+# Force the use of GPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 # Load the model
 model = AutoModel.from_pretrained(
     'OpenGVLab/InternVL2_5-1B',
-    torch_dtype=torch.
+    torch_dtype=torch.float16, # Use float16 for GPU efficiency
     low_cpu_mem_usage=True,
     trust_remote_code=True,
-    use_flash_attn=
-).eval() #
+    use_flash_attn=True # Enable Flash Attention for improved performance
+).to(device).eval() # Explicitly move the model to GPU
 
 # Load the image processor
 image_processor = CLIPImageProcessor.from_pretrained('OpenGVLab/InternVL2_5-1B')
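Both sides of this hunk reference torch, so an `import torch` presumably sits in the unchanged lines above it. Note that torch.float16 and use_flash_attn=True (a kwarg from the InternVL model card, forwarded via trust_remote_code) both assume a CUDA GPU; a minimal sketch of a device-aware variant, not part of this commit (the use_cuda flag is a hypothetical helper):

import torch
from transformers import AutoModel

# Sketch (not in the commit): request float16 and Flash Attention only
# when CUDA is actually available, so the same app.py still starts on a
# CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_cuda = device.type == "cuda"  # hypothetical helper flag

model = AutoModel.from_pretrained(
    'OpenGVLab/InternVL2_5-1B',
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    use_flash_attn=use_cuda,  # Flash Attention requires a CUDA device
).to(device).eval()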
@@ -22,10 +25,11 @@ def process_image(image):
     image = image.convert('RGB')
 
     # Preprocess the image
-    pixel_values = image_processor(images=image, return_tensors='pt').pixel_values
+    pixel_values = image_processor(images=image, return_tensors='pt').pixel_values.to(device) # Ensure tensor is on GPU
 
-    # Run the model
-
+    # Run the model
+    with torch.no_grad(): # Disable gradient calculations for inference
+        outputs = model(pixel_values)
 
     # Assuming the model returns embeddings or features
     return f"Output Shape: {outputs.last_hidden_state.shape}"
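With the tensor moved to device and inference wrapped in torch.no_grad(), process_image can be smoke-tested without the UI. An illustrative check, not part of the commit (the dummy image is hypothetical, and it assumes the model's forward accepts pixel_values alone and exposes last_hidden_state, as the function above does):

from PIL import Image

# Illustrative smoke test (not in the commit): push one synthetic image
# through process_image before wiring it into Gradio.
dummy = Image.new("RGB", (448, 448), color="gray")
print(process_image(dummy))  # should print something like "Output Shape: torch.Size([...])"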
@@ -37,10 +41,10 @@ demo = gr.Interface(
     fn=process_image, # Function to process the input
     inputs=gr.Image(type="pil"), # Accepts images as input
     outputs=gr.Textbox(label="Model Output"), # Displays model output
-    title="
-    description="Upload an image to process it using the
+    title="InternVL2_5 Demo",
+    description="Upload an image to process it using the InternVL2_5-1B model from OpenGVLab."
 )
 
 # Launch the demo
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
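server_name="0.0.0.0" binds all interfaces, which a containerized deployment such as a Hugging Face Space needs, and 7860 is Gradio's default port. If the bind address or port ever needs to differ between local runs and the container, one possible pattern, not part of this commit, is to read them from the environment (the names mirror the GRADIO_SERVER_NAME / GRADIO_SERVER_PORT variables Gradio itself honors):

import os

# Sketch (not in the commit): let the host override the bind address and
# port, falling back to the values hard-coded above. `demo` is the
# gr.Interface defined earlier in app.py.
if __name__ == "__main__":
    demo.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )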