Spaces: Runtime error
Commit 2044733 · Parent(s): 75318f4
changing model
main.py CHANGED
@@ -2,22 +2,25 @@ import base64
 from io import BytesIO
 
 import torch
-from fastapi import FastAPI, Query
+from fastapi import Body, FastAPI, File, HTTPException, Query, UploadFile
 from PIL import Image
-from fastapi import FastAPI, File, UploadFile, HTTPException
-from qwen_vl_utils import process_vision_info
-from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, Qwen2VLForConditionalGeneration
-
-from fastapi import FastAPI, Body
 from pydantic import BaseModel
+from qwen_vl_utils import process_vision_info
+from transformers import (
+    AutoProcessor,
+    Qwen2_5_VLForConditionalGeneration,
+    Qwen2VLForConditionalGeneration,
+)
 
 app = FastAPI()
 
+
 # Define request model
 class PredictRequest(BaseModel):
     image_base64: str
     prompt: str
 
+
 # checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
 # min_pixels = 256 * 28 * 28
 # max_pixels = 1280 * 28 * 28
@@ -31,13 +34,11 @@ class PredictRequest(BaseModel):
 # # attn_implementation="flash_attention_2",
 # )
 
-checkpoint = "Qwen/Qwen2.5-VL-
-min_pixels = 256*28*28
-max_pixels = 1280*28*28
+checkpoint = "Qwen/Qwen2.5-VL-7B-Instruct"
+min_pixels = 256 * 28 * 28
+max_pixels = 1280 * 28 * 28
 processor = AutoProcessor.from_pretrained(
-    checkpoint,
-    min_pixels=min_pixels,
-    max_pixels=max_pixels
+    checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
 )
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     checkpoint,
@@ -82,6 +83,7 @@ def read_root():
 # print(f"❌ Error encoding image {image_path}: {e}")
 # return None
 
+
 def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
     """
     Converts an image from file data to a Base64-encoded string with optimized size.
@@ -96,6 +98,7 @@ def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error encoding image: {e}")
 
+
 @app.post("/encode-image/")
 async def upload_and_encode_image(file: UploadFile = File(...)):
     """
@@ -108,6 +111,7 @@ async def upload_and_encode_image(file: UploadFile = File(...)):
     except Exception as e:
         raise HTTPException(status_code=400, detail=f"Invalid file: {e}")
 
+
 @app.post("/predict")
 def predict(data: PredictRequest):
     """
@@ -121,7 +125,6 @@ def predict(data: PredictRequest):
         str: The generated description of the image.
     """
 
-
     # Create the input message structure
     messages = [
         {
@@ -160,6 +163,7 @@ def predict(data: PredictRequest):
 
     return {"response": output_text[0] if output_text else "No description generated."}
 
+
 # @app.get("/predict")
 # def predict(image_url: str = Query(...), prompt: str = Query(...)):
 
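Side note on the min_pixels/max_pixels values above: in the Qwen2-VL family, the processor resizes each image so its pixel count falls inside that range, and each 28×28 patch becomes one visual token, so these bounds amount to a budget of roughly 256 to 1280 visual tokens per image. A minimal sketch of the arithmetic (variable names are illustrative, not from the file):

# Each 28x28 patch maps to one visual token in Qwen2-VL/Qwen2.5-VL.
PATCH_PIXELS = 28 * 28             # 784 pixels per visual token
min_pixels = 256 * PATCH_PIXELS    # 200_704 pixels  -> ~256 tokens minimum
max_pixels = 1280 * PATCH_PIXELS   # 1_003_520 pixels -> ~1280 tokens maximum
print(min_pixels, max_pixels)      # 200704 1003520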
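For reference, a minimal client-side sketch of calling the /predict endpoint this file defines. The base URL and image filename are hypothetical placeholders, and the requests library is assumed to be installed:

import base64

import requests  # assumed HTTP client; any equivalent works

API_URL = "http://localhost:7860"  # hypothetical; use the Space's actual URL

# Base64-encode a local image to match the PredictRequest.image_base64 field.
with open("example.jpg", "rb") as f:  # hypothetical test image
    image_base64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    f"{API_URL}/predict",
    json={"image_base64": image_base64, "prompt": "Describe this image."},
)
resp.raise_for_status()
print(resp.json()["response"])  # mirrors the {"response": ...} payload above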