import gradio as gr
import torch
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode
from transformers import AutoTokenizer

from Models.modeling_llavaqw import LlavaQwModel

# ImageNet normalization statistics used by the vision encoder.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
model_name = "torettomarui/Llava-qw"

# Load the tokenizer and model from the Hugging Face Hub in bfloat16.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
model = LlavaQwModel.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to(torch.bfloat16).eval()  # .cuda()
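# Note: the .cuda() calls are left commented out so the app also runs on a CPU-only
# Space. If a GPU is assumed to be available, one could instead move the model with
# model = model.cuda() (and do the same for the pixel values below).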
def build_transform(input_size):
    """Build the preprocessing pipeline: RGB conversion, resize, tensor, normalize."""
    MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
    transform = T.Compose([
        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=MEAN, std=STD),
    ])
    return transform
def preprocess_image(image, image_size=448):
    """Convert a PIL image into a batched bfloat16 tensor of shape (1, 3, H, W)."""
    transform = build_transform(image_size)
    pixel_values = transform(image)
    return torch.stack([pixel_values]).to(torch.bfloat16)  # .cuda()
def generate_response(image, text):
    """Run one chat turn: preprocess the image, prepend the <image> token, and generate."""
    pixel_values = preprocess_image(image)
    generation_config = dict(max_new_tokens=2048, do_sample=False)
    question = '<image>\n' + text
    response = model.chat(tokenizer, pixel_values, question, generation_config)
    return response
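# A minimal sketch of calling the pipeline directly, e.g. for local testing without the
# Gradio UI. The file name "example.jpg" is only an illustrative assumption:
#
#     from PIL import Image
#     print(generate_response(Image.open("example.jpg"), "Describe this image."))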
# Gradio interface: an image upload plus a text box, returning the model's answer.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        gr.Textbox(lines=2, placeholder="Enter your question..."),
    ],
    outputs="text",
    title="Llava-QW",
    description="Upload an image and enter your question; the model will generate an answer.",
)
iface.launch()
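# When running outside a Space, Gradio can also expose a temporary public link; a common
# option (an assumption, not required by this app) is:
#
#     iface.launch(share=True)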