Spaces: Running on Zero
# This file is modified from https://github.com/haotian-liu/LLaVA/
import torch

from llava.model.multimodal_encoder.vision_encoder import VisionTower
from transformers import (
    PretrainedConfig,
    CLIPVisionModel,
    CLIPImageProcessor,
)
class CLIPVisionTower(VisionTower):
    """Vision tower backed by a HuggingFace CLIP vision encoder.

    Loads both the image processor and the CLIP vision model from the same
    pretrained checkpoint, casting model weights to the dtype named by
    ``config.model_dtype``.
    """

    def __init__(self, model_name_or_path: str, config: PretrainedConfig):
        """Load the CLIP vision encoder and its image processor.

        Args:
            model_name_or_path: HuggingFace model id or local checkpoint path.
            config: Model config; its ``model_dtype`` attribute names the torch
                dtype to load weights in (e.g. ``"torch.float16"`` or
                ``"float16"``).

        Raises:
            AttributeError: if ``config.model_dtype`` does not name a torch dtype.
        """
        super().__init__(model_name_or_path, config)
        self.image_processor = CLIPImageProcessor.from_pretrained(model_name_or_path)
        # Resolve the dtype by attribute lookup instead of eval():
        # eval() executes arbitrary code from the (potentially untrusted)
        # config string. split(".")[-1] accepts both "torch.float16" and
        # "float16" spellings, matching what eval() previously handled.
        model_dtype = getattr(torch, config.model_dtype.split(".")[-1])
        self.vision_tower = CLIPVisionModel.from_pretrained(
            model_name_or_path, torch_dtype=model_dtype
        )
        # Mark the tower as ready so lazy-loading callers skip re-loading.
        self.is_loaded = True