# Minimal loader that uses transformers to load a multimodal model if available.
# This is a thin adapter: it expects model checkpoints on the Hugging Face Hub that are
# compatible with the transformers Auto* classes (AutoTokenizer, AutoModelForCausalLM, AutoProcessor).
# For full TinyLLaVA upstream functionality, replace this with the full repo.
from typing import Optional

import torch
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer


def load_pretrained_model(model_path: str, model_base=None, model_name: Optional[str] = None):
    """
    Minimal loader:
      - tokenizer: AutoTokenizer.from_pretrained(model_path)
      - model: AutoModelForCausalLM.from_pretrained(model_path, device_map="auto" if CUDA is available, else None)
      - image_processor: AutoProcessor.from_pretrained(model_path), falling back to a known vision processor
    Returns: tokenizer, model, image_processor, context_len

    model_base is accepted for signature compatibility with the upstream loader and is unused here.
    """
    if model_name is None:
        model_name = model_path.split("/")[-1]

    # Load tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

    # Try to load an image processor / processor; fall back to a BLIP processor if available.
    try:
        image_processor = AutoProcessor.from_pretrained(model_path)
    except Exception:
        # Fallback: try a common image processor (BLIP).
        try:
            image_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
        except Exception:
            image_processor = None

    # Load the causal LM; place it automatically on GPU when CUDA is available, otherwise keep it on CPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto" if torch.cuda.is_available() else None,
    )

    # Context length: use the tokenizer's model_max_length if it is a sensible value,
    # guarding against the very large sentinel transformers sets when no limit is configured.
    context_len = getattr(tokenizer, "model_max_length", 2048)
    if not isinstance(context_len, int) or context_len > 10**9:
        context_len = 2048

    return tokenizer, model, image_processor, context_len
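

# --- Usage sketch (illustrative) ---
# A minimal example of how this adapter might be called from an app. The model id
# below is a placeholder, not a real dependency of this file; substitute any Hub
# checkpoint that works with the transformers Auto* classes.
if __name__ == "__main__":
    tokenizer, model, image_processor, context_len = load_pretrained_model(
        "your-org/your-multimodal-checkpoint"  # placeholder model id
    )
    print(f"Loaded {model.__class__.__name__}; context length = {context_len}")
    print(f"Image processor available: {image_processor is not None}")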