# Minimal loader that uses transformers to load a multimodal model if available.
# This is a thin adapter: it expects model checkpoints on the Hugging Face Hub that load
# with the transformers Auto* classes.
# For TinyLLaVA upstream functionality, replace with the full repo.
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
import torch

def load_pretrained_model(model_path: str, model_base=None, model_name: str = None):
    """
    Minimal loader:
      - tokenizer: AutoTokenizer.from_pretrained(model_path)
      - model: AutoModelForCausalLM.from_pretrained(model_path), loaded on CPU in float32
      - image_processor: AutoProcessor.from_pretrained(model_path), with a known BLIP
        processor as a fallback
    Returns: tokenizer, model, image_processor, context_len
    Note: model_base is accepted for signature compatibility but is unused here.
    """
    if model_name is None:
        model_name = model_path.split("/")[-1]

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

    # Try to load an image processor / processor; fall back to a BLIP processor if available
    try:
        image_processor = AutoProcessor.from_pretrained(model_path)
    except Exception:
        # Fallback: try a common image processor (BLIP)
        try:
            image_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
        except Exception:
            image_processor = None
    # Load causal LM (CPU, float32; move/cast outside this loader if desired)
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32, low_cpu_mem_usage=True)

    # Context length: use tokenizer.model_max_length when it is set to something reasonable;
    # transformers substitutes a very large sentinel value when no max length is configured.
    context_len = getattr(tokenizer, "model_max_length", 2048)
    if context_len is None or context_len > 1_000_000:
        context_len = 2048

    return tokenizer, model, image_processor, context_len
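

# Minimal smoke test (a sketch, not part of the loader itself): pass a checkpoint id as the
# first command-line argument, e.g. "gpt2", to exercise the text-only path. Any Hub
# checkpoint that works with AutoModelForCausalLM can be substituted.
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        tok, mdl, img_proc, ctx = load_pretrained_model(sys.argv[1])
        print(f"loaded {type(mdl).__name__}, "
              f"image_processor={type(img_proc).__name__ if img_proc is not None else None}, "
              f"context_len={ctx}")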