Spaces:
Sleeping
Sleeping
| import os | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig | |
| import streamlit as st | |
| import torch | |
# Page heading shown at the top of the Space.
st.title("Tokenizer Test Space")

# Hugging Face downloads are kept under /tmp, which is usually writable in
# Spaces. The directory is created up front instead of relying on the hub
# client to create it on first use.
cache_directory = "/tmp/hf_cache"
os.makedirs(cache_directory, exist_ok=True)

# Repository under test. To sanity-check the environment against the upstream
# checkpoint instead, point this at "unsloth/gemma-3-1b-it".
model_id = "Rahul-8799/project_manager_gemma3"
# Stage 1: load the tokenizer on its own so that a failure here is reported
# separately from any failure while loading the model weights.
try:
    st.write(f"Attempting to load tokenizer for {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        cache_dir=cache_directory,  # download into the directory created above
    )
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as tokenizer_error:
    # Diagnostic page: print a one-line summary, then the full traceback.
    st.error(f"Error loading tokenizer: {tokenizer_error}")
    st.exception(tokenizer_error)
@st.cache_resource(show_spinner=False)
def _load_quantized_model(repo_id, cache_dir):
    """Load ``repo_id`` as a 4-bit NF4-quantized causal LM and cache the result.

    Streamlit re-executes the whole script on every rerun and for every new
    browser session. Without caching, each execution would call
    ``from_pretrained`` again and load another copy of the weights.
    ``st.cache_resource`` keeps a single model object per
    ``(repo_id, cache_dir)`` pair and hands the same instance back on later
    runs. A call that raises is not cached, so a failed load is retried on
    the next run.

    Args:
        repo_id: Hugging Face Hub repository id of the model to load.
        cache_dir: Directory in which downloaded files are stored.

    Returns:
        The model object returned by ``AutoModelForCausalLM.from_pretrained``.

    Raises:
        Exception: Whatever ``from_pretrained`` raises (download failure,
            missing quantization backend, ...). The caller reports it.
    """
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    return AutoModelForCausalLM.from_pretrained(
        repo_id,
        quantization_config=quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        # NOTE(review): this allows Python code shipped in the model repository
        # to be executed at load time. Presumably not required for a stock
        # Gemma architecture -- confirm, and drop it if the load works without.
        trust_remote_code=True,
        cache_dir=cache_dir,
    )


# Stage 2: load the quantized model and report success or the full traceback.
try:
    st.write(f"Attempting to load model for {model_id}...")
    model = _load_quantized_model(model_id, cache_directory)
    st.success("Model loaded successfully!")
    st.write("Model details:", model)
except Exception as e:
    # Diagnostic page: print a one-line summary, then the full traceback.
    st.error(f"Error loading model: {e}")
    st.exception(e)