Spaces: Build error
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from peft import PeftModel

# Base model and LoRA adapter repositories
model_name = "google/gemma-2-2b-it"
lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"

# Configure 4-bit NF4 quantization so the model fits in limited GPU memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Load the LoRA adapter on top of the quantized base model
model = PeftModel.from_pretrained(model, lora_model_name)

def generate_response(input_text):
    # Wrap the raw input in Gemma's chat template so the decoded output
    # actually contains the <start_of_turn>model ... <end_of_turn> markers
    # that the parsing below depends on; without this the split would
    # raise an IndexError on every request.
    messages = [{"role": "user", "content": input_text}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The template already prepends <bos>, so skip adding special tokens again
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    generation_config = {
        "max_length": 512,
        "temperature": 0.01,
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_config)
    response = tokenizer.decode(outputs[0])
    # Return only the model's turn; fall back to the full decode if the
    # expected markers are missing for any reason
    if "<start_of_turn>model\n" in response:
        return response.split("<start_of_turn>model\n")[1].split("<end_of_turn>")[0]
    return response

# Create the Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),  # "Enter your text here..."
    outputs=gr.Textbox(lines=5, label="Model Çıktısı"),  # "Model output"
    title="Anlam-Lab",
)

if __name__ == "__main__":
    iface.launch()
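
Regarding the "Build error" status: a Space only installs the packages declared in its requirements.txt, and a missing entry is a common cause of build failures with this stack. A minimal sketch covering the imports used above (the file name is the standard Spaces convention; leaving versions unpinned is an assumption):

    torch
    transformers
    peft
    bitsandbytes
    accelerate

Note that accelerate is needed for device_map="auto" and bitsandbytes for the 4-bit quantization config, while gradio itself is provided by the Gradio Space SDK (pinned via sdk_version in the README metadata) and does not normally need to be listed.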