```python
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

# Look up the input embeddings for a raw string (before any chat templating).
p = "Well, I'm quite partial to a good squeeze of fresh lemon juice."
encoded_input = tokenizer(p, return_tensors="pt")
embeds = model.get_input_embeddings()(encoded_input.input_ids)  # public accessor for model.model.embed_tokens
print(embeds.shape)  # (batch_size, sequence_length, hidden_size)

# Format the conversation with the model's chat template and generate a reply.
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
model_inputs = encodeds.to(device)
model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])
```
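Note that the `embeds` tensor above is computed but never fed back into the model. If the point of that step is to generate from embeddings rather than token ids, `generate` also accepts `inputs_embeds` for decoder-only models in recent `transformers` releases. A minimal sketch under that assumption (the `max_new_tokens` value here is arbitrary):

```python
# Sketch: generate directly from input embeddings instead of token ids.
# Assumes a transformers version where decoder-only models accept
# `inputs_embeds` in generate(); the model is already on `device`.
inputs_embeds = model.get_input_embeddings()(encoded_input.input_ids.to(device))
out_ids = model.generate(inputs_embeds=inputs_embeds, max_new_tokens=50, do_sample=True)

# When generation starts from inputs_embeds (with no input_ids), the returned
# ids contain only the newly generated tokens, so this decodes just the reply.
print(tokenizer.batch_decode(out_ids, skip_special_tokens=True)[0])
```

This is useful when the embeddings have been modified before generation (e.g. soft prompts); if you only need plain text generation, passing token ids as in the main example is simpler.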