from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the fine-tuned model and tokenizer
model_path = 'model_data/finetuned_gpt'
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Move the model to the GPU if one is available and switch to evaluation mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

def generate_text(prompt_text, length, temperature, beams):
    # Encode the prompt and move it to the same device as the model
    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
    encoded_prompt = encoded_prompt.to(device)

    # Sample continuations with top-k and nucleus (top-p) filtering;
    # because do_sample=True, `beams` controls how many sequences are
    # returned rather than enabling beam search
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=length,
        temperature=temperature,
        top_k=20,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True,
        num_return_sequences=beams,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; reuse EOS
    )

    # Decode only the first of the returned sequences
    generated_sequence = output_sequences[0].tolist()
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)

    # Remove the prompt from the start of the generated text
    text = text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)):]
    return text.strip()
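# Illustrative call, not part of the app itself; the prompt and parameter
# values below are arbitrary examples:
# sample = generate_text("Once upon a time", length=100, temperature=0.8, beams=1)
# print(sample)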
# Streamlit interface