import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# gr.load("models/ai-forever/ruGPT-3.5-13B").launch()  # blocks here and would make everything below unreachable
# Load the model once at startup; fp16 halves the 13B model's memory footprint.
# from_pretrained() takes the bare Hub repo id, without gr.load()'s "models/" prefix.
model = AutoModelForCausalLM.from_pretrained(
    "ai-forever/ruGPT-3.5-13B", torch_dtype=torch.float16
).to('cuda:0')
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruGPT-3.5-13B")
print('launch')
def greet(request):
    print('greet')
    # Tokenize the prompt and move the input tensors to the model's device.
    encoded_input = tokenizer(request, return_tensors='pt',
                              add_special_tokens=False).to('cuda:0')
    print('encoded_input')
    # Generate up to 100 new tokens, sampling over two beams.
    output = model.generate(
        **encoded_input,
        num_beams=2,
        do_sample=True,
        max_new_tokens=100
    )
    print('output')
    return tokenizer.decode(output[0], skip_special_tokens=True)
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
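
# A minimal sketch of querying the running app, assuming Gradio's default
# local URL (the actual URL is printed by launch()):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("Как дела?", api_name="/predict"))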