import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the multilingual named-entity-linking model and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("impresso-project/nel-hipe-multilingual")
model = AutoModelForSeq2SeqLM.from_pretrained(
    "impresso-project/nel-hipe-multilingual"
).eval()
def disambiguate_sentences(text):
    """Run entity disambiguation on each non-empty line of the input text."""
    sentences = [line.strip() for line in text.split("\n") if line.strip()]
    results = []
    for sentence in sentences:
        # Generate the top-5 beam-search hypotheses for the marked entity.
        outputs = model.generate(
            **tokenizer([sentence], return_tensors="pt"),
            num_beams=5,
            num_return_sequences=5,
        )
        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        results.append("\n".join(decoded))
    return "\n\n".join(results)
# Gradio 3+ exposes components directly on the gr namespace
# (the gr.inputs / gr.outputs modules are deprecated).
input_sentences = gr.Textbox(
    lines=5,
    label="Input Sentences",
    placeholder=(
        "Enter your sentence in the following format: "
        "`It is reported in [START] Paris [END], that the opening of the chambers "
        "will take place on the 27th January.` "
        "The entity to disambiguate must be surrounded by [START] and [END]."
    ),
)
output_predictions = gr.Textbox(label="Predictions")
gr.Interface(
    fn=disambiguate_sentences,
    inputs=input_sentences,
    outputs=output_predictions,
    title="NEL Disambiguation",
).launch()
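# Usage sketch (assumption: the snippet is saved as a standalone script, e.g. app.py):
#   python app.py
# Gradio then prints a local URL (http://127.0.0.1:7860 by default); each submitted
# sentence returns the five beam-search candidates produced above.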