Spaces:
Runtime error
Runtime error
ADD: Ability to pull abstracts from article ids
Browse files- app.py +59 -36
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -2,9 +2,11 @@ import streamlit as st
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
def main():
|
|
|
|
| 8 |
|
| 9 |
st.set_page_config(
|
| 10 |
layout="wide",
|
|
@@ -17,16 +19,23 @@ def main():
|
|
| 17 |
st.text("")
|
| 18 |
st.text("")
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
]
|
| 24 |
-
|
| 25 |
-
example = st.selectbox("Choose an example abstract", example_prompts)
|
| 26 |
|
| 27 |
# Take the message which needs to be processed
|
| 28 |
-
message = st.text_area("...or paste a
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
st.text("")
|
| 31 |
models_to_choose = [
|
| 32 |
"AryanLala/autonlp-Scientific_Title_Generator-34558227",
|
|
@@ -42,40 +51,52 @@ def main():
|
|
| 42 |
else:
|
| 43 |
st.error("Please select a model first")
|
| 44 |
|
| 45 |
-
@st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=
|
| 46 |
def load_model():
|
| 47 |
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
| 48 |
model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)
|
| 49 |
return model, tokenizer
|
| 50 |
|
| 51 |
def get_summary(text):
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
# Define function to run when submit is clicked
|
| 68 |
def submit(message):
|
| 69 |
if len(message) > 0:
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
st.markdown(html_str, unsafe_allow_html=True)
|
| 81 |
# st.markdown(emoji)
|
|
@@ -86,7 +107,7 @@ def main():
|
|
| 86 |
if st.button("Submit"):
|
| 87 |
submit(message)
|
| 88 |
|
| 89 |
-
with st.expander("Additional
|
| 90 |
st.markdown("""
|
| 91 |
The models used were fine-tuned on subset of data from the [Arxiv Dataset](https://huggingface.co/datasets/arxiv_dataset)
|
| 92 |
The task of the models is to suggest an appropriate title from the abstract of a scientific paper.
|
|
@@ -96,13 +117,15 @@ def main():
|
|
| 96 |
|
| 97 |
The model [shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full](https://huggingface.co/shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full)
|
| 98 |
was trained on the categories: cs.AI, cs.LG, cs.NI, cs.GR cs.CL, cs.CV (Artificial Intelligence, Machine Learning, Networking and Internet Architecture, Graphics, Computation and Language, Computer Vision and Pattern Recognition)
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
|
| 101 |
st.text('\n')
|
| 102 |
st.text('\n')
|
| 103 |
st.markdown(
|
| 104 |
-
'''<span style="color:blue; font-size:10px">App created by [@shamikbose89](https://huggingface.co/shamikbose89)
|
| 105 |
-
|
| 106 |
unsafe_allow_html=True,
|
| 107 |
)
|
| 108 |
|
|
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
+
import arxiv
|
| 6 |
|
| 7 |
|
| 8 |
def main():
|
| 9 |
+
id_provided = True
|
| 10 |
|
| 11 |
st.set_page_config(
|
| 12 |
layout="wide",
|
|
|
|
| 19 |
st.text("")
|
| 20 |
st.text("")
|
| 21 |
|
| 22 |
+
example = st.text_area("Provide the link/id for an arxiv paper", """https://arxiv.org/abs/2111.10339""",
|
| 23 |
+
)
|
| 24 |
+
# st.selectbox("Provide the link/id for an arxiv paper", example_prompts)
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# Take the message which needs to be processed
|
| 27 |
+
message = st.text_area("...or paste a paper's abstract to generate a title")
|
| 28 |
+
if len(message)<1:
|
| 29 |
+
message=example
|
| 30 |
+
id_provided = True
|
| 31 |
+
ids = message.split('/')[-1]
|
| 32 |
+
search = arxiv.Search(id_list=[ids])
|
| 33 |
+
for result in search.results():
|
| 34 |
+
message = result.summary
|
| 35 |
+
title = result.title
|
| 36 |
+
else:
|
| 37 |
+
id_provided = False
|
| 38 |
+
|
| 39 |
st.text("")
|
| 40 |
models_to_choose = [
|
| 41 |
"AryanLala/autonlp-Scientific_Title_Generator-34558227",
|
|
|
|
| 51 |
else:
|
| 52 |
st.error("Please select a model first")
|
| 53 |
|
| 54 |
+
@st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=False)
def load_model():
    """Load the seq2seq model and its tokenizer for the checkpoint in BASE_MODEL.

    The function is wrapped in ``st.cache`` so that Streamlit can reuse the
    loaded objects instead of re-loading them on each call.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    checkpoint = BASE_MODEL
    # The tokenizer is loaded first, then the model, both from the same checkpoint.
    tok = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return seq2seq, tok
|
| 59 |
|
| 60 |
def get_summary(text):
    """Generate a title for *text* with the currently loaded model.

    A Streamlit spinner is shown while the model and tokenizer are obtained
    from ``load_model()``, the input is passed through ``preprocess`` and
    tokenized, and ``model.generate`` is run. The generated ids are decoded
    with special tokens skipped.

    Args:
        text: The abstract to generate a title for.

    Returns:
        The first decoded sequence produced by the model.
    """
    with st.spinner(text="Processing your request"):
        seq2seq, tok = load_model()

        # NOTE(review): `preprocess` is defined outside this block; presumably
        # it normalizes the abstract before tokenization - confirm.
        cleaned = preprocess(text)
        encoded = tok(
            cleaned,
            truncation=True,
            padding="longest",
            return_tensors="pt",
        )

        # NOTE(review): `temperature` is passed alongside beam search without
        # sampling enabled; it likely has no effect here - confirm.
        generation_options = {
            "max_length": 60,
            "num_beams": 10,
            "num_return_sequences": 1,
            "temperature": 1.5,
        }
        generated_ids = seq2seq.generate(**encoded, **generation_options)

        decoded = tok.batch_decode(generated_ids, skip_special_tokens=True)
        # Only one sequence is requested, so the first entry is the result.
        return decoded[0]
|
| 76 |
|
| 77 |
# Define function to run when submit is clicked
|
| 78 |
def submit(message):
|
| 79 |
if len(message) > 0:
|
| 80 |
+
summary = get_summary(message)
|
| 81 |
+
if id_provided:
|
| 82 |
+
html_str = f"""
|
| 83 |
+
<style>
|
| 84 |
+
p.a {{
|
| 85 |
+
font: 20px Courier;
|
| 86 |
+
}}
|
| 87 |
+
</style>
|
| 88 |
+
<p class="a"><b>Title Generated:></b> {summary} </p>
|
| 89 |
+
<p class="a"><b>Original Title:></b> {title} </p>
|
| 90 |
+
"""
|
| 91 |
+
else:
|
| 92 |
+
html_str = f"""
|
| 93 |
+
<style>
|
| 94 |
+
p.a {{
|
| 95 |
+
font: 20px Courier;
|
| 96 |
+
}}
|
| 97 |
+
</style>
|
| 98 |
+
<p class="a"><b>Title Generated:></b> {summary} </p>
|
| 99 |
+
"""
|
| 100 |
|
| 101 |
st.markdown(html_str, unsafe_allow_html=True)
|
| 102 |
# st.markdown(emoji)
|
|
|
|
| 107 |
if st.button("Submit"):
|
| 108 |
submit(message)
|
| 109 |
|
| 110 |
+
with st.expander("Additional Information"):
|
| 111 |
st.markdown("""
|
| 112 |
The models used were fine-tuned on subset of data from the [Arxiv Dataset](https://huggingface.co/datasets/arxiv_dataset)
|
| 113 |
The task of the models is to suggest an appropriate title from the abstract of a scientific paper.
|
|
|
|
| 117 |
|
| 118 |
The model [shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full](https://huggingface.co/shamikbose89/mt5-small-finetuned-arxiv-cs-finetuned-arxiv-cs-full)
|
| 119 |
was trained on the categories: cs.AI, cs.LG, cs.NI, cs.GR cs.CL, cs.CV (Artificial Intelligence, Machine Learning, Networking and Internet Architecture, Graphics, Computation and Language, Computer Vision and Pattern Recognition)
|
| 120 |
+
|
| 121 |
+
Also, <b>Thank you to arXiv for use of its open access interoperability.</b> It allows us to pull the required abstracts from passed ids
|
| 122 |
+
""",unsafe_allow_html=True,)
|
| 123 |
|
| 124 |
st.text('\n')
|
| 125 |
st.text('\n')
|
| 126 |
st.markdown(
|
| 127 |
+
'''<span style="color:blue; font-size:10px">App created by [@akshay7](https://huggingface.co/akshay7), [@AryanLala](https://huggingface.co/AryanLala) and [@shamikbose89](https://huggingface.co/shamikbose89)
|
| 128 |
+
</span>''',
|
| 129 |
unsafe_allow_html=True,
|
| 130 |
)
|
| 131 |
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
transformers==4.12.3
|
| 2 |
torch==1.10.0
|
| 3 |
-
numpy==1.19.2
|
|
|
|
|
|
| 1 |
transformers==4.12.3
|
| 2 |
torch==1.10.0
|
| 3 |
+
numpy==1.19.2
|
| 4 |
+
arxiv==1.4.2
|