Spaces:
Running
Running
reorganise a bit the interface
Browse files- streamlit_app.py +35 -18
streamlit_app.py
CHANGED
|
@@ -51,11 +51,24 @@ if 'ner_processing' not in st.session_state:
|
|
| 51 |
if 'uploaded' not in st.session_state:
|
| 52 |
st.session_state['uploaded'] = False
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
def new_file():
|
| 55 |
st.session_state['loaded_embeddings'] = None
|
| 56 |
st.session_state['doc_id'] = None
|
| 57 |
st.session_state['uploaded'] = True
|
| 58 |
|
|
|
|
| 59 |
# @st.cache_resource
|
| 60 |
def init_qa(model):
|
| 61 |
if model == 'chatgpt-3.5-turbo':
|
|
@@ -134,59 +147,63 @@ def play_old_messages():
|
|
| 134 |
# is_api_key_provided = st.session_state['api_key']
|
| 135 |
|
| 136 |
with st.sidebar:
|
| 137 |
-
st.markdown(
|
| 138 |
-
":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
|
| 139 |
-
|
| 140 |
st.session_state['model'] = model = st.radio(
|
| 141 |
"Model",
|
| 142 |
("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"), # , "llama-2-70b-chat"),
|
| 143 |
index=1,
|
| 144 |
captions=[
|
| 145 |
"ChatGPT 3.5 Turbo + Ada-002-text (embeddings)",
|
| 146 |
-
"Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)"
|
| 147 |
-
# "LLama2-70B-Chat + Sentence BERT (embeddings)",
|
| 148 |
],
|
| 149 |
help="Select the LLM model and embeddings you want to use.",
|
| 150 |
disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
|
| 151 |
|
|
|
|
|
|
|
|
|
|
| 152 |
if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
|
| 153 |
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
| 154 |
api_key = st.text_input('Huggingface API Key', type="password")
|
| 155 |
|
| 156 |
st.markdown(
|
| 157 |
-
"Get it
|
| 158 |
else:
|
| 159 |
api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
|
| 160 |
|
| 161 |
if api_key:
|
| 162 |
# st.session_state['api_key'] = is_api_key_provided = True
|
| 163 |
-
|
| 164 |
-
st.
|
| 165 |
-
|
| 166 |
-
os.environ
|
| 167 |
-
|
|
|
|
| 168 |
|
| 169 |
elif model == 'chatgpt-3.5-turbo':
|
| 170 |
if 'OPENAI_API_KEY' not in os.environ:
|
| 171 |
api_key = st.text_input('OpenAI API Key', type="password")
|
| 172 |
st.markdown(
|
| 173 |
-
"Get it
|
| 174 |
else:
|
| 175 |
api_key = os.environ['OPENAI_API_KEY']
|
| 176 |
|
| 177 |
if api_key:
|
| 178 |
# st.session_state['api_key'] = is_api_key_provided = True
|
| 179 |
-
|
| 180 |
-
st.
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
| 184 |
# else:
|
| 185 |
# is_api_key_provided = st.session_state['api_key']
|
| 186 |
|
| 187 |
st.title("π Scientific Document Insight Q&A")
|
| 188 |
st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
|
| 189 |
|
|
|
|
|
|
|
| 190 |
uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
|
| 191 |
disabled=st.session_state['model'] is not None and st.session_state['model'] not in
|
| 192 |
st.session_state['api_keys'],
|
|
@@ -220,7 +237,7 @@ with st.sidebar:
|
|
| 220 |
st.header("Documentation")
|
| 221 |
st.markdown("https://github.com/lfoppiano/document-qa")
|
| 222 |
st.markdown(
|
| 223 |
-
"""
|
| 224 |
|
| 225 |
if st.session_state['git_rev'] != "unknown":
|
| 226 |
st.markdown("**Revision number**: [" + st.session_state[
|
|
|
|
| 51 |
if 'uploaded' not in st.session_state:
|
| 52 |
st.session_state['uploaded'] = False
|
| 53 |
|
| 54 |
+
st.set_page_config(
|
| 55 |
+
page_title="Document Insights QA",
|
| 56 |
+
page_icon="π",
|
| 57 |
+
initial_sidebar_state="expanded",
|
| 58 |
+
menu_items={
|
| 59 |
+
'Get Help': 'https://github.com/lfoppiano/document-qa',
|
| 60 |
+
'Report a bug': "https://github.com/lfoppiano/document-qa/issues",
|
| 61 |
+
'About': "Upload a scientific article in PDF, ask questions, get insights."
|
| 62 |
+
}
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
def new_file():
|
| 67 |
st.session_state['loaded_embeddings'] = None
|
| 68 |
st.session_state['doc_id'] = None
|
| 69 |
st.session_state['uploaded'] = True
|
| 70 |
|
| 71 |
+
|
| 72 |
# @st.cache_resource
|
| 73 |
def init_qa(model):
|
| 74 |
if model == 'chatgpt-3.5-turbo':
|
|
|
|
| 147 |
# is_api_key_provided = st.session_state['api_key']
|
| 148 |
|
| 149 |
with st.sidebar:
|
|
|
|
|
|
|
|
|
|
| 150 |
st.session_state['model'] = model = st.radio(
|
| 151 |
"Model",
|
| 152 |
("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"), # , "llama-2-70b-chat"),
|
| 153 |
index=1,
|
| 154 |
captions=[
|
| 155 |
"ChatGPT 3.5 Turbo + Ada-002-text (embeddings)",
|
| 156 |
+
"Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings) :free:"
|
| 157 |
+
# "LLama2-70B-Chat + Sentence BERT (embeddings) :free:",
|
| 158 |
],
|
| 159 |
help="Select the LLM model and embeddings you want to use.",
|
| 160 |
disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
|
| 161 |
|
| 162 |
+
st.markdown(
|
| 163 |
+
":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ")
|
| 164 |
+
|
| 165 |
if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
|
| 166 |
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
| 167 |
api_key = st.text_input('Huggingface API Key', type="password")
|
| 168 |
|
| 169 |
st.markdown(
|
| 170 |
+
"Get it [here](https://huggingface.co/docs/hub/security-tokens)")
|
| 171 |
else:
|
| 172 |
api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
|
| 173 |
|
| 174 |
if api_key:
|
| 175 |
# st.session_state['api_key'] = is_api_key_provided = True
|
| 176 |
+
if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
|
| 177 |
+
with st.spinner("Preparing environment"):
|
| 178 |
+
st.session_state['api_keys'][model] = api_key
|
| 179 |
+
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
|
| 180 |
+
os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
|
| 181 |
+
st.session_state['rqa'][model] = init_qa(model)
|
| 182 |
|
| 183 |
elif model == 'chatgpt-3.5-turbo':
|
| 184 |
if 'OPENAI_API_KEY' not in os.environ:
|
| 185 |
api_key = st.text_input('OpenAI API Key', type="password")
|
| 186 |
st.markdown(
|
| 187 |
+
"Get it [here](https://platform.openai.com/account/api-keys)")
|
| 188 |
else:
|
| 189 |
api_key = os.environ['OPENAI_API_KEY']
|
| 190 |
|
| 191 |
if api_key:
|
| 192 |
# st.session_state['api_key'] = is_api_key_provided = True
|
| 193 |
+
if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
|
| 194 |
+
with st.spinner("Preparing environment"):
|
| 195 |
+
st.session_state['api_keys'][model] = api_key
|
| 196 |
+
if 'OPENAI_API_KEY' not in os.environ:
|
| 197 |
+
os.environ['OPENAI_API_KEY'] = api_key
|
| 198 |
+
st.session_state['rqa'][model] = init_qa(model)
|
| 199 |
# else:
|
| 200 |
# is_api_key_provided = st.session_state['api_key']
|
| 201 |
|
| 202 |
st.title("π Scientific Document Insight Q&A")
|
| 203 |
st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
|
| 204 |
|
| 205 |
+
st.markdown(":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
|
| 206 |
+
|
| 207 |
uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
|
| 208 |
disabled=st.session_state['model'] is not None and st.session_state['model'] not in
|
| 209 |
st.session_state['api_keys'],
|
|
|
|
| 237 |
st.header("Documentation")
|
| 238 |
st.markdown("https://github.com/lfoppiano/document-qa")
|
| 239 |
st.markdown(
|
| 240 |
+
"""Upload a scientific article as PDF document. Once the spinner stops, you can proceed to ask your questions.""")
|
| 241 |
|
| 242 |
if st.session_state['git_rev'] != "unknown":
|
| 243 |
st.markdown("**Revision number**: [" + st.session_state[
|