Spaces:
Running
Running
adibak
committed on
Commit
·
9c242be
1
Parent(s):
1e2c128
chat uploads, make slider work
Browse files- app.py +32 -10
- helpers/file_manager.py +15 -6
app.py
CHANGED
|
@@ -13,6 +13,7 @@ import httpx
|
|
| 13 |
import huggingface_hub
|
| 14 |
import json5
|
| 15 |
import ollama
|
|
|
|
| 16 |
import requests
|
| 17 |
import streamlit as st
|
| 18 |
from dotenv import load_dotenv
|
|
@@ -260,6 +261,9 @@ def set_up_chat_ui():
|
|
| 260 |
Prepare the chat interface and related functionality.
|
| 261 |
"""
|
| 262 |
print(f"slider={st.session_state["page_range_slider"][0], st.session_state["page_range_slider"][1]}")
|
|
|
|
|
|
|
|
|
|
| 263 |
with st.expander('Usage Instructions'):
|
| 264 |
st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
|
| 265 |
|
|
@@ -287,19 +291,37 @@ def set_up_chat_ui():
|
|
| 287 |
prompt_text = prompt.text or ''
|
| 288 |
if prompt['files']:
|
| 289 |
uploaded_pdf = prompt['files'][0]
|
| 290 |
-
|
| 291 |
-
# valid_pdf_length = min(50, pdf_length)
|
| 292 |
-
|
| 293 |
-
# st.session_state["page_range_slider"] = list(st.session_state["page_range_slider"])
|
| 294 |
-
# st.session_state["page_range_slider"][1] = valid_pdf_length
|
| 295 |
-
# print(f"length={pdf_length}, validated={valid_pdf_length}={st.session_state["page_range_slider"][-1]}")
|
| 296 |
-
|
| 297 |
-
# print(f"fname={uploaded_pdf.name}")
|
| 298 |
# Apparently, Streamlit stores uploaded files in memory and clears on browser close
|
| 299 |
# https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(uploaded_pdf,
|
| 301 |
-
st.session_state["
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
provider, llm_name = llm_helper.get_provider_model(
|
| 305 |
llm_provider_to_use,
|
|
|
|
| 13 |
import huggingface_hub
|
| 14 |
import json5
|
| 15 |
import ollama
|
| 16 |
+
from pypdf import PdfReader
|
| 17 |
import requests
|
| 18 |
import streamlit as st
|
| 19 |
from dotenv import load_dotenv
|
|
|
|
| 261 |
Prepare the chat interface and related functionality.
|
| 262 |
"""
|
| 263 |
print(f"slider={st.session_state["page_range_slider"][0], st.session_state["page_range_slider"][1]}")
|
| 264 |
+
st.session_state["start_page"] = st.session_state["page_range_slider"][0]
|
| 265 |
+
st.session_state["end_page"] = st.session_state["page_range_slider"][1]
|
| 266 |
+
|
| 267 |
with st.expander('Usage Instructions'):
|
| 268 |
st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
|
| 269 |
|
|
|
|
| 291 |
prompt_text = prompt.text or ''
|
| 292 |
if prompt['files']:
|
| 293 |
uploaded_pdf = prompt['files'][0]
|
| 294 |
+
st.session_state["pdf_file"] = uploaded_pdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
# Apparently, Streamlit stores uploaded files in memory and clears on browser close
|
| 296 |
# https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
|
| 297 |
+
|
| 298 |
+
# get validated page range
|
| 299 |
+
st.session_state["start_page"], st.session_state["end_page"] = filem.validate_page_range(uploaded_pdf,
|
| 300 |
+
st.session_state["start_page"],
|
| 301 |
+
st.session_state["end_page"])
|
| 302 |
+
# update sidebar text
|
| 303 |
+
with st.sidebar:
|
| 304 |
+
st.text(f"Extracting pages {st.session_state["start_page"]} to {st.session_state["end_page"]} in {uploaded_pdf.name}")
|
| 305 |
+
|
| 306 |
+
# get pdf contents
|
| 307 |
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(uploaded_pdf,
|
| 308 |
+
(st.session_state["start_page"],
|
| 309 |
+
st.session_state["end_page"]))
|
| 310 |
+
else:
|
| 311 |
+
# if we're using the same file (nothing new uploaded)
|
| 312 |
+
if "start_page" in st.session_state and "end_page" in st.session_state and "pdf_file" in st.session_state:
|
| 313 |
+
# validate the page range
|
| 314 |
+
st.session_state["start_page"], st.session_state["end_page"] = filem.validate_page_range(st.session_state["pdf_file"],
|
| 315 |
+
st.session_state["start_page"],
|
| 316 |
+
st.session_state["end_page"])
|
| 317 |
+
# update sidebar text
|
| 318 |
+
with st.sidebar:
|
| 319 |
+
st.text(f"Extracting pages {st.session_state["start_page"]} to {st.session_state["end_page"]} in {st.session_state["pdf_file"].name}")
|
| 320 |
+
|
| 321 |
+
# get contents
|
| 322 |
+
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(st.session_state["pdf_file"],
|
| 323 |
+
(st.session_state["start_page"], st.session_state["end_page"]))
|
| 324 |
+
|
| 325 |
|
| 326 |
provider, llm_name = llm_helper.get_provider_model(
|
| 327 |
llm_provider_to_use,
|
helpers/file_manager.py
CHANGED
|
@@ -32,13 +32,9 @@ def get_pdf_contents(
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
reader = PdfReader(pdf_file)
|
| 35 |
-
n_pages = len(reader.pages)
|
| 36 |
|
| 37 |
start, end = page_range # set start and end per the range (user-specified values)
|
| 38 |
-
|
| 39 |
-
end = min(n_pages, end)
|
| 40 |
-
if start >= end:
|
| 41 |
-
start = 1
|
| 42 |
print(f"starting at {start}, ending {end}")
|
| 43 |
|
| 44 |
text = ''
|
|
@@ -46,4 +42,17 @@ def get_pdf_contents(
|
|
| 46 |
page = reader.pages[page_num]
|
| 47 |
text += page.extract_text()
|
| 48 |
|
| 49 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
reader = PdfReader(pdf_file)
|
|
|
|
| 35 |
|
| 36 |
start, end = page_range # set start and end per the range (user-specified values)
|
| 37 |
+
|
|
|
|
|
|
|
|
|
|
| 38 |
print(f"starting at {start}, ending {end}")
|
| 39 |
|
| 40 |
text = ''
|
|
|
|
| 42 |
page = reader.pages[page_num]
|
| 43 |
text += page.extract_text()
|
| 44 |
|
| 45 |
+
return text
|
| 46 |
+
|
| 47 |
+
def validate_page_range(pdf_file: st.runtime.uploaded_file_manager.UploadedFile,
                        start:int, end:int) -> tuple[int, int]:
    """
    Clamp a requested page range to the pages actually present in a PDF.

    The start is floored at 1 and the end is capped at the number of pages in
    the document. If the resulting range is inverted (start after end), the
    start falls back to the first page.

    :param pdf_file: The uploaded PDF file; it is opened with `PdfReader` to count pages.
    :param start: The requested first page (values below 1 are raised to 1).
    :param end: The requested last page (values beyond the page count are lowered to it).
    :return: The validated `(start, end)` pair.
    """

    n_pages = len(PdfReader(pdf_file).pages)

    start = max(1, start)
    end = min(n_pages, end)

    # Only an inverted range is invalid. `start == end` is a legitimate
    # single-page selection and must not be widened back to page 1.
    if start > end:
        start = 1

    return start, end
|