Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,7 +39,7 @@ repo = Repository(
|
|
| 39 |
repo.git_pull() # Pull the latest changes (if any)
|
| 40 |
|
| 41 |
# Step 2: Load the PDF File
|
| 42 |
-
pdf_path = "Private_Book/
|
| 43 |
|
| 44 |
# Step 2: Load the PDF File
|
| 45 |
pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf" # Replace with your PDF file path
|
|
@@ -53,29 +53,32 @@ api_key = os.getenv("OPENAI_API_KEY")
|
|
| 53 |
# Updated caching mechanism using st.cache_data
|
| 54 |
@st.cache_data(persist="disk") # Using persist="disk" to save cache across sessions
|
| 55 |
def load_vector_store(file_path, store_name, force_reload=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
# Check if we need to force reload the vector store (e.g., when the PDF changes)
|
| 58 |
-
if force_reload or not os.path.exists(f"{store_name}.pkl"):
|
| 59 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
| 60 |
-
chunk_size=1000,
|
| 61 |
-
chunk_overlap=200,
|
| 62 |
-
length_function=len
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
text = load_pdf_text(file_path)
|
| 66 |
-
chunks = text_splitter.split_text(text=text)
|
| 67 |
-
|
| 68 |
-
embeddings = OpenAIEmbeddings()
|
| 69 |
-
VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
|
| 70 |
-
VectorStore.save_local("faiss_store")
|
| 71 |
-
FAISS.load_local("faiss_store", OpenAIEmbeddings())
|
| 72 |
-
with open(f"{store_name}.pkl", "wb") as f:
|
| 73 |
-
pickle.dump(VectorStore, f)
|
| 74 |
-
else:
|
| 75 |
-
with open(f"{store_name}.pkl", "rb") as f:
|
| 76 |
-
VectorStore = pickle.load(f)
|
| 77 |
-
|
| 78 |
-
return VectorStore
|
| 79 |
|
| 80 |
# Utility function to load text from a PDF
|
| 81 |
def load_pdf_text(file_path):
|
|
@@ -119,6 +122,17 @@ def handle_no_answer(response):
|
|
| 119 |
"i cannot answer that",
|
| 120 |
"unable to provide an answer",
|
| 121 |
"not enough context",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
]
|
| 123 |
|
| 124 |
alternative_responses = [
|
|
@@ -184,12 +198,12 @@ def page1():
|
|
| 184 |
col1, col2 = st.columns(2)
|
| 185 |
|
| 186 |
with col1:
|
| 187 |
-
if st.button("
|
| 188 |
-
query = "
|
| 189 |
-
if st.button("
|
| 190 |
-
query = "
|
| 191 |
-
if st.button("
|
| 192 |
-
query = "
|
| 193 |
|
| 194 |
|
| 195 |
with col2:
|
|
|
|
| 39 |
repo.git_pull() # Pull the latest changes (if any)
|
| 40 |
|
| 41 |
# Step 2: Load the PDF File
|
| 42 |
+
pdf_path = "Private_Book/09012024_Kombi_2.pdf" # Replace with your PDF file path
|
| 43 |
|
| 44 |
# Step 2: Load the PDF File
|
| 45 |
pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf" # Replace with your PDF file path
|
|
|
|
| 53 |
# Updated caching mechanism using st.cache_data
|
| 54 |
@st.cache_data(persist="disk") # Using persist="disk" to save cache across sessions
|
| 55 |
def load_vector_store(file_path, store_name, force_reload=False):
|
| 56 |
+
vector_store_path = f"{store_name}.pkl"
|
| 57 |
+
|
| 58 |
+
# Check if vector store already exists and force_reload is False
|
| 59 |
+
if not force_reload and os.path.exists(vector_store_path):
|
| 60 |
+
with open(vector_store_path, "rb") as f:
|
| 61 |
+
VectorStore = pickle.load(f)
|
| 62 |
+
else:
|
| 63 |
+
# Load and process the PDF, then create the vector store
|
| 64 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
| 65 |
+
chunk_size=1000, chunk_overlap=200, length_function=len)
|
| 66 |
+
text = load_pdf_text(file_path)
|
| 67 |
+
chunks = text_splitter.split_text(text=text)
|
| 68 |
+
embeddings = OpenAIEmbeddings()
|
| 69 |
+
VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
|
| 70 |
+
|
| 71 |
+
# Serialize the vector store
|
| 72 |
+
with open(vector_store_path, "wb") as f:
|
| 73 |
+
pickle.dump(VectorStore, f)
|
| 74 |
+
|
| 75 |
+
# Commit and push changes to the repository
|
| 76 |
+
repo.git_add(vector_store_path)
|
| 77 |
+
repo.git_commit(f"Update vector store: {store_name}")
|
| 78 |
+
repo.git_push()
|
| 79 |
+
|
| 80 |
+
return VectorStore
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
# Utility function to load text from a PDF
|
| 84 |
def load_pdf_text(file_path):
|
|
|
|
| 122 |
"i cannot answer that",
|
| 123 |
"unable to provide an answer",
|
| 124 |
"not enough context",
|
| 125 |
+
"Sorry, I do not have enough information",
|
| 126 |
+
"I do not have enough information",
|
| 127 |
+
"I don't have enough information",
|
| 128 |
+
"Sorry, I don't have enough context to answer that question.",
|
| 129 |
+
"I don't have enough context to answer that question.",
|
| 130 |
+
"to answer that question.",
|
| 131 |
+
"Sorry",
|
| 132 |
+
"I'm sorry",
|
| 133 |
+
"I don't understand the question",
|
| 134 |
+
"I don't understand"
|
| 135 |
+
|
| 136 |
]
|
| 137 |
|
| 138 |
alternative_responses = [
|
|
|
|
| 198 |
col1, col2 = st.columns(2)
|
| 199 |
|
| 200 |
with col1:
|
| 201 |
+
if st.button("Welche Geräte müssen für die LG Geriatrie vorgehalten werden? "):
|
| 202 |
+
query = "Welche Geräte müssen für die LG Geriatrie vorgehalten werden? "
|
| 203 |
+
if st.button("Welche ärztlichen Vorgaben gibt es für die LG Palliativmedizin?"):
|
| 204 |
+
query = "Welche ärztlichen Vorgaben gibt es für die LG Palliativmedizin?"
|
| 205 |
+
if st.button("Wie haben sich die DiGA in den letzten Jahren entwickelt? Kannst du mir Daten nennen?"):
|
| 206 |
+
query = "Wie haben sich die DiGA in den letzten Jahren entwickelt? Kannst du mir Daten nennen?"
|
| 207 |
|
| 208 |
|
| 209 |
with col2:
|