Spaces:

Anne31415
/

Public_BookBot

Sleeping

App Files Files Community

Anne31415 committed on Feb 6, 2024

Commit

4a2f6f3

verified ·

1 Parent(s): 9a6164d

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -42

app.py CHANGED Viewed

@@ -16,12 +16,14 @@ from langchain.callbacks import get_openai_callback
 import os
 import uuid
 import json
 import pandas as pd
 import pydeck as pdk
 from urllib.error import URLError
 # Initialize session state variables
 if 'chat_history_page1' not in st.session_state:
     st.session_state['chat_history_page1'] = []
@@ -59,6 +61,8 @@ repo.git_pull()  # Pull the latest changes (if any)
 # Step 2: Load the PDF File
 pdf_path = "Private_Book/KH_Reform230124.pdf"  # Replace with your PDF file path
 pdf_path2 = "Private_Book/Buch_23012024.pdf"
@@ -68,6 +72,32 @@ pdf_path3 = "Private_Book/Kosten_Strukturdaten_RAG_vorbereited.pdf"
 api_key = os.getenv("OPENAI_API_KEY")
 # Retrieve the API key from st.secrets
 @st.cache_resource
@@ -115,6 +145,8 @@ def load_vector_store(file_path, store_name, force_reload=False):
     return VectorStore
 # Utility function to load text from a PDF
 def load_pdf_text(file_path):
     pdf_reader = PdfReader(file_path)
@@ -478,6 +510,8 @@ def page2():
 def page3():
     try:
         hide_streamlit_style = """
@@ -488,7 +522,7 @@ def page3():
                 """
         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-         # Create columns for layout
         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
         with col1:
@@ -499,14 +533,13 @@ def page3():
             image = Image.open('BinDoc Logo (Quadratisch).png')
             st.image(image, use_column_width='always')
-        if not os.path.exists(pdf_path2):
             st.error("File not found. Please check the file path.")
             return
-        VectorStore = load_vector_store(pdf_path3, "Kosten_Str_2301", force_reload=True)
         display_chat_history(st.session_state['chat_history_page3'])
@@ -524,51 +557,42 @@ def page3():
         col1, col2 = st.columns(2)
         with col1:
-            if st.button("Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"):
-                query = "Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"
-            if st.button("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?"):
-                query = ("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?")
-            if st.button("Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern?"):
-                query = "Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern? "
         with col2:
-            if st.button("Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"):
-                query = "Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"
-            if st.button("Welche Sachkosten werden in Krankenhäusern unterschieden?"):
-                query = "Welche Sachkosten werden in Krankenhäusern unterschieden? "
-            if st.button("Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"):
-                query = "Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"
         if query:
             full_query = ask_bot(query)
             st.session_state['chat_history_page3'].append(("User", query, "new"))
-            # Start timing
             start_time = time.time()
-            # Create a placeholder for the response time
-            response_time_placeholder = st.empty()
-            with st.spinner('Eve denkt über Ihre Frage nach...'):
-                chain = load_chatbot()
-                docs = VectorStore.similarity_search(query=query, k=5)
-                with get_openai_callback() as cb:
-                    response = chain.run(input_documents=docs, question=full_query)
-                    response = handle_no_answer(response)  # Process the response through the new function
-            # Stop timing
             end_time = time.time()
-            # Calculate duration
             duration = end_time - start_time
             st.session_state['chat_history_page3'].append(("Eve", response, "new"))
             # Combine chat histories from all pages
             all_chat_histories = [
                 st.session_state['chat_history_page1'],
@@ -579,7 +603,6 @@ def page3():
             # Save the combined chat histories
             save_conversation(all_chat_histories, st.session_state['session_id'])
             # Display new messages at the bottom
             new_messages = st.session_state['chat_history_page3'][-2:]
             for chat in new_messages:
@@ -589,7 +612,6 @@ def page3():
             # Update the response time placeholder after the messages are displayed
             response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
             # Clear the input field after the query is made
             query = ""
@@ -600,6 +622,9 @@ def page3():
         st.error(f"Upsi, an unexpected error occurred: {e}")
         # Optionally log the exception details to a file or error tracking service
 def page4():
     try:
         st.header(":mailbox: Kontakt & Feedback!")

 import os
 import uuid
 import json
 import pandas as pd
 import pydeck as pdk
 from urllib.error import URLError
+import chromadb
+# In-memory Chroma client. `client` is the name originally bound here; `chroma_client`
+# is the name the following line (and the rest of the file) actually uses.
+client = chromadb.Client()
+chroma_client = client
+# get_or_create_collection returns the existing collection instead of raising when
+# "Kosten_Strukturdaten" has already been created in this process.
+collection = chroma_client.get_or_create_collection(name="Kosten_Strukturdaten")
 # Initialize session state variables
 if 'chat_history_page1' not in st.session_state:
     st.session_state['chat_history_page1'] = []
 # Step 2: Load the PDF File
 pdf_path = "Private_Book/KH_Reform230124.pdf"  # Replace with your PDF file path
 pdf_path2 = "Private_Book/Buch_23012024.pdf"
 api_key = os.getenv("OPENAI_API_KEY")
 # Retrieve the API key from st.secrets
+import chromadb
+# Chroma client used for the cost/structure-data collection
+chroma_client = chromadb.Client()
+# Fetch the collection for the embeddings, creating it only if it does not exist yet.
+# create_collection raises when the name is already taken, which happens as soon as
+# this code runs a second time in the same process.
+collection_name = "Kosten_Strukturdaten"
+collection = chroma_client.get_or_create_collection(name=collection_name)
+# Function to extract text from a PDF file
+def extract_text_from_pdf(pdf_path):
+    """Return the concatenated text of every page of the PDF at ``pdf_path``.
+
+    Each page's extracted text is followed by a single space, so the result
+    ends with a trailing space whenever the PDF has at least one page, and
+    is the empty string when it has none.
+    """
+    pages = PdfReader(pdf_path).pages
+    # The space appended after each page separates the text of adjacent pages.
+    return "".join(page.extract_text() + " " for page in pages)
+# Extract the text of the cost/structure PDF when the module is executed
+pdf_text = extract_text_from_pdf(pdf_path3)
+# Add the extracted text to the Chroma collection as one document.
+# documents, metadatas and ids are parallel lists: one entry per document.
+collection.add(
+    documents=[pdf_text],
+    metadatas=[{"source": pdf_path3}],  # Add any relevant metadata for your document
+    ids=["Kosten_Strukturdaten"],
+)
 @st.cache_resource
     return VectorStore
 # Utility function to load text from a PDF
 def load_pdf_text(file_path):
     pdf_reader = PdfReader(file_path)
+# Correcting the indentation error and completing the Chroma database integration in page3()
 def page3():
     try:
         hide_streamlit_style = """
                 """
         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+        # Create columns for layout
         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
         with col1:
             image = Image.open('BinDoc Logo (Quadratisch).png')
             st.image(image, use_column_width='always')
+        if not os.path.exists(pdf_path3):
             st.error("File not found. Please check the file path.")
             return
+        # Initialize Chroma client and collection
+        chroma_client = chromadb.Client()
+        collection = chroma_client.create_collection(name="Kosten_Strukturdaten")
         display_chat_history(st.session_state['chat_history_page3'])
         col1, col2 = st.columns(2)
         with col1:
+            if st.button("Test1"):
+                query = "Test1"
         with col2:
+            if st.button("Test2"):
+                query = "Test2"
+        # Handling query input
         if query:
             full_query = ask_bot(query)
             st.session_state['chat_history_page3'].append(("User", query, "new"))
+            # Start timing for response
             start_time = time.time()
+            # Query the Chroma collection
+            results = collection.query(
+                query_texts=[full_query],
+                n_results=5  # Adjust the number of results as needed
+            )
+            # Calculate the response duration
             end_time = time.time()
             duration = end_time - start_time
+            # Process and display the response from the Chroma results
+            if results:
+                # TODO: Adjust the following logic to Chroma's actual result structure: query() returns a dict of per-query lists (e.g. results["documents"][0]), not a list of rows
+                response = f"Top result: {results[0]['text']}"  # Example response using the first result
+            else:
+                response = "No results found for your query."
             st.session_state['chat_history_page3'].append(("Eve", response, "new"))
             # Combine chat histories from all pages
             all_chat_histories = [
                 st.session_state['chat_history_page1'],
             # Save the combined chat histories
             save_conversation(all_chat_histories, st.session_state['session_id'])
             # Display new messages at the bottom
             new_messages = st.session_state['chat_history_page3'][-2:]
             for chat in new_messages:
             # Update the response time placeholder after the messages are displayed
             response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
             # Clear the input field after the query is made
             query = ""
         st.error(f"Upsi, an unexpected error occurred: {e}")
         # Optionally log the exception details to a file or error tracking service
 def page4():
     try:
         st.header(":mailbox: Kontakt & Feedback!")