Spaces:

Pixeltable
/

AI-Chatbot-With-Retrieval-Augmented-Generation

Running

App Files Community

PierreBrunelle committed on Oct 7, 2024

Commit

210dd42

verified ·

1 Parent(s): 0e0864e

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -27

app.py CHANGED Viewed

@@ -45,7 +45,7 @@ def process_files(pdf_files, chunk_limit, chunk_separator):
     {'document': pxt.DocumentType(nullable=True),
      'question': pxt.StringType(nullable=True)}
     )
     # Insert the PDF files into the documents table
     t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))
@@ -64,19 +64,16 @@ def process_files(pdf_files, chunk_limit, chunk_separator):
     # Add an embedding index to the chunks for similarity search
     chunks_t.add_embedding_index('text', string_embed=e5_embed)
-    try:
-      @chunks_t.query
-      def top_k(query_text: str):
-          sim = chunks_t.text.similarity(query_text)
-          return (
-              chunks_t.order_by(sim, asc=False)
-                  .select(chunks_t.text, sim=sim)
-                  .limit(5)
-          )
-    except Exception:
-      pass
-     # Add computed columns to the table for context retrieval and prompt creation
     t['question_context'] = chunks_t.top_k(t.question)
     t['prompt'] = create_prompt(
         t.question_context, t.question
@@ -115,11 +112,16 @@ def get_answer(msg):
     # Insert the question into the table
     t.insert([{'question': msg}])
-    answer = t.select(t.gpt4omini).tail(1)['gpt4omini'][0]
     return answer
 # Gradio interface
 with gr.Blocks(theme=Monochrome()) as demo:
     gr.Markdown(
@@ -139,9 +141,9 @@ with gr.Blocks(theme=Monochrome()) as demo:
     )
     with gr.Row():
-        with gr.Column():
             pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
-            chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit (only used when the separator is token_/char_limit)")
             chunk_separator = gr.Dropdown(
                 choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
                 value="token_limit",
@@ -150,18 +152,13 @@ with gr.Blocks(theme=Monochrome()) as demo:
             process_button = gr.Button("Process Files")
             process_output = gr.Textbox(label="Processing Output")
-        with gr.Column():
             chatbot = gr.Chatbot(label="Chat History")
-            msg = gr.Textbox(label="Your Question")
             submit = gr.Button("Submit")
-    def respond(message, chat_history):
-        bot_message = get_answer(message)
-        chat_history.append((message, bot_message))
-        return "", chat_history
-    submit.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
     process_button.click(process_files, inputs=[pdf_files, chunk_limit, chunk_separator], outputs=[process_output])
 if __name__ == "__main__":
-    demo.launch(debug=True)

     {'document': pxt.DocumentType(nullable=True),
      'question': pxt.StringType(nullable=True)}
     )
     # Insert the PDF files into the documents table
     t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))
     # Add an embedding index to the chunks for similarity search
     chunks_t.add_embedding_index('text', string_embed=e5_embed)
+    @chunks_t.query
+    def top_k(query_text: str):
+        sim = chunks_t.text.similarity(query_text)
+        return (
+            chunks_t.order_by(sim, asc=False)
+                .select(chunks_t.text, sim=sim)
+                .limit(5)
+        )
+    # Add computed columns to the table for context retrieval and prompt creation
     t['question_context'] = chunks_t.top_k(t.question)
     t['prompt'] = create_prompt(
         t.question_context, t.question
     # Insert the question into the table
     t.insert([{'question': msg}])
+    answer = t.select(t.gpt4omini).where(t.question == msg).collect()['gpt4omini'][0]
     return answer
+def respond(message, chat_history):
+    bot_message = get_answer(message)
+    chat_history.append((message, bot_message))
+    return "", chat_history
 # Gradio interface
 with gr.Blocks(theme=Monochrome()) as demo:
     gr.Markdown(
     )
     with gr.Row():
+        with gr.Column(scale=1):
             pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
+            chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit")
             chunk_separator = gr.Dropdown(
                 choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
                 value="token_limit",
             process_button = gr.Button("Process Files")
             process_output = gr.Textbox(label="Processing Output")
+        with gr.Column(scale=2):
             chatbot = gr.Chatbot(label="Chat History")
+            msg = gr.Textbox(label="Your Question", placeholder="Ask a question about the uploaded documents")
             submit = gr.Button("Submit")
     process_button.click(process_files, inputs=[pdf_files, chunk_limit, chunk_separator], outputs=[process_output])
+    submit.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
 if __name__ == "__main__":
+    demo.launch()