Spaces:
Paused
Paused
Commit
·
b745365
1
Parent(s):
cc60679
file loading testing
Browse files
- app.py +4 -3
- helper_functions.py +12 -20
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import chainlit as cl
|
| 2 |
-
from helper_functions import process_file, load_documents_from_url,
|
| 3 |
import models
|
| 4 |
import agents
|
| 5 |
import asyncio
|
|
@@ -44,7 +44,7 @@ async def main(message: cl.Message):
|
|
| 44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 45 |
|
| 46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
| 47 |
-
|
| 48 |
except Exception as e:
|
| 49 |
await cl.Message(f"Error processing the document: {e}").send()
|
| 50 |
else:
|
|
@@ -85,7 +85,8 @@ async def handle_response(res):
|
|
| 85 |
await msg.send()
|
| 86 |
|
| 87 |
# load the file
|
| 88 |
-
|
|
|
|
| 89 |
await cl.Message(content="loaded docs").send()
|
| 90 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
| 91 |
await cl.Message(content="split docs").send()
|
|
|
|
| 1 |
import chainlit as cl
|
| 2 |
+
from helper_functions import process_file, load_documents_from_url, store_uploaded_file
|
| 3 |
import models
|
| 4 |
import agents
|
| 5 |
import asyncio
|
|
|
|
| 44 |
await asyncio.to_thread(qdrant_store.add_documents, splits)
|
| 45 |
|
| 46 |
await cl.Message(f"Processing `{message.content}` done. You can now ask questions!").send()
|
| 47 |
+
|
| 48 |
except Exception as e:
|
| 49 |
await cl.Message(f"Error processing the document: {e}").send()
|
| 50 |
else:
|
|
|
|
| 85 |
await msg.send()
|
| 86 |
|
| 87 |
# load the file
|
| 88 |
+
file_path = store_uploaded_file(file)
|
| 89 |
+
docs = await asyncio.to_thread(process_file, file_path)
|
| 90 |
await cl.Message(content="loaded docs").send()
|
| 91 |
splits = await asyncio.to_thread(models.semanticChunker_tuned.split_documents, docs)
|
| 92 |
await cl.Message(content="split docs").send()
|
helper_functions.py
CHANGED
|
@@ -8,32 +8,24 @@ from langchain_core.language_models import BaseLanguageModel
|
|
| 8 |
import os
|
| 9 |
import functools
|
| 10 |
import requests
|
|
|
|
| 11 |
|
| 12 |
-
def
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# Determine the file type and load it accordingly
|
| 21 |
-
if uploaded_file.name.endswith(".pdf"):
|
| 22 |
# Load PDF with PyMuPDFLoader
|
| 23 |
-
loader = PyMuPDFLoader(
|
| 24 |
-
elif
|
| 25 |
# Load text file with TextLoader
|
| 26 |
-
loader = TextLoader(
|
| 27 |
else:
|
| 28 |
raise ValueError("Unsupported file format. Only PDF and TXT are supported.")
|
| 29 |
|
| 30 |
-
|
| 31 |
-
documents = loader.load()
|
| 32 |
-
|
| 33 |
-
# Clean up the temporary file
|
| 34 |
-
os.remove(temp_file_path)
|
| 35 |
-
|
| 36 |
-
return documents
|
| 37 |
|
| 38 |
def load_documents_from_url(url):
|
| 39 |
try:
|
|
|
|
| 8 |
import os
|
| 9 |
import functools
|
| 10 |
import requests
|
| 11 |
+
from chainlit.types import AskFileResponse
|
| 12 |
|
| 13 |
+
def store_uploaded_file(uploaded_file: "AskFileResponse"):
    """Write an uploaded file's bytes under ``./tmp`` and return the path.

    Args:
        uploaded_file: Object exposing ``name`` (the file name supplied with
            the upload) and ``content`` (the bytes-like payload to write).

    Returns:
        The path of the written file, as the string ``./tmp/<file name>``.

    Raises:
        ValueError: If the supplied name has no usable file-name component.
        OSError: If the directory or file cannot be created or written.
    """
    upload_dir = "./tmp"

    # The name comes from the client, so keep only its final component;
    # otherwise something like "../../app.py" would be written outside
    # the upload directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    # The directory may not exist yet on a fresh checkout or container.
    os.makedirs(upload_dir, exist_ok=True)

    file_path = f"{upload_dir}/{safe_name}"
    # Context manager guarantees the handle is flushed and closed before the
    # caller hands the path to a loader.
    with open(file_path, "wb") as destination:
        destination.write(uploaded_file.content)
    return file_path
|
| 17 |
+
|
| 18 |
+
def process_file(file_path):
    """Load a PDF or plain-text file with the loader matching its extension.

    Args:
        file_path: Path string of the file to load. The extension decides
            the loader: ``.pdf`` uses ``PyMuPDFLoader``, ``.txt`` uses
            ``TextLoader``. The match is case-insensitive.

    Returns:
        Whatever the selected loader's ``load()`` returns.

    Raises:
        ValueError: If the path ends in neither ``.pdf`` nor ``.txt``.
    """
    # Compare against a lower-cased copy so "REPORT.PDF" or "notes.TXT" are
    # accepted; the loader still receives the path exactly as given.
    lowered = file_path.lower()

    if lowered.endswith(".pdf"):
        # Load PDF with PyMuPDFLoader
        loader = PyMuPDFLoader(file_path)
    elif lowered.endswith(".txt"):
        # Load text file with TextLoader
        loader = TextLoader(file_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and TXT are supported.")

    return loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def load_documents_from_url(url):
|
| 31 |
try:
|