Spaces:
Paused
Paused
| from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, WebBaseLoader | |
| from langchain_community.vectorstores import Qdrant | |
| import os | |
def process_file(file_or_url):
    """Load a web page or an uploaded file into a list of documents.

    Args:
        file_or_url: Either a string starting with ``http://`` or
            ``https://``, or an uploaded-file object exposing ``path`` and
            ``content`` attributes. ``content`` is written with a binary
            file handle, so it is presumably ``bytes`` -- confirm against
            the caller.

    Returns:
        A list with the documents produced by the matching loader:
        ``WebBaseLoader`` for URLs, ``PyMuPDFLoader`` when the upload's
        path ends in ``.pdf``, and ``TextLoader`` for any other upload.
    """
    # URLs need no temporary copy, so load them and return immediately.
    # The earlier version extended ``documents`` here before that name was
    # assigned (UnboundLocalError) and then fell through to the upload
    # handling below, which reads attributes a plain string does not have.
    if isinstance(file_or_url, str) and file_or_url.startswith(('http://', 'https://')):
        return list(WebBaseLoader(file_or_url).load())

    # Write the upload to a path relative to the working directory so the
    # path-based loaders can open it.
    # NOTE(review): this copy is never removed afterwards -- confirm whether
    # callers rely on it staying on disk before adding cleanup.
    temp_file = "./" + file_or_url.path
    with open(temp_file, "wb") as file:
        file.write(file_or_url.content)

    # Pick the loader from the upload's extension; everything that is not a
    # PDF is read as plain text.
    if file_or_url.path.endswith(".pdf"):
        loader = PyMuPDFLoader(temp_file)
    else:
        loader = TextLoader(temp_file)

    documents = []
    documents.extend(loader.load())
    return documents
def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
    """Pass ``documents`` and ``embeddings`` to ``Qdrant.from_documents``.

    Args:
        documents: Documents forwarded as the first positional argument.
        embeddings: Embeddings object forwarded as the second positional
            argument.
        qdrant_client: Only its ``url`` attribute is read here; the client
            object itself is not passed on.
        collection_name: Name handed over as ``collection_name``.

    Returns:
        None. Whatever ``Qdrant.from_documents`` returns is discarded.
    """
    # Connection settings are gathered first so the call itself stays short.
    store_options = {
        "url": qdrant_client.url,
        "prefer_grpc": True,
        "collection_name": collection_name,
    }
    Qdrant.from_documents(documents, embeddings, **store_options)