Spaces:
Runtime error
Runtime error
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.vectorstores import Chroma | |
| from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings | |
| import os | |
| import time | |
| import streamlit as st | |
def embed_doc(filename, *, chunk_size=1000, chunk_overlap=0,
              model_name="intfloat/e5-base"):
    """Load a PDF, split it into chunks, and index the chunks in Chroma.

    The duration of each stage is written to the Streamlit page with
    ``st.text``.

    Args:
        filename: Path of the PDF handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` passed to
            ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to
            ``RecursiveCharacterTextSplitter``.
        model_name: ``model_name`` passed to ``HuggingFaceEmbeddings``.

    Returns:
        The object returned by ``Chroma.from_documents``, or ``None`` when
        the current working directory contains no entries.
    """
    # NOTE(review): this guard only checks that the working directory is
    # non-empty; it does not check that `filename` exists. Kept unchanged so
    # existing callers see the same behavior -- confirm whether a check on
    # `filename` itself was intended.
    if not os.listdir("."):
        return None

    loader = PyPDFLoader(filename)

    # perf_counter() is monotonic, so the reported durations cannot be
    # skewed by system clock adjustments the way time.time() deltas can.
    start = time.perf_counter()
    raw_documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    documents = text_splitter.split_documents(raw_documents)
    st.text(f"Load and split text: {round(time.perf_counter() - start, 1)}")

    # NOTE(review): this stage times only the construction of the embeddings
    # object; the documents themselves are first handed over in the
    # Chroma.from_documents call below.
    start = time.perf_counter()
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    st.text(f"Embedding time: {round(time.perf_counter() - start, 1)}")

    start = time.perf_counter()
    vectorstore = Chroma.from_documents(documents, embeddings)
    st.text(f"Vectorizing time: {round(time.perf_counter() - start, 1)}")

    return vectorstore