Spaces:
Runtime error
Runtime error
import os
import pickle

import pinecone
from langchain.document_loaders import UnstructuredFileLoader, CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, Pinecone
from langchain.vectorstores.faiss import FAISS
# Ingestion script: load posts from a CSV file, split them into chunks,
# embed the chunks with OpenAI embeddings and upload them to a Pinecone index.

# Load Data
# Alternative loader for a single unstructured file:
# loader = UnstructuredFileLoader("output.md")
# raw_documents = loader.load()
loader = CSVLoader(file_path='./posts.csv', source_column="Post Title", encoding='utf-8')
raw_documents = loader.load()

# Split text into chunks (chunk_size=4000, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

# Load Data to vectorstore
embeddings = OpenAIEmbeddings()

# Alternative: build a local FAISS store and pickle it instead of Pinecone.
# vectorstore = FAISS.from_documents(documents, embeddings)
# # Save vectorstore
# with open("posts.pkl", "wb") as f:
#     pickle.dump(vectorstore, f)

# Credentials come from the environment so that no secret lives in source
# control. SECURITY: a Pinecone API key was previously hard-coded on this
# line; treat that key as compromised and rotate it in the Pinecone console.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise RuntimeError("PINECONE_API_KEY environment variable is not set")
# The environment name is not a secret; keep the original value as the default.
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "asia-southeast1-gcp")

# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_API_ENV,  # next to api key in console
)

index_name = "twimbit-answer"

# Upload the chunk texts to the Pinecone index.
# NOTE(review): only page_content is passed here, so the documents' metadata
# is not uploaded with the vectors. Confirm whether that is intended, given
# that the example query below asks for include_metadata=True.
Pinecone.from_texts([t.page_content for t in documents], embeddings, index_name=index_name)

# Example query against the populated index:
# query = "How many neo banks are in india ?"
#
# docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
#
# docs = docsearch.similarity_search(query, include_metadata=True)