# Q&A over local PDFs: LangChain + Pinecone + OpenAI, served via Streamlit.
import os

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
import pinecone
import streamlit as st

# Load .env so the keys below are visible to os.getenv().
load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Fail fast with a clear message: assigning None into os.environ raises an
# opaque TypeError, which is what the original code did when the key was unset.
if OPENAI_API_KEY is None:
    raise RuntimeError("OPENAI_API_KEY is not set; add it to your .env file")
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
def embedding_db():
    """Embed the local PDF corpus and load it into a Pinecone index.

    Returns:
        A langchain ``Pinecone`` vector store backed by the
        ``langchain-demo-indexes`` index, populated with the chunks
        produced by :func:`doc_preprocessing`.
    """
    embeddings = OpenAIEmbeddings()
    # Alias the client class: importing it as bare ``Pinecone`` (as the
    # original did) shadowed the langchain ``Pinecone`` vector store from
    # the module imports, so ``Pinecone.from_documents`` resolved to the
    # client class, which has no such method.
    from pinecone import Pinecone as PineconeClient
    # Instantiating the client registers the API key/environment with the
    # pinecone package, which the langchain wrapper picks up.
    PineconeClient(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV,
    )
    docs_split = doc_preprocessing()
    # NOTE(review): ``client=`` is not an accepted keyword of the langchain
    # Pinecone wrapper's from_documents and was removed.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes',
    )
    return doc_db
def doc_preprocessing():
    """Load every PDF under ``data/`` and split it into 1000-char chunks.

    Returns:
        The list of split document chunks (chunk_size=1000, no overlap).
    """
    pdf_loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',
        show_progress=True,
    )
    documents = pdf_loader.load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return splitter.split_documents(documents)
def retrieval_answer(query, doc_db=None):
    """Answer a natural-language query against the document vector store.

    Args:
        query: The user's question.
        doc_db: An existing vector store to retrieve from. The original
            code read an undefined global ``doc_db`` (NameError on every
            call, since ``main`` built it in its own local scope); when
            omitted, one is now built via :func:`embedding_db`.

    Returns:
        The LLM's answer string from the RetrievalQA chain.
    """
    if doc_db is None:
        doc_db = embedding_db()
    chat_model = ChatOpenAI()
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    result = qa.run(query)
    return result
def main():
    """Streamlit UI: collect a question and display the LLM's answer."""
    st.title("Question and Answering App powered by LLM and Pinecone")
    query = st.text_input("Ask your query...")
    # Only act when the button was pressed AND the box is non-empty.
    if st.button("Ask Query") and len(query) > 0:
        st.info("Your Query: " + query)
        with st.spinner("Processing your query..."):
            # Build the embedding database, then answer the query.
            doc_db = embedding_db()
            answer = retrieval_answer(query)
        st.success(answer)


if __name__ == "__main__":
    main()