Spaces:

HanLee
/

Demo

Sleeping

App Files Files Community

HanLee commited on Nov 25, 2023

Commit

ceaa8ef

1 Parent(s): 4a49d79

feat: 01_06 End

Browse files

Files changed (2) hide show

app/app.py +2 -200
app/prompt.py +0 -26

app/app.py CHANGED Viewed

@@ -1,207 +1,9 @@
-# Chroma compatibility issue resolution
-# https://docs.trychroma.com/troubleshooting#sqlite
-__import__('pysqlite3')
-import sys
-sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-from tempfile import NamedTemporaryFile
 import chainlit as cl
-from chainlit.types import AskFileResponse
-import chromadb
-from chromadb.config import Settings
-from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
-from langchain.chains.base import Chain
-from langchain.chat_models import ChatOpenAI
-from langchain.document_loaders import PDFPlumberLoader
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
-from langchain.vectorstores.base import VectorStore
-from prompt import EXAMPLE_PROMPT, PROMPT, WELCOME_MESSAGE
-namespaces = set()
-def process_file(*, file: AskFileResponse) -> list:
-    if file.type != "application/pdf":
-        raise TypeError("Only PDF files are supported")
-    with NamedTemporaryFile() as tempfile:
-        tempfile.write(file.content)
-        ######################################################################
-        #
-        # 1. Load the PDF
-        #
-        ######################################################################
-        loader = PDFPlumberLoader(tempfile.name)
-        ######################################################################
-        documents = loader.load()
-        ######################################################################
-        #
-        # 2. Split the text
-        #
-        ######################################################################
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=3000,
-            chunk_overlap=100
-        )
-        ######################################################################
-        docs = text_splitter.split_documents(documents)
-        for i, doc in enumerate(docs):
-            doc.metadata["source"] = f"source_{i}"
-        if not docs:
-            raise ValueError("PDF file parsing failed.")
-        return docs
-def create_search_engine(*, file: AskFileResponse) -> VectorStore:
-    # Process and save data in the user session
-    docs = process_file(file=file)
-    cl.user_session.set("docs", docs)
-    ##########################################################################
-    #
-    # 3. Set the Encoder model for creating embeddings
-    #
-    ##########################################################################
-    encoder = OpenAIEmbeddings(
-        model="text-embedding-ada-002"
-    )
-    ##########################################################################
-    # Initialize Chromadb client and settings, reset to ensure we get a clean
-    # search engine
-    client = chromadb.EphemeralClient()
-    client_settings=Settings(
-        allow_reset=True,
-        anonymized_telemetry=False
-    )
-    search_engine = Chroma(
-        client=client,
-        client_settings=client_settings
-    )
-    search_engine._client.reset()
-    ##########################################################################
-    #
-    # 4. Create the document search engine. Remember to add
-    # client_settings using the above settings.
-    #
-    ##########################################################################
-    search_engine = Chroma.from_documents(
-        client=client,
-        documents=docs,
-        embedding=encoder,
-        client_settings=client_settings
-    )
-    ##########################################################################
-    return search_engine
-@cl.on_chat_start
-async def start():
-    files = None
-    while files is None:
-        files = await cl.AskFileMessage(
-            content=WELCOME_MESSAGE,
-            accept=["application/pdf"],
-            max_size_mb=20,
-        ).send()
-    file = files[0]
-    msg = cl.Message(content=f"Processing `{file.name}`...")
-    await msg.send()
-    try:
-        search_engine = await cl.make_async(create_search_engine)(file=file)
-    except Exception as e:
-        await cl.Message(content=f"Error: {e}").send()
-        raise SystemError
-    llm = ChatOpenAI(
-        model='gpt-3.5-turbo-16k-0613',
-        temperature=0,
-        streaming=True
-    )
-    ##########################################################################
-    #
-    # 5. Create the chain / tool for RetrievalQAWithSourcesChain.
-    #
-    ##########################################################################
-    chain = RetrievalQAWithSourcesChain.from_chain_type(
-        llm=llm,
-        chain_type="stuff",
-        retriever=search_engine.as_retriever(max_tokens_limit=4097),
-        ######################################################################
-        # 6. Customize prompts to improve summarization and question
-        # answering performance. Perhaps create your own prompt in prompts.py?
-        ######################################################################
-        chain_type_kwargs={
-            "prompt": PROMPT,
-            "document_prompt": EXAMPLE_PROMPT
-        },
-    )
-    ##########################################################################
-    # await msg.update(content=f"`{file.name}` processed. You can now ask questions!")
-    msg.content = f"`{file.name}` processed. You can now ask questions!"
-    await msg.update()
-    cl.user_session.set("chain", chain)
 @cl.on_message
 async def main(message: cl.Message):
-    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
-    cb = cl.AsyncLangchainCallbackHandler()
-    response = await chain.acall(message.content, callbacks=[cb])
-    answer = response["answer"]
-    sources = response["sources"].strip()
-    source_elements = []
-    # Get the documents from the user session
-    docs = cl.user_session.get("docs")
-    metadatas = [doc.metadata for doc in docs]
-    all_sources = [m["source"] for m in metadatas]
-    # Adding sources to the answer
-    if sources:
-        found_sources = []
-        # Add the sources to the message
-        for source in sources.split(","):
-            source_name = source.strip().replace(".", "")
-            # Get the index of the source
-            try:
-                index = all_sources.index(source_name)
-            except ValueError:
-                continue
-            text = docs[index].page_content
-            found_sources.append(source_name)
-            # Create the text element referenced in the message
-            source_elements.append(cl.Text(content=text, name=source_name))
-        if found_sources:
-            answer += f"\nSources: {', '.join(found_sources)}"
-        else:
-            answer += "\nNo sources found"
-    await cl.Message(content=answer, elements=source_elements).send()

 import chainlit as cl
 @cl.on_message
 async def main(message: cl.Message):
+    response = message.content
+    await cl.Message(content=response).send()

app/prompt.py DELETED Viewed

@@ -1,26 +0,0 @@
-# flake8: noqa
-from langchain.prompts import PromptTemplate
-WELCOME_MESSAGE = """\
-Welcome to Introduction to LLM App Development Sample PDF QA Application!
-To get started:
-1. Upload a PDF or text file
-2. Ask any question about the file!
-"""
-template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
-If you don't know the answer, just say that you don't know. Don't try to make up an answer.
-ALWAYS return a "SOURCES" field in your answer, with the format "SOURCES: <source1>, <source2>, <source3>, ...".
-QUESTION: {question}
-=========
-{summaries}
-=========
-FINAL ANSWER:"""
-PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])
-EXAMPLE_PROMPT = PromptTemplate(
-    template="Content: {page_content}\nSource: {source}",
-    input_variables=["page_content", "source"],
-)