Spaces:

ai-maker-space
/

ChatLGO

Paused

App Files Files Community

CSAle committed on Dec 4, 2023

Commit

5795fcf

1 Parent(s): 2266398

Releasing ChatLGONoData

Browse files

Files changed (4) hide show

Dockerfile +11 -0
app.py +108 -0
chainlit.md +11 -0
requirements.txt +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM python:3.9
+# Run as an unprivileged user (uid 1000) instead of root.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Copy only the requirements file first so the dependency layer below stays
+# cached when application code changes. `~` is not expanded by COPY, so the
+# destination is spelled out with $HOME.
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application source last, owned by the runtime user.
+COPY --chown=user . $HOME/app
+CMD ["chainlit", "run", "app.py", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import os
+from typing import List
+import chainlit as cl
+from llama_index.callbacks.base import CallbackManager
+from llama_index import (
+    ServiceContext,
+    StorageContext,
+    load_index_from_storage,
+)
+from llama_index.llms import OpenAI
+from llama_index.postprocessor.cohere_rerank import CohereRerank
+from llama_index.tools import QueryEngineTool, ToolMetadata
+from llama_index.query_engine import SubQuestionQueryEngine
+from llama_index.embeddings import HuggingFaceEmbedding
+from chainlit.types import AskFileResponse
+from llama_index import download_loader
+print("Loading Storage Context...")
+storage_context = StorageContext.from_defaults(persist_dir="index/")
+print("Loading Index...")
+index = load_index_from_storage(storage_context)
+def process_file(file: AskFileResponse):
+    import tempfile
+    with tempfile.NamedTemporaryFile(mode="w", delete=False) as tempfile:
+        with open(tempfile.name, "wb") as f:
+            f.write(file.content)
+    PDFReader = download_loader("PDFReader")
+    loader = PDFReader()
+    documents = loader.load_data(tempfile.name)
+    return documents
+@cl.on_chat_start
+async def on_chat_start():
+    files = None
+    # Wait for the user to upload a file
+    while files == None:
+        files = await cl.AskFileMessage(
+            content="Please upload a PDF file to begin!",
+            accept=["application/pdf"],
+            max_size_mb=20,
+            timeout=180,
+        ).send()
+    file = files[0]
+    msg = cl.Message(
+        content=f"Processing `{file.name}`...", disable_human_feedback=True
+    )
+    await msg.send()
+    # load the file
+    documents = process_file(file)
+    index = await cl.make_async(index.add_documents)(documents)
+    llm = OpenAI(model="gpt-4-1106-preview", temperature=0)
+    embed_model = HuggingFaceEmbedding(model_name="ai-maker-space/chatlgo-finetuned")
+    service_context = ServiceContext.from_defaults(
+        embed_model=embed_model,
+        llm=llm,
+    )
+    cohere_rerank = CohereRerank(top_n=5)
+    query_engine = index.as_query_engine(
+        similarity_top_k=10,
+        node_postprocessors=[cohere_rerank],
+        service_context=service_context,
+    )
+    query_engine_tools = [
+        QueryEngineTool(
+            query_engine=query_engine,
+            metadata=ToolMetadata(
+                name="mit_theses",
+                description="A collection of MIT theses.",
+            ),
+        ),
+    ]
+    query_engine = SubQuestionQueryEngine.from_defaults(
+        query_engine_tools=query_engine_tools,
+        service_context=service_context,
+    )
+    cl.user_session.set("query_engine", query_engine)
+@cl.on_message
+async def main(message: cl.Message):
+    query_engine = cl.user_session.get("query_engine")
+    response = await cl.make_async(query_engine.query)(message.content)
+    response_message = cl.Message(content=str(response))
+    await response_message.send()

chainlit.md ADDED Viewed

	@@ -0,0 +1,11 @@

+# Welcome to ChatLGO
+In this space we'll examine a chat application that is able to query a large repository of LGO Theses using a combination of tools provided through the LlamaIndex library.
+We'll be leveraging:
+- [Fine-tuned Embedding Model](https://huggingface.co/ai-maker-space/chatlgo-finetuned)
+- [Cohere's Reranking Service](https://cohere.com/rerank)
+- [Sub Question Query Engine](https://docs.llamaindex.ai/en/stable/examples/query_engine/sub_question_query_engine.html)
+This should enable us to retrieve documents with expanded context generated by our LLM!

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+tiktoken
+chainlit
+openai
+# The distribution that provides the `llama_index` package is `llama-index`.
+# app.py imports from the top-level package (`from llama_index import
+# ServiceContext`), which is the pre-0.10 layout, hence the upper bound.
+llama-index<0.10
+# NOTE(review): the packages below are not imported directly by app.py but are
+# presumably needed by the components it uses (CohereRerank,
+# HuggingFaceEmbedding, PDFReader) - confirm against the deployed image.
+cohere
+transformers
+torch
+pypdf