Spaces: Build error
| import re | |
| from txtai import Embeddings, LLM | |
| import gradio as gr | |
def cot(system, user):
    """Answer a query using Chain of Thought (CoT) prompting with self-reflection.

    Wraps the caller's system prompt with CoT instructions that direct the model
    to reason inside <thinking>/<reflection> tags and place the final answer in
    <output> tags, then runs the LLM.

    Args:
        system: base system prompt text, prepended to the CoT instructions
        user: user message to answer

    Returns:
        Text inside the <output> tags, stripped; the raw LLM response when no
        <output> tag is present.
    """
    instructions = f"""
{system}
You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries. Follow these steps:
1. Think through the problem step by step within the <thinking> tags.
2. Reflect on your thinking to check for any errors or improvements within the <reflection> tags.
3. Make any necessary adjustments based on your reflection.
4. Provide your final, concise answer within the <output> tags.
Important: The <thinking> and <reflection> sections are for your internal reasoning process only.
Do not include any part of the final answer in these sections.
The actual response to the query must be entirely contained within the <output> tags.
Use the following format for your response:
<thinking>
[Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
<reflection>
[Your reflection on your reasoning, checking for errors or improvements]
</reflection>
[Any adjustments to your thinking based on your reflection]
</thinking>
<output>
[Your final, concise answer to the query. This is the only part that will be shown to the user.]
</output>
"""

    messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": user},
    ]
    response = llm(messages, maxlength=4096)

    # Extract only the <output> section; tolerate a missing closing tag by
    # also accepting end-of-string, and fall back to the raw response.
    extracted = re.search(r"<output>(.*?)(?:</output>|$)", response, re.DOTALL)
    return extracted.group(1).strip() if extracted else response
def rag(question):
    """Answer a question with retrieval augmented generation (RAG).

    Retrieves matching passages from the embeddings index, builds a
    context-restricted prompt and delegates generation to `cot`.

    Args:
        question: natural language question

    Returns:
        LLM answer grounded in the retrieved context.
    """
    template = """
Answer the following question using only the context below. Only include information
specifically discussed.
question: {question}
context: {context}
"""

    # Join the text of each search hit into a single context string.
    hits = embeddings.search(question)
    context = "\n".join(hit["text"] for hit in hits)

    system = "You are a friendly assistant. You answer questions from users."
    return cot(system, template.format(question=question, context=context))
# Load the prebuilt txtai Wikipedia embeddings index from the Hugging Face Hub.
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")
# AWQ INT4-quantized Llama 3.1 8B Instruct; gpu=True requires CUDA — TODO confirm
# the Space hardware has a GPU (likely cause of the "Build error" above).
llm = LLM("hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4", gpu=True)
def predict(message, history):
    """Gradio chat handler: answer each message independently via RAG.

    `history` is required by the ChatInterface signature but is intentionally
    unused — no conversational memory is kept.
    """
    return rag(message)
# Launch the Gradio chat UI backed by the RAG + CoT pipeline above.
gr.ChatInterface(
    predict,
    title="txtai Reflection Chatbot",
    description="A chatbot that uses Chain of Thought (CoT) with self-reflection to answer queries.",
).launch()