Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
| 1 |
-
|
| 2 |
-
from
|
| 3 |
-
from
|
| 4 |
from langchain.text_splitter import CharacterTextSplitter
|
| 5 |
from langchain.prompts import PromptTemplate
|
| 6 |
from langchain.chains.question_answering import load_qa_chain
|
| 7 |
from datasets import load_dataset
|
| 8 |
import pandas as pd
|
| 9 |
from functools import lru_cache
|
| 10 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
| 11 |
-
import gradio as gr
|
| 12 |
from huggingface_hub import InferenceClient
|
|
|
|
| 13 |
|
| 14 |
# Initialize the Hugging Face Inference Client
|
| 15 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
@@ -18,26 +17,34 @@ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
| 18 |
dataset = load_dataset('arbml/LK_Hadith')
|
| 19 |
df = pd.DataFrame(dataset['train'])
|
| 20 |
|
| 21 |
-
# Filter data
|
| 22 |
filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
|
| 23 |
documents = list(filtered_df['Arabic_Matn'])
|
| 24 |
metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
| 28 |
nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
|
| 29 |
|
| 30 |
-
# LLM
|
| 31 |
-
|
|
|
|
| 32 |
|
| 33 |
-
# Create an embedding model
|
| 34 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
|
| 35 |
|
|
|
|
| 36 |
docs_text = [doc.page_content for doc in nltk_chunks]
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
# Create Chroma vector store
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Question answering prompt template
|
| 43 |
qna_template = "\n".join([
|
|
@@ -134,16 +141,19 @@ def respond(
|
|
| 134 |
|
| 135 |
response = ""
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
# Gradio Chat Interface
|
| 149 |
demo = gr.ChatInterface(
|
|
@@ -162,5 +172,6 @@ demo = gr.ChatInterface(
|
|
| 162 |
],
|
| 163 |
)
|
| 164 |
|
|
|
|
| 165 |
if __name__ == "__main__":
|
| 166 |
demo.launch()
|
|
|
|
| 1 |
+
# Necessary imports
|
| 2 |
+
from langchain.vectorstores import Chroma
|
| 3 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
| 4 |
from langchain.text_splitter import CharacterTextSplitter
|
| 5 |
from langchain.prompts import PromptTemplate
|
| 6 |
from langchain.chains.question_answering import load_qa_chain
|
| 7 |
from datasets import load_dataset
|
| 8 |
import pandas as pd
|
| 9 |
from functools import lru_cache
|
|
|
|
|
|
|
| 10 |
from huggingface_hub import InferenceClient
|
| 11 |
+
import gradio as gr
|
| 12 |
|
| 13 |
# Initialize the Hugging Face Inference Client
|
| 14 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
| 17 |
dataset = load_dataset('arbml/LK_Hadith')
|
| 18 |
df = pd.DataFrame(dataset['train'])
|
| 19 |
|
| 20 |
+
# Filter data (Only retain Hadiths with non-weak grades)
|
| 21 |
filtered_df = df[df['Arabic_Grade'] != 'ضعيف']
|
| 22 |
documents = list(filtered_df['Arabic_Matn'])
|
| 23 |
metadatas = [{"Hadith_Grade": grade} for grade in filtered_df['Arabic_Grade']]
|
| 24 |
|
| 25 |
+
# Text splitter (using a smaller chunk size for memory efficiency)
|
| 26 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000)
|
| 27 |
nltk_chunks = text_splitter.create_documents(documents, metadatas=metadatas)
|
| 28 |
|
| 29 |
+
# LLM (Replace Ollama with a Hugging Face Hub model)
|
| 30 |
+
from langchain.llms import HuggingFaceHub
|
| 31 |
+
llm = HuggingFaceHub(repo_id="salmatrafi/acegpt:7b")
|
| 32 |
|
| 33 |
+
# Create an embedding model (Hugging Face transformer model for embeddings)
|
| 34 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
|
| 35 |
|
| 36 |
+
# Generate document embeddings
|
| 37 |
docs_text = [doc.page_content for doc in nltk_chunks]
|
| 38 |
+
try:
|
| 39 |
+
docs_embedding = embeddings.embed_documents(docs_text)
|
| 40 |
+
except Exception as e:
|
| 41 |
+
print(f"Error in embedding generation: {str(e)}")
|
| 42 |
|
| 43 |
+
# Create Chroma vector store with embeddings
|
| 44 |
+
try:
|
| 45 |
+
vector_store = Chroma.from_documents(nltk_chunks, embedding=embeddings)
|
| 46 |
+
except Exception as e:
|
| 47 |
+
print(f"Error in creating vector store: {str(e)}")
|
| 48 |
|
| 49 |
# Question answering prompt template
|
| 50 |
qna_template = "\n".join([
|
|
|
|
| 141 |
|
| 142 |
response = ""
|
| 143 |
|
| 144 |
+
try:
|
| 145 |
+
for msg in client.chat_completion(
|
| 146 |
+
messages,
|
| 147 |
+
max_tokens=max_tokens,
|
| 148 |
+
stream=True,
|
| 149 |
+
temperature=temperature,
|
| 150 |
+
top_p=top_p,
|
| 151 |
+
):
|
| 152 |
+
token = msg.choices[0].delta.content
|
| 153 |
+
response += token
|
| 154 |
+
yield response
|
| 155 |
+
except Exception as e:
|
| 156 |
+
yield f"An error occurred during chat completion: {str(e)}"
|
| 157 |
|
| 158 |
# Gradio Chat Interface
|
| 159 |
demo = gr.ChatInterface(
|
|
|
|
| 172 |
],
|
| 173 |
)
|
| 174 |
|
| 175 |
+
# Launch the Gradio interface
|
| 176 |
if __name__ == "__main__":
|
| 177 |
demo.launch()
|