Space status: Build error
Commit: Added history
run.py
CHANGED

@@ -2,7 +2,7 @@
 # Title: Gradio Interface to LLM-chatbot (for recommending AI) with RAG-funcionality and ChromaDB on HF-Hub
 # Author: Andreas Fischer
 # Date: December 30th, 2023
-# Last update:
+# Last update: May 27th, 2024
 ##############################################################################################################

@@ -57,14 +57,53 @@ print(collection.count())

 # Model
 #-------
+onPrem=False
+myModel="mistralai/Mixtral-8x7B-Instruct-v0.1"
+if(onPrem==False):
+    modelPath=myModel
+    from huggingface_hub import InferenceClient
+    import gradio as gr
+    client = InferenceClient(
+        model=modelPath,
+        #token="hf_..."
+    )
+else:
+    import os
+    import requests
+    import subprocess
+    #modelPath="/home/af/gguf/models/c4ai-command-r-v01-Q4_0.gguf"
+    #modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
+    modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
+    if(os.path.exists(modelPath)==False):
+        #url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+        url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
+        response = requests.get(url)
+        with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
+            file.write(response.content)
+        print("Model downloaded")
+        modelPath="./Mixtral-8x7b-instruct.gguf"
+    print(modelPath)
+    n="20"
+    if("Mixtral-8x7b-instruct" in modelPath): n="0" # mixtral seems to cause problems here...
+    command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "8", "--n_gpu_layers", n]
+    subprocess.Popen(command)
+    print("Server ready!")

-
-
-
-
-
-
-
+
+# Check template
+#----------------
+if(False):
+    from transformers import AutoTokenizer
+    #mod="mistralai/Mixtral-8x22B-Instruct-v0.1"
+    #mod="mistralai/Mixtral-8x7b-instruct-v0.1"
+    mod="VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct"
+    tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
+    cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
+    res=tok.apply_chat_template(cha)
+    print(tok.decode(res))
+    cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
+    res=tok.apply_chat_template(cha)
+    print(tok.decode(res))


 # Gradio-GUI
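Note on the on-prem branch added above: the hunk only launches llama_cpp.server on port 2600; no request against that local server appears in the shown hunks (the InferenceClient branch covers onPrem=False). A minimal sketch of how the local server could be queried, assuming llama-cpp-python's OpenAI-compatible REST endpoint; the endpoint path, payload fields and prompt text below are assumptions, not code from run.py:

import requests

# Hypothetical request against the server started above (host/port as in the hunk).
payload = {
    "prompt": "[INST] Which AI tool would you recommend for summarizing long texts? [/INST]",
    "max_tokens": 500,
    "temperature": 0.9,
}
r = requests.post("http://0.0.0.0:2600/v1/completions", json=payload, timeout=600)
print(r.json()["choices"][0]["text"])  # generated completion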
@@ -72,19 +111,30 @@ client = InferenceClient(

 import gradio as gr
 import json
+import re

-def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=True):
     startOfString=""
     if zeichenlimit is None: zeichenlimit=1000000000 # :-)
     template0=" [INST]{system}\n [/INST] </s>"
     template1=" [INST] {message} [/INST]"
     template2=" {response}</s>"
+    if("command-r" in modelPath): #https://huggingface.co/CohereForAI/c4ai-command-r-v01
+        ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
+        template0="<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> {system}<|END_OF_TURN_TOKEN|>"
+        template1="<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
+        template2="{response}<|END_OF_TURN_TOKEN|>"
     if("Gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
         template0="<start_of_turn>user{system}</end_of_turn>"
         template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
-        template2="{response}</end_of_turn>"
-    if("Mixtral-
+        template2="{response}</end_of_turn>"
+    if("Mixtral-8x22B-Instruct" in modelPath): # AutoTokenizer: <s>[INST] U1[/INST] A1</s>[INST] U2[/INST] A2</s>
         startOfString="<s>"
+        template0="[INST]{system}\n [/INST] </s>"
+        template1="[INST] {message}[/INST]"
+        template2=" {response}</s>"
+    if("Mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+        startOfString="<s>" # AutoTokenzizer: <s> [INST] U1 [/INST]A1</s> [INST] U2 [/INST]A2</s>
         template0=" [INST]{system}\n [/INST] </s>"
         template1=" [INST] {message} [/INST]"
         template2=" {response}</s>"
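The per-model template0/template1/template2 strings above only define the turn formats; their assembly into a full prompt happens further down in extend_prompt, outside this diff. A minimal sketch of that kind of assembly, assuming Gradio-style history as a list of (user, assistant) pairs; the function name and logic here are illustrative, not the actual body of extend_prompt:

# Illustrative assembly of the Mixtral-style templates shown above (not code from run.py).
def build_prompt(message, history, system,
                 template0=" [INST]{system}\n [/INST] </s>",
                 template1=" [INST] {message} [/INST]",
                 template2=" {response}</s>",
                 startOfString="<s>", historylimit=4):
    prompt = template0.format(system=system)              # system turn
    for user_msg, bot_msg in (history or [])[-historylimit:]:
        prompt += template1.format(message=user_msg)      # past user turn
        prompt += template2.format(response=bot_msg)      # past assistant answer
    prompt += template1.format(message=message)           # current user turn
    return startOfString + prompt

print(build_prompt("U2", [("U1", "A1")], "S"))
# roughly: <s> [INST]S\n [/INST] </s> [INST] U1 [/INST] A1</s> [INST] U2 [/INST]

The per-turn structure mirrors the AutoTokenizer reference strings quoted in the hunk's comments.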
@@ -100,7 +150,11 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
     if(("Discolm_german_7b" in modelPath) or ("SauerkrautLM-7b-HerO" in modelPath)): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
         template0="<|im_start|>system\n{system}<|im_end|>\n"
         template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
-        template2="{response}<|im_end|>\n"
+        template2="{response}<|im_end|>\n"
+    if("Llama-3-SauerkrautLM-8b-Instruct" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
+        template0="<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>"
+        template1="<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+        template2="{response}<|eot_id|>\n"
     if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
         template0="{system} " #<s>
         template1="USER: {message} ASSISTANT: "
@@ -128,8 +182,9 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
     return startOfString+prompt


+
 def response(
-
+    message, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
 ):
     temperature = float(temperature)
     if temperature < 1e-2: temperature = 1e-2
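The new response() signature above surfaces the sampling parameters; the lines that turn them into the generate_kwargs dict passed to client.text_generation() fall outside this diff. As an assumption about their typical shape, not the file's exact code:

# Assumed construction of generate_kwargs (not shown in this diff); all four names
# come from the new response() signature and are valid text_generation() arguments.
generate_kwargs = dict(
    temperature=temperature,                 # clamped above to at least 1e-2
    max_new_tokens=max_new_tokens,           # default 500
    top_p=top_p,                             # default 0.95
    repetition_penalty=repetition_penalty,   # default 1.0
)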
@@ -144,7 +199,7 @@ def response(
     )
     addon=""
     results=collection.query(
-        query_texts=[
+        query_texts=[message],
         n_results=2,
         #where={"source": "google-docs"}
         #where_document={"$contains":"search_string"}
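For context on the changed query above: collection.query(query_texts=[message], n_results=2) embeds the user message and returns a dict of per-query lists; the snippets that later flow into addon sit under the "documents" key. A small sketch of the return shape (keys per the chromadb API, example values invented):

# Sketch of the ChromaDB result structure for a single query text (values invented).
results = collection.query(query_texts=["Which tool summarizes texts?"], n_results=2)
# results looks like:
# {
#   "ids":       [["doc7", "doc2"]],
#   "documents": [["Tool A summarizes texts ...", "Tool B translates ..."]],
#   "distances": [[0.21, 0.34]],
#   "metadatas": [[{...}, {...}]],
#   ...
# }
docs = results["documents"][0]  # the two best-matching snippets for the one query text

The lines between this hunk and the next (not shown) presumably flatten results to such a list of document strings before the "\n".join(results) in the following hunk.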
@@ -157,9 +212,18 @@
     print(combination)
     if(len(results)>1):
         addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
-    system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."
+    system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt." #+addon #+"\n\nUser-Anliegen:"
     #body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
-    formatted_prompt = extend_prompt(system+"\n"+prompt, None) #history)
+    #formatted_prompt = extend_prompt(system+"\n"+prompt, None) #history)
+    prompt=extend_prompt(
+        message, # current message of the user
+        history, # complete history
+        system, # system prompt
+        addon, # RAG-component added to the system prompt
+        None, # fictive first words of the AI (neither displayed nor stored)
+        historylimit=4, # number of past messages to consider for response to current message
+        removeHTML=True # remove HTML-components from History (to prevent bugs with Markdown)
+    )
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
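One observation on the last hunk: the new code assigns the assembled prompt to prompt, while the unchanged line below still streams formatted_prompt, whose assignment is now commented out. Unless a line outside the shown hunks re-creates formatted_prompt, this would raise a NameError at request time. The presumed intent, as a sketch; only the first argument differs from the hunk, and the loop body is an assumption based on the streaming API of InferenceClient.text_generation:

# Presumed fix (not part of the commit): stream the newly built prompt.
stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
output = ""
for chunk in stream:
    output += chunk.token.text  # with details=True, each streamed chunk exposes the generated token text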
|