Commit f687064
Parent(s): 21f4f83
Updating to use env var model

backend/query_llm.py  (+7 -6)  CHANGED
@@ -5,14 +5,15 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
 
-
+MODEL = getenv("MODEL")
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
 
 temperature = 0.9
 top_p = 0.6
 repetition_penalty = 1.2
 
 text_client = InferenceClient(
-
+    MODEL,
     token=getenv("HUGGING_FACE_HUB_TOKEN")
 )
 
@@ -38,7 +39,7 @@ def format_prompt(message: str) -> str:
 def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
              top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
     """
-    Generate a sequence of tokens based on a given prompt and history using
+    Generate a sequence of tokens based on a given prompt and history using MODEL client.
 
     Args:
         prompt (str): The initial prompt for the text generation.
@@ -77,12 +78,12 @@ def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens
 
     except Exception as e:
         if "Too Many Requests" in str(e):
-            print("ERROR: Too many requests on
-            gr.Warning("Unfortunately
+            print(f"ERROR: Too many requests on {MODEL} client")
+            gr.Warning(f"Unfortunately {MODEL} is unable to process")
             return "Unfortunately, I am not able to process your request now."
         else:
             print("Unhandled Exception:", str(e))
-            gr.Warning("Unfortunately
+            gr.Warning(f"Unfortunately {MODEL} is unable to process")
             return "I do not know what happened, but I couldn't understand you."
 
     return output
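
For reference, a minimal, hypothetical sketch of how the refactored module could be exercised after this change: MODEL and HUGGING_FACE_HUB_TOKEN are read at import time, so they must be present in the environment (for example as Space secrets/variables) before backend/query_llm.py is imported. The repo id and token below are placeholders, not values taken from this Space.

    from os import environ

    # Set the environment before importing the module; query_llm.py reads
    # MODEL and HUGGING_FACE_HUB_TOKEN when it is first imported.
    environ["MODEL"] = "HuggingFaceH4/zephyr-7b-beta"  # placeholder model repo id
    environ["HUGGING_FACE_HUB_TOKEN"] = "hf_xxx"       # placeholder token

    from backend.query_llm import generate

    # generate() returns a generator of intermediate strings as the
    # InferenceClient streams tokens for the prompt.
    for partial in generate("What does this Space do?", history=""):
        print(partial, end="", flush=True)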