Spaces:
Paused
Paused
Commit
·
df1aa0b
1
Parent(s):
07ce11e
Doing token checking client side
Browse files
app.py
CHANGED
|
@@ -22,6 +22,10 @@ env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
|
|
| 22 |
template = env.get_template('template.j2')
|
| 23 |
template_html = env.get_template('template_html.j2')
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def check_endpoint_status():
|
| 26 |
# Replace with the actual API URL and headers
|
| 27 |
api_url = os.getenv("ENDPOINT_URL")
|
|
@@ -50,7 +54,7 @@ def add_text(history, text):
|
|
| 50 |
|
| 51 |
|
| 52 |
def bot(history, system_prompt=""):
|
| 53 |
-
top_k =
|
| 54 |
query = history[-1][0]
|
| 55 |
|
| 56 |
logger.warning('Retrieving documents...')
|
|
@@ -60,8 +64,22 @@ def bot(history, system_prompt=""):
|
|
| 60 |
document_time = document_start - perf_counter()
|
| 61 |
logger.warning(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# Create Prompt
|
| 64 |
prompt = template.render(documents=documents, query=query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
prompt_html = template_html.render(documents=documents, query=query)
|
| 66 |
logger.warning(prompt)
|
| 67 |
|
|
|
|
| 22 |
template = env.get_template('template.j2')
|
| 23 |
template_html = env.get_template('template_html.j2')
|
| 24 |
|
| 25 |
+
# Initialize tokenizer
|
| 26 |
+
tokenizer = AutoTokenizer.from_pretrained('inception-mbzuai/jais-13b-chat')
|
| 27 |
+
|
| 28 |
+
|
| 29 |
def check_endpoint_status():
|
| 30 |
# Replace with the actual API URL and headers
|
| 31 |
api_url = os.getenv("ENDPOINT_URL")
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
def bot(history, system_prompt=""):
|
| 57 |
+
top_k = 5
|
| 58 |
query = history[-1][0]
|
| 59 |
|
| 60 |
logger.warning('Retrieving documents...')
|
|
|
|
| 64 |
document_time = document_start - perf_counter()
|
| 65 |
logger.warning(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
|
| 66 |
|
| 67 |
+
|
| 68 |
+
# Function to count tokens
|
| 69 |
+
def count_tokens(text):
    """Return how many token ids ``tokenizer.encode`` yields for ``text``.

    Relies on the ``tokenizer`` object created earlier in the file.
    """
    token_ids = tokenizer.encode(text)
    return len(token_ids)
|
| 71 |
+
|
| 72 |
# Create Prompt
|
| 73 |
prompt = template.render(documents=documents, query=query)
|
| 74 |
+
|
| 75 |
+
# Check if the prompt is too long
|
| 76 |
+
token_count = count_tokens(prompt)
|
| 77 |
+
while token_count > 2048:
|
| 78 |
+
# Placeholder truncation strategy: drop the last document and re-render until the prompt fits the 2048-token limit.
|
| 79 |
+
documents.pop() # Remove the last document
|
| 80 |
+
prompt = template.render(documents=documents, query=query) # Re-render the prompt
|
| 81 |
+
token_count = count_tokens(prompt) # Re-count tokens
|
| 82 |
+
|
| 83 |
prompt_html = template_html.render(documents=documents, query=query)
|
| 84 |
logger.warning(prompt)
|
| 85 |
|