Update app.py
Added inference endpoint.
app.py
CHANGED
@@ -1,8 +1,10 @@
 from time import sleep
+from os import getenv

 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
+from openai import OpenAI
 import torch
 from duckduckgo_search import DDGS
 import re
@@ -15,6 +17,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)

+
+
 class DDGSSearchClient:
     def __init__(self, max_retries=4, timeout=35, backoff_factor=1):
         """
@@ -323,10 +327,40 @@ class Applicant:
     def __init__(self, resume):
         self.resume = resume

-@spaces.GPU
-def write(inputs, max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1):
-    _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
-    return _output
+# @spaces.GPU
+# def write(inputs, max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1):
+#     _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
+#     return _output
+
+def writing_task(prompt: str) -> str:
+    api_key = getenv("HF_TOKEN")
+    if not api_key:
+        raise ValueError("Hugging Face token missing. Set HF_TOKEN; see https://discuss.huggingface.co/t/how-to-manage-user-secrets-and-api-keys/67948")
+    client = OpenAI(
+        base_url="https://router.huggingface.co/v1",
+        api_key=api_key,
+    )
+
+    completion = client.chat.completions.create(
+        model="HuggingFaceTB/SmolLM3-3B:hf-inference",
+        messages=[
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+    )
+
+    raw_response_content = completion.choices[0].message.content
+    content_split = raw_response_content.split("</think>")
+    if len(content_split) > 1:
+        think = content_split[0]
+        content = "".join(content_split[1:])
+    else:
+        think = content_split[0]
+        content = "No data found."
+
+    return content

 def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
     prompt = f"""<|im_start|>system
@@ -335,12 +369,13 @@ def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
 {job_description}<|im_end|>
 <|im_start|>assistant
 """
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    output = write(inputs, max_new_tokens=max_new_tokens)
-    response = tokenizer.decode(output[0], skip_special_tokens=False)
-    start_idx = response.find("<|im_start|>assistant")
-    end_idx = response.find("<|im_end|>", start_idx)
-    response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
+    # inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    # output = write(inputs, max_new_tokens=max_new_tokens)
+    # response = tokenizer.decode(output[0], skip_special_tokens=False)
+    # start_idx = response.find("<|im_start|>assistant")
+    # end_idx = response.find("<|im_end|>", start_idx)
+    # response = response[start_idx + len("<|im_start|>assistant\n"):end_idx].strip()
+    response = writing_task(prompt)
    return response

 def process_job_description(company_name, company_url, job_description, resume):