david-thrower committed on
Commit
a7d0d3f
·
verified ·
1 Parent(s): a61b4aa

Update app.py

Browse files

Update to use fireworks...

Files changed (1) hide show
  1. app.py +67 -24
app.py CHANGED
@@ -1,21 +1,23 @@
1
  from time import sleep
2
  from os import getenv
 
 
3
 
4
  import gradio as gr
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  import spaces
7
- from openai import OpenAI
8
- import torch
9
  from duckduckgo_search import DDGS
10
  import re
11
 
12
- # Load the SmolLM model and tokenizer
13
- # model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
14
- model_name = "HuggingFaceTB/SmolLM3-3B" # "HuggingFaceTB/SmolLM2-1.7B-Instruct"
15
- model = AutoModelForCausalLM.from_pretrained(model_name)
16
- tokenizer = AutoTokenizer.from_pretrained(model_name)
17
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
- model.to(device)
19
 
20
 
21
 
@@ -332,26 +334,66 @@ class Applicant:
332
  # _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
333
  # return _output
334
 
335
- def writing_task(prompt: str) -> str:
336
- api_key = getenv("HF_TOKEN")
337
- if not api_key:
338
- raise ValueError("Huggingface token missing. Need to set HF_TOKEN, refer to https://discuss.huggingface.co/t/how-to-manage-user-secrets-and-api-keys/67948")
339
- client = OpenAI(
340
- base_url="https://router.huggingface.co/v1",
341
- api_key = getenv("HF_TOKEN")
342
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- completion = client.chat.completions.create(
345
- model="HuggingFaceTB/SmolLM3-3B:hf-inference",
346
- messages=[
 
 
 
 
 
 
 
 
 
 
347
  {
348
  "role": "user",
349
  "content": prompt
350
  }
351
- ],
352
- )
353
-
354
- raw_response_content = completion.choices[0].message.content
 
 
 
 
 
 
 
 
 
355
  content_split = raw_response_content.split("</think>")
356
  if len(content_split) > 1:
357
  think = content_split[0]
@@ -362,6 +404,7 @@ def writing_task(prompt: str) -> str:
362
 
363
  return content
364
 
 
365
  def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
366
  prompt = f"""<|im_start|>system
367
  {system_prompt}<|im_end|>
 
1
  from time import sleep
2
  from os import getenv
3
+ from json import dumps
4
+ from requests import post
5
 
6
  import gradio as gr
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  import spaces
9
+ # from openai import OpenAI
10
+ # import torch
11
  from duckduckgo_search import DDGS
12
  import re
13
 
14
+ # # Load the SmolLM model and tokenizer
15
+ # # model_name = "HuggingFaceTB/SmolLM2-360M-Instruct"
16
+ # model_name = "HuggingFaceTB/SmolLM3-3B" # "HuggingFaceTB/SmolLM2-1.7B-Instruct"
17
+ # model = AutoModelForCausalLM.from_pretrained(model_name)
18
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
19
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
+ # model.to(device)
21
 
22
 
23
 
 
334
  # _output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.6, top_k=40, top_p=0.9, repetition_penalty=1.1)
335
  # return _output
336
 
337
+ # def writing_task(prompt: str) -> str:
338
+ # api_key = getenv("HF_TOKEN")
339
+ # if not api_key:
340
+ # raise ValueError("Huggingface token missing. Need to set HF_TOKEN, refer to https://discuss.huggingface.co/t/how-to-manage-user-secrets-and-api-keys/67948")
341
+ # client = OpenAI(
342
+ # base_url="https://router.huggingface.co/v1",
343
+ # api_key = getenv("HF_TOKEN")
344
+ # )
345
+
346
+ # completion = client.chat.completions.create(
347
+ # model="HuggingFaceTB/SmolLM3-3B:hf-inference",
348
+ # messages=[
349
+ # {
350
+ # "role": "user",
351
+ # "content": prompt
352
+ # }
353
+ # ],
354
+ # )
355
+
356
+ # raw_response_content = completion.choices[0].message.content
357
+ # content_split = raw_response_content.split("</think>")
358
+ # if len(content_split) > 1:
359
+ # think = content_split[0]
360
+ # content = "".join(content_split[1:])
361
+ # else:
362
+ # think = content_split[0]
363
+ # content = "No data found."
364
+
365
+ # return content
366
 
367
+ def writing_task(prompt: str) -> str:
368
+ url = "https://api.fireworks.ai/inference/v1/chat/completions"
369
+ model = "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507"
370
+ # "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
371
+ payload = {
372
+ "model": model,
373
+ "max_tokens": 32768,
374
+ "top_p": 1,
375
+ "top_k": 40,
376
+ "presence_penalty": 0,
377
+ "frequency_penalty": 0,
378
+ "temperature": 0.6,
379
+ "messages": [
380
  {
381
  "role": "user",
382
  "content": prompt
383
  }
384
+ ]
385
+ }
386
+ headers = {
387
+ "Accept": "application/json",
388
+ "Content-Type": "application/json",
389
+ "Authorization": f"Bearer {FIREWORKS_API_TOKEN}" # Replace with your actual API key
390
+ }
391
+
392
+ response = post(url, headers=headers, data=dumps(payload))
393
+ response.raise_for_status()
394
+ raw_response_content =\
395
+ response.json()["choices"][0]["message"]["content"]
396
+ print(f"Content with reasoning: {raw_response_content}")
397
  content_split = raw_response_content.split("</think>")
398
  if len(content_split) > 1:
399
  think = content_split[0]
 
404
 
405
  return content
406
 
407
+
408
  def smol_lm_jd_process(job_description, system_prompt, max_new_tokens=512):
409
  prompt = f"""<|im_start|>system
410
  {system_prompt}<|im_end|>