dx2102 committed on
Commit
6d0d5bf
·
verified ·
1 Parent(s): 67ca57a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -25,7 +25,7 @@ gpu_pipe = transformers.pipeline(
25
  # revision='c303c108399aba837146e893375849b918f413b3',
26
  torch_dtype='bfloat16',
27
  device='cuda',
28
- )
29
  cpu_pipe = transformers.pipeline(
30
  'text-generation',
31
  model='dx2102/llama-midi',
@@ -215,8 +215,8 @@ CPUs will be slower but there is no time limit.
215
  break
216
  history[-1]['content'] += text
217
  yield history
218
-
219
- zerogpu_model_fn = spaces.GPU(cpu_model_fn)
220
 
221
  def runpod_model_fn(prefix, history):
222
  # NOTE
 
25
  # revision='c303c108399aba837146e893375849b918f413b3',
26
  torch_dtype='bfloat16',
27
  device='cuda',
28
+ ).to('cuda')
29
  cpu_pipe = transformers.pipeline(
30
  'text-generation',
31
  model='dx2102/llama-midi',
 
215
  break
216
  history[-1]['content'] += text
217
  yield history
218
+ # duration: GPU time out (seconds)
219
+ zerogpu_model_fn = spaces.GPU(duration=15)(cpu_model_fn)
220
 
221
  def runpod_model_fn(prefix, history):
222
  # NOTE