Update app.py
app.py CHANGED

@@ -25,7 +25,7 @@ gpu_pipe = transformers.pipeline(
     # revision='c303c108399aba837146e893375849b918f413b3',
     torch_dtype='bfloat16',
     device='cuda',
-)
+).to('cuda')
 cpu_pipe = transformers.pipeline(
     'text-generation',
     model='dx2102/llama-midi',
@@ -215,8 +215,8 @@ CPUs will be slower but there is no time limit.
             break
         history[-1]['content'] += text
         yield history
-
-zerogpu_model_fn = spaces.GPU(cpu_model_fn)
+# duration: GPU time out (seconds)
+zerogpu_model_fn = spaces.GPU(duration=15)(cpu_model_fn)
 
 def runpod_model_fn(prefix, history):
     # NOTE
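
For reference, a minimal sketch of the pattern this commit applies on the ZeroGPU side: spaces.GPU(duration=...) returns a decorator that wraps the CPU inference function so each call gets a bounded GPU allocation. The body of cpu_model_fn below is a placeholder stand-in, not the Space's actual streaming loop from app.py.

import spaces  # Hugging Face ZeroGPU helper, available inside the Space


def cpu_model_fn(prefix, history):
    # Placeholder body: the real function in app.py streams generated text
    # and yields the updated chat history as it arrives.
    history[-1]['content'] += prefix
    yield history


# spaces.GPU(duration=15) caps each call's ZeroGPU allocation at 15 seconds,
# matching the "# duration: GPU time out (seconds)" comment in the diff.
zerogpu_model_fn = spaces.GPU(duration=15)(cpu_model_fn)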