Update app.py
app.py CHANGED

@@ -25,7 +25,7 @@ gpu_pipe = transformers.pipeline(
     # revision='c303c108399aba837146e893375849b918f413b3',
     torch_dtype='bfloat16',
     device='cuda',
-)
+).to('cuda')
 cpu_pipe = transformers.pipeline(
     'text-generation',
     model='dx2102/llama-midi',
@@ -215,8 +215,8 @@ CPUs will be slower but there is no time limit.
             break
         history[-1]['content'] += text
         yield history
-
-zerogpu_model_fn = spaces.GPU(cpu_model_fn)
+# duration: GPU time out (seconds)
+zerogpu_model_fn = spaces.GPU(duration=15)(cpu_model_fn)
 
 def runpod_model_fn(prefix, history):
     # NOTE
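
For reference, a minimal sketch of the pattern this commit applies on the ZeroGPU side: spaces.GPU(duration=...) returns a decorator that wraps the CPU inference function so each call gets a bounded GPU allocation. The body of cpu_model_fn below is a placeholder stand-in, not the Space's actual streaming loop from app.py.

import spaces  # Hugging Face ZeroGPU helper, available inside the Space


def cpu_model_fn(prefix, history):
    # Placeholder body: the real function in app.py streams generated text
    # and yields the updated chat history as it arrives.
    history[-1]['content'] += prefix
    yield history


# spaces.GPU(duration=15) caps each call's ZeroGPU allocation at 15 seconds,
# matching the "# duration: GPU time out (seconds)" comment in the diff.
zerogpu_model_fn = spaces.GPU(duration=15)(cpu_model_fn)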