Use better clean up

qwen2_inference.py  CHANGED  (+20 -11)
@@ -73,21 +73,24 @@ def run_inference(input_file, model_path, args):
         videos=video_inputs,
         padding=True,
         return_tensors="pt",
-    )
+    )
 
     # GPU Memory after input processing
     after_input_dump = (torch.cuda.memory_allocated(), torch.cuda.memory_reserved())
 
-
-
-
-
-
-
-
-
-
-
+    with torch.inference_mode():
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=int(args["max_length"]),
+            do_sample=True,
+            top_p=float(args["top_p"]),
+            top_k=int(args["top_k"]),
+            temperature=float(args["temperature"]),
+            use_cache=True,
+            num_return_sequences=1,
+            pad_token_id=processor.tokenizer.pad_token_id,
+        )
+
     generated_ids = [
         output_ids[len(input_ids):]
         for input_ids, output_ids in zip(inputs.input_ids, output_ids)
@@ -103,4 +106,10 @@ def run_inference(input_file, model_path, args):
     print_gpu_memory("After Input", after_input_dump[0], after_input_dump[1])
     print_gpu_memory("After Generation", after_gen_dump[0], after_gen_dump[1])
 
+    # Clean up
+    del inputs, output_ids, generated_ids, image, image_input, video_inputs
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
+
     return output_text
|