Use multiprocessing only for more than 1000 samples
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
|
@@ -314,18 +314,21 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
|
|
| 314 |
|
| 315 |
eval_inputs.append((prediction, validation_program, eval_config))
|
| 316 |
|
| 317 |
-
#
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
# Calculate metrics
|
| 331 |
partial_scores = [result["partial_score"] for result in results]
|
|
|
|
| 314 |
|
| 315 |
eval_inputs.append((prediction, validation_program, eval_config))
|
| 316 |
|
| 317 |
+
# If there are more than 1,000 predictions, use multiprocessing to speed up the evaluation
|
| 318 |
+
if len(eval_inputs) > 1000:
|
| 319 |
+
# Process evaluations in parallel
|
| 320 |
+
num_cpus = max(1, mp.cpu_count() - 1) # Leave one CPU free
|
| 321 |
+
with mp.Pool(processes=num_cpus) as pool:
|
| 322 |
+
results = list(tqdm(
|
| 323 |
+
pool.starmap(_evaluate_with_prolog, eval_inputs),
|
| 324 |
+
total=len(eval_inputs),
|
| 325 |
+
desc="Evaluating rules (parallel)"
|
| 326 |
+
))
|
| 327 |
+
else:
|
| 328 |
+
# Evaluate sequentially in the current process (no multiprocessing)
|
| 329 |
+
results = []
|
| 330 |
+
for prediction, validation_program, eval_config in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
|
| 331 |
+
results.append(_evaluate_with_prolog(prediction, validation_program, eval_config))
|
| 332 |
|
| 333 |
# Calculate metrics
|
| 334 |
partial_scores = [result["partial_score"] for result in results]
|