typo
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
|
@@ -339,7 +339,7 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
|
|
| 339 |
if not validation_program:
|
| 340 |
raise ValueError(f"Example {i} does not contain validation program field")
|
| 341 |
|
| 342 |
-
eval_inputs.append((prediction, validation_program,
|
| 343 |
|
| 344 |
# if there are more than 500 predictions, we use multiprocessing to speed up the evaluation
|
| 345 |
if len(eval_inputs) > 500:
|
|
@@ -349,7 +349,7 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
|
|
| 349 |
results = list(tqdm(
|
| 350 |
pool.starmap(_evaluate_with_prolog, eval_inputs),
|
| 351 |
total=len(eval_inputs),
|
| 352 |
-
desc="Evaluating rules (parallel)"
|
| 353 |
))
|
| 354 |
else:
|
| 355 |
# Evaluate in the main thread (no multiprocessing)
|
|
|
|
| 339 |
if not validation_program:
|
| 340 |
raise ValueError(f"Example {i} does not contain validation program field")
|
| 341 |
|
| 342 |
+
eval_inputs.append((prediction, validation_program, eval_config, TIMEOUT))
|
| 343 |
|
| 344 |
# if there are more than 500 predictions, we use multiprocessing to speed up the evaluation
|
| 345 |
if len(eval_inputs) > 500:
|
|
|
|
| 349 |
results = list(tqdm(
|
| 350 |
pool.starmap(_evaluate_with_prolog, eval_inputs),
|
| 351 |
total=len(eval_inputs),
|
| 352 |
+
desc=f"Evaluating rules (parallel processing with {num_cpus} CPUs)"
|
| 353 |
))
|
| 354 |
else:
|
| 355 |
# Evaluate in the main thread (no multiprocessing)
|