timer back to 10 sec, remove logging messages
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
|
@@ -136,12 +136,11 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
|
|
| 136 |
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
|
| 137 |
if positive_pred not in rule_to_evaluate:
|
| 138 |
p = prediction.replace('\n', ' ')
|
| 139 |
-
logger.warning(f"Rule does not contain predicate '{positive_pred}': {p}")
|
| 140 |
return {
|
| 141 |
"is_correct": False,
|
| 142 |
"partial_score": 0.0,
|
| 143 |
"syntax_valid": False,
|
| 144 |
-
"error": f"Invalid Syntax: Logic Rule not found for symbol '{positive_pred}'"
|
| 145 |
}
|
| 146 |
|
| 147 |
pos_examples = re.findall(rf'{positive_pred}\(([^)]+)\)', validation_program)
|
|
@@ -214,13 +213,11 @@ check_all :- forall((pos({vars});neg({vars})), check({vars})).
|
|
| 214 |
|
| 215 |
except subprocess.TimeoutExpired:
|
| 216 |
r = rule_to_evaluate.replace('\n', ' ')
|
| 217 |
-
logger.warning(f"Evaluation timed out after {timeout} seconds for rule: '{r}'")
|
| 218 |
return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
|
| 219 |
-
"error":
|
| 220 |
except Exception as e:
|
| 221 |
-
logger.warning(f"Error evaluating rule '{rule_to_evaluate}' returns: '{result.stdout.strip() if result else 'No error message'}' with error: {e}")
|
| 222 |
return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
|
| 223 |
-
"error": f"
|
| 224 |
finally:
|
| 225 |
if os.path.exists(temp_file):
|
| 226 |
os.remove(temp_file)
|
|
@@ -325,7 +322,7 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
|
|
| 325 |
raise ValueError(
|
| 326 |
f"Number of predictions ({len(predictions)}) and references ({len(references)}) don't match")
|
| 327 |
|
| 328 |
-
TIMEOUT =
|
| 329 |
# Prepare evaluation inputs
|
| 330 |
eval_inputs = []
|
| 331 |
for i, (prediction, reference) in enumerate(zip(predictions, references)):
|
|
|
|
| 136 |
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
|
| 137 |
if positive_pred not in rule_to_evaluate:
|
| 138 |
p = prediction.replace('\n', ' ')
|
|
|
|
| 139 |
return {
|
| 140 |
"is_correct": False,
|
| 141 |
"partial_score": 0.0,
|
| 142 |
"syntax_valid": False,
|
| 143 |
+
"error": f"Invalid Syntax: Logic Rule not found for symbol '{positive_pred}': {p}"
|
| 144 |
}
|
| 145 |
|
| 146 |
pos_examples = re.findall(rf'{positive_pred}\(([^)]+)\)', validation_program)
|
|
|
|
| 213 |
|
| 214 |
except subprocess.TimeoutExpired:
|
| 215 |
r = rule_to_evaluate.replace('\n', ' ')
|
|
|
|
| 216 |
return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
|
| 217 |
+
"error": f"Evaluation timed out after {timeout} seconds for rule: '{r}'"}
|
| 218 |
except Exception as e:
|
|
|
|
| 219 |
return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
|
| 220 |
+
"error": f"Error evaluating rule '{rule_to_evaluate}' returns: '{result.stdout.strip() if result else 'No error message'}' with error: {e}"}
|
| 221 |
finally:
|
| 222 |
if os.path.exists(temp_file):
|
| 223 |
os.remove(temp_file)
|
|
|
|
| 322 |
raise ValueError(
|
| 323 |
f"Number of predictions ({len(predictions)}) and references ({len(references)}) don't match")
|
| 324 |
|
| 325 |
+
TIMEOUT = 10 if len(predictions) > 500 else 5
|
| 326 |
# Prepare evaluation inputs
|
| 327 |
eval_inputs = []
|
| 328 |
for i, (prediction, reference) in enumerate(zip(predictions, references)):
|