Run rule validation after rule extraction
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
|
@@ -118,14 +118,7 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
|
|
| 118 |
"""
|
| 119 |
Evaluates a predicted rule against the validation program using Prolog.
|
| 120 |
"""
|
| 121 |
-
|
| 122 |
-
if not is_valid:
|
| 123 |
-
return {
|
| 124 |
-
"is_correct": False,
|
| 125 |
-
"partial_score": 0.0,
|
| 126 |
-
"syntax_valid": False,
|
| 127 |
-
"error": f"Rule validation failed: {validation_msg}"
|
| 128 |
-
}
|
| 129 |
|
| 130 |
# Extract configuration
|
| 131 |
positive_pred = eval_config.get("positive_predicate", "eastbound")
|
|
@@ -134,6 +127,16 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
|
|
| 134 |
|
| 135 |
# extract predicate from rule_to_evaluate
|
| 136 |
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
if positive_pred not in rule_to_evaluate:
|
| 138 |
p = prediction.replace('\n', ' ')
|
| 139 |
return {
|
|
|
|
| 118 |
"""
|
| 119 |
Evaluates a predicted rule against the validation program using Prolog.
|
| 120 |
"""
|
| 121 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
# Extract configuration
|
| 124 |
positive_pred = eval_config.get("positive_predicate", "eastbound")
|
|
|
|
| 127 |
|
| 128 |
# extract predicate from rule_to_evaluate
|
| 129 |
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
|
| 130 |
+
|
| 131 |
+
is_valid, validation_msg = validate_rule_no_hardcoded_cars(rule_to_evaluate)
|
| 132 |
+
if not is_valid:
|
| 133 |
+
return {
|
| 134 |
+
"is_correct": False,
|
| 135 |
+
"partial_score": 0.0,
|
| 136 |
+
"syntax_valid": False,
|
| 137 |
+
"error": f"Rule validation failed: {validation_msg}"
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
if positive_pred not in rule_to_evaluate:
|
| 141 |
p = prediction.replace('\n', ' ')
|
| 142 |
return {
|