update reward, prevent reward hacking
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
|
@@ -109,7 +109,7 @@ def validate_rule_no_hardcoded_cars(prediction):
|
|
| 109 |
matches = re.findall(hardcoded_pattern, prediction)
|
| 110 |
|
| 111 |
if matches:
|
| 112 |
-
return False, f"
|
| 113 |
|
| 114 |
return True, "Rule is valid"
|
| 115 |
|
|
@@ -131,12 +131,8 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
|
|
| 131 |
positive_pred = eval_config.get("positive_predicate", "eastbound")
|
| 132 |
negative_pred = eval_config.get("negative_predicate", "westbound")
|
| 133 |
|
| 134 |
-
|
| 135 |
-
validation_program = anonymize_entities(validation_program)
|
| 136 |
-
|
| 137 |
-
|
| 138 |
# extract predicate from rule_to_evaluate
|
| 139 |
-
rule_to_evaluate = extract_ilp_from_text_v2(prediction)
|
| 140 |
if positive_pred not in rule_to_evaluate:
|
| 141 |
logger.warning(f"Rule '{rule_to_evaluate}' does not contain positive predicate '{positive_pred}'")
|
| 142 |
return {
|
|
@@ -245,15 +241,16 @@ def extract_ilp_from_text(text):
|
|
| 245 |
return p_code
|
| 246 |
|
| 247 |
|
| 248 |
-
def extract_ilp_from_text_v2(text,
|
|
|
|
| 249 |
# Pre-process: collapse code blocks to single lines
|
| 250 |
text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
|
| 251 |
-
# Optionally restrict to only some predicates
|
| 252 |
-
preds = '|'.join([re.escape(p) for p in (target_predicates or [])])
|
| 253 |
-
head_pat = rf"(?:{preds})" if preds else r"[a-zA-Z_][a-zA-Z0-9_]*"
|
| 254 |
# Rule pattern, across newlines
|
| 255 |
-
rule_pattern = re.compile(rf'({
|
| 256 |
-
rules =
|
|
|
|
|
|
|
|
|
|
| 257 |
# Remove rules that are also captured as facts
|
| 258 |
p_code = ''
|
| 259 |
for rule in rules:
|
|
@@ -262,7 +259,6 @@ def extract_ilp_from_text_v2(text, target_predicates=None):
|
|
| 262 |
if not statement.endswith('.'):
|
| 263 |
statement += '.'
|
| 264 |
p_code += statement + '\n'
|
| 265 |
-
print(p_code)
|
| 266 |
return p_code.strip() # Ensure no trailing whitespace
|
| 267 |
|
| 268 |
|
|
|
|
| 109 |
matches = re.findall(hardcoded_pattern, prediction)
|
| 110 |
|
| 111 |
if matches:
|
| 112 |
+
return False, f"Cars must be variables: {matches[0]}"
|
| 113 |
|
| 114 |
return True, "Rule is valid"
|
| 115 |
|
|
|
|
| 131 |
positive_pred = eval_config.get("positive_predicate", "eastbound")
|
| 132 |
negative_pred = eval_config.get("negative_predicate", "westbound")
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# extract predicate from rule_to_evaluate
|
| 135 |
+
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred)
|
| 136 |
if positive_pred not in rule_to_evaluate:
|
| 137 |
logger.warning(f"Rule '{rule_to_evaluate}' does not contain positive predicate '{positive_pred}'")
|
| 138 |
return {
|
|
|
|
| 241 |
return p_code
|
| 242 |
|
| 243 |
|
| 244 |
+
def extract_ilp_from_text_v2(text, target_predicate=None):
|
| 245 |
+
text = re.sub(r'%.*?(?=\n|$)', '', text) # remove comments
|
| 246 |
# Pre-process: collapse code blocks to single lines
|
| 247 |
text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
|
|
|
|
|
|
|
|
|
|
| 248 |
# Rule pattern, across newlines
|
| 249 |
+
rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
|
| 250 |
+
rules = list(rule_pattern.findall(text))
|
| 251 |
+
if len(rules) > 1:
|
| 252 |
+
logger.warning(f"Found multiple rules in text: {rules}. Using only the first one.")
|
| 253 |
+
rules = rules[:1] # Use only the first match
|
| 254 |
# Remove rules that are also captured as facts
|
| 255 |
p_code = ''
|
| 256 |
for rule in rules:
|
|
|
|
| 259 |
if not statement.endswith('.'):
|
| 260 |
statement += '.'
|
| 261 |
p_code += statement + '\n'
|
|
|
|
| 262 |
return p_code.strip() # Ensure no trailing whitespace
|
| 263 |
|
| 264 |
|