Spaces:

AIML-TUDA
/

VerifiableRewardsForScalableLogicalReasoning

Running

App Files Community

LukasHug committed on Jul 25

Commit

88c2435

1 Parent(s): 999258b

update reward, prevent reward hacking

Browse files

Files changed (1) hide show

VerifiableRewardsForScalableLogicalReasoning.py +9 -13

VerifiableRewardsForScalableLogicalReasoning.py CHANGED Viewed

@@ -109,7 +109,7 @@ def validate_rule_no_hardcoded_cars(prediction):
     matches = re.findall(hardcoded_pattern, prediction)
     if matches:
-        return False, f"Rule contains ground cars: {matches[0]}"
     return True, "Rule is valid"
@@ -131,12 +131,8 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
     positive_pred = eval_config.get("positive_predicate", "eastbound")
     negative_pred = eval_config.get("negative_predicate", "westbound")
-    validation_program = anonymize_entities(validation_program)
     # extract predicate from rule_to_evaluate
-    rule_to_evaluate = extract_ilp_from_text_v2(prediction)
     if positive_pred not in rule_to_evaluate:
         logger.warning(f"Rule '{rule_to_evaluate}' does not contain positive predicate '{positive_pred}'")
         return {
@@ -245,15 +241,16 @@ def extract_ilp_from_text(text):
     return p_code
-def extract_ilp_from_text_v2(text, target_predicates=None):
     # Pre-process: collapse code blocks to single lines
     text = re.sub(r'\n\s*', ' ', text)  # crude: flatten all to one line
-    # Optionally restrict to only some predicates
-    preds = '|'.join([re.escape(p) for p in (target_predicates or [])])
-    head_pat = rf"(?:{preds})" if preds else r"[a-zA-Z_][a-zA-Z0-9_]*"
     # Rule pattern, across newlines
-    rule_pattern = re.compile(rf'({head_pat}\([^()]*\)\s*:-.*?\.)')
-    rules = set(rule_pattern.findall(text))
     # Remove rules that are also captured as facts
     p_code = ''
     for rule in rules:
@@ -262,7 +259,6 @@ def extract_ilp_from_text_v2(text, target_predicates=None):
         if not statement.endswith('.'):
             statement += '.'
         p_code += statement + '\n'
-    print(p_code)
     return p_code.strip()  # Ensure no trailing whitespace

     matches = re.findall(hardcoded_pattern, prediction)
     if matches:
+        return False, f"Cars must be variables: {matches[0]}"
     return True, "Rule is valid"
     positive_pred = eval_config.get("positive_predicate", "eastbound")
     negative_pred = eval_config.get("negative_predicate", "westbound")
     # extract predicate from rule_to_evaluate
+    rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred)
     if positive_pred not in rule_to_evaluate:
         logger.warning(f"Rule '{rule_to_evaluate}' does not contain positive predicate '{positive_pred}'")
         return {
     return p_code
+def extract_ilp_from_text_v2(text, target_predicate=None):
+    text = re.sub(r'%.*?(?=\n|$)', '', text) # remove comments
     # Pre-process: collapse code blocks to single lines
     text = re.sub(r'\n\s*', ' ', text)  # crude: flatten all to one line
     # Rule pattern, across newlines
+    rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
+    rules = list(rule_pattern.findall(text))
+    if len(rules) > 1:
+        logger.warning(f"Found multiple rules in text: {rules}. Using only the first one.")
+        rules = rules[:1]  # Use only the first match
     # Remove rules that are also captured as facts
     p_code = ''
     for rule in rules:
         if not statement.endswith('.'):
             statement += '.'
         p_code += statement + '\n'
     return p_code.strip()  # Ensure no trailing whitespace