Spaces:

AIML-TUDA
/

VerifiableRewardsForScalableLogicalReasoning

Running

App Files Files Community

LukasHug committed on Jul 25

Commit

499cdbd

1 Parent(s): a0f954f

timer back to 10 sec, remove logging messages

Browse files

Files changed (1) hide show

VerifiableRewardsForScalableLogicalReasoning.py +4 -7

VerifiableRewardsForScalableLogicalReasoning.py CHANGED Viewed

@@ -136,12 +136,11 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
     rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
     if positive_pred not in rule_to_evaluate:
         p = prediction.replace('\n', ' ')
-        logger.warning(f"Rule does not contain predicate '{positive_pred}': {p}")
         return {
             "is_correct": False,
             "partial_score": 0.0,
             "syntax_valid": False,
-            "error": f"Invalid Syntax: Logic Rule not found for symbol '{positive_pred}'"
         }
     pos_examples = re.findall(rf'{positive_pred}\(([^)]+)\)', validation_program)
@@ -214,13 +213,11 @@ check_all :- forall((pos({vars});neg({vars})), check({vars})).
     except subprocess.TimeoutExpired:
         r = rule_to_evaluate.replace('\n', ' ')
-        logger.warning(f"Evaluation timed out after {timeout} seconds for rule: '{r}'")
         return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
-                "error": f"Evaluation timed out after {timeout} seconds"}
     except Exception as e:
-        logger.warning(f"Error evaluating rule '{rule_to_evaluate}' returns: '{result.stdout.strip() if result else 'No error message'}' with error: {e}")
         return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
-                "error": f"Syntactically invalid rule '{rule_to_evaluate}'"}
     finally:
         if os.path.exists(temp_file):
             os.remove(temp_file)
@@ -325,7 +322,7 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
             raise ValueError(
                 f"Number of predictions ({len(predictions)}) and references {len(references)}) don't match")
-        TIMEOUT = 5 if len(predictions) > 500 else 5
         # Prepare evaluation inputs
         eval_inputs = []
         for i, (prediction, reference) in enumerate(zip(predictions, references)):

     rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
     if positive_pred not in rule_to_evaluate:
         p = prediction.replace('\n', ' ')
         return {
             "is_correct": False,
             "partial_score": 0.0,
             "syntax_valid": False,
+            "error": f"Invalid Syntax: Logic Rule not found for symbol '{positive_pred}': {p}"
         }
     pos_examples = re.findall(rf'{positive_pred}\(([^)]+)\)', validation_program)
     except subprocess.TimeoutExpired:
         r = rule_to_evaluate.replace('\n', ' ')
         return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
+                "error": "Evaluation timed out after {timeout} seconds for rule: '{r}'"}
     except Exception as e:
         return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
+                "error": f"Error evaluating rule '{rule_to_evaluate}' returns: '{result.stdout.strip() if result else 'No error message'}' with error: {e}"}
     finally:
         if os.path.exists(temp_file):
             os.remove(temp_file)
             raise ValueError(
                 f"Number of predictions ({len(predictions)}) and references {len(references)}) don't match")
+        TIMEOUT = 10 if len(predictions) > 500 else 5
         # Prepare evaluation inputs
         eval_inputs = []
         for i, (prediction, reference) in enumerate(zip(predictions, references)):