Spaces:

wandb
/

guardrails-genie

Runtime error

App Files Files Community

geekyrakshit committed on Dec 10, 2024

Commit

1a146c5

unverified ·

2 Parent(s): a22d59b 38ff3b5

Merge pull request #14 from soumik12345/feat/secrets-detection

Browse files

Files changed (6) hide show

benchmarks/secrets_benchmark.py +227 -0
guardrails_genie/guardrails/secrets_detection/__init__.py +4 -6
guardrails_genie/guardrails/secrets_detection/secrets_detection.py +350 -86
guardrails_genie/guardrails/secrets_detection/secrets_patterns.jsonl +0 -0
pyproject.toml +7 -0
tests/guardrails_genie/guardrails/test_secrets_detection.py +26 -35

benchmarks/secrets_benchmark.py ADDED Viewed

	@@ -0,0 +1,227 @@

+import asyncio
+from typing import Any
+import weave
+from guardrails import Guard
+from guardrails.hub import SecretsPresent
+from llm_guard.input_scanners import Secrets
+from llm_guard.util import configure_logger
+from guardrails_genie.guardrails import GuardrailManager
+from guardrails_genie.guardrails.base import Guardrail
+from guardrails_genie.guardrails.secrets_detection import (
+    SecretsDetectionResponse,
+    SecretsDetectionSimpleResponse,
+    SecretsDetectionGuardrail,
+)
+from guardrails_genie.metrics import AccuracyMetric
+logger = configure_logger(log_level="ERROR")
+class GuardrailsAISecretsDetector(Guardrail):
+    """
+    A class to detect secrets using Guardrails AI.
+    Attributes:
+        validator (Any): The validator used for detecting secrets.
+    """
+    validator: Any
+    def __init__(self):
+        """
+        Initializes the GuardrailsAISecretsDetector with a validator.
+        """
+        validator = Guard().use(SecretsPresent, on_fail="fix")
+        super().__init__(validator=validator)
+    def scan(self, text: str) -> dict:
+        """
+        Scans the given text for secrets.
+        Args:
+            text (str): The text to scan for secrets.
+        Returns:
+            dict: A dictionary containing the scan results.
+        """
+        response = self.validator.validate(text)
+        if response.validation_summaries:
+            summary = response.validation_summaries[0]
+            return {
+                "has_secret": True,
+                "detected_secrets": {
+                    str(k): v
+                    for k, v in enumerate(
+                        summary.failure_reason.splitlines()[1:], start=1
+                    )
+                },
+                "explanation": summary.failure_reason,
+                "modified_prompt": response.validated_output,
+                "risk_score": 1.0,
+            }
+        else:
+            return {
+                "has_secret": False,
+                "detected_secrets": None,
+                "explanation": "No secrets detected in the text.",
+                "modified_prompt": response.validated_output,
+                "risk_score": 0.0,
+            }
+    @weave.op
+    def guard(
+        self,
+        prompt: str,
+        return_detected_secrets: bool = True,
+        **kwargs,
+    ) -> SecretsDetectionResponse | SecretsDetectionResponse:
+        """
+        Guards the given prompt by scanning for secrets.
+        Args:
+            prompt (str): The prompt to scan for secrets.
+            return_detected_secrets (bool): Whether to return detected secrets.
+        Returns:
+            SecretsDetectionResponse | SecretsDetectionSimpleResponse: The response after scanning for secrets.
+        """
+        results = self.scan(prompt)
+        if return_detected_secrets:
+            return SecretsDetectionResponse(
+                contains_secrets=results["has_secret"],
+                detected_secrets=results["detected_secrets"],
+                explanation=results["explanation"],
+                redacted_text=results["modified_prompt"],
+                risk_score=results["risk_score"],
+            )
+        else:
+            return SecretsDetectionSimpleResponse(
+                contains_secrets=not results["has_secret"],
+                explanation=results["explanation"],
+                redacted_text=results["modified_prompt"],
+                risk_score=results["risk_score"],
+            )
+class LLMGuardSecretsDetector(Guardrail):
+    """
+    A class to detect secrets using LLM Guard.
+    Attributes:
+        validator (Any): The validator used for detecting secrets.
+    """
+    validator: Any
+    def __init__(self):
+        """
+        Initializes the LLMGuardSecretsDetector with a validator.
+        """
+        validator = Secrets(redact_mode="all")
+        super().__init__(validator=validator)
+    def scan(self, text: str) -> dict:
+        """
+        Scans the given text for secrets.
+        Args:
+            text (str): The text to scan for secrets.
+        Returns:
+            dict: A dictionary containing the scan results.
+        """
+        sanitized_prompt, is_valid, risk_score = self.validator.scan(text)
+        if is_valid:
+            return {
+                "has_secret": not is_valid,
+                "detected_secrets": None,
+                "explanation": "No secrets detected in the text.",
+                "modified_prompt": sanitized_prompt,
+                "risk_score": risk_score,
+            }
+        else:
+            return {
+                "has_secret": not is_valid,
+                "detected_secrets": {},
+                "explanation": "This library does not return detected secrets.",
+                "modified_prompt": sanitized_prompt,
+                "risk_score": risk_score,
+            }
+    @weave.op
+    def guard(
+        self,
+        prompt: str,
+        return_detected_secrets: bool = True,
+        **kwargs,
+    ) -> SecretsDetectionResponse | SecretsDetectionResponse:
+        """
+        Guards the given prompt by scanning for secrets.
+        Args:
+            prompt (str): The prompt to scan for secrets.
+            return_detected_secrets (bool): Whether to return detected secrets.
+        Returns:
+            SecretsDetectionResponse | SecretsDetectionSimpleResponse: The response after scanning for secrets.
+        """
+        results = self.scan(prompt)
+        if return_detected_secrets:
+            return SecretsDetectionResponse(
+                contains_secrets=results["has_secret"],
+                detected_secrets=results["detected_secrets"],
+                explanation=results["explanation"],
+                redacted_text=results["modified_prompt"],
+                risk_score=results["risk_score"],
+            )
+        else:
+            return SecretsDetectionSimpleResponse(
+                contains_secrets=not results["has_secret"],
+                explanation=results["explanation"],
+                redacted_text=results["modified_prompt"],
+                risk_score=results["risk_score"],
+            )
+def main():
+    """
+    Main function to initialize and evaluate the secrets detectors.
+    """
+    client = weave.init("parambharat/secrets-detection")
+    dataset = weave.ref("secrets-detection-benchmark:latest").get()
+    llm_guard_guardrail = LLMGuardSecretsDetector()
+    guardrails_ai_guardrail = GuardrailsAISecretsDetector()
+    guardrails_genie_guardrail = SecretsDetectionGuardrail()
+    all_guards = [
+        llm_guard_guardrail,
+        guardrails_ai_guardrail,
+        guardrails_genie_guardrail,
+    ]
+    evaluation = weave.Evaluation(
+        dataset=dataset.rows,
+        scorers=[AccuracyMetric()],
+    )
+    for guard in all_guards:
+        name = guard.__class__.__name__
+        guardrail_manager = GuardrailManager(
+            guardrails=[
+                guard,
+            ]
+        )
+        results = asyncio.run(
+            evaluation.evaluate(
+                guardrail_manager,
+                __weave={"display_name": f"{name}"},
+            )
+        )
+        print(results)
+if __name__ == "__main__":
+    main()

guardrails_genie/guardrails/secrets_detection/__init__.py CHANGED Viewed

@@ -1,17 +1,15 @@
 from guardrails_genie.guardrails.secrets_detection.secrets_detection import (
-    DEFAULT_SECRETS_PATTERNS,
-    REDACTION,
     SecretsDetectionGuardrail,
-    SecretsDetectionResponse,
     SecretsDetectionSimpleResponse,
-    redact,
 )
 __all__ = [
-    "DEFAULT_SECRETS_PATTERNS",
     "SecretsDetectionGuardrail",
     "SecretsDetectionSimpleResponse",
     "SecretsDetectionResponse",
     "REDACTION",
-    "redact",
 ]

 from guardrails_genie.guardrails.secrets_detection.secrets_detection import (
     SecretsDetectionGuardrail,
     SecretsDetectionSimpleResponse,
+    SecretsDetectionResponse,
+    REDACTION,
+    redact_value,
 )
 __all__ = [
     "SecretsDetectionGuardrail",
     "SecretsDetectionSimpleResponse",
     "SecretsDetectionResponse",
     "REDACTION",
+    "redact_value",
 ]

guardrails_genie/guardrails/secrets_detection/secrets_detection.py CHANGED Viewed

@@ -1,41 +1,30 @@
 import hashlib
 import json
 import pathlib
 from enum import Enum
-from typing import Optional, Union
 import weave
-from pydantic import BaseModel
 from guardrails_genie.guardrails.base import Guardrail
-from guardrails_genie.regex_model import RegexModel
-def load_secrets_patterns() -> dict[str, list[str]]:
-    """
-    Load secret patterns from a JSONL file and return them as a dictionary.
-    Returns:
-        dict: A dictionary where keys are pattern names and values are lists of regex patterns.
-    """
-    default_patterns = {}
-    patterns = (
-        pathlib.Path(__file__).parent.absolute() / "secrets_patterns.jsonl"
-    ).read_text()
-    for pattern in patterns.splitlines():
-        pattern = json.loads(pattern)
-        default_patterns[pattern["name"]] = [rf"{pat}" for pat in pattern["patterns"]]
-    return default_patterns
-# Load default secret patterns from the JSONL file
-DEFAULT_SECRETS_PATTERNS = load_secrets_patterns()
 class REDACTION(str, Enum):
     """
-    Enum for different types of redaction methods.
     """
     REDACT_PARTIAL = "REDACT_PARTIAL"
@@ -44,31 +33,31 @@ class REDACTION(str, Enum):
     REDACT_NONE = "REDACT_NONE"
-def redact(text: str, matches: list[str], redaction_type: REDACTION) -> str:
     """
-    Redact the given matches in the text based on the redaction type.
     Args:
-        text (str): The input text to redact.
-        matches (list[str]): List of strings to be redacted.
-        redaction_type (REDACTION): The type of redaction to apply.
     Returns:
-        str: The redacted text.
-    """
-    for match in matches:
-        if redaction_type == REDACTION.REDACT_PARTIAL:
-            replacement = "[REDACTED:]" + match[:2] + ".." + match[-2:] + "[:REDACTED]"
-        elif redaction_type == REDACTION.REDACT_ALL:
-            replacement = "[REDACTED:]" + ("*" * len(match)) + "[:REDACTED]"
-        elif redaction_type == REDACTION.REDACT_HASH:
-            replacement = (
-                "[REDACTED:]" + hashlib.md5(match.encode()).hexdigest() + "[:REDACTED]"
-            )
-        else:
-            replacement = match
-        text = text.replace(match, replacement)
-    return text
 class SecretsDetectionSimpleResponse(BaseModel):
@@ -79,11 +68,13 @@ class SecretsDetectionSimpleResponse(BaseModel):
         contains_secrets (bool): Indicates if secrets were detected.
         explanation (str): Explanation of the detection result.
         redacted_text (Optional[str]): The redacted text if secrets were found.
     """
     contains_secrets: bool
     explanation: str
     redacted_text: Optional[str] = None
     @property
     def safe(self) -> bool:
@@ -104,54 +95,329 @@ class SecretsDetectionResponse(SecretsDetectionSimpleResponse):
         detected_secrets (dict[str, list[str]]): Dictionary of detected secrets.
     """
-    detected_secrets: dict[str, list[str]]
 class SecretsDetectionGuardrail(Guardrail):
     """
-    A guardrail for detecting secrets in text using regex patterns.
-    reference: SecretBench: A Dataset of Software Secrets
-    https://arxiv.org/abs/2303.06729
     Attributes:
-        regex_model (RegexModel): The regex model used for detection.
-        patterns (Union[dict[str, str], dict[str, list[str]]]): The patterns used for detection.
-        redaction (REDACTION): The type of redaction to apply.
     """
-    regex_model: RegexModel
-    patterns: Union[dict[str, str], dict[str, list[str]]] = {}
     redaction: REDACTION
     def __init__(
         self,
-        use_defaults: bool = True,
         redaction: REDACTION = REDACTION.REDACT_ALL,
         **kwargs,
     ):
         """
-        Initialize the SecretsDetectionGuardrail.
         Args:
-            use_defaults (bool): Whether to use default patterns.
-            redaction (REDACTION): The type of redaction to apply.
             **kwargs: Additional keyword arguments.
         """
-        patterns = {}
-        if use_defaults:
-            patterns = DEFAULT_SECRETS_PATTERNS.copy()
-        if kwargs.get("patterns"):
-            patterns.update(kwargs["patterns"])
-        regex_model = RegexModel(patterns=patterns)
         super().__init__(
-            regex_model=regex_model,
-            patterns=patterns,
             redaction=redaction,
         )
-    @weave.op()
     def guard(
         self,
         prompt: str,
@@ -159,40 +425,38 @@ class SecretsDetectionGuardrail(Guardrail):
         **kwargs,
     ) -> SecretsDetectionResponse | SecretsDetectionResponse:
         """
-        Check if the input prompt contains any secrets based on the regex patterns.
         Args:
-            prompt (str): Input text to check for secrets.
-            return_detected_secrets (bool): If True, returns detailed secrets type information.
         Returns:
-            SecretsDetectionResponse or SecretsDetectionResponse: Detection results.
         """
-        result = self.regex_model.check(prompt)
         explanation_parts = []
-        if result.matched_patterns:
             explanation_parts.append("Found the following secrets in the text:")
-            for secret_type, matches in result.matched_patterns.items():
                 explanation_parts.append(f"- {secret_type}: {len(matches)} instance(s)")
         else:
             explanation_parts.append("No secrets detected in the text.")
-        redacted_text = prompt
-        if result.matched_patterns:
-            for secret_type, matches in result.matched_patterns.items():
-                redacted_text = redact(redacted_text, matches, self.redaction)
         if return_detected_secrets:
             return SecretsDetectionResponse(
-                contains_secrets=not result.passed,
-                detected_secrets=result.matched_patterns,
                 explanation="\n".join(explanation_parts),
-                redacted_text=redacted_text,
             )
         else:
             return SecretsDetectionSimpleResponse(
-                contains_secrets=not result.passed,
                 explanation="\n".join(explanation_parts),
-                redacted_text=redacted_text,
             )

 import hashlib
 import json
+import os
 import pathlib
+import tempfile
 from enum import Enum
+from typing import Optional, Any
 import weave
+from pydantic import BaseModel, PrivateAttr
 from guardrails_genie.guardrails.base import Guardrail
+try:
+    from detect_secrets import SecretsCollection
+    from detect_secrets.settings import default_settings
+    import hyperscan
+except ImportError:
+    raise ImportError(
+        "The `detect-secrets` and the `hyperscan` packages are required for using the SecretsGuardrail. "
+        "Please install then by running `pip install detect-secrets hyperscan`."
+    )
 class REDACTION(str, Enum):
     """
+    Enum for different types of redaction modes.
     """
     REDACT_PARTIAL = "REDACT_PARTIAL"
     REDACT_NONE = "REDACT_NONE"
+def redact_value(value: str, mode: str) -> str:
     """
+    Redacts the given value based on the specified redaction mode.
     Args:
+        value (str): The string value to be redacted.
+        mode (str): The redaction mode to be applied. It can be one of the following:
+            - REDACTION.REDACT_PARTIAL: Partially redacts the value.
+            - REDACTION.REDACT_ALL: Fully redacts the value.
+            - REDACTION.REDACT_HASH: Redacts the value by hashing it.
+            - REDACTION.REDACT_NONE: No redaction is applied.
     Returns:
+        str: The redacted value based on the specified mode.
+    """
+    replacement = value
+    if mode == REDACTION.REDACT_PARTIAL:
+        replacement = "[REDACTED:]" + value[:2] + ".." + value[-2:] + "[:REDACTED]"
+    elif mode == REDACTION.REDACT_ALL:
+        replacement = "[REDACTED:]" + ("*" * len(value)) + "[:REDACTED]"
+    elif mode == REDACTION.REDACT_HASH:
+        replacement = (
+            "[REDACTED:]" + hashlib.md5(value.encode()).hexdigest() + "[:REDACTED]"
+        )
+    return replacement
 class SecretsDetectionSimpleResponse(BaseModel):
         contains_secrets (bool): Indicates if secrets were detected.
         explanation (str): Explanation of the detection result.
         redacted_text (Optional[str]): The redacted text if secrets were found.
+        risk_score (float): The risk score of the detection result. (0.0, 0.5, 1.0)
     """
     contains_secrets: bool
     explanation: str
     redacted_text: Optional[str] = None
+    risk_score: float = 0.0
     @property
     def safe(self) -> bool:
         detected_secrets (dict[str, list[str]]): Dictionary of detected secrets.
     """
+    detected_secrets: dict[str, Any] | None = None
+class SecretsInfo(BaseModel):
+    """
+    Model representing information about a detected secret.
+    Attributes:
+        secret (str): The detected secret value.
+        line_number (int): The line number where the secret was found.
+    """
+    secret: str
+    line_number: int
+class ScanResult(BaseModel):
+    """
+    Model representing the result of a secrets scan.
+    Attributes:
+        detected_secrets (dict[str, Any] | None): Dictionary of detected secrets, or None if no secrets were found.
+        modified_prompt (str): The modified prompt with secrets redacted.
+        has_secret (bool): Indicates if any secrets were detected.
+        risk_score (float): The risk score of the detection result.
+    """
+    detected_secrets: dict[str, Any] | None = None
+    modified_prompt: str
+    has_secret: bool
+    risk_score: float
+class DetectSecretsModel(weave.Model):
+    """
+    Model for detecting secrets using the detect-secrets library.
+    """
+    @staticmethod
+    def scan(text: str) -> dict[str, list[SecretsInfo]]:
+        """
+        Scans the given text for secrets using the detect-secrets library.
+        Args:
+            text (str): The text to scan for secrets.
+        Returns:
+            dict[str, list[SecretsInfo]]: A dictionary where the keys are secret types and the values are lists of SecretsInfo objects.
+        """
+        secrets = SecretsCollection()
+        temp_file = tempfile.NamedTemporaryFile(delete=False)
+        temp_file.write(text.encode("utf-8"))
+        temp_file.close()
+        with default_settings():
+            secrets.scan_file(str(temp_file.name))
+        unique_secrets = {}
+        for file in secrets.files:
+            for found_secret in secrets[file]:
+                if found_secret.secret_value is None:
+                    continue
+                secret_type = found_secret.type
+                actual_secret = found_secret.secret_value
+                line_number = found_secret.line_number
+                if secret_type not in unique_secrets:
+                    unique_secrets[secret_type] = []
+                unique_secrets[secret_type].append(
+                    SecretsInfo(secret=actual_secret, line_number=line_number)
+                )
+        os.remove(temp_file.name)
+        return unique_secrets
+    @weave.op
+    def invoke(self, text: str) -> dict[str, list[SecretsInfo]]:
+        """
+        Invokes the scan method to detect secrets in the given text.
+        Args:
+            text (str): The text to scan for secrets.
+        Returns:
+            dict[str, list[SecretsInfo]]: A dictionary where the keys are secret types and the values are lists of SecretsInfo objects.
+        """
+        return self.scan(text)
+class HyperScanModel(weave.Model):
+    """
+    Model for detecting secrets using the Hyperscan library.
+    We use the Hyperscan library to scan for secrets using regex patterns.
+    The patterns are mined from https://github.com/mazen160/secrets-patterns-db
+    This model is used in conjunction with the DetectSecretsModel to improve the detection of secrets.
+    """
+    _db: Any = PrivateAttr()
+    _pattern_map: dict[str, str] = PrivateAttr()
+    only_high_confidence: bool = False
+    ids: list[str] = []
+    def _load_patterns(self) -> dict[str, str]:
+        """
+        Loads the patterns from a JSONL file.
+        Returns:
+            dict[str, str]: A dictionary where the keys are pattern names and the values are regex patterns.
+        """
+        patterns = (
+            pathlib.Path(__file__).parent.resolve() / "secrets_patterns.jsonl"
+        ).open()
+        patterns_list = [json.loads(line) for line in patterns]
+        if self.only_high_confidence:
+            patterns_list = [
+                pattern for pattern in patterns_list if pattern["confidence"] == "high"
+            ]
+        return {pattern["name"]: pattern["regex"] for pattern in patterns_list}
+    def __init__(self, **kwargs: Any):
+        """
+        Initializes the HyperScanModel instance.
+        """
+        super().__init__(**kwargs)
+    def model_post_init(self, __context: Any) -> None:
+        """
+        Post-initialization method to load patterns and compile the Hyperscan database.
+        """
+        self._pattern_map = self._load_patterns()
+        self.ids = list(self._pattern_map.keys())
+        expressions = [pattern.encode() for pattern in self._pattern_map.values()]
+        self._db = hyperscan.Database()
+        self._db.compile(expressions=expressions, ids=list(range(len(expressions))))
+    def scan(self, text: str) -> dict[str, list[SecretsInfo]]:
+        """
+        Scans the given text for secrets using the Hyperscan library.
+        Args:
+            text (str): The text to scan for secrets.
+        Returns:
+            dict[str, list[SecretsInfo]]: A dictionary where the keys are secret types and the values are lists of SecretsInfo objects.
+        """
+        unique_secrets = {}
+        def on_match(idx, start, end, flags, context):
+            """
+            Callback function for handling matches found by Hyperscan.
+            Args:
+                idx: The index of the matched pattern.
+                start: The start position of the match.
+                end: The end position of the match.
+                flags: The flags associated with the match.
+                context: The context provided to the scan method.
+            """
+            secret = context["text"][start:end]
+            line_number = context["line_number"]
+            current_match = unique_secrets.setdefault(self.ids[idx], [])
+            if not current_match or len(secret) > len(current_match[0].secret):
+                unique_secrets[self.ids[idx]] = [
+                    SecretsInfo(line_number=line_number, secret=secret)
+                ]
+        for line_no, line in enumerate(text.splitlines(), start=1):
+            self._db.scan(
+                line.encode(),
+                match_event_handler=on_match,
+                context={"text": line, "line_number": line_no},
+            )
+        return unique_secrets
+    @weave.op
+    def invoke(self, text: str) -> dict[str, list[SecretsInfo]]:
+        """
+        Invokes the scan method to detect secrets in the given text.
+        Args:
+            text (str): The text to scan for secrets.
+        Returns:
+            dict[str, list[SecretsInfo]]: A dictionary where the keys are secret types and the values are lists of SecretsInfo objects.
+        """
+        return self.scan(text)
 class SecretsDetectionGuardrail(Guardrail):
     """
+    Guardrail class for secrets detection using both detect-secrets and Hyperscan models.
     Attributes:
+        redaction (REDACTION): The redaction mode to be applied.
+        _detect_secrets_model (Any): Instance of the DetectSecretsModel.
+        _hyperscan_model (Any): Instance of the HyperScanModel.
     """
     redaction: REDACTION
+    _detect_secrets_model: Any = PrivateAttr()
+    _hyperscan_model: Any = PrivateAttr()
+    def model_post_init(self, __context: Any) -> None:
+        """
+        Post-initialization method to initialize the detect-secrets and Hyperscan models.
+        """
+        self._detect_secrets_model = DetectSecretsModel()
+        self._hyperscan_model = HyperScanModel()
     def __init__(
         self,
         redaction: REDACTION = REDACTION.REDACT_ALL,
         **kwargs,
     ):
         """
+        Initializes the SecretsDetectionGuardrail instance.
         Args:
+            redaction (REDACTION): The redaction mode to be applied. Defaults to REDACTION.REDACT_ALL.
             **kwargs: Additional keyword arguments.
         """
         super().__init__(
             redaction=redaction,
         )
+    def get_modified_value(
+        self, unique_secrets: dict[str, Any], lines: list[str]
+    ) -> str:
+        """
+        Redacts the detected secrets in the given lines of text.
+        Args:
+            unique_secrets (dict[str, Any]): Dictionary of detected secrets.
+            lines (list[str]): List of lines of text.
+        Returns:
+            str: The modified text with secrets redacted.
+        """
+        for _, secrets_list in unique_secrets.items():
+            for secret_info in secrets_list:
+                secret = secret_info.secret
+                line_number = secret_info.line_number
+                lines[line_number - 1] = lines[line_number - 1].replace(
+                    secret, redact_value(secret, self.redaction)
+                )
+        modified_value = "\n".join(lines)
+        return modified_value
+    def get_scan_result(
+        self, unique_secrets: dict[str, list[SecretsInfo]], lines: list[str]
+    ) -> ScanResult | None:
+        """
+        Generates a ScanResult based on the detected secrets.
+        Args:
+            unique_secrets (dict[str, list[SecretsInfo]]): Dictionary of detected secrets.
+            lines (list[str]): List of lines of text.
+        Returns:
+            ScanResult | None: The scan result if secrets are detected, otherwise None.
+        """
+        if unique_secrets:
+            modified_value = self.get_modified_value(unique_secrets, lines)
+            detected_secrets = {
+                k: [i.secret for i in v] for k, v in unique_secrets.items()
+            }
+            return ScanResult(
+                **{
+                    "detected_secrets": detected_secrets,
+                    "modified_prompt": modified_value,
+                    "has_secret": True,
+                    "risk_score": 1.0,
+                }
+            )
+        return None
+    def scan(self, prompt: str) -> ScanResult:
+        """
+        Scans the given prompt for secrets using both detect-secrets and Hyperscan models.
+        Args:
+            prompt (str): The text to scan for secrets.
+        Returns:
+            ScanResult: The scan result with detected secrets and redacted text.
+        """
+        if prompt.strip() == "":
+            return ScanResult(
+                **{
+                    "detected_secrets": None,
+                    "modified_prompt": prompt,
+                    "has_secret": False,
+                    "risk_score": 0.0,
+                }
+            )
+        unique_secrets = self._detect_secrets_model.invoke(text=prompt)
+        results = self.get_scan_result(unique_secrets, prompt.splitlines())
+        if results:
+            return results
+        unique_secrets = self._hyperscan_model.invoke(text=prompt)
+        results = self.get_scan_result(unique_secrets, prompt.splitlines())
+        if results:
+            results.risk_score = 0.5
+            return results
+        return ScanResult(
+            **{
+                "detected_secrets": None,
+                "modified_prompt": prompt,
+                "has_secret": False,
+                "risk_score": 0.0,
+            }
+        )
+    @weave.op
     def guard(
         self,
         prompt: str,
         **kwargs,
     ) -> SecretsDetectionResponse | SecretsDetectionResponse:
         """
+        Guards the given prompt by scanning for secrets and optionally returning detected secrets.
         Args:
+            prompt (str): The text to scan for secrets.
+            return_detected_secrets (bool): Whether to return detected secrets in the response. Defaults to True.
+            **kwargs: Additional keyword arguments.
         Returns:
+            SecretsDetectionResponse | SecretsDetectionSimpleResponse: The response with scan results and redacted text.
         """
+        results = self.scan(prompt)
         explanation_parts = []
+        if results.has_secret:
             explanation_parts.append("Found the following secrets in the text:")
+            for secret_type, matches in results.detected_secrets.items():
                 explanation_parts.append(f"- {secret_type}: {len(matches)} instance(s)")
         else:
             explanation_parts.append("No secrets detected in the text.")
         if return_detected_secrets:
             return SecretsDetectionResponse(
+                contains_secrets=results.has_secret,
+                detected_secrets=results.detected_secrets,
                 explanation="\n".join(explanation_parts),
+                redacted_text=results.modified_prompt,
+                risk_score=results.risk_score,
             )
         else:
             return SecretsDetectionSimpleResponse(
+                contains_secrets=not results.has_secret,
                 explanation="\n".join(explanation_parts),
+                redacted_text=results.modified_prompt,
+                risk_score=results.risk_score,
             )

guardrails_genie/guardrails/secrets_detection/secrets_patterns.jsonl CHANGED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml CHANGED Viewed

@@ -25,6 +25,13 @@ presidio = [
     "presidio-analyzer>=2.2.355",
     "presidio-anonymizer>=2.2.355",
 ]
 dev = [
     "isort>=5.13.2",
     "black>=24.10.0",

     "presidio-analyzer>=2.2.355",
     "presidio-anonymizer>=2.2.355",
 ]
+secrets = [
+    "gibberish-detector>=0.1.1",
+    "detect-secrets>=1.5.0",
+    "hyperscan>=0.7.8"
+]
 dev = [
     "isort>=5.13.2",
     "black>=24.10.0",

tests/guardrails_genie/guardrails/test_secrets_detection.py CHANGED Viewed

@@ -2,16 +2,14 @@ import hashlib
 import re
 import pytest
-from hypothesis import given, settings
-from hypothesis import strategies as st
 from guardrails_genie.guardrails.secrets_detection import (
-    DEFAULT_SECRETS_PATTERNS,
-    REDACTION,
-    SecretsDetectionGuardrail,
-    SecretsDetectionResponse,
     SecretsDetectionSimpleResponse,
-    redact,
 )
@@ -19,7 +17,7 @@ from guardrails_genie.guardrails.secrets_detection import (
 def mock_secrets_guard(monkeypatch):
     def _mock_guard(*args, **kwargs):
         prompt = kwargs.get("prompt")
-        return_detected_types = kwargs.get("return_detected_types")
         if "safe text" in prompt:
             if return_detected_types:
@@ -28,12 +26,14 @@ def mock_secrets_guard(monkeypatch):
                     explanation="No secrets detected in the text.",
                     detected_secrets={},
                     redacted_text=prompt,
                 )
             else:
                 return SecretsDetectionSimpleResponse(
                     contains_secrets=False,
                     explanation="No secrets detected in the text.",
                     redacted_text=prompt,
                 )
         else:
             if return_detected_types:
@@ -42,12 +42,14 @@ def mock_secrets_guard(monkeypatch):
                     explanation="The output contains secrets.",
                     detected_secrets={"secrets": ["API_KEY"]},
                     redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
                 )
             else:
                 return SecretsDetectionSimpleResponse(
                     contains_secrets=True,
                     explanation="The output contains secrets.",
                     redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
                 )
     monkeypatch.setattr(
@@ -57,38 +59,28 @@ def mock_secrets_guard(monkeypatch):
 def test_redact_partial():
-    text = "My secret key is ABCDEFGHIJKL"
-    matches = ["ABCDEFGHIJKL"]
-    redacted_text = redact(text, matches, REDACTION.REDACT_PARTIAL)
-    assert redacted_text == "My secret key is [REDACTED:]AB..KL[:REDACTED]"
 def test_redact_all():
-    text = "My secret key is ABCDEFGHIJKL"
-    matches = ["ABCDEFGHIJKL"]
-    redacted_text = redact(text, matches, REDACTION.REDACT_ALL)
-    assert redacted_text == "My secret key is [REDACTED:]************[:REDACTED]"
 def test_redact_hash():
-    text = "My secret key is ABCDEFGHIJKL"
-    matches = ["ABCDEFGHIJKL"]
-    hashed_value = hashlib.md5("ABCDEFGHIJKL".encode()).hexdigest()
-    redacted_text = redact(text, matches, REDACTION.REDACT_HASH)
-    assert redacted_text == f"My secret key is [REDACTED:]{hashed_value}[:REDACTED]"
-def test_redact_no_match():
-    text = "My secret key is ABCDEFGHIJKL"
-    matches = ["XYZ"]
-    redacted_text = redact(text, matches, REDACTION.REDACT_ALL)
-    assert redacted_text == text
 def test_secrets_detection_guardrail_detect_types(mock_secrets_guard):
     from guardrails_genie.guardrails.secrets_detection import (
-        REDACTION,
         SecretsDetectionGuardrail,
     )
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
@@ -104,8 +96,8 @@ def test_secrets_detection_guardrail_detect_types(mock_secrets_guard):
 def test_secrets_detection_guardrail_simple_response(mock_secrets_guard):
     from guardrails_genie.guardrails.secrets_detection import (
-        REDACTION,
         SecretsDetectionGuardrail,
     )
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
@@ -120,8 +112,8 @@ def test_secrets_detection_guardrail_simple_response(mock_secrets_guard):
 def test_secrets_detection_guardrail_no_secrets(mock_secrets_guard):
     from guardrails_genie.guardrails.secrets_detection import (
-        REDACTION,
         SecretsDetectionGuardrail,
     )
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
@@ -135,16 +127,15 @@ def test_secrets_detection_guardrail_no_secrets(mock_secrets_guard):
     assert result.redacted_text == prompt
-# Create a strategy to generate strings that match the patterns
 def pattern_strategy(pattern):
     return st.from_regex(re.compile(pattern), fullmatch=True)
-@settings(deadline=1000)  # Set the deadline to 1000 milliseconds (1 second)
-@given(pattern_strategy(DEFAULT_SECRETS_PATTERNS["JwtToken"][0]))
 def test_specific_pattern_guardrail(text):
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
     result = guardrail.guard(prompt=text, return_detected_secrets=True)
     assert result.contains_secrets is True
-    assert "JwtToken" in result.detected_secrets

 import re
 import pytest
+from hypothesis import strategies as st, given, settings
+from guardrails_genie.guardrails import SecretsDetectionGuardrail
 from guardrails_genie.guardrails.secrets_detection import (
     SecretsDetectionSimpleResponse,
+    SecretsDetectionResponse,
+    REDACTION,
+    redact_value,
 )
 def mock_secrets_guard(monkeypatch):
     def _mock_guard(*args, **kwargs):
         prompt = kwargs.get("prompt")
+        return_detected_types = kwargs.get("return_detected_secrets")
         if "safe text" in prompt:
             if return_detected_types:
                     explanation="No secrets detected in the text.",
                     detected_secrets={},
                     redacted_text=prompt,
+                    risk_score=0.0,
                 )
             else:
                 return SecretsDetectionSimpleResponse(
                     contains_secrets=False,
                     explanation="No secrets detected in the text.",
                     redacted_text=prompt,
+                    risk_score=0.0,
                 )
         else:
             if return_detected_types:
                     explanation="The output contains secrets.",
                     detected_secrets={"secrets": ["API_KEY"]},
                     redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
+                    risk_score=1.0,
                 )
             else:
                 return SecretsDetectionSimpleResponse(
                     contains_secrets=True,
                     explanation="The output contains secrets.",
                     redacted_text="My secret key is [REDACTED:]************[:REDACTED]",
+                    risk_score=1.0,
                 )
     monkeypatch.setattr(
 def test_redact_partial():
+    text = "ABCDEFGHIJKL"
+    redacted_text = redact_value(text, REDACTION.REDACT_PARTIAL)
+    assert redacted_text == "[REDACTED:]AB..KL[:REDACTED]"
 def test_redact_all():
+    text = "ABCDEFGHIJKL"
+    redacted_text = redact_value(text, REDACTION.REDACT_ALL)
+    assert redacted_text == "[REDACTED:]************[:REDACTED]"
 def test_redact_hash():
+    text = "ABCDEFGHIJKL"
+    hashed_value = hashlib.md5(text.encode()).hexdigest()
+    redacted_text = redact_value(text, REDACTION.REDACT_HASH)
+    assert redacted_text == f"[REDACTED:]{hashed_value}[:REDACTED]"
 def test_secrets_detection_guardrail_detect_types(mock_secrets_guard):
     from guardrails_genie.guardrails.secrets_detection import (
         SecretsDetectionGuardrail,
+        REDACTION,
     )
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
 def test_secrets_detection_guardrail_simple_response(mock_secrets_guard):
     from guardrails_genie.guardrails.secrets_detection import (
         SecretsDetectionGuardrail,
+        REDACTION,
     )
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
 def test_secrets_detection_guardrail_no_secrets(mock_secrets_guard):
     from guardrails_genie.guardrails.secrets_detection import (
         SecretsDetectionGuardrail,
+        REDACTION,
     )
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
     assert result.redacted_text == prompt
 def pattern_strategy(pattern):
     return st.from_regex(re.compile(pattern), fullmatch=True)
+@settings(deadline=1000)
+@given(pattern_strategy(r"AKIA[0-9A-Z]{16}"))
 def test_specific_pattern_guardrail(text):
     guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
     result = guardrail.guard(prompt=text, return_detected_secrets=True)
     assert result.contains_secrets is True
+    assert "AWS Access Key" in result.detected_secrets