Spaces:
Runtime error
Runtime error
Commit
·
b077b7d
1
Parent(s):
a1c5338
add: guardrails manager
Browse files
guardrails_genie/guardrails/__init__.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
from .injection import SurveyGuardrail
|
|
|
|
| 2 |
|
| 3 |
-
__all__ = ["SurveyGuardrail"]
|
|
|
|
| 1 |
from .injection import SurveyGuardrail
|
| 2 |
+
from .manager import GuardrailManager
|
| 3 |
|
| 4 |
+
__all__ = ["SurveyGuardrail", "GuardrailManager"]
|
guardrails_genie/guardrails/base.py
CHANGED
|
@@ -11,7 +11,3 @@ class Guardrail(weave.Model):
|
|
| 11 |
@weave.op()
|
| 12 |
def guard(self, prompt: str, **kwargs) -> list[str]:
|
| 13 |
pass
|
| 14 |
-
|
| 15 |
-
@weave.op()
|
| 16 |
-
def predict(self, prompt: str, **kwargs) -> list[str]:
|
| 17 |
-
return self.guard(prompt, **kwargs)
|
|
|
|
| 11 |
@weave.op()
|
| 12 |
def guard(self, prompt: str, **kwargs) -> list[str]:
|
| 13 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
guardrails_genie/guardrails/injection/survey_guardrail.py
CHANGED
|
@@ -17,7 +17,7 @@ class SurveyGuardrailResponse(BaseModel):
|
|
| 17 |
|
| 18 |
class SurveyGuardrail(Guardrail):
|
| 19 |
llm_model: OpenAIModel
|
| 20 |
-
|
| 21 |
@weave.op()
|
| 22 |
def load_prompt_injection_survey(self) -> str:
|
| 23 |
prompt_injection_survey_path = os.path.join(
|
|
@@ -61,7 +61,7 @@ Here are some strict instructions that you must follow:
|
|
| 61 |
return user_prompt, system_prompt
|
| 62 |
|
| 63 |
@weave.op()
|
| 64 |
-
def
|
| 65 |
user_prompt, system_prompt = self.format_prompts(prompt)
|
| 66 |
chat_completion = self.llm_model.predict(
|
| 67 |
user_prompts=user_prompt,
|
|
@@ -70,3 +70,8 @@ Here are some strict instructions that you must follow:
|
|
| 70 |
**kwargs,
|
| 71 |
)
|
| 72 |
return chat_completion.choices[0].message.parsed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
class SurveyGuardrail(Guardrail):
|
| 19 |
llm_model: OpenAIModel
|
| 20 |
+
|
| 21 |
@weave.op()
|
| 22 |
def load_prompt_injection_survey(self) -> str:
|
| 23 |
prompt_injection_survey_path = os.path.join(
|
|
|
|
| 61 |
return user_prompt, system_prompt
|
| 62 |
|
| 63 |
@weave.op()
|
| 64 |
+
def predict(self, prompt: str, **kwargs) -> list[str]:
|
| 65 |
user_prompt, system_prompt = self.format_prompts(prompt)
|
| 66 |
chat_completion = self.llm_model.predict(
|
| 67 |
user_prompts=user_prompt,
|
|
|
|
| 70 |
**kwargs,
|
| 71 |
)
|
| 72 |
return chat_completion.choices[0].message.parsed
|
| 73 |
+
|
| 74 |
+
@weave.op()
|
| 75 |
+
def guard(self, prompt: str, **kwargs) -> list[str]:
|
| 76 |
+
response = self.predict(prompt, **kwargs)
|
| 77 |
+
return {"verdict": response.injection_prompt}
|
guardrails_genie/guardrails/manager.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import weave
|
| 2 |
+
from weave.flow.obj import Object as WeaveObject
|
| 3 |
+
|
| 4 |
+
from .base import Guardrail
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class GuardrailManager(WeaveObject):
|
| 8 |
+
guardrails: list[Guardrail]
|
| 9 |
+
|
| 10 |
+
@weave.op()
|
| 11 |
+
def guard(self, prompt: str, **kwargs) -> dict:
|
| 12 |
+
alerts = []
|
| 13 |
+
for guardrail in self.guardrails:
|
| 14 |
+
response = guardrail.guard(prompt, **kwargs)
|
| 15 |
+
alerts.append({guardrail.name: response})
|
| 16 |
+
return alerts
|