Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 0 additions & 31 deletions docs/benchmarking/nsfw.md

This file was deleted.

20 changes: 16 additions & 4 deletions src/guardrails/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def __new__(
cls,
config: str | Path | dict[str, Any],
name: str,
instructions: str,
instructions: str | Callable[[Any, Any], Any] | None = None,
raise_guardrail_errors: bool = False,
block_on_tool_violations: bool = False,
**agent_kwargs: Any,
Expand All @@ -511,7 +511,9 @@ def __new__(
Args:
config: Pipeline configuration (file path, dict, or JSON string)
name: Agent name
instructions: Agent instructions
instructions: Agent instructions. Can be a string, a callable that dynamically
generates instructions, or None. If a callable, it will receive the context
and agent instance and must return a string.
raise_guardrail_errors: If True, raise exceptions when guardrails fail to execute.
If False (default), treat guardrail errors as safe and continue execution.
block_on_tool_violations: If True, tool guardrail violations raise exceptions (halt execution).
Expand Down Expand Up @@ -553,7 +555,11 @@ def __new__(
input_tool, input_agent = _separate_tool_level_from_agent_level(stage_guardrails.get("input", []))
output_tool, output_agent = _separate_tool_level_from_agent_level(stage_guardrails.get("output", []))

# Create agent-level INPUT guardrails
# Extract any user-provided guardrails from agent_kwargs
user_input_guardrails = agent_kwargs.pop("input_guardrails", [])
user_output_guardrails = agent_kwargs.pop("output_guardrails", [])

# Create agent-level INPUT guardrails from config
input_guardrails = []

# Add agent-level guardrails from pre_flight and input stages
Expand All @@ -573,7 +579,10 @@ def __new__(
)
)

# Create agent-level OUTPUT guardrails
# Merge with user-provided input guardrails (config ones run first, then user ones)
input_guardrails.extend(user_input_guardrails)

# Create agent-level OUTPUT guardrails from config
output_guardrails = []
if output_agent:
output_guardrails = _create_agents_guardrails_from_config(
Expand All @@ -583,6 +592,9 @@ def __new__(
raise_guardrail_errors=raise_guardrail_errors,
)

# Merge with user-provided output guardrails (config ones run first, then user ones)
output_guardrails.extend(user_output_guardrails)

# Apply tool-level guardrails
tools = agent_kwargs.get("tools", [])

Expand Down
Loading