Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,002 changes: 2,002 additions & 0 deletions pyrit/datasets/jailbreak/many_shot_examples.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyrit/datasets/jailbreak/text_jailbreak.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def __init__(
self.template = SeedPrompt.from_yaml_file(template_path)
self.template_source = str(template_path)
elif string_template:
self.template = SeedPrompt(value=string_template, is_general_technique=True)
self.template = SeedPrompt(value=string_template, is_general_technique=True, jinja_template=True)
self.template_source = "<string_template>"
elif template_file_name:
resolved_path = self._resolve_template_by_name(template_file_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:

# Escape Jinja2 template syntax by wrapping the entire prompt in raw tags
# This tells Jinja2 to treat everything inside as literal text
prompt_value = f"{{% raw %}}{prompt_value}{{% endraw %}}"
prompt_value = prompt_value

seed_prompts.append(
SeedPrompt(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
try:
seed_prompts.append(
SeedPrompt(
value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
value=item["prompt"],
data_type="text",
dataset_name=self.dataset_name,
harm_categories=harm_categories,
Expand Down
2 changes: 1 addition & 1 deletion pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:

seed_prompts = [
SeedPrompt(
value=f"{{% raw %}}{item['question']}{{% endraw %}}",
value=item["question"],
data_type="text",
dataset_name=self.dataset_name,
harm_categories=[item["topic"]] if item.get("topic") else [],
Expand Down
2 changes: 1 addition & 1 deletion pyrit/datasets/seed_datasets/remote/or_bench_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:

seed_prompts = [
SeedPrompt(
value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
value=item["prompt"],
data_type="text",
dataset_name=self.dataset_name,
harm_categories=[item["category"]] if item.get("category") else [],
Expand Down
2 changes: 1 addition & 1 deletion pyrit/datasets/seed_datasets/remote/promptintel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def _convert_record_to_seed_prompt(self, record: dict[str, Any]) -> Optional[See
metadata = self._build_metadata(record)

# Escape Jinja2 template syntax in the prompt text
escaped_prompt = f"{{% raw %}}{prompt_value}{{% endraw %}}"
escaped_prompt = prompt_value

return SeedPrompt(
value=escaped_prompt,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
# Clean up single turn prompts that contain unwanted lines of text
cleaned_value = prompt_value.replace("### Response:", "").replace("### Instruction:", "").strip()
# some entries have contents that trip up jinja2, so we escape them
escaped_cleaned_value = f"{{% raw %}}{cleaned_value}{{% endraw %}}"
escaped_cleaned_value = cleaned_value
seed_prompts.append(
SeedPrompt(
value=escaped_cleaned_value,
Expand Down
17 changes: 17 additions & 0 deletions pyrit/datasets/seed_datasets/remote/remote_dataset_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@

logger = logging.getLogger(__name__)


def escape_jinja_template_syntax(value: str) -> str:
    """
    Wrap a string in Jinja2 {% raw %}...{% endraw %} tags to prevent template evaluation.

    Use this for any untrusted or externally-fetched text that will be stored as a
    SeedPrompt value, to ensure it is treated as literal text by the Jinja2 renderer.

    A raw block ends at the first ``endraw`` tag, so text that itself contains one
    (including whitespace-control variants such as ``{%- endraw -%}``) would close the
    wrapper early and have its remainder evaluated as template code. Each embedded
    ``endraw`` tag is therefore emitted as a Jinja2 string literal placed between two
    raw blocks, which renders back to the original characters.

    Args:
        value: The raw string to escape.

    Returns:
        str: The string wrapped in {% raw %}...{% endraw %} tags. For input that has no
        embedded ``endraw`` tag this is exactly ``{% raw %}<value>{% endraw %}``.
    """
    import re  # local import: keeps the helper self-contained

    def _quote_endraw(match):
        # Close the raw block, print the tag text as a string literal, reopen the block.
        # The matched text never contains quotes or backslashes, so single quotes are safe.
        return "{% endraw %}{{ '" + match.group(0) + "' }}{% raw %}"

    # Mirrors the forms the Jinja2 lexer accepts as the end of a raw block:
    # optional "-"/"+" whitespace-control markers and arbitrary inner whitespace.
    neutralized = re.sub(r"\{%[-+]?\s*endraw\s*[-+]?%\}", _quote_endraw, value)
    return f"{{% raw %}}{neutralized}{{% endraw %}}"


# Define the type for the file handlers
FileHandlerRead = Callable[[TextIO], list[dict[str, str]]]
FileHandlerWrite = Callable[[TextIO, list[dict[str, str]]], None]
Expand Down
2 changes: 1 addition & 1 deletion pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:

seed_prompts = [
SeedPrompt(
value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
value=item["prompt"],
data_type="text",
dataset_name=self.dataset_name,
harm_categories=[self._parse_category(c) for c in item["categories"]],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:

seed_prompts = [
SeedPrompt(
value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
value=item["prompt"],
data_type="text",
dataset_name=self.dataset_name,
harm_categories=[item["harm_area"]] if item.get("harm_area") else [],
Expand Down
2 changes: 1 addition & 1 deletion pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
harm_categories = self._extract_harm_categories(item)
try:
prompt = SeedPrompt(
value=f"{{% raw %}}{user_input}{{% endraw %}}",
value=user_input,
data_type="text",
dataset_name=self.dataset_name,
description=description,
Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/attack/multi_turn/red_teaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def _set_adversarial_chat_seed_prompt(self, *, seed_prompt: Union[str, SeedPromp
ValueError: If the seed prompt is not a string or SeedPrompt object.
"""
if isinstance(seed_prompt, str):
self._adversarial_chat_seed_prompt = SeedPrompt(value=seed_prompt, data_type="text")
self._adversarial_chat_seed_prompt = SeedPrompt(value=seed_prompt, data_type="text", jinja_template=True)
elif isinstance(seed_prompt, SeedPrompt):
self._adversarial_chat_seed_prompt = seed_prompt
else:
Expand Down
22 changes: 11 additions & 11 deletions pyrit/executor/attack/single_turn/many_shot_jailbreak.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import logging
from typing import Any, Optional, cast

import requests
from typing import Any, Optional

from pyrit.common.apply_defaults import REQUIRED_VALUE, apply_defaults
from pyrit.common.path import JAILBREAK_TEMPLATES_PATH
from pyrit.common.path import DATASETS_PATH, JAILBREAK_TEMPLATES_PATH
from pyrit.executor.attack.core.attack_config import AttackConverterConfig, AttackScoringConfig
from pyrit.executor.attack.core.attack_parameters import AttackParameters
from pyrit.executor.attack.single_turn.prompt_sending import PromptSendingAttack
Expand All @@ -22,18 +21,19 @@
# as it constructs its own prompt format with examples.
ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message")

_MANY_SHOT_EXAMPLES_PATH = DATASETS_PATH / "jailbreak" / "many_shot_examples.json"


def fetch_many_shot_jailbreaking_dataset() -> list[dict[str, str]]:
def load_many_shot_jailbreaking_dataset() -> list[dict[str, str]]:
"""
Fetch many-shot jailbreaking dataset from a specified source.
Load the bundled many-shot jailbreaking examples from the local dataset file.

Returns:
list[dict[str, str]]: A list of many-shot jailbreaking examples.
"""
source = "https://raw.githubusercontent.com/KutalVolkan/many-shot-jailbreaking-dataset/5eac855/examples.json"
response = requests.get(source)
response.raise_for_status()
return cast("list[dict[str, str]]", response.json())
with open(_MANY_SHOT_EXAMPLES_PATH, encoding="utf-8") as f:
data: list[dict[str, str]] = json.load(f)
return data


class ManyShotJailbreakAttack(PromptSendingAttack):
Expand Down Expand Up @@ -87,7 +87,7 @@ def __init__(
self._examples = (
many_shot_examples[:example_count]
if (many_shot_examples is not None)
else fetch_many_shot_jailbreaking_dataset()[:example_count]
else load_many_shot_jailbreaking_dataset()[:example_count]
)
if not self._examples:
raise ValueError("Many shot examples must be provided.")
Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/promptgen/fuzzer/fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ async def _execute_generation_iteration_async(self, context: FuzzerContext) -> N
raise

# Create template node for tracking
target_template = SeedPrompt(value=target_seed, data_type="text", parameters=["prompt"])
target_template = SeedPrompt(value=target_seed, data_type="text", parameters=["prompt"], jinja_template=True)
target_template_node = _PromptNode(template=target_seed, parent=None)

# Generate prompts from template
Expand Down
55 changes: 52 additions & 3 deletions pyrit/models/seeds/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union

from jinja2 import Environment, StrictUndefined, Template, Undefined
import yaml
from jinja2 import StrictUndefined, Undefined
from jinja2.sandbox import SandboxedEnvironment

from pyrit.common.utils import verify_and_resolve_path
from pyrit.common.yaml_loadable import YamlLoadable

if TYPE_CHECKING:
Expand Down Expand Up @@ -130,6 +133,11 @@ class Seed(YamlLoadable):
# Whether this seed represents a general attack technique (not tied to a specific objective)
is_general_technique: bool = False

# When True, value contains Jinja2 template syntax that should be rendered as-is.
# When False (default), value is treated as literal text and auto-escaped with {% raw %} tags
# to prevent template injection. Trusted sources (YAML files) set this to True automatically.
jinja_template: bool = False

@property
def data_type(self) -> PromptDataType:
"""
Expand Down Expand Up @@ -157,7 +165,8 @@ def render_template_value(self, **kwargs: Any) -> str:
template_identifier = self.name or "<unnamed template>"

try:
jinja_template = Template(self.value, undefined=StrictUndefined)
env = SandboxedEnvironment(undefined=StrictUndefined)
jinja_template = env.from_string(self.value)
return jinja_template.render(**kwargs)
except Exception as e:
raise ValueError(
Expand Down Expand Up @@ -194,7 +203,7 @@ def render_template_value_silent(self, **kwargs: Any) -> str:
return self.value

# Create a Jinja template with PartialUndefined placeholders
env = Environment(undefined=PartialUndefined)
env = SandboxedEnvironment(undefined=PartialUndefined)
jinja_template = env.from_string(self.value)

try:
Expand All @@ -221,6 +230,46 @@ async def set_sha256_value_async(self) -> None:

self.value_sha256 = await original_serializer.get_sha256()

@staticmethod
def escape_for_jinja(value: str) -> str:
"""
Wrap a string in Jinja2 {% raw %}...{% endraw %} tags to prevent template evaluation.

Use this for any untrusted or externally-fetched text that will be stored as a
Seed value, to ensure it is treated as literal text by the Jinja2 renderer.

Args:
value: The raw string to escape.

Returns:
str: The string wrapped in {% raw %}...{% endraw %} tags.
"""
return f"{{% raw %}}{value}{{% endraw %}}"

@classmethod
def from_yaml_file(cls: type[T], file: Union[str, Path]) -> T:
    """
    Create a new Seed from a YAML file, marking it as a trusted Jinja2 template.

    The parsed mapping is passed to the class constructor as keyword arguments with
    ``jinja_template`` forced to True, overriding any value present in the file.

    Args:
        file: The input file path.

    Returns:
        A new Seed of the specific subclass type.

    Raises:
        ValueError: If the YAML file is invalid, or if its top-level content is not a
            mapping (for example an empty file or a top-level list).
    """
    # NOTE(review): verify_and_resolve_path is assumed to return a pathlib.Path; confirm.
    file = verify_and_resolve_path(file)

    try:
        yaml_data = yaml.safe_load(file.read_text("utf-8"))
    except yaml.YAMLError as exc:
        raise ValueError(f"Invalid YAML file '{file}': {exc}") from exc

    # safe_load yields None for an empty document and a list/scalar for non-mapping
    # content; both would fail below with an unhelpful TypeError instead of ValueError.
    if not isinstance(yaml_data, dict):
        raise ValueError(
            f"Invalid YAML file '{file}': expected a mapping at the top level, "
            f"got {type(yaml_data).__name__}."
        )

    yaml_data["jinja_template"] = True
    return cls(**yaml_data)

@classmethod
@abc.abstractmethod
def from_yaml_with_required_parameters(
Expand Down
32 changes: 32 additions & 0 deletions pyrit/models/seeds/seed_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Optional, Union

import yaml

from pyrit.common import utils
from pyrit.common.utils import verify_and_resolve_path
from pyrit.common.yaml_loadable import YamlLoadable
from pyrit.models.seeds.seed_attack_group import SeedAttackGroup
from pyrit.models.seeds.seed_group import SeedGroup
Expand All @@ -25,6 +28,7 @@

if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path

from pydantic.types import PositiveInt

Expand Down Expand Up @@ -55,6 +59,31 @@ class SeedDataset(YamlLoadable):
# Now the actual prompts
seeds: Sequence[Seed]

@classmethod
def from_yaml_file(cls, file: Union[str, Path]) -> SeedDataset:
    """
    Create a SeedDataset from a YAML file, marking nested seeds as trusted templates.

    The parsed mapping gets ``jinja_template`` forced to True and is then handed to
    ``cls.from_dict`` when the class provides a callable of that name, otherwise to
    the constructor as keyword arguments.

    Args:
        file: The input file path.

    Returns:
        SeedDataset: The loaded dataset.

    Raises:
        ValueError: If the YAML file is invalid, or if its top-level content is not a
            mapping (for example an empty file or a top-level list).
    """
    # NOTE(review): verify_and_resolve_path is assumed to return a pathlib.Path; confirm.
    file = verify_and_resolve_path(file)
    try:
        yaml_data = yaml.safe_load(file.read_text("utf-8"))
    except yaml.YAMLError as exc:
        raise ValueError(f"Invalid YAML file '{file}': {exc}") from exc

    # safe_load yields None for an empty document and a list/scalar for non-mapping
    # content; both would fail below with an unhelpful TypeError instead of ValueError.
    if not isinstance(yaml_data, dict):
        raise ValueError(
            f"Invalid YAML file '{file}': expected a mapping at the top level, "
            f"got {type(yaml_data).__name__}."
        )

    yaml_data["jinja_template"] = True

    # Prefer a dict-based alternate constructor when one exists on the class.
    from_dict = getattr(cls, "from_dict", None)
    if callable(from_dict):
        return from_dict(yaml_data)
    return cls(**yaml_data)

def __init__(
self,
*,
Expand All @@ -71,6 +100,7 @@ def __init__(
added_by: Optional[str] = None,
seed_type: Optional[SeedType] = None,
is_objective: bool = False, # Deprecated in 0.13.0: Use seed_type="objective" instead
jinja_template: bool = False,
):
"""
Initialize the dataset.
Expand All @@ -93,6 +123,7 @@ def __init__(
added_by: User who added the dataset.
seed_type: The type of seeds in this dataset ("prompt", "objective", or "simulated_conversation").
is_objective: Deprecated in 0.13.0. Use seed_type="objective" instead.
jinja_template: When True, seed values are Jinja2 templates. Set by from_yaml_file.

Raises:
ValueError: If seeds are missing or contain invalid/contradictory seed definitions.
Expand Down Expand Up @@ -168,6 +199,7 @@ def __init__(
"added_by": p.get("added_by"),
"metadata": p.get("metadata", {}),
"prompt_group_id": p.get("prompt_group_id"),
"jinja_template": jinja_template,
}

if effective_type == "simulated_conversation":
Expand Down
4 changes: 4 additions & 0 deletions pyrit/models/seeds/seed_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
self,
*,
seeds: Sequence[Union[Seed, dict[str, Any]]],
jinja_template: bool = False,
):
"""
Initialize a SeedGroup.
Expand All @@ -59,6 +60,8 @@ def __init__(
- SeedSimulatedConversation (or dict with seed_type="simulated_conversation")
- SeedPrompt for prompts (or dict with seed_type="prompt" or no seed_type)
Note: is_objective and is_simulated_conversation are deprecated since 0.13.0.
jinja_template: When True, seed values are treated as Jinja2 templates.
Set automatically by from_yaml_file for trusted sources.

Raises:
ValueError: If seeds is empty.
Expand All @@ -74,6 +77,7 @@ def __init__(
if isinstance(seed, Seed):
self.seeds.append(seed)
elif isinstance(seed, dict):
seed["jinja_template"] = jinja_template
# Support new seed_type field with backward compatibility for deprecated fields
seed_type = seed.pop("seed_type", None)
is_objective = seed.pop("is_objective", False)
Expand Down
2 changes: 2 additions & 0 deletions pyrit/models/seeds/seed_objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def __post_init__(self) -> None:
"""
if self.is_general_technique:
raise ValueError("SeedObjective cannot be a general technique.")
if not self.jinja_template:
self.value = self.escape_for_jinja(self.value)
self.value = super().render_template_value_silent(**PATHS_DICT)

@classmethod
Expand Down
2 changes: 2 additions & 0 deletions pyrit/models/seeds/seed_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def __post_init__(self) -> None:
ValueError: If file-based data type cannot be inferred from extension.
"""
if not self.jinja_template:
self.value = self.escape_for_jinja(self.value)
self.value = self.render_template_value_silent(**PATHS_DICT)

if not self.data_type:
Expand Down
Loading
Loading