Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

choose whether buffing will also include the original prompt #523

Merged
merged 5 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions garak/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ class GarakSubConfig:
pass


@dataclass
class BuffManager:
    """Class to store instantiated buffs.

    Acts as a simple holder for the list of buff objects currently loaded,
    exposed through the ``buffs`` attribute.
    """

    # NOTE: this name has no type annotation, so ``@dataclass`` does not turn it
    # into a field; it is a plain class attribute whose list is shared by every
    # instance until ``buffs`` is rebound on an instance or on the class.
    buffs = []


@dataclass
class TransientConfig(GarakSubConfig):
"""Object to hold transient global config items not set externally"""
Expand Down Expand Up @@ -59,6 +66,7 @@ class TransientConfig(GarakSubConfig):
plugins.harnesses = {}
reporting.taxonomy = None # set here to enable report_digest to be called directly

buffmanager = BuffManager()

config_files = []

Expand Down
1 change: 1 addition & 0 deletions garak/buffs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def buff(
leave=False,
):
# create one or more untransformed new attempts
# don't include the original attempt/prompt in the buffs: https://github.com/leondz/garak/issues/373
new_attempts = []
new_attempts.append(
self._derive_new_attempt(source_attempt, source_attempt.seq)
Expand Down
4 changes: 1 addition & 3 deletions garak/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

""" Definitions of commands and actions that can be run in the garak toolkit"""

from ast import Pass
import logging
import json

Expand Down Expand Up @@ -50,8 +49,7 @@ def start_run():
os.mkdir(_config.reporting.report_dir)
except PermissionError as e:
raise PermissionError(
"Can't create logging directory %s, quitting",
_config.reporting.report_dir,
f"Can't create logging directory {_config.reporting.report_dir}, quitting"
) from e
_config.transient.report_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.report.jsonl"
else:
Expand Down
4 changes: 2 additions & 2 deletions garak/harnesses/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ def _load_buffs(self, buff_names: List) -> None:
harnesses should be explicit about how they expect to deal with buffs.
"""

_config.transient.buff_instances = []
_config.buffmanager.buffs = []
for buff_name in buff_names:
err_msg = None
try:
_config.transient.buff_instances.append(_plugins.load_plugin(buff_name))
_config.buffmanager.buffs.append(_plugins.load_plugin(buff_name))
logging.debug("loaded %s", buff_name)
except ValueError as ve:
err_msg = f"❌🦾 buff load error:❌ {ve}"
Expand Down
14 changes: 7 additions & 7 deletions garak/probes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ def _buff_hook(
self, attempts: Iterable[garak.attempt.Attempt]
) -> Iterable[garak.attempt.Attempt]:
"""this is where we do the buffing, if there's any to do"""
if (
"buff_instances" not in dir(_config.transient)
or len(_config.transient.buff_instances) == 0
):
if len(_config.buffmanager.buffs) == 0:
return attempts
buffed_attempts = []
for buff in _config.transient.buff_instances:
buffed_attempts = [] # the object we'll return
if _config.plugins.buffs_include_original_prompt:
for attempt in attempts:
buffed_attempts.append(attempt)
for buff in _config.buffmanager.buffs:
if buff.post_buff_hook:
self.post_buff_hook = True
for buffed_attempt in buff.buff(
Expand All @@ -81,7 +81,7 @@ def _buff_hook(

@staticmethod
def _postprocess_buff(attempt: garak.attempt.Attempt) -> garak.attempt.Attempt:
    """Pass an attempt through each loaded buff's ``untransform`` step.

    Iterates over the buffs held in ``_config.buffmanager.buffs`` and, for
    every buff whose ``post_buff_hook`` flag is truthy, replaces the attempt
    with the result of ``buff.untransform(attempt)``.

    :param attempt: the attempt to post-process
    :return: the attempt after all applicable buffs have untransformed it
    """
    # Buffs live on the buff manager; the earlier
    # ``_config.transient.buff_instances`` store is no longer consulted.
    for buff in _config.buffmanager.buffs:
        if buff.post_buff_hook:
            attempt = buff.untransform(attempt)
    return attempt
Expand Down
22 changes: 22 additions & 0 deletions garak/probes/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,25 @@ class Blank(Probe):

recommended_detector = ["always.Pass"]
prompts = [""]


class Test(Probe):
    """Test probe

    Poses some test strings to a model; for use in tests"""

    bcp47 = "*"
    active = False  # usually for testing
    goal = "send some test strings, ignore the response"
    uri = ""
    tags = []

    recommended_detector = ["always.Pass"]
    # One literal per line, each followed by a comma: adjacent string literals
    # without a separating comma are silently concatenated into one prompt.
    prompts = [
        "",
        "The quick brown fox jumps over the lazy dog",
        "Liberté, égalité, fraternité",
        "有朋自远方来,不亦乐乎?",
        'ZX*fC-np~z0)#cj32{"qWh',
        "t=};h+WzTq2wypk4fL/6UV",
        "T9btvwOX'EV/)q2+o$kF7#",
    ]
14 changes: 8 additions & 6 deletions garak/resources/garak.core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,20 @@ run:
plugins:
model_type:
model_name:
generators: {}
probe_spec: all
probes:
encoding:
payloads:
- default
detector_spec: auto
detectors: {}
extended_detectors: false
buff_spec:
buffs_include_original_prompt: false
leondz marked this conversation as resolved.
Show resolved Hide resolved
detectors: {}
generators: {}
buffs: {}
harnesses: {}
probes:
encoding:
payloads:
- default


reporting:
report_prefix:
Expand Down
95 changes: 95 additions & 0 deletions tests/buffs/test_buff_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# two tests:
# set buffs_include_original_prompt true
#   run probe test.Test with buff lowercase.Lowercase
#   check that attempts w status 1 are original prompts + their lowercase versions
# set buffs_include_original_prompt false
#   run probe test.Test with buff lowercase.Lowercase
#   check that attempts w status 1 have no uppercase characters

import json
import os
import tempfile
import uuid

import pytest

import garak
import garak.cli

prefix = "test_buff_single" + str(uuid.uuid4())


def test_include_original_prompt():
    """Check the report when ``buffs_include_original_prompt`` is true.

    Writes a temporary YAML config enabling the option, runs the garak CLI
    with the ``test.Test`` probe and the ``lowercase.Lowercase`` buff, then
    reads the prompts of all report entries of type ``attempt`` with status 1
    and asserts that the lowercase forms of all prompts are present alongside
    the other (original) prompts.
    """
    # The option must be nested under ``plugins:`` to be read as a plugins
    # setting; at top level it would be a different key.
    config_yaml = (
        "---\n"
        "plugins:\n"
        "    buffs_include_original_prompt: true\n"
    )
    with tempfile.NamedTemporaryFile(buffering=0) as tmp:
        # buffering=0 so the config is on disk before the CLI reads tmp.name
        tmp.write(config_yaml.encode("utf-8"))
        garak.cli.main(
            f"-m test -p test.Test -b lowercase.Lowercase --config {tmp.name} --report_prefix {prefix}".split()
        )

    prompts = []
    with open(f"{prefix}.report.jsonl", "r", encoding="utf-8") as reportfile:
        for line in reportfile:
            r = json.loads(line)
            if r["entry_type"] == "attempt" and r["status"] == 1:
                prompts.append(r["prompt"])

    nonupper_prompts = set()
    other_prompts = set()
    for prompt in prompts:
        # The first sighting of an all-lowercase prompt counts as the buffed
        # version; anything else (including a repeat of it) counts as "other".
        if prompt == prompt.lower() and prompt not in nonupper_prompts:
            nonupper_prompts.add(prompt)
        else:
            other_prompts.add(prompt)

    assert len(nonupper_prompts) >= len(other_prompts)
    assert len(nonupper_prompts) + len(other_prompts) == len(prompts)
    assert set(map(str.lower, prompts)) == nonupper_prompts


def test_exclude_original_prompt():
    """Check the report when ``buffs_include_original_prompt`` is false.

    Writes a temporary YAML config disabling the option, runs the garak CLI
    with the ``test.Test`` probe and the ``lowercase.Lowercase`` buff, then
    asserts that every prompt of a report entry of type ``attempt`` with
    status 1 is already lowercase.
    """
    # The option must be nested under ``plugins:`` to be read as a plugins
    # setting; at top level it would be a different key.
    config_yaml = (
        "---\n"
        "plugins:\n"
        "    buffs_include_original_prompt: false\n"
    )
    with tempfile.NamedTemporaryFile(buffering=0) as tmp:
        # buffering=0 so the config is on disk before the CLI reads tmp.name
        tmp.write(config_yaml.encode("utf-8"))
        garak.cli.main(
            f"-m test -p test.Test -b lowercase.Lowercase --config {tmp.name} --report_prefix {prefix}".split()
        )

    prompts = []
    with open(f"{prefix}.report.jsonl", "r", encoding="utf-8") as reportfile:
        for line in reportfile:
            r = json.loads(line)
            if r["entry_type"] == "attempt" and r["status"] == 1:
                prompts.append(r["prompt"])

    for prompt in prompts:
        assert prompt == prompt.lower()


@pytest.fixture(scope="session", autouse=True)
def cleanup(request):
    """Cleanup a testing directory once we are finished.

    Registers a finalizer that deletes the report files written under the
    module-level ``prefix`` by the tests in this file.
    """

    def remove_buff_reports():
        # Remove each artefact on its own: a missing file (e.g. no HTML report
        # was produced) must not stop the remaining files from being deleted.
        for suffix in ("report.jsonl", "report.html", "hitlog.jsonl"):
            try:
                os.remove(f"{prefix}.{suffix}")
            except FileNotFoundError:
                pass

    request.addfinalizer(remove_buff_reports)
6 changes: 6 additions & 0 deletions tests/probes/test_probe_docs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import importlib
import pytest
import re
Expand Down
13 changes: 6 additions & 7 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

import importlib
import json
import pytest
import os
import re
import shutil
import tempfile

import pytest

from garak import _config
import garak.cli

Expand Down Expand Up @@ -42,16 +43,14 @@
("buff", "polymorph", "buff_spec"),
]

import garak._config

param_locs = {}
for p in garak._config.system_params:
for p in _config.system_params:
param_locs[p] = "system"
for p in garak._config.run_params:
for p in _config.run_params:
param_locs[p] = "run"
for p in garak._config.plugins_params:
for p in _config.plugins_params:
param_locs[p] = "plugins"
for p in garak._config.reporting_params:
for p in _config.reporting_params:
param_locs[p] = "reporting"


Expand Down
Loading