Skip to content

Commit

Permalink
test-to-harness: initial set up (#511)
Browse files Browse the repository at this point in the history
Ref: #494

Some more comments on this PR in
#511 (comment)

---------

Signed-off-by: David Korczynski <[email protected]>
  • Loading branch information
DavidKorczynski authored Aug 2, 2024
1 parent 5a0781d commit 5b5ee46
Show file tree
Hide file tree
Showing 11 changed files with 379 additions and 68 deletions.
26 changes: 26 additions & 0 deletions benchmark-sets/from-test-small/krb5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"is_test_benchmark": true
"language": "c"
"project": "krb5"
"target_name": "fuzz_gss"
"target_path": "/src/krb5/src/tests/fuzzing/fuzz_gss.c"
"test_files":
- "test_file_path": "//src/krb5/src/tests/gssapi/t_namingexts.c"
- "test_file_path": "//src/krb5/src/tests/icinterleave.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_gssexts.c"
- "test_file_path": "//src/krb5/src/tests/s4u2self.c"
- "test_file_path": "//src/krb5/src/tests/localauth.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_cxx_k5int.cpp"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_inq_cred.c"
- "test_file_path": "//src/krb5/src/tests/t_inetd.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_inq_ctx.c"
- "test_file_path": "//src/krb5/src/tests/rdreq.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_cxx_kadm5.cpp"
- "test_file_path": "//src/krb5/src/tests/asn.1/krb5_encode_test.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_getpw.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_saslname.c"
- "test_file_path": "//src/krb5/src/tests/asn.1/t_trval.c"
- "test_file_path": "//src/krb5/src/tests/unlockiter.c"
- "test_file_path": "//src/krb5/src/tests/hooks.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_spnego.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_nfold.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_ccselect.c"
21 changes: 21 additions & 0 deletions benchmark-sets/from-test-small/liblouis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"is_test_benchmark": true
"language": "c"
"project": "liblouis"
"target_name": "fuzz_translate_generic"
"target_path": "/src/liblouis/tests/fuzzing/fuzz_translate_generic.c"
"test_files":
- "test_file_path": "//src/liblouis/tests/hyphenate_xxx.c"
- "test_file_path": "//src/liblouis/tests/checkTable.c"
- "test_file_path": "//src/liblouis/tests/typeform.c"
- "test_file_path": "//src/liblouis/tests/check_metadata.c"
- "test_file_path": "//src/liblouis/tests/charToFallbackDots.c"
- "test_file_path": "//src/liblouis/tests/findTable.c"
- "test_file_path": "//src/liblouis/tests/typeform_for_emphclass.c"
- "test_file_path": "//src/liblouis/tests/resolve_table.c"
- "test_file_path": "//src/liblouis/tests/suggestChunks.c"
- "test_file_path": "//src/liblouis/tests/hash_collision.c"
- "test_file_path": "//src/liblouis/tests/attributeNames.c"
- "test_file_path": "//src/liblouis/tests/logging.c"
- "test_file_path": "//src/liblouis/tests/getTable.c"
- "test_file_path": "//src/liblouis/tests/check_ueb_test_data.c"
- "test_file_path": "//src/liblouis/tests/emphclass.c"
19 changes: 19 additions & 0 deletions benchmark-sets/from-test-small/libraw.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"is_test_benchmark": true
"language": "c++"
"project": "libraw"
"target_name": "libraw_fuzzer"
"target_path": "/src/libraw_fuzzer.cc"
"test_files":
- "test_file_path": "//src/libraw/samples/dcraw_half.c"
- "test_file_path": "//src/libraw/samples/half_mt_win32.c"
- "test_file_path": "//src/libraw/samples/simple_dcraw.cpp"
- "test_file_path": "//src/libraw/samples/raw-identify.cpp"
- "test_file_path": "//src/libraw/samples/unprocessed_raw.cpp"
- "test_file_path": "//src/libraw/samples/mem_image_sample.cpp"
- "test_file_path": "//src/libraw/samples/postprocessing_benchmark.cpp"
- "test_file_path": "//src/libraw/samples/openbayer_sample.cpp"
- "test_file_path": "//src/libraw/samples/half_mt.c"
- "test_file_path": "//src/libraw/samples/multirender_test.cpp"
- "test_file_path": "//src/libraw/samples/rawtextdump.cpp"
- "test_file_path": "//src/libraw/samples/dcraw_emu.cpp"
- "test_file_path": "//src/libraw/samples/4channels.cpp"
26 changes: 26 additions & 0 deletions benchmark-sets/from-test-small/libsodium.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"is_test_benchmark": true
"language": "c++"
"project": "libsodium"
"target_name": "secretbox_easy_fuzzer"
"target_path": "/src/secretbox_easy_fuzzer.cc"
"test_files":
- "test_file_path": "//src/libsodium/test/default/box2.c"
- "test_file_path": "//src/libsodium/test/default/sodium_core.c"
- "test_file_path": "//src/libsodium/test/default/stream2.c"
- "test_file_path": "//src/libsodium/test/default/scalarmult_ristretto255.c"
- "test_file_path": "//src/libsodium/test/default/onetimeauth2.c"
- "test_file_path": "//src/libsodium/test/default/auth6.c"
- "test_file_path": "//src/libsodium/test/default/hash3.c"
- "test_file_path": "//src/libsodium/test/default/secretbox_easy2.c"
- "test_file_path": "//src/libsodium/test/default/chacha20.c"
- "test_file_path": "//src/libsodium/test/default/secretbox.c"
- "test_file_path": "//src/libsodium/test/default/box_seal.c"
- "test_file_path": "//src/libsodium/test/default/keygen.c"
- "test_file_path": "//src/libsodium/test/default/core3.c"
- "test_file_path": "//src/libsodium/test/default/pwhash_scrypt_ll.c"
- "test_file_path": "//src/libsodium/test/default/verify1.c"
- "test_file_path": "//src/libsodium/test/default/auth2.c"
- "test_file_path": "//src/libsodium/test/default/core1.c"
- "test_file_path": "//src/libsodium/test/default/aead_xchacha20poly1305.c"
- "test_file_path": "//src/libsodium/test/default/secretbox2.c"
- "test_file_path": "//src/libsodium/test/default/box_easy.c"
60 changes: 54 additions & 6 deletions data_prep/introspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
INTROSPECTOR_ORACLE_EASY_PARAMS = ''
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES = ''
INTROSPECTOR_ORACLE_OPTIMAL = ''
INTROSPECTOR_ORACLE_ALL_TESTS = ''
INTROSPECTOR_FUNCTION_SOURCE = ''
INTROSPECTOR_PROJECT_SOURCE = ''
INTROSPECTOR_XREF = ''
Expand All @@ -81,6 +82,7 @@ def get_oracle_dict() -> Dict[str, Any]:
'easy-params-far-reach': query_introspector_for_easy_param_targets,
'jvm-public-candidates': query_introspector_jvm_all_public_candidates,
'optimal-targets': query_introspector_for_optimal_targets,
'test-migration': query_introspector_for_tests,
}
return oracle_dict

Expand All @@ -96,7 +98,8 @@ def set_introspector_endpoints(endpoint):
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES, \
INTROSPECTOR_ALL_JVM_SOURCE_PATH, INTROSPECTOR_ORACLE_OPTIMAL, \
INTROSPECTOR_HEADERS_FOR_FUNC, \
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE, \
INTROSPECTOR_ORACLE_ALL_TESTS

INTROSPECTOR_ENDPOINT = endpoint

Expand Down Expand Up @@ -127,6 +130,7 @@ def set_introspector_endpoints(endpoint):
f'{INTROSPECTOR_ENDPOINT}/all-project-source-files')
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE = (
f'{INTROSPECTOR_ENDPOINT}/function-with-matching-return-type')
INTROSPECTOR_ORACLE_ALL_TESTS = f'{INTROSPECTOR_ENDPOINT}/project-tests'


def _construct_url(api: str, params: dict) -> str:
Expand Down Expand Up @@ -201,6 +205,14 @@ def _get_data(resp: Optional[requests.Response], key: str,
return default_value


def query_introspector_for_tests(project: str) -> list[str]:
  """Fetches the test files Fuzz Introspector reports for a project.

  Sends |project| as the 'project' query parameter to the
  INTROSPECTOR_ORACLE_ALL_TESTS endpoint and extracts the 'test-file-list'
  entry from the response.

  Args:
    project: Name of the project to look up.

  Returns:
    The value stored under 'test-file-list', or an empty list when
    _get_data falls back to the default value.
  """
  query_params = {'project': project}
  response = _query_introspector(INTROSPECTOR_ORACLE_ALL_TESTS, query_params)
  return _get_data(response, 'test-file-list', [])


def query_introspector_oracle(project: str, oracle_api: str) -> list[dict]:
"""Queries a fuzz target oracle API from Fuzz Introspector."""
resp = _query_introspector(
Expand Down Expand Up @@ -678,10 +690,49 @@ def _select_functions_from_oracles(project: str, limit: int,
return [all_functions[func] for func in selected_singatures]


def populate_benchmarks_using_test_migration(
    project: str, language: str, limit: int) -> list[benchmarklib.Benchmark]:
  """Creates test-to-harness benchmarks, one per test file of the project.

  An existing fuzz harness of the project is selected as the target path and
  paired with every test file returned by query_introspector_for_tests.

  Args:
    project: Name of the project to generate benchmarks for.
    language: Language of the project, forwarded to the source search and
      stored on each benchmark.
    limit: Upper bound applied by slicing the list of generated benchmarks.

  Returns:
    At most |limit| benchmarks flagged with is_test_benchmark=True, or an
    empty list when no fuzz harness was picked for the project.
  """
  # No file names of interest are passed; only the harness list is used here.
  found_harnesses, _ = project_src.search_source(project, [], language)
  harness = pick_one(found_harnesses)
  if not harness:
    logger.error('No fuzz target found in project %s.', project)
    return []
  logger.info('Using harness path %s', harness)

  # The function-related fields are placeholders: a test benchmark is
  # described by its test_file_path rather than by a function signature.
  # NOTE(review): every benchmark gets the same id 'cli' here - confirm that
  # nothing downstream relies on unique ids before these are serialized.
  candidates = [
      benchmarklib.Benchmark(benchmark_id='cli',
                             project=project,
                             language=language,
                             function_signature='test-file',
                             function_name='test-file',
                             return_type='test',
                             params=[],
                             exceptions=[],
                             is_jvm_static=False,
                             target_path=harness,
                             preferred_target_name='',
                             is_test_benchmark=True,
                             test_file_path=test_file)
      for test_file in query_introspector_for_tests(project)
  ]
  return candidates[:limit]


def populate_benchmarks_using_introspector(project: str, language: str,
limit: int,
target_oracles: List[str]):
"""Populates benchmark YAML files from the data from FuzzIntrospector."""

# If any requested oracle name contains 'test-migration', use only the
# test-migration path, because its benchmarks have a different .yaml structure.
# TODO(David): clean up benchmark code to make it more flexible for varying
# forms of target selectors, and for potentially mixing both types of target
# selectors.
for target_oracle in target_oracles:
if 'test-migration' in target_oracle:
return populate_benchmarks_using_test_migration(project, language, limit)

if language == 'jvm':
functions = _select_functions_from_jvm_oracles(project, limit,
target_oracles)
Expand All @@ -703,11 +754,8 @@ def populate_benchmarks_using_introspector(project: str, language: str,
for function in functions
]

result = project_src.search_source(project, filenames, language)
if not result:
return []

harnesses, interesting = result
harnesses, interesting = project_src.search_source(project, filenames,
language)
harness = pick_one(harnesses)
if not harness:
logger.error('No fuzz target found in project %s.', project)
Expand Down
122 changes: 79 additions & 43 deletions experiment/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,26 @@ def to_yaml(cls, benchmarks: list[Benchmark], outdir: str = './'):
# Register the custom representer
yaml.add_representer(str, quoted_string_presenter)
result = {
'project':
benchmarks[0].project,
'language':
benchmarks[0].language,
'target_path':
benchmarks[0].target_path,
'target_name':
benchmarks[0].target_name,
'functions': [{
'signature': b.function_signature,
'name': b.function_name,
'return_type': b.return_type,
'params': b.params,
'exceptions': b.exceptions,
'is_jvm_static': b.is_jvm_static,
} for b in benchmarks],
'project': benchmarks[0].project,
'language': benchmarks[0].language,
'target_path': benchmarks[0].target_path,
'target_name': benchmarks[0].target_name,
'is_test_benchmark': benchmarks[0].is_test_benchmark,
}
if benchmarks[0].is_test_benchmark:
result['test_files'] = [{
'test_file_path': b.test_file_path
} for b in benchmarks]
else:
result['functions'] = [{
'signature': b.function_signature,
'name': b.function_name,
'return_type': b.return_type,
'params': b.params,
'exceptions': b.exceptions,
'is_jvm_static': b.is_jvm_static,
} for b in benchmarks]

with open(os.path.join(outdir, f'{benchmarks[0].project}.yaml'),
'w') as file:
yaml.dump(result, file, default_flow_style=False, width=sys.maxsize)
Expand All @@ -83,32 +86,61 @@ def from_yaml(cls, benchmark_path: str) -> List:
cppify_headers = data.get('cppify_headers', False)
commit = data.get('commit')
functions = data.get('functions', [])
for function in functions:
# Long raw_function_names (particularly for c++ projects) may exceed
# filesystem limits on file path/name length when creating WorkDir.
max_len = os.pathconf('/', 'PC_NAME_MAX') - len('output-')
# Docker tag name cannot exceed 127 characters, and will be suffixed by
# '<sample-id>-experiment'.
docker_name_len = 127 - len('-03-experiment')
max_len = min(max_len, docker_name_len)
truncated_id = f'{project_name}-{function.get("name")}'[:max_len]
benchmarks.append(
cls(truncated_id.lower(),
data['project'],
data['language'],
function.get('signature'),
function.get('name'),
function.get('return_type'),
function.get('params'),
function.get('exceptions', []),
function.get('is_jvm_static', False),
data['target_path'],
data.get('target_name'),
use_project_examples=use_project_examples,
cppify_headers=cppify_headers,
commit=commit,
use_context=use_context,
function_dict=function))

is_test_benchmark = data.get('is_test_benchmark', False)
test_files = data.get('test_files', [])
if is_test_benchmark:
for test_file in test_files:
max_len = os.pathconf('/', 'PC_NAME_MAX') - len('output-')
test_file_path = test_file.get('test_file_path')
normalized_test_path = test_file_path.replace("/",
"_").replace(".", "_")
truncated_id = f'{project_name}-{normalized_test_path}'[:max_len]

benchmarks.append(
cls(
truncated_id.lower(),
data['project'],
data['language'],
'',
'',
'',
[],
[],
False,
data['target_path'],
data.get('target_name', ''),
is_test_benchmark=True,
test_file_path=test_file_path,
))
else:
# function type benchmark
for function in functions:
# Long raw_function_names (particularly for c++ projects) may exceed
# filesystem limits on file path/name length when creating WorkDir.
max_len = os.pathconf('/', 'PC_NAME_MAX') - len('output-')
# Docker tag name cannot exceed 127 characters, and will be suffixed by
# '<sample-id>-experiment'.
docker_name_len = 127 - len('-03-experiment')
max_len = min(max_len, docker_name_len)
truncated_id = f'{project_name}-{function.get("name")}'[:max_len]
benchmarks.append(
cls(truncated_id.lower(),
data['project'],
data['language'],
function.get('signature'),
function.get('name'),
function.get('return_type'),
function.get('params'),
function.get('exceptions', []),
function.get('is_jvm_static', False),
data['target_path'],
data.get('target_name'),
use_project_examples=use_project_examples,
cppify_headers=cppify_headers,
commit=commit,
use_context=use_context,
function_dict=function))

return benchmarks

Expand All @@ -128,7 +160,9 @@ def __init__(self,
cppify_headers=False,
use_context=False,
commit=None,
function_dict: Optional[dict] = None):
function_dict: Optional[dict] = None,
is_test_benchmark: bool = False,
test_file_path: str = ''):
self.id = benchmark_id
self.project = project
self.language = language
Expand All @@ -145,6 +179,8 @@ def __init__(self,
self.use_context = use_context
self.cppify_headers = cppify_headers
self.commit = commit
self.test_file_path = test_file_path
self.is_test_benchmark = is_test_benchmark

if self.language == 'jvm':
# For java projects, in order to differentiate between overloaded methods
Expand Down
1 change: 1 addition & 0 deletions experiment/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def run_log_path(self, generated_target_name: str):

def create_ossfuzz_project(self, name: str, target_file: str) -> str:
"""Creates an OSS-Fuzz project with the generated target."""
logger.info(f'target file: {target_file}')
generated_project_path = os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR,
'projects', name)
if os.path.exists(generated_project_path):
Expand Down
Loading

0 comments on commit 5b5ee46

Please sign in to comment.