Skip to content

Commit

Permalink
Rust: Initialisation of OFG for Rust projects (#781)
Browse files Browse the repository at this point in the history
This PR initialises OFG for Rust projects. This PR supersedes #762.
This PR also fixes the experimental from_scratch approach so that it works
for Rust projects.

---------

Signed-off-by: Arthur Chan <[email protected]>
  • Loading branch information
arthurscchan authored Feb 12, 2025
1 parent de2ab8c commit 248a804
Show file tree
Hide file tree
Showing 22 changed files with 539 additions and 35 deletions.
4 changes: 4 additions & 0 deletions agent/one_prompt_prototyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def _prompt_builder(self,
# For Python projects
return prompt_builder.DefaultPythonTemplateBuilder(
self.llm, benchmark, self.args.template_directory)
if benchmark.language == 'rust':
# For Rust projects
return prompt_builder.DefaultRustTemplateBuilder(
self.llm, benchmark, self.args.template_directory)

if self.args.prompt_builder == 'CSpecific':
return prompt_builder.CSpecificBuilder(self.llm, benchmark,
Expand Down
37 changes: 37 additions & 0 deletions benchmark-sets/rust-small/askama.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"functions":
- "name": "derive_template"
"params":
- "name": "input"
"type": "TokenStream"
"return_type": "TokenStream"
"signature": "pub fn derive_template(input: TokenStream) -> TokenStream"
- "name": "build_template"
"params":
- "name": "ast"
"type": "&syn::DeriveInput"
"return_type": "Result"
"signature": "pub(crate) fn build_template(ast: &syn::DeriveInput) -> Result<String, CompileError>"
- "name": "build_skeleton"
"params":
- "name": "ast"
"type": "&syn::DeriveInput"
"return_type": "Result"
"signature": "fn build_skeleton(ast: &syn::DeriveInput) -> Result<String, CompileError>"
- "name": "Generator::build"
"params":
- "name": "ctx"
"type": "&Context<'a>"
"return_type": "Result"
"signature": "pub(crate) fn build(mut self, ctx: &Context<'a>) -> Result<String, CompileError>"
- "name": "Generator::impl_template"
"params":
- "name": "ctx"
"type": "&Context<'a>"
- "name": "buf"
"type": "&mut Buffer"
"return_type": "Result"
"signature": "fn impl_template(&mut self, ctx: &Context<'a>, buf: &mut Buffer) -> Result<(), CompileError>"
"language": "rust"
"project": "askama"
"target_name": "fuzz_filters"
"target_path": "/src/askama/fuzz/fuzz_targets/fuzz_filters.rs"
29 changes: 29 additions & 0 deletions benchmark-sets/rust-small/bincode.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"functions":
- "name": "DeriveStruct::generate_decode"
"params":
- "name": "generator"
"type": "&mut Generator"
"return_type": "Result"
"signature": "pub fn generate_decode(self, generator: &mut Generator) -> Result<()>"
- "name": "AllTypes::encode"
"params":
- "name": "encoder"
"type": "& mut __E"
"return_type": "core :: result :: Result"
"signature": "fn encode < __E : :: bincode :: enc :: Encoder >(& self, encoder : & mut __E) ->core :: result :: Result < (), :: bincode:: error :: EncodeError >"
- "name": "DeriveEnum::generate_borrow_decode"
"params":
- "name": "generator"
"type": "&mut Generator"
"return_type": "Result"
"signature": "pub fn generate_borrow_decode(self, generator: &mut Generator) -> Result<()>"
- "name": "AllTypes::borrow_decode"
"params":
- "name": "decoder"
"type": "& mut __D"
"return_type": "core :: result :: Result"
"signature": "fn borrow_decode < __D : :: bincode :: de :: BorrowDecoder < '__de > >(decoder : & mut __D) ->core :: result :: Result < Self, :: bincode ::error :: DecodeError >"
"language": "rust"
"project": "bincode"
"target_name": "roundtrip"
"target_path": "/src/bincode/fuzz/fuzz_targets/roundtrip.rs"
23 changes: 23 additions & 0 deletions benchmark-sets/rust-small/httparse.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"functions":
- "name": "parse_reason"
"params":
- "name": "bytes"
"type": "&mut Bytes<'a>"
"return_type": "Result"
"signature": "fn parse_reason<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str>"
- "name": "enable_simd"
"params":
- "name": "version"
"type": "Version"
"return_type": "void"
"signature": "fn enable_simd(version: Version)"
- "name": "match_uri_vectored"
"params":
- "name": "bytes"
"type": "&mut Bytes"
"return_type": "void"
"signature": "fn match_uri_vectored(bytes: &mut Bytes)"
"language": "rust"
"project": "httparse"
"target_name": "parse_chunk_size"
"target_path": "Did-not-find-sourcefile"
25 changes: 25 additions & 0 deletions benchmark-sets/rust-small/itoa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"functions":
- "name": "udivmod_1e19"
"params":
- "name": "n"
"type": "u128"
"return_type": "(u128, u64)"
"signature": "pub fn udivmod_1e19(n: u128) -> (u128, u64)"
- "name": "u128_mulhi"
"params":
- "name": "x"
"type": "u128"
- "name": "y"
"type": "u128"
"return_type": "u128"
"signature": "fn u128_mulhi(x: u128, y: u128) -> u128"
- "name": "Buffer::format"
"params":
- "name": "i"
"type": "I"
"return_type": "&str"
"signature": "pub fn format<I: Integer>(&mut self, i: I) -> &str"
"language": "rust"
"project": "itoa"
"target_name": "fuzz_itoa"
"target_path": "/src/itoa/fuzz/fuzz_targets/fuzz_itoa.rs"
35 changes: 35 additions & 0 deletions benchmark-sets/rust-small/ryu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"functions":
- "name": "s2f"
"params":
- "name": "buffer"
"type": "&[u8]"
"return_type": "Result"
"signature": "pub fn s2f(buffer: &[u8]) -> Result<f32, Error>"
- "name": "s2d"
"params":
- "name": "buffer"
"type": "&[u8]"
"return_type": "Result"
"signature": "pub fn s2d(buffer: &[u8]) -> Result<f64, Error>"
- "name": "Buffer::format"
"params":
- "name": "f"
"type": "F"
"return_type": "&str"
"signature": "pub fn format<F: Float>(&mut self, f: F) -> &str"
- "name": "compute_pow5"
"params":
- "name": "i"
"type": "u32"
"return_type": "(u64, u64)"
"signature": "pub unsafe fn compute_pow5(i: u32) -> (u64, u64)"
- "name": "compute_inv_pow5"
"params":
- "name": "i"
"type": "u32"
"return_type": "(u64, u64)"
"signature": "pub unsafe fn compute_inv_pow5(i: u32) -> (u64, u64)"
"language": "rust"
"project": "ryu"
"target_name": "fuzz_ryu"
"target_path": "/src/ryu/fuzz/fuzz_targets/fuzz_ryu.rs"
45 changes: 45 additions & 0 deletions benchmark-sets/rust-small/utf8parse.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"functions":
- "name": "expand_state_table"
"params":
- "name": "cx"
"type": "&'cx mut ExtCtxt"
- "name": "sp"
"type": "Span"
- "name": "args"
"type": "&[TokenTree]"
"return_type": "Box"
"signature": "fn expand_state_table<'cx>(cx: &'cx mut ExtCtxt,sp: Span,args: &[TokenTree])-> Box<MacResult + 'cx>"
- "name": "parse_raw_definitions"
"params":
- "name": "definitions"
"type": "Vec<TableDefinitionExprs>"
- "name": "cx"
"type": "&mut ExtCtxt"
"return_type": "Result"
"signature": "fn parse_raw_definitions(definitions: Vec<TableDefinitionExprs>,cx: &mut ExtCtxt) -> Result<Vec<TableDefinition>, ()>"
- "name": "input_mapping_from_arm"
"params":
- "name": "arm"
"type": "Arm"
- "name": "cx"
"type": "&mut ExtCtxt"
"return_type": "Result"
"signature": "fn input_mapping_from_arm(arm: Arm, cx: &mut ExtCtxt) -> Result<InputMapping, ()>"
- "name": "Transition::from_expr"
"params":
- "name": "expr"
"type": "&Expr"
- "name": "cx"
"type": "&mut ExtCtxt"
"return_type": "Result"
"signature": "fn from_expr(expr: &Expr, cx: &mut ExtCtxt) -> Result<Transition, ()>"
- "name": "build_state_tables"
"params":
- "name": "defs"
"type": "T"
"return_type": "[[u8; 256]; 16]"
"signature": "fn build_state_tables<T>(defs: T) -> [[u8; 256]; 16]where T: AsRef<[TableDefinition]>"
"language": "rust"
"project": "utf8parse"
"target_name": "parse"
"target_path": "/src/vte/utf8parse/fuzz/fuzz_targets/parse.rs"
26 changes: 18 additions & 8 deletions data_prep/introspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
INTROSPECTOR_ORACLE_FAR_REACH = ''
INTROSPECTOR_ORACLE_KEYWORD = ''
INTROSPECTOR_ORACLE_EASY_PARAMS = ''
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES = ''
INTROSPECTOR_ORACLE_ALL_PUBLIC_CANDIDATES = ''
INTROSPECTOR_ORACLE_OPTIMAL = ''
INTROSPECTOR_ORACLE_ALL_TESTS = ''
INTROSPECTOR_FUNCTION_SOURCE = ''
Expand Down Expand Up @@ -90,6 +90,7 @@ def get_oracle_dict() -> Dict[str, Any]:
'jvm-public-candidates': query_introspector_jvm_all_public_candidates,
'optimal-targets': query_introspector_for_optimal_targets,
'test-migration': query_introspector_for_tests,
'all-public-candidates': query_introspector_all_public_candidates,
}
return oracle_dict

Expand All @@ -102,7 +103,7 @@ def set_introspector_endpoints(endpoint):
INTROSPECTOR_ORACLE_KEYWORD, INTROSPECTOR_ADDR_TYPE, \
INTROSPECTOR_ALL_HEADER_FILES, INTROSPECTOR_ALL_FUNC_TYPES, \
INTROSPECTOR_SAMPLE_XREFS, INTROSPECTOR_ORACLE_EASY_PARAMS, \
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES, \
INTROSPECTOR_ORACLE_ALL_PUBLIC_CANDIDATES, \
INTROSPECTOR_ALL_JVM_SOURCE_PATH, INTROSPECTOR_ORACLE_OPTIMAL, \
INTROSPECTOR_HEADERS_FOR_FUNC, \
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE, \
Expand All @@ -119,7 +120,7 @@ def set_introspector_endpoints(endpoint):
f'{INTROSPECTOR_ENDPOINT}/far-reach-low-cov-fuzz-keyword')
INTROSPECTOR_ORACLE_EASY_PARAMS = (
f'{INTROSPECTOR_ENDPOINT}/easy-params-far-reach')
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES = (
INTROSPECTOR_ORACLE_ALL_PUBLIC_CANDIDATES = (
f'{INTROSPECTOR_ENDPOINT}/all-public-candidates')
INTROSPECTOR_ORACLE_OPTIMAL = f'{INTROSPECTOR_ENDPOINT}/optimal-targets'
INTROSPECTOR_FUNCTION_SOURCE = f'{INTROSPECTOR_ENDPOINT}/function-source-code'
Expand Down Expand Up @@ -277,8 +278,17 @@ def query_introspector_jvm_all_public_candidates(project: str) -> list[dict]:
"""Queries Fuzz Introspector for all public accessible function or
constructor candidates.
"""
return query_introspector_oracle(
project, INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES)
return query_introspector_oracle(project,
INTROSPECTOR_ORACLE_ALL_PUBLIC_CANDIDATES)


def query_introspector_all_public_candidates(project: str) -> list[dict]:
"""Queries Fuzz Introspector for all publicly accessible function or
constructor candidates of |project|.

Forwards to query_introspector_oracle using the
INTROSPECTOR_ORACLE_ALL_PUBLIC_CANDIDATES oracle URL and returns its result.
"""
# TODO: Consider combining this with
# query_introspector_jvm_all_public_candidates.
return query_introspector_oracle(project,
INTROSPECTOR_ORACLE_ALL_PUBLIC_CANDIDATES)


def query_introspector_for_targets(project, target_oracle) -> list[Dict]:
Expand Down Expand Up @@ -876,9 +886,9 @@ def populate_benchmarks_using_introspector(project: str, language: str,
if not any(src_path.endswith(src_file) for src_path in src_path_list):
logger.error('error: %s %s', filename, interesting.keys())
continue
elif interesting and filename not in [
os.path.basename(i) for i in interesting.keys()
]:

elif (language not in ['rust'] and interesting and
filename not in [os.path.basename(i) for i in interesting.keys()]):
# TODO: Bazel messes up paths to include "/proc/self/cwd/..."
logger.error('error: %s %s', filename, interesting.keys())
continue
Expand Down
14 changes: 11 additions & 3 deletions data_prep/project_src.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def _get_harness(src_file: str, out: str, language: str) -> tuple[str, str]:
return '', ''
if language.lower() == 'python' and 'atheris.Fuzz()' not in content:
return '', ''
if language.lower() == 'rust' and 'fuzz_target!' not in content:
return '', ''

short_path = src_file[len(out):]
return short_path, content
Expand Down Expand Up @@ -233,9 +235,9 @@ def _copy_project_src_from_local(project: str, out: str, language: str):
# Sometimes the previous container needs a longer time to be deleted.
# If the next docker run is invoked before the previous container is
# completely removed, it will result in a Conflict error.
# Sleep for 60 seconds and retry.
logger.warning('Failed to run OSS-Fuzz on %s, retry in 60 sec', project)
time.sleep(60)
# Sleep for 180 seconds and retry.
logger.warning('Failed to run OSS-Fuzz on %s, retry in 180 sec', project)
time.sleep(180)
result = sp.run(run_container,
capture_output=True,
stdin=sp.DEVNULL,
Expand Down Expand Up @@ -307,6 +309,12 @@ def _identify_fuzz_targets(out: str, interesting_filenames: list[str],
interesting_filepaths.append(path)
if path.endswith('.py'):
potential_harnesses.append(path)
elif language == 'rust':
# For Rust
if path.endswith(tuple(interesting_filenames)):
interesting_filepaths.append(path)
if path.endswith('.rs'):
potential_harnesses.append(path)
else:
# For C/C++
short_path = path[len(out):]
Expand Down
7 changes: 7 additions & 0 deletions experiment/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,13 @@ def __init__(self,
# zipp-zipp.difference.
self.id = self.id.replace('._', '.')

if self.language == 'rust':
# For Rust projects, a double colon (::) is sometimes used to identify the
# crate, impl or trait name of a function. This could affect the
# benchmark_id and cause the OSS-Fuzz build to fail.
# Special handling of benchmark_id is needed to avoid this situation.
self.id = self.id.replace('::', '-')

def __str__(self):
return (f'Benchmark<id={self.id}, project={self.project}, '
f'language={self.language}, '
Expand Down
Loading

0 comments on commit 248a804

Please sign in to comment.