Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
4533c51
strlen-case in special of base: first draft
nathanschmidt Apr 30, 2023
ab07547
Registering library functions strcat and strstr
nathanschmidt May 2, 2023
41dc76d
Merge branch 'goblint:master' into base-string-analyses
nathanschmidt May 2, 2023
6e089be
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 2, 2023
0242361
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 2, 2023
ccd58d6
String literals analysis: strlen
nathanschmidt May 5, 2023
2cfae22
Merge branch 'goblint:master' into base-string-analyses
nathanschmidt May 5, 2023
3b02139
String literals analysis: strncpy
nathanschmidt May 5, 2023
fb70556
Merge branch 'goblint:master' into base-string-analyses
nathanschmidt May 5, 2023
5567015
String literals analysis: strncpy
nathanschmidt May 5, 2023
4207673
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 5, 2023
8756057
String literals analysis: strcat
nathanschmidt May 6, 2023
c7e159c
String literals analysis: strncat
nathanschmidt May 6, 2023
40a5265
String literals analysis: strncat
nathanschmidt May 6, 2023
c5eb843
String literals analysis: strstr
nathanschmidt May 7, 2023
3d6513d
String literals analysis: strcmp and strncmp
nathanschmidt May 9, 2023
fe171c5
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 9, 2023
f6962bb
String literals analyses: fixed wrong behavior + cleaned up code
nathanschmidt May 11, 2023
826bcc7
Merge branch 'goblint:master' into base-string-analyses
nathanschmidt May 11, 2023
8ee2f47
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 11, 2023
e94b0cc
Fixed analysis for functions that write to string literals + first dr…
nathanschmidt May 15, 2023
5467880
Merge branch 'goblint:master' into base-string-analyses
nathanschmidt May 15, 2023
6a115c0
Adapted tests for string literals analysis
nathanschmidt May 15, 2023
f73f5de
Reintroduced code that shouldn't have been deleted
nathanschmidt May 15, 2023
5ac4ca2
Reintroduced another line that shouldn't have been deleted
nathanschmidt May 15, 2023
1ac36e3
Added invalidate_actions
nathanschmidt May 15, 2023
b7775e2
Incorporated github-code-scanning suggestions
nathanschmidt May 15, 2023
6a1bc3a
Added Apple's version of str(n)cpy and str(n)cat to library functions…
nathanschmidt May 17, 2023
cea104f
Added missing argument of chk versions of strcpy and co.
nathanschmidt May 17, 2023
4acfb23
Added __builtin_object_size to invalide_actions
nathanschmidt May 17, 2023
527ee45
Excluded str(n)cpy / str(n)cat portion of string literals test for macOS
nathanschmidt May 20, 2023
50193f2
Changed test annotations until CIL issue fixed
nathanschmidt May 20, 2023
627396e
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 21, 2023
7fd3341
Clarified test comments
nathanschmidt May 21, 2023
db30ec3
Fixed false warning
nathanschmidt May 21, 2023
09276ca
Improvements after review
nathanschmidt May 22, 2023
74e90e4
Use Cil.kindOfSizeOf instead of IUInt for strlen
nathanschmidt May 22, 2023
e92cc88
Updated test 01
nathanschmidt May 22, 2023
5ef49e9
Try *0; in #ifdef __APPLE__ to trigger warning on macOS in test 01
nathanschmidt May 24, 2023
f789322
Merge branch 'base-string-analyses' of github.com:nathanschmidt/analy…
nathanschmidt May 24, 2023
822b63c
Trying workaround to make macOS test warn
nathanschmidt May 24, 2023
290ca8d
Further changes to trigger warnings on macOS test
nathanschmidt May 24, 2023
c2538e7
Fix mistake in test 01
nathanschmidt May 24, 2023
5095d2b
Use Z.minus_one in string_comparison
nathanschmidt May 27, 2023
587db3d
Handle null byte in string correctly for all functions
nathanschmidt May 31, 2023
52fea5d
Merge branch 'master' into base-string-analyses
nathanschmidt May 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 97 additions & 26 deletions src/analyses/base.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2041,6 +2041,53 @@ struct
let st: store = ctx.local in
let gs = ctx.global in
let desc = LF.find f in
let memory_copying dst src =
let dest_a, dest_typ = addr_type_of_exp dst in
let src_lval = mkMem ~addr:(Cil.stripCasts src) ~off:NoOffset in
let src_typ = eval_lv (Analyses.ask_of_ctx ctx) gs st src_lval
|> AD.get_type in
(* when src and destination type coincide, take value from the source, otherwise use top *)
let value = if typeSig dest_typ = typeSig src_typ then
let src_cast_lval = mkMem ~addr:(Cilfacade.mkCast ~e:src ~newt:(TPtr (dest_typ, []))) ~off:NoOffset in
eval_rv (Analyses.ask_of_ctx ctx) gs st (Lval src_cast_lval)
else
VD.top_value (unrollType dest_typ)
in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value in
(* for string functions *)
let eval_n = function
(* if only n characters of a given string are needed, evaluate expression n to an integer option *)
| Some n ->
begin match eval_rv (Analyses.ask_of_ctx ctx) gs st n with
| `Int i ->
begin match ID.to_int i with
| Some x -> Some (Z.to_int x)
| _ -> Some (-1)
end
| _ -> Some (-1)
end
(* do nothing if all characters are needed *)
| _ -> None
in
let string_manipulation s1 s2 lv all op =
let s1_a, s1_typ = addr_type_of_exp s1 in
let s2_a, s2_typ = addr_type_of_exp s2 in
match lv, op with
| Some lv_val, Some f ->
(* when whished types coincide, compute result of operation op, otherwise use top *)
let lv_a = eval_lv (Analyses.ask_of_ctx ctx) gs st lv_val in
let lv_typ = Cilfacade.typeOfLval lv_val in
if all && typeSig s1_typ = typeSig s2_typ && typeSig s2_typ = typeSig lv_typ then (* all types need to coincide *)
lv_a, lv_typ, (f s1_a s2_a)
else if not all && typeSig s1_typ = typeSig s2_typ then (* only the types of s1 and s2 need to coincide *)
lv_a, lv_typ, (f s1_a s2_a)
else
lv_a, lv_typ, (VD.top_value (unrollType lv_typ))
| _ ->
(* check if s1 is potentially a string literal as writing to it would be undefined behavior; then return top *)
let _ = AD.string_writing_defined s1_a in
s1_a, s1_typ, VD.top_value (unrollType s1_typ)
in
let st = match desc.special args, f.vname with
| Memset { dest; ch; count; }, _ ->
(* TODO: check count *)
Expand All @@ -2060,33 +2107,57 @@ struct
let dest_a, dest_typ = addr_type_of_exp dest in
let value = VD.zero_init_value dest_typ in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
| Memcpy { dest = dst; src }, _
| Strcpy { dest = dst; src }, _ ->
(* invalidating from interactive *)
(* let dest_a, dest_typ = addr_type_of_exp dst in
let value = VD.top_value dest_typ in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value *)
(* TODO: reuse addr_type_of_exp for master *)
(* assigning from master *)
let get_type lval =
let address = eval_lv (Analyses.ask_of_ctx ctx) gs st lval in
AD.get_type address
in
let dst_lval = mkMem ~addr:(Cil.stripCasts dst) ~off:NoOffset in
let src_lval = mkMem ~addr:(Cil.stripCasts src) ~off:NoOffset in

let dest_typ = get_type dst_lval in
let src_typ = get_type src_lval in

(* When src and destination type coincide, take value from the source, otherwise use top *)
let value = if typeSig dest_typ = typeSig src_typ then
let src_cast_lval = mkMem ~addr:(Cilfacade.mkCast ~e:src ~newt:(TPtr (dest_typ, []))) ~off:NoOffset in
eval_rv (Analyses.ask_of_ctx ctx) gs st (Lval src_cast_lval)
else
VD.top_value (unrollType dest_typ)
in
let dest_a = eval_lv (Analyses.ask_of_ctx ctx) gs st dst_lval in
| Memcpy { dest = dst; src }, _ ->
memory_copying dst src
(* strcpy(dest, src); *)
| Strcpy { dest = dst; src; n = None }, _ ->
let dest_a, dest_typ = addr_type_of_exp dst in
(* when dest surely isn't a string literal, try copying src to dest *)
if AD.string_writing_defined dest_a then
memory_copying dst src
else
(* else return top (after a warning was issued) *)
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ (VD.top_value (unrollType dest_typ))
(* strncpy(dest, src, n); *)
| Strcpy { dest = dst; src; n }, _ ->
begin match eval_n n with
| Some num ->
let dest_a, dest_typ, value = string_manipulation dst src None false None in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
| None -> failwith "already handled in case above"
end
| Strcat { dest = dst; src; n }, _ ->
let dest_a, dest_typ, value = string_manipulation dst src None false None in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
| Strlen s, _ ->
begin match lv with
| Some lv_val ->
let dest_a = eval_lv (Analyses.ask_of_ctx ctx) gs st lv_val in
let dest_typ = Cilfacade.typeOfLval lv_val in
let lval = mkMem ~addr:(Cil.stripCasts s) ~off:NoOffset in
let address = eval_lv (Analyses.ask_of_ctx ctx) gs st lval in
let value = `Int(AD.to_string_length address) in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
| None -> st
end
| Strstr { haystack; needle }, _ ->
begin match lv with
| Some _ ->
(* when haystack, needle and dest type coincide, check if needle is a substring of haystack:
if that is the case, assign the substring of haystack starting at the first occurrence of needle to dest,
else use top *)
let dest_a, dest_typ, value = string_manipulation haystack needle lv true (Some (fun h_a n_a -> `Address(AD.substring_extraction h_a n_a))) in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
| None -> st
end
| Strcmp { s1; s2; n }, _ ->
begin match lv with
| Some _ ->
(* when s1 and s2 type coincide, compare both both strings completely or their first n characters, otherwise use top *)
let dest_a, dest_typ, value = string_manipulation s1 s2 lv false (Some (fun s1_a s2_a -> `Int(AD.string_comparison s1_a s2_a (eval_n n)))) in
set ~ctx (Analyses.ask_of_ctx ctx) gs st dest_a dest_typ value
| None -> st
end
| Abort, _ -> raise Deadcode
| ThreadExit { ret_val = exp }, _ ->
begin match ThreadId.get_current (Analyses.ask_of_ctx ctx) with
Expand Down
6 changes: 5 additions & 1 deletion src/analyses/libraryDesc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ type special =
| Memset of { dest: Cil.exp; ch: Cil.exp; count: Cil.exp; }
| Bzero of { dest: Cil.exp; count: Cil.exp; }
| Memcpy of { dest: Cil.exp; src: Cil.exp }
| Strcpy of { dest: Cil.exp; src: Cil.exp } (* TODO: add count for strncpy when actually used *)
| Strcpy of { dest: Cil.exp; src: Cil.exp; n: Cil.exp option; }
| Strcat of { dest: Cil.exp; src: Cil.exp; n: Cil.exp option; }
| Strlen of Cil.exp
| Strstr of { haystack: Cil.exp; needle: Cil.exp; }
| Strcmp of { s1: Cil.exp; s2: Cil.exp; n: Cil.exp option; }
| Abort
| Identity of Cil.exp (** Identity function. Some compiler optimization annotation functions map to this. *)
| Setjmp of { env: Cil.exp; }
Expand Down
29 changes: 18 additions & 11 deletions src/analyses/libraryFunctions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,23 @@ let c_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[
("memcpy", special [__ "dest" [w]; __ "src" [r]; drop "n" []] @@ fun dest src -> Memcpy { dest; src });
("__builtin_memcpy", special [__ "dest" [w]; __ "src" [r]; drop "n" []] @@ fun dest src -> Memcpy { dest; src });
("__builtin___memcpy_chk", special [__ "dest" [w]; __ "src" [r]; drop "n" []; drop "os" []] @@ fun dest src -> Memcpy { dest; src });
("strncpy", special [__ "dest" [w]; __ "src" [r]; drop "n" []] @@ fun dest src -> Strcpy { dest; src });
("strcpy", special [__ "dest" [w]; __ "src" [r]] @@ fun dest src -> Strcpy { dest; src });
("strcpy", special [__ "dest" [w]; __ "src" [r]] @@ fun dest src -> Strcpy { dest; src; n = None; });
("__builtin_strcpy", special [__ "dest" [w]; __ "src" [r]] @@ fun dest src -> Strcpy { dest; src; n = None; });
("__builtin___strcpy_chk", special [__ "dest" [w]; __ "src" [r]; drop "os" []] @@ fun dest src -> Strcpy { dest; src; n = None; });
("strncpy", special [__ "dest" [w]; __ "src" [r]; __ "n" []] @@ fun dest src n -> Strcpy { dest; src; n = Some n; });
("__builtin_strncpy", special [__ "dest" [w]; __ "src" [r]; __ "n" []] @@ fun dest src n -> Strcpy { dest; src; n = Some n; });
("__builtin___strncpy_chk", special [__ "dest" [w]; __ "src" [r]; __ "n" []; drop "os" []] @@ fun dest src n -> Strcpy { dest; src; n = Some n; });
("strcat", special [__ "dest" [r; w]; __ "src" [r]] @@ fun dest src -> Strcat { dest; src; n = None; });
("__builtin_strcat", special [__ "dest" [r; w]; __ "src" [r]] @@ fun dest src -> Strcat { dest; src; n = None; });
("__builtin___strcat_chk", special [__ "dest" [r; w]; __ "src" [r]; drop "os" []] @@ fun dest src -> Strcat { dest; src; n = None; });
("strncat", special [__ "dest" [r; w]; __ "src" [r]; __ "n" []] @@ fun dest src n -> Strcat { dest; src; n = Some n; });
("__builtin_strncat", special [__ "dest" [r; w]; __ "src" [r]; __ "n" []] @@ fun dest src n -> Strcat { dest; src; n = Some n; });
("__builtin___strncat_chk", special [__ "dest" [r; w]; __ "src" [r]; __ "n" []; drop "os" []] @@ fun dest src n -> Strcat { dest; src; n = Some n; });
("strlen", special [__ "s" [r]] @@ fun s -> Strlen s);
("strstr", special [__ "haystack" [r]; __ "needle" [r]] @@ fun haystack needle -> Strstr { haystack; needle; });
("strcmp", special [__ "s1" [r]; __ "s2" [r]] @@ fun s1 s2 -> Strcmp { s1; s2; n = None; });
("__builtin_strcmp", special [__ "s1" [r]; __ "s2" [r]] @@ fun s1 s2 -> Strcmp { s1; s2; n = None; });
("strncmp", special [__ "s1" [r]; __ "s2" [r]; __ "n" []] @@ fun s1 s2 n -> Strcmp { s1; s2; n = Some n; });
("malloc", special [__ "size" []] @@ fun size -> Malloc size);
("realloc", special [__ "ptr" [r; f]; __ "size" []] @@ fun ptr size -> Realloc { ptr; size });
("abort", special [] Abort);
Expand Down Expand Up @@ -671,12 +686,7 @@ let invalidate_actions = [
"__builtin___snprintf_chk", writes [1];(*keep [1]*)
"sprintf", writes [1];(*keep [1]*)
"sscanf", writesAllButFirst 2 readsAll;(*drop 2*)
"strcmp", readsAll;(*safe*)
"strftime", writes [1];(*keep [1]*)
"strlen", readsAll;(*safe*)
"strncmp", readsAll;(*safe*)
"strncat", writes [1];(*keep [1]*)
"strstr", readsAll;(*safe*)
"strdup", readsAll;(*safe*)
"toupper", readsAll;(*safe*)
"tolower", readsAll;(*safe*)
Expand All @@ -695,7 +705,6 @@ let invalidate_actions = [
"sigfillset", writesAll; (*unsafe*)
"sigprocmask", writesAll; (*unsafe*)
"uname", writesAll;(*unsafe*)
"__builtin_strcmp", readsAll;(*safe*)
"getopt_long", writesAllButFirst 2 readsAll;(*drop 2*)
"__strdup", readsAll;(*safe*)
"strtoul__extinline", readsAll;(*safe*)
Expand Down Expand Up @@ -737,6 +746,7 @@ let invalidate_actions = [
"pthread_sigmask", writesAllButFirst 2 readsAll;(*unsafe*)
"raise", writesAll;(*unsafe*)
"_strlen", readsAll;(*safe*)
"__builtin_object_size", readsAll;(*safe*)
"__builtin_alloca", readsAll;(*safe*)
"dlopen", readsAll;(*safe*)
"dlsym", readsAll;(*safe*)
Expand All @@ -745,9 +755,6 @@ let invalidate_actions = [
"stat__extinline", writesAllButFirst 1 readsAll;(*drop 1*)
"lstat__extinline", writesAllButFirst 1 readsAll;(*drop 1*)
"__builtin_strchr", readsAll;(*safe*)
"__builtin___strcpy", writes [1];(*keep [1]*)
"__builtin___strcpy_chk", writes [1];(*keep [1]*)
"strcat", writes [1];(*keep [1]*)
"strtok", readsAll;(*safe*)
"getpgrp", readsAll;(*safe*)
"umount2", readsAll;(*safe*)
Expand Down
73 changes: 73 additions & 0 deletions src/cdomains/addressDomain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,80 @@ struct

(* strings *)
let from_string x = singleton (Addr.from_string x)

let to_string x = List.filter_map Addr.to_string (elements x)

let to_string_length x =
let transform elem =
match Addr.to_string_length elem with
| Some x -> Idx.of_int !Cil.kindOfSizeOf (Z.of_int x)
| None -> Idx.top_of !Cil.kindOfSizeOf in
(* maps any StrPtr to the length of its content, otherwise maps to top *)
List.map transform (elements x)
(* and returns the least upper bound of computed IntDomain values *)
|> List.fold_left Idx.join (Idx.bot_of !Cil.kindOfSizeOf)

let substring_extraction haystack needle =
(* map all StrPtr elements in input address sets to contained strings *)
let haystack' = List.map Addr.to_c_string (elements haystack) in
let needle' = List.map Addr.to_c_string (elements needle) in

(* helper functions *)
let extract_lval_string = function
| Some s -> from_string s
| None -> null_ptr in
let compute_substring s1 s2 =
try
let i = Str.search_forward (Str.regexp_string s2) s1 0 in
Some (String.sub s1 i (String.length s1 - i))
with Not_found -> None in

(* if any of the input address sets contains an element that isn't a StrPtr, return top *)
if List.mem None haystack' || List.mem None needle' then
top_ptr
else
(* else try to find the first occurrence of all strings in needle' in all strings s of haystack',
collect s starting from that occurrence or if there is none, collect a NULL pointer,
and return the least upper bound *)
BatList.cartesian_product haystack' needle'
|> List.map (fun (s1, s2) -> extract_lval_string (compute_substring (Option.get s1) (Option.get s2)))
|> List.fold_left join (bot ())

let string_comparison x y n =
let f = match n with
| Some num -> Addr.to_n_c_string num
| None -> Addr.to_c_string in

(* map all StrPtr elements in input address sets to contained strings / n-substrings *)
let x' = List.map f (elements x) in
let y' = List.map f (elements y) in

(* helper functions *)
let compare s1 s2 =
let res = String.compare s1 s2 in
if res = 0 then
Idx.of_int IInt Z.zero
else if res > 0 then
Idx.starting IInt Z.one
else
Idx.ending IInt Z.minus_one in

(* if any of the input address sets contains an element that isn't a StrPtr, return top *)
if List.mem None x' || List.mem None y' then
Idx.top_of IInt
else
(* else compare every string of x' with every string of y' and return the least upper bound *)
BatList.cartesian_product x' y'
|> List.map (fun (s1, s2) -> compare (Option.get s1) (Option.get s2))
|> List.fold_left Idx.join (Idx.bot_of IInt)

let string_writing_defined dest =
(* if the destination address set contains a StrPtr, writing to such a string literal is undefined behavior *)
if List.exists Option.is_some (List.map Addr.to_c_string (elements dest)) then
(M.warn ~category:M.Category.Behavior.Undefined.other "May write to a string literal, which leads to a segmentation fault in most cases";
false)
else
true

(* add an & in front of real addresses *)
module ShortAddr =
Expand Down
22 changes: 22 additions & 0 deletions src/cdomains/lval.ml
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,28 @@ struct
let to_string = function
| StrPtr (Some x) -> Some x
| _ -> None
(* only keep part before first null byte *)
let to_c_string = function
| StrPtr (Some x) ->
begin match String.split_on_char '\x00' x with
| s::_ -> Some s
| [] -> None
end
| _ -> None
let to_n_c_string n x =
match to_c_string x with
| Some x ->
if n > String.length x then
Some x
else if n < 0 then
None
else
Some (String.sub x 0 n)
| _ -> None
let to_string_length x =
match to_c_string x with
| Some x -> Some (String.length x)
| _ -> None

(* exception if the offset can't be followed completely *)
exception Type_offset of typ * string
Expand Down
Loading