From 8a9d2dab25305ddef7bf22ae1fd5fe3debe0beb5 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Fri, 23 Apr 2021 15:04:12 +0200 Subject: [PATCH 01/10] cohttp.headers: use faster comparison Signed-off-by: Marcello Seri --- CHANGES.md | 3 ++- cohttp/src/header.ml | 41 ++++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 277ec5eb88..08313a9642 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,7 +8,7 @@ stack overflow happens in the XHR completion handler (mefyl #762). - lwt_jsoo: Add test suite (mefyl #764). -- Cohttp.Header: new implementation (@lyrm #747) +- Cohttp.Header: new implementation (lyrm #747) + New implementation of Header modules using an associative list instead of a map, with one major semantic change (function ```get```, see below), and some new functions (```clean_dup```, ```get_multi_concat```) + More Alcotest tests as well as fuzzing tests for this particular module. @@ -38,6 +38,7 @@ + ```clean_dup``` enables the user to clean headers that follows the {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} RFC7230§3.2.2} (no duplicate, except ```set-cookie```) + ```get_multi_concat``` has been added to get a result similar to the previous ```get``` function. +- Cohttp.Header: optimize internal of cohttp.headers (mseri #778) ## v4.0.0 (2021-03-24) diff --git a/cohttp/src/header.ml b/cohttp/src/header.ml index 2f3166e4fe..2b7613785d 100644 --- a/cohttp/src/header.ml +++ b/cohttp/src/header.ml @@ -21,13 +21,26 @@ module LString : sig val of_string : string -> t val to_string : t -> string - val compare : t -> t -> int + val equal : t -> t -> bool end = struct type t = string let of_string x = String.lowercase_ascii x let to_string x = x - let compare a b = String.compare a b + + let equal x y = + let len = String.length x in + len = String.length y + && + let equal_so_far = ref true in + let i = ref 0 in + while !equal_so_far && !i < len do + let c1 = String.unsafe_get x !i in + let c2 = String.unsafe_get y !i in + equal_so_far := c1 = c2; + incr i + done; + !equal_so_far end type t = (LString.t * string) list @@ -41,7 +54,7 @@ let mem h k = let k = LString.of_string k in let rec loop = function | [] -> false - | (k', _) :: h' -> if LString.compare k k' = 0 then true else loop h' + | (k', _) :: h' -> if LString.equal k k' then true else loop h' in loop h @@ -62,17 +75,17 @@ let get h k = let rec loop h = match h with | [] -> None - | (k', v) :: h' -> if LString.compare k k' = 0 then Some v else loop h' + | (k', v) :: h' -> if LString.equal k k' then Some v else loop h' in loop h let get_multi (h : t) (k : string) = + let k = LString.of_string k in let rec loop h acc = match h with | [] -> acc | (k', v) :: h' -> - if LString.compare (LString.of_string k) k' = 0 then loop h' (v :: acc) - else loop h' acc + if LString.equal k k' then loop h' (v :: acc) else loop h' acc in loop h [] @@ -80,7 +93,7 @@ let remove h k = let k = LString.of_string k in let rec loop seen = function | [] -> if seen then [] else raise Not_found - | (k', _) :: h when LString.compare k k' = 0 -> loop true h + | (k', _) :: h when LString.equal k k' -> loop true h | x :: h -> x :: loop seen h in try loop false h with Not_found -> h @@ -89,7 +102,7 @@ let remove_last h k = let k = LString.of_string k in let rec loop seen = function | [] -> raise Not_found - | (k', _) :: h when LString.compare k k' = 0 -> h + | (k', _) :: h when LString.equal k k' -> h | x :: h -> x :: loop seen h in try loop false h with Not_found -> h @@ -98,7 +111,7 @@ let replace_ last h k v = let k' = LString.of_string k in let rec loop seen = function | [] -> if seen then [] else raise Not_found - | (k'', _) :: h when LString.compare k' k'' = 0 -> + | (k'', _) :: h when LString.equal k' k'' -> if last then (k'', v) :: h else if not seen then (k', v) :: loop true h else loop seen h @@ -202,15 +215,17 @@ let is_header_with_list_value = headers_with_list_values |> Array.iter (fun h -> Hashtbl.add tbl h ()); fun h -> Hashtbl.mem tbl h -let is_set_cookie k = LString.(compare k (of_string "set-cookie")) +let is_set_cookie = + let k' = LString.of_string "set-cookie" in + fun k -> LString.equal k k' (* set-cookie is an exception according to {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} - RFC7230§3.2.2} and can appear multiple times in a response message. + RFC7230§3.2.2} and can appear multiple times in a response message. *) let clean_dup (h : t) : t = let add h k v = - if is_set_cookie k = 0 then (k, v) :: h + if is_set_cookie k then (k, v) :: h else let to_add = ref false in let rec loop = function @@ -218,7 +233,7 @@ let clean_dup (h : t) : t = to_add := true; [] | (k', v') :: hs -> - if LString.compare k k' = 0 then + if LString.equal k k' then if is_header_with_list_value k then (k, v' ^ "," ^ v) :: hs else ( to_add := true; From c5f8bc72f3e6ca6ff24a0730a04ad0d6ab31dd75 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Fri, 23 Apr 2021 16:39:06 +0200 Subject: [PATCH 02/10] cohttp.headers: use String.equal Signed-off-by: Marcello Seri --- cohttp/src/header.ml | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/cohttp/src/header.ml b/cohttp/src/header.ml index 2b7613785d..fdc324447d 100644 --- a/cohttp/src/header.ml +++ b/cohttp/src/header.ml @@ -27,20 +27,7 @@ end = struct let of_string x = String.lowercase_ascii x let to_string x = x - - let equal x y = - let len = String.length x in - len = String.length y - && - let equal_so_far = ref true in - let i = ref 0 in - while !equal_so_far && !i < len do - let c1 = String.unsafe_get x !i in - let c2 = String.unsafe_get y !i in - equal_so_far := c1 = c2; - incr i - done; - !equal_so_far + let equal x y = String.equal x y end type t = (LString.t * string) list From 2cb520f1d87f11ca9f0cb1e6d0f58a1ab8114bc8 Mon Sep 17 00:00:00 2001 From: Anurag Soni Date: Thu, 17 Jun 2021 09:21:10 -0400 Subject: [PATCH 03/10] use caseless compare for header keys --- cohttp/src/header.ml | 138 +++++++++++++---------------- cohttp/test/unitary_test_header.ml | 2 +- 2 files changed, 63 insertions(+), 77 deletions(-) diff --git a/cohttp/src/header.ml b/cohttp/src/header.ml index fdc324447d..5668081d69 100644 --- a/cohttp/src/header.ml +++ b/cohttp/src/header.ml @@ -16,36 +16,37 @@ * }}}*) -module LString : sig - type t - - val of_string : string -> t - val to_string : t -> string - val equal : t -> t -> bool -end = struct - type t = string - - let of_string x = String.lowercase_ascii x - let to_string x = x - let equal x y = String.equal x y -end - -type t = (LString.t * string) list +let caseless_equal a b = + if a == b then true + else + let len = String.length a in + len = String.length b + && + let stop = ref false in + let idx = ref 0 in + while (not !stop) && !idx < len do + let c1 = String.unsafe_get a !idx in + let c2 = String.unsafe_get b !idx in + if Char.lowercase_ascii c1 <> Char.lowercase_ascii c2 then stop := true; + incr idx + done; + not !stop + +type t = (string * string) list let compare = Stdlib.compare let init () = [] let is_empty = function [] -> true | _ -> false -let init_with k v = [ (LString.of_string k, v) ] +let init_with k v = [ (k, v) ] let mem h k = - let k = LString.of_string k in let rec loop = function | [] -> false - | (k', _) :: h' -> if LString.equal k k' then true else loop h' + | (k', _) :: h' -> if caseless_equal k k' then true else loop h' in loop h -let add h k v : t = (LString.of_string k, v) :: h +let add h k v : t = (k, v) :: h let add_list h l = List.fold_left (fun h (k, v) -> add h k v) h l let add_multi h k l = List.fold_left (fun h v -> add h k v) h l @@ -58,11 +59,10 @@ let add_opt_unless_exists h k v = match h with None -> init_with k v | Some h -> add_unless_exists h k v let get h k = - let k = LString.of_string k in let rec loop h = match h with | [] -> None - | (k', v) :: h' -> if LString.equal k k' then Some v else loop h' + | (k', v) :: h' -> if caseless_equal k k' then Some v else loop h' in loop h @@ -72,35 +72,32 @@ let get_multi (h : t) (k : string) = match h with | [] -> acc | (k', v) :: h' -> - if LString.equal k k' then loop h' (v :: acc) else loop h' acc + if caseless_equal k k' then loop h' (v :: acc) else loop h' acc in loop h [] let remove h k = - let k = LString.of_string k in let rec loop seen = function | [] -> if seen then [] else raise Not_found - | (k', _) :: h when LString.equal k k' -> loop true h + | (k', _) :: h when caseless_equal k k' -> loop true h | x :: h -> x :: loop seen h in try loop false h with Not_found -> h let remove_last h k = - let k = LString.of_string k in let rec loop seen = function | [] -> raise Not_found - | (k', _) :: h when LString.equal k k' -> h + | (k', _) :: h when caseless_equal k k' -> h | x :: h -> x :: loop seen h in try loop false h with Not_found -> h let replace_ last h k v = - let k' = LString.of_string k in let rec loop seen = function | [] -> if seen then [] else raise Not_found - | (k'', _) :: h when LString.equal k' k'' -> + | (k'', _) :: h when caseless_equal k k'' -> if last then (k'', v) :: h - else if not seen then (k', v) :: loop true h + else if not seen then (k, v) :: loop true h else loop seen h | x :: h -> x :: loop seen h in @@ -129,33 +126,26 @@ let update_all h k f = let map (f : string -> string -> string) (h : t) : t = List.map (fun (k, v) -> - let vs' = f (LString.to_string k) v in + let vs' = f k v in (k, vs')) h let iter (f : string -> string -> unit) (h : t) : unit = - List.iter (fun (k, v) -> f (LString.to_string k) v) h + List.iter (fun (k, v) -> f k v) h let fold (f : string -> string -> 'a -> 'a) (h : t) (init : 'a) : 'a = - List.fold_left (fun acc (k, v) -> f (LString.to_string k) v acc) init h - -let of_list h = - List.fold_left (fun acc (k, v) -> (LString.of_string k, v) :: acc) [] h + List.fold_left (fun acc (k, v) -> f k v acc) init h -let to_list h = - List.fold_left (fun acc (k, v) -> (LString.to_string k, v) :: acc) [] h +let of_list h = List.rev h +let to_list h = List.rev h let to_lines (h : t) = let header_line k v = Printf.sprintf "%s: %s\r\n" k v in - List.fold_left - (fun acc (k, v) -> header_line (LString.to_string k) v :: acc) - [] h + List.fold_left (fun acc (k, v) -> header_line k v :: acc) [] h let to_frames h = let to_frame k v = Printf.sprintf "%s: %s" k v in - List.fold_left - (fun acc (k, v) -> to_frame (LString.to_string k) v :: acc) - [] h + List.fold_left (fun acc (k, v) -> to_frame k v :: acc) [] h let to_string h = let b = Buffer.create 128 in @@ -169,42 +159,39 @@ let to_string h = Buffer.contents b let headers_with_list_values = - Array.map LString.of_string - [| - "accept"; - "accept-charset"; - "accept-encoding"; - "accept-language"; - "accept-ranges"; - "allow"; - "cache-control"; - "connection"; - "content-encoding"; - "content-language"; - "expect"; - "if-match"; - "if-none-match"; - "link"; - "pragma"; - "proxy-authenticate"; - "te"; - "trailer"; - "transfer-encoding"; - "upgrade"; - "vary"; - "via"; - "warning"; - "www-authenticate"; - |] + [| + "accept"; + "accept-charset"; + "accept-encoding"; + "accept-language"; + "accept-ranges"; + "allow"; + "cache-control"; + "connection"; + "content-encoding"; + "content-language"; + "expect"; + "if-match"; + "if-none-match"; + "link"; + "pragma"; + "proxy-authenticate"; + "te"; + "trailer"; + "transfer-encoding"; + "upgrade"; + "vary"; + "via"; + "warning"; + "www-authenticate"; + |] let is_header_with_list_value = let tbl = Hashtbl.create (Array.length headers_with_list_values) in headers_with_list_values |> Array.iter (fun h -> Hashtbl.add tbl h ()); fun h -> Hashtbl.mem tbl h -let is_set_cookie = - let k' = LString.of_string "set-cookie" in - fun k -> LString.equal k k' +let is_set_cookie k = caseless_equal k "set-cookie" (* set-cookie is an exception according to {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} @@ -220,7 +207,7 @@ let clean_dup (h : t) : t = to_add := true; [] | (k', v') :: hs -> - if LString.equal k k' then + if caseless_equal k k' then if is_header_with_list_value k then (k, v' ^ "," ^ v) :: hs else ( to_add := true; @@ -233,8 +220,7 @@ let clean_dup (h : t) : t = List.rev h |> List.fold_left (fun acc (k, v) -> add acc k v) [] let get_multi_concat ?(list_value_only = false) h k : string option = - if (not list_value_only) || is_header_with_list_value (LString.of_string k) - then + if (not list_value_only) || is_header_with_list_value k then let vs = get_multi h k in match vs with [] -> None | _ -> Some (String.concat "," vs) else get h k diff --git a/cohttp/test/unitary_test_header.ml b/cohttp/test/unitary_test_header.ml index d3421e8764..a47b879544 100644 --- a/cohttp/test/unitary_test_header.ml +++ b/cohttp/test/unitary_test_header.ml @@ -58,7 +58,7 @@ let is_empty_tests () = let init_with_tests () = aessl "init_with k v" - [ ("transfer-encoding", "chunked") ] + [ ("traNsfer-eNcoding", "chunked") ] H.(to_list (init_with "traNsfer-eNcoding" "chunked")) let mem_tests () = From 1cbddbd7ccd1072e5e3703082f4b52dd0f3e4580 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 30 Jun 2021 17:34:28 +0200 Subject: [PATCH 04/10] .github/workflow: fix workflow Signed-off-by: Marcello Seri --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 995e469628..ee4132a53b 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -25,7 +25,7 @@ jobs: uses: actions/checkout@v2 - name: Use OCaml ${{ matrix.ocaml-version }} - uses: actions-ml/setup-ocaml@master + uses: ocaml/setup-ocaml@v2 with: ocaml-version: ${{ matrix.ocaml-version }} opam-depext: false From c5b6edf608dc1d1d0d67da38da5a9acf3a9a9680 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 30 Jun 2021 17:38:52 +0200 Subject: [PATCH 05/10] Header: make wording even more explicit Signed-off-by: Marcello Seri --- cohttp/src/header.mli | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cohttp/src/header.mli b/cohttp/src/header.mli index a3edb4ec9e..38d6823c99 100644 --- a/cohttp/src/header.mli +++ b/cohttp/src/header.mli @@ -32,7 +32,8 @@ val of_list : (string * string) list -> t is true with case insensitive comparison. *) val to_list : t -> (string * string) list -(** [to_list h] converts HTTP headers [h] to a list. Order is preserved. +(** [to_list h] converts HTTP headers [h] to a list. Order and case is + preserved. {e Invariant (with case insensitive comparison):} [to_list (of_list l) = l] *) From b0cb949c325b2796a99459376cfd7ec3ca79ef54 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 30 Jun 2021 17:39:03 +0200 Subject: [PATCH 06/10] CHANGES: update Signed-off-by: Marcello Seri --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 08313a9642..68eee4a01a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,9 @@ + New implementation of Header modules using an associative list instead of a map, with one major semantic change (function ```get```, see below), and some new functions (```clean_dup```, ```get_multi_concat```) + More Alcotest tests as well as fuzzing tests for this particular module. +- Cohttp.Header: performance improvement (mseri, anuragsoni #778) + **Breaking** the headers are no-longer lowercased when parsed, the headers key comparison is case insensitive instead. + ### Purpose The new header implementation uses an associative list instead of a map to represent headers and is focused on predictability and intuitivity: except for some specific and documented functions, the headers are always kept in transmission order, which makes debugging easier and is also important for [RFC7230§3.2.2](https://tools.ietf.org/html/rfc7230#section-3.2.2) that states that multiple values of a header must be kept in order. From 10570a6ab3f5ca4dd22b3101a8758816c6f67e99 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 30 Jun 2021 17:53:40 +0200 Subject: [PATCH 07/10] header: fix rebasing leftover Signed-off-by: Marcello Seri --- cohttp/src/header.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/cohttp/src/header.ml b/cohttp/src/header.ml index 5668081d69..f4995c07ed 100644 --- a/cohttp/src/header.ml +++ b/cohttp/src/header.ml @@ -67,7 +67,6 @@ let get h k = loop h let get_multi (h : t) (k : string) = - let k = LString.of_string k in let rec loop h acc = match h with | [] -> acc From 7422726c5084d043142d012041222ae423bcbf65 Mon Sep 17 00:00:00 2001 From: Marcello Seri Date: Wed, 30 Jun 2021 18:18:29 +0200 Subject: [PATCH 08/10] fuzz_header: update tests Signed-off-by: Marcello Seri --- cohttp/fuzz/fuzz_header.ml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cohttp/fuzz/fuzz_header.ml b/cohttp/fuzz/fuzz_header.ml index d53b743c04..ed982ca06f 100644 --- a/cohttp/fuzz/fuzz_header.ml +++ b/cohttp/fuzz/fuzz_header.ml @@ -189,10 +189,10 @@ let is_empty_test () = let init_with_test () = Crowbar.( (* FS *) - (* forall k v. to_list (init_with k v) = [String.lowercase k, v] *) + (* forall k v. to_list (init_with k v) = [k, v] *) add_test ~name:"[init_list k v] is [k, v]" [ header_name_gen; word_gen ] (fun k v -> - check_eq H.(to_list (init_with k v)) [ (String.lowercase_ascii k, v) ])) + check_eq H.(to_list (init_with k v)) [ (k, v) ])) let mem_test () = Crowbar.( @@ -206,7 +206,7 @@ let mem_test () = [ headers_gen; header_name_gen ] (fun h k -> check_eq H.(mem h k) - List.(mem_assoc (String.lowercase_ascii k) (H.to_list h)))) + List.(mem_assoc k (H.to_list h)))) let add_test () = Crowbar.( @@ -221,7 +221,7 @@ let add_test () = ~name:"[add] adds a value at the header end" [ headers_gen; header_name_gen; word_gen ] (fun h k v -> check_eq - (H.to_list h @ [ (String.lowercase_ascii k, v) ]) + (H.to_list h @ [ (k, v) ]) H.(to_list (add h k v)))) let to_list_of_list_test () = From f74c85fe2c3379f67136296900b9b3d88d923a52 Mon Sep 17 00:00:00 2001 From: Anurag Soni Date: Sat, 3 Jul 2021 12:09:44 -0400 Subject: [PATCH 09/10] adapt tests to work with case insensitive comparison --- cohttp-lwt-unix/test/test_parser.ml | 2 +- cohttp/fuzz/fuzz_header.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cohttp-lwt-unix/test/test_parser.ml b/cohttp-lwt-unix/test/test_parser.ml index 025627f0d6..3bf2692f78 100644 --- a/cohttp-lwt-unix/test/test_parser.ml +++ b/cohttp-lwt-unix/test/test_parser.ml @@ -246,7 +246,7 @@ let make_simple_req () = let open Cohttp in let open Cohttp_lwt_unix in let expected = - "POST /foo/bar HTTP/1.1\r\nfoo: bar\r\nhost: localhost\r\nuser-agent: " + "POST /foo/bar HTTP/1.1\r\nFoo: bar\r\nhost: localhost\r\nuser-agent: " ^ user_agent ^ "\r\ntransfer-encoding: chunked\r\n\r\n6\r\nfoobar\r\n0\r\n\r\n" in diff --git a/cohttp/fuzz/fuzz_header.ml b/cohttp/fuzz/fuzz_header.ml index ed982ca06f..d80927b1f6 100644 --- a/cohttp/fuzz/fuzz_header.ml +++ b/cohttp/fuzz/fuzz_header.ml @@ -206,7 +206,7 @@ let mem_test () = [ headers_gen; header_name_gen ] (fun h k -> check_eq H.(mem h k) - List.(mem_assoc k (H.to_list h)))) + List.(exists (fun (x, _) -> String.lowercase_ascii x = String.lowercase_ascii k) (H.to_list h)))) let add_test () = Crowbar.( From a5ec3835b2af6ade9dc50dbfa5999be49bc10972 Mon Sep 17 00:00:00 2001 From: Anurag Soni Date: Mon, 5 Jul 2021 10:30:53 -0400 Subject: [PATCH 10/10] Improve fuzz test specs + test implementation --- cohttp/fuzz/fuzz_header.ml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cohttp/fuzz/fuzz_header.ml b/cohttp/fuzz/fuzz_header.ml index d80927b1f6..e5640f1d49 100644 --- a/cohttp/fuzz/fuzz_header.ml +++ b/cohttp/fuzz/fuzz_header.ml @@ -191,8 +191,7 @@ let init_with_test () = (* FS *) (* forall k v. to_list (init_with k v) = [k, v] *) add_test ~name:"[init_list k v] is [k, v]" [ header_name_gen; word_gen ] - (fun k v -> - check_eq H.(to_list (init_with k v)) [ (k, v) ])) + (fun k v -> check_eq H.(to_list (init_with k v)) [ (k, v) ])) let mem_test () = Crowbar.( @@ -201,12 +200,16 @@ let mem_test () = add_test ~name:"[mem h k] on an empty header is always false" [ header_name_gen ] (fun k -> check_eq false H.(mem (init ()) k)); (* SI *) - (* forall h, k. H.mem h k = List.(mem_assoc k (H.to_list h)) *) + (* forall h, k. H.mem h k = List.(mem_assoc (String.lowercase_ascii x) (List.map (fun (k, v) -> String.lowercase_ascii k, v) (H.to_list h))) *) add_test ~name:"Header.mem has the same behavior than List.mem_assoc" [ headers_gen; header_name_gen ] (fun h k -> check_eq H.(mem h k) - List.(exists (fun (x, _) -> String.lowercase_ascii x = String.lowercase_ascii k) (H.to_list h)))) + List.( + mem_assoc (String.lowercase_ascii k) + (List.map + (fun (k, v) -> (String.lowercase_ascii k, v)) + (H.to_list h))))) let add_test () = Crowbar.( @@ -220,9 +223,7 @@ let add_test () = (* forall h, k, v. to_list (add h k v) = to_list h @ [lowercase k, v] *) ~name:"[add] adds a value at the header end" [ headers_gen; header_name_gen; word_gen ] (fun h k v -> - check_eq - (H.to_list h @ [ (k, v) ]) - H.(to_list (add h k v)))) + check_eq (H.to_list h @ [ (k, v) ]) H.(to_list (add h k v)))) let to_list_of_list_test () = Crowbar.(