diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 995e469628..ee4132a53b 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -25,7 +25,7 @@ jobs: uses: actions/checkout@v2 - name: Use OCaml ${{ matrix.ocaml-version }} - uses: actions-ml/setup-ocaml@master + uses: ocaml/setup-ocaml@v2 with: ocaml-version: ${{ matrix.ocaml-version }} opam-depext: false diff --git a/CHANGES.md b/CHANGES.md index 277ec5eb88..68eee4a01a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,11 +8,14 @@ stack overflow happens in the XHR completion handler (mefyl #762). - lwt_jsoo: Add test suite (mefyl #764). -- Cohttp.Header: new implementation (@lyrm #747) +- Cohttp.Header: new implementation (lyrm #747) + New implementation of Header modules using an associative list instead of a map, with one major semantic change (function ```get```, see below), and some new functions (```clean_dup```, ```get_multi_concat```) + More Alcotest tests as well as fuzzing tests for this particular module. +- Cohttp.Header: performance improvement (mseri, anuragsoni #778) + **Breaking** the headers are no-longer lowercased when parsed, the headers key comparison is case insensitive instead. + ### Purpose The new header implementation uses an associative list instead of a map to represent headers and is focused on predictability and intuitivity: except for some specific and documented functions, the headers are always kept in transmission order, which makes debugging easier and is also important for [RFC7230§3.2.2](https://tools.ietf.org/html/rfc7230#section-3.2.2) that states that multiple values of a header must be kept in order. @@ -38,6 +41,7 @@ + ```clean_dup``` enables the user to clean headers that follows the {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} RFC7230§3.2.2} (no duplicate, except ```set-cookie```) + ```get_multi_concat``` has been added to get a result similar to the previous ```get``` function. +- Cohttp.Header: optimize internal of cohttp.headers (mseri #778) ## v4.0.0 (2021-03-24) diff --git a/cohttp-lwt-unix/test/test_parser.ml b/cohttp-lwt-unix/test/test_parser.ml index 025627f0d6..3bf2692f78 100644 --- a/cohttp-lwt-unix/test/test_parser.ml +++ b/cohttp-lwt-unix/test/test_parser.ml @@ -246,7 +246,7 @@ let make_simple_req () = let open Cohttp in let open Cohttp_lwt_unix in let expected = - "POST /foo/bar HTTP/1.1\r\nfoo: bar\r\nhost: localhost\r\nuser-agent: " + "POST /foo/bar HTTP/1.1\r\nFoo: bar\r\nhost: localhost\r\nuser-agent: " ^ user_agent ^ "\r\ntransfer-encoding: chunked\r\n\r\n6\r\nfoobar\r\n0\r\n\r\n" in diff --git a/cohttp/fuzz/fuzz_header.ml b/cohttp/fuzz/fuzz_header.ml index d53b743c04..e5640f1d49 100644 --- a/cohttp/fuzz/fuzz_header.ml +++ b/cohttp/fuzz/fuzz_header.ml @@ -189,10 +189,9 @@ let is_empty_test () = let init_with_test () = Crowbar.( (* FS *) - (* forall k v. to_list (init_with k v) = [String.lowercase k, v] *) + (* forall k v. to_list (init_with k v) = [k, v] *) add_test ~name:"[init_list k v] is [k, v]" [ header_name_gen; word_gen ] - (fun k v -> - check_eq H.(to_list (init_with k v)) [ (String.lowercase_ascii k, v) ])) + (fun k v -> check_eq H.(to_list (init_with k v)) [ (k, v) ])) let mem_test () = Crowbar.( @@ -201,12 +200,16 @@ let mem_test () = add_test ~name:"[mem h k] on an empty header is always false" [ header_name_gen ] (fun k -> check_eq false H.(mem (init ()) k)); (* SI *) - (* forall h, k. H.mem h k = List.(mem_assoc k (H.to_list h)) *) + (* forall h, k. H.mem h k = List.(mem_assoc (String.lowercase_ascii x) (List.map (fun (k, v) -> String.lowercase_ascii k, v) (H.to_list h))) *) add_test ~name:"Header.mem has the same behavior than List.mem_assoc" [ headers_gen; header_name_gen ] (fun h k -> check_eq H.(mem h k) - List.(mem_assoc (String.lowercase_ascii k) (H.to_list h)))) + List.( + mem_assoc (String.lowercase_ascii k) + (List.map + (fun (k, v) -> (String.lowercase_ascii k, v)) + (H.to_list h))))) let add_test () = Crowbar.( @@ -220,9 +223,7 @@ let add_test () = (* forall h, k, v. to_list (add h k v) = to_list h @ [lowercase k, v] *) ~name:"[add] adds a value at the header end" [ headers_gen; header_name_gen; word_gen ] (fun h k v -> - check_eq - (H.to_list h @ [ (String.lowercase_ascii k, v) ]) - H.(to_list (add h k v)))) + check_eq (H.to_list h @ [ (k, v) ]) H.(to_list (add h k v)))) let to_list_of_list_test () = Crowbar.( diff --git a/cohttp/src/header.ml b/cohttp/src/header.ml index 2f3166e4fe..f4995c07ed 100644 --- a/cohttp/src/header.ml +++ b/cohttp/src/header.ml @@ -16,36 +16,37 @@ * }}}*) -module LString : sig - type t - - val of_string : string -> t - val to_string : t -> string - val compare : t -> t -> int -end = struct - type t = string - - let of_string x = String.lowercase_ascii x - let to_string x = x - let compare a b = String.compare a b -end - -type t = (LString.t * string) list +let caseless_equal a b = + if a == b then true + else + let len = String.length a in + len = String.length b + && + let stop = ref false in + let idx = ref 0 in + while (not !stop) && !idx < len do + let c1 = String.unsafe_get a !idx in + let c2 = String.unsafe_get b !idx in + if Char.lowercase_ascii c1 <> Char.lowercase_ascii c2 then stop := true; + incr idx + done; + not !stop + +type t = (string * string) list let compare = Stdlib.compare let init () = [] let is_empty = function [] -> true | _ -> false -let init_with k v = [ (LString.of_string k, v) ] +let init_with k v = [ (k, v) ] let mem h k = - let k = LString.of_string k in let rec loop = function | [] -> false - | (k', _) :: h' -> if LString.compare k k' = 0 then true else loop h' + | (k', _) :: h' -> if caseless_equal k k' then true else loop h' in loop h -let add h k v : t = (LString.of_string k, v) :: h +let add h k v : t = (k, v) :: h let add_list h l = List.fold_left (fun h (k, v) -> add h k v) h l let add_multi h k l = List.fold_left (fun h v -> add h k v) h l @@ -58,11 +59,10 @@ let add_opt_unless_exists h k v = match h with None -> init_with k v | Some h -> add_unless_exists h k v let get h k = - let k = LString.of_string k in let rec loop h = match h with | [] -> None - | (k', v) :: h' -> if LString.compare k k' = 0 then Some v else loop h' + | (k', v) :: h' -> if caseless_equal k k' then Some v else loop h' in loop h @@ -71,36 +71,32 @@ let get_multi (h : t) (k : string) = match h with | [] -> acc | (k', v) :: h' -> - if LString.compare (LString.of_string k) k' = 0 then loop h' (v :: acc) - else loop h' acc + if caseless_equal k k' then loop h' (v :: acc) else loop h' acc in loop h [] let remove h k = - let k = LString.of_string k in let rec loop seen = function | [] -> if seen then [] else raise Not_found - | (k', _) :: h when LString.compare k k' = 0 -> loop true h + | (k', _) :: h when caseless_equal k k' -> loop true h | x :: h -> x :: loop seen h in try loop false h with Not_found -> h let remove_last h k = - let k = LString.of_string k in let rec loop seen = function | [] -> raise Not_found - | (k', _) :: h when LString.compare k k' = 0 -> h + | (k', _) :: h when caseless_equal k k' -> h | x :: h -> x :: loop seen h in try loop false h with Not_found -> h let replace_ last h k v = - let k' = LString.of_string k in let rec loop seen = function | [] -> if seen then [] else raise Not_found - | (k'', _) :: h when LString.compare k' k'' = 0 -> + | (k'', _) :: h when caseless_equal k k'' -> if last then (k'', v) :: h - else if not seen then (k', v) :: loop true h + else if not seen then (k, v) :: loop true h else loop seen h | x :: h -> x :: loop seen h in @@ -129,33 +125,26 @@ let update_all h k f = let map (f : string -> string -> string) (h : t) : t = List.map (fun (k, v) -> - let vs' = f (LString.to_string k) v in + let vs' = f k v in (k, vs')) h let iter (f : string -> string -> unit) (h : t) : unit = - List.iter (fun (k, v) -> f (LString.to_string k) v) h + List.iter (fun (k, v) -> f k v) h let fold (f : string -> string -> 'a -> 'a) (h : t) (init : 'a) : 'a = - List.fold_left (fun acc (k, v) -> f (LString.to_string k) v acc) init h - -let of_list h = - List.fold_left (fun acc (k, v) -> (LString.of_string k, v) :: acc) [] h + List.fold_left (fun acc (k, v) -> f k v acc) init h -let to_list h = - List.fold_left (fun acc (k, v) -> (LString.to_string k, v) :: acc) [] h +let of_list h = List.rev h +let to_list h = List.rev h let to_lines (h : t) = let header_line k v = Printf.sprintf "%s: %s\r\n" k v in - List.fold_left - (fun acc (k, v) -> header_line (LString.to_string k) v :: acc) - [] h + List.fold_left (fun acc (k, v) -> header_line k v :: acc) [] h let to_frames h = let to_frame k v = Printf.sprintf "%s: %s" k v in - List.fold_left - (fun acc (k, v) -> to_frame (LString.to_string k) v :: acc) - [] h + List.fold_left (fun acc (k, v) -> to_frame k v :: acc) [] h let to_string h = let b = Buffer.create 128 in @@ -169,48 +158,47 @@ let to_string h = Buffer.contents b let headers_with_list_values = - Array.map LString.of_string - [| - "accept"; - "accept-charset"; - "accept-encoding"; - "accept-language"; - "accept-ranges"; - "allow"; - "cache-control"; - "connection"; - "content-encoding"; - "content-language"; - "expect"; - "if-match"; - "if-none-match"; - "link"; - "pragma"; - "proxy-authenticate"; - "te"; - "trailer"; - "transfer-encoding"; - "upgrade"; - "vary"; - "via"; - "warning"; - "www-authenticate"; - |] + [| + "accept"; + "accept-charset"; + "accept-encoding"; + "accept-language"; + "accept-ranges"; + "allow"; + "cache-control"; + "connection"; + "content-encoding"; + "content-language"; + "expect"; + "if-match"; + "if-none-match"; + "link"; + "pragma"; + "proxy-authenticate"; + "te"; + "trailer"; + "transfer-encoding"; + "upgrade"; + "vary"; + "via"; + "warning"; + "www-authenticate"; + |] let is_header_with_list_value = let tbl = Hashtbl.create (Array.length headers_with_list_values) in headers_with_list_values |> Array.iter (fun h -> Hashtbl.add tbl h ()); fun h -> Hashtbl.mem tbl h -let is_set_cookie k = LString.(compare k (of_string "set-cookie")) +let is_set_cookie k = caseless_equal k "set-cookie" (* set-cookie is an exception according to {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} - RFC7230§3.2.2} and can appear multiple times in a response message. + RFC7230§3.2.2} and can appear multiple times in a response message. *) let clean_dup (h : t) : t = let add h k v = - if is_set_cookie k = 0 then (k, v) :: h + if is_set_cookie k then (k, v) :: h else let to_add = ref false in let rec loop = function @@ -218,7 +206,7 @@ let clean_dup (h : t) : t = to_add := true; [] | (k', v') :: hs -> - if LString.compare k k' = 0 then + if caseless_equal k k' then if is_header_with_list_value k then (k, v' ^ "," ^ v) :: hs else ( to_add := true; @@ -231,8 +219,7 @@ let clean_dup (h : t) : t = List.rev h |> List.fold_left (fun acc (k, v) -> add acc k v) [] let get_multi_concat ?(list_value_only = false) h k : string option = - if (not list_value_only) || is_header_with_list_value (LString.of_string k) - then + if (not list_value_only) || is_header_with_list_value k then let vs = get_multi h k in match vs with [] -> None | _ -> Some (String.concat "," vs) else get h k diff --git a/cohttp/src/header.mli b/cohttp/src/header.mli index a3edb4ec9e..38d6823c99 100644 --- a/cohttp/src/header.mli +++ b/cohttp/src/header.mli @@ -32,7 +32,8 @@ val of_list : (string * string) list -> t is true with case insensitive comparison. *) val to_list : t -> (string * string) list -(** [to_list h] converts HTTP headers [h] to a list. Order is preserved. +(** [to_list h] converts HTTP headers [h] to a list. Order and case is + preserved. {e Invariant (with case insensitive comparison):} [to_list (of_list l) = l] *) diff --git a/cohttp/test/unitary_test_header.ml b/cohttp/test/unitary_test_header.ml index d3421e8764..a47b879544 100644 --- a/cohttp/test/unitary_test_header.ml +++ b/cohttp/test/unitary_test_header.ml @@ -58,7 +58,7 @@ let is_empty_tests () = let init_with_tests () = aessl "init_with k v" - [ ("transfer-encoding", "chunked") ] + [ ("traNsfer-eNcoding", "chunked") ] H.(to_list (init_with "traNsfer-eNcoding" "chunked")) let mem_tests () =