Skip to content

Commit

Permalink
Merge pull request #778 from mseri/parser
Browse files Browse the repository at this point in the history
cohttp.headers: use faster comparison
  • Loading branch information
mseri authored Jul 7, 2021
2 parents 0187c8a + e0bd381 commit d67a184
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 90 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
uses: actions/checkout@v2

- name: Use OCaml ${{ matrix.ocaml-version }}
uses: actions-ml/setup-ocaml@master
uses: ocaml/setup-ocaml@v2
with:
ocaml-version: ${{ matrix.ocaml-version }}
opam-depext: false
Expand Down
6 changes: 5 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@
stack overflow happens in the XHR completion handler (mefyl #762).
- lwt_jsoo: Add test suite (mefyl #764).

- Cohttp.Header: new implementation (@lyrm #747)
- Cohttp.Header: new implementation (lyrm #747)

+ New implementation of Header modules using an associative list instead of a map, with one major semantic change (function ```get```, see below), and some new functions (```clean_dup```, ```get_multi_concat```)
+ More Alcotest tests as well as fuzzing tests for this particular module.

- Cohttp.Header: performance improvement (mseri, anuragsoni #778)
**Breaking** the headers are no-longer lowercased when parsed, the headers key comparison is case insensitive instead.

### Purpose

The new header implementation uses an associative list instead of a map to represent headers and is focused on predictability and intuitivity: except for some specific and documented functions, the headers are always kept in transmission order, which makes debugging easier and is also important for [RFC7230§3.2.2](https://tools.ietf.org/html/rfc7230#section-3.2.2) that states that multiple values of a header must be kept in order.
Expand All @@ -38,6 +41,7 @@

+ ```clean_dup``` enables the user to clean headers that follows the {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} RFC7230§3.2.2} (no duplicate, except ```set-cookie```)
+ ```get_multi_concat``` has been added to get a result similar to the previous ```get``` function.
- Cohttp.Header: optimize internal of cohttp.headers (mseri #778)

## v4.0.0 (2021-03-24)

Expand Down
2 changes: 1 addition & 1 deletion cohttp-lwt-unix/test/test_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ let make_simple_req () =
let open Cohttp in
let open Cohttp_lwt_unix in
let expected =
"POST /foo/bar HTTP/1.1\r\nfoo: bar\r\nhost: localhost\r\nuser-agent: "
"POST /foo/bar HTTP/1.1\r\nFoo: bar\r\nhost: localhost\r\nuser-agent: "
^ user_agent
^ "\r\ntransfer-encoding: chunked\r\n\r\n6\r\nfoobar\r\n0\r\n\r\n"
in
Expand Down
17 changes: 9 additions & 8 deletions cohttp/fuzz/fuzz_header.ml
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,9 @@ let is_empty_test () =
let init_with_test () =
Crowbar.(
(* FS *)
(* forall k v. to_list (init_with k v) = [String.lowercase k, v] *)
(* forall k v. to_list (init_with k v) = [k, v] *)
add_test ~name:"[init_list k v] is [k, v]" [ header_name_gen; word_gen ]
(fun k v ->
check_eq H.(to_list (init_with k v)) [ (String.lowercase_ascii k, v) ]))
(fun k v -> check_eq H.(to_list (init_with k v)) [ (k, v) ]))

let mem_test () =
Crowbar.(
Expand All @@ -201,12 +200,16 @@ let mem_test () =
add_test ~name:"[mem h k] on an empty header is always false"
[ header_name_gen ] (fun k -> check_eq false H.(mem (init ()) k));
(* SI *)
(* forall h, k. H.mem h k = List.(mem_assoc k (H.to_list h)) *)
(* forall h, k. H.mem h k = List.(mem_assoc (String.lowercase_ascii x) (List.map (fun (k, v) -> String.lowercase_ascii k, v) (H.to_list h))) *)
add_test ~name:"Header.mem has the same behavior than List.mem_assoc"
[ headers_gen; header_name_gen ] (fun h k ->
check_eq
H.(mem h k)
List.(mem_assoc (String.lowercase_ascii k) (H.to_list h))))
List.(
mem_assoc (String.lowercase_ascii k)
(List.map
(fun (k, v) -> (String.lowercase_ascii k, v))
(H.to_list h)))))

let add_test () =
Crowbar.(
Expand All @@ -220,9 +223,7 @@ let add_test () =
(* forall h, k, v. to_list (add h k v) = to_list h @ [lowercase k, v] *)
~name:"[add] adds a value at the header end"
[ headers_gen; header_name_gen; word_gen ] (fun h k v ->
check_eq
(H.to_list h @ [ (String.lowercase_ascii k, v) ])
H.(to_list (add h k v))))
check_eq (H.to_list h @ [ (k, v) ]) H.(to_list (add h k v))))

let to_list_of_list_test () =
Crowbar.(
Expand Down
141 changes: 64 additions & 77 deletions cohttp/src/header.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,36 +16,37 @@
*
}}}*)

module LString : sig
type t

val of_string : string -> t
val to_string : t -> string
val compare : t -> t -> int
end = struct
type t = string

let of_string x = String.lowercase_ascii x
let to_string x = x
let compare a b = String.compare a b
end

type t = (LString.t * string) list
let caseless_equal a b =
if a == b then true
else
let len = String.length a in
len = String.length b
&&
let stop = ref false in
let idx = ref 0 in
while (not !stop) && !idx < len do
let c1 = String.unsafe_get a !idx in
let c2 = String.unsafe_get b !idx in
if Char.lowercase_ascii c1 <> Char.lowercase_ascii c2 then stop := true;
incr idx
done;
not !stop

type t = (string * string) list

let compare = Stdlib.compare
let init () = []
let is_empty = function [] -> true | _ -> false
let init_with k v = [ (LString.of_string k, v) ]
let init_with k v = [ (k, v) ]

let mem h k =
let k = LString.of_string k in
let rec loop = function
| [] -> false
| (k', _) :: h' -> if LString.compare k k' = 0 then true else loop h'
| (k', _) :: h' -> if caseless_equal k k' then true else loop h'
in
loop h

let add h k v : t = (LString.of_string k, v) :: h
let add h k v : t = (k, v) :: h
let add_list h l = List.fold_left (fun h (k, v) -> add h k v) h l
let add_multi h k l = List.fold_left (fun h v -> add h k v) h l

Expand All @@ -58,11 +59,10 @@ let add_opt_unless_exists h k v =
match h with None -> init_with k v | Some h -> add_unless_exists h k v

let get h k =
let k = LString.of_string k in
let rec loop h =
match h with
| [] -> None
| (k', v) :: h' -> if LString.compare k k' = 0 then Some v else loop h'
| (k', v) :: h' -> if caseless_equal k k' then Some v else loop h'
in
loop h

Expand All @@ -71,36 +71,32 @@ let get_multi (h : t) (k : string) =
match h with
| [] -> acc
| (k', v) :: h' ->
if LString.compare (LString.of_string k) k' = 0 then loop h' (v :: acc)
else loop h' acc
if caseless_equal k k' then loop h' (v :: acc) else loop h' acc
in
loop h []

let remove h k =
let k = LString.of_string k in
let rec loop seen = function
| [] -> if seen then [] else raise Not_found
| (k', _) :: h when LString.compare k k' = 0 -> loop true h
| (k', _) :: h when caseless_equal k k' -> loop true h
| x :: h -> x :: loop seen h
in
try loop false h with Not_found -> h

let remove_last h k =
let k = LString.of_string k in
let rec loop seen = function
| [] -> raise Not_found
| (k', _) :: h when LString.compare k k' = 0 -> h
| (k', _) :: h when caseless_equal k k' -> h
| x :: h -> x :: loop seen h
in
try loop false h with Not_found -> h

let replace_ last h k v =
let k' = LString.of_string k in
let rec loop seen = function
| [] -> if seen then [] else raise Not_found
| (k'', _) :: h when LString.compare k' k'' = 0 ->
| (k'', _) :: h when caseless_equal k k'' ->
if last then (k'', v) :: h
else if not seen then (k', v) :: loop true h
else if not seen then (k, v) :: loop true h
else loop seen h
| x :: h -> x :: loop seen h
in
Expand Down Expand Up @@ -129,33 +125,26 @@ let update_all h k f =
let map (f : string -> string -> string) (h : t) : t =
List.map
(fun (k, v) ->
let vs' = f (LString.to_string k) v in
let vs' = f k v in
(k, vs'))
h

let iter (f : string -> string -> unit) (h : t) : unit =
List.iter (fun (k, v) -> f (LString.to_string k) v) h
List.iter (fun (k, v) -> f k v) h

let fold (f : string -> string -> 'a -> 'a) (h : t) (init : 'a) : 'a =
List.fold_left (fun acc (k, v) -> f (LString.to_string k) v acc) init h

let of_list h =
List.fold_left (fun acc (k, v) -> (LString.of_string k, v) :: acc) [] h
List.fold_left (fun acc (k, v) -> f k v acc) init h

let to_list h =
List.fold_left (fun acc (k, v) -> (LString.to_string k, v) :: acc) [] h
let of_list h = List.rev h
let to_list h = List.rev h

let to_lines (h : t) =
let header_line k v = Printf.sprintf "%s: %s\r\n" k v in
List.fold_left
(fun acc (k, v) -> header_line (LString.to_string k) v :: acc)
[] h
List.fold_left (fun acc (k, v) -> header_line k v :: acc) [] h

let to_frames h =
let to_frame k v = Printf.sprintf "%s: %s" k v in
List.fold_left
(fun acc (k, v) -> to_frame (LString.to_string k) v :: acc)
[] h
List.fold_left (fun acc (k, v) -> to_frame k v :: acc) [] h

let to_string h =
let b = Buffer.create 128 in
Expand All @@ -169,56 +158,55 @@ let to_string h =
Buffer.contents b

let headers_with_list_values =
Array.map LString.of_string
[|
"accept";
"accept-charset";
"accept-encoding";
"accept-language";
"accept-ranges";
"allow";
"cache-control";
"connection";
"content-encoding";
"content-language";
"expect";
"if-match";
"if-none-match";
"link";
"pragma";
"proxy-authenticate";
"te";
"trailer";
"transfer-encoding";
"upgrade";
"vary";
"via";
"warning";
"www-authenticate";
|]
[|
"accept";
"accept-charset";
"accept-encoding";
"accept-language";
"accept-ranges";
"allow";
"cache-control";
"connection";
"content-encoding";
"content-language";
"expect";
"if-match";
"if-none-match";
"link";
"pragma";
"proxy-authenticate";
"te";
"trailer";
"transfer-encoding";
"upgrade";
"vary";
"via";
"warning";
"www-authenticate";
|]

let is_header_with_list_value =
let tbl = Hashtbl.create (Array.length headers_with_list_values) in
headers_with_list_values |> Array.iter (fun h -> Hashtbl.add tbl h ());
fun h -> Hashtbl.mem tbl h

let is_set_cookie k = LString.(compare k (of_string "set-cookie"))
let is_set_cookie k = caseless_equal k "set-cookie"

(* set-cookie is an exception according to
{{:https://tools.ietf.org/html/rfc7230#section-3.2.2}
RFC7230§3.2.2} and can appear multiple times in a response message.
RFC7230§3.2.2} and can appear multiple times in a response message.
*)
let clean_dup (h : t) : t =
let add h k v =
if is_set_cookie k = 0 then (k, v) :: h
if is_set_cookie k then (k, v) :: h
else
let to_add = ref false in
let rec loop = function
| [] ->
to_add := true;
[]
| (k', v') :: hs ->
if LString.compare k k' = 0 then
if caseless_equal k k' then
if is_header_with_list_value k then (k, v' ^ "," ^ v) :: hs
else (
to_add := true;
Expand All @@ -231,8 +219,7 @@ let clean_dup (h : t) : t =
List.rev h |> List.fold_left (fun acc (k, v) -> add acc k v) []

let get_multi_concat ?(list_value_only = false) h k : string option =
if (not list_value_only) || is_header_with_list_value (LString.of_string k)
then
if (not list_value_only) || is_header_with_list_value k then
let vs = get_multi h k in
match vs with [] -> None | _ -> Some (String.concat "," vs)
else get h k
Expand Down
3 changes: 2 additions & 1 deletion cohttp/src/header.mli
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ val of_list : (string * string) list -> t
is true with case insensitive comparison. *)

val to_list : t -> (string * string) list
(** [to_list h] converts HTTP headers [h] to a list. Order is preserved.
(** [to_list h] converts HTTP headers [h] to a list. Order and case is
preserved.
{e Invariant (with case insensitive comparison):} [to_list (of_list l) = l] *)

Expand Down
2 changes: 1 addition & 1 deletion cohttp/test/unitary_test_header.ml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ let is_empty_tests () =

let init_with_tests () =
aessl "init_with k v"
[ ("transfer-encoding", "chunked") ]
[ ("traNsfer-eNcoding", "chunked") ]
H.(to_list (init_with "traNsfer-eNcoding" "chunked"))

let mem_tests () =
Expand Down

0 comments on commit d67a184

Please sign in to comment.