From 6a754d0a0c22afb08cdbd68973f5bd73285b30f8 Mon Sep 17 00:00:00 2001 From: Marek Kubica Date: Thu, 26 Sep 2024 12:06:45 +0200 Subject: [PATCH 1/3] Add some tests showing the behaviour of the filename parser Co-authored-by: Kate --- test/test.ml | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/test/test.ml b/test/test.ml index 4884330..251451a 100644 --- a/test/test.ml +++ b/test/test.ml @@ -806,10 +806,175 @@ let unified_diff_creation = [ "end, diff size, none no_nl", `Quick, check_diff diff_tests_end_diff_size_none_no_nl diff_tests_hunk_end_diff_size_none_no_nl ; ] +let operations exp diff () = + let ops = diff |> Patch.parse |> List.map (fun p -> p.Patch.operation) in + Alcotest.(check (list op_test)) __LOC__ exp ops + +let unified_diff_spaces = {|\ +--- "a/foo bar" 2024-09-04 10:56:24.139293679 +0200 ++++ "b/foo bar" 2024-09-04 10:56:12.519195763 +0200 +@@ -1 +1 @@ +-This is wrong. ++This is right. +|} + +let unified_diff_spaces = + operations [Patch.Edit ("\"a/foo bar\"", "\"b/foo bar\"")] unified_diff_spaces + +let git_diff_spaces = {|\ +diff --git a/foo bar b/foo bar +index ef00db3..88adca3 100644 +--- a/foo bar ++++ b/foo bar +@@ -1 +1 @@ +-This is wrong. ++This is right. +|} + +let git_diff_spaces = + operations [Patch.Edit ("foo bar", "foo bar")] git_diff_spaces + +let busybox_diff_spaces = {|\ +--- a/foo bar ++++ b/foo bar +@@ -1 +1 @@ +-This is wrong. ++This is right. +|} + +let busybox_diff_spaces = + operations [Patch.Edit ("a/foo bar", "b/foo bar")] busybox_diff_spaces + +let unified_diff_quotes = {|\ +--- "foo bar \"baz\"" 2024-09-27 11:09:48.325541553 +0200 ++++ "\"foo\" bar baz" 2024-09-27 11:06:42.612922437 +0200 +@@ -1 +1 @@ +-This is right. ++This is wrong. +|} + +let unified_diff_quotes = + operations [Patch.Edit ({|"foo bar \"baz\""|}, {|"\"foo\" bar baz"|})] unified_diff_quotes + +let git_diff_quotes = {|\ +diff --git "a/foo bar \"baz\"" "b/\"foo\" bar baz" +index 88adca3..ef00db3 100644 +--- "a/foo bar \"baz\"" ++++ "b/\"foo\" bar baz" +@@ -1 +1 @@ +-This is right. ++This is wrong. +|} + +let git_diff_quotes = + operations [Patch.Edit ({|"a/foo bar \"baz\""|}, {|"b/\"foo\" bar baz"|})] git_diff_quotes + +let busybox_diff_quotes = {|\ +--- foo bar "baz" ++++ "foo" bar baz +@@ -1 +1 @@ +-This is right. ++This is wrong. +|} + +let busybox_diff_quotes = + operations [Patch.Edit ({|foo bar "baz"|}, {|"foo" bar baz|})] busybox_diff_quotes + +let dev_null_like = {|\ +--- /dev/null_but_actually_not ++++ b +@@ -0,0 +1 @@ ++foo +|} + +let dev_null_like = + operations [Patch.Edit ("/dev/null_but_actually_not", "b")] dev_null_like + +let macos_diff_N_deletion = {|\ +diff -ruaN a/test b/test +--- a/test 2024-03-21 11:29:11 ++++ b/test 1970-01-01 01:00:00 +@@ -1 +0,0 @@ +-aaa +|} + +let macos_diff_N_deletion = + operations [Patch.Edit ("a/test", "b/test")] macos_diff_N_deletion + +let openbsd_diff_N_deletion = {|\ +diff -ruaN a/test b/test +--- a/test Thu Mar 21 12:34:45 2024 ++++ b/test Thu Jan 1 01:00:00 1970 +@@ -1 +0,0 @@ +-aaa +|} + +let openbsd_diff_N_deletion = + operations [Patch.Edit ("a/test", "b/test")] openbsd_diff_N_deletion + +let gnu_diff_N_deletion = {|\ +diff -ruaN a/test b/test +--- a/test 2024-03-21 11:35:38.363194916 +0000 ++++ b/test 1970-01-01 01:00:00.000000000 +0100 +@@ -1 +0,0 @@ +-aaa +|} + +let gnu_diff_N_deletion = + operations [Patch.Edit ("a/test", "b/test")] gnu_diff_N_deletion + +let busybox_diff_N_deletion = {|\ +--- a/test ++++ /dev/null +@@ -1 +0,0 @@ +-aaa +|} + +let busybox_diff_N_deletion = + operations [Patch.Delete "a/test"] busybox_diff_N_deletion + +let quoted_filename = {|\ +--- /dev/null ++++ "\a\b\f\n\r\t\v\\\"\001\177\046" +@@ -0,0 +1 @@ ++aaa +|} + +let quoted_filename = + operations [Patch.Create {|"\a\b\f\n\r\t\v\\\"\001\177\046"|}] quoted_filename + +let unquoted_filename = {|\ +--- /dev/null ++++ \a\b\f\n\r\t\v\\\"\001\177\046 +@@ -0,0 +1 @@ ++aaa +|} + +let unquoted_filename = + operations [Patch.Create {|\a\b\f\n\r\t\v\\\"\001\177\046|}] unquoted_filename + +let filename_diffs = + [ + "unified diff with spaces", `Quick, unified_diff_spaces; + "git diff with spaces", `Quick, git_diff_spaces; + "busybox diff with spaces", `Quick, busybox_diff_spaces; + "unified diff with quotes", `Quick, unified_diff_quotes; + "git diff with quotes", `Quick, git_diff_quotes; + "busybox diff with quotes", `Quick, busybox_diff_quotes; + "file that looks like /dev/null", `Quick, dev_null_like; + "diff -uN with file deletion on macOS", `Quick, macos_diff_N_deletion; + "diff -uN with file deletion on OpenBSD", `Quick, openbsd_diff_N_deletion; + "diff -uN with file deletion with GNU diff", `Quick, gnu_diff_N_deletion; + "diff -uN with file deletion with Busybox", `Quick, busybox_diff_N_deletion; + "heavily quoted filename", `Quick, quoted_filename; + "unquoted filename with backslashes", `Quick, unquoted_filename; + ] + let tests = [ "parse", parse_diffs ; "apply", apply_diffs ; "multiple", multi_diffs ; + "filename", filename_diffs ; "regression basic", basic_regression_diffs ; "parse real diffs", parse_real_diff_headers ; "regression", regression_diffs ; From ae2812348640ff79e7d414d2d68c6f21cd88696e Mon Sep 17 00:00:00 2001 From: Kate Date: Fri, 4 Oct 2024 16:53:15 +0100 Subject: [PATCH 2/3] Fix the parsing of filenames coming from GNU/git diff Co-authored-by: Marek Kubica --- src/dune | 5 +-- src/fname.mli | 5 +++ src/fname.mll | 69 +++++++++++++++++++++++++++++++++++ src/patch.ml | 91 +++++++++++++++++------------------------------ src/patch.mli | 11 +++++- src/patch_lib.ml | 51 ++++++++++++++++++++++++++ src/patch_lib.mli | 12 +++++++ test/test.ml | 8 ++--- 8 files changed, 187 insertions(+), 65 deletions(-) create mode 100644 src/fname.mli create mode 100644 src/fname.mll create mode 100644 src/patch_lib.ml create mode 100644 src/patch_lib.mli diff --git a/src/dune b/src/dune index 5598670..5ff03f7 100644 --- a/src/dune +++ b/src/dune @@ -2,8 +2,9 @@ (name patch) (synopsis "Patch purely in OCaml") (public_name patch) - (modules patch) - (wrapped false)) + (modules patch patch_lib fname)) + +(ocamllex fname) (executable (name patch_command) diff --git a/src/fname.mli b/src/fname.mli new file mode 100644 index 0000000..27280f1 --- /dev/null +++ b/src/fname.mli @@ -0,0 +1,5 @@ +val parse : string -> (string option, string) result +(** [parse s] parses [s] and returns a filename or [None] if the filename + is equivalent to [/dev/null]. + + Returns [Error msg] in case of error. *) diff --git a/src/fname.mll b/src/fname.mll new file mode 100644 index 0000000..08cb1cf --- /dev/null +++ b/src/fname.mll @@ -0,0 +1,69 @@ +{ +module String = Patch_lib.String + +type lexer_output = + | Quoted of string + | Unquoted + | Error of string + +exception Cant_parse_octal + +let ascii_zero = Char.code '0' +let octal_to_char c1 c2 c3 = + let char_to_digit c = Char.code c - ascii_zero in + try + Char.chr ( + (char_to_digit c1 lsl 6) lor + (char_to_digit c2 lsl 3) lor + char_to_digit c3 + ) + with Invalid_argument _ -> raise Cant_parse_octal +} + +let octal = ['0'-'7'] + +rule lex_quoted_filename buf = parse + | "\\a" { Buffer.add_char buf '\007'; lex_quoted_filename buf lexbuf } + | "\\b" { Buffer.add_char buf '\b'; lex_quoted_filename buf lexbuf } + | "\\f" { Buffer.add_char buf '\012'; lex_quoted_filename buf lexbuf } + | "\\n" { Buffer.add_char buf '\n'; lex_quoted_filename buf lexbuf } + | "\\r" { Buffer.add_char buf '\r'; lex_quoted_filename buf lexbuf } + | "\\t" { Buffer.add_char buf '\t'; lex_quoted_filename buf lexbuf } + | "\\v" { Buffer.add_char buf '\011'; lex_quoted_filename buf lexbuf } + | "\\\\" { Buffer.add_char buf '\\'; lex_quoted_filename buf lexbuf } + | "\\\"" { Buffer.add_char buf '"'; lex_quoted_filename buf lexbuf } + | '\\' (['0'-'3'] as c1) (octal as c2) (octal as c3) + { + match octal_to_char c1 c2 c3 with + | octal -> + Buffer.add_char buf octal; + lex_quoted_filename buf lexbuf + | exception Cant_parse_octal -> Unquoted + } + | '\\' _ { Unquoted } + | '"' eof { Quoted (Buffer.contents buf) } + | '"' _ { Unquoted } + | _ as c { Buffer.add_char buf c; lex_quoted_filename buf lexbuf } + | eof { Unquoted } + +and lex_filename buf = parse + | '"' { lex_quoted_filename buf lexbuf } + | _ { Unquoted } + | eof { Error "empty filename" } + +{ +let parse s = + let filename, _date = + match String.cut '\t' s with + | None -> (s, "") + | Some x -> x + in + if filename = "/dev/null" then + Ok None + else + let lexbuf = Lexing.from_string filename in + match lex_filename (Buffer.create 128) lexbuf with + | Quoted x -> Ok (Some x) + | Unquoted -> Ok (Some filename) + | Error msg -> Error msg +} diff --git a/src/patch.ml b/src/patch.ml index 221fb0e..da34522 100644 --- a/src/patch.ml +++ b/src/patch.ml @@ -1,47 +1,4 @@ -module String = struct - let is_prefix ~prefix str = - let pl = String.length prefix in - if String.length str < pl then - false - else - String.sub str 0 (String.length prefix) = prefix - - let cut sep str = - try - let idx = String.index str sep - and l = String.length str - in - let sidx = succ idx in - Some (String.sub str 0 idx, String.sub str sidx (l - sidx)) - with - Not_found -> None - - let cuts sep str = - let rec doit acc s = - match cut sep s with - | None -> List.rev (s :: acc) - | Some (a, b) -> doit (a :: acc) b - in - doit [] str - - let slice ?(start = 0) ?stop str = - let stop = match stop with - | None -> String.length str - | Some x -> x - in - let len = stop - start in - String.sub str start len - - let trim = String.trim - - let get = String.get - - let concat = String.concat - - let length = String.length - - let equal = String.equal -end +module String = Patch_lib.String type hunk = { mine_start : int ; @@ -52,6 +9,14 @@ type hunk = { their : string list ; } +type parse_error = { + msg : string; + lines : string list; + (* TODO: add the start position of the error *) +} + +exception Parse_error of parse_error + let unified_diff ~mine_no_nl ~their_no_nl hunk = let no_nl_str = ["\\ No newline at end of file"] in (* TODO *) @@ -247,19 +212,30 @@ let pp ~git ppf {operation; hunks; mine_no_nl; their_no_nl} = let pp_list ~git ppf diffs = List.iter (Format.fprintf ppf "%a" (pp ~git)) diffs +(* TODO: remove this and let users decide the prefix level they want *) +let process_git_prefix ~git ~prefix s = + if git && String.is_prefix ~prefix s then + String.slice ~start:(String.length prefix) s + else + s + let operation_of_strings git mine their = - let get_filename_opt n = - let s = match String.cut '\t' n with None -> n | Some (x, _) -> x in - if s = no_file then None else - if git && (String.is_prefix ~prefix:"a/" s || String.is_prefix ~prefix:"b/" s) then - Some (String.slice ~start:2 s) - else Some s - in - match get_filename_opt mine, get_filename_opt their with - | None, Some n -> Create n - | Some n, None -> Delete n - | Some a, Some b -> Edit (a, b) - | None, None -> assert false (* ??!?? *) + let mine_fn = String.slice ~start:4 mine + and their_fn = String.slice ~start:4 their in + match Fname.parse mine_fn, Fname.parse their_fn with + | Ok None, Ok (Some b) -> + let b = process_git_prefix ~git ~prefix:"b/" b in + Create b + | Ok (Some a), Ok None -> + let a = process_git_prefix ~git ~prefix:"a/" a in + Delete a + | Ok (Some a), Ok (Some b) -> + let a = process_git_prefix ~git ~prefix:"a/" a in + let b = process_git_prefix ~git ~prefix:"b/" b in + Edit (a, b) + | Ok None, Ok None -> assert false (* ??!?? *) + | Error msg, _ -> raise (Parse_error {msg; lines = [mine]}) + | _, Error msg -> raise (Parse_error {msg; lines = [their]}) let parse_one data = (* first locate --- and +++ lines *) @@ -271,8 +247,7 @@ let parse_one data = let hdr = Rename_only (String.slice ~start:12 x, String.slice ~start:10 y) in find_start git ~hdr xs | x::y::xs when String.is_prefix ~prefix:"--- " x -> - let mine = String.slice ~start:4 x and their = String.slice ~start:4 y in - Some (operation_of_strings git mine their), xs + Some (operation_of_strings git x y), xs | _::xs -> find_start git ?hdr xs in match find_start false data with diff --git a/src/patch.mli b/src/patch.mli index d3ecaa7..13ed736 100644 --- a/src/patch.mli +++ b/src/patch.mli @@ -11,6 +11,13 @@ type hunk = { (** A hunk contains some difference between two files: each with a start line and length, and then the content as lists of string. *) +type parse_error = { + msg : string; + lines : string list; +} + +exception Parse_error of parse_error + val pp_hunk : mine_no_nl:bool -> their_no_nl:bool -> Format.formatter -> hunk -> unit (** [pp_hunk ppf hunk] pretty-prints the [hunk] on [ppf], the printing is in the same format as [diff] does. *) @@ -49,7 +56,9 @@ val pp_list : git:bool -> Format.formatter -> t list -> unit "git diff" style will be printed. *) val parse : string -> t list -(** [parse data] decodes [data] as a list of diffs. *) +(** [parse data] decodes [data] as a list of diffs. + + @raise Parse_error if a filename was unable to be parsed *) val patch : string option -> t -> string option (** [patch file_contents diff] applies [diff] on [file_contents], resulting in diff --git a/src/patch_lib.ml b/src/patch_lib.ml new file mode 100644 index 0000000..bcd6e2d --- /dev/null +++ b/src/patch_lib.ml @@ -0,0 +1,51 @@ +module String = struct + let is_prefix ~prefix str = + let pl = String.length prefix in + if String.length str < pl then + false + else + String.sub str 0 (String.length prefix) = prefix + + let is_suffix ~suffix str = + let pl = String.length suffix in + if String.length str < pl then + false + else + String.sub str (String.length str - pl) pl = suffix + + let cut sep str = + try + let idx = String.index str sep + and l = String.length str + in + let sidx = succ idx in + Some (String.sub str 0 idx, String.sub str sidx (l - sidx)) + with + Not_found -> None + + let cuts sep str = + let rec doit acc s = + match cut sep s with + | None -> List.rev (s :: acc) + | Some (a, b) -> doit (a :: acc) b + in + doit [] str + + let slice ?(start = 0) ?stop str = + let stop = match stop with + | None -> String.length str + | Some x -> x + in + let len = stop - start in + String.sub str start len + + let trim = String.trim + + let get = String.get + + let concat = String.concat + + let length = String.length + + let equal = String.equal +end diff --git a/src/patch_lib.mli b/src/patch_lib.mli new file mode 100644 index 0000000..bcaa10f --- /dev/null +++ b/src/patch_lib.mli @@ -0,0 +1,12 @@ +module String : sig + val is_prefix : prefix:string -> string -> bool + val is_suffix : suffix:string -> string -> bool + val cut : char -> string -> (string * string) option + val cuts : char -> string -> string list + val slice : ?start:int -> ?stop:int -> string -> string + val trim : string -> string + val get : string -> int -> char + val concat : string -> string list -> string + val length : string -> int + val equal : string -> string -> bool +end diff --git a/test/test.ml b/test/test.ml index 251451a..ffe9e75 100644 --- a/test/test.ml +++ b/test/test.ml @@ -819,7 +819,7 @@ let unified_diff_spaces = {|\ |} let unified_diff_spaces = - operations [Patch.Edit ("\"a/foo bar\"", "\"b/foo bar\"")] unified_diff_spaces + operations [Patch.Edit ("a/foo bar", "b/foo bar")] unified_diff_spaces let git_diff_spaces = {|\ diff --git a/foo bar b/foo bar @@ -854,7 +854,7 @@ let unified_diff_quotes = {|\ |} let unified_diff_quotes = - operations [Patch.Edit ({|"foo bar \"baz\""|}, {|"\"foo\" bar baz"|})] unified_diff_quotes + operations [Patch.Edit ({|foo bar "baz"|}, {|"foo" bar baz|})] unified_diff_quotes let git_diff_quotes = {|\ diff --git "a/foo bar \"baz\"" "b/\"foo\" bar baz" @@ -867,7 +867,7 @@ index 88adca3..ef00db3 100644 |} let git_diff_quotes = - operations [Patch.Edit ({|"a/foo bar \"baz\""|}, {|"b/\"foo\" bar baz"|})] git_diff_quotes + operations [Patch.Edit ({|foo bar "baz"|}, {|"foo" bar baz|})] git_diff_quotes let busybox_diff_quotes = {|\ --- foo bar "baz" @@ -941,7 +941,7 @@ let quoted_filename = {|\ |} let quoted_filename = - operations [Patch.Create {|"\a\b\f\n\r\t\v\\\"\001\177\046"|}] quoted_filename + operations [Patch.Create "\007\b\012\n\r\t\011\\\"\001\127&"] quoted_filename let unquoted_filename = {|\ --- /dev/null From 7cde69d00a6c81659d9dff10bf5a1a14a1554477 Mon Sep 17 00:00:00 2001 From: Kate Date: Fri, 4 Oct 2024 17:01:20 +0100 Subject: [PATCH 3/3] Detect file creation/deletion when parsing patch files created with diff -N --- src/fname.mll | 9 +++++++-- test/test.ml | 6 +++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/fname.mll b/src/fname.mll index 08cb1cf..c92154d 100644 --- a/src/fname.mll +++ b/src/fname.mll @@ -53,12 +53,17 @@ and lex_filename buf = parse { let parse s = - let filename, _date = + let filename, date = match String.cut '\t' s with | None -> (s, "") | Some x -> x in - if filename = "/dev/null" then + if filename = "/dev/null" || + String.is_prefix ~prefix:"1970-" date || + String.is_prefix ~prefix:"1969-" date || + String.is_suffix ~suffix:" 1970" date || + String.is_suffix ~suffix:" 1969" date then + (* See https://github.com/hannesm/patch/issues/8 *) Ok None else let lexbuf = Lexing.from_string filename in diff --git a/test/test.ml b/test/test.ml index ffe9e75..6cddc95 100644 --- a/test/test.ml +++ b/test/test.ml @@ -899,7 +899,7 @@ diff -ruaN a/test b/test |} let macos_diff_N_deletion = - operations [Patch.Edit ("a/test", "b/test")] macos_diff_N_deletion + operations [Patch.Delete "a/test"] macos_diff_N_deletion let openbsd_diff_N_deletion = {|\ diff -ruaN a/test b/test @@ -910,7 +910,7 @@ diff -ruaN a/test b/test |} let openbsd_diff_N_deletion = - operations [Patch.Edit ("a/test", "b/test")] openbsd_diff_N_deletion + operations [Patch.Delete "a/test"] openbsd_diff_N_deletion let gnu_diff_N_deletion = {|\ diff -ruaN a/test b/test @@ -921,7 +921,7 @@ diff -ruaN a/test b/test |} let gnu_diff_N_deletion = - operations [Patch.Edit ("a/test", "b/test")] gnu_diff_N_deletion + operations [Patch.Delete "a/test"] gnu_diff_N_deletion let busybox_diff_N_deletion = {|\ --- a/test