Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
The loosely specified `diff` file format is widely used for transmitting
differences of line-based information. The motivating example is
[`opam`](https://opam.ocaml.org), which is able to validate updates being
cryptographically signed (e.g. [conex](https://github.com/hannesm/conex)) by
cryptographically signed (e.g. [conex](https://github.com/robur-coop/conex)) by
providing a unified diff.

The [test-based infered specification](https://www.artima.com/weblogs/viewpost.jsp?thread=164293)
Expand Down Expand Up @@ -50,11 +50,16 @@ or deleted, and if the chunk size is omitted (including the comma), it is set
to 1. NB from practical experiments, only "+1" and "-1" are supported.

```OCaml
type git_ext =
| Rename_only of string * string
| Delete_only
| Create_only

type operation =
| Edit of string * string
| Delete of string
| Create of string
| Rename_only of string * string
| Git_ext of (string * string * git_ext)

type hunk (* positions and contents *)

Expand All @@ -73,8 +78,7 @@ from old and new file contents is also provided.

The function `patch` assumes that the patch applies cleanly, and does not
check this assumption. Exceptions may be raised if this assumption is violated.
The git diff format allows further features, such as file permissions, and also
a "copy from / to" header, which I was unable to spot in the wild.
The git diff format allows further features, such as file permissions.

## Installation

Expand Down
2 changes: 1 addition & 1 deletion src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
(name patch)
(synopsis "Patch purely in OCaml")
(public_name patch)
(modules patch lib fname))
(modules patch lib fname rope))

(executable
(name patch_command)
Expand Down
8 changes: 0 additions & 8 deletions src/lib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,4 @@ module List = struct
| [] -> invalid_arg "List.last"
| [x] -> x
| _::xs -> last xs

let rev_cut idx l =
let rec aux acc idx = function
| l when idx = 0 -> (acc, l)
| [] -> invalid_arg "List.cut"
| x::xs -> aux (x :: acc) (idx - 1) xs
in
aux [] idx l
end
1 change: 0 additions & 1 deletion src/lib.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,4 @@ end

module List : sig
val last : 'a list -> 'a
val rev_cut : int -> 'a list -> 'a list * 'a list
end
33 changes: 18 additions & 15 deletions src/patch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,30 @@ let pp_hunk ~mine_no_nl ~their_no_nl ppf hunk =
hunk.mine_start hunk.mine_len hunk.their_start hunk.their_len
(unified_diff ~mine_no_nl ~their_no_nl hunk)

let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, lines) ({mine_start; mine_len; mine; their_start = _; their_len; their} as hunk) =
let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, rope) ({mine_start; mine_len; mine; their_start = _; their_len; their} as hunk) =
let mine_start = mine_start + offset in
let patch_match ~search_offset =
let mine_start = mine_start + search_offset in
let rev_prefix, rest = Lib.List.rev_cut (Stdlib.max 0 (mine_start - 1)) lines in
let rev_actual_mine, suffix = Lib.List.rev_cut mine_len rest in
let actual_mine = List.rev rev_actual_mine in
if actual_mine <> (mine : string list) then
invalid_arg "unequal mine";
(* TODO: should we check their_len against List.length their? *)
let off_mine = Stdlib.max 0 (mine_start - 1) in
let prefix = Rope.chop rope off_mine in
let actual_mine = Rope.chop rope ~off:off_mine mine_len in
let off = off_mine + mine_len in
let suffix = Rope.shift rope off in
if not (Rope.equal_to_string_list actual_mine mine) then
invalid_arg "unequal mine";
let theirs =
let nl = Rope.last_is_nl actual_mine in
Rope.of_strings their nl
in
(mine_start + mine_len, offset + (their_len - mine_len),
(* TODO: Replace rev_append (rev ...) by the tail-rec when patch
requires OCaml >= 4.14 *)
List.rev_append rev_prefix (List.rev_append (List.rev their) suffix))
Rope.concat prefix (Rope.concat theirs suffix))
in
try patch_match ~search_offset:0
with Invalid_argument _ ->
if cleanly then
invalid_arg "apply_hunk"
else
let max_pos_offset = Stdlib.max 0 (List.length lines - Stdlib.max 0 (mine_start - 1) - mine_len) in
let max_pos_offset = Stdlib.max 0 (Rope.length rope - Stdlib.max 0 (mine_start - 1) - mine_len) in
let max_neg_offset = mine_start - last_matched_line in
let rec locate search_offset =
let aux search_offset max_offset =
Expand Down Expand Up @@ -100,7 +103,7 @@ let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, lines) ({mine_star
else if mine_len = (hunk.mine_len : int) && their_len = (hunk.their_len : int) then
invalid_arg "apply_hunk: could not apply fuzz"
else
apply_hunk ~cleanly ~fuzz:(fuzz + 1) (last_matched_line, offset, lines) hunk
apply_hunk ~cleanly ~fuzz:(fuzz + 1) (last_matched_line, offset, rope) hunk
else
invalid_arg "apply_hunk"
else
Expand Down Expand Up @@ -476,9 +479,9 @@ let patch ~cleanly filedata diff =
| _ -> assert false
end
| Edit _ ->
let old = match filedata with None -> [] | Some x -> to_lines x in
let _, _, lines = List.fold_left (apply_hunk ~cleanly ~fuzz:0) (0, 0, old) diff.hunks in
let lines = String.concat "\n" lines in
let old = match filedata with None -> Rope.empty | Some x -> Rope.of_string x in
let _, _, rope = List.fold_left (apply_hunk ~cleanly ~fuzz:0) (0, 0, old) diff.hunks in
let lines = Rope.to_string rope in
let lines =
match diff.mine_no_nl, diff.their_no_nl with
| false, true ->
Expand Down
117 changes: 117 additions & 0 deletions src/rope.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
(* Originally from https://github.com/robur-coop/utcp, written by
Calascibetta Romain <[email protected]> *)

(* A rope data structure where each node is a line *)

type t =
| Str of string array * bool * int * int
| App of t * t * int

let length = function
| Str (_, _, len, _) -> len
| App (_, _, len) -> len

(* keep compatibility with 4.08 *)
let min_int (a : int) (b : int) = min a b
external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit
= "caml_blit_string" [@@noalloc]

let append t1 t2 =
App (t1, t2, length t1 + length t2)

let empty = Str (Array.make 0 "", true, 0, 0)

let rec unsafe_sub t start stop =
if start = 0 && Int.equal stop (length t) then
t
else if Int.equal start stop then
empty
else match t with
| Str (data, nl, len, off) ->
assert (stop <= (len : int));
Str (data, nl, stop - start, off + start)
| App (l, r, _) ->
let len = length l in
if stop <= (len : int) then unsafe_sub l start stop
else if start >= (len : int) then unsafe_sub r (start - len) (stop - len)
else append (unsafe_sub l start len) (unsafe_sub r 0 (stop - len))

let chop t ?(off = 0) len =
if len < 0 || len > (length t - off : int) then
invalid_arg "Rope.chop";
if len = 0 then empty else unsafe_sub t off (off + len)

let shift t len =
if len < 0 then
invalid_arg "Rope.shift";
if len = 0 then
t
else
let max = length t in
let len = min_int max len in
let l = len + (max - len) in
unsafe_sub t len l

let rec last_is_nl = function
| Str (a, nl, len, off) -> if Int.equal (Array.length a - off) len then nl else true
| App (_, r, _) -> last_is_nl r

let rec byte_length = function
| Str (s, _, len, off) as a ->
let sum = ref 0 in
for idx = off to len + off - 1 do
let data = Array.unsafe_get s idx in
sum := !sum + String.length data + 1
done;
!sum - if last_is_nl a then 0 else 1
| App (l, r, _) -> byte_length l + byte_length r

let rec into_bytes buf dst_off = function
| Str (s, _, len, off) as a ->
let off' = ref dst_off in
for idx = off to len + off - 1 do
let data = Array.unsafe_get s idx in
unsafe_blit_string data 0 buf !off' (String.length data);
off' := !off' + String.length data + 1;
if idx - off < (len - 1 : int) || (Int.equal (idx - off) (len - 1) && last_is_nl a) then
Bytes.unsafe_set buf (!off' - 1) '\n'
done
| App (l, r, _) ->
into_bytes buf dst_off l;
into_bytes buf (dst_off + byte_length l) r

let to_string t =
let len = byte_length t in
let buf = Bytes.create len in
into_bytes buf 0 t;
Bytes.unsafe_to_string buf

let concat a b = append a b

let of_strings xs last_is_nl =
let d = Array.of_list xs in
Str (d, last_is_nl, Array.length d, 0)

let of_string str =
let splitted = String.split_on_char '\n' str in
let last_is_nl = String.unsafe_get str (String.length str - 1) = '\n' in
let d = Array.of_list splitted in
Str (d, last_is_nl, Array.length d - (if last_is_nl then 1 else 0), 0)

let rec equal_to_string_list t = function
| [] -> length t = 0
| hd :: tl ->
let rec find_data = function
| Str (data, _, len, off) ->
if len > 0 then Some (Array.get data off) else None
| App (l, r, _) ->
if length l > 0 then
find_data l
else
find_data r
in
match find_data t with
| None -> false
| Some data ->
String.equal hd data &&
equal_to_string_list (shift t 1) tl
36 changes: 36 additions & 0 deletions src/rope.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
type t (** The type for a rope data structure *)

val length : t -> int
(** [length t] returns the amount of strings in [t]. *)

val empty : t
(** [empty] is the empty rope. *)

val of_strings : string list -> bool -> t
(** [of_strings xs nl] is a rope [t] which contains the strings of [xs]. If
[nl] is true, the last string will have a newline, otherwise not. *)

val of_string : string -> t
(** [of_string str] will split the string [str] on newline, and return a rope. *)

val to_string : t -> string
(** [to_string t] is the string where the contents of [t] is present. *)

val chop : t -> ?off:int -> int -> t
(** [chop t ~off len] returns a new rope that contains [len] strings starting
at [off] of the provided rope [t]. Raises Invalid_argument if [len] and
[off] are not inside the bounds. *)

val shift : t -> int -> t
(** [shift t len] returns a new rope that does not contain the first [len]
strings, but only the remaining strings of [t]. *)

val concat : t -> t -> t
(** [concat t t'] returns a new rope which contains [t] followed by [t']. *)

val last_is_nl : t -> bool
(** [last_is_nl t] returns [true] if the last string should have a newline. *)

val equal_to_string_list : t -> string list -> bool
(** [equal_to_string_list t xs] returns [true] if the content of [t] is equal to
the content of [xs]. *)
2 changes: 1 addition & 1 deletion test/data/external
Submodule external updated from 683877 to 495bc1
12 changes: 12 additions & 0 deletions test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1101,12 +1101,24 @@ let one_mil_apply () =
Alcotest.(check string) __LOC__ expected (Option.get actual)
| None, _, _ | _, None, _ | _, _, None -> Alcotest.skip ()

let many_hunks_old = lazy (opt_read "./external/many-hunks.old")
let many_hunks_new = lazy (opt_read "./external/many-hunks.new")
let many_hunks_diff = lazy (opt_read "./external/many-hunks.diff")
let many_hunks_apply () =
match Lazy.force many_hunks_old, Lazy.force many_hunks_new, Lazy.force many_hunks_diff with
| Some many_hunks_old, Some expected, Some diff ->
let patch = Patch.parse ~p:0 diff in
let actual = Patch.patch ~cleanly:true (Some many_hunks_old) (List.hd patch) in
Alcotest.(check string) __LOC__ expected (Option.get actual)
| None, _, _ | _, None, _ | _, _, None -> Alcotest.skip ()

let big_diff = [
"parse", `Quick, parse_big;
"print", `Quick, print_big;
"parse own", `Quick, parse_own;
"1_000_000 print", `Quick, one_mil_print;
"1_000_000 apply", `Quick, one_mil_apply;
"many-hunks apply", `Quick, many_hunks_apply;
]

let print_diff_mine_empty_their_no_nl () =
Expand Down