Skip to content

Commit 1c2a055

Browse files
committed
Use a rope of string array instead of a list of strings
This improves my usecase (a single file diffed with 2000 hunks by a factor of 20 GitHub workflow: run on 4.08 as well
1 parent 0b365e5 commit 1c2a055

File tree

4 files changed

+144
-21
lines changed

4 files changed

+144
-21
lines changed

.github/workflows/main.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,22 @@ jobs:
1717
- ubuntu-latest
1818
- windows-latest
1919
ocaml-compiler:
20+
- 4.08.x
2021
- 4.14.x
2122

2223
runs-on: ${{ matrix.os }}
2324

2425
steps:
2526
- name: Checkout code
26-
uses: actions/checkout@v2
27+
uses: actions/checkout@v5
2728

2829
- name: Use OCaml ${{ matrix.ocaml-compiler }}
29-
uses: ocaml/setup-ocaml@v2
30+
uses: ocaml/setup-ocaml@v3
3031
with:
3132
ocaml-compiler: ${{ matrix.ocaml-compiler }}
3233

33-
- run: opam install . --deps-only --with-test
34+
- run: opam install ./patch.opam --deps-only --with-test
3435

35-
- run: opam exec -- dune build
36+
- run: opam exec -- dune build -p patch
3637

37-
- run: opam exec -- dune runtest
38+
- run: opam exec -- dune runtest -p patch

src/dune

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
(name patch)
33
(synopsis "Patch purely in OCaml")
44
(public_name patch)
5-
(modules patch lib fname))
5+
(modules patch lib fname rope))
66

77
(executable
88
(name patch_command)

src/patch.ml

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,27 +36,29 @@ let pp_hunk ~mine_no_nl ~their_no_nl ppf hunk =
3636
hunk.mine_start hunk.mine_len hunk.their_start hunk.their_len
3737
(unified_diff ~mine_no_nl ~their_no_nl hunk)
3838

39-
let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, lines) ({mine_start; mine_len; mine; their_start = _; their_len; their} as hunk) =
39+
let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, rope) ({mine_start; mine_len; mine; their_start = _; their_len; their} as hunk) =
4040
let mine_start = mine_start + offset in
4141
let patch_match ~search_offset =
4242
let mine_start = mine_start + search_offset in
43-
let rev_prefix, rest = Lib.List.rev_cut (Stdlib.max 0 (mine_start - 1)) lines in
44-
let rev_actual_mine, suffix = Lib.List.rev_cut mine_len rest in
45-
let actual_mine = List.rev rev_actual_mine in
46-
if actual_mine <> (mine : string list) then
47-
invalid_arg "unequal mine";
48-
(* TODO: should we check their_len against List.length their? *)
43+
let prefix = Rope.chop rope (Stdlib.max 0 (mine_start - 1)) in
44+
let actual_mine = Rope.chop rope ~off:(Stdlib.max 0 (mine_start - 1)) mine_len in
45+
let off = Stdlib.max 0 (mine_start - 1) + mine_len in
46+
let suffix = Rope.shift rope off in
47+
if Rope.to_strings actual_mine <> mine then
48+
invalid_arg "unequal mine";
49+
let theirs =
50+
let nl = Rope.last_is_nl actual_mine in
51+
Rope.of_strings their nl
52+
in
4953
(mine_start + mine_len, offset + (their_len - mine_len),
50-
(* TODO: Replace rev_append (rev ...) by the tail-rec when patch
51-
requires OCaml >= 4.14 *)
52-
List.rev_append rev_prefix (List.rev_append (List.rev their) suffix))
54+
Rope.concat prefix (Rope.concat theirs suffix))
5355
in
5456
try patch_match ~search_offset:0
5557
with Invalid_argument _ ->
5658
if cleanly then
5759
invalid_arg "apply_hunk"
5860
else
59-
let max_pos_offset = Stdlib.max 0 (List.length lines - Stdlib.max 0 (mine_start - 1) - mine_len) in
61+
let max_pos_offset = Stdlib.max 0 (Rope.length rope - Stdlib.max 0 (mine_start - 1) - mine_len) in
6062
let max_neg_offset = mine_start - last_matched_line in
6163
let rec locate search_offset =
6264
let aux search_offset max_offset =
@@ -100,7 +102,7 @@ let rec apply_hunk ~cleanly ~fuzz (last_matched_line, offset, lines) ({mine_star
100102
else if mine_len = (hunk.mine_len : int) && their_len = (hunk.their_len : int) then
101103
invalid_arg "apply_hunk: could not apply fuzz"
102104
else
103-
apply_hunk ~cleanly ~fuzz:(fuzz + 1) (last_matched_line, offset, lines) hunk
105+
apply_hunk ~cleanly ~fuzz:(fuzz + 1) (last_matched_line, offset, rope) hunk
104106
else
105107
invalid_arg "apply_hunk"
106108
else
@@ -476,9 +478,9 @@ let patch ~cleanly filedata diff =
476478
| _ -> assert false
477479
end
478480
| Edit _ ->
479-
let old = match filedata with None -> [] | Some x -> to_lines x in
480-
let _, _, lines = List.fold_left (apply_hunk ~cleanly ~fuzz:0) (0, 0, old) diff.hunks in
481-
let lines = String.concat "\n" lines in
481+
let old = match filedata with None -> Rope.empty | Some x -> Rope.of_string x in
482+
let _, _, rope = List.fold_left (apply_hunk ~cleanly ~fuzz:0) (0, 0, old) diff.hunks in
483+
let lines = Rope.to_string rope in
482484
let lines =
483485
match diff.mine_no_nl, diff.their_no_nl with
484486
| false, true ->

src/rope.ml

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
(* Originally from https://github.com/robur-coop/utcp, written by
2+
Calascibetta Romain <[email protected]> *)
3+
4+
(* A rope data structure where each node is a line *)
5+
6+
type t =
7+
| Str of string array * bool * int * int
8+
| App of t * t * int * int
9+
10+
let length = function
11+
| Str (_, _, len, _) -> len
12+
| App (_, _, len, _) -> len
13+
14+
let height = function
15+
| Str _ -> 0
16+
| App (_, _, _, h) -> h
17+
18+
(* keep compatibility with 4.08 *)
19+
let max_int (a : int) (b : int) = max a b
20+
let min_int (a : int) (b : int) = min a b
21+
external unsafe_blit_string : string -> int -> bytes -> int -> int -> unit
22+
= "caml_blit_string" [@@noalloc]
23+
24+
let append t1 t2 =
25+
App (t1, t2, length t1 + length t2, 1 + max_int (height t1) (height t2))
26+
27+
let empty = Str (Array.make 0 "", true, 0, 0)
28+
29+
let rec unsafe_sub t start stop =
30+
if start == 0 && stop = length t then
31+
t
32+
else if start == stop then
33+
empty
34+
else match t with
35+
| Str (data, nl, len, off) ->
36+
assert (stop <= len);
37+
Str (data, nl, stop - start, off + start)
38+
| App (l, r, _, _) ->
39+
let len = length l in
40+
if stop <= len then unsafe_sub l start stop
41+
else if start >= len then unsafe_sub r (start - len) (stop - len)
42+
else append (unsafe_sub l start len) (unsafe_sub r 0 (stop - len))
43+
44+
let chop t ?(off = 0) len =
45+
if len < 0 || len > length t - off
46+
then invalid_arg "Rope.chop";
47+
if len == 0 then empty else unsafe_sub t off (off + len)
48+
49+
let shift t len =
50+
if len < 0 then t
51+
else if len == 0 then t
52+
else
53+
let max = length t in
54+
let len = min_int max len in
55+
let l = len + (max - len) in
56+
unsafe_sub t len l
57+
58+
let rec last_is_nl = function
59+
| Str (a, nl, len, off) -> if Array.length a - off = len then nl else true
60+
| App (_, r, _, _) -> last_is_nl r
61+
62+
let rec byte_length = function
63+
| Str (s, _, len, off) as a ->
64+
let sum = ref 0 in
65+
for idx = off to len + off - 1 do
66+
let data = Array.unsafe_get s idx in
67+
sum := !sum + String.length data + 1
68+
done;
69+
!sum - if last_is_nl a then 0 else 1
70+
| App (l, r, _, _) -> byte_length l + byte_length r
71+
72+
let rec into_bytes buf dst_off = function
73+
| Str (s, _, len, off) as a ->
74+
let off' = ref dst_off in
75+
for idx = off to len + off - 1 do
76+
let data = Array.unsafe_get s idx in
77+
Bytes.unsafe_blit_string data 0 buf !off' (String.length data);
78+
off' := !off' + String.length data + 1;
79+
if idx - off < len - 1 || (idx - off = len - 1 && last_is_nl a) then
80+
Bytes.unsafe_set buf (!off' - 1) '\n'
81+
done
82+
| App (l, r, _, _) ->
83+
into_bytes buf dst_off l;
84+
into_bytes buf (dst_off + byte_length l) r
85+
86+
let to_strings t =
87+
let rec go acc = function
88+
| Str (s, _nl, len, off) ->
89+
let r = ref [] in
90+
for idx = off to len + off - 1 do
91+
let data = Array.unsafe_get s idx in
92+
r := data :: !r
93+
done;
94+
List.rev !r @ acc
95+
| App (l, r, _, _) -> go (go acc r) l in
96+
go [] t
97+
98+
let to_string t =
99+
let len = byte_length t in
100+
let buf = Bytes.create len in
101+
into_bytes buf 0 t;
102+
Bytes.unsafe_to_string buf
103+
104+
let concat a b = append a b
105+
let prepend (str, nl) t = append (Str (Array.make 1 str, nl, 1, 0)) t
106+
107+
let append t (str, nl) = append t (Str (Array.make 1 str, nl, 1, 0))
108+
109+
let of_strings xs last_is_nl =
110+
let d = Array.of_list xs in
111+
Str (d, last_is_nl, Array.length d, 0)
112+
113+
let of_string str =
114+
let splitted = String.split_on_char '\n' str in
115+
let last_is_nl = String.unsafe_get str (String.length str - 1) = '\n' in
116+
let splitted = if last_is_nl then List.rev (List.tl (List.rev splitted)) else splitted in
117+
let d = Array.of_list splitted in
118+
Str (d, last_is_nl, Array.length d, 0)
119+
120+
let equal a b = String.equal (to_string a) (to_string b)

0 commit comments

Comments
 (0)