Skip to content

Commit f3919b0

Browse files
panglesdEmileTrotignon
authored andcommitted
Add a marshalled input/output format for search indexes
Signed-off-by: Paul-Elliot <[email protected]>
1 parent 51f171e commit f3919b0

File tree

10 files changed

+207
-43
lines changed

10 files changed

+207
-43
lines changed

src/model/paths.ml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,10 @@ module Identifier = struct
643643
mk_parent LocalName.to_string "sli" (fun (p, n) ->
644644
`SourceLocationInternal (p, n))
645645
end
646+
647+
module Hashtbl = struct
648+
module Any = Hashtbl.Make (Any)
649+
end
646650
end
647651

648652
module Path = struct

src/model/paths.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ module Identifier : sig
236236
end
237237
end
238238

239+
module Hashtbl : sig
240+
module Any : Hashtbl.S with type key = Any.t
241+
end
242+
239243
module Mk : sig
240244
open Names
241245

src/odoc/bin/main.ml

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -465,46 +465,56 @@ module Compile_impl = struct
465465
end
466466

467467
module Indexing = struct
468-
let output_file ~dst =
469-
match dst with
470-
| Some file -> Fs.File.of_string file
471-
| None -> Fs.File.of_string "index.json"
472-
473-
let index dst warnings_options inputs_in_file inputs =
474-
let output = output_file ~dst in
468+
let output_file ~dst marshall =
469+
match (dst, marshall) with
470+
| Some file, _ -> Fs.File.of_string file
471+
| None, `JSON -> Fs.File.of_string "index.json"
472+
| None, `Marshall -> Fs.File.of_string "index-index.odoc"
473+
474+
let index dst marshall warnings_options inputs_in_file inputs =
475+
let marshall = if marshall then `Marshall else `JSON in
476+
let output = output_file ~dst marshall in
475477
match (inputs_in_file, inputs) with
476478
| [], [] ->
477479
Result.Error
478480
(`Msg
479481
"At least one of --file-list or an .odocl file must be passed to \
480482
odoc compile-index")
481-
| _ -> Indexing.compile ~output ~warnings_options inputs_in_file inputs
483+
| _ ->
484+
Indexing.compile marshall ~output ~warnings_options inputs_in_file
485+
inputs
482486

483487
let cmd =
484488
let dst =
485489
let doc =
486490
"Output file path. Non-existing intermediate directories are created. \
487-
Defaults to index.json"
491+
Defaults to index.json, or index-index.odoc if --marshall is passed \
492+
(in which case, the $(i,index-) prefix is mandatory)."
488493
in
489494
Arg.(
490495
value & opt (some string) None & info ~docs ~docv:"PATH" ~doc [ "o" ])
491496
in
492497
let inputs_in_file =
493498
let doc =
494-
"Input text file containing a line-separated list of paths to .odocl \
495-
files to index."
499+
"Input text file containing a line-separated list of paths to \
500+
.odocl/.json files to index."
496501
in
497502
Arg.(
498503
value & opt_all convert_fpath []
499504
& info ~doc ~docv:"FILE" [ "file-list" ])
500505
in
506+
let marshall =
507+
let doc = "whether to output a json file, or an .odoc file" in
508+
Arg.(value & flag & info ~doc [ "marshall" ])
509+
in
501510
let inputs =
502-
let doc = ".odocl file to index" in
511+
let doc = ".odocl/.json file to index" in
503512
Arg.(value & pos_all convert_fpath [] & info ~doc ~docv:"FILE" [])
504513
in
505514
Term.(
506515
const handle_error
507-
$ (const index $ dst $ warnings_options $ inputs_in_file $ inputs))
516+
$ (const index $ dst $ marshall $ warnings_options $ inputs_in_file
517+
$ inputs))
508518

509519
let info ~docs =
510520
let doc =

src/odoc/indexing.ml

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,26 @@ open Odoc_json_index
33
open Or_error
44
open Odoc_model
55

6-
let handle_file file ~unit ~page =
7-
Odoc_file.load file >>= fun unit' ->
8-
match unit' with
9-
| { Odoc_file.content = Unit_content unit'; _ } when unit'.hidden ->
10-
Error (`Msg "Hidden units are ignored when generating an index")
11-
| { Odoc_file.content = Unit_content unit'; _ } (* when not unit'.hidden *) ->
12-
Ok (unit unit')
13-
| { Odoc_file.content = Page_content page'; _ } -> Ok (page page')
14-
| _ ->
15-
Error
16-
(`Msg
17-
"Only pages and unit are allowed as input when generating an index")
6+
module H = Odoc_model.Paths.Identifier.Hashtbl.Any
7+
8+
let handle_file file ~unit ~page ~occ =
9+
match Fpath.basename file with
10+
| s when String.is_prefix ~affix:"index-" s ->
11+
Odoc_file.load_index file >>= fun index -> Ok (occ index)
12+
| _ -> (
13+
Odoc_file.load file >>= fun unit' ->
14+
match unit' with
15+
| { Odoc_file.content = Unit_content unit'; _ } when unit'.hidden ->
16+
Error (`Msg "Hidden units are ignored when generating an index")
17+
| { Odoc_file.content = Unit_content unit'; _ }
18+
(* when not unit'.hidden *) ->
19+
Ok (unit unit')
20+
| { Odoc_file.content = Page_content page'; _ } -> Ok (page page')
21+
| _ ->
22+
Error
23+
(`Msg
24+
"Only pages and unit are allowed as input when generating an \
25+
index"))
1826

1927
let parse_input_file input =
2028
let is_sep = function '\n' | '\r' -> true | _ -> false in
@@ -32,7 +40,7 @@ let parse_input_files input =
3240
(Ok []) input
3341
>>= fun files -> Ok (List.concat files)
3442

35-
let compile ~output ~warnings_options inputs_in_file inputs =
43+
let compile_to_json ~output ~warnings_options inputs_in_file inputs =
3644
parse_input_files inputs_in_file >>= fun files ->
3745
let files = List.rev_append inputs files in
3846
let output_channel =
@@ -53,6 +61,7 @@ let compile ~output ~warnings_options inputs_in_file inputs =
5361
handle_file
5462
~unit:(print Json_search.unit acc)
5563
~page:(print Json_search.page acc)
64+
~occ:(print Json_search.index acc)
5665
file
5766
with
5867
| Ok acc -> acc
@@ -66,3 +75,56 @@ let compile ~output ~warnings_options inputs_in_file inputs =
6675
result |> Error.handle_warnings ~warnings_options >>= fun (_ : bool) ->
6776
Format.fprintf output "]";
6877
Ok ()
78+
79+
let compile_to_marshall ~output ~warnings_options inputs_in_file inputs =
80+
parse_input_files inputs_in_file >>= fun files ->
81+
let files = List.rev_append inputs files in
82+
let final_index = H.create 10 in
83+
let unit u =
84+
Odoc_model.Fold.unit
85+
~f:(fun () item ->
86+
let entries =
87+
Odoc_search.Entry.entries_of_item
88+
(* (u.Odoc_model.Lang.Compilation_unit.id *)
89+
(* :> Odoc_model.Paths.Identifier.t) *)
90+
item
91+
in
92+
List.iter
93+
(fun entry -> H.add final_index entry.Odoc_search.Entry.id entry)
94+
entries)
95+
() u
96+
in
97+
let page p =
98+
Odoc_model.Fold.page
99+
~f:(fun () item ->
100+
let entries =
101+
Odoc_search.Entry.entries_of_item
102+
(* (p.Odoc_model.Lang.Page.name :> Odoc_model.Paths.Identifier.t) *)
103+
item
104+
in
105+
List.iter
106+
(fun entry -> H.add final_index entry.Odoc_search.Entry.id entry)
107+
entries)
108+
() p
109+
in
110+
let index i = H.iter (H.add final_index) i in
111+
let index () =
112+
List.fold_left
113+
(fun acc file ->
114+
match handle_file ~unit ~page ~occ:index file with
115+
| Ok acc -> acc
116+
| Error (`Msg m) ->
117+
Error.raise_warning ~non_fatal:true
118+
(Error.filename_only "%s" m (Fs.File.to_string file));
119+
acc)
120+
() files
121+
in
122+
let result = Error.catch_warnings index in
123+
result |> Error.handle_warnings ~warnings_options >>= fun () ->
124+
Ok (Odoc_file.save_index output final_index)
125+
126+
let compile out_format ~output ~warnings_options inputs_in_file inputs =
127+
match out_format with
128+
| `JSON -> compile_to_json ~output ~warnings_options inputs_in_file inputs
129+
| `Marshall ->
130+
compile_to_marshall ~output ~warnings_options inputs_in_file inputs

src/odoc/indexing.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@ val handle_file :
44
Fpath.t ->
55
unit:(Odoc_model.Lang.Compilation_unit.t -> 'a) ->
66
page:(Odoc_model.Lang.Page.t -> 'a) ->
7+
occ:(Odoc_search.Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t -> 'a) ->
78
('a, [> msg ]) result
89
(** This function is exposed for custom indexers that uses [odoc] as a library
910
to generate their search index *)
1011

1112
val compile :
13+
[ `JSON | `Marshall ] ->
1214
output:Fs.file ->
1315
warnings_options:Odoc_model.Error.warnings_options ->
1416
Fs.file list ->

src/odoc/odoc_file.ml

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,18 @@ type t = { content : content; warnings : Odoc_model.Error.t list }
3131
let magic = "odoc-%%VERSION%%"
3232

3333
(** Exceptions while saving are allowed to leak. *)
34-
let save_unit file (root : Root.t) (t : t) =
34+
let save_ file f =
3535
Fs.Directory.mkdir_p (Fs.File.dirname file);
3636
let oc = open_out_bin (Fs.File.to_string file) in
3737
output_string oc magic;
38-
Marshal.to_channel oc root [];
39-
Marshal.to_channel oc t [];
38+
f oc;
4039
close_out oc
4140

41+
let save_unit file (root : Root.t) (t : t) =
42+
save_ file (fun oc ->
43+
Marshal.to_channel oc root [];
44+
Marshal.to_channel oc t [])
45+
4246
let save_page file ~warnings page =
4347
let dir = Fs.File.dirname file in
4448
let base = Fs.File.(to_string @@ basename file) in
@@ -81,9 +85,7 @@ let load_ file f =
8185
let res =
8286
try
8387
let actual_magic = really_input_string ic (String.length magic) in
84-
if actual_magic = magic then
85-
let root = Marshal.from_channel ic in
86-
f ic root
88+
if actual_magic = magic then f ic
8789
else
8890
let msg =
8991
Printf.sprintf "%s: invalid magic number %S, expected %S\n%!" file
@@ -100,7 +102,17 @@ let load_ file f =
100102
close_in ic;
101103
res
102104

103-
let load file = load_ file (fun ic _ -> Ok (Marshal.from_channel ic))
105+
let load file =
106+
load_ file (fun ic ->
107+
let _root = Marshal.from_channel ic in
108+
Ok (Marshal.from_channel ic))
104109

105110
(** The root is saved separately in the files to support this function. *)
106-
let load_root file = load_ file (fun _ root -> Ok root)
111+
let load_root file =
112+
load_ file (fun ic ->
113+
let root = Marshal.from_channel ic in
114+
Ok root)
115+
116+
let save_index dst idx = save_ dst (fun oc -> Marshal.to_channel oc idx [])
117+
118+
let load_index file = load_ file (fun ic -> Ok (Marshal.from_channel ic))

src/odoc/odoc_file.mli

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,11 @@ val load : Fs.File.t -> (t, [> msg ]) result
5555

5656
val load_root : Fs.File.t -> (Root.t, [> msg ]) result
5757
(** Only load the root. Faster than {!load}, used for looking up imports. *)
58+
59+
val save_index :
60+
Fs.File.t -> Odoc_search.Entry.t Paths.Identifier.Hashtbl.Any.t -> unit
61+
62+
val load_index :
63+
Fs.File.t ->
64+
(Odoc_search.Entry.t Paths.Identifier.Hashtbl.Any.t, [> msg ]) result
65+
(** Load an [.odoc] file. *)

src/search/json_index/json_search.ml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,3 +214,13 @@ let page ppf (page : Odoc_model.Lang.Page.t) =
214214
in
215215
let _first = Odoc_model.Fold.page ~f true page in
216216
()
217+
218+
let index ppf (index : Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t) =
219+
let _first =
220+
Odoc_model.Paths.Identifier.Hashtbl.Any.fold
221+
(fun _id entry first ->
222+
let entry = (entry, Html.of_entry entry) in
223+
output_json ppf first [ entry ])
224+
index true
225+
in
226+
()

src/search/json_index/json_search.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22

33
val unit : Format.formatter -> Odoc_model.Lang.Compilation_unit.t -> unit
44
val page : Format.formatter -> Odoc_model.Lang.Page.t -> unit
5+
val index :
6+
Format.formatter ->
7+
Odoc_search.Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t ->
8+
unit

test/search/html_search.t/run.t

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,21 @@ we will generate.
4848
$ odoc html-generate --search-uri fuse.js.js --search-uri index.js -o html page-page.odocl
4949
$ odoc support-files -o html
5050

51-
We now focus on how to generate the index.js file. There are mainly two ways: by
52-
using odoc as a library, or by using the the `compile-index` command. This
53-
command generates a json index containing all .odocl given as input, to be
54-
consumed later by a search engine. If -o is not provided, the file is saved as
55-
index.json.
56-
Odocl files can be given either in a list (using --file-list,
57-
passing a file with the list of line-separated files), or by passing directly
58-
the name of the files.
51+
We now focus on how to generate the index.js file.
52+
53+
For this, we compute an index of all the values contained in a given list of
54+
odoc files, using the `compile-index` command.
55+
56+
This command generates has two output format: a json output for consumption by
57+
external search engine, and an `odoc` specific extension. The odoc file is
58+
meant to be consumed either by search engine written in OCaml, which would
59+
depend on `odoc` as a library, or by `odoc` itself to build a global index
60+
incrementally: the `compile-index` command can take indexes as input!
61+
62+
If -o is not provided, the file is saved as index.json, or index-index.odoc if
63+
the --marshall flag is passed. Odocl files can be given either in a list (using
64+
--file-list, passing a file with the list of line-separated files), or by
65+
passing directly the name of the files.
5966

6067
$ printf "main.odocl\npage-page.odocl\nj.odocl\n" > index_map
6168
$ odoc compile-index -o index1.json --file-list index_map
@@ -74,7 +81,48 @@ Let's check that the previous commands are indeed independent:
7481
$ diff index.json index1.json
7582
$ diff index.json index2.json
7683
77-
The index file contains a json array, each element of the array corresponding to
84+
Let's now test the --marshall flag.
85+
We compare:
86+
- the result of outputing as a marshalled file, and then use that to output a json file.
87+
- Directly outputing a json file
88+
89+
$ odoc compile-index -o index-main.odoc --marshall main.odocl
90+
$ odoc compile-index -o main.json index-main.odoc
91+
$ cat main.json | jq sort | jq '.[]' -c | sort > main1.json
92+
93+
$ odoc compile-index -o main.json main.odocl
94+
$ cat main.json | jq sort | jq '.[]' -c | sort > main2.json
95+
96+
$ diff main1.json main2.json
97+
98+
$ odoc compile-index -o index-j.odoc --marshall j.odocl
99+
$ odoc compile-index -o j.json index-j.odoc
100+
$ cat j.json | jq sort | jq '.[]' -c | sort > j1.json
101+
102+
$ odoc compile-index -o j.json j.odocl
103+
$ cat j.json | jq sort | jq '.[]' -c | sort > j2.json
104+
105+
$ diff j1.json j2.json
106+
107+
$ odoc compile-index -o index-page.odoc --marshall page-page.odocl
108+
$ odoc compile-index -o page.json index-page.odoc
109+
$ cat page.json | jq sort | jq '.[]' -c | sort > page1.json
110+
111+
$ odoc compile-index -o page.json page-page.odocl
112+
$ cat page.json | jq sort | jq '.[]' -c | sort > page2.json
113+
114+
$ diff page1.json page2.json
115+
116+
Now, we compare the combination of the three marshalled files (index-main.odoc,
117+
index-page.odoc, index-j.odoc).
118+
119+
$ odoc compile-index -o all.json index-page.odoc index-j.odoc index-main.odoc
120+
$ cat all.json | jq sort | jq '.[]' -c | sort > all1.json
121+
122+
$ cat index.json | jq sort | jq '.[]' -c | sort > all2.json
123+
$ diff all1.json all2.json
124+
125+
The json index file contains a json array, each element of the array corresponding to
78126
a search entry.
79127
An index entry contains:
80128
- an ID,

0 commit comments

Comments
 (0)