Skip to content

Commit 2861f69

Browse files
committed
Add a marshalled input/output format for search indexes
Signed-off-by: Paul-Elliot <[email protected]>
1 parent 8668acf commit 2861f69

File tree

10 files changed

+207
-43
lines changed

10 files changed

+207
-43
lines changed

src/model/paths.ml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,10 @@ module Identifier = struct
636636
mk_parent LocalName.to_string "sli" (fun (p, n) ->
637637
`SourceLocationInternal (p, n))
638638
end
639+
640+
module Hashtbl = struct
641+
module Any = Hashtbl.Make (Any)
642+
end
639643
end
640644

641645
module Path = struct

src/model/paths.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ module Identifier : sig
236236
end
237237
end
238238

239+
module Hashtbl : sig
240+
module Any : Hashtbl.S with type key = Any.t
241+
end
242+
239243
module Mk : sig
240244
open Names
241245

src/odoc/bin/main.ml

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -403,46 +403,56 @@ module Source_tree = struct
403403
end
404404

405405
module Indexing = struct
406-
let output_file ~dst =
407-
match dst with
408-
| Some file -> Fs.File.of_string file
409-
| None -> Fs.File.of_string "index.json"
410-
411-
let index dst warnings_options inputs_in_file inputs =
412-
let output = output_file ~dst in
406+
let output_file ~dst marshall =
407+
match (dst, marshall) with
408+
| Some file, _ -> Fs.File.of_string file
409+
| None, `JSON -> Fs.File.of_string "index.json"
410+
| None, `Marshall -> Fs.File.of_string "index-index.odoc"
411+
412+
let index dst marshall warnings_options inputs_in_file inputs =
413+
let marshall = if marshall then `Marshall else `JSON in
414+
let output = output_file ~dst marshall in
413415
match (inputs_in_file, inputs) with
414416
| [], [] ->
415417
Result.Error
416418
(`Msg
417419
"At least one of --file-list or an .odocl file must be passed to \
418420
odoc compile-index")
419-
| _ -> Indexing.compile ~output ~warnings_options inputs_in_file inputs
421+
| _ ->
422+
Indexing.compile marshall ~output ~warnings_options inputs_in_file
423+
inputs
420424

421425
let cmd =
422426
let dst =
423427
let doc =
424428
"Output file path. Non-existing intermediate directories are created. \
425-
Defaults to index.json"
429+
Defaults to index.json, or index-index.odoc if --marshall is passed \
430+
(in which case, the $(i,index-) prefix is mandatory)."
426431
in
427432
Arg.(
428433
value & opt (some string) None & info ~docs ~docv:"PATH" ~doc [ "o" ])
429434
in
430435
let inputs_in_file =
431436
let doc =
432-
"Input text file containing a line-separated list of paths to .odocl \
433-
files to index."
437+
"Input text file containing a line-separated list of paths to \
438+
.odocl/.json files to index."
434439
in
435440
Arg.(
436441
value & opt_all convert_fpath []
437442
& info ~doc ~docv:"FILE" [ "file-list" ])
438443
in
444+
let marshall =
445+
let doc = "whether to output a json file, or an .odoc file" in
446+
Arg.(value & flag & info ~doc [ "marshall" ])
447+
in
439448
let inputs =
440-
let doc = ".odocl file to index" in
449+
let doc = ".odocl/.json file to index" in
441450
Arg.(value & pos_all convert_fpath [] & info ~doc ~docv:"FILE" [])
442451
in
443452
Term.(
444453
const handle_error
445-
$ (const index $ dst $ warnings_options $ inputs_in_file $ inputs))
454+
$ (const index $ dst $ marshall $ warnings_options $ inputs_in_file
455+
$ inputs))
446456

447457
let info ~docs =
448458
let doc =

src/odoc/indexing.ml

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,26 @@ open Odoc_json_index
33
open Or_error
44
open Odoc_model
55

6-
let handle_file file ~unit ~page =
7-
Odoc_file.load file >>= fun unit' ->
8-
match unit' with
9-
| { Odoc_file.content = Unit_content unit'; _ } when unit'.hidden ->
10-
Error (`Msg "Hidden units are ignored when generating an index")
11-
| { Odoc_file.content = Unit_content unit'; _ } (* when not unit'.hidden *) ->
12-
Ok (unit unit')
13-
| { Odoc_file.content = Page_content page'; _ } -> Ok (page page')
14-
| _ ->
15-
Error
16-
(`Msg
17-
"Only pages and unit are allowed as input when generating an index")
6+
module H = Odoc_model.Paths.Identifier.Hashtbl.Any
7+
8+
let handle_file file ~unit ~page ~occ =
9+
match Fpath.basename file with
10+
| s when String.is_prefix ~affix:"index-" s ->
11+
Odoc_file.load_index file >>= fun index -> Ok (occ index)
12+
| _ -> (
13+
Odoc_file.load file >>= fun unit' ->
14+
match unit' with
15+
| { Odoc_file.content = Unit_content unit'; _ } when unit'.hidden ->
16+
Error (`Msg "Hidden units are ignored when generating an index")
17+
| { Odoc_file.content = Unit_content unit'; _ }
18+
(* when not unit'.hidden *) ->
19+
Ok (unit unit')
20+
| { Odoc_file.content = Page_content page'; _ } -> Ok (page page')
21+
| _ ->
22+
Error
23+
(`Msg
24+
"Only pages and unit are allowed as input when generating an \
25+
index"))
1826

1927
let parse_input_file input =
2028
let is_sep = function '\n' | '\r' -> true | _ -> false in
@@ -32,7 +40,7 @@ let parse_input_files input =
3240
(Ok []) input
3341
>>= fun files -> Ok (List.concat files)
3442

35-
let compile ~output ~warnings_options inputs_in_file inputs =
43+
let compile_to_json ~output ~warnings_options inputs_in_file inputs =
3644
parse_input_files inputs_in_file >>= fun files ->
3745
let files = List.rev_append inputs files in
3846
let output_channel =
@@ -53,6 +61,7 @@ let compile ~output ~warnings_options inputs_in_file inputs =
5361
handle_file
5462
~unit:(print Json_search.unit acc)
5563
~page:(print Json_search.page acc)
64+
~occ:(print Json_search.index acc)
5665
file
5766
with
5867
| Ok acc -> acc
@@ -66,3 +75,56 @@ let compile ~output ~warnings_options inputs_in_file inputs =
6675
result |> Error.handle_warnings ~warnings_options >>= fun (_ : bool) ->
6776
Format.fprintf output "]";
6877
Ok ()
78+
79+
let compile_to_marshall ~output ~warnings_options inputs_in_file inputs =
80+
parse_input_files inputs_in_file >>= fun files ->
81+
let files = List.rev_append inputs files in
82+
let final_index = H.create 10 in
83+
let unit u =
84+
Odoc_model.Fold.unit
85+
~f:(fun () item ->
86+
let entries =
87+
Odoc_search.Entry.entries_of_item
88+
(* (u.Odoc_model.Lang.Compilation_unit.id *)
89+
(* :> Odoc_model.Paths.Identifier.t) *)
90+
item
91+
in
92+
List.iter
93+
(fun entry -> H.add final_index entry.Odoc_search.Entry.id entry)
94+
entries)
95+
() u
96+
in
97+
let page p =
98+
Odoc_model.Fold.page
99+
~f:(fun () item ->
100+
let entries =
101+
Odoc_search.Entry.entries_of_item
102+
(* (p.Odoc_model.Lang.Page.name :> Odoc_model.Paths.Identifier.t) *)
103+
item
104+
in
105+
List.iter
106+
(fun entry -> H.add final_index entry.Odoc_search.Entry.id entry)
107+
entries)
108+
() p
109+
in
110+
let index i = H.iter (H.add final_index) i in
111+
let index () =
112+
List.fold_left
113+
(fun acc file ->
114+
match handle_file ~unit ~page ~occ:index file with
115+
| Ok acc -> acc
116+
| Error (`Msg m) ->
117+
Error.raise_warning ~non_fatal:true
118+
(Error.filename_only "%s" m (Fs.File.to_string file));
119+
acc)
120+
() files
121+
in
122+
let result = Error.catch_warnings index in
123+
result |> Error.handle_warnings ~warnings_options >>= fun () ->
124+
Ok (Odoc_file.save_index output final_index)
125+
126+
let compile out_format ~output ~warnings_options inputs_in_file inputs =
127+
match out_format with
128+
| `JSON -> compile_to_json ~output ~warnings_options inputs_in_file inputs
129+
| `Marshall ->
130+
compile_to_marshall ~output ~warnings_options inputs_in_file inputs

src/odoc/indexing.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@ val handle_file :
44
Fpath.t ->
55
unit:(Odoc_model.Lang.Compilation_unit.t -> 'a) ->
66
page:(Odoc_model.Lang.Page.t -> 'a) ->
7+
occ:(Odoc_search.Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t -> 'a) ->
78
('a, [> msg ]) result
89
(** This function is exposed for custom indexers that uses [odoc] as a library
910
to generate their search index *)
1011

1112
val compile :
13+
[ `JSON | `Marshall ] ->
1214
output:Fs.file ->
1315
warnings_options:Odoc_model.Error.warnings_options ->
1416
Fs.file list ->

src/odoc/odoc_file.ml

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,18 @@ type t = { content : content; warnings : Odoc_model.Error.t list }
3030
let magic = "odoc-%%VERSION%%"
3131

3232
(** Exceptions while saving are allowed to leak. *)
33-
let save_unit file (root : Root.t) (t : t) =
33+
let save_ file f =
3434
Fs.Directory.mkdir_p (Fs.File.dirname file);
3535
let oc = open_out_bin (Fs.File.to_string file) in
3636
output_string oc magic;
37-
Marshal.to_channel oc root [];
38-
Marshal.to_channel oc t [];
37+
f oc;
3938
close_out oc
4039

40+
let save_unit file (root : Root.t) (t : t) =
41+
save_ file (fun oc ->
42+
Marshal.to_channel oc root [];
43+
Marshal.to_channel oc t [])
44+
4145
let save_page file ~warnings page =
4246
let dir = Fs.File.dirname file in
4347
let base = Fs.File.(to_string @@ basename file) in
@@ -70,9 +74,7 @@ let load_ file f =
7074
let res =
7175
try
7276
let actual_magic = really_input_string ic (String.length magic) in
73-
if actual_magic = magic then
74-
let root = Marshal.from_channel ic in
75-
f ic root
77+
if actual_magic = magic then f ic
7678
else
7779
let msg =
7880
Printf.sprintf "%s: invalid magic number %S, expected %S\n%!" file
@@ -89,7 +91,17 @@ let load_ file f =
8991
close_in ic;
9092
res
9193

92-
let load file = load_ file (fun ic _ -> Ok (Marshal.from_channel ic))
94+
let load file =
95+
load_ file (fun ic ->
96+
let _root = Marshal.from_channel ic in
97+
Ok (Marshal.from_channel ic))
9398

9499
(** The root is saved separately in the files to support this function. *)
95-
let load_root file = load_ file (fun _ root -> Ok root)
100+
let load_root file =
101+
load_ file (fun ic ->
102+
let root = Marshal.from_channel ic in
103+
Ok root)
104+
105+
let save_index dst idx = save_ dst (fun oc -> Marshal.to_channel oc idx [])
106+
107+
let load_index file = load_ file (fun ic -> Ok (Marshal.from_channel ic))

src/odoc/odoc_file.mli

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,11 @@ val load : Fs.File.t -> (t, [> msg ]) result
4949

5050
val load_root : Fs.File.t -> (Root.t, [> msg ]) result
5151
(** Only load the root. Faster than {!load}, used for looking up imports. *)
52+
53+
val save_index :
54+
Fs.File.t -> Odoc_search.Entry.t Paths.Identifier.Hashtbl.Any.t -> unit
55+
56+
val load_index :
57+
Fs.File.t ->
58+
(Odoc_search.Entry.t Paths.Identifier.Hashtbl.Any.t, [> msg ]) result
59+
(** Load an [.odoc] file. *)

src/search/json_index/json_search.ml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,3 +214,13 @@ let page ppf (page : Odoc_model.Lang.Page.t) =
214214
in
215215
let _first = Odoc_model.Fold.page ~f true page in
216216
()
217+
218+
let index ppf (index : Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t) =
219+
let _first =
220+
Odoc_model.Paths.Identifier.Hashtbl.Any.fold
221+
(fun _id entry first ->
222+
let entry = (entry, Html.of_entry entry) in
223+
output_json ppf first [ entry ])
224+
index true
225+
in
226+
()

src/search/json_index/json_search.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22

33
val unit : Format.formatter -> Odoc_model.Lang.Compilation_unit.t -> unit
44
val page : Format.formatter -> Odoc_model.Lang.Page.t -> unit
5+
val index :
6+
Format.formatter ->
7+
Odoc_search.Entry.t Odoc_model.Paths.Identifier.Hashtbl.Any.t ->
8+
unit

test/search/html_search.t/run.t

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,21 @@ we will generate.
4848
$ odoc html-generate --search-uri fuse.js.js --search-uri index.js -o html page-page.odocl
4949
$ odoc support-files -o html
5050

51-
We now focus on how to generate the index.js file. There are mainly two ways: by
52-
using odoc as a library, or by using the the `compile-index` command. This
53-
command generates a json index containing all .odocl given as input, to be
54-
consumed later by a search engine. If -o is not provided, the file is saved as
55-
index.json.
56-
Odocl files can be given either in a list (using --file-list,
57-
passing a file with the list of line-separated files), or by passing directly
58-
the name of the files.
51+
We now focus on how to generate the index.js file.
52+
53+
For this, we compute an index of all the values contained in a given list of
54+
odoc files, using the `compile-index` command.
55+
56+
This command generates has two output format: a json output for consumption by
57+
external search engine, and an `odoc` specific extension. The odoc file is
58+
meant to be consumed either by search engine written in OCaml, which would
59+
depend on `odoc` as a library, or by `odoc` itself to build a global index
60+
incrementally: the `compile-index` command can take indexes as input!
61+
62+
If -o is not provided, the file is saved as index.json, or index-index.odoc if
63+
the --marshall flag is passed. Odocl files can be given either in a list (using
64+
--file-list, passing a file with the list of line-separated files), or by
65+
passing directly the name of the files.
5966

6067
$ printf "main.odocl\npage-page.odocl\nj.odocl\n" > index_map
6168
$ odoc compile-index -o index1.json --file-list index_map
@@ -74,7 +81,48 @@ Let's check that the previous commands are indeed independent:
7481
$ diff index.json index1.json
7582
$ diff index.json index2.json
7683
77-
The index file contains a json array, each element of the array corresponding to
84+
Let's now test the --marshall flag.
85+
We compare:
86+
- the result of outputing as a marshalled file, and then use that to output a json file.
87+
- Directly outputing a json file
88+
89+
$ odoc compile-index -o index-main.odoc --marshall main.odocl
90+
$ odoc compile-index -o main.json index-main.odoc
91+
$ cat main.json | jq sort | jq '.[]' -c | sort > main1.json
92+
93+
$ odoc compile-index -o main.json main.odocl
94+
$ cat main.json | jq sort | jq '.[]' -c | sort > main2.json
95+
96+
$ diff main1.json main2.json
97+
98+
$ odoc compile-index -o index-j.odoc --marshall j.odocl
99+
$ odoc compile-index -o j.json index-j.odoc
100+
$ cat j.json | jq sort | jq '.[]' -c | sort > j1.json
101+
102+
$ odoc compile-index -o j.json j.odocl
103+
$ cat j.json | jq sort | jq '.[]' -c | sort > j2.json
104+
105+
$ diff j1.json j2.json
106+
107+
$ odoc compile-index -o index-page.odoc --marshall page-page.odocl
108+
$ odoc compile-index -o page.json index-page.odoc
109+
$ cat page.json | jq sort | jq '.[]' -c | sort > page1.json
110+
111+
$ odoc compile-index -o page.json page-page.odocl
112+
$ cat page.json | jq sort | jq '.[]' -c | sort > page2.json
113+
114+
$ diff page1.json page2.json
115+
116+
Now, we compare the combination of the three marshalled files (index-main.odoc,
117+
index-page.odoc, index-j.odoc).
118+
119+
$ odoc compile-index -o all.json index-page.odoc index-j.odoc index-main.odoc
120+
$ cat all.json | jq sort | jq '.[]' -c | sort > all1.json
121+
122+
$ cat index.json | jq sort | jq '.[]' -c | sort > all2.json
123+
$ diff all1.json all2.json
124+
125+
The json index file contains a json array, each element of the array corresponding to
78126
a search entry.
79127
An index entry contains:
80128
- an ID,

0 commit comments

Comments
 (0)