From 17b3ba446d8b256a251c6786ba1afd5b926b5a10 Mon Sep 17 00:00:00 2001 From: Max Lang <17551908+just-max@users.noreply.github.com> Date: Sun, 29 Sep 2024 04:11:00 -0400 Subject: [PATCH] Stdlib_variants: Avoid types being implicitly duplicated by signature builder (#23) --- .../signature-builder/signature_builder.ml | 109 ++++++++++++++++-- .../stdlib-alerts/stdlib_alerts.ml | 71 ++++++++++-- .../stdlib-alerts/threads_alerts.ml | 10 +- 3 files changed, 167 insertions(+), 23 deletions(-) diff --git a/src/stdlib-variants/signature-builder/signature_builder.ml b/src/stdlib-variants/signature-builder/signature_builder.ml index 02e0a97..9b039ee 100644 --- a/src/stdlib-variants/signature-builder/signature_builder.ml +++ b/src/stdlib-variants/signature-builder/signature_builder.ml @@ -2,7 +2,9 @@ open Ppxlib open Common.Util (** Library and PPX rewriter for building interfaces from existing interfaces - by selectively adding and removing interface items. *) + by selectively adding and removing interface items. + + See {!Parse} for the full grammar for inclusion specifications. *) (** {1 Loading saved signatures} *) @@ -87,11 +89,72 @@ type signature_item_pat = { names : longident_loc list; kind : kind option; loc type include_specs = include_spec list and include_spec = - | Include_from of { from : longident_loc; items : signature_item_pat Ordered_set.t } + | Include_from of { from : longident_loc; items : signature_item_pat Ordered_set.t; loc : Location.t } | Include_group of { attributes : attribute list; items : include_specs } module Parse = struct + (** + + Parser for include specifications. + + To use the signature builder, first enable the PPX rewriter. + With dune, add the PPX rewriter to your component's [dune] file: + {@dune[(preprocess (pps less-power.signature-builder-ppx))]} + Then, within a signature, use an extension point to include signature items, for example: + {[ + module type Foo : sig + [%%include stdlib.stdlib (fst, snd)] + end + ]} + + Inclusion specifications can be more complex. The full grammar is as follows: + + {@text[ + include-specs := include-spec | '()' | '(' include-spec {',' include-spec} ')' + include-spec := + | from-spec items-spec + | '{' 'items =' include-specs ';' ['attributes = __' {attribute} ';'] '}' + from-spec := (* qualified path to signature-info to take items from *) + lowercase-ident {'.' lowercase-ident}+ + items-spec := (* ordered set of items *) + item-spec | '!standard' | items-spec '-' items-spec | '()' | '(' items-spec {',' items-spec} ')' + item-spec := item-names ['@' item-options] + item-names := item-name | '[' {item-name ';'}+ ']' + item-name := (* refer to values, types, modules, ... *) + lowercase-ident | capitalized-ident | '[]' + ]} + + The following more complex example includes [t] and [to_string_default] + from the standard [Printexc] module, then also includes everything except + that type and that value, with each item given the [unsafe] attribute: + + {[ + [%%include + stdlib.printexc (t, to_string_default), + { attributes = __ [@alert unsafe "This item is not permitted"]; + items = stdlib.printexc (!standard - (t, to_string_default)) } + ] + ]} + + To disambiguate signature items of different kinds with the same name, + use the syntax [identifier @ { kind = "kind"}], where [kind] is one of + [value], [type], [exception], [module], [module-type]. For example, to + include the standard [ref] function: + + {[[%%include stdlib.stdlib (ref @ { kind = "value" })]]} + + The preprocessor will refuse to include types which don't include a type + equation (e.g. abstract types and types defined only by constructors). This + is because this will create copies of the type, which is generally not the intention. + In this case, types must be defined manually without the inclusion mechanism, e.g.: + + {[type ('a, 'b) result = ('a, 'b) Stdlib.result = Ok of 'a | Error of 'b]} + *) + + (* TODO: make it possible to reference an existing type automatically. + Challenge is deciding how to resolve the type to reference. *) + let ordered_set = let open Ordered_set in let rec impl exp = match exp with @@ -176,10 +239,10 @@ module Parse = struct | exp -> [include_spec ~allow_comma:true exp] and include_spec ~allow_comma = function - | [%expr [%e? from_exp] [%e? items_exp] ] -> + | [%expr [%e? from_exp] [%e? items_exp] ] as exp -> let from = { txt = dotted_name from_exp; loc = from_exp.pexp_loc } in let items = ordered_set items_exp |> Ordered_set.map signature_item_pat in - Include_from { from; items } + Include_from { from; items; loc = exp.pexp_loc } | { pexp_desc = Pexp_record (entries, None); _ } as exp -> (* TODO: error on extra and duplicate keys *) let entries = List.map (fun (k, v) -> k.txt, v) entries in @@ -245,21 +308,35 @@ let match_signature_item_pat (pat : signature_item_pat) sigi = (match pat.kind with Some k -> match_kind k sigi | None -> true) && list_equal (fun p s -> match_lident_name p.txt s) pat.names (signature_item_names sigi) -let eval_pat_ordered_set signature s = +let eval_pat_ordered_set loc signature s = let indexed = signature |> List.filter is_signature_item_selectable - |> List.mapi (fun i x -> i, x) + |> List.mapi (fun i x -> i, (x, loc)) in let lookup pat = - match List.filter (snd %> match_signature_item_pat pat) indexed with - | [item] -> item + match List.filter (snd %> fst %> match_signature_item_pat pat) indexed with + | [i, (item, _)] -> i, (item, pat.loc) | [] -> Location.raise_errorf ~loc:pat.loc "Did not match any signature items" | items -> Location.raise_errorf ~loc:pat.loc "Ambiguous: matched %d signature items. Hint: use { kind = \"...\" } to disambiguate." (List.length items) in + let check_psigi item loc = + match item.psig_desc with + | Psig_type (_, tdcls) -> + tdcls |> List.iter (function + | { ptype_manifest = None; ptype_name; _ } -> + Location.raise_errorf ~loc + ("Included type '%s' has no manifest. You should include it " ^^ + "manually and (optionally) specify a type equality, " ^^ + "e.g. type t = Foo.t") + ptype_name.txt + | _ -> ()) + | _ -> () + in Ordered_set.map lookup s |> Ordered_set.eval ~eq:(fun (i1, _) (i2, _) -> i1 = i2) indexed |> List.map snd + |> List.map (fun (item, loc) -> check_psigi item loc; item) let modify_attributes f sigi = let desc = match sigi.psig_desc with @@ -288,14 +365,14 @@ let rec eval_include_specs sig_infos specs = List.concat_map (eval_include_spec sig_infos) specs and eval_include_spec sig_infos = function - | Include_from { from; items } -> + | Include_from { from; items; loc } -> let[@warning "-8"] p :: ps = Longident.flatten_exn from.txt in let signature = match infos_get_signature p ps sig_infos with | Some s -> s | None -> Location.raise_errorf ~loc:from.loc "Unknown signature" in - eval_pat_ordered_set signature items + eval_pat_ordered_set loc signature items | Include_group { attributes; items = spec_items } -> eval_include_specs sig_infos spec_items |> List.map (modify_attributes (fun attr -> attr @ attributes)) @@ -311,5 +388,13 @@ let default_sig_infos = [ let ppx_include ?(sig_infos = default_sig_infos) ~ctxt exp = let loc = Expansion_context.Extension.extension_point_loc ctxt in let module B = Ast_builder.Make (struct let loc = loc end) in - let sig_items = Parse.include_specs exp |> eval_include_specs sig_infos in - B.psig_include (B.include_infos (B.pmty_signature sig_items)) + try + let sig_items = Parse.include_specs exp |> eval_include_specs sig_infos in + B.psig_include (B.include_infos (B.pmty_signature sig_items)) + with e -> + (* Note: the docs recommend embedding extension errors as deeply as + possible. But that makes the code significantly more complex (everything + has to be in Result.t), for no real gain (dev experience is fine as-is) *) + match Location.Error.of_exn e with + | Some loc_err -> B.psig_extension (Location.Error.to_extension loc_err) [] + | None -> raise e diff --git a/src/stdlib-variants/stdlib-alerts/stdlib_alerts.ml b/src/stdlib-variants/stdlib-alerts/stdlib_alerts.ml index 2ce1fc8..7f53d6a 100644 --- a/src/stdlib-variants/stdlib-alerts/stdlib_alerts.ml +++ b/src/stdlib-variants/stdlib-alerts/stdlib_alerts.ml @@ -57,10 +57,18 @@ module type Float_alerting = sig end module type Hashtbl_alerting = sig + type[@alert impure "Hash tables are not permitted"] ('a, 'b) t = ('a, 'b) Hashtbl.t + type[@alert impure "Hash tables are not permitted"] statistics = Hashtbl.statistics = { + num_bindings: int; + num_buckets: int; + max_bucket_length: int; + bucket_histogram: int array; + } + [%%include stdlib.hashtbl (hash, seeded_hash, hash_param, seeded_hash_param), { attributes = __ [@alert impure "Hash tables are not permitted"]; - items = stdlib.hashtbl (!standard - (hash, seeded_hash, hash_param, seeded_hash_param)) } + items = stdlib.hashtbl (!standard - (t, statistics, hash, seeded_hash, hash_param, seeded_hash_param)) } ] end @@ -89,11 +97,24 @@ module type MoreLabels_alerting = sig end module type Printexc_alerting = sig + type[@alert unsafe "This item is not permitted"] raw_backtrace = Printexc.raw_backtrace + type[@alert unsafe "This item is not permitted"] raw_backtrace_slot = Printexc.raw_backtrace_slot + type[@alert unsafe "This item is not permitted"] backtrace_slot = Printexc.backtrace_slot + type[@alert unsafe "This item is not permitted"] location = Printexc.location = { + filename : string; + line_number : int; + start_char : int; + end_char : int; + } + (* only to_string_default can be expected to be pure, not to_string *) [%%include stdlib.printexc (t, to_string_default), { attributes = __ [@alert unsafe "This item is not permitted"]; - items = stdlib.printexc (!standard - (t, to_string_default)) } + items = stdlib.printexc ( + !standard + - (t, raw_backtrace, raw_backtrace_slot, backtrace_slot, location, to_string_default) + ) } ] end @@ -120,8 +141,11 @@ module type Scanf_alerting = sig end module type Seq_alerting = sig + type 'a t = unit -> 'a node + and 'a node = 'a Seq.node = Nil | Cons of 'a * 'a t + [%%include - stdlib.seq (!standard - (once, Forced_twice, to_dispenser)), + stdlib.seq (!standard - ([t; node], once, Forced_twice, to_dispenser)), { attributes = __ [@alert impure "This imperative programming item is not permitted"]; items = stdlib.seq (once, Forced_twice, to_dispenser) } ] @@ -156,7 +180,10 @@ module type String_alerting = sig end module type Uchar_alerting = sig - [%%include stdlib.uchar (!standard - (unsafe_of_int, unsafe_to_char))] + type t = Uchar.t [@@immediate] + type utf_decode = Uchar.utf_decode [@@immediate] + + [%%include stdlib.uchar (!standard - (t, utf_decode @ {kind = "type"}, unsafe_of_int, unsafe_to_char))] end @@ -233,9 +260,16 @@ module type Stdlib_alerting = sig log, log10, log1p, sin, sinh, asinh, sqrt, tan, tanh, atanh, ceil, floor, abs_float, copysign, mod_float, frexp, ldexp, modf, float, float_of_int, truncate, int_of_float, infinity, neg_infinity, nan, - max_float, min_float, epsilon_float, fpclass, classify_float + max_float, min_float, epsilon_float, classify_float )] + type fpclass = Stdlib.fpclass = + | FP_normal + | FP_subnormal + | FP_zero + | FP_infinite + | FP_nan + (** {1 String operations} More in {!Stdlib.String} *) @@ -273,10 +307,23 @@ module type Stdlib_alerting = sig (** {1 I/O operations} This is regular, unmocked, IO. *) + type[@alert input_output "Input/output is not permitted"] in_channel = Stdlib.in_channel + type[@alert input_output "Input/output is not permitted"] out_channel = Stdlib.out_channel + type[@alert input_output "Input/output is not permitted"] open_flag = Stdlib.open_flag = + | Open_rdonly + | Open_wronly + | Open_append + | Open_creat + | Open_trunc + | Open_excl + | Open_binary + | Open_text + | Open_nonblock + [%%include { attributes = __ [@alert input_output "Input/output is not permitted"]; items = stdlib.stdlib ( - in_channel, out_channel, stdin, stdout, stderr, + stdin, stdout, stderr, (* Output functions on standard output *) print_char, print_string, print_bytes, print_int, print_float, print_endline, print_newline, @@ -286,7 +333,7 @@ module type Stdlib_alerting = sig (* Input functions on standard input *) read_line, read_int_opt, read_int, read_float_opt, read_float, (* General output functions *) - open_flag, open_out, open_out_bin, open_out_gen, flush, flush_all, + open_out, open_out_bin, open_out_gen, flush, flush_all, output_char, output_string, output_bytes, output, output_substring, output_byte, output_binary_int, output_value, seek_out, pos_out, out_channel_length, close_out, close_out_noerr, set_binary_mode_out, @@ -302,10 +349,12 @@ module type Stdlib_alerting = sig (** {1 References} *) + type 'a ref = 'a Stdlib.ref + [%%include { attributes = __ [@alert impure "References are not permitted"]; items = stdlib.stdlib ( - ref @ { kind = "type" }, ref @ { kind = "value" }, + ref @ { kind = "value" }, ( ! ), ( := ), incr, decr ) }] @@ -313,7 +362,11 @@ module type Stdlib_alerting = sig (** {1 Result type} More in {!Stdlib.Result} *) - [%%include stdlib.stdlib result ] + (* If we just included result into the signature, it would define a new type, + which we would have to reference everywhere else. For example, we would + not be able to write [module Result = Result], since that would reference + the original definition.*) + type ('a, 'b) result = ('a, 'b) Stdlib.result = Ok of 'a | Error of 'b (** {1 Format strings} More in {!Stdlib.Scanf}, {!Stdlib.Printf}, and {!Stdlib.Format} *) diff --git a/src/stdlib-variants/stdlib-alerts/threads_alerts.ml b/src/stdlib-variants/stdlib-alerts/threads_alerts.ml index 7ec5f5b..958b3a6 100644 --- a/src/stdlib-variants/stdlib-alerts/threads_alerts.ml +++ b/src/stdlib-variants/stdlib-alerts/threads_alerts.ml @@ -3,9 +3,11 @@ module type Thread_alerting = sig + type t = Thread.t + [%%include (* [exit] is deprecated, but not harmful *) - threads.thread (t, create, self, id, Exit, exit, delay, join, yield, default_uncaught_exception_handler), + threads.thread (create, self, id, Exit, exit, delay, join, yield, default_uncaught_exception_handler), { attributes = __ [@alert unsafe "This function is not permitted"]; items = threads.thread (wait_timed_read, wait_timed_write, select, wait_pid, sigmask, wait_signal) }, { attributes = __ [@alert impure "This imperative programming function is not permitted"]; @@ -14,7 +16,11 @@ module type Thread_alerting = sig end -module type Event_alerting = sig [%%include threads.event !standard] end +module type Event_alerting = sig + type 'a channel = 'a Event.channel + type 'a event = 'a Event.event + [%%include threads.event (!standard - (channel, event))] +end module Thread_alerting : Thread_alerting = Thread