From 24319eb4874f828c9b3af1f5b10fce9d40419659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Fri, 16 May 2025 16:16:52 +0200 Subject: [PATCH 01/31] Add change entry. --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index b073cc300b..f574b62c6a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,7 @@ unreleased - `inlay-hints` fix inlay hints on function parameters (#1923) - Fix issues with ident validation and Lid comparison for occurrences (#1924) - Handle class type in outline (#1932) + - Handle object expression inside a let in outline (#1936) + ocaml-index - Improve the granularity of index reading by segmenting the marshalization of the involved data-structures. (#1889) From a7a3729daef14e8ccf08319770b7ac15e1bd90d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Wed, 16 Jul 2025 12:26:32 +0200 Subject: [PATCH 02/31] Add a new command to extract expression into a fresh let binding. --- CHANGES.md | 1 - doc/dev/PROTOCOL.md | 19 + src/analysis/parsetree_utils.ml | 15 + src/analysis/parsetree_utils.mli | 2 + src/analysis/refactor_extract_region.ml | 395 ++++++++++++++++++ src/analysis/refactor_extract_region.mli | 110 +++++ src/commands/new_commands.ml | 31 ++ src/commands/query_json.ml | 17 +- src/frontend/query_commands.ml | 9 + src/frontend/query_protocol.ml | 6 + src/utils/std.ml | 4 + .../const-extraction.t/const.ml | 41 ++ .../const-extraction.t/run.t | 203 +++++++++ .../func-extraction.t/func.ml | 88 ++++ .../func-extraction.t/run.t | 372 +++++++++++++++++ 15 files changed, 1311 insertions(+), 2 deletions(-) create mode 100644 src/analysis/refactor_extract_region.ml create mode 100644 src/analysis/refactor_extract_region.mli create mode 100644 tests/test-dirs/refactor-extract-region/const-extraction.t/const.ml create mode 100644 tests/test-dirs/refactor-extract-region/const-extraction.t/run.t create mode 100644 tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml create mode 100644 tests/test-dirs/refactor-extract-region/func-extraction.t/run.t diff --git a/CHANGES.md b/CHANGES.md index f574b62c6a..b073cc300b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,7 +10,6 @@ unreleased - `inlay-hints` fix inlay hints on function parameters (#1923) - Fix issues with ident validation and Lid comparison for occurrences (#1924) - Handle class type in outline (#1932) - - Handle object expression inside a let in outline (#1936) + ocaml-index - Improve the granularity of index reading by segmenting the marshalization of the involved data-structures. (#1889) diff --git a/doc/dev/PROTOCOL.md b/doc/dev/PROTOCOL.md index a6b86a0efb..0eb76d3109 100644 --- a/doc/dev/PROTOCOL.md +++ b/doc/dev/PROTOCOL.md @@ -485,6 +485,25 @@ The result is returned as a list of: } ``` +### `refactor-extract-region -start -stop -extract-name ` + +``` + -start Where extracted region start + -stop Where extracted region end + -extract-name Name used for the generated let binding +``` + +Returns the string `Nothing to do` (if extractor is not ables to select an expression to extract in the given position interval) or the following object: + +```javascript +{ + 'start': position, // the start of the region to be substituted + 'end': position, // the end of the region to be substituted + 'content' string, // the content of the substitution + 'selection_range': location // the location where to position the cursor for easy renaming of the generated let binding +} +``` + ### `syntax-document -position ` -position The position of the keyword to be documented diff --git a/src/analysis/parsetree_utils.ml b/src/analysis/parsetree_utils.ml index 0713586015..8e8a933554 100644 --- a/src/analysis/parsetree_utils.ml +++ b/src/analysis/parsetree_utils.ml @@ -1,5 +1,20 @@ +open Std + open Parsetree type nonrec constant_desc = constant_desc let constant_desc c = c.pconst_desc + +let filter_attr = + let default = Ast_mapper.default_mapper in + let keep attr = + let { Location.txt; _ }, _ = Ast_helper.Attr.as_tuple attr in + not (Std.String.is_prefixed ~by:"merlin." txt) + in + let attributes mapper attrs = + default.Ast_mapper.attributes mapper (List.filter ~f:keep attrs) + in + { default with Ast_mapper.attributes } + +let filter_expr_attr expr = filter_attr.Ast_mapper.expr filter_attr expr diff --git a/src/analysis/parsetree_utils.mli b/src/analysis/parsetree_utils.mli index eb5bab8eb9..416fc39080 100644 --- a/src/analysis/parsetree_utils.mli +++ b/src/analysis/parsetree_utils.mli @@ -6,3 +6,5 @@ open Parsetree type nonrec constant_desc = constant_desc val constant_desc : constant -> constant_desc + +val filter_expr_attr : expression -> expression diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml new file mode 100644 index 0000000000..67bb826537 --- /dev/null +++ b/src/analysis/refactor_extract_region.ml @@ -0,0 +1,395 @@ +open Std + +exception Nothing_to_do +exception Not_allowed_in_interface_file + +let () = + Location.register_error_of_exn (function + | Nothing_to_do -> Some (Location.error "Nothing to do") + | Not_allowed_in_interface_file -> + Some + (Location.error + "Expression extraction is only allowed in implementation file") + | _ -> None) + +module Fresh_name = struct + (* Generate a fresh name that does not already exist in given environment. *) + let gen_val_name basename env = + let rec loop n = + let guess = basename ^ Int.to_string n in + if Env.bound_value guess env then succ n |> loop else guess + in + loop 1 +end + +module Gen = struct + let unit = Longident.Lident "()" |> Location.mknoloc + + (* Generates [let name = body]. *) + let toplevel_let ~name ~body = + let open Ast_helper in + let pattern = Pat.mk (Ppat_var { txt = name; loc = Location.none }) in + let body = Parsetree_utils.filter_expr_attr body in + Str.value Nonrecursive [ Vb.mk pattern body ] + + (* Generates [let name () = body]. *) + let let_unit_toplevel ~name ~body = + let open Ast_helper in + let unit_param = + { Parsetree.pparam_loc = Location.none; + pparam_desc = Pparam_val (Nolabel, None, Pat.construct unit None) + } + in + let body = Exp.function_ [ unit_param ] None (Pfunction_body body) in + toplevel_let ~name ~body + + (* Generates [let name params = body]. *) + let toplevel_function params ~name ~body = + let open Ast_helper in + let params = + List.map + ~f:(fun param -> + let pattern = + Pat.construct + (Location.mknoloc (Untypeast.lident_of_path param)) + None + in + { Parsetree.pparam_loc = Location.none; + pparam_desc = Pparam_val (Nolabel, None, pattern) + }) + params + in + let body = Exp.function_ params None (Pfunction_body body) in + toplevel_let ~name ~body + + let ident ~name = + Longident.Lident name |> Location.mknoloc |> Ast_helper.Exp.ident + + let fun_apply params ~name = + let open Ast_helper in + let params = List.map ~f:(fun p -> (Asttypes.Nolabel, p)) params in + Exp.apply (ident ~name) params + + let fun_apply_unit = fun_apply [ Ast_helper.Exp.ident unit ] + + let fun_apply_params params = + params + |> List.map ~f:(fun param -> + Ast_helper.Exp.ident + (Location.mknoloc (Longident.Lident (Path.name param)))) + |> fun_apply +end + +module Msource = struct + include Msource + + (* TODO: Maybe add this directly in [Msource]? *) + let sub_loc src loc = + let (`Offset start_offset) = + let line, col = Lexing.split_pos loc.Location.loc_start in + Msource.get_offset src (`Logical (line, col)) + in + let (`Offset end_offset) = + `Logical (Lexing.split_pos loc.loc_end) |> Msource.get_offset src + in + String.sub (Msource.text src) ~pos:start_offset + ~len:(end_offset - start_offset) + |> Msource.make +end + +type analysis = { bounded_vars : Path.t list; gen_binding_kind : rec_flag } + +and rec_flag = Non_recursive | Rec_and + +type extraction = + { expr : Typedtree.expression; (** Expression that being extracted *) + expr_env : Env.t; (** Environment of the extracted expression *) + toplevel_item : toplevel_item; + (** The value binding toplevel or class declaration enclosing the extracted expression. *) + name : extraction_name; (** Binding name of the extracted expression. *) + gen_binding_kind : rec_flag; + generated_binding : generated_binding; + generated_call : generated_call + } + +and extraction_name = Default of { basename : string } | Fixed of string + +and toplevel_item = { rec_flag : Asttypes.rec_flag; loc : Location.t } +(* A convenient type for grouping info. *) + +and generated_binding = + name:string -> body:Parsetree.expression -> Parsetree.structure_item + +and generated_call = name:string -> Parsetree.expression + +let is_recursive = function + | { rec_flag = Asttypes.Recursive; _ } -> true + | { rec_flag = Nonrecursive; _ } -> false + +let rec occuring_vars node = + let rec find_pattern_var : type a. a Typedtree.general_pattern -> Path.t list + = + fun { Typedtree.pat_desc; _ } -> + match pat_desc with + | Typedtree.Tpat_var (ident, _, _) -> [ Pident ident ] + | Tpat_tuple pats -> List.concat_map ~f:find_pattern_var pats + | Tpat_alias (pat, ident, _, _) -> Pident ident :: find_pattern_var pat + | Tpat_construct (_, _, pats, _) -> List.concat_map ~f:find_pattern_var pats + | Tpat_variant (_, Some pat, _) -> find_pattern_var pat + | Tpat_record (fields, _) -> + List.concat_map ~f:(fun (_, _, field) -> find_pattern_var field) fields + | Tpat_array arr -> List.concat_map ~f:find_pattern_var arr + | Tpat_lazy pat | Tpat_exception pat -> find_pattern_var pat + | Tpat_value pat -> + find_pattern_var (pat :> Typedtree.value Typedtree.general_pattern) + | Tpat_or (l, r, _) -> find_pattern_var l @ find_pattern_var r + | _ -> [] + in + let loop acc node = + match node.Browse_tree.t_node with + | Browse_raw.Expression { exp_desc = Texp_ident (path, _, _); _ } -> + path :: acc + | Pattern pat -> find_pattern_var pat @ acc + | _ -> + Lazy.force node.t_children + |> List.concat_map ~f:occuring_vars + |> List.append acc + in + loop [] node |> List.rev + +let analyze_expr expr env ~toplevel_item ~mconfig ~local_defs = + let unbounded_enclosing = + { Location.loc_start = toplevel_item.loc.loc_start; + loc_end = expr.Typedtree.exp_loc.loc_start; + loc_ghost = false + } + in + Browse_tree.of_node ~env (Browse_raw.Expression expr) + |> occuring_vars + |> List.fold_left + ~init:{ bounded_vars = []; gen_binding_kind = Non_recursive } + ~f:(fun acc var_path -> + match + Locate.from_path + ~config:{ mconfig; ml_or_mli = `ML; traverse_aliases = true } + ~env ~local_defs ~namespace:Value var_path + with + | `Found { location; approximated = false; _ } -> + let acc = + if Location_aux.included location ~into:unbounded_enclosing then + { acc with bounded_vars = var_path :: acc.bounded_vars } + else acc + in + if + is_recursive toplevel_item + && Location_aux.included location ~into:toplevel_item.loc + then { acc with gen_binding_kind = Rec_and } + else acc + | _ -> acc) + +let extract_to_toplevel + { expr; + expr_env; + name; + gen_binding_kind; + generated_binding; + generated_call; + toplevel_item + } buffer = + let val_name = + match name with + | Default { basename } -> Fresh_name.gen_val_name basename expr_env + | Fixed name -> name + in + let fresh_call = + generated_call ~name:val_name |> Format.asprintf "%a" Pprintast.expression + in + let toplevel_item_span = Msource.sub_loc buffer toplevel_item.loc in + let subst_loc = + let start_lnum = + 1 + expr.exp_loc.Location.loc_start.pos_lnum + - toplevel_item.loc.loc_start.pos_lnum + in + let end_lnum = + start_lnum + expr.exp_loc.loc_end.pos_lnum + - expr.exp_loc.loc_start.pos_lnum + in + { expr.exp_loc with + loc_start = { expr.exp_loc.loc_start with pos_lnum = start_lnum }; + loc_end = { expr.exp_loc.loc_end with pos_lnum = end_lnum } + } + in + let substitued_toplevel_binding = + Msource.substitute toplevel_item_span + (`Logical (Lexing.split_pos subst_loc.loc_start)) + (`Logical (Lexing.split_pos subst_loc.loc_end)) + fresh_call + |> Msource.text + in + let untyped_expr = Untypeast.untype_expression expr in + let content = + match gen_binding_kind with + | Non_recursive -> + let fresh_let_binding = + generated_binding ~name:val_name ~body:untyped_expr + |> Format.asprintf "%a" Pprintast.structure_item + in + fresh_let_binding ^ "\n" ^ substitued_toplevel_binding + | Rec_and -> + let fresh_let_binding = + generated_binding ~name:val_name ~body:untyped_expr + |> Format.asprintf "%a" Pprintast.structure_item + in + let fresh_and_binding = + "and" ^ String.drop 3 fresh_let_binding (* Sorry *) + in + substitued_toplevel_binding ^ "\n" ^ fresh_and_binding + in + let selection_range = + let lnum = + match gen_binding_kind with + | Non_recursive -> toplevel_item.loc.loc_start.pos_lnum + | Rec_and -> toplevel_item.loc.loc_end.pos_lnum + String.length "\n" + in + let prefix_length = + match gen_binding_kind with + | Non_recursive -> + if is_recursive toplevel_item then String.length "let rec " + else String.length "let " + | Rec_and -> String.length "and " + in + { Location.loc_start = Lexing.make_pos (lnum, prefix_length); + loc_end = Lexing.make_pos (lnum, prefix_length + String.length val_name); + loc_ghost = false + } + in + { Query_protocol.loc = toplevel_item.loc; content; selection_range } + +let extract_const_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = + let name = + Option.fold extract_name + ~none:(Default { basename = "const_name" }) + ~some:(fun name -> Fixed name) + in + extract_to_toplevel + { expr; + expr_env; + toplevel_item; + name; + gen_binding_kind = Non_recursive; + generated_binding = Gen.toplevel_let; + generated_call = Gen.ident + } + +let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item + ~local_defs ~mconfig = + let is_function = function + | { Typedtree.exp_desc = Texp_function _; _ } -> true + | _ -> false + in + let { bounded_vars; gen_binding_kind } = + analyze_expr expr expr_env ~toplevel_item ~local_defs ~mconfig + in + let generated_binding, generated_call = + match bounded_vars with + | [] when Fun.negate is_function expr -> + (* If the extracted expr is already a function, no need to delayed computation + with a unit parameter. *) + (Gen.let_unit_toplevel, Gen.fun_apply_unit) + | _ -> + (Gen.toplevel_function bounded_vars, Gen.fun_apply_params bounded_vars) + in + let name = + Option.fold extract_name + ~none:(Default { basename = "fun_name" }) + ~some:(fun name -> Fixed name) + in + extract_to_toplevel + { expr; + expr_env; + toplevel_item; + name; + gen_binding_kind; + generated_binding; + generated_call + } + +let most_inclusive_expr ~start ~stop nodes = + let is_inside_region = + Location_aux.included + ~into:{ Location.loc_start = start; loc_end = stop; loc_ghost = true } + in + let rec select_among_child env node = + let select_deeper node env = + let node = Browse_tree.of_node ~env node in + Lazy.force node.t_children |> List.rev + |> Stdlib.List.find_map (fun node -> + select_among_child node.Browse_tree.t_env node.t_node) + in + let node_loc = Mbrowse.node_loc node in + let remove_poly expr = + (* We have to remove poly extra that cause unexpected "!poly!" to be printed + in generated code. This happens when you try to extract the body of a method. *) + let open Typedtree in + { expr with + exp_extra = + List.filter + ~f:(function + | Texp_poly _, _, _ -> false + | _ -> true) + expr.exp_extra + } + in + match node with + | Expression expr -> + (* We filter expression that have a ghost location. Otherwise, expression + such as [let f x = 10 + x] can be extracted and this can lead to invalid + code gen. ^^^^^^^^^^ *) + if node_loc.loc_ghost = false && is_inside_region node_loc then + Some (remove_poly expr, env) + else select_deeper node env + | _ -> select_deeper node env + in + nodes |> List.rev + |> Stdlib.List.find_map (fun (env, node) -> select_among_child env node) + +let find_associated_toplevel_item expr structure = + Stdlib.List.find_map + (fun { Typedtree.str_desc; str_loc; _ } -> + match str_desc with + | Tstr_value (rec_flag, _) + when Location_aux.included expr.Typedtree.exp_loc ~into:str_loc -> + Some { rec_flag; loc = str_loc } + | Tstr_class cs -> + Stdlib.List.find_map + (fun (class_decl, _) -> + let loc = class_decl.Typedtree.ci_loc in + if Location_aux.included expr.exp_loc ~into:loc then + Some { rec_flag = Nonrecursive; loc } + else None) + cs + | _ -> None) + structure.Typedtree.str_items + +let substitute ~start ~stop ?extract_name mconfig buffer typedtree = + match typedtree with + | `Interface _ -> raise Not_allowed_in_interface_file + | `Implementation structure -> ( + let enclosing = + Mbrowse.enclosing start [ Mbrowse.of_structure structure ] + in + match most_inclusive_expr ~start ~stop enclosing with + | None -> raise Nothing_to_do + | Some (expr, expr_env) -> ( + match find_associated_toplevel_item expr structure with + | None -> raise Nothing_to_do + | Some toplevel_item -> ( + match expr.exp_desc with + | Texp_constant _ -> + (* Special case for constant. They can't produce side effect so it's not + necessary to add a trailing unit parameter to the let binding. *) + extract_const_to_toplevel ?extract_name expr ~expr_env buffer + ~toplevel_item + | _ -> + extract_expr_to_toplevel ?extract_name expr buffer ~expr_env + ~toplevel_item ~local_defs:typedtree ~mconfig))) diff --git a/src/analysis/refactor_extract_region.mli b/src/analysis/refactor_extract_region.mli new file mode 100644 index 0000000000..bcedbd31e7 --- /dev/null +++ b/src/analysis/refactor_extract_region.mli @@ -0,0 +1,110 @@ +(** Region extractor allows extracting arbitrary expression into a fresh + toplevel binding. The extractor detects bounded variables inside the + extracted expression and performs code generation acordingly. + + For instance, let's assume that we want to extract the pi value oustide of + the body of [circle_area]: + {[ + let circle_area radius = 3.14159 *. (radius ** 2.) + (* ^^^^^^^ *) + ]} + + The generated code will look like this: + {[ + let const_name1 = 3.14159 + let circle_area radius = const_name1 *. (radius ** 2.) + ]} + + Extraction also works on expressions that are functions: + {[ + let all_empty l = + List.for_all + (function + | [] -> true + | _ -> false) + (* ^^^^^^^^^^^^^^^ *) + l + ]} + + {[ + let is_empty = (function | [] -> true | _ -> false) + let all_empty l = List.for_all is_empty l + ]} + + Let's look at a more complicated example where we want to extract the entire + body of [f]: + {[ + let rec f x = 10 + y + x + (* ^^^^^^^^^^ *) + + and y = 80 + ]} + + Performing the extraction leads to this code: + {[ + let rec f x = fun_name2 x + + and y = 80 + + and fun_name2 x = 10 + y + x + ]} + + We can see that extractor detects this kind of pattern and extracts the + expression inside an [and] binding. It also substitutes the expression by a + call to the fresh generated function with the correct parameters. + + Finally, if there is no bounded variable in the expression, a trailing unit + parameter is added to the generated let binding in order to preserve the + evaluation order. Let's extract the entire body of [x]: + {[ + let my_list = + print_endline "Wild side effect!"; + 1 :: [ 2; 3; 4 ] + ]} + + {[ + let fun_name1 () = + print_endline "Wild side effect!"; + [ 1; 2; 3; 4 ] + let f = fun_name1 () + ]} + + Final remarks: + - Extraction currently works on any typedtree expression that doesn't have a + ghost location. This restriction prevents the generation of invalid code. + + - The generated code is pretty printed by the compiler libs and may not be + formatted according to OCamlformat conventions. + +*) + +(** Raised when extractor is not ables to select an expression to extract in + given location interval. *) +exception Nothing_to_do + +(** Raised when extraction is called inside an interface file. *) +exception Not_allowed_in_interface_file + +(** [substitute ~start ~stop ~extract_name config buffer typedtree] tries to + extract the most inclusive expression located in interval [start-stop] into + a fresh toplevel generated let binding. + + Returns a {!Query_protocol.substitution_result} consisting of three fields: + - [loc]: the location where [content] musts be inserted. + - [content]: the code where the substitution takes places completed by the + generated let binding. + - [selection_range]: the location where to position the cursor for easy + renaming of the generated let binding. + + If there is no [extract_name] provided, the generated binding is named with + an untaken name in its current scope. Extracted constants will be named with + a name beginning with ["const_name"], while extracted functions will have a + name beginning with ["fun_name"]. *) +val substitute : + start:Lexing.position -> + stop:Lexing.position -> + ?extract_name:string -> + Mconfig.t -> + Msource.t -> + Mtyper.typedtree -> + Query_protocol.substitution_result diff --git a/src/commands/new_commands.ml b/src/commands/new_commands.ml index 7eed73d488..95fd600ec0 100644 --- a/src/commands/new_commands.ml +++ b/src/commands/new_commands.ml @@ -565,6 +565,37 @@ let all_commands = | Some action, (#Msource.position as pos) -> run buffer (Query_protocol.Refactor_open (action, pos)) end; + command "refactoring-extract-region" ~doc:"extract a region as function" + ~spec: + [ arg "-start" " Where extracted region start" + (marg_position (fun start (_start, stop, name) -> + (start, stop, name))); + arg "-end" " Where extracted region end" + (marg_position (fun stop (start, _stop, name) -> + (start, stop, name))); + optional "-extract-name" + " Name used by the generated let binding" + (Marg.param "string" (fun name (start, stop, _name) -> + let name = + match String.trim name with + | "" -> None + | n -> Some n + in + (start, stop, name))) + ] + ~default:(`None, `None, None) + begin + fun buffer (start, stop, name) -> + match (start, stop, name) with + | `None, `None, _ -> failwith "-start and -end are mandatory" + | `None, _, _ -> failwith "-start is mandatory" + | _, `None, _ -> failwith "-end is mandatory" + | (#Msource.position as start), (#Msource.position as stop), name -> + let raw_source = Mpipeline.raw_source buffer in + run buffer + (Query_protocol.Refactor_extract_region + (start, stop, name, raw_source)) + end; command "search-by-polarity" ~doc:"search-by-polarity -position pos -query ident\n\tTODO" ~spec: diff --git a/src/commands/query_json.ml b/src/commands/query_json.ml index d108b20e3f..f9a87e7333 100644 --- a/src/commands/query_json.ml +++ b/src/commands/query_json.ml @@ -211,6 +211,9 @@ let dump (type a) : a t -> json = | `Unqualify -> "unqualify") ); ("position", mk_position pos) ] + | Refactor_extract_region (start, stop, _, _) -> + mk "refactoring-extract-region" + [ ("start", mk_position start); ("stop", mk_position stop) ] | Signature_help { position; _ } -> mk "signature-help" [ ("position", mk_position position) ] | Version -> mk "version" [] @@ -406,6 +409,16 @@ let json_of_search_result list = in `List list +let json_of_substitution_result { loc; content; selection_range } = + with_location loc + [ ("content", `String content); + ( "selection-range", + `Assoc + [ ("start", Lexing.json_of_position selection_range.Location.loc_start); + ("end", Lexing.json_of_position selection_range.loc_end) + ] ) + ] + let json_of_response (type a) (query : a t) (response : a) : json = match (query, response) with | Type_expr _, str -> `String str @@ -420,6 +433,8 @@ let json_of_response (type a) (query : a t) (response : a) : json = `List (List.map locations ~f:(fun (name, loc) -> with_location loc [ ("content", `String name) ])) + | Refactor_extract_region _, subst_res -> + json_of_substitution_result subst_res | Document _, resp -> begin match resp with | `No_documentation -> `String "No documentation available" @@ -499,7 +514,7 @@ let json_of_response (type a) (query : a t) (response : a) : json = | Occurrences (_, scope), (occurrences, _project) -> let with_file = scope = `Project || scope = `Renaming in `List - (List.map occurrences ~f:(fun occurrence -> + (List.map occurrences ~f:(fun (occurrence : Query_protocol.occurrence) -> with_location ~with_file occurrence.loc [ ("stale", Json.bool occurrence.is_stale) ])) | Signature_help _, s -> json_of_signature_help s diff --git a/src/frontend/query_commands.ml b/src/frontend/query_commands.ml index 818246ceb0..e037726301 100644 --- a/src/frontend/query_commands.ml +++ b/src/frontend/query_commands.ml @@ -812,6 +812,15 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = function Inlay_hints.of_structure ~hint_let_binding ~hint_pattern_binding ~hint_function_params ~avoid_ghost_location ~start ~stop structure end + | Refactor_extract_region (start, stop, extract_name, buffer) -> + let start = Mpipeline.get_lexing_pos pipeline start + and stop = Mpipeline.get_lexing_pos pipeline stop in + let config = Mpipeline.final_config pipeline in + let typer_result = + Mpipeline.typer_result pipeline |> Mtyper.get_typedtree + in + Refactor_extract_region.substitute ~start ~stop ?extract_name config buffer + typer_result | Signature_help { position; _ } -> ( (* Todo: additionnal contextual information could help us provide better results.*) diff --git a/src/frontend/query_protocol.ml b/src/frontend/query_protocol.ml index 75c00c3c6e..cd8837cb48 100644 --- a/src/frontend/query_protocol.ml +++ b/src/frontend/query_protocol.ml @@ -132,6 +132,9 @@ type occurrences_status = type occurrence = { loc : Location.t; is_stale : bool } +type substitution_result = + { loc : Location.t; content : string; selection_range : Location.t } + type _ t = | Type_expr (* *) : string * Msource.position -> string t | Type_enclosing (* *) : @@ -157,6 +160,9 @@ type _ t = | Refactor_open : [ `Qualify | `Unqualify ] * Msource.position -> (string * Location.t) list t + | Refactor_extract_region : + Msource.position * Msource.position * string option * Msource.t + -> substitution_result t | Document (* *) : string option * Msource.position -> [ `Found of string diff --git a/src/utils/std.ml b/src/utils/std.ml index 9bcb784acf..a7d91fcb43 100644 --- a/src/utils/std.ml +++ b/src/utils/std.ml @@ -326,6 +326,10 @@ module Option = struct | None -> default | Some x -> f x + let fold ~none ~some = function + | Some v -> some v + | None -> none + let iter ~f = function | None -> () | Some x -> f x diff --git a/tests/test-dirs/refactor-extract-region/const-extraction.t/const.ml b/tests/test-dirs/refactor-extract-region/const-extraction.t/const.ml new file mode 100644 index 0000000000..882180ad22 --- /dev/null +++ b/tests/test-dirs/refactor-extract-region/const-extraction.t/const.ml @@ -0,0 +1,41 @@ +let const_name1 = "before" + +let circle_area radius = 3.14159 *. (radius ** 2.) + +let read ?(chunk_size = 4096) ic = + let buf = Bytes.create chunk_size in + In_channel.input ic buf 0 (Bytes.length buf) + +(* My commentary *) +let my_nested_long_int = + let o = + let c = + let a = + let m = + let l = 1_000_000_000L in + l + in + m + in + a + in + c + in + o + +let log ppf msg = Format.pp_print_string ppf ("CRITICAL: " ^ msg) + +module type EMPTY = sig end +let f () : (module EMPTY) = + (module struct + let const_name2 = assert false + let secret = String.make 100 '@' + end) + +let g () = + let multilines_cst = {foo| +multi +lines +constant +|foo} in + print_endline multilines_cst diff --git a/tests/test-dirs/refactor-extract-region/const-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/const-extraction.t/run.t new file mode 100644 index 0000000000..80f34c3dce --- /dev/null +++ b/tests/test-dirs/refactor-extract-region/const-extraction.t/run.t @@ -0,0 +1,203 @@ + $ $MERLIN single refactoring-extract-region -start 3:25 -end 3:34 -extract-name pi < const.ml + { + "class": "return", + "value": { + "start": { + "line": 3, + "col": 0 + }, + "end": { + "line": 3, + "col": 50 + }, + "content": "let pi = 3.14159 + let circle_area radius = pi *. (radius ** 2.)", + "selection-range": { + "start": { + "line": 3, + "col": 4 + }, + "end": { + "line": 3, + "col": 6 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 5:24 -end 5:28 -extract-name chunk_size < const.ml + { + "class": "return", + "value": { + "start": { + "line": 5, + "col": 0 + }, + "end": { + "line": 7, + "col": 46 + }, + "content": "let chunk_size = 4096 + let read ?(chunk_size = chunk_size) ic = + let buf = Bytes.create chunk_size in + In_channel.input ic buf 0 (Bytes.length buf)", + "selection-range": { + "start": { + "line": 5, + "col": 4 + }, + "end": { + "line": 5, + "col": 14 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 15:18 -end 15:32 < const.ml + { + "class": "return", + "value": { + "start": { + "line": 10, + "col": 0 + }, + "end": { + "line": 24, + "col": 3 + }, + "content": "let const_name2 = 1000000000L + let my_nested_long_int = + let o = + let c = + let a = + let m = + let l = const_name2 in + l + in + m + in + a + in + c + in + o", + "selection-range": { + "start": { + "line": 10, + "col": 4 + }, + "end": { + "line": 10, + "col": 15 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 26:46 -end 26:58 -extract-name header_log < const.ml + { + "class": "return", + "value": { + "start": { + "line": 26, + "col": 0 + }, + "end": { + "line": 26, + "col": 65 + }, + "content": "let header_log = \"CRITICAL: \" + let log ppf msg = Format.pp_print_string ppf (header_log ^ msg)", + "selection-range": { + "start": { + "line": 26, + "col": 4 + }, + "end": { + "line": 26, + "col": 14 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 32:33 -end 32:36 < const.ml + { + "class": "return", + "value": { + "start": { + "line": 29, + "col": 0 + }, + "end": { + "line": 33, + "col": 6 + }, + "content": "let const_name3 = '@' + let f () : (module EMPTY) = + (module struct + let const_name2 = assert false + let secret = String.make 100 const_name3 + end)", + "selection-range": { + "start": { + "line": 29, + "col": 4 + }, + "end": { + "line": 29, + "col": 15 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 36:23 -end 40:5 -extract-name my_essay < const.ml + { + "class": "return", + "value": { + "start": { + "line": 35, + "col": 0 + }, + "end": { + "line": 41, + "col": 30 + }, + "content": "let my_essay = {foo| + multi + lines + constant + |foo} + let g () = + let multilines_cst = my_essay in + print_endline multilines_cst", + "selection-range": { + "start": { + "line": 35, + "col": 4 + }, + "end": { + "line": 35, + "col": 12 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 1:0 -end 2:0 \ + > -filename foobar.mli < val f : int -> int + > EOF + { + "class": "error", + "value": "Expression extraction is only allowed in implementation file", + "notifications": [] + } diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml new file mode 100644 index 0000000000..ad6d46be85 --- /dev/null +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -0,0 +1,88 @@ +let fun_name1 () = () + +let all_empty l = + List.for_all + (function + | [] -> true + | _ -> false) + l + +let max l = List.fold_left (fun acc x -> if x > acc then x else acc) l + +(* A comment *) +let z = "..." + +let test x y = + let fun_name2 = Fun.id in + let m = + let m = print_endline (x ^ y ^ z) in + m + in + m + +let map f = + let rec loop acc = function + | [] -> List.rev acc + | x :: xs -> loop (f x :: acc) xs + in + loop [] + +let rec x = object end + +and y _ = + object + method foo = x + end + +let rec z x = 10 + y + x + +and y = 80 + +let f = + print_endline "Wild side effect!"; + 1 :: [ 2; 3; 4 ] + +class a = + let inner_expr = + let bar = 20 in + object + method foo = bar + end + in + object + method x = (Fun.const 10) () + method y = print_endline + method z = + let x = + object + method x = "foobar" + end + in + x + end + +and b = object end + +let my_mutable_state = + let var = ref 0 in + var := 10 * 50; + !var + +let func () = + let x = [] in + Fun.protect + (fun () -> + let fun_name2 = ( / ) in + let y = [ ( + ); ( - ); fun_name2 ] @ x in + List.map2 (fun op (a, b) -> op a b) y [ (1, 1); (3, 2); (8, 2) ]) + ~finally:(Fun.const ()) + +let rec f = List.map Fun.id + +and y = [ 10; 20; 30 ] + +and z x = + object + method x = x + method y = y + end diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t new file mode 100644 index 0000000000..f2c37bb90d --- /dev/null +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -0,0 +1,372 @@ + $ $MERLIN single refactoring-extract-region -start 5:4 -end 7:19 -extract-name is_empty < func.ml + { + "class": "return", + "value": { + "start": { + "line": 3, + "col": 0 + }, + "end": { + "line": 8, + "col": 5 + }, + "content": "let is_empty = (function | [] -> true | _ -> false) + let all_empty l = + List.for_all + is_empty + l", + "selection-range": { + "start": { + "line": 3, + "col": 4 + }, + "end": { + "line": 3, + "col": 12 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 10:20 -end 10:70 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 10, + "col": 0 + }, + "end": { + "line": 10, + "col": 70 + }, + "content": "let fun_name2 = fun acc x -> if x > acc then x else acc + let max l = List.fold_left fun_name2 l", + "selection-range": { + "start": { + "line": 10, + "col": 4 + }, + "end": { + "line": 10, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 18:12 -end 18:37 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 15, + "col": 0 + }, + "end": { + "line": 21, + "col": 3 + }, + "content": "let fun_name3 (x) (y) = print_endline (x ^ (y ^ z)) + let test x y = + let fun_name2 = Fun.id in + let m = + let m = fun_name3 x y in + m + in + m", + "selection-range": { + "start": { + "line": 15, + "col": 4 + }, + "end": { + "line": 15, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 24:21 -end 26:37 -extract-name map_aux < func.ml + "Nothing to do" + + $ $MERLIN single refactoring-extract-region -start 37:14 -end 37:24 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 37, + "col": 0 + }, + "end": { + "line": 39, + "col": 10 + }, + "content": "let rec z x = fun_name2 x + + and y = 80 + and fun_name2 (x) = (10 + y) + x", + "selection-range": { + "start": { + "line": 40, + "col": 4 + }, + "end": { + "line": 40, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 42:2 -end 43:18 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 41, + "col": 0 + }, + "end": { + "line": 43, + "col": 18 + }, + "content": "let fun_name2 () = print_endline \"Wild side effect!\"; [1; 2; 3; 4] + let f = + fun_name2 ()", + "selection-range": { + "start": { + "line": 41, + "col": 4 + }, + "end": { + "line": 41, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 47:4 -end 50:7 -extract-name outsider_expr < func.ml + { + "class": "return", + "value": { + "start": { + "line": 45, + "col": 0 + }, + "end": { + "line": 62, + "col": 5 + }, + "content": "let outsider_expr () = let bar = 20 in object method foo = bar end + class a = + let inner_expr = + outsider_expr () + in + object + method x = (Fun.const 10) () + method y = print_endline + method z = + let x = + object + method x = \"foobar\" + end + in + x + end", + "selection-range": { + "start": { + "line": 45, + "col": 4 + }, + "end": { + "line": 45, + "col": 17 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 56:6 -end 61:7 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 45, + "col": 0 + }, + "end": { + "line": 62, + "col": 5 + }, + "content": "let fun_name2 () = let x = object method x = \"foobar\" end in x + class a = + let inner_expr = + let bar = 20 in + object + method foo = bar + end + in + object + method x = (Fun.const 10) () + method y = print_endline + method z = + fun_name2 () + end", + "selection-range": { + "start": { + "line": 45, + "col": 4 + }, + "end": { + "line": 45, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 67:2 -end 69:6 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 66, + "col": 0 + }, + "end": { + "line": 69, + "col": 6 + }, + "content": "let fun_name2 () = let var = ref 0 in var := (10 * 50); !var + let my_mutable_state = + fun_name2 ()", + "selection-range": { + "start": { + "line": 66, + "col": 4 + }, + "end": { + "line": 66, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 76:14 -end 76:45 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 71, + "col": 0 + }, + "end": { + "line": 78, + "col": 27 + }, + "content": "let fun_name3 (x) (fun_name2) = [(+); (-); fun_name2] @ x + let func () = + let x = [] in + Fun.protect + (fun () -> + let fun_name2 = ( / ) in + let y = fun_name3 x fun_name2 in + List.map2 (fun op (a, b) -> op a b) y [ (1, 1); (3, 2); (8, 2) ]) + ~finally:(Fun.const ())", + "selection-range": { + "start": { + "line": 71, + "col": 4 + }, + "end": { + "line": 71, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 82:8 -end 82:22 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 80, + "col": 0 + }, + "end": { + "line": 88, + "col": 5 + }, + "content": "let fun_name2 () = [10; 20; 30] + let rec f = List.map Fun.id + + and y = fun_name2 () + + and z x = + object + method x = x + method y = y + end", + "selection-range": { + "start": { + "line": 80, + "col": 8 + }, + "end": { + "line": 80, + "col": 17 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 86:15 -end 86:16 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 80, + "col": 0 + }, + "end": { + "line": 88, + "col": 5 + }, + "content": "let rec f = List.map Fun.id + + and y = [ 10; 20; 30 ] + + and z x = + object + method x = fun_name2 x + method y = y + end + and fun_name2 (x) = x", + "selection-range": { + "start": { + "line": 89, + "col": 4 + }, + "end": { + "line": 89, + "col": 13 + } + } + }, + "notifications": [] + } From 1595bf4a6a23f67fa0cbe257812d10b924559734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Thu, 17 Jul 2025 14:26:20 +0200 Subject: [PATCH 03/31] Code quality. --- src/analysis/parsetree_utils.ml | 5 +-- src/analysis/parsetree_utils.mli | 3 +- src/analysis/refactor_extract_region.ml | 45 +++++++++++-------------- src/utils/std.ml | 4 --- 4 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/analysis/parsetree_utils.ml b/src/analysis/parsetree_utils.ml index 8e8a933554..79f7080c43 100644 --- a/src/analysis/parsetree_utils.ml +++ b/src/analysis/parsetree_utils.ml @@ -6,7 +6,7 @@ type nonrec constant_desc = constant_desc let constant_desc c = c.pconst_desc -let filter_attr = +let filter_merlin_attr = let default = Ast_mapper.default_mapper in let keep attr = let { Location.txt; _ }, _ = Ast_helper.Attr.as_tuple attr in @@ -17,4 +17,5 @@ let filter_attr = in { default with Ast_mapper.attributes } -let filter_expr_attr expr = filter_attr.Ast_mapper.expr filter_attr expr +let expr_remove_merlin_attributes expr = + filter_merlin_attr.Ast_mapper.expr filter_merlin_attr expr diff --git a/src/analysis/parsetree_utils.mli b/src/analysis/parsetree_utils.mli index 416fc39080..f3b5487f91 100644 --- a/src/analysis/parsetree_utils.mli +++ b/src/analysis/parsetree_utils.mli @@ -7,4 +7,5 @@ type nonrec constant_desc = constant_desc val constant_desc : constant -> constant_desc -val filter_expr_attr : expression -> expression +(** Filter parsetree attributes which are prefixed by ["merlin."] in given expression. *) +val expr_remove_merlin_attributes : expression -> expression diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 67bb826537..cac3aba1eb 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -29,7 +29,7 @@ module Gen = struct let toplevel_let ~name ~body = let open Ast_helper in let pattern = Pat.mk (Ppat_var { txt = name; loc = Location.none }) in - let body = Parsetree_utils.filter_expr_attr body in + let body = Parsetree_utils.expr_remove_merlin_attributes body in Str.value Nonrecursive [ Vb.mk pattern body ] (* Generates [let name () = body]. *) @@ -80,22 +80,17 @@ module Gen = struct |> fun_apply end -module Msource = struct - include Msource - - (* TODO: Maybe add this directly in [Msource]? *) - let sub_loc src loc = - let (`Offset start_offset) = - let line, col = Lexing.split_pos loc.Location.loc_start in - Msource.get_offset src (`Logical (line, col)) - in - let (`Offset end_offset) = - `Logical (Lexing.split_pos loc.loc_end) |> Msource.get_offset src - in - String.sub (Msource.text src) ~pos:start_offset - ~len:(end_offset - start_offset) - |> Msource.make -end +let source_sub_loc src loc = + let (`Offset start_offset) = + let line, col = Lexing.split_pos loc.Location.loc_start in + Msource.get_offset src (`Logical (line, col)) + in + let (`Offset end_offset) = + `Logical (Lexing.split_pos loc.loc_end) |> Msource.get_offset src + in + String.sub (Msource.text src) ~pos:start_offset + ~len:(end_offset - start_offset) + |> Msource.make type analysis = { bounded_vars : Path.t list; gen_binding_kind : rec_flag } @@ -204,7 +199,7 @@ let extract_to_toplevel let fresh_call = generated_call ~name:val_name |> Format.asprintf "%a" Pprintast.expression in - let toplevel_item_span = Msource.sub_loc buffer toplevel_item.loc in + let toplevel_item_span = source_sub_loc buffer toplevel_item.loc in let subst_loc = let start_lnum = 1 + expr.exp_loc.Location.loc_start.pos_lnum @@ -267,9 +262,9 @@ let extract_to_toplevel let extract_const_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = let name = - Option.fold extract_name - ~none:(Default { basename = "const_name" }) - ~some:(fun name -> Fixed name) + match extract_name with + | None -> Default { basename = "const_name" } + | Some name -> Fixed name in extract_to_toplevel { expr; @@ -292,7 +287,7 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item in let generated_binding, generated_call = match bounded_vars with - | [] when Fun.negate is_function expr -> + | [] when not (is_function expr) -> (* If the extracted expr is already a function, no need to delayed computation with a unit parameter. *) (Gen.let_unit_toplevel, Gen.fun_apply_unit) @@ -300,9 +295,9 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item (Gen.toplevel_function bounded_vars, Gen.fun_apply_params bounded_vars) in let name = - Option.fold extract_name - ~none:(Default { basename = "fun_name" }) - ~some:(fun name -> Fixed name) + match extract_name with + | None -> Default { basename = "fun_name" } + | Some name -> Fixed name in extract_to_toplevel { expr; diff --git a/src/utils/std.ml b/src/utils/std.ml index a7d91fcb43..9bcb784acf 100644 --- a/src/utils/std.ml +++ b/src/utils/std.ml @@ -326,10 +326,6 @@ module Option = struct | None -> default | Some x -> f x - let fold ~none ~some = function - | Some v -> some v - | None -> none - let iter ~f = function | None -> () | Some x -> f x From d2d6e08bfd66221cda6b65baf804bd228df7e40a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Fri, 18 Jul 2025 12:00:29 +0200 Subject: [PATCH 04/31] Add a function to determine if an expression is extractable inside a region. --- src/analysis/refactor_extract_region.ml | 38 +++++++++++++++--------- src/analysis/refactor_extract_region.mli | 8 +++++ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index cac3aba1eb..3eb77bedc5 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -366,25 +366,35 @@ let find_associated_toplevel_item expr structure = | _ -> None) structure.Typedtree.str_items +let extract_region ~start ~stop enclosing structure = + let open Option.Infix in + most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> + find_associated_toplevel_item expr structure >>| fun toplevel_item -> + (expr, expr_env, toplevel_item) + +let is_region_extractable ~start ~stop enclosing structure = + match extract_region ~start ~stop enclosing structure with + | None -> false + | Some _ -> true + let substitute ~start ~stop ?extract_name mconfig buffer typedtree = match typedtree with | `Interface _ -> raise Not_allowed_in_interface_file - | `Implementation structure -> ( + | `Implementation structure -> begin let enclosing = Mbrowse.enclosing start [ Mbrowse.of_structure structure ] in - match most_inclusive_expr ~start ~stop enclosing with + match extract_region ~start ~stop enclosing structure with | None -> raise Nothing_to_do - | Some (expr, expr_env) -> ( - match find_associated_toplevel_item expr structure with - | None -> raise Nothing_to_do - | Some toplevel_item -> ( - match expr.exp_desc with - | Texp_constant _ -> - (* Special case for constant. They can't produce side effect so it's not + | Some (expr, expr_env, toplevel_item) -> begin + match expr.exp_desc with + | Texp_constant _ -> + (* Special case for constant. They can't produce side effect so it's not necessary to add a trailing unit parameter to the let binding. *) - extract_const_to_toplevel ?extract_name expr ~expr_env buffer - ~toplevel_item - | _ -> - extract_expr_to_toplevel ?extract_name expr buffer ~expr_env - ~toplevel_item ~local_defs:typedtree ~mconfig))) + extract_const_to_toplevel ?extract_name expr ~expr_env buffer + ~toplevel_item + | _ -> + extract_expr_to_toplevel ?extract_name expr buffer ~expr_env + ~toplevel_item ~local_defs:typedtree ~mconfig + end + end diff --git a/src/analysis/refactor_extract_region.mli b/src/analysis/refactor_extract_region.mli index bcedbd31e7..4223d7502c 100644 --- a/src/analysis/refactor_extract_region.mli +++ b/src/analysis/refactor_extract_region.mli @@ -85,6 +85,14 @@ exception Nothing_to_do (** Raised when extraction is called inside an interface file. *) exception Not_allowed_in_interface_file +(** Is an expression is extractable in the given region? *) +val is_region_extractable : + start:Lexing.position -> + stop:Lexing.position -> + (Env.t * Browse_raw.node) list -> + Typedtree.structure -> + bool + (** [substitute ~start ~stop ~extract_name config buffer typedtree] tries to extract the most inclusive expression located in interval [start-stop] into a fresh toplevel generated let binding. From 38055ff0515a76ea2e7cceae75cfcd4aeaaafb11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Fri, 18 Jul 2025 15:01:58 +0200 Subject: [PATCH 05/31] More tests. --- .../func-extraction.t/func.ml | 42 +- .../func-extraction.t/run.t | 376 ++++++++++++++++-- 2 files changed, 366 insertions(+), 52 deletions(-) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index ad6d46be85..a4c89185fb 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -20,13 +20,6 @@ let test x y = in m -let map f = - let rec loop acc = function - | [] -> List.rev acc - | x :: xs -> loop (f x :: acc) xs - in - loop [] - let rec x = object end and y _ = @@ -65,7 +58,7 @@ and b = object end let my_mutable_state = let var = ref 0 in - var := 10 * 50; + var := y * 50; !var let func () = @@ -86,3 +79,36 @@ and z x = method x = x method y = y end + +let f = 0 + 1 + +let f x = (x * 2) + 3 + +let f x = + let y = 0 in + (x * y) + 3 + +let f x = + let exception Local in + raise Local + +let x = 0 +let f x = x + 1 + +let x = 0 +let y = 1 +let f x = x + y + +let f x = List.map (fun y -> y + 1) x + +let f y = + let y = y + 1 in + y + 2 + +let f () = y + 1 + +let f x = + let module M = struct + let y = 0 + end in + (x * M.y) + 3 diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index f2c37bb90d..c6029335b9 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -91,19 +91,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 24:21 -end 26:37 -extract-name map_aux < func.ml - "Nothing to do" - - $ $MERLIN single refactoring-extract-region -start 37:14 -end 37:24 < func.ml + $ $MERLIN single refactoring-extract-region -start 30:14 -end 30:24 < func.ml { "class": "return", "value": { "start": { - "line": 37, + "line": 30, "col": 0 }, "end": { - "line": 39, + "line": 32, "col": 10 }, "content": "let rec z x = fun_name2 x @@ -112,11 +109,11 @@ and fun_name2 (x) = (10 + y) + x", "selection-range": { "start": { - "line": 40, + "line": 33, "col": 4 }, "end": { - "line": 40, + "line": 33, "col": 13 } } @@ -124,16 +121,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 42:2 -end 43:18 < func.ml + $ $MERLIN single refactoring-extract-region -start 35:2 -end 36:18 < func.ml { "class": "return", "value": { "start": { - "line": 41, + "line": 34, "col": 0 }, "end": { - "line": 43, + "line": 36, "col": 18 }, "content": "let fun_name2 () = print_endline \"Wild side effect!\"; [1; 2; 3; 4] @@ -141,11 +138,11 @@ fun_name2 ()", "selection-range": { "start": { - "line": 41, + "line": 34, "col": 4 }, "end": { - "line": 41, + "line": 34, "col": 13 } } @@ -153,16 +150,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 47:4 -end 50:7 -extract-name outsider_expr < func.ml + $ $MERLIN single refactoring-extract-region -start 40:4 -end 43:7 -extract-name outsider_expr < func.ml { "class": "return", "value": { "start": { - "line": 45, + "line": 38, "col": 0 }, "end": { - "line": 62, + "line": 55, "col": 5 }, "content": "let outsider_expr () = let bar = 20 in object method foo = bar end @@ -183,11 +180,11 @@ end", "selection-range": { "start": { - "line": 45, + "line": 38, "col": 4 }, "end": { - "line": 45, + "line": 38, "col": 17 } } @@ -195,16 +192,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 56:6 -end 61:7 < func.ml + $ $MERLIN single refactoring-extract-region -start 49:6 -end 56:7 < func.ml { "class": "return", "value": { "start": { - "line": 45, + "line": 38, "col": 0 }, "end": { - "line": 62, + "line": 55, "col": 5 }, "content": "let fun_name2 () = let x = object method x = \"foobar\" end in x @@ -223,11 +220,11 @@ end", "selection-range": { "start": { - "line": 45, + "line": 38, "col": 4 }, "end": { - "line": 45, + "line": 38, "col": 13 } } @@ -235,28 +232,28 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 67:2 -end 69:6 < func.ml + $ $MERLIN single refactoring-extract-region -start 60:2 -end 62:6 < func.ml { "class": "return", "value": { "start": { - "line": 66, + "line": 59, "col": 0 }, "end": { - "line": 69, + "line": 62, "col": 6 }, - "content": "let fun_name2 () = let var = ref 0 in var := (10 * 50); !var + "content": "let fun_name2 () = let var = ref 0 in var := (y * 50); !var let my_mutable_state = fun_name2 ()", "selection-range": { "start": { - "line": 66, + "line": 59, "col": 4 }, "end": { - "line": 66, + "line": 59, "col": 13 } } @@ -264,16 +261,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 76:14 -end 76:45 < func.ml + $ $MERLIN single refactoring-extract-region -start 69:14 -end 69:45 < func.ml { "class": "return", "value": { "start": { - "line": 71, + "line": 64, "col": 0 }, "end": { - "line": 78, + "line": 71, "col": 27 }, "content": "let fun_name3 (x) (fun_name2) = [(+); (-); fun_name2] @ x @@ -287,11 +284,11 @@ ~finally:(Fun.const ())", "selection-range": { "start": { - "line": 71, + "line": 64, "col": 4 }, "end": { - "line": 71, + "line": 64, "col": 13 } } @@ -299,16 +296,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 82:8 -end 82:22 < func.ml + $ $MERLIN single refactoring-extract-region -start 75:8 -end 75:22 < func.ml { "class": "return", "value": { "start": { - "line": 80, + "line": 73, "col": 0 }, "end": { - "line": 88, + "line": 81, "col": 5 }, "content": "let fun_name2 () = [10; 20; 30] @@ -323,11 +320,11 @@ end", "selection-range": { "start": { - "line": 80, + "line": 73, "col": 8 }, "end": { - "line": 80, + "line": 73, "col": 17 } } @@ -335,16 +332,16 @@ "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 86:15 -end 86:16 < func.ml + $ $MERLIN single refactoring-extract-region -start 79:15 -end 79:16 < func.ml { "class": "return", "value": { "start": { - "line": 80, + "line": 73, "col": 0 }, "end": { - "line": 88, + "line": 81, "col": 5 }, "content": "let rec f = List.map Fun.id @@ -359,11 +356,302 @@ and fun_name2 (x) = x", "selection-range": { "start": { - "line": 89, + "line": 82, + "col": 4 + }, + "end": { + "line": 82, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 83:12 -end 83:13 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 83, + "col": 0 + }, + "end": { + "line": 83, + "col": 13 + }, + "content": "let const_name1 = 1 + let f = 0 + const_name1", + "selection-range": { + "start": { + "line": 83, + "col": 4 + }, + "end": { + "line": 83, + "col": 15 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 85:10 -end 85:17 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 85, + "col": 0 + }, + "end": { + "line": 85, + "col": 21 + }, + "content": "let fun_name2 (x) = x * 2 + let f x = fun_name2 x + 3", + "selection-range": { + "start": { + "line": 85, + "col": 4 + }, + "end": { + "line": 85, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 89:2 -end 89:10 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 87, + "col": 0 + }, + "end": { + "line": 89, + "col": 13 + }, + "content": "let fun_name2 (x) (y) = x * y + let f x = + let y = 0 in + fun_name2 x y + 3", + "selection-range": { + "start": { + "line": 87, + "col": 4 + }, + "end": { + "line": 87, + "col": 13 + } + } + }, + "notifications": [] + } + +TODO: This extraction shouldn't be allowed. + $ $MERLIN single refactoring-extract-region -start 93:2 -end 93:13 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 91, + "col": 0 + }, + "end": { + "line": 93, + "col": 13 + }, + "content": "let fun_name2 () = raise Local + let f x = + let exception Local in + fun_name2 ()", + "selection-range": { + "start": { + "line": 91, + "col": 4 + }, + "end": { + "line": 91, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 96:10 -end 96:16 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 96, + "col": 0 + }, + "end": { + "line": 96, + "col": 15 + }, + "content": "let fun_name2 (x) = x + 1 + let f x = fun_name2 x", + "selection-range": { + "start": { + "line": 96, + "col": 4 + }, + "end": { + "line": 96, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 100:10 -end 100:16 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 100, + "col": 0 + }, + "end": { + "line": 100, + "col": 15 + }, + "content": "let fun_name2 (x) = x + y + let f x = fun_name2 x", + "selection-range": { + "start": { + "line": 100, + "col": 4 + }, + "end": { + "line": 100, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 102:10 -end 102:38 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 102, + "col": 0 + }, + "end": { + "line": 102, + "col": 37 + }, + "content": "let fun_name2 (x) = List.map (fun y -> y + 1) x + let f x = fun_name2 x", + "selection-range": { + "start": { + "line": 102, + "col": 4 + }, + "end": { + "line": 102, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 106:2 -end 106:7 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 104, + "col": 0 + }, + "end": { + "line": 106, + "col": 7 + }, + "content": "let fun_name2 (y) = y + 2 + let f y = + let y = y + 1 in + fun_name2 y", + "selection-range": { + "start": { + "line": 104, + "col": 4 + }, + "end": { + "line": 104, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 108:11 -end 108:16 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 108, + "col": 0 + }, + "end": { + "line": 108, + "col": 16 + }, + "content": "let fun_name2 () = y + 1 + let f () = fun_name2 ()", + "selection-range": { + "start": { + "line": 108, + "col": 4 + }, + "end": { + "line": 108, + "col": 13 + } + } + }, + "notifications": [] + } + + $ $MERLIN single refactoring-extract-region -start 114:2 -end 114:11 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 110, + "col": 0 + }, + "end": { + "line": 114, + "col": 15 + }, + "content": "let fun_name2 (x) (y) = x * y + let f x = + let module M = struct + let y = 0 + end in + fun_name2 x M.y + 3", + "selection-range": { + "start": { + "line": 110, "col": 4 }, "end": { - "line": 89, + "line": 110, "col": 13 } } From f388319137b795c15405b9c09f1123acaf15598b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Fri, 18 Jul 2025 15:06:13 +0200 Subject: [PATCH 06/31] Attempt to fix a bug with binding belonging to a module. --- src/analysis/refactor_extract_region.ml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 3eb77bedc5..5cb2522dcc 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -51,7 +51,7 @@ module Gen = struct ~f:(fun param -> let pattern = Pat.construct - (Location.mknoloc (Untypeast.lident_of_path param)) + (Location.mknoloc (Longident.Lident (Path.last param))) None in { Parsetree.pparam_loc = Location.none; @@ -74,9 +74,7 @@ module Gen = struct let fun_apply_params params = params - |> List.map ~f:(fun param -> - Ast_helper.Exp.ident - (Location.mknoloc (Longident.Lident (Path.name param)))) + |> List.map ~f:(fun param -> ident ~name:(Path.name param)) |> fun_apply end From 17956327e5c72659347c3166a97406fcb8621191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Fri, 18 Jul 2025 15:50:43 +0200 Subject: [PATCH 07/31] Add change entry. --- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index b6b35b77a7..33c70b9c03 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +unreleased +========== + + + merlin library + - Implement new refactor-extract-region command for extracting region to a fresh let binding (#1546) + merlin 5.5 ========== Tue Jun 24 16:10:42 CEST 2025 From aee93b007f569be8482727dc4eb57f1c78424c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=C3=A9o=20Arnouts?= Date: Fri, 18 Jul 2025 16:21:48 +0200 Subject: [PATCH 08/31] Add another test. --- .../func-extraction.t/func.ml | 6 ++++ .../func-extraction.t/run.t | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index a4c89185fb..5eb8e83f3a 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -112,3 +112,9 @@ let f x = let y = 0 end in (x * M.y) + 3 + +let f = + let x = 1 in + let y = 2 in + let z = x + y in + z + z + 1 diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index c6029335b9..40b4f1e9f9 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -658,3 +658,35 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 119:2 -end 119:18 -extract-name z < func.ml + { + "class": "return", + "value": { + "start": { + "line": 116, + "col": 0 + }, + "end": { + "line": 120, + "col": 11 + }, + "content": "let z (x) (y) = x + y + let f = + let x = 1 in + let y = 2 in + let z = z x y in + z + z + 1", + "selection-range": { + "start": { + "line": 116, + "col": 4 + }, + "end": { + "line": 116, + "col": 5 + } + } + }, + "notifications": [] + } From 7f90f475091f7b8ffefbe35352432016aa996a8a Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Mon, 21 Jul 2025 14:35:25 +0200 Subject: [PATCH 09/31] Fix generation for parameter that appears twice, parenthised generated call and add a corresponding test. --- src/analysis/refactor_extract_region.ml | 20 +++-- .../func-extraction.t/func.ml | 13 +++ .../func-extraction.t/run.t | 79 ++++++++++++++----- 3 files changed, 86 insertions(+), 26 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 5cb2522dcc..2f1ab99d31 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -102,7 +102,9 @@ type extraction = name : extraction_name; (** Binding name of the extracted expression. *) gen_binding_kind : rec_flag; generated_binding : generated_binding; - generated_call : generated_call + generated_call : generated_call; + call_need_parenthesis : bool + (** Sometime we must parenthised call in order to type check. *) } and extraction_name = Default of { basename : string } | Fixed of string @@ -148,7 +150,8 @@ let rec occuring_vars node = |> List.concat_map ~f:occuring_vars |> List.append acc in - loop [] node |> List.rev + loop [] node |> Path.Set.of_list |> Path.Set.elements |> List.rev + |> List.filter ~f:(fun path -> Ident.name (Path.head path) <> "Stdlib") let analyze_expr expr env ~toplevel_item ~mconfig ~local_defs = let unbounded_enclosing = @@ -187,7 +190,8 @@ let extract_to_toplevel gen_binding_kind; generated_binding; generated_call; - toplevel_item + toplevel_item; + call_need_parenthesis } buffer = let val_name = match name with @@ -195,7 +199,12 @@ let extract_to_toplevel | Fixed name -> name in let fresh_call = - generated_call ~name:val_name |> Format.asprintf "%a" Pprintast.expression + let parenthised_opt s = + if call_need_parenthesis then "(" ^ s ^ ")" else s + in + generated_call ~name:val_name + |> Format.asprintf "%a" Pprintast.expression + |> parenthised_opt in let toplevel_item_span = source_sub_loc buffer toplevel_item.loc in let subst_loc = @@ -271,7 +280,8 @@ let extract_const_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = name; gen_binding_kind = Non_recursive; generated_binding = Gen.toplevel_let; - generated_call = Gen.ident + generated_call = Gen.ident; + call_need_parenthesis = false } let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index 5eb8e83f3a..804b5e9675 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -118,3 +118,16 @@ let f = let y = 2 in let z = x + y in z + z + 1 + +type document = markup list +and markup = Text of string | Bold of string + +let pp_document ppf doc = + let open Format in + let bold_tag = "**" in + fprintf ppf "%a" + (pp_print_list (fun ppf markup -> + match markup with + | Text txt -> pp_print_string ppf txt + | Bold txt -> pp_print_string ppf (bold_tag ^ txt ^ bold_tag))) + doc diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index 40b4f1e9f9..b94d70df2c 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -13,7 +13,7 @@ "content": "let is_empty = (function | [] -> true | _ -> false) let all_empty l = List.for_all - is_empty + (is_empty ) l", "selection-range": { "start": { @@ -42,7 +42,7 @@ "col": 70 }, "content": "let fun_name2 = fun acc x -> if x > acc then x else acc - let max l = List.fold_left fun_name2 l", + let max l = List.fold_left (fun_name2 ) l", "selection-range": { "start": { "line": 10, @@ -73,7 +73,7 @@ let test x y = let fun_name2 = Fun.id in let m = - let m = fun_name3 x y in + let m = (fun_name3 x y) in m in m", @@ -103,7 +103,7 @@ "line": 32, "col": 10 }, - "content": "let rec z x = fun_name2 x + "content": "let rec z x = (fun_name2 x) and y = 80 and fun_name2 (x) = (10 + y) + x", @@ -135,7 +135,7 @@ }, "content": "let fun_name2 () = print_endline \"Wild side effect!\"; [1; 2; 3; 4] let f = - fun_name2 ()", + (fun_name2 ())", "selection-range": { "start": { "line": 34, @@ -165,7 +165,7 @@ "content": "let outsider_expr () = let bar = 20 in object method foo = bar end class a = let inner_expr = - outsider_expr () + (outsider_expr ()) in object method x = (Fun.const 10) () @@ -216,7 +216,7 @@ method x = (Fun.const 10) () method y = print_endline method z = - fun_name2 () + (fun_name2 ()) end", "selection-range": { "start": { @@ -246,7 +246,7 @@ }, "content": "let fun_name2 () = let var = ref 0 in var := (y * 50); !var let my_mutable_state = - fun_name2 ()", + (fun_name2 ())", "selection-range": { "start": { "line": 59, @@ -279,7 +279,7 @@ Fun.protect (fun () -> let fun_name2 = ( / ) in - let y = fun_name3 x fun_name2 in + let y = (fun_name3 x fun_name2) in List.map2 (fun op (a, b) -> op a b) y [ (1, 1); (3, 2); (8, 2) ]) ~finally:(Fun.const ())", "selection-range": { @@ -311,7 +311,7 @@ "content": "let fun_name2 () = [10; 20; 30] let rec f = List.map Fun.id - and y = fun_name2 () + and y = (fun_name2 ()) and z x = object @@ -350,7 +350,7 @@ and z x = object - method x = fun_name2 x + method x = (fun_name2 x) method y = y end and fun_name2 (x) = x", @@ -409,7 +409,7 @@ "col": 21 }, "content": "let fun_name2 (x) = x * 2 - let f x = fun_name2 x + 3", + let f x = (fun_name2 x) + 3", "selection-range": { "start": { "line": 85, @@ -439,7 +439,7 @@ "content": "let fun_name2 (x) (y) = x * y let f x = let y = 0 in - fun_name2 x y + 3", + (fun_name2 x y) + 3", "selection-range": { "start": { "line": 87, @@ -470,7 +470,7 @@ TODO: This extraction shouldn't be allowed. "content": "let fun_name2 () = raise Local let f x = let exception Local in - fun_name2 ()", + (fun_name2 ())", "selection-range": { "start": { "line": 91, @@ -498,7 +498,7 @@ TODO: This extraction shouldn't be allowed. "col": 15 }, "content": "let fun_name2 (x) = x + 1 - let f x = fun_name2 x", + let f x = (fun_name2 x)", "selection-range": { "start": { "line": 96, @@ -526,7 +526,7 @@ TODO: This extraction shouldn't be allowed. "col": 15 }, "content": "let fun_name2 (x) = x + y - let f x = fun_name2 x", + let f x = (fun_name2 x)", "selection-range": { "start": { "line": 100, @@ -554,7 +554,7 @@ TODO: This extraction shouldn't be allowed. "col": 37 }, "content": "let fun_name2 (x) = List.map (fun y -> y + 1) x - let f x = fun_name2 x", + let f x = (fun_name2 x)", "selection-range": { "start": { "line": 102, @@ -584,7 +584,7 @@ TODO: This extraction shouldn't be allowed. "content": "let fun_name2 (y) = y + 2 let f y = let y = y + 1 in - fun_name2 y", + (fun_name2 y)", "selection-range": { "start": { "line": 104, @@ -612,7 +612,7 @@ TODO: This extraction shouldn't be allowed. "col": 16 }, "content": "let fun_name2 () = y + 1 - let f () = fun_name2 ()", + let f () = (fun_name2 ())", "selection-range": { "start": { "line": 108, @@ -644,7 +644,7 @@ TODO: This extraction shouldn't be allowed. let module M = struct let y = 0 end in - fun_name2 x M.y + 3", + (fun_name2 x M.y) + 3", "selection-range": { "start": { "line": 110, @@ -675,7 +675,7 @@ TODO: This extraction shouldn't be allowed. let f = let x = 1 in let y = 2 in - let z = z x y in + let z = (z x y) in z + z + 1", "selection-range": { "start": { @@ -690,3 +690,40 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 129:19 -end 132:71 -extract-name pp_markup < func.ml + { + "class": "return", + "value": { + "start": { + "line": 125, + "col": 0 + }, + "end": { + "line": 133, + "col": 7 + }, + "content": "let pp_markup (bold_tag) = + fun ppf markup -> + match markup with + | Text txt -> pp_print_string ppf txt + | Bold txt -> pp_print_string ppf (bold_tag ^ (txt ^ bold_tag)) + let pp_document ppf doc = + let open Format in + let bold_tag = \"**\" in + fprintf ppf \"%a\" + (pp_print_list (pp_markup bold_tag)) + doc", + "selection-range": { + "start": { + "line": 125, + "col": 4 + }, + "end": { + "line": 125, + "col": 13 + } + } + }, + "notifications": [] + } From baedfc7673e049e83308a1d7508f83f716c10d85 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Mon, 21 Jul 2025 14:36:02 +0200 Subject: [PATCH 10/31] Code quality. --- src/analysis/refactor_extract_region.ml | 39 +++++++++++++------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 2f1ab99d31..d16a3de729 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -314,9 +314,21 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item name; gen_binding_kind; generated_binding; - generated_call + generated_call; + call_need_parenthesis = true } +let remove_poly expr = + let open Typedtree in + { expr with + exp_extra = + List.filter + ~f:(function + | Texp_poly _, _, _ -> false + | _ -> true) + expr.exp_extra + } + let most_inclusive_expr ~start ~stop nodes = let is_inside_region = Location_aux.included @@ -330,31 +342,22 @@ let most_inclusive_expr ~start ~stop nodes = select_among_child node.Browse_tree.t_env node.t_node) in let node_loc = Mbrowse.node_loc node in - let remove_poly expr = - (* We have to remove poly extra that cause unexpected "!poly!" to be printed - in generated code. This happens when you try to extract the body of a method. *) - let open Typedtree in - { expr with - exp_extra = - List.filter - ~f:(function - | Texp_poly _, _, _ -> false - | _ -> true) - expr.exp_extra - } - in match node with - | Expression expr -> + | Expression expr + when node_loc.loc_ghost = false && is_inside_region node_loc -> (* We filter expression that have a ghost location. Otherwise, expression such as [let f x = 10 + x] can be extracted and this can lead to invalid code gen. ^^^^^^^^^^ *) - if node_loc.loc_ghost = false && is_inside_region node_loc then - Some (remove_poly expr, env) - else select_deeper node env + Some (expr, env) | _ -> select_deeper node env in nodes |> List.rev |> Stdlib.List.find_map (fun (env, node) -> select_among_child env node) + |> Option.map ~f:(fun (expr, env) -> + (* We also have to remove poly extra that cause unexpected "!poly!" + to be printed in generated code. This happens when you try to extract + the body of a method. *) + (remove_poly expr, env)) let find_associated_toplevel_item expr structure = Stdlib.List.find_map From 3627558b89cc47b0203f7133cca85ed34ce3b6e2 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Mon, 21 Jul 2025 16:40:10 +0200 Subject: [PATCH 11/31] Replace location heuristic by a proper analysis. --- src/analysis/refactor_extract_region.ml | 125 ++++++++++-------- src/analysis/refactor_extract_region.mli | 3 +- src/frontend/query_commands.ml | 3 +- .../func-extraction.t/run.t | 14 +- 4 files changed, 77 insertions(+), 68 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index d16a3de729..05840de88f 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -90,7 +90,7 @@ let source_sub_loc src loc = ~len:(end_offset - start_offset) |> Msource.make -type analysis = { bounded_vars : Path.t list; gen_binding_kind : rec_flag } +type analysis = { bounded_vars : Path.t list; binding_kind : rec_flag } and rec_flag = Non_recursive | Rec_and @@ -109,9 +109,16 @@ type extraction = and extraction_name = Default of { basename : string } | Fixed of string -and toplevel_item = { rec_flag : Asttypes.rec_flag; loc : Location.t } +and toplevel_item = + { rec_flag : Asttypes.rec_flag; + env : Env.t; + loc : Location.t; + kind : toplevel_item_kind + } (* A convenient type for grouping info. *) +and toplevel_item_kind = Let of Typedtree.value_binding list | Class_decl + and generated_binding = name:string -> body:Parsetree.expression -> Parsetree.structure_item @@ -121,25 +128,24 @@ let is_recursive = function | { rec_flag = Asttypes.Recursive; _ } -> true | { rec_flag = Nonrecursive; _ } -> false +let rec find_pattern_var : type a. a Typedtree.general_pattern -> Path.t list = + fun { Typedtree.pat_desc; _ } -> + match pat_desc with + | Typedtree.Tpat_var (ident, _, _) -> [ Pident ident ] + | Tpat_tuple pats -> List.concat_map ~f:find_pattern_var pats + | Tpat_alias (pat, ident, _, _) -> Pident ident :: find_pattern_var pat + | Tpat_construct (_, _, pats, _) -> List.concat_map ~f:find_pattern_var pats + | Tpat_variant (_, Some pat, _) -> find_pattern_var pat + | Tpat_record (fields, _) -> + List.concat_map ~f:(fun (_, _, field) -> find_pattern_var field) fields + | Tpat_array arr -> List.concat_map ~f:find_pattern_var arr + | Tpat_lazy pat | Tpat_exception pat -> find_pattern_var pat + | Tpat_value pat -> + find_pattern_var (pat :> Typedtree.value Typedtree.general_pattern) + | Tpat_or (l, r, _) -> find_pattern_var l @ find_pattern_var r + | _ -> [] + let rec occuring_vars node = - let rec find_pattern_var : type a. a Typedtree.general_pattern -> Path.t list - = - fun { Typedtree.pat_desc; _ } -> - match pat_desc with - | Typedtree.Tpat_var (ident, _, _) -> [ Pident ident ] - | Tpat_tuple pats -> List.concat_map ~f:find_pattern_var pats - | Tpat_alias (pat, ident, _, _) -> Pident ident :: find_pattern_var pat - | Tpat_construct (_, _, pats, _) -> List.concat_map ~f:find_pattern_var pats - | Tpat_variant (_, Some pat, _) -> find_pattern_var pat - | Tpat_record (fields, _) -> - List.concat_map ~f:(fun (_, _, field) -> find_pattern_var field) fields - | Tpat_array arr -> List.concat_map ~f:find_pattern_var arr - | Tpat_lazy pat | Tpat_exception pat -> find_pattern_var pat - | Tpat_value pat -> - find_pattern_var (pat :> Typedtree.value Typedtree.general_pattern) - | Tpat_or (l, r, _) -> find_pattern_var l @ find_pattern_var r - | _ -> [] - in let loop acc node = match node.Browse_tree.t_node with | Browse_raw.Expression { exp_desc = Texp_ident (path, _, _); _ } -> @@ -153,35 +159,35 @@ let rec occuring_vars node = loop [] node |> Path.Set.of_list |> Path.Set.elements |> List.rev |> List.filter ~f:(fun path -> Ident.name (Path.head path) <> "Stdlib") -let analyze_expr expr env ~toplevel_item ~mconfig ~local_defs = - let unbounded_enclosing = - { Location.loc_start = toplevel_item.loc.loc_start; - loc_end = expr.Typedtree.exp_loc.loc_start; - loc_ghost = false - } +let analyze_expr expr expr_env ~toplevel_item = + let is_value_unbound path = + let is_bound path env = + try + let _ = Env.find_value path env in + true + with Not_found -> false + in + is_bound path expr_env && not (is_bound path toplevel_item.env) + in + let is_one_of_value_decl var_path bindings = + List.exists + ~f:(fun vb -> + let names = find_pattern_var vb.Typedtree.vb_pat |> Path.Set.of_list in + Path.Set.mem var_path names) + bindings in - Browse_tree.of_node ~env (Browse_raw.Expression expr) + Browse_tree.of_node ~env:expr_env (Browse_raw.Expression expr) |> occuring_vars - |> List.fold_left - ~init:{ bounded_vars = []; gen_binding_kind = Non_recursive } + |> List.fold_left ~init:{ bounded_vars = []; binding_kind = Non_recursive } ~f:(fun acc var_path -> - match - Locate.from_path - ~config:{ mconfig; ml_or_mli = `ML; traverse_aliases = true } - ~env ~local_defs ~namespace:Value var_path - with - | `Found { location; approximated = false; _ } -> - let acc = - if Location_aux.included location ~into:unbounded_enclosing then - { acc with bounded_vars = var_path :: acc.bounded_vars } - else acc - in - if - is_recursive toplevel_item - && Location_aux.included location ~into:toplevel_item.loc - then { acc with gen_binding_kind = Rec_and } - else acc - | _ -> acc) + if is_value_unbound var_path then + match toplevel_item.kind with + | Let bindings + when is_recursive toplevel_item + && is_one_of_value_decl var_path bindings -> + { acc with binding_kind = Rec_and } + | _ -> { acc with bounded_vars = var_path :: acc.bounded_vars } + else acc) let extract_to_toplevel { expr; @@ -284,14 +290,13 @@ let extract_const_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = call_need_parenthesis = false } -let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item - ~local_defs ~mconfig = +let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = let is_function = function | { Typedtree.exp_desc = Texp_function _; _ } -> true | _ -> false in - let { bounded_vars; gen_binding_kind } = - analyze_expr expr expr_env ~toplevel_item ~local_defs ~mconfig + let { bounded_vars; binding_kind } = + analyze_expr expr expr_env ~toplevel_item in let generated_binding, generated_call = match bounded_vars with @@ -312,7 +317,7 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item expr_env; toplevel_item; name; - gen_binding_kind; + gen_binding_kind = binding_kind; generated_binding; generated_call; call_need_parenthesis = true @@ -361,17 +366,22 @@ let most_inclusive_expr ~start ~stop nodes = let find_associated_toplevel_item expr structure = Stdlib.List.find_map - (fun { Typedtree.str_desc; str_loc; _ } -> + (fun { Typedtree.str_desc; str_loc; str_env } -> match str_desc with - | Tstr_value (rec_flag, _) + | Tstr_value (rec_flag, vb) when Location_aux.included expr.Typedtree.exp_loc ~into:str_loc -> - Some { rec_flag; loc = str_loc } + Some { rec_flag; env = str_env; loc = str_loc; kind = Let vb } | Tstr_class cs -> Stdlib.List.find_map (fun (class_decl, _) -> let loc = class_decl.Typedtree.ci_loc in if Location_aux.included expr.exp_loc ~into:loc then - Some { rec_flag = Nonrecursive; loc } + Some + { rec_flag = Nonrecursive; + env = str_env; + loc; + kind = Class_decl + } else None) cs | _ -> None) @@ -380,6 +390,7 @@ let find_associated_toplevel_item expr structure = let extract_region ~start ~stop enclosing structure = let open Option.Infix in most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> + (* si contenu de l'expr contient une expression local alors inextrayable *) find_associated_toplevel_item expr structure >>| fun toplevel_item -> (expr, expr_env, toplevel_item) @@ -388,7 +399,7 @@ let is_region_extractable ~start ~stop enclosing structure = | None -> false | Some _ -> true -let substitute ~start ~stop ?extract_name mconfig buffer typedtree = +let substitute ~start ~stop ?extract_name buffer typedtree = match typedtree with | `Interface _ -> raise Not_allowed_in_interface_file | `Implementation structure -> begin @@ -406,6 +417,6 @@ let substitute ~start ~stop ?extract_name mconfig buffer typedtree = ~toplevel_item | _ -> extract_expr_to_toplevel ?extract_name expr buffer ~expr_env - ~toplevel_item ~local_defs:typedtree ~mconfig + ~toplevel_item end end diff --git a/src/analysis/refactor_extract_region.mli b/src/analysis/refactor_extract_region.mli index 4223d7502c..e9d24cc8db 100644 --- a/src/analysis/refactor_extract_region.mli +++ b/src/analysis/refactor_extract_region.mli @@ -93,7 +93,7 @@ val is_region_extractable : Typedtree.structure -> bool -(** [substitute ~start ~stop ~extract_name config buffer typedtree] tries to +(** [substitute ~start ~stop ~extract_name buffer typedtree] tries to extract the most inclusive expression located in interval [start-stop] into a fresh toplevel generated let binding. @@ -112,7 +112,6 @@ val substitute : start:Lexing.position -> stop:Lexing.position -> ?extract_name:string -> - Mconfig.t -> Msource.t -> Mtyper.typedtree -> Query_protocol.substitution_result diff --git a/src/frontend/query_commands.ml b/src/frontend/query_commands.ml index e037726301..ccbd5f7068 100644 --- a/src/frontend/query_commands.ml +++ b/src/frontend/query_commands.ml @@ -815,11 +815,10 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = function | Refactor_extract_region (start, stop, extract_name, buffer) -> let start = Mpipeline.get_lexing_pos pipeline start and stop = Mpipeline.get_lexing_pos pipeline stop in - let config = Mpipeline.final_config pipeline in let typer_result = Mpipeline.typer_result pipeline |> Mtyper.get_typedtree in - Refactor_extract_region.substitute ~start ~stop ?extract_name config buffer + Refactor_extract_region.substitute ~start ~stop ?extract_name buffer typer_result | Signature_help { position; _ } -> ( (* Todo: additionnal contextual information could help us provide better diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index b94d70df2c..bd7ea7c91d 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -344,7 +344,8 @@ "line": 81, "col": 5 }, - "content": "let rec f = List.map Fun.id + "content": "let fun_name2 (x) = x + let rec f = List.map Fun.id and y = [ 10; 20; 30 ] @@ -352,16 +353,15 @@ object method x = (fun_name2 x) method y = y - end - and fun_name2 (x) = x", + end", "selection-range": { "start": { - "line": 82, - "col": 4 + "line": 73, + "col": 8 }, "end": { - "line": 82, - "col": 13 + "line": 73, + "col": 17 } } }, From 195ed6df402726430203963c2ca055ba4a8f7b26 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Tue, 22 Jul 2025 10:37:17 +0200 Subject: [PATCH 12/31] Remove path of value living in a module inside extracted expression. --- src/analysis/refactor_extract_region.ml | 31 +++++++++++++--- .../func-extraction.t/func.ml | 13 +++++++ .../func-extraction.t/run.t | 36 +++++++++++++++++++ 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 05840de88f..a55aa8c8f4 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -145,7 +145,7 @@ let rec find_pattern_var : type a. a Typedtree.general_pattern -> Path.t list = | Tpat_or (l, r, _) -> find_pattern_var l @ find_pattern_var r | _ -> [] -let rec occuring_vars node = +let rec occuring_vars_path node = let loop acc node = match node.Browse_tree.t_node with | Browse_raw.Expression { exp_desc = Texp_ident (path, _, _); _ } -> @@ -153,7 +153,7 @@ let rec occuring_vars node = | Pattern pat -> find_pattern_var pat @ acc | _ -> Lazy.force node.t_children - |> List.concat_map ~f:occuring_vars + |> List.concat_map ~f:occuring_vars_path |> List.append acc in loop [] node |> Path.Set.of_list |> Path.Set.elements |> List.rev @@ -177,7 +177,7 @@ let analyze_expr expr expr_env ~toplevel_item = bindings in Browse_tree.of_node ~env:expr_env (Browse_raw.Expression expr) - |> occuring_vars + |> occuring_vars_path |> List.fold_left ~init:{ bounded_vars = []; binding_kind = Non_recursive } ~f:(fun acc var_path -> if is_value_unbound var_path then @@ -295,9 +295,23 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | { Typedtree.exp_desc = Texp_function _; _ } -> true | _ -> false in + let is_module_bound path = + try + let _ = Env.find_module path toplevel_item.env in + false + with Not_found -> true + in let { bounded_vars; binding_kind } = analyze_expr expr expr_env ~toplevel_item in + let bounded_vars_stamp = + List.map ~f:(fun p -> Path.head p |> Ident.stamp) bounded_vars + in + let is_bound_var path = + List.exists + ~f:(Int.equal (Path.head path |> Ident.stamp)) + bounded_vars_stamp + in let generated_binding, generated_call = match bounded_vars with | [] when not (is_function expr) -> @@ -312,8 +326,17 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | None -> Default { basename = "fun_name" } | Some name -> Fixed name in + let remove_path_prefix mapper expr = + match expr.Typedtree.exp_desc with + | Texp_ident (Pdot (path, val_name), longident, vd) + when is_bound_var path && is_module_bound path -> + let ident = { longident with txt = Longident.Lident val_name } in + { expr with exp_desc = Texp_ident (path, ident, vd) } + | _ -> Tast_mapper.default.expr mapper expr + in + let mapper = { Tast_mapper.default with expr = remove_path_prefix } in extract_to_toplevel - { expr; + { expr = mapper.expr mapper expr; expr_env; toplevel_item; name; diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index 804b5e9675..480253a2d8 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -131,3 +131,16 @@ let pp_document ppf doc = | Text txt -> pp_print_string ppf txt | Bold txt -> pp_print_string ppf (bold_tag ^ txt ^ bold_tag))) doc + +module A = struct + let a = 10 +end +let f x = + let module Empty = struct end in + let module M = struct + module MM = struct + let y = 0 + end + let z = 0 + end in + x * M.z * M.MM.y + A.a diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index bd7ea7c91d..d05942a712 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -727,3 +727,39 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 146:2 -end 146:25 -extract-name add < func.ml + { + "class": "return", + "value": { + "start": { + "line": 138, + "col": 0 + }, + "end": { + "line": 146, + "col": 24 + }, + "content": "let add (x) (z) (y) = ((x * z) * y) + A.a + let f x = + let module Empty = struct end in + let module M = struct + module MM = struct + let y = 0 + end + let z = 0 + end in + (add x M.z M.MM.y)", + "selection-range": { + "start": { + "line": 138, + "col": 4 + }, + "end": { + "line": 138, + "col": 7 + } + } + }, + "notifications": [] + } From af58896492b2d8ffecad907d1f4a1e7bb0be3283 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Thu, 24 Jul 2025 16:02:46 +0200 Subject: [PATCH 13/31] Make extraction works in submodule. --- src/analysis/refactor_extract_region.ml | 55 +++++++++------- src/analysis/refactor_extract_region.mli | 6 +- .../func-extraction.t/func.ml | 14 +++- .../func-extraction.t/run.t | 65 +++++++++++++++++++ 4 files changed, 109 insertions(+), 31 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index a55aa8c8f4..ba05a61d4d 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -370,6 +370,7 @@ let most_inclusive_expr ~start ~stop nodes = select_among_child node.Browse_tree.t_env node.t_node) in let node_loc = Mbrowse.node_loc node in + match node with | Expression expr when node_loc.loc_ghost = false && is_inside_region node_loc -> @@ -387,38 +388,42 @@ let most_inclusive_expr ~start ~stop nodes = the body of a method. *) (remove_poly expr, env)) -let find_associated_toplevel_item expr structure = +let find_associated_toplevel_item expr enclosing = Stdlib.List.find_map - (fun { Typedtree.str_desc; str_loc; str_env } -> - match str_desc with - | Tstr_value (rec_flag, vb) - when Location_aux.included expr.Typedtree.exp_loc ~into:str_loc -> - Some { rec_flag; env = str_env; loc = str_loc; kind = Let vb } - | Tstr_class cs -> - Stdlib.List.find_map - (fun (class_decl, _) -> - let loc = class_decl.Typedtree.ci_loc in - if Location_aux.included expr.exp_loc ~into:loc then - Some - { rec_flag = Nonrecursive; - env = str_env; - loc; - kind = Class_decl - } - else None) - cs + (fun (_, item) -> + match item with + | Browse_raw.Structure_item ({ str_desc; str_loc; str_env }, _) -> begin + match str_desc with + | Tstr_value (rec_flag, vb) + when Location_aux.included expr.Typedtree.exp_loc ~into:str_loc -> + Some { rec_flag; env = str_env; loc = str_loc; kind = Let vb } + | Tstr_class cs -> + Stdlib.List.find_map + (fun (class_decl, _) -> + let loc = class_decl.Typedtree.ci_loc in + if Location_aux.included expr.exp_loc ~into:loc then + Some + { rec_flag = Nonrecursive; + env = str_env; + loc; + kind = Class_decl + } + else None) + cs + | _ -> None + end | _ -> None) - structure.Typedtree.str_items + enclosing -let extract_region ~start ~stop enclosing structure = +let extract_region ~start ~stop enclosing = let open Option.Infix in most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> (* si contenu de l'expr contient une expression local alors inextrayable *) - find_associated_toplevel_item expr structure >>| fun toplevel_item -> + find_associated_toplevel_item expr enclosing >>| fun toplevel_item -> (expr, expr_env, toplevel_item) -let is_region_extractable ~start ~stop enclosing structure = - match extract_region ~start ~stop enclosing structure with +let is_region_extractable ~start ~stop enclosing = + match extract_region ~start ~stop enclosing with | None -> false | Some _ -> true @@ -429,7 +434,7 @@ let substitute ~start ~stop ?extract_name buffer typedtree = let enclosing = Mbrowse.enclosing start [ Mbrowse.of_structure structure ] in - match extract_region ~start ~stop enclosing structure with + match extract_region ~start ~stop enclosing with | None -> raise Nothing_to_do | Some (expr, expr_env, toplevel_item) -> begin match expr.exp_desc with diff --git a/src/analysis/refactor_extract_region.mli b/src/analysis/refactor_extract_region.mli index e9d24cc8db..311b49301c 100644 --- a/src/analysis/refactor_extract_region.mli +++ b/src/analysis/refactor_extract_region.mli @@ -87,11 +87,7 @@ exception Not_allowed_in_interface_file (** Is an expression is extractable in the given region? *) val is_region_extractable : - start:Lexing.position -> - stop:Lexing.position -> - (Env.t * Browse_raw.node) list -> - Typedtree.structure -> - bool + start:Lexing.position -> stop:Lexing.position -> Mbrowse.t -> bool (** [substitute ~start ~stop ~extract_name buffer typedtree] tries to extract the most inclusive expression located in interval [start-stop] into diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index 480253a2d8..5721ce1d8b 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -143,4 +143,16 @@ let f x = end let z = 0 end in - x * M.z * M.MM.y + A.a + (x * M.z * M.MM.y) + A.a + +module T = struct + let on_list x = x + 1 + let k : (int, int) Result.t = Ok 10 + let r = Ok 10 + + let x = + let a_list = List.map on_list [ 1; 2; 3 ] in + let open Format in + let printer = pp_print_list pp_print_int in + printf "%a\n" printer a_list +end diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index d05942a712..30776d5fac 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -763,3 +763,68 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 154:17 -end 154:45 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 153, + "col": 2 + }, + "end": { + "line": 157, + "col": 32 + }, + "content": "let fun_name2 () = List.map on_list [1; 2; 3] + let x = + let a_list = (fun_name2 ()) in + let open Format in + let printer = pp_print_list pp_print_int in + printf \"%a\ + \" printer a_list", + "selection-range": { + "start": { + "line": 153, + "col": 4 + }, + "end": { + "line": 153, + "col": 13 + } + } + }, + "notifications": [] + } + $ $MERLIN single refactoring-extract-region -start 154:18 -end 154:44 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 153, + "col": 2 + }, + "end": { + "line": 157, + "col": 32 + }, + "content": "let fun_name2 () = on_list + let x = + let a_list = List.map (fun_name2 ()) [ 1; 2; 3 ] in + let open Format in + let printer = pp_print_list pp_print_int in + printf \"%a\ + \" printer a_list", + "selection-range": { + "start": { + "line": 153, + "col": 4 + }, + "end": { + "line": 153, + "col": 13 + } + } + }, + "notifications": [] + } From b064a8b3db6c05820aabcf404bbe0f2531fa0c4d Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Fri, 25 Jul 2025 14:13:12 +0200 Subject: [PATCH 14/31] Clean up expression before printing. --- src/analysis/refactor_extract_region.ml | 89 ++++++++++++------- .../func-extraction.t/run.t | 39 ++++++++ 2 files changed, 97 insertions(+), 31 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index ba05a61d4d..85aea9a40c 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -22,6 +22,39 @@ module Fresh_name = struct loop 1 end +let clean_up_for_printing expr = + let mapper = + { Ast_mapper.default_mapper with + expr = + (fun mapper expr -> + match expr.pexp_desc with + | Pexp_construct + ( ident, + Some + { pexp_desc = + Pexp_tuple + (_ + :: ({ pexp_desc = + Pexp_constant + { pconst_desc = Pconst_string _; _ }; + _ + } as const) + :: _); + _ + } ) + when Longident.head ident.txt = "CamlinternalFormatBasics" -> + (* We need to retransform format specification which has been desugared into string. *) + const + | Pexp_poly (expr, _) -> + (* We also have to remove poly extra that cause unexpected "!poly!" + to be printed in generated code. This happens when you try + to extract the body of a method. *) + expr + | _ -> Ast_mapper.default_mapper.expr mapper expr) + } + in + mapper.expr mapper expr |> Parsetree_utils.expr_remove_merlin_attributes + module Gen = struct let unit = Longident.Lident "()" |> Location.mknoloc @@ -29,7 +62,7 @@ module Gen = struct let toplevel_let ~name ~body = let open Ast_helper in let pattern = Pat.mk (Ppat_var { txt = name; loc = Location.none }) in - let body = Parsetree_utils.expr_remove_merlin_attributes body in + let body = clean_up_for_printing body in Str.value Nonrecursive [ Vb.mk pattern body ] (* Generates [let name () = body]. *) @@ -67,7 +100,9 @@ module Gen = struct let fun_apply params ~name = let open Ast_helper in - let params = List.map ~f:(fun p -> (Asttypes.Nolabel, p)) params in + let params = + List.map ~f:(fun p -> (Asttypes.Nolabel, clean_up_for_printing p)) params + in Exp.apply (ident ~name) params let fun_apply_unit = fun_apply [ Ast_helper.Exp.ident unit ] @@ -157,7 +192,9 @@ let rec occuring_vars_path node = |> List.append acc in loop [] node |> Path.Set.of_list |> Path.Set.elements |> List.rev - |> List.filter ~f:(fun path -> Ident.name (Path.head path) <> "Stdlib") + |> List.filter ~f:(fun path -> + (* TODO: fix this *) + Ident.name (Path.head path) <> "Stdlib") let analyze_expr expr expr_env ~toplevel_item = let is_value_unbound path = @@ -234,18 +271,18 @@ let extract_to_toplevel fresh_call |> Msource.text in - let untyped_expr = Untypeast.untype_expression expr in + let expr = Untypeast.untype_expression expr in let content = match gen_binding_kind with | Non_recursive -> let fresh_let_binding = - generated_binding ~name:val_name ~body:untyped_expr + generated_binding ~name:val_name ~body:expr |> Format.asprintf "%a" Pprintast.structure_item in fresh_let_binding ^ "\n" ^ substitued_toplevel_binding | Rec_and -> let fresh_let_binding = - generated_binding ~name:val_name ~body:untyped_expr + generated_binding ~name:val_name ~body:expr |> Format.asprintf "%a" Pprintast.structure_item in let fresh_and_binding = @@ -326,17 +363,23 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | None -> Default { basename = "fun_name" } | Some name -> Fixed name in - let remove_path_prefix mapper expr = - match expr.Typedtree.exp_desc with - | Texp_ident (Pdot (path, val_name), longident, vd) - when is_bound_var path && is_module_bound path -> - let ident = { longident with txt = Longident.Lident val_name } in - { expr with exp_desc = Texp_ident (path, ident, vd) } - | _ -> Tast_mapper.default.expr mapper expr + let remove_path_prefix expr = + let mapper = + { Tast_mapper.default with + expr = + (fun mapper expr -> + match expr.Typedtree.exp_desc with + | Texp_ident (Pdot (path, val_name), longident, vd) + when is_bound_var path && is_module_bound path -> + let ident = { longident with txt = Longident.Lident val_name } in + { expr with exp_desc = Texp_ident (path, ident, vd) } + | _ -> Tast_mapper.default.expr mapper expr) + } + in + mapper.expr mapper expr in - let mapper = { Tast_mapper.default with expr = remove_path_prefix } in extract_to_toplevel - { expr = mapper.expr mapper expr; + { expr = remove_path_prefix expr; expr_env; toplevel_item; name; @@ -346,17 +389,6 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = call_need_parenthesis = true } -let remove_poly expr = - let open Typedtree in - { expr with - exp_extra = - List.filter - ~f:(function - | Texp_poly _, _, _ -> false - | _ -> true) - expr.exp_extra - } - let most_inclusive_expr ~start ~stop nodes = let is_inside_region = Location_aux.included @@ -382,11 +414,6 @@ let most_inclusive_expr ~start ~stop nodes = in nodes |> List.rev |> Stdlib.List.find_map (fun (env, node) -> select_among_child env node) - |> Option.map ~f:(fun (expr, env) -> - (* We also have to remove poly extra that cause unexpected "!poly!" - to be printed in generated code. This happens when you try to extract - the body of a method. *) - (remove_poly expr, env)) let find_associated_toplevel_item expr enclosing = Stdlib.List.find_map diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index 30776d5fac..0e26a1ba50 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -796,6 +796,7 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + $ $MERLIN single refactoring-extract-region -start 154:18 -end 154:44 < func.ml { "class": "return", @@ -828,3 +829,41 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 128:2 -end 133:7 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 125, + "col": 0 + }, + "end": { + "line": 133, + "col": 7 + }, + "content": "let fun_name2 (ppf) (doc) (bold_tag) = + fprintf ppf \"%a\" + (pp_print_list ?pp_sep:None + (fun ppf markup -> + match markup with + | Text txt -> pp_print_string ppf txt + | Bold txt -> pp_print_string ppf (bold_tag ^ (txt ^ bold_tag)))) + doc + let pp_document ppf doc = + let open Format in + let bold_tag = \"**\" in + (fun_name2 ppf doc bold_tag)", + "selection-range": { + "start": { + "line": 125, + "col": 4 + }, + "end": { + "line": 125, + "col": 13 + } + } + }, + "notifications": [] + } From f3db9271246d3b48c442561a5d48da014f33feb9 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Fri, 25 Jul 2025 14:55:11 +0200 Subject: [PATCH 15/31] Generate a suffix when a name is given and already used in current scope. --- src/analysis/refactor_extract_region.ml | 12 +++++++----- .../refactor-extract-region/func-extraction.t/run.t | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 85aea9a40c..b37324c4da 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -226,6 +226,12 @@ let analyze_expr expr expr_env ~toplevel_item = | _ -> { acc with bounded_vars = var_path :: acc.bounded_vars } else acc) +let choose_name name env = + match name with + | Default { basename } -> Fresh_name.gen_val_name basename env + | Fixed name -> + if Env.bound_value name env then Fresh_name.gen_val_name name env else name + let extract_to_toplevel { expr; expr_env; @@ -236,11 +242,7 @@ let extract_to_toplevel toplevel_item; call_need_parenthesis } buffer = - let val_name = - match name with - | Default { basename } -> Fresh_name.gen_val_name basename expr_env - | Fixed name -> name - in + let val_name = choose_name name expr_env in let fresh_call = let parenthised_opt s = if call_need_parenthesis then "(" ^ s ^ ")" else s diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index 0e26a1ba50..f1ed42957e 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -671,11 +671,11 @@ TODO: This extraction shouldn't be allowed. "line": 120, "col": 11 }, - "content": "let z (x) (y) = x + y + "content": "let z1 (x) (y) = x + y let f = let x = 1 in let y = 2 in - let z = (z x y) in + let z = (z1 x y) in z + z + 1", "selection-range": { "start": { @@ -684,7 +684,7 @@ TODO: This extraction shouldn't be allowed. }, "end": { "line": 116, - "col": 5 + "col": 6 } } }, From 7551d2f6f8a31fc33a4fe375c5a344e0728f1a37 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Fri, 25 Jul 2025 15:00:11 +0200 Subject: [PATCH 16/31] Fix a test. --- .../test-dirs/refactor-extract-region/func-extraction.t/run.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index f1ed42957e..c547e6eca1 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -728,7 +728,7 @@ TODO: This extraction shouldn't be allowed. "notifications": [] } - $ $MERLIN single refactoring-extract-region -start 146:2 -end 146:25 -extract-name add < func.ml + $ $MERLIN single refactoring-extract-region -start 146:2 -end 146:26 -extract-name add < func.ml { "class": "return", "value": { @@ -738,7 +738,7 @@ TODO: This extraction shouldn't be allowed. }, "end": { "line": 146, - "col": 24 + "col": 26 }, "content": "let add (x) (z) (y) = ((x * z) * y) + A.a let f x = From d1d3c1130887c713931cac034f7f636f7300ed2c Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Mon, 8 Sep 2025 16:05:33 +0200 Subject: [PATCH 17/31] Take Ulysse's comments into account. --- src/analysis/refactor_extract_region.ml | 125 ++++++++++++----------- src/analysis/refactor_extract_region.mli | 18 ++-- src/commands/new_commands.ml | 4 +- src/commands/query_json.ml | 2 +- src/frontend/query_commands.ml | 16 +-- src/frontend/query_protocol.ml | 2 +- 6 files changed, 89 insertions(+), 78 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index b37324c4da..2b2cb8a543 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -105,15 +105,18 @@ module Gen = struct in Exp.apply (ident ~name) params + (* [fun_apply_unit ~name] generates a call to the function named [name] to which we apply unit. *) let fun_apply_unit = fun_apply [ Ast_helper.Exp.ident unit ] + (* [fun_apply_params params ~name] generates a call to the function named [name] + to which we apply the list of arguments [params]. *) let fun_apply_params params = params |> List.map ~f:(fun param -> ident ~name:(Path.name param)) |> fun_apply end -let source_sub_loc src loc = +let extract_source_around_loc src loc = let (`Offset start_offset) = let line, col = Lexing.split_pos loc.Location.loc_start in Msource.get_offset src (`Logical (line, col)) @@ -136,8 +139,8 @@ type extraction = (** The value binding toplevel or class declaration enclosing the extracted expression. *) name : extraction_name; (** Binding name of the extracted expression. *) gen_binding_kind : rec_flag; - generated_binding : generated_binding; - generated_call : generated_call; + binding_generator : binding_generator; + call_generator : call_generator; call_need_parenthesis : bool (** Sometime we must parenthised call in order to type check. *) } @@ -154,10 +157,10 @@ and toplevel_item = and toplevel_item_kind = Let of Typedtree.value_binding list | Class_decl -and generated_binding = +and binding_generator = name:string -> body:Parsetree.expression -> Parsetree.structure_item -and generated_call = name:string -> Parsetree.expression +and call_generator = name:string -> Parsetree.expression let is_recursive = function | { rec_flag = Asttypes.Recursive; _ } -> true @@ -237,8 +240,8 @@ let extract_to_toplevel expr_env; name; gen_binding_kind; - generated_binding; - generated_call; + binding_generator; + call_generator; toplevel_item; call_need_parenthesis } buffer = @@ -247,11 +250,13 @@ let extract_to_toplevel let parenthised_opt s = if call_need_parenthesis then "(" ^ s ^ ")" else s in - generated_call ~name:val_name + call_generator ~name:val_name |> Format.asprintf "%a" Pprintast.expression |> parenthised_opt in - let toplevel_item_span = source_sub_loc buffer toplevel_item.loc in + let toplevel_item_source = + extract_source_around_loc buffer toplevel_item.loc + in let subst_loc = let start_lnum = 1 + expr.exp_loc.Location.loc_start.pos_lnum @@ -266,8 +271,8 @@ let extract_to_toplevel loc_end = { expr.exp_loc.loc_end with pos_lnum = end_lnum } } in - let substitued_toplevel_binding = - Msource.substitute toplevel_item_span + let substituted_binding = + Msource.substitute toplevel_item_source (`Logical (Lexing.split_pos subst_loc.loc_start)) (`Logical (Lexing.split_pos subst_loc.loc_end)) fresh_call @@ -278,19 +283,19 @@ let extract_to_toplevel match gen_binding_kind with | Non_recursive -> let fresh_let_binding = - generated_binding ~name:val_name ~body:expr + binding_generator ~name:val_name ~body:expr |> Format.asprintf "%a" Pprintast.structure_item in - fresh_let_binding ^ "\n" ^ substitued_toplevel_binding + fresh_let_binding ^ "\n" ^ substituted_binding | Rec_and -> let fresh_let_binding = - generated_binding ~name:val_name ~body:expr + binding_generator ~name:val_name ~body:expr |> Format.asprintf "%a" Pprintast.structure_item in let fresh_and_binding = "and" ^ String.drop 3 fresh_let_binding (* Sorry *) in - substitued_toplevel_binding ^ "\n" ^ fresh_and_binding + substituted_binding ^ "\n" ^ fresh_and_binding in let selection_range = let lnum = @@ -324,8 +329,8 @@ let extract_const_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = toplevel_item; name; gen_binding_kind = Non_recursive; - generated_binding = Gen.toplevel_let; - generated_call = Gen.ident; + binding_generator = Gen.toplevel_let; + call_generator = Gen.ident; call_need_parenthesis = false } @@ -334,7 +339,7 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | { Typedtree.exp_desc = Texp_function _; _ } -> true | _ -> false in - let is_module_bound path = + let is_module_bound_in_toplevel_env path = try let _ = Env.find_module path toplevel_item.env in false @@ -346,12 +351,10 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = let bounded_vars_stamp = List.map ~f:(fun p -> Path.head p |> Ident.stamp) bounded_vars in - let is_bound_var path = - List.exists - ~f:(Int.equal (Path.head path |> Ident.stamp)) - bounded_vars_stamp + let is_bound_var ident = + List.exists ~f:(Int.equal (Ident.stamp ident)) bounded_vars_stamp in - let generated_binding, generated_call = + let binding_generator, call_generator = match bounded_vars with | [] when not (is_function expr) -> (* If the extracted expr is already a function, no need to delayed computation @@ -365,15 +368,25 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | None -> Default { basename = "fun_name" } | Some name -> Fixed name in - let remove_path_prefix expr = + let remove_path_prefix_of_bound_values expr = + (* We need to unquality bound values. Otherwise, the generated call will use + the qualified name even if it does not exist in the scope. Examples: + + let f () = + let module X = struct let x = 10 end in + X.x + ^^^ If we extract this, the corresponding extracted call will be: + + let fun_name1 x = X.x *) let mapper = { Tast_mapper.default with expr = (fun mapper expr -> match expr.Typedtree.exp_desc with - | Texp_ident (Pdot (path, val_name), longident, vd) - when is_bound_var path && is_module_bound path -> - let ident = { longident with txt = Longident.Lident val_name } in + | Texp_ident (Pdot (path, name), longident, vd) + when is_bound_var (Path.head path) + && is_module_bound_in_toplevel_env path -> + let ident = { longident with txt = Longident.Lident name } in { expr with exp_desc = Texp_ident (path, ident, vd) } | _ -> Tast_mapper.default.expr mapper expr) } @@ -381,30 +394,28 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = mapper.expr mapper expr in extract_to_toplevel - { expr = remove_path_prefix expr; + { expr = remove_path_prefix_of_bound_values expr; expr_env; toplevel_item; name; gen_binding_kind = binding_kind; - generated_binding; - generated_call; + binding_generator; + call_generator; call_need_parenthesis = true } +(* [most_inclusive_expr ~start ~stop nodes] tries to find the most inclusive expression + within the range [start]-[stop] among [nodes]. + + [nodes] is a list of enclosings around the start position from the deepest + to the topelevel. It's reversed searched for an expression that fits the range. *) let most_inclusive_expr ~start ~stop nodes = let is_inside_region = Location_aux.included ~into:{ Location.loc_start = start; loc_end = stop; loc_ghost = true } in let rec select_among_child env node = - let select_deeper node env = - let node = Browse_tree.of_node ~env node in - Lazy.force node.t_children |> List.rev - |> Stdlib.List.find_map (fun node -> - select_among_child node.Browse_tree.t_env node.t_node) - in let node_loc = Mbrowse.node_loc node in - match node with | Expression expr when node_loc.loc_ghost = false && is_inside_region node_loc -> @@ -412,7 +423,12 @@ let most_inclusive_expr ~start ~stop nodes = such as [let f x = 10 + x] can be extracted and this can lead to invalid code gen. ^^^^^^^^^^ *) Some (expr, env) - | _ -> select_deeper node env + | _ -> + (* Continue to browse through the child of [node]. *) + let node = Browse_tree.of_node ~env node in + Lazy.force node.t_children |> List.rev + |> Stdlib.List.find_map (fun node -> + select_among_child node.Browse_tree.t_env node.t_node) in nodes |> List.rev |> Stdlib.List.find_map (fun (env, node) -> select_among_child env node) @@ -447,7 +463,6 @@ let find_associated_toplevel_item expr enclosing = let extract_region ~start ~stop enclosing = let open Option.Infix in most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> - (* si contenu de l'expr contient une expression local alors inextrayable *) find_associated_toplevel_item expr enclosing >>| fun toplevel_item -> (expr, expr_env, toplevel_item) @@ -456,24 +471,18 @@ let is_region_extractable ~start ~stop enclosing = | None -> false | Some _ -> true -let substitute ~start ~stop ?extract_name buffer typedtree = - match typedtree with - | `Interface _ -> raise Not_allowed_in_interface_file - | `Implementation structure -> begin - let enclosing = - Mbrowse.enclosing start [ Mbrowse.of_structure structure ] - in - match extract_region ~start ~stop enclosing with - | None -> raise Nothing_to_do - | Some (expr, expr_env, toplevel_item) -> begin - match expr.exp_desc with - | Texp_constant _ -> - (* Special case for constant. They can't produce side effect so it's not +let substitute ~start ~stop ?extract_name buffer structure = + let enclosing = Mbrowse.enclosing start [ Mbrowse.of_structure structure ] in + match extract_region ~start ~stop enclosing with + | None -> raise Nothing_to_do + | Some (expr, expr_env, toplevel_item) -> begin + match expr.exp_desc with + | Texp_constant _ -> + (* Special case for constant. They can't produce side effect so it's not necessary to add a trailing unit parameter to the let binding. *) - extract_const_to_toplevel ?extract_name expr ~expr_env buffer - ~toplevel_item - | _ -> - extract_expr_to_toplevel ?extract_name expr buffer ~expr_env - ~toplevel_item - end + extract_const_to_toplevel ?extract_name expr ~expr_env buffer + ~toplevel_item + | _ -> + extract_expr_to_toplevel ?extract_name expr buffer ~expr_env + ~toplevel_item end diff --git a/src/analysis/refactor_extract_region.mli b/src/analysis/refactor_extract_region.mli index 311b49301c..321ce7e2c9 100644 --- a/src/analysis/refactor_extract_region.mli +++ b/src/analysis/refactor_extract_region.mli @@ -1,6 +1,6 @@ -(** Region extractor allows extracting arbitrary expression into a fresh - toplevel binding. The extractor detects bounded variables inside the - extracted expression and performs code generation acordingly. +(** Region extractor allows extracting arbitrary expressions into a fresh + toplevel binding. The extractor detects bound variables inside the extracted + expression and performs code generation accordingly. For instance, let's assume that we want to extract the pi value oustide of the body of [circle_area]: @@ -53,9 +53,9 @@ expression inside an [and] binding. It also substitutes the expression by a call to the fresh generated function with the correct parameters. - Finally, if there is no bounded variable in the expression, a trailing unit + Finally, if there is no bound variable in the expression, a trailing unit parameter is added to the generated let binding in order to preserve the - evaluation order. Let's extract the entire body of [x]: + evaluation order. Let's extract the entire body of [my_list]: {[ let my_list = print_endline "Wild side effect!"; @@ -63,10 +63,10 @@ ]} {[ - let fun_name1 () = + let my_list () = print_endline "Wild side effect!"; [ 1; 2; 3; 4 ] - let f = fun_name1 () + let f = my_list () ]} Final remarks: @@ -89,7 +89,7 @@ exception Not_allowed_in_interface_file val is_region_extractable : start:Lexing.position -> stop:Lexing.position -> Mbrowse.t -> bool -(** [substitute ~start ~stop ~extract_name buffer typedtree] tries to +(** [substitute ~start ~stop ~extract_name buffer typedtree_structure] tries to extract the most inclusive expression located in interval [start-stop] into a fresh toplevel generated let binding. @@ -109,5 +109,5 @@ val substitute : stop:Lexing.position -> ?extract_name:string -> Msource.t -> - Mtyper.typedtree -> + Typedtree.structure -> Query_protocol.substitution_result diff --git a/src/commands/new_commands.ml b/src/commands/new_commands.ml index 95fd600ec0..7790eae663 100644 --- a/src/commands/new_commands.ml +++ b/src/commands/new_commands.ml @@ -591,10 +591,8 @@ let all_commands = | `None, _, _ -> failwith "-start is mandatory" | _, `None, _ -> failwith "-end is mandatory" | (#Msource.position as start), (#Msource.position as stop), name -> - let raw_source = Mpipeline.raw_source buffer in run buffer - (Query_protocol.Refactor_extract_region - (start, stop, name, raw_source)) + (Query_protocol.Refactor_extract_region (start, stop, name)) end; command "search-by-polarity" ~doc:"search-by-polarity -position pos -query ident\n\tTODO" diff --git a/src/commands/query_json.ml b/src/commands/query_json.ml index 5d56a0085e..cf2e522692 100644 --- a/src/commands/query_json.ml +++ b/src/commands/query_json.ml @@ -211,7 +211,7 @@ let dump (type a) : a t -> json = | `Unqualify -> "unqualify") ); ("position", mk_position pos) ] - | Refactor_extract_region (start, stop, _, _) -> + | Refactor_extract_region (start, stop, _) -> mk "refactoring-extract-region" [ ("start", mk_position start); ("stop", mk_position stop) ] | Signature_help { position; _ } -> diff --git a/src/frontend/query_commands.ml b/src/frontend/query_commands.ml index ccbd5f7068..98732cd69c 100644 --- a/src/frontend/query_commands.ml +++ b/src/frontend/query_commands.ml @@ -812,14 +812,18 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = function Inlay_hints.of_structure ~hint_let_binding ~hint_pattern_binding ~hint_function_params ~avoid_ghost_location ~start ~stop structure end - | Refactor_extract_region (start, stop, extract_name, buffer) -> + | Refactor_extract_region (start, stop, extract_name) -> let start = Mpipeline.get_lexing_pos pipeline start and stop = Mpipeline.get_lexing_pos pipeline stop in - let typer_result = - Mpipeline.typer_result pipeline |> Mtyper.get_typedtree - in - Refactor_extract_region.substitute ~start ~stop ?extract_name buffer - typer_result + let raw_source = Mpipeline.raw_source pipeline in + begin + match Mpipeline.typer_result pipeline |> Mtyper.get_typedtree with + | `Interface _ -> + raise Refactor_extract_region.Not_allowed_in_interface_file + | `Implementation structure -> + Refactor_extract_region.substitute ~start ~stop ?extract_name raw_source + structure + end | Signature_help { position; _ } -> ( (* Todo: additionnal contextual information could help us provide better results.*) diff --git a/src/frontend/query_protocol.ml b/src/frontend/query_protocol.ml index 52f9c29cbf..fadcf8b91a 100644 --- a/src/frontend/query_protocol.ml +++ b/src/frontend/query_protocol.ml @@ -162,7 +162,7 @@ type _ t = [ `Qualify | `Unqualify ] * Msource.position -> (string * Location.t) list t | Refactor_extract_region : - Msource.position * Msource.position * string option * Msource.t + Msource.position * Msource.position * string option -> substitution_result t | Document (* *) : string option * Msource.position From c50af5b368ceaed963466bc4e07c4f1f62c4efaa Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Mon, 8 Sep 2025 16:05:33 +0200 Subject: [PATCH 18/31] Remove function type declaration. --- src/analysis/refactor_extract_region.ml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 2b2cb8a543..1fb23b54f1 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -139,8 +139,9 @@ type extraction = (** The value binding toplevel or class declaration enclosing the extracted expression. *) name : extraction_name; (** Binding name of the extracted expression. *) gen_binding_kind : rec_flag; - binding_generator : binding_generator; - call_generator : call_generator; + binding_generator : + name:string -> body:Parsetree.expression -> Parsetree.structure_item; + call_generator : name:string -> Parsetree.expression; call_need_parenthesis : bool (** Sometime we must parenthised call in order to type check. *) } @@ -157,11 +158,6 @@ and toplevel_item = and toplevel_item_kind = Let of Typedtree.value_binding list | Class_decl -and binding_generator = - name:string -> body:Parsetree.expression -> Parsetree.structure_item - -and call_generator = name:string -> Parsetree.expression - let is_recursive = function | { rec_flag = Asttypes.Recursive; _ } -> true | { rec_flag = Nonrecursive; _ } -> false From 28a421e174e598ce2ba1327545a7bf2b01386e0c Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Mon, 8 Sep 2025 17:06:24 +0200 Subject: [PATCH 19/31] Fix constant extraction nested in a modules. --- src/analysis/refactor_extract_region.ml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 1fb23b54f1..0b3c20d161 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -426,8 +426,7 @@ let most_inclusive_expr ~start ~stop nodes = |> Stdlib.List.find_map (fun node -> select_among_child node.Browse_tree.t_env node.t_node) in - nodes |> List.rev - |> Stdlib.List.find_map (fun (env, node) -> select_among_child env node) + nodes |> Stdlib.List.find_map (fun (env, node) -> select_among_child env node) let find_associated_toplevel_item expr enclosing = Stdlib.List.find_map @@ -458,6 +457,8 @@ let find_associated_toplevel_item expr enclosing = let extract_region ~start ~stop enclosing = let open Option.Infix in + (* We want to traverse [enclosing] in ascending order. *) + let enclosing = List.rev enclosing in most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> find_associated_toplevel_item expr enclosing >>| fun toplevel_item -> (expr, expr_env, toplevel_item) From 7fe97e7f6380518daaa460dd3c0d5b90a7b9f03c Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Tue, 9 Sep 2025 14:14:38 +0200 Subject: [PATCH 20/31] Last fixes. --- src/analysis/refactor_extract_region.ml | 60 ++++++++++++++----------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 0b3c20d161..8147d41e3c 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -179,20 +179,21 @@ let rec find_pattern_var : type a. a Typedtree.general_pattern -> Path.t list = | Tpat_or (l, r, _) -> find_pattern_var l @ find_pattern_var r | _ -> [] -let rec occuring_vars_path node = - let loop acc node = +let occuring_vars_path node = + let rec loop acc node = match node.Browse_tree.t_node with | Browse_raw.Expression { exp_desc = Texp_ident (path, _, _); _ } -> - path :: acc - | Pattern pat -> find_pattern_var pat @ acc - | _ -> - Lazy.force node.t_children - |> List.concat_map ~f:occuring_vars_path - |> List.append acc + Path.Set.add path acc + | Pattern pat -> + let paths = find_pattern_var pat |> List.to_seq in + Path.Set.add_seq paths acc + | _ -> Lazy.force node.t_children |> List.fold_left ~f:loop ~init:acc in - loop [] node |> Path.Set.of_list |> Path.Set.elements |> List.rev - |> List.filter ~f:(fun path -> - (* TODO: fix this *) + loop Path.Set.empty node + |> Path.Set.filter (fun path -> + (* Filter identifier that are in Stdlib to avoid cluttering the list + of generated parameters. + TODO: there probably a more correct way to do this *) Ident.name (Path.head path) <> "Stdlib") let analyze_expr expr expr_env ~toplevel_item = @@ -212,18 +213,25 @@ let analyze_expr expr expr_env ~toplevel_item = Path.Set.mem var_path names) bindings in - Browse_tree.of_node ~env:expr_env (Browse_raw.Expression expr) - |> occuring_vars_path - |> List.fold_left ~init:{ bounded_vars = []; binding_kind = Non_recursive } - ~f:(fun acc var_path -> - if is_value_unbound var_path then - match toplevel_item.kind with - | Let bindings - when is_recursive toplevel_item - && is_one_of_value_decl var_path bindings -> - { acc with binding_kind = Rec_and } - | _ -> { acc with bounded_vars = var_path :: acc.bounded_vars } - else acc) + let vars_path = + Browse_tree.of_node ~env:expr_env (Browse_raw.Expression expr) + |> occuring_vars_path + in + let analysis = + Path.Set.fold + (fun var_path acc -> + if is_value_unbound var_path then + match toplevel_item.kind with + | Let bindings + when is_recursive toplevel_item + && is_one_of_value_decl var_path bindings -> + { acc with binding_kind = Rec_and } + | _ -> { acc with bounded_vars = var_path :: acc.bounded_vars } + else acc) + vars_path + { bounded_vars = []; binding_kind = Non_recursive } + in + { analysis with bounded_vars = List.rev analysis.bounded_vars } let choose_name name env = match name with @@ -400,12 +408,12 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = call_need_parenthesis = true } -(* [most_inclusive_expr ~start ~stop nodes] tries to find the most inclusive expression +(* [largest_expr_between ~start ~stop nodes] tries to find the most inclusive expression within the range [start]-[stop] among [nodes]. [nodes] is a list of enclosings around the start position from the deepest to the topelevel. It's reversed searched for an expression that fits the range. *) -let most_inclusive_expr ~start ~stop nodes = +let largest_expr_between ~start ~stop nodes = let is_inside_region = Location_aux.included ~into:{ Location.loc_start = start; loc_end = stop; loc_ghost = true } @@ -459,7 +467,7 @@ let extract_region ~start ~stop enclosing = let open Option.Infix in (* We want to traverse [enclosing] in ascending order. *) let enclosing = List.rev enclosing in - most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> + largest_expr_between ~start ~stop enclosing >>= fun (expr, expr_env) -> find_associated_toplevel_item expr enclosing >>| fun toplevel_item -> (expr, expr_env, toplevel_item) From 32f03f01a1b2cd27a969d206c409972fea6f8ab0 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Tue, 9 Sep 2025 14:26:58 +0200 Subject: [PATCH 21/31] WIP. --- TODO | 13 +++ src/analysis/refactor_extract_region.ml | 89 ++++++++++++++----- src/ocaml/typing/env.mli | 2 +- .../func-extraction.t/func.ml | 11 +++ .../func-extraction.t/run.t | 35 ++++++++ 5 files changed, 127 insertions(+), 23 deletions(-) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000000..cc8c202488 --- /dev/null +++ b/TODO @@ -0,0 +1,13 @@ +- let binding extraction and elimination + if nom donné pas libre alors génréré fresh name + +- interdire extraction exception local + +- method: + - documenter dans mli les extractions de méthode + - si extraction dans classe alors ajouter une méthode + +- renvoyer liste des refactorings possibles à un endroit donné + +- pas avoir de duplicata dans les noms de paramètre + diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index b37324c4da..dfa57a2317 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -14,12 +14,16 @@ let () = module Fresh_name = struct (* Generate a fresh name that does not already exist in given environment. *) - let gen_val_name basename env = + let gen_val_name ~is_bound basename env = let rec loop n = let guess = basename ^ Int.to_string n in - if Env.bound_value guess env then succ n |> loop else guess + if is_bound guess env then succ n |> loop else guess in loop 1 + + let gen_val_name_env = gen_val_name ~is_bound:Env.bound_value + + let gen_val_name_set = gen_val_name ~is_bound:String.Set.mem end let clean_up_for_printing expr = @@ -62,7 +66,7 @@ module Gen = struct let toplevel_let ~name ~body = let open Ast_helper in let pattern = Pat.mk (Ppat_var { txt = name; loc = Location.none }) in - let body = clean_up_for_printing body in + let body = Untypeast.untype_expression body |> clean_up_for_printing in Str.value Nonrecursive [ Vb.mk pattern body ] (* Generates [let name () = body]. *) @@ -79,20 +83,61 @@ module Gen = struct (* Generates [let name params = body]. *) let toplevel_function params ~name ~body = let open Ast_helper in - let params = - List.map - ~f:(fun param -> - let pattern = - Pat.construct - (Location.mknoloc (Longident.Lident (Path.last param))) - None + let used, params = + List.fold_left_map + ~f:(fun already_used param -> + let param_name = Path.last param in + let param_name = + if String.Set.mem param_name already_used then + let other_name = + match Path.flatten param with + | `Contains_apply -> assert false + | `Ok (id, path) -> + Ident.name id :: path + |> List.map ~f:String.lowercase_ascii + |> String.concat ~sep:"_" + in + if String.Set.mem other_name already_used then + Fresh_name.gen_val_name_set other_name already_used + else other_name + else param_name in - { Parsetree.pparam_loc = Location.none; - pparam_desc = Pparam_val (Nolabel, None, pattern) - }) - params + let fun_param = + let pattern = + Pat.construct + (Location.mknoloc (Longident.Lident param_name)) + None + in + { Parsetree.pparam_loc = Location.none; + pparam_desc = Pparam_val (Nolabel, None, pattern) + } + in + (String.Set.add param_name already_used, fun_param)) + ~init:String.Set.empty params + in + (* traverser parsetree et renommer variable qui ont le même stamp en ça *) + (* Parsetree. *) + let _mapper = + { Tast_mapper.default with + expr = + (fun mapper expr -> + match expr.exp_desc with + | Texp_ident _ -> + (* Ident.t *) + expr + (* Longident.flatten *) + | _ -> Tast_mapper.default.expr mapper expr) + } + in + let body = + { Typedtree.exp_desc = Texp_function (params, Tfunction_body body); + exp_loc = Location.none; + exp_extra = []; + exp_type = Types.Transient_expr.create Tnil ~level:0 ~scope:0 ~id:0; + exp_env = Env.empty; + exp_attributes = [] + } in - let body = Exp.function_ params None (Pfunction_body body) in toplevel_let ~name ~body let ident ~name = @@ -101,7 +146,9 @@ module Gen = struct let fun_apply params ~name = let open Ast_helper in let params = - List.map ~f:(fun p -> (Asttypes.Nolabel, clean_up_for_printing p)) params + List.map + ~f:(fun param -> (Asttypes.Nolabel, clean_up_for_printing param)) + params in Exp.apply (ident ~name) params @@ -155,7 +202,7 @@ and toplevel_item = and toplevel_item_kind = Let of Typedtree.value_binding list | Class_decl and generated_binding = - name:string -> body:Parsetree.expression -> Parsetree.structure_item + name:string -> body:Typedtree.expression -> Parsetree.structure_item and generated_call = name:string -> Parsetree.expression @@ -228,9 +275,10 @@ let analyze_expr expr expr_env ~toplevel_item = let choose_name name env = match name with - | Default { basename } -> Fresh_name.gen_val_name basename env + | Default { basename } -> Fresh_name.gen_val_name_env basename env | Fixed name -> - if Env.bound_value name env then Fresh_name.gen_val_name name env else name + if Env.bound_value name env then Fresh_name.gen_val_name_env name env + else name let extract_to_toplevel { expr; @@ -273,7 +321,6 @@ let extract_to_toplevel fresh_call |> Msource.text in - let expr = Untypeast.untype_expression expr in let content = match gen_binding_kind with | Non_recursive -> @@ -404,7 +451,6 @@ let most_inclusive_expr ~start ~stop nodes = select_among_child node.Browse_tree.t_env node.t_node) in let node_loc = Mbrowse.node_loc node in - match node with | Expression expr when node_loc.loc_ghost = false && is_inside_region node_loc -> @@ -447,7 +493,6 @@ let find_associated_toplevel_item expr enclosing = let extract_region ~start ~stop enclosing = let open Option.Infix in most_inclusive_expr ~start ~stop enclosing >>= fun (expr, expr_env) -> - (* si contenu de l'expr contient une expression local alors inextrayable *) find_associated_toplevel_item expr enclosing >>| fun toplevel_item -> (expr, expr_env, toplevel_item) diff --git a/src/ocaml/typing/env.mli b/src/ocaml/typing/env.mli index 6866dabe38..3d8cef4302 100644 --- a/src/ocaml/typing/env.mli +++ b/src/ocaml/typing/env.mli @@ -355,7 +355,7 @@ val open_pers_signature: string -> t -> (t, [`Not_found]) result val remove_last_open: Path.t -> t -> t option (* Insertion by name *) - +(* *) val enter_value: ?check:(string -> Warnings.t) -> string -> value_description -> t -> Ident.t * t diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index 5721ce1d8b..26c5aea976 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -156,3 +156,14 @@ module T = struct let printer = pp_print_list pp_print_int in printf "%a\n" printer a_list end + +let z = 100 + +let complicated_function x y = + let a = 10 in + let b = 11 in + let c = 12 in + let module D = struct + let x = 13 + end in + a + b + (c * x * y) + z + D.x diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index c547e6eca1..581a37efaa 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -867,3 +867,38 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 169:2 -end 169:31 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 162, + "col": 0 + }, + "end": { + "line": 169, + "col": 31 + }, + "content": "let fun_name2 (x) (y) (a) (b) (c) (d_x) = (((a + b) + ((c * x) * y)) + z) + d_x + let complicated_function x y = + let a = 10 in + let b = 11 in + let c = 12 in + let module D = struct + let x = 13 + end in + (fun_name2 x y a b c D.x)", + "selection-range": { + "start": { + "line": 162, + "col": 4 + }, + "end": { + "line": 162, + "col": 13 + } + } + }, + "notifications": [] + } From daf74d9cb72e4e7af919394a87d6567920a7359b Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Tue, 9 Sep 2025 14:30:14 +0200 Subject: [PATCH 22/31] Reproduce a bug. --- .../func-extraction.t/func.ml | 11 ++++++ .../func-extraction.t/run.t | 35 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index 5721ce1d8b..26c5aea976 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -156,3 +156,14 @@ module T = struct let printer = pp_print_list pp_print_int in printf "%a\n" printer a_list end + +let z = 100 + +let complicated_function x y = + let a = 10 in + let b = 11 in + let c = 12 in + let module D = struct + let x = 13 + end in + a + b + (c * x * y) + z + D.x diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index c547e6eca1..581a37efaa 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -867,3 +867,38 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 169:2 -end 169:31 < func.ml + { + "class": "return", + "value": { + "start": { + "line": 162, + "col": 0 + }, + "end": { + "line": 169, + "col": 31 + }, + "content": "let fun_name2 (x) (y) (a) (b) (c) (d_x) = (((a + b) + ((c * x) * y)) + z) + d_x + let complicated_function x y = + let a = 10 in + let b = 11 in + let c = 12 in + let module D = struct + let x = 13 + end in + (fun_name2 x y a b c D.x)", + "selection-range": { + "start": { + "line": 162, + "col": 4 + }, + "end": { + "line": 162, + "col": 13 + } + } + }, + "notifications": [] + } From 3ec6dccc92558072ae19dd7190ef735a30720351 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Thu, 11 Sep 2025 10:40:54 +0200 Subject: [PATCH 23/31] Last fixes. --- src/analysis/refactor_extract_region.ml | 159 ++++++++++++------ .../func-extraction.t/func.ml | 10 ++ .../func-extraction.t/run.t | 34 ++-- 3 files changed, 133 insertions(+), 70 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 4f6db8da53..3252d864a0 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -22,8 +22,6 @@ module Fresh_name = struct loop 1 let gen_val_name_env = gen_val_name ~is_bound:Env.bound_value - - let gen_val_name_set = gen_val_name ~is_bound:String.Set.mem end let clean_up_for_printing expr = @@ -62,12 +60,14 @@ let clean_up_for_printing expr = module Gen = struct let unit = Longident.Lident "()" |> Location.mknoloc - (* Generates [let name = body]. *) - let toplevel_let ~name ~body = + let untyped_toplevel_let ~name ~body = let open Ast_helper in let pattern = Pat.mk (Ppat_var { txt = name; loc = Location.none }) in - let body = Untypeast.untype_expression body |> clean_up_for_printing in - Str.value Nonrecursive [ Vb.mk pattern body ] + Str.value Nonrecursive [ Vb.mk pattern (clean_up_for_printing body) ] + + (* Generates [let name = body]. *) + let toplevel_let ~name ~body = + untyped_toplevel_let ~name ~body:(Untypeast.untype_expression body) (* Generates [let name () = body]. *) let let_unit_toplevel ~name ~body = @@ -77,68 +77,119 @@ module Gen = struct pparam_desc = Pparam_val (Nolabel, None, Pat.construct unit None) } in - let body = Exp.function_ [ unit_param ] None (Pfunction_body body) in - toplevel_let ~name ~body + let body = + Exp.function_ [ unit_param ] None + (Pfunction_body (Untypeast.untype_expression body)) + in + untyped_toplevel_let ~name ~body + + module Id_map = Map.Make (struct + type t = string list + + let compare = List.compare ~cmp:String.compare + end) (* Generates [let name params = body]. *) let toplevel_function params ~name ~body = - let open Ast_helper in - let used, params = + let choose_param_name ~basename ~already_used param_path = + let mem_value s = Id_map.exists (fun _ v -> String.equal s v) in + if mem_value basename already_used then + let other_name = + match Path.flatten param_path with + | `Contains_apply -> assert false + | `Ok (id, path) -> + Ident.name id :: path + |> List.map ~f:String.lowercase_ascii + |> String.concat ~sep:"_" + in + if mem_value other_name already_used then + Fresh_name.gen_val_name ~is_bound:mem_value other_name already_used + else other_name + else basename + in + let rec compute_path = function + | Path.Pident id -> [ `stamp (Ident.stamp id) ] + | Pdot (p, s) -> `string s :: compute_path p + | Pextra_ty (p, _) -> compute_path p + | Papply (p, p') -> compute_path p @ compute_path p' + in + let used_params, params = List.fold_left_map ~f:(fun already_used param -> - let param_name = Path.last param in let param_name = - if String.Set.mem param_name already_used then - let other_name = - match Path.flatten param with - | `Contains_apply -> assert false - | `Ok (id, path) -> - Ident.name id :: path - |> List.map ~f:String.lowercase_ascii - |> String.concat ~sep:"_" - in - if String.Set.mem other_name already_used then - Fresh_name.gen_val_name_set other_name already_used - else other_name - else param_name + choose_param_name ~basename:(Path.last param) ~already_used param + in + let param_pattern = + Ast_helper.Pat.var (Location.mknoloc param_name) in let fun_param = - let pattern = - Pat.construct - (Location.mknoloc (Longident.Lident param_name)) - None - in { Parsetree.pparam_loc = Location.none; - pparam_desc = Pparam_val (Nolabel, None, pattern) + pparam_desc = Pparam_val (Nolabel, None, param_pattern) } in - (String.Set.add param_name already_used, fun_param)) - ~init:String.Set.empty params + let id = + match Path.flatten param with + | `Contains_apply -> assert false + | `Ok (_id, ss) -> ss + in + Format_doc.asprintf "%a | Computed: %S" Path.print param + (compute_path param |> List.rev + |> List.map ~f:(function + | `stamp i -> Int.to_string i + | `string s -> s) + |> String.concat ~sep:".") + |> prerr_endline; + (Id_map.add id param_name already_used, fun_param)) + ~init:Id_map.empty params in - (* traverser parsetree et renommer variable qui ont le même stamp en ça *) - (* Parsetree. *) - let _mapper = - { Tast_mapper.default with - expr = - (fun mapper expr -> - match expr.exp_desc with - | Texp_ident _ -> - (* Ident.t *) - expr - (* Longident.flatten *) - | _ -> Tast_mapper.default.expr mapper expr) - } + + (* Id_map.iter + (fun stamp v -> + Format_doc.asprintf "Stamp: %s | %S" + (stamp |> List.rev + |> List.map ~f:(function + | `stamp i -> Int.to_string i + | `string s -> s) + |> String.concat ~sep:".") + v + |> prerr_endline) + used_params; *) + + (* prendre résultat de Path.flatten et virer le premier ident *) + let foobar expr = + let mapper = + { Tast_mapper.default with + expr = + (fun mapper expr -> + match expr.exp_desc with + | Texp_ident (path, _, vd) -> begin + prerr_endline + @@ (compute_path path |> List.rev + |> List.map ~f:(function + | `stamp i -> Int.to_string i + | `string s -> s) + |> String.concat ~sep:"."); + match Id_map.find_opt ( match Path.flatten path with + | `Contains_apply -> assert false + | `Ok (_id, ss) -> ss) used_params with + | Some new_name -> + { expr with + exp_desc = + Typedtree.Texp_ident + (path, Location.mknoloc (Longident.Lident new_name), vd) + } + | _ -> expr + end + | _ -> Tast_mapper.default.expr mapper expr) + } + in + mapper.expr mapper expr in let body = - { Typedtree.exp_desc = Texp_function (params, Tfunction_body body); - exp_loc = Location.none; - exp_extra = []; - exp_type = Types.Transient_expr.create Tnil ~level:0 ~scope:0 ~id:0; - exp_env = Env.empty; - exp_attributes = [] - } + Ast_helper.Exp.function_ params None + (Parsetree.Pfunction_body (Untypeast.untype_expression (foobar body))) in - toplevel_let ~name ~body + untyped_toplevel_let ~name ~body let ident ~name = Longident.Lident name |> Location.mknoloc |> Ast_helper.Exp.ident @@ -187,7 +238,7 @@ type extraction = name : extraction_name; (** Binding name of the extracted expression. *) gen_binding_kind : rec_flag; binding_generator : - name:string -> body:Parsetree.expression -> Parsetree.structure_item; + name:string -> body:Typedtree.expression -> Parsetree.structure_item; call_generator : name:string -> Parsetree.expression; call_need_parenthesis : bool (** Sometime we must parenthised call in order to type check. *) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index 26c5aea976..ee3de34f7d 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -167,3 +167,13 @@ let complicated_function x y = let x = 13 end in a + b + (c * x * y) + z + D.x + +let f () = + let module D = struct + let x = 42 + end in + let module M = struct + let x = 1 + end in + let a, b, c, x = (1, 2, 3, 4) in + a + b + c + D.x + x + M.(x + a) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index 581a37efaa..c800698967 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -69,7 +69,7 @@ "line": 21, "col": 3 }, - "content": "let fun_name3 (x) (y) = print_endline (x ^ (y ^ z)) + "content": "let fun_name3 x y = print_endline (x ^ (y ^ z)) let test x y = let fun_name2 = Fun.id in let m = @@ -106,7 +106,7 @@ "content": "let rec z x = (fun_name2 x) and y = 80 - and fun_name2 (x) = (10 + y) + x", + and fun_name2 x = (10 + y) + x", "selection-range": { "start": { "line": 33, @@ -273,7 +273,7 @@ "line": 71, "col": 27 }, - "content": "let fun_name3 (x) (fun_name2) = [(+); (-); fun_name2] @ x + "content": "let fun_name3 x fun_name2 = [(+); (-); fun_name2] @ x let func () = let x = [] in Fun.protect @@ -344,7 +344,7 @@ "line": 81, "col": 5 }, - "content": "let fun_name2 (x) = x + "content": "let fun_name2 x = x let rec f = List.map Fun.id and y = [ 10; 20; 30 ] @@ -408,7 +408,7 @@ "line": 85, "col": 21 }, - "content": "let fun_name2 (x) = x * 2 + "content": "let fun_name2 x = x * 2 let f x = (fun_name2 x) + 3", "selection-range": { "start": { @@ -436,7 +436,7 @@ "line": 89, "col": 13 }, - "content": "let fun_name2 (x) (y) = x * y + "content": "let fun_name2 x y = x * y let f x = let y = 0 in (fun_name2 x y) + 3", @@ -497,7 +497,7 @@ TODO: This extraction shouldn't be allowed. "line": 96, "col": 15 }, - "content": "let fun_name2 (x) = x + 1 + "content": "let fun_name2 x = x + 1 let f x = (fun_name2 x)", "selection-range": { "start": { @@ -525,7 +525,7 @@ TODO: This extraction shouldn't be allowed. "line": 100, "col": 15 }, - "content": "let fun_name2 (x) = x + y + "content": "let fun_name2 x = x + y let f x = (fun_name2 x)", "selection-range": { "start": { @@ -553,7 +553,7 @@ TODO: This extraction shouldn't be allowed. "line": 102, "col": 37 }, - "content": "let fun_name2 (x) = List.map (fun y -> y + 1) x + "content": "let fun_name2 x = List.map (fun y -> y + 1) x let f x = (fun_name2 x)", "selection-range": { "start": { @@ -581,7 +581,7 @@ TODO: This extraction shouldn't be allowed. "line": 106, "col": 7 }, - "content": "let fun_name2 (y) = y + 2 + "content": "let fun_name2 y = y + 2 let f y = let y = y + 1 in (fun_name2 y)", @@ -639,7 +639,7 @@ TODO: This extraction shouldn't be allowed. "line": 114, "col": 15 }, - "content": "let fun_name2 (x) (y) = x * y + "content": "let fun_name2 x y = x * y let f x = let module M = struct let y = 0 @@ -671,7 +671,7 @@ TODO: This extraction shouldn't be allowed. "line": 120, "col": 11 }, - "content": "let z1 (x) (y) = x + y + "content": "let z1 x y = x + y let f = let x = 1 in let y = 2 in @@ -703,7 +703,7 @@ TODO: This extraction shouldn't be allowed. "line": 133, "col": 7 }, - "content": "let pp_markup (bold_tag) = + "content": "let pp_markup bold_tag = fun ppf markup -> match markup with | Text txt -> pp_print_string ppf txt @@ -740,7 +740,7 @@ TODO: This extraction shouldn't be allowed. "line": 146, "col": 26 }, - "content": "let add (x) (z) (y) = ((x * z) * y) + A.a + "content": "let add x z y = ((x * z) * y) + A.a let f x = let module Empty = struct end in let module M = struct @@ -842,7 +842,7 @@ TODO: This extraction shouldn't be allowed. "line": 133, "col": 7 }, - "content": "let fun_name2 (ppf) (doc) (bold_tag) = + "content": "let fun_name2 ppf doc bold_tag = fprintf ppf \"%a\" (pp_print_list ?pp_sep:None (fun ppf markup -> @@ -880,7 +880,7 @@ TODO: This extraction shouldn't be allowed. "line": 169, "col": 31 }, - "content": "let fun_name2 (x) (y) (a) (b) (c) (d_x) = (((a + b) + ((c * x) * y)) + z) + d_x + "content": "let fun_name2 x y a b c d_x = (((a + b) + ((c * x) * y)) + z) + d_x let complicated_function x y = let a = 10 in let b = 11 in @@ -902,3 +902,5 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } + + $ $MERLIN single refactoring-extract-region -start 179:2 -end 179:33 < func.ml From d314db6853c50b9aedb2d12602634572957dee90 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Thu, 25 Sep 2025 12:43:22 +0200 Subject: [PATCH 24/31] Fix CLI doc and typos. --- doc/dev/PROTOCOL.md | 4 ++-- src/analysis/refactor_extract_region.ml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/dev/PROTOCOL.md b/doc/dev/PROTOCOL.md index 0eb76d3109..bb69c4c944 100644 --- a/doc/dev/PROTOCOL.md +++ b/doc/dev/PROTOCOL.md @@ -485,11 +485,11 @@ The result is returned as a list of: } ``` -### `refactor-extract-region -start -stop -extract-name ` +### `refactor-extract-region -start -end -extract-name ` ``` -start Where extracted region start - -stop Where extracted region end + -end Where extracted region end -extract-name Name used for the generated let binding ``` diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 3252d864a0..7563d40763 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -471,7 +471,7 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | Some name -> Fixed name in let remove_path_prefix_of_bound_values expr = - (* We need to unquality bound values. Otherwise, the generated call will use + (* We need to unqualify bound values. Otherwise, the generated call will use the qualified name even if it does not exist in the scope. Examples: let f () = From 11805bab4a0cf95d8d950637be423824e866ee4d Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Fri, 26 Sep 2025 12:32:26 +0200 Subject: [PATCH 25/31] Clean up. --- src/analysis/refactor_extract_region.ml | 78 +++---------------------- 1 file changed, 9 insertions(+), 69 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 7563d40763..6a9063c1b1 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -92,8 +92,8 @@ module Gen = struct (* Generates [let name params = body]. *) let toplevel_function params ~name ~body = let choose_param_name ~basename ~already_used param_path = - let mem_value s = Id_map.exists (fun _ v -> String.equal s v) in - if mem_value basename already_used then + let param_name = Path.last param_path in + if String.Set.mem param_name already_used then let other_name = match Path.flatten param_path with | `Contains_apply -> assert false @@ -102,18 +102,13 @@ module Gen = struct |> List.map ~f:String.lowercase_ascii |> String.concat ~sep:"_" in - if mem_value other_name already_used then - Fresh_name.gen_val_name ~is_bound:mem_value other_name already_used + if String.Set.mem other_name already_used then + Fresh_name.gen_val_name ~is_bound:String.Set.mem other_name + already_used else other_name else basename in - let rec compute_path = function - | Path.Pident id -> [ `stamp (Ident.stamp id) ] - | Pdot (p, s) -> `string s :: compute_path p - | Pextra_ty (p, _) -> compute_path p - | Papply (p, p') -> compute_path p @ compute_path p' - in - let used_params, params = + let _used_params, params = List.fold_left_map ~f:(fun already_used param -> let param_name = @@ -127,67 +122,12 @@ module Gen = struct pparam_desc = Pparam_val (Nolabel, None, param_pattern) } in - let id = - match Path.flatten param with - | `Contains_apply -> assert false - | `Ok (_id, ss) -> ss - in - Format_doc.asprintf "%a | Computed: %S" Path.print param - (compute_path param |> List.rev - |> List.map ~f:(function - | `stamp i -> Int.to_string i - | `string s -> s) - |> String.concat ~sep:".") - |> prerr_endline; - (Id_map.add id param_name already_used, fun_param)) - ~init:Id_map.empty params - in - - (* Id_map.iter - (fun stamp v -> - Format_doc.asprintf "Stamp: %s | %S" - (stamp |> List.rev - |> List.map ~f:(function - | `stamp i -> Int.to_string i - | `string s -> s) - |> String.concat ~sep:".") - v - |> prerr_endline) - used_params; *) - - (* prendre résultat de Path.flatten et virer le premier ident *) - let foobar expr = - let mapper = - { Tast_mapper.default with - expr = - (fun mapper expr -> - match expr.exp_desc with - | Texp_ident (path, _, vd) -> begin - prerr_endline - @@ (compute_path path |> List.rev - |> List.map ~f:(function - | `stamp i -> Int.to_string i - | `string s -> s) - |> String.concat ~sep:"."); - match Id_map.find_opt ( match Path.flatten path with - | `Contains_apply -> assert false - | `Ok (_id, ss) -> ss) used_params with - | Some new_name -> - { expr with - exp_desc = - Typedtree.Texp_ident - (path, Location.mknoloc (Longident.Lident new_name), vd) - } - | _ -> expr - end - | _ -> Tast_mapper.default.expr mapper expr) - } - in - mapper.expr mapper expr + (String.Set.add param_name already_used, fun_param)) + ~init:String.Set.empty params in let body = Ast_helper.Exp.function_ params None - (Parsetree.Pfunction_body (Untypeast.untype_expression (foobar body))) + (Parsetree.Pfunction_body (Untypeast.untype_expression body)) in untyped_toplevel_let ~name ~body From d3f6bdfca0a7701251977d625ce3f4781b6a8df6 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Fri, 26 Sep 2025 12:32:40 +0200 Subject: [PATCH 26/31] Add FIXME tests. --- .../extraction-issue.t/foo.ml | 20 +++++ .../extraction-issue.t/run.t | 77 +++++++++++++++++++ .../func-extraction.t/func.ml | 21 ----- .../func-extraction.t/run.t | 37 --------- 4 files changed, 97 insertions(+), 58 deletions(-) create mode 100644 tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml create mode 100644 tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml b/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml new file mode 100644 index 0000000000..d1679d1ace --- /dev/null +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml @@ -0,0 +1,20 @@ +let z = 100 + +let complicated_function x y = + let a = 10 in + let b = 11 in + let c = 12 in + let module D = struct + let x = 13 + end in + a + b + (c * x * y) + z + D.x + +let f () = + let module D = struct + let x = 42 + end in + let module M = struct + let x = 1 + end in + let a, b, c, x = (1, 2, 3, 4) in + a + b + c + D.x + x + M.(x + a) diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t new file mode 100644 index 0000000000..e3a82e75b9 --- /dev/null +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t @@ -0,0 +1,77 @@ +FIXME: `x` is used instead of `d_x` in the extracted function body! +Should be: (((a + b) + ((c * x) * y)) + z) + d_x + + $ $MERLIN single refactoring-extract-region -start 10:2 -end 10:31 < foo.ml + { + "class": "return", + "value": { + "start": { + "line": 3, + "col": 0 + }, + "end": { + "line": 10, + "col": 31 + }, + "content": "let fun_name1 x y a b c d_x = (((a + b) + ((c * x) * y)) + z) + x + let complicated_function x y = + let a = 10 in + let b = 11 in + let c = 12 in + let module D = struct + let x = 13 + end in + (fun_name1 x y a b c D.x)", + "selection-range": { + "start": { + "line": 3, + "col": 4 + }, + "end": { + "line": 3, + "col": 13 + } + } + }, + "notifications": [] + } + +FIXME: the extracted function body is wrong. +Should be: a + b + c + d_x + x + (m_x + a) + + $ $MERLIN single refactoring-extract-region -start 20:2 -end 20:33 < foo.ml + { + "class": "return", + "value": { + "start": { + "line": 12, + "col": 0 + }, + "end": { + "line": 20, + "col": 33 + }, + "content": "let fun_name1 a b c x d_x m_x = + ((((a + b) + c) + x) + x) + (let open M in x + a) + let f () = + let module D = struct + let x = 42 + end in + let module M = struct + let x = 1 + end in + let a, b, c, x = (1, 2, 3, 4) in + (fun_name1 a b c x D.x M.x)", + "selection-range": { + "start": { + "line": 12, + "col": 4 + }, + "end": { + "line": 12, + "col": 13 + } + } + }, + "notifications": [] + } diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml index ee3de34f7d..5721ce1d8b 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/func.ml @@ -156,24 +156,3 @@ module T = struct let printer = pp_print_list pp_print_int in printf "%a\n" printer a_list end - -let z = 100 - -let complicated_function x y = - let a = 10 in - let b = 11 in - let c = 12 in - let module D = struct - let x = 13 - end in - a + b + (c * x * y) + z + D.x - -let f () = - let module D = struct - let x = 42 - end in - let module M = struct - let x = 1 - end in - let a, b, c, x = (1, 2, 3, 4) in - a + b + c + D.x + x + M.(x + a) diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index c800698967..a26951fcbf 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -867,40 +867,3 @@ TODO: This extraction shouldn't be allowed. }, "notifications": [] } - - $ $MERLIN single refactoring-extract-region -start 169:2 -end 169:31 < func.ml - { - "class": "return", - "value": { - "start": { - "line": 162, - "col": 0 - }, - "end": { - "line": 169, - "col": 31 - }, - "content": "let fun_name2 x y a b c d_x = (((a + b) + ((c * x) * y)) + z) + d_x - let complicated_function x y = - let a = 10 in - let b = 11 in - let c = 12 in - let module D = struct - let x = 13 - end in - (fun_name2 x y a b c D.x)", - "selection-range": { - "start": { - "line": 162, - "col": 4 - }, - "end": { - "line": 162, - "col": 13 - } - } - }, - "notifications": [] - } - - $ $MERLIN single refactoring-extract-region -start 179:2 -end 179:33 < func.ml From 68b02e7f255136f737dda621a80fc8f4b99d064d Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Tue, 30 Sep 2025 11:23:41 +0200 Subject: [PATCH 27/31] Fix PR nb. --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 14cf9b7027..cf1c19a2fb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ unreleased ========== + merlin library - - Implement new refactor-extract-region command for extracting region to a fresh let binding (#1546) + - Implement new refactor-extract-region command for extracting region to a fresh let binding (#1948) - Fix `merlin_reader` for OpenBSD (#1956) merlin 5.5 From fc8c063dd9531e9ce58c7e6292dd06ac7f18b539 Mon Sep 17 00:00:00 2001 From: Tim-ats-d Date: Thu, 2 Oct 2025 12:08:15 +0200 Subject: [PATCH 28/31] Ensure minimality for refactor issue examples. --- .../extraction-issue.t/foo.ml | 9 ++--- .../extraction-issue.t/run.t | 36 +++++++++---------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml b/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml index d1679d1ace..4bc6d7698e 100644 --- a/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/foo.ml @@ -1,13 +1,10 @@ let z = 100 let complicated_function x y = - let a = 10 in - let b = 11 in - let c = 12 in let module D = struct let x = 13 end in - a + b + (c * x * y) + z + D.x + (x * y) + D.x let f () = let module D = struct @@ -16,5 +13,5 @@ let f () = let module M = struct let x = 1 end in - let a, b, c, x = (1, 2, 3, 4) in - a + b + c + D.x + x + M.(x + a) + let x = 10 in + D.x + x + M.x diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t index e3a82e75b9..391b8c5b51 100644 --- a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t @@ -1,7 +1,7 @@ FIXME: `x` is used instead of `d_x` in the extracted function body! -Should be: (((a + b) + ((c * x) * y)) + z) + d_x +Should be: (x * y) + d_x - $ $MERLIN single refactoring-extract-region -start 10:2 -end 10:31 < foo.ml + $ $MERLIN single refactoring-extract-region -start 7:2 -end 7:15 < foo.ml { "class": "return", "value": { @@ -10,18 +10,15 @@ Should be: (((a + b) + ((c * x) * y)) + z) + d_x "col": 0 }, "end": { - "line": 10, - "col": 31 + "line": 7, + "col": 15 }, - "content": "let fun_name1 x y a b c d_x = (((a + b) + ((c * x) * y)) + z) + x + "content": "let fun_name1 x y d_x = (x * y) + x let complicated_function x y = - let a = 10 in - let b = 11 in - let c = 12 in let module D = struct let x = 13 end in - (fun_name1 x y a b c D.x)", + (fun_name1 x y D.x)", "selection-range": { "start": { "line": 3, @@ -37,22 +34,21 @@ Should be: (((a + b) + ((c * x) * y)) + z) + d_x } FIXME: the extracted function body is wrong. -Should be: a + b + c + d_x + x + (m_x + a) +Should be: d_x + x + m_x - $ $MERLIN single refactoring-extract-region -start 20:2 -end 20:33 < foo.ml + $ $MERLIN single refactoring-extract-region -start 17:2 -end 17:16 < foo.ml { "class": "return", "value": { "start": { - "line": 12, + "line": 9, "col": 0 }, "end": { - "line": 20, - "col": 33 + "line": 17, + "col": 15 }, - "content": "let fun_name1 a b c x d_x m_x = - ((((a + b) + c) + x) + x) + (let open M in x + a) + "content": "let fun_name1 x d_x m_x = (x + x) + x let f () = let module D = struct let x = 42 @@ -60,15 +56,15 @@ Should be: a + b + c + d_x + x + (m_x + a) let module M = struct let x = 1 end in - let a, b, c, x = (1, 2, 3, 4) in - (fun_name1 a b c x D.x M.x)", + let x = 10 in + (fun_name1 x D.x M.x)", "selection-range": { "start": { - "line": 12, + "line": 9, "col": 4 }, "end": { - "line": 12, + "line": 9, "col": 13 } } From 86cf319225888ca45e05c15e28482d255ce456dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Thu, 2 Oct 2025 12:19:10 +0200 Subject: [PATCH 29/31] Fix the path names in extractions --- src/analysis/refactor_extract_region.ml | 20 ++++++++++--------- .../extraction-issue.t/run.t | 8 ++++---- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 6a9063c1b1..2e0775814e 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -89,19 +89,20 @@ module Gen = struct let compare = List.compare ~cmp:String.compare end) + let name_of_path path = + match Path.flatten path with + | `Contains_apply -> "_functor_path_not_supported" + | `Ok (id, path) -> + Ident.name id :: path + |> List.map ~f:String.lowercase_ascii + |> String.concat ~sep:"_" + (* Generates [let name params = body]. *) let toplevel_function params ~name ~body = let choose_param_name ~basename ~already_used param_path = let param_name = Path.last param_path in if String.Set.mem param_name already_used then - let other_name = - match Path.flatten param_path with - | `Contains_apply -> assert false - | `Ok (id, path) -> - Ident.name id :: path - |> List.map ~f:String.lowercase_ascii - |> String.concat ~sep:"_" - in + let other_name = name_of_path param_path in if String.Set.mem other_name already_used then Fresh_name.gen_val_name ~is_bound:String.Set.mem other_name already_used @@ -428,6 +429,7 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | Texp_ident (Pdot (path, name), longident, vd) when is_bound_var (Path.head path) && is_module_bound_in_toplevel_env path -> + let name = Gen.name_of_path (Pdot (path, name)) in let ident = { longident with txt = Longident.Lident name } in { expr with exp_desc = Texp_ident (path, ident, vd) } | _ -> Tast_mapper.default.expr mapper expr) @@ -462,7 +464,7 @@ let largest_expr_between ~start ~stop nodes = | Expression expr when node_loc.loc_ghost = false && is_inside_region node_loc -> (* We filter expression that have a ghost location. Otherwise, expression - such as [let f x = 10 + x] can be extracted and this can lead to invalid + such as [let f x = 10 + x] can be extracted and this can lead to invalid code gen. ^^^^^^^^^^ *) Some (expr, env) | _ -> diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t index 391b8c5b51..6def99d90b 100644 --- a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t @@ -1,4 +1,4 @@ -FIXME: `x` is used instead of `d_x` in the extracted function body! +`x` is used instead of `d_x` in the extracted function body! Should be: (x * y) + d_x $ $MERLIN single refactoring-extract-region -start 7:2 -end 7:15 < foo.ml @@ -13,7 +13,7 @@ Should be: (x * y) + d_x "line": 7, "col": 15 }, - "content": "let fun_name1 x y d_x = (x * y) + x + "content": "let fun_name1 x y d_x = (x * y) + d_x let complicated_function x y = let module D = struct let x = 13 @@ -33,7 +33,7 @@ Should be: (x * y) + d_x "notifications": [] } -FIXME: the extracted function body is wrong. +The extracted function body is wrong. Should be: d_x + x + m_x $ $MERLIN single refactoring-extract-region -start 17:2 -end 17:16 < foo.ml @@ -48,7 +48,7 @@ Should be: d_x + x + m_x "line": 17, "col": 15 }, - "content": "let fun_name1 x d_x m_x = (x + x) + x + "content": "let fun_name1 x d_x m_x = (d_x + x) + m_x let f () = let module D = struct let x = 42 From ee60e2d5cfd96c8d6d22c1ec11fcaae2ccbd43b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Thu, 2 Oct 2025 12:33:20 +0200 Subject: [PATCH 30/31] Revert "Fix the path names in extractions" This reverts commit 86cf319225888ca45e05c15e28482d255ce456dc. --- src/analysis/refactor_extract_region.ml | 20 +++++++++---------- .../extraction-issue.t/run.t | 8 ++++---- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 2e0775814e..6a9063c1b1 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -89,20 +89,19 @@ module Gen = struct let compare = List.compare ~cmp:String.compare end) - let name_of_path path = - match Path.flatten path with - | `Contains_apply -> "_functor_path_not_supported" - | `Ok (id, path) -> - Ident.name id :: path - |> List.map ~f:String.lowercase_ascii - |> String.concat ~sep:"_" - (* Generates [let name params = body]. *) let toplevel_function params ~name ~body = let choose_param_name ~basename ~already_used param_path = let param_name = Path.last param_path in if String.Set.mem param_name already_used then - let other_name = name_of_path param_path in + let other_name = + match Path.flatten param_path with + | `Contains_apply -> assert false + | `Ok (id, path) -> + Ident.name id :: path + |> List.map ~f:String.lowercase_ascii + |> String.concat ~sep:"_" + in if String.Set.mem other_name already_used then Fresh_name.gen_val_name ~is_bound:String.Set.mem other_name already_used @@ -429,7 +428,6 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | Texp_ident (Pdot (path, name), longident, vd) when is_bound_var (Path.head path) && is_module_bound_in_toplevel_env path -> - let name = Gen.name_of_path (Pdot (path, name)) in let ident = { longident with txt = Longident.Lident name } in { expr with exp_desc = Texp_ident (path, ident, vd) } | _ -> Tast_mapper.default.expr mapper expr) @@ -464,7 +462,7 @@ let largest_expr_between ~start ~stop nodes = | Expression expr when node_loc.loc_ghost = false && is_inside_region node_loc -> (* We filter expression that have a ghost location. Otherwise, expression - such as [let f x = 10 + x] can be extracted and this can lead to invalid + such as [let f x = 10 + x] can be extracted and this can lead to invalid code gen. ^^^^^^^^^^ *) Some (expr, env) | _ -> diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t index 6def99d90b..391b8c5b51 100644 --- a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t @@ -1,4 +1,4 @@ -`x` is used instead of `d_x` in the extracted function body! +FIXME: `x` is used instead of `d_x` in the extracted function body! Should be: (x * y) + d_x $ $MERLIN single refactoring-extract-region -start 7:2 -end 7:15 < foo.ml @@ -13,7 +13,7 @@ Should be: (x * y) + d_x "line": 7, "col": 15 }, - "content": "let fun_name1 x y d_x = (x * y) + d_x + "content": "let fun_name1 x y d_x = (x * y) + x let complicated_function x y = let module D = struct let x = 13 @@ -33,7 +33,7 @@ Should be: (x * y) + d_x "notifications": [] } -The extracted function body is wrong. +FIXME: the extracted function body is wrong. Should be: d_x + x + m_x $ $MERLIN single refactoring-extract-region -start 17:2 -end 17:16 < foo.ml @@ -48,7 +48,7 @@ Should be: d_x + x + m_x "line": 17, "col": 15 }, - "content": "let fun_name1 x d_x m_x = (d_x + x) + m_x + "content": "let fun_name1 x d_x m_x = (x + x) + x let f () = let module D = struct let x = 42 From 9bb9388c61cb18b340519f54c7473e58d2acf051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Thu, 2 Oct 2025 13:11:14 +0200 Subject: [PATCH 31/31] Attempt #2 at fixing pathed param names --- src/analysis/refactor_extract_region.ml | 27 ++++++++++--------- .../extraction-issue.t/run.t | 6 ++--- .../func-extraction.t/run.t | 4 +-- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/analysis/refactor_extract_region.ml b/src/analysis/refactor_extract_region.ml index 6a9063c1b1..c2e06bcd2e 100644 --- a/src/analysis/refactor_extract_region.ml +++ b/src/analysis/refactor_extract_region.ml @@ -89,19 +89,20 @@ module Gen = struct let compare = List.compare ~cmp:String.compare end) + let param_name_of_path path = + match Path.flatten path with + | `Contains_apply -> "_functor_paths_not_handled" + | `Ok (id, path) -> + Ident.name id :: path + |> List.map ~f:String.lowercase_ascii + |> String.concat ~sep:"_" + (* Generates [let name params = body]. *) let toplevel_function params ~name ~body = let choose_param_name ~basename ~already_used param_path = let param_name = Path.last param_path in if String.Set.mem param_name already_used then - let other_name = - match Path.flatten param_path with - | `Contains_apply -> assert false - | `Ok (id, path) -> - Ident.name id :: path - |> List.map ~f:String.lowercase_ascii - |> String.concat ~sep:"_" - in + let other_name = param_name_of_path param_path in if String.Set.mem other_name already_used then Fresh_name.gen_val_name ~is_bound:String.Set.mem other_name already_used @@ -112,7 +113,8 @@ module Gen = struct List.fold_left_map ~f:(fun already_used param -> let param_name = - choose_param_name ~basename:(Path.last param) ~already_used param + choose_param_name ~basename:(param_name_of_path param) ~already_used + param in let param_pattern = Ast_helper.Pat.var (Location.mknoloc param_name) @@ -235,7 +237,7 @@ let occuring_vars_path node = Ident.name (Path.head path) <> "Stdlib") let analyze_expr expr expr_env ~toplevel_item = - let is_value_unbound path = + let is_value_bound_locally path = let is_bound path env = try let _ = Env.find_value path env in @@ -258,7 +260,7 @@ let analyze_expr expr expr_env ~toplevel_item = let analysis = Path.Set.fold (fun var_path acc -> - if is_value_unbound var_path then + if is_value_bound_locally var_path then match toplevel_item.kind with | Let bindings when is_recursive toplevel_item @@ -428,6 +430,7 @@ let extract_expr_to_toplevel ?extract_name expr ~expr_env ~toplevel_item = | Texp_ident (Pdot (path, name), longident, vd) when is_bound_var (Path.head path) && is_module_bound_in_toplevel_env path -> + let name = Gen.param_name_of_path (Pdot (path, name)) in let ident = { longident with txt = Longident.Lident name } in { expr with exp_desc = Texp_ident (path, ident, vd) } | _ -> Tast_mapper.default.expr mapper expr) @@ -462,7 +465,7 @@ let largest_expr_between ~start ~stop nodes = | Expression expr when node_loc.loc_ghost = false && is_inside_region node_loc -> (* We filter expression that have a ghost location. Otherwise, expression - such as [let f x = 10 + x] can be extracted and this can lead to invalid + such as [let f x = 10 + x] can be extracted and this can lead to invalid code gen. ^^^^^^^^^^ *) Some (expr, env) | _ -> diff --git a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t index 391b8c5b51..c61c72b90f 100644 --- a/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t +++ b/tests/test-dirs/refactor-extract-region/extraction-issue.t/run.t @@ -1,4 +1,3 @@ -FIXME: `x` is used instead of `d_x` in the extracted function body! Should be: (x * y) + d_x $ $MERLIN single refactoring-extract-region -start 7:2 -end 7:15 < foo.ml @@ -13,7 +12,7 @@ Should be: (x * y) + d_x "line": 7, "col": 15 }, - "content": "let fun_name1 x y d_x = (x * y) + x + "content": "let fun_name1 x y d_x = (x * y) + d_x let complicated_function x y = let module D = struct let x = 13 @@ -33,7 +32,6 @@ Should be: (x * y) + d_x "notifications": [] } -FIXME: the extracted function body is wrong. Should be: d_x + x + m_x $ $MERLIN single refactoring-extract-region -start 17:2 -end 17:16 < foo.ml @@ -48,7 +46,7 @@ Should be: d_x + x + m_x "line": 17, "col": 15 }, - "content": "let fun_name1 x d_x m_x = (x + x) + x + "content": "let fun_name1 x d_x m_x = (d_x + x) + m_x let f () = let module D = struct let x = 42 diff --git a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t index a26951fcbf..7d99fc410c 100644 --- a/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t +++ b/tests/test-dirs/refactor-extract-region/func-extraction.t/run.t @@ -639,7 +639,7 @@ TODO: This extraction shouldn't be allowed. "line": 114, "col": 15 }, - "content": "let fun_name2 x y = x * y + "content": "let fun_name2 x m_y = x * m_y let f x = let module M = struct let y = 0 @@ -740,7 +740,7 @@ TODO: This extraction shouldn't be allowed. "line": 146, "col": 26 }, - "content": "let add x z y = ((x * z) * y) + A.a + "content": "let add x m_z m_mm_y = ((x * m_z) * m_mm_y) + A.a let f x = let module Empty = struct end in let module M = struct