From eaf0cc61bf7ff71a41c902339e3936d1656eae01 Mon Sep 17 00:00:00 2001 From: Zach Allaun Date: Tue, 12 Sep 2023 23:05:13 -0400 Subject: [PATCH] Change `get_range/1` to allow returning `nil` and add syntax corpus (#107) * chore: add a corpus of syntax examples adapted from tree-sitter-elixir * feat!: `get_range/1` returns `nil` if range cannot be calculated --- .credo.exs | 12 +- .formatter.exs | 4 +- lib/sourceror.ex | 15 +- lib/sourceror/range.ex | 144 ++++----- test/corpus/NOTICE | 207 +++++++++++++ test/corpus/comment.ex | 34 ++ test/corpus/do_end.ex | 291 ++++++++++++++++++ test/corpus/edge_syntax.ex | 12 + test/corpus/edge_syntax_1_13.ex | 6 + test/corpus/expression/anonymous_function.ex | 76 +++++ test/corpus/expression/block.ex | 55 ++++ test/corpus/expression/call.ex | 207 +++++++++++++ test/corpus/expression/capture.ex | 28 ++ test/corpus/expression/operator.ex | 207 +++++++++++++ test/corpus/expression/operator_1_13.ex | 7 + test/corpus/expression/sigil.ex | 66 ++++ test/corpus/expression/sigil_1_13.ex | 4 + test/corpus/expression/sigil_1_15.ex | 3 + .../corpus/integration/function_definition.ex | 90 ++++++ test/corpus/integration/kernel.ex | 19 ++ test/corpus/integration/module_definition.ex | 34 ++ test/corpus/integration/spec.ex | 31 ++ test/corpus/semicolon.ex | 12 + test/corpus/term/alias.ex | 35 +++ test/corpus/term/atom.ex | 25 ++ test/corpus/term/atom_1_13.ex | 3 + test/corpus/term/bitstring.ex | 43 +++ test/corpus/term/boolean.ex | 4 + test/corpus/term/char.ex | 21 ++ test/corpus/term/charlist.ex | 67 ++++ test/corpus/term/float.ex | 17 + test/corpus/term/integer.ex | 24 ++ test/corpus/term/keyword_list.ex | 38 +++ test/corpus/term/list.ex | 18 ++ test/corpus/term/map.ex | 24 ++ test/corpus/term/nil.ex | 3 + test/corpus/term/string.ex | 96 ++++++ test/corpus/term/struct.ex | 51 +++ test/corpus/term/tuple.ex | 16 + test/corpus/unicode.ex | 24 ++ test/corpus/variable.ex | 25 ++ test/range_test.exs | 31 +- test/support/corpus.ex | 35 +++ 43 files 
changed, 2066 insertions(+), 98 deletions(-) create mode 100644 test/corpus/NOTICE create mode 100644 test/corpus/comment.ex create mode 100644 test/corpus/do_end.ex create mode 100644 test/corpus/edge_syntax.ex create mode 100644 test/corpus/edge_syntax_1_13.ex create mode 100644 test/corpus/expression/anonymous_function.ex create mode 100644 test/corpus/expression/block.ex create mode 100644 test/corpus/expression/call.ex create mode 100644 test/corpus/expression/capture.ex create mode 100644 test/corpus/expression/operator.ex create mode 100644 test/corpus/expression/operator_1_13.ex create mode 100644 test/corpus/expression/sigil.ex create mode 100644 test/corpus/expression/sigil_1_13.ex create mode 100644 test/corpus/expression/sigil_1_15.ex create mode 100644 test/corpus/integration/function_definition.ex create mode 100644 test/corpus/integration/kernel.ex create mode 100644 test/corpus/integration/module_definition.ex create mode 100644 test/corpus/integration/spec.ex create mode 100644 test/corpus/semicolon.ex create mode 100644 test/corpus/term/alias.ex create mode 100644 test/corpus/term/atom.ex create mode 100644 test/corpus/term/atom_1_13.ex create mode 100644 test/corpus/term/bitstring.ex create mode 100644 test/corpus/term/boolean.ex create mode 100644 test/corpus/term/char.ex create mode 100644 test/corpus/term/charlist.ex create mode 100644 test/corpus/term/float.ex create mode 100644 test/corpus/term/integer.ex create mode 100644 test/corpus/term/keyword_list.ex create mode 100644 test/corpus/term/list.ex create mode 100644 test/corpus/term/map.ex create mode 100644 test/corpus/term/nil.ex create mode 100644 test/corpus/term/string.ex create mode 100644 test/corpus/term/struct.ex create mode 100644 test/corpus/term/tuple.ex create mode 100644 test/corpus/unicode.ex create mode 100644 test/corpus/variable.ex create mode 100644 test/support/corpus.ex diff --git a/.credo.exs b/.credo.exs index fd19a5c..692d91d 100644 --- a/.credo.exs +++ b/.credo.exs 
@@ -6,10 +6,16 @@ included: [ "lib/", "src/", - "test/", + "test/" ], - excluded: [~r"/_build/", ~r"/deps/", ~r"/lib/sourceror/code/", ~r"/test/code/", "lib/sourceror/code.ex"] - }, + excluded: [ + ~r"/_build/", + ~r"/deps/", + ~r"/lib/sourceror/code/", + ~r"/test/corpus/", + "lib/sourceror/code.ex" + ] + } } ] } diff --git a/.formatter.exs b/.formatter.exs index 0a1488d..f77f21e 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -9,5 +9,7 @@ locals_without_parens = [ export: [ locals_without_parens: locals_without_parens ], - inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] + inputs: + ["{mix,.formatter}.exs"] ++ + (Path.wildcard("{config,lib,test}/**/*.{ex,exs}") -- Path.wildcard("test/corpus/**/*.ex")) ] diff --git a/lib/sourceror.ex b/lib/sourceror.ex index fda0dab..7f7bcef 100644 --- a/lib/sourceror.ex +++ b/lib/sourceror.ex @@ -484,13 +484,21 @@ defmodule Sourceror do get_start_position(left, default) end + def get_start_position({{:., _, [Kernel, :to_string]}, _, [left | _]}, default) do + get_start_position(left, default) + end + + def get_start_position({{:., _, [List, :to_charlist]}, meta, _}, default) do + position = Keyword.take(meta, [:line, :column]) + Keyword.merge(default, position) + end + def get_start_position({{:., _, [left | _]}, _, _}, default) do get_start_position(left, default) end def get_start_position({_, meta, _}, default) do position = Keyword.take(meta, [:line, :column]) - Keyword.merge(default, position) end @@ -621,6 +629,9 @@ defmodule Sourceror do The quoted expression must have at least line and column metadata, otherwise it is not possible to calculate an accurate range, or to calculate it at all. + Additionally, certain syntax constructs desugar into ASTs without a + meaningful range. In these cases, `get_range/1` returns `nil`. + This function is most useful when used after `Sourceror.parse_string/1`, before any kind of modification to the AST. 
@@ -654,7 +665,7 @@ defmodule Sourceror do ...> |> Sourceror.get_range(include_comments: true) %{start: [line: 1, column: 1], end: [line: 2, column: 11]} """ - @spec get_range(Macro.t()) :: range + @spec get_range(Macro.t()) :: range | nil def get_range(quoted, opts \\ []) do Sourceror.Range.get_range(quoted, opts) end diff --git a/lib/sourceror/range.ex b/lib/sourceror/range.ex index f0144f9..d01def1 100644 --- a/lib/sourceror/range.ex +++ b/lib/sourceror/range.ex @@ -7,13 +7,14 @@ defmodule Sourceror.Range do String.split(string, ~r/\n|\r\n|\r/) end + @spec get_range(Macro.t()) :: Sourceror.range() | nil def get_range(quoted, opts \\ []) do - range = do_get_range(quoted) - - if Keyword.get(opts, :include_comments, false) do - add_comments_to_range(range, quoted) - else - range + with %{} = range <- do_get_range(quoted) do + if Keyword.get(opts, :include_comments, false) do + add_comments_to_range(range, quoted) + else + range + end end end @@ -51,16 +52,13 @@ defmodule Sourceror.Range do } end - @spec get_range(Macro.t()) :: Sourceror.range() + @spec do_get_range(Macro.t()) :: Sourceror.range() | nil defp do_get_range(quoted) # Module aliases starting with a non-atom or special form # e.g. 
__MODULE__.Nested, @module.Nested, module().Nested defp do_get_range({:__aliases__, meta, [{_, _, _} = first_segment | rest]}) do - %{start: start_pos} = do_get_range(first_segment) - %{end: end_pos} = do_get_range({:__aliases__, meta, rest}) - - %{start: start_pos, end: end_pos} + get_range_for_pair(first_segment, {:__aliases__, meta, rest}) end # Module aliases @@ -161,15 +159,14 @@ defmodule Sourceror.Range do [{_, _, _} | _] -> {first, rest} = List.pop_at(args, 0) {last, _} = List.pop_at(rest, -1, first) - - %{ - start: get_range(first).start, - end: get_range(last).end - } + get_range_for_pair(first, last) [charlist] when is_list(charlist) -> string = List.to_string(charlist) do_get_range({:__block__, meta, [string]}) + + [] -> + nil end end end @@ -188,27 +185,18 @@ defmodule Sourceror.Range do # 2-tuples from keyword lists defp do_get_range({left, right}) do - left_range = get_range(left) - right_range = get_range(right) - - %{start: left_range.start, end: right_range.end} + get_range_for_pair(left, right) end # Handles arguments. 
Lists are always wrapped in `:__block__`, so the only case # in which we can have a naked list is in partial keyword lists, as in `[:a, :b, c: d, e: f]`, # or stabs like `:foo -> :bar` - defp do_get_range(list) when is_list(list) do - first_range = List.first(list) |> get_range() - start_pos = first_range.start - - end_pos = - if last = List.last(list) do - get_range(last).end - else - first_range.end - end + defp do_get_range([first, _second | _] = list) do + get_range_for_pair(first, List.last(list)) + end - %{start: start_pos, end: end_pos} + defp do_get_range([first]) do + get_range(first) end # Stabs without args @@ -222,11 +210,8 @@ defmodule Sourceror.Range do # Stabs with args # a -> b - defp do_get_range({:->, _, [left_args, right]}) do - start_pos = get_range(left_args).start - end_pos = get_range(right).end - - %{start: start_pos, end: end_pos} + defp do_get_range({:->, _, [left, right]}) do + get_range_for_pair(left, right) end # Argument capture syntax @@ -245,10 +230,10 @@ defmodule Sourceror.Range do # Unwrapped qualified calls defp do_get_range({:., meta, [left, atom]}) when is_atom(atom) do - start_pos = get_range(left).start - atom_length = atom |> inspect() |> String.length() - - %{start: start_pos, end: [line: meta[:line], column: meta[:column] + atom_length]} + with %{start: start_pos} <- get_range(left) do + atom_length = atom |> inspect() |> String.length() + %{start: start_pos, end: [line: meta[:line], column: meta[:column] + atom_length]} + end end # Access syntax @@ -300,33 +285,28 @@ defmodule Sourceror.Range do # Unary operators defp do_get_range({op, meta, [arg]}) when is_unary_op(op) do - start_pos = Keyword.take(meta, [:line, :column]) - arg_range = get_range(arg) + with %{end: end_pos} <- get_range(arg) do + start_pos = Keyword.take(meta, [:line, :column]) - end_column = - if arg_range.end[:line] == meta[:line] do - arg_range.end[:column] - else - arg_range.end[:column] + String.length(to_string(op)) - end + end_column = + if 
end_pos[:line] == meta[:line] do + end_pos[:column] + else + end_pos[:column] + String.length(to_string(op)) + end - %{start: start_pos, end: [line: arg_range.end[:line], column: end_column]} + %{start: start_pos, end: [line: end_pos[:line], column: end_column]} + end end # Binary operators defp do_get_range({op, _, [left, right]}) when is_binary_op(op) do - %{ - start: get_range(left).start, - end: get_range(right).end - } + get_range_for_pair(left, right) end # Stepped ranges defp do_get_range({:"..//", _, [left, _middle, right]}) do - %{ - start: get_range(left).start, - end: get_range(right).end - } + get_range_for_pair(left, right) end # Bitstrings and interpolations @@ -386,14 +366,17 @@ defmodule Sourceror.Range do get_range_for_unqualified_call(quoted) end + # Catch-all + defp do_get_range(_), do: nil + defp get_range_for_unqualified_call({_call, meta, args} = quoted) do if Sourceror.has_closing_line?(quoted) do get_range_for_node_with_closing_line(quoted) else - start_pos = Keyword.take(meta, [:line, :column]) - end_pos = get_range(List.last(args)).end - - %{start: start_pos, end: end_pos} + with %{end: end_pos} <- get_range(List.last(args)) do + start_pos = Keyword.take(meta, [:line, :column]) + %{start: start_pos, end: end_pos} + end end end @@ -407,22 +390,23 @@ defmodule Sourceror.Range do [left] -> {left, 0} end - start_pos = get_range(left).start - identifier_pos = Keyword.take(meta, [:line, :column]) + with %{start: start_pos} <- get_range(left) do + identifier_pos = Keyword.take(meta, [:line, :column]) - parens_length = - if meta[:no_parens] do - 0 - else - 2 - end + parens_length = + if meta[:no_parens] do + 0 + else + 2 + end - end_pos = [ - line: identifier_pos[:line], - column: identifier_pos[:column] + right_len + parens_length - ] + end_pos = [ + line: identifier_pos[:line], + column: identifier_pos[:column] + right_len + parens_length + ] - %{start: start_pos, end: end_pos} + %{start: start_pos, end: end_pos} + end end end @@ -430,10 +414,7 
@@ defmodule Sourceror.Range do if Sourceror.has_closing_line?(quoted) do get_range_for_node_with_closing_line(quoted) else - start_pos = get_range(left).start - end_pos = get_range(List.last(args) || left).end - - %{start: start_pos, end: end_pos} + get_range_for_pair(left, List.last(args) || left) end end @@ -510,6 +491,13 @@ defmodule Sourceror.Range do end end + defp get_range_for_pair(left, right) do + with %{start: start_pos} <- get_range(left), + %{end: end_pos} <- get_range(right) do + %{start: start_pos, end: end_pos} + end + end + defp has_interpolations?(segments) do Enum.any?(segments, fn segment -> match?({:"::", _, _}, segment) or match?({{:., _, [Kernel, :to_string]}, _, _}, segment) diff --git a/test/corpus/NOTICE b/test/corpus/NOTICE new file mode 100644 index 0000000..74abb54 --- /dev/null +++ b/test/corpus/NOTICE @@ -0,0 +1,207 @@ +Files in this corpus have been adapted from the tree-sitter-elixir project: +https://github.com/elixir-lang/tree-sitter-elixir + +All licensing information relevant to this corpus has been duplicated below. + +== LICENSE (elixir-lang/tree-sitter-elixir) + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + +== NOTICE (elixir-lang/tree-sitter-elixir) for some file fragments from corpus + +Copyright (c) 2021 Anantha Kumaran + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test/corpus/comment.ex b/test/corpus/comment.ex new file mode 100644 index 0000000..b5e4f14 --- /dev/null +++ b/test/corpus/comment.ex @@ -0,0 +1,34 @@ +## empty + +# + +## single line + +# single comment + +## multiple start symbols + +### multiple "#" + +## many consecutive lines + +# many +# consecutive +1 +# lines + +## in the same line as regular code + +1 # comment + +## matches inside a nested structure + +[ 1, ## inside a list + { 2, # and a tuple, too! 
+ 3 } +] + +## does not match inside a string + +"# string" +"this is #{interpolation}" \ No newline at end of file diff --git a/test/corpus/do_end.ex b/test/corpus/do_end.ex new file mode 100644 index 0000000..151ba27 --- /dev/null +++ b/test/corpus/do_end.ex @@ -0,0 +1,291 @@ +## call without arguments + +fun do + a +end + +## call with arguments in parentheses + +fun(a, b) do + c +end + +## call with arguments without parentheses + +fun a, b do + c +end + +## remote call + +Mod.fun do + a +end + +## sticks to the outermost call + +outer_fun inner_fun arg do + a +end + +## newline before do + +fun x +do + x +end + +fun x +# comment +do + x +end + +fun() +do + x +end + +Mod.fun x +do + x +end + +## stab clause / no arguments + +fun do + () -> x +end + +## stab clause / no arguments without parentheses + +fun do + -> x +end + +## stab clause / one argument + +fun do + x -> x +end + +## stab clause / many arguments + +fun do + x, y, 1 -> :ok +end + +## stab clause / arguments in parentheses + +fun do + (x, y) -> :ok +end + +## stab clause / many clauses + +fun do + 1 -> :yes + 2 -> :no + other -> :maybe +end + +## stab clause / multiline expression + +fun do + x -> + y + x +end + +## stab clause / with guard / no arguments + +fun do + () when node() == :nonode@nohost -> true +end + +## stab clause / with guard / one argument + +fun do + x when x == [] -> x +end + +## stab clause / with guard / multiple arguments + +fun do + x, y when x == [] -> x +end + +## stab clause / with guard / arguments in parentheses + +fun do + (x, y) when y == [] -> y +end + +## stab clause / with guard / multiple guards + +fun do + x when x > 10 when x < 5 -> x +end + +## stab clause / edge cases / no stab + +foo do + a when a +end + +foo do + ([]) +end + +## stab clause / edge cases / "when" in arguments + +foo do + a when b, c when d == e -> 1 + (a, a when b) -> 1 +end + +## stab clause / edge cases / block argument + +foo do + (x; y) -> 1 + ((x; y)) -> 1 +end + +## stab clause / edge 
cases / operator with lower precedence than "when" + +foo do + x <- y when x -> y +end + +foo do + (x <- y) when x -> y +end + +## stab clause / edge cases / empty + +fun do->end + +## stab clause / edge cases / trailing call in multiline clause + +fun do + 1 -> + 1 + x + + 1 -> + 1 +end + +fun do + 1 -> + 1 + Mod.fun + + 1 -> + 1 +end + +fun do + 1 -> + 1 + mod.fun + + 1 -> + 1 +end + +fun do + 1 -> + 1 + + x 1 -> + 1 +end + +## stab clause / edge cases / empty right-hand-side + +fun do + x -> +end + +## pattern matching + +fun do + [h | tail] -> {h, tail} +end + +## child blocks / after + +fun do + x +after + y +end + +## child blocks / catch + +fun do + x +catch + y +end + +## child blocks / else + +fun do + x +else + y +end + +## child blocks / rescue + +fun do + x +rescue + y +end + +## child blocks / duplicated + +fun do + x +after + y +after + z +end + +## child blocks / mixed + +fun do + x +else + y +after + z +end + +## child blocks / stab clause + +fun do + x +rescue + y -> y +end + +## child blocks / keyword pattern with child block start token + +fun do + x +after +after + after: 1 -> y +end + +## [field names] + +fun do + x -> x + x when x == [] -> x +end \ No newline at end of file diff --git a/test/corpus/edge_syntax.ex b/test/corpus/edge_syntax.ex new file mode 100644 index 0000000..46c0ace --- /dev/null +++ b/test/corpus/edge_syntax.ex @@ -0,0 +1,12 @@ +## operator with arity (valid and supported by IEx.Helpers.h) + +::/2 + +## stab and slash ambiguity + +(-> / 2) +(-> / / 2) + +## def with remote call + +def Mod.fun(x), do: 1 \ No newline at end of file diff --git a/test/corpus/edge_syntax_1_13.ex b/test/corpus/edge_syntax_1_13.ex new file mode 100644 index 0000000..37e6e9e --- /dev/null +++ b/test/corpus/edge_syntax_1_13.ex @@ -0,0 +1,6 @@ +@ / 1 +& / 1 +not / 1 +not in / 2 +* / 2 +h +/2 diff --git a/test/corpus/expression/anonymous_function.ex b/test/corpus/expression/anonymous_function.ex new file mode 100644 index 0000000..4c23686 --- 
/dev/null +++ b/test/corpus/expression/anonymous_function.ex @@ -0,0 +1,76 @@ +## no arguments + +fn() -> 1 end +fn () -> 1 end + +## no arguments without parentheses + +fn -> 1 end + +## one argument + +fn(x) -> x end + +## one argument without parentheses + +fn x -> x end + +## many arguments + +fn(x, y, z) -> x + y end + +## many arguments without parentheses + +fn x, y -> x + y end + +## multiline body + +fn x, y -> + y + x +end + +## many clauses + +fn + 1 -> :yes + 2 -> :no + other -> :maybe +end + +## with guard / no arguments + +fn + () when node() == :nonode@nohost -> true +end + +## with guard / one argument + +fn + x when x == [] -> x +end + +## with guard / multiple arguments + +fn + x, y when x == [] -> x +end + +## with guard / arguments in parentheses + +fn + (x, y) when y == [] -> y +end + +## with guard / multiple guards + +fn + x when x > 10 when x < 5 -> x +end + +## pattern matching + +fn + [h | tail] -> {h, tail} + %{x: x} when x == 1 -> 1 +end \ No newline at end of file diff --git a/test/corpus/expression/block.ex b/test/corpus/expression/block.ex new file mode 100644 index 0000000..e1ff274 --- /dev/null +++ b/test/corpus/expression/block.ex @@ -0,0 +1,55 @@ +## empty + +() + +## single expression + +(1) + +## multiple expressions separated by newline + +( + 1 + 2 +) + +## multiple expressions separated by semicolon + +(1;2) + +## multiple expressions separated by mixed separators + +( + 1 + + ; + + 2 +) + +## leading semicolon + +(;1;2) + +## trailing semicolon + +(1;2;) + +## stab clause / multiple clauses + +(x -> x; y -> y + z -> z) + +## stab clause / multiple arguments + +(x, y, z -> x) +((x, y, z) -> x) + +## stab clause / guard + +(x, y when x == y -> 1) +((x, y when x == y -> 1)) +((x, y when x == y) -> 1) +(x, y when x, z -> 1) +((x, y when x, z -> 1)) +((x, y when x, z) -> 1) \ No newline at end of file diff --git a/test/corpus/expression/call.ex b/test/corpus/expression/call.ex new file mode 100644 index 0000000..e01f474 --- 
/dev/null +++ b/test/corpus/expression/call.ex @@ -0,0 +1,207 @@ +## local call / no arguments + +fun() + +## local call / arguments in parentheses + +fun(a) +fun([1, 2], option: true, other: 5) + +## local call / arguments without parentheses + +fun a +fun {} +fun [1, 2], option: true, other: 5 +fun +: 1 + +## local call / arguments without parentheses / multiline + +fun [1, 2], + option: true, + other: 5 + +## local call / nested with parentheses + +outer_fun(inner_fun(a)) + +## local call / nested without parentheses (right associativity) + +outer_fun inner_fun a, b +outer_fun inner_fun do: 1 + +## local call / precedence with operator + +outer_fun 1 + 1 +1 + inner_fun 1 +outer_fun 1 + inner_fun 1 +fun 1, 2 |> other_fun + +## local call / treats nonimmediate parentheses as a block argument + +fun (x) + +## remote call / no arguments + +Mod.fun() + +## remote call / no arguments without parentheses + +Mod.fun + +## remote call / arguments in parentheses + +Mod.fun(a) +Mod.fun([1, 2], option: true, other: 5) + +## remote call / arguments without parentheses + +Mod.fun a +Mod.fun [1, 2], option: true, other: 5 + +## remote call / nested with parentheses + +Mod.outer_fun(Mod.inner_fun(a)) + +## remote call / nested without parentheses (right associativity) + +Mod.outer_fun Mod.inner_fun a + +## remote call / precedence with operator + +Mod.outer_fun 1 + 1 +1 + Mod.inner_fun 1 +Mod.outer_fun 1 + Mod.inner_fun 1 + +## remote call / treats nonimmediate parentheses as a block argument + +Mod.fun (x) + +## remote call / multi-level alias + +Mod1.Mod2.Mod3.fun(a) + +## remote call / operator + +Kernel.+(a, b) + +## remote call / quoted function name + +Mod."fun"(a) +Mod.'fun'(a) + +## remote call / atom literal module + +:mod.fun(a) +:"Elixir.Mod".fun(a) + +## anonymous call / no arguments + +fun.() + +## anonymous call / arguments in parentheses + +fun.(a) +fun.([1, 2], option: true, other: 5) + +## anonymous call / nested with parentheses + +outer_fun.(inner_fun.(a)) + 
+## mixed call types + +Mod.outer_fun mid_fun inner_fun.(a) + +## identifier call + +mod.fun(a) + +## nested identifier call + +map.mod.fun(a) + +## reserved word call + +a.and + +## range call + +(1..2).step +(1..2//3).step + +## multi-expression block call + +( + x + 1..2 +).step + +## map call + +%{}.field + +## struct call + +%Mod{}.field + +## arbitrary term call + +1.(1, 2) + +## escaped newline call + +fun \ +a + +## keyword list trailing separator + +fun(option: true, other: 5,) + +## newline before dot + +Mod + .fun(a) + +## newline after dot + +Mod. + fun(a) + +## access syntax + +map[key] +map[:key] + +## access syntax / does not allow whitespace + +map [key] + +## access syntax / precedence with dot call + +map.map[:key] +map[:mod].fun + +## access syntax / precedence with operators + +-x[:key] +@x[:key] +&x[:key] +&1[:key] + +## double parenthesised call + +fun()() +fun() () +fun(1)(1) +Mod.fun()() +fun.()() + +unquote(name)() + +## [field names] + +fun() +fun a +Mod.fun a +fun()() +fun.() +map[key] diff --git a/test/corpus/expression/capture.ex b/test/corpus/expression/capture.ex new file mode 100644 index 0000000..3e0b80d --- /dev/null +++ b/test/corpus/expression/capture.ex @@ -0,0 +1,28 @@ +## anonymous function + +& &1 + &2 +&(&1 + &2) +&foo(&1, a, &2) + +## argument call + +& &1.some_fun +&(&1.some_fun) +& &1.(&2) + +## remote MFA + +&Mod.fun/1 + +## remote operator MFA + +&Kernel.>=/2 + +## local MFA + +&fun/1 + +## local operator MFA + +&>=/2 +&//2 \ No newline at end of file diff --git a/test/corpus/expression/operator.ex b/test/corpus/expression/operator.ex new file mode 100644 index 0000000..de9bcdd --- /dev/null +++ b/test/corpus/expression/operator.ex @@ -0,0 +1,207 @@ +## unary + +@arg + ++arg +-arg +!arg +^arg +not arg +~~~arg + +&arg + +## binary left associative + +a * b * c +a / b / c + +a + b + c +a - b - c + +a ^^^ b ^^^ c + +a in b in c +a not in b not in c + +a |> b |> c +a <<< b <<< c +a >>> b >>> c +a <<~ b <<~ c +a ~>> b ~>> c 
+a <~ b <~ c +a ~> b ~> c +a <~> b <~> c +a <|> b <|> c + +a < b < c +a > b > c +a <= b <= c +a >= b >= c + +a == b == c +a != b != c +a =~ b =~ c +a === b === c +a !== b !== c + +a && b && c +a &&& b &&& c +a and b and c + +a || b || c +a ||| b ||| c +a or b or c + +a <- b <- c +a \\ b \\ c + +## binary right associative + +a ++ b ++ c +a -- b -- c +a +++ b +++ c +a --- b --- c +a .. b .. c +a <> b <> c + +a = b = c + +a | b | c + +a :: b :: c + +a when b when c + +## precedence on the same level falls back to associativity + +a * b / c +a + b - c +a in b not in c +a <<< b >>> c +a < b > c +a == b != c +a &&& b && c +a ||| b || c +a <- b \\ c + +a ++ b -- c + +## precedence on different levels + +& @ a - b +a -- b + c +a - b ++ c +a = b <<< c + +a + b * c - d + +## precedence determined by parentheses + +(& a) - b + +(a + b) * (c - d) + +## "not in" spacing + +a not in b + +## "not in" boundary + +fun not inARG + +## multiline / unary + +@ +arg + ++ +arg + +- +arg + +! +arg + +^ +arg + +not +arg + +~~~ +arg + +& +arg + +## multiline / unary over binary + +a ++ +b + +a +- +b + +## multiline / right operands + +x +not in +[y] + +x +not in[y] + +:a +++:b + +:a++ +:b + +## multiline / unary over binary (precedence) + +x +- +y + +x ++ +y + +## plus minus + +x+y +x + y +x+ y + +x +y +x +y +z + + +## nullary range + +.. + +## stepped range + +1 .. 
2 // 3 +1..2//3 +0..1//-1 + +## stepped range / multiline + +1..2 +// 4 + +## stepped ranges / blocks + +foo do end..bar do end//baz do end +1..2//3 + +## [field names] + +a + b +@a \ No newline at end of file diff --git a/test/corpus/expression/operator_1_13.ex b/test/corpus/expression/operator_1_13.ex new file mode 100644 index 0000000..b57dbfe --- /dev/null +++ b/test/corpus/expression/operator_1_13.ex @@ -0,0 +1,7 @@ +## binary left associative + +a ** b ** c + +## precedence on different levels + +a ** b + c ** d diff --git a/test/corpus/expression/sigil.ex b/test/corpus/expression/sigil.ex new file mode 100644 index 0000000..c647b36 --- /dev/null +++ b/test/corpus/expression/sigil.ex @@ -0,0 +1,66 @@ +## simple literal + +~s(content) +~r{content} +~w[content] +~a +~b"content" +~c'content' +~d|content| +~e/content/ + +## multiple lines + +~s"line 1 +line 2" + +## interpolation + +~s"hey #{name}!" +~r/hey #{ + name +}!/ +~w{##{name}#} + +## nested interpolation + +~s{this is #{~s{number #{1}}}!} + +## escape sequence + +~s{_\}_\n_\t_\r_\e_\\_\1_\x3f_\u0065\u0301_} + +## escaped interpolation + +~s/\#{1}/ + +## upper sigil / no interpolation + +~S"hey #{name}!" 
+ +## upper sigil / no escape sequence + +~S"\n" + +## upper sigil / escape terminator + +~S"content \" content" +~S{content \} content} +~S/content \/ content/ + +## heredoc delimiter + +~s""" +text +with "quotes" +""" + +~s''' +text +with 'quotes' +''' + +## modifiers + +~r/left|right/i +~r/left|right/iUx diff --git a/test/corpus/expression/sigil_1_13.ex b/test/corpus/expression/sigil_1_13.ex new file mode 100644 index 0000000..07bfb73 --- /dev/null +++ b/test/corpus/expression/sigil_1_13.ex @@ -0,0 +1,4 @@ +## modifiers + +~r/left|right/0 +~r/left|right/u8 diff --git a/test/corpus/expression/sigil_1_15.ex b/test/corpus/expression/sigil_1_15.ex new file mode 100644 index 0000000..23cd511 --- /dev/null +++ b/test/corpus/expression/sigil_1_15.ex @@ -0,0 +1,3 @@ +## upper sigil / multiple characters + +~MAT"1 2" diff --git a/test/corpus/integration/function_definition.ex b/test/corpus/integration/function_definition.ex new file mode 100644 index 0000000..e557c5d --- /dev/null +++ b/test/corpus/integration/function_definition.ex @@ -0,0 +1,90 @@ +## def / no arguments + +def fun() do +end + +## def / no arguments without parentheses + +def fun do +end + +## def / one argument + +def fun(x) do + x +end + +## def / one argument without parentheses + +def fun x do + x +end + +## def / many arguments + +def fun(x, y) do + x + y +end + +## def / many arguments without parentheses + +def fun x, y do + x + y +end + +## def / default arguments + +def fun x, y \\ 1 do + x + y +end + +def fun(x, y \\ 1) do + x + y +end + +## def / keyword do block + +def fun(), do: 1 +def fun(x), do: x + +## def / pattern matching + +def fun([{x, y} | tail]) do + x + y +end + +## def / with guard + +def fun(x) when x == 1 do + x +end + +## def / with guard / multiple guards + +def fun(x) when x > 10 when x < 5 do + x +end + +## defp + +defp fun(x) do + x +end + +## defmacro + +defmacro fun(x) do + quote do + [unquote(x)] + end +end + +## defguard + +defguard is_even(term) when is_integer(term) 
and rem(term, 2) == 0 + +## def in macro + +def unquote(name)(unquote_splicing(args)) do + unquote(compiled) +end \ No newline at end of file diff --git a/test/corpus/integration/kernel.ex b/test/corpus/integration/kernel.ex new file mode 100644 index 0000000..de94fd6 --- /dev/null +++ b/test/corpus/integration/kernel.ex @@ -0,0 +1,19 @@ +## for / enumerable + +for n <- [1, 2], do: n * 2 + +## for / enumerable / with options and block + +for line <- IO.stream(), into: IO.stream() do + String.upcase(line) +end + +## for / binary + +for <<c <- " hello world ">>, c != ?\s, into: "", do: <<c>> + +## for / reduce + +for x <- [1, 2, 1], reduce: %{} do + acc -> Map.update(acc, x, 1, & &1 + 1) +end \ No newline at end of file diff --git a/test/corpus/integration/module_definition.ex b/test/corpus/integration/module_definition.ex new file mode 100644 index 0000000..e3983c5 --- /dev/null +++ b/test/corpus/integration/module_definition.ex @@ -0,0 +1,34 @@ +## empty module definition + +defmodule Mod do +end + +defmodule Mod.Child do +end + +## module definition with atom literal + +defmodule :mod do +end + +## full module definition + +defmodule Mod do + @moduledoc """ + Example module + """ + + use UseMod + + @attribute 1 + + @doc """ + Example function + """ + @spec func(integer) :: integer + def func(x) when is_integer(x) do + priv(x) + priv(x) + end + + defp priv(x), do: x * x +end \ No newline at end of file diff --git a/test/corpus/integration/spec.ex b/test/corpus/integration/spec.ex new file mode 100644 index 0000000..8b2a837 --- /dev/null +++ b/test/corpus/integration/spec.ex @@ -0,0 +1,31 @@ +## without type parentheses + +@spec fun(atom, integer, keyword) :: string + +## with type parentheses + +@spec fun(atom(), integer(), keyword()) :: string() + +## with literals + +@spec fun(%{key: atom}) :: {:ok, atom} | {:error, binary} + +## with function reference + +@spec fun((-> atom), (atom -> integer)) :: integer + +## with remote type + +@spec fun(Keyword.t()) :: String.t() + +## with type
guard + +@spec fun(arg1, arg2) :: {arg1, arg2} when arg1: atom, arg2: integer + +## with named arguments + +@spec days_since_epoch(year :: integer, month :: integer, day :: integer) :: integer + +## nonempty list + +@spec fun() :: [integer, ...] diff --git a/test/corpus/semicolon.ex b/test/corpus/semicolon.ex new file mode 100644 index 0000000..6bf1026 --- /dev/null +++ b/test/corpus/semicolon.ex @@ -0,0 +1,12 @@ +## separates expressions in the same line + +1 ; 1 + +## trailing + +1; +2; + +## with comment + +1 ; # comment \ No newline at end of file diff --git a/test/corpus/term/alias.ex b/test/corpus/term/alias.ex new file mode 100644 index 0000000..4a4a7c2 --- /dev/null +++ b/test/corpus/term/alias.ex @@ -0,0 +1,35 @@ +## single part + +Mod +AZ_az_19_ + +## multiple parts + +Mod.Child +Mod.Child.Child + +## spacing + +Mod . Child + +Mod +. +Child + +## qualified tuples + +Mod.{Child1, Child2} + +## dot on identifier + +name.Mod +name.Mod.Child + +## dot on special identifier + +__MODULE__.Child + +(source + (dot + (identifier) + (alias))) \ No newline at end of file diff --git a/test/corpus/term/atom.ex b/test/corpus/term/atom.ex new file mode 100644 index 0000000..5bb8cab --- /dev/null +++ b/test/corpus/term/atom.ex @@ -0,0 +1,25 @@ +## simple literal + +:atom +:_az_AZ_19_ +:nonode@nohost +:bang! +:question? + +## operators + +[:~~~, :~>>, :~>, :|||, :||, :|>, :|, :>>>, :>=, :>, :=~, :===, :==, :=, :<~>, :<~, :<|>, :<>, :<=, :<<~, :<<<, :<-, :<, :+++, :++, :+, :^^^, :^, :&&&, :&&, :&, :\\, :/, :*, :@, :.., :., :!==, :!=, :!, :::, :->, :---, :--, :-] + +## special operator-like atoms + +[:..., :%{}, :{}, :%, :<<>>, :..//] + +## quoted atom + +:"atom ?? !! ' \n" +:'atom ?? !! " \n' + +## interpolation + +:"hey #{name}!" +:'hey #{name}!' 
\ No newline at end of file diff --git a/test/corpus/term/atom_1_13.ex b/test/corpus/term/atom_1_13.ex new file mode 100644 index 0000000..944f9c4 --- /dev/null +++ b/test/corpus/term/atom_1_13.ex @@ -0,0 +1,3 @@ +## operators + +[:**] diff --git a/test/corpus/term/bitstring.ex b/test/corpus/term/bitstring.ex new file mode 100644 index 0000000..9224746 --- /dev/null +++ b/test/corpus/term/bitstring.ex @@ -0,0 +1,43 @@ +## single item + +<<>> +<<10>> +<<10.0>> +<<"string">> + +## multiple items + +<< + 10, + 10.0, + "string" +>> + +## size modifiers + +<<10::4>> +<<10::size(4)>> + +## multiple modifiers + +<<"string"::utf8-big>> +<<"string"::utf16-big>> +<<"string"::utf32-big>> +<<10::32-little-unsigned>> +<<10::integer-signed-big>> +<<10.10::float-signed-native>> + +## multiple components with modifiers + +<<10::8-native, "string", 3.14::float, a::8, b::binary-size(known_size)>> + +## spacing + +<< + 10 :: 8-native, + b :: binary - size(known_size) +>> + +## trailing separator + +<<1,>> \ No newline at end of file diff --git a/test/corpus/term/boolean.ex b/test/corpus/term/boolean.ex new file mode 100644 index 0000000..60b0137 --- /dev/null +++ b/test/corpus/term/boolean.ex @@ -0,0 +1,4 @@ +## simple literal + +true +false \ No newline at end of file diff --git a/test/corpus/term/char.ex b/test/corpus/term/char.ex new file mode 100644 index 0000000..9427623 --- /dev/null +++ b/test/corpus/term/char.ex @@ -0,0 +1,21 @@ +## regular character + +?a +?Z +?0 +?9 +?_ +?? + +## escaped character + +?\n +?\t +?\s +?\\ +?\a +?\b + +## list of char literals + +[?a, ?b, ?c] \ No newline at end of file diff --git a/test/corpus/term/charlist.ex b/test/corpus/term/charlist.ex new file mode 100644 index 0000000..48e571d --- /dev/null +++ b/test/corpus/term/charlist.ex @@ -0,0 +1,67 @@ +## single line + +'Hello, 123!' + +## multiple lines + +'line 1 +line 2' + +## interpolation + +'hey #{name}!' +'hey #{ + name +}!' 
+'##{name}#' + +## nested interpolation + +'this is #{'number #{1}'}!' + +## escape sequence + +'_\'_\n_\t_\r_\e_\\_\1_\x3f_\u0065\u0301_' + +## escaped interpolation + +'\#{1}' + +## heredoc / charlist + +''' +text +with 'quotes' +''' + +## heredoc / interpolation + +''' +hey #{name}! +''' + +## heredoc / nested interpolation + +''' +this is #{ + ''' + number #{1} + ''' +}! +''' + +## heredoc / escaped delimiter + +''' +\''' +''' + +''' +\'\'\' +''' + +## heredoc / escaped interpolation + +''' +\#{1} +''' \ No newline at end of file diff --git a/test/corpus/term/float.ex b/test/corpus/term/float.ex new file mode 100644 index 0000000..13830e4 --- /dev/null +++ b/test/corpus/term/float.ex @@ -0,0 +1,17 @@ +## simple literal + +1234567890.1234567890 +-1234567890.1234567890 +-1_234_567_890.123_456_789_0 + +## scientific notation + +1.0e6 +1.0e+6 +1.0e-6 +-1.0e6 +-1.0e+6 +-1.0e-6 +1.0E6 +1.0E+6 +1.0E-6 diff --git a/test/corpus/term/integer.ex b/test/corpus/term/integer.ex new file mode 100644 index 0000000..4f57de5 --- /dev/null +++ b/test/corpus/term/integer.ex @@ -0,0 +1,24 @@ +## decimal + +1234567890 +-1234567890 +1_234_567_890 +019 + +## binary + +0b0101011 +-0b0101011 +0b0_10_10_11 + +## octal + +0o1234567 +-0o1234567 +0o1_23_45_67 + +## hexadecimal + +0x123456789abcdefABCDEF +-0x123456789abcdefABCDEF +0x123456789_abcdef_ABCDEF \ No newline at end of file diff --git a/test/corpus/term/keyword_list.ex b/test/corpus/term/keyword_list.ex new file mode 100644 index 0000000..02e712a --- /dev/null +++ b/test/corpus/term/keyword_list.ex @@ -0,0 +1,38 @@ +## simple literal + +[a: 1, a_b@12?: 2, A_B@12!: 3, Mod: 4, __struct__: 5] + +## trailing separator + +[a: 1,] + +## with leading items + +[1, {:c, 1}, a: 1, b: 2] + +## operator key + +[~~~: 1, ==: 2, >: 3] + +## special atom key + +[...: 1, %{}: 2, {}: 3, %: 4, <<>>: 5, ..//: 6] + +## reserved token key + +[not: 1, and: 2] +[nil: 1, true: 2] + +## quoted key + +[ + "key1 ?? !! ' \n": 1, + 'key2 ?? !! 
" \n': 2 +] + +## key interpolation + +[ + "hey #{name}!": 1, + 'hey #{name}!': 2 +] \ No newline at end of file diff --git a/test/corpus/term/list.ex b/test/corpus/term/list.ex new file mode 100644 index 0000000..fa525c2 --- /dev/null +++ b/test/corpus/term/list.ex @@ -0,0 +1,18 @@ +## simple literal + +[] +[a] +[A] +[1] +[1, 2] +[1,2] +[ 1 , 2 ] + +## nested + +[[1], 1] + +## trailing separator + +[1,] +[1,2,] diff --git a/test/corpus/term/map.ex b/test/corpus/term/map.ex new file mode 100644 index 0000000..bbb10d0 --- /dev/null +++ b/test/corpus/term/map.ex @@ -0,0 +1,24 @@ +## empty + +%{} + +## from keywords + +%{a: 1, b: 2} + +## from arrow entries + +%{:a => 1, "b" => 2, c => 3} + +## from both arrow entries and keywords + +%{"a" => 1, b: 2, c: 3} + +## trailing separator + +%{"a" => 1,} + +## update syntax + +%{user | name: "Jane", email: "jane@example.com"} +%{user | "name" => "Jane"} diff --git a/test/corpus/term/nil.ex b/test/corpus/term/nil.ex new file mode 100644 index 0000000..7b2686b --- /dev/null +++ b/test/corpus/term/nil.ex @@ -0,0 +1,3 @@ +## simple literal + +nil \ No newline at end of file diff --git a/test/corpus/term/string.ex b/test/corpus/term/string.ex new file mode 100644 index 0000000..71479fb --- /dev/null +++ b/test/corpus/term/string.ex @@ -0,0 +1,96 @@ +## empty + +"" + +## single line + +"Hello, 123!" + +## multiple lines + +"line 1 +line 2" + +## interpolation + +"hey #{name}!" +"hey #{ + name +}!" +"##{name}#" + +## nested interpolation + +"this is #{"number #{1}"}!" + +## empty interpolation + +"#{}" + +## escape sequence + +"_\"_\n_\t_\r_\e_\\_\1_\x3f_\u0065\u0301_" + +## escaped interpolation + +"\#{1}" + +## heredoc / string + +""" +text +with "quotes" +""" + +## heredoc / interpolation + +""" +hey #{name}! +""" + +## heredoc / nested interpolation + +""" +this is #{ + """ + number #{1} + """ +}! 
+""" + +## heredoc / delimiter in the middle + +""" +hey """ +""" + +## heredoc / escaped newline (ignored) + +""" +hey \ +""" + + """ + hey \ + """ + +""" +hey \ +there +""" + +## heredoc / escaped delimiter + +""" +\""" +""" + +""" +\"\"\" +""" + +## heredoc / escaped interpolation + +""" +\#{1} +""" \ No newline at end of file diff --git a/test/corpus/term/struct.ex b/test/corpus/term/struct.ex new file mode 100644 index 0000000..19bfeac --- /dev/null +++ b/test/corpus/term/struct.ex @@ -0,0 +1,51 @@ +## empty + +%User{} + +## from keywords + +%User{a: 1, b: 2} + +## from arrow entries + +%User{:a => 1, "b" => 2, c => 3} + +## from both arrow entries and keywords + +%User{"a" => 1, b: 2, c: 3} + +## trailing separator + +%User{"a" => 1,} + +## update syntax + +%User{user | name: "Jane", email: "jane@example.com"} +%User{user | "name" => "Jane"} + +## unused struct identifier + +%_{} + +## matching struct identifier + +%name{} + +## pinned struct identifier + +%^name{} + +## with special identifier + +%__MODULE__{} +%__MODULE__.Child{} + +## with atom + +%:"Elixir.Mod"{} + +## with call + +%fun(){} +%Mod.fun(){} +%fun.(){} \ No newline at end of file diff --git a/test/corpus/term/tuple.ex b/test/corpus/term/tuple.ex new file mode 100644 index 0000000..acc3c34 --- /dev/null +++ b/test/corpus/term/tuple.ex @@ -0,0 +1,16 @@ +## simple literal + +{} +{1} +{1, 2} +{1,2} +{ 1 , 2 } + +## nested + +{{1}, 1} + +## trailing separator + +{1,} +{1,2,} diff --git a/test/corpus/unicode.ex b/test/corpus/unicode.ex new file mode 100644 index 0000000..fa5c4cd --- /dev/null +++ b/test/corpus/unicode.ex @@ -0,0 +1,24 @@ +## atom + +:time_μs +:"£" +:'£' +:こんにちは世界 +:Ólá +:olá +:Olá + +## string + +"time_μs" +"£" +"こんにちは世界" +"Ólá" +"olá" +"Olá" + +## variable + +time_μs +こんにちは世界 +olá \ No newline at end of file diff --git a/test/corpus/variable.ex b/test/corpus/variable.ex new file mode 100644 index 0000000..c2e5eeb --- /dev/null +++ b/test/corpus/variable.ex @@ -0,0 +1,25 @@ +## 
regular + +snake_case +camelCase +az_AZ_19 +bang! +question? +doctest +not1 +notfalse + +## unused + +_ +_number +__TEST__ + +## three dots identifier + +... + +## special identifier + +__MODULE__ +__DIR__ \ No newline at end of file diff --git a/test/range_test.exs b/test/range_test.exs index 1bf6c64..e7e32c9 100644 --- a/test/range_test.exs +++ b/test/range_test.exs @@ -724,24 +724,24 @@ defmodule SourcerorTest.RangeTest do } end - test "should not raise on any three-element tuple parsed by parse_string" do - for relative_path <- Path.wildcard("lib/*/**.ex") do - assert_can_get_ranges(relative_path) - end + test "should never raise" do + ExUnit.CaptureIO.capture_io(:stderr, fn -> + for file <- SourcerorTest.Support.Corpus.all_paths() do + assert :ok = can_get_ranges(file) + end + end) end - defp assert_can_get_ranges(relative_path) do - source = relative_path |> Path.relative_to_cwd() |> File.read!() - quoted = Sourceror.parse_string!(source) - - Sourceror.prewalk(quoted, fn - {_, _, _} = quoted, acc -> + defp can_get_ranges(file) do + with {:ok, source} <- File.read(file), + {:ok, quoted} <- Sourceror.parse_string(source) do + Sourceror.prewalk(quoted, fn quoted, acc -> try do Sourceror.get_range(quoted) rescue e -> flunk(""" - Expected a range from expression in #{relative_path}: + Expected a range from expression (#{file}): #{inspect(quoted)} @@ -752,10 +752,13 @@ defmodule SourcerorTest.RangeTest do end {quoted, acc} + end) - quoted, acc -> - {quoted, acc} - end) + :ok + else + {:error, error} -> + {:error, file, error} + end end end end diff --git a/test/support/corpus.ex b/test/support/corpus.ex new file mode 100644 index 0000000..9a847db --- /dev/null +++ b/test/support/corpus.ex @@ -0,0 +1,35 @@
+defmodule SourcerorTest.Support.Corpus do
+  @moduledoc false
+
+  # {requirement, fragment} pairs: `ommissions/0` yields the fragment of each
+  # pair whose requirement the running Elixir version fails to match.
+  @version_requirements [
+    {">= 1.11.0", "_1_11.ex"},
+    {">= 1.12.0", "_1_12.ex"},
+    {">= 1.13.0", "_1_13.ex"},
+    {">= 1.14.0", "_1_14.ex"},
+    {">= 1.15.0", "_1_15.ex"}
+  ]
+
+  @doc """
+  Lists the corpus files, relative to the cwd, that `omit?/2` does not reject.
+  """
+  def all_paths do
+    skipped = ommissions()
+
+    for path <- Path.wildcard("test/corpus/**/*.ex"),
+        relative = Path.relative_to_cwd(path),
+        not omit?(skipped, relative),
+        do: relative
+  end
+
+  # True when `file` matches (`=~`) at least one entry of `ommissions`.
+  def omit?(ommissions, file), do: Enum.any?(ommissions, fn entry -> file =~ entry end)
+
+  # Fragments from `@version_requirements` that apply to the running Elixir.
+  def ommissions do
+    for {requirement, fragment} <- @version_requirements,
+        not Version.match?(System.version(), requirement),
+        do: fragment
+  end
+end