diff --git a/apps/language_server/lib/language_server/experimental/code_unit.ex b/apps/language_server/lib/language_server/experimental/code_unit.ex index 14ad0b822..96ab4a658 100644 --- a/apps/language_server/lib/language_server/experimental/code_unit.ex +++ b/apps/language_server/lib/language_server/experimental/code_unit.ex @@ -44,12 +44,12 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnit do @spec to_utf8(String.t(), utf16_code_unit()) :: {:ok, utf8_code_unit()} | error def to_utf8(binary, utf16_unit) do - do_to_utf8(binary, utf16_unit + 1, 0) + do_to_utf8(binary, utf16_unit, 0) end @spec to_utf16(String.t(), utf8_code_unit()) :: {:ok, utf16_code_unit()} | error def to_utf16(binary, utf16_unit) do - do_to_utf16(binary, utf16_unit + 1, 0) + do_to_utf16(binary, utf16_unit, 0) end def count(:utf16, binary) do @@ -98,7 +98,7 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnit do end defp do_to_utf16(_, 0, utf16_unit) do - {:ok, utf16_unit - 1} + {:ok, utf16_unit} end defp do_to_utf16(_, utf8_unit, _) when utf8_unit < 0 do @@ -152,7 +152,7 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnit do end defp do_to_utf8(_, 0, utf8_unit) do - {:ok, utf8_unit - 1} + {:ok, utf8_unit} end defp do_to_utf8(_, utf_16_units, _) when utf_16_units < 0 do diff --git a/apps/language_server/lib/language_server/experimental/source_file/conversions.ex b/apps/language_server/lib/language_server/experimental/source_file/conversions.ex index 8a9f424e8..c019b74fb 100644 --- a/apps/language_server/lib/language_server/experimental/source_file/conversions.ex +++ b/apps/language_server/lib/language_server/experimental/source_file/conversions.ex @@ -59,10 +59,6 @@ defmodule ElixirLS.LanguageServer.Experimental.SourceFile.Conversions do position end - def to_elixir(%LSPosition{} = position, %SourceFile{} = source_file) do - to_elixir(position, source_file.document) - end - def to_elixir(%LSPosition{} = position, %Document{} = document) do document_size = Document.size(document) # we need to handle out of bounds line numbers, because it's possible to build a document @@ -85,17 +81,8 @@ defmodule ElixirLS.LanguageServer.Experimental.SourceFile.Conversions do {:ok, ElixirPosition.new(elixir_line_number, 0)} true -> - with {:ok, line} <- Document.fetch_line(document, elixir_line_number) do - elixir_character = - case line do - line(ascii?: true, text: text) -> - min(ls_character, byte_size(text)) - - line(text: text) -> - {:ok, utf16_text} = to_utf16(text) - lsp_character_to_elixir(utf16_text, ls_character) - end - + with {:ok, line} <- Document.fetch_line(document, elixir_line_number), + {:ok, elixir_character} <- extract_elixir_character(position, line) do {:ok, ElixirPosition.new(elixir_line_number, elixir_character)} end end @@ -127,20 +114,11 @@ defmodule ElixirLS.LanguageServer.Experimental.SourceFile.Conversions do end def to_lsp(%ElixirPosition{} = position, %Document{} = document) do - %ElixirPosition{character: elixir_character, line: elixir_line} = position + with {:ok, line} <- Document.fetch_line(document, position.line), + {:ok, lsp_character} <- extract_lsp_character(position, line) do + ls_pos = + LSPosition.new(character: lsp_character, line: position.line - @elixir_ls_index_base) - with {:ok, line} <- Document.fetch_line(document, elixir_line) do - lsp_character = - case line do - line(ascii?: true, text: text) -> - min(position.character, byte_size(text)) - - line(text: utf8_text) -> - {:ok, character} = elixir_character_to_lsp(utf8_text, elixir_character) - character - end - - ls_pos = LSPosition.new(character: lsp_character, line: elixir_line - @elixir_ls_index_base) {:ok, ls_pos} end end @@ -151,19 +129,27 @@ defmodule ElixirLS.LanguageServer.Experimental.SourceFile.Conversions do # Private - defp extract_lsp_character(%ElixirPosition{} = position, line(ascii?: true)) do - {:ok, position.character} + defp extract_lsp_character(%ElixirPosition{} = position, line(ascii?: true, text: text)) do + character = min(position.character, byte_size(text)) + {:ok, character} end defp extract_lsp_character(%ElixirPosition{} = position, line(text: utf8_text)) do - {:ok, CodeUnit.utf16_offset(utf8_text, position.character)} + with {:ok, code_unit} <- CodeUnit.to_utf16(utf8_text, position.character) do + character = min(code_unit, CodeUnit.count(:utf16, utf8_text)) + {:ok, character} + end end - defp extract_elixir_character(%LSPosition{} = position, line(ascii?: true)) do - {:ok, position.character} + defp extract_elixir_character(%LSPosition{} = position, line(ascii?: true, text: text)) do + character = min(position.character, byte_size(text)) + {:ok, character} end defp extract_elixir_character(%LSPosition{} = position, line(text: utf8_text)) do - {:ok, CodeUnit.utf8_offset(utf8_text, position.character)} + with {:ok, code_unit} <- CodeUnit.to_utf8(utf8_text, position.character) do + character = min(code_unit, byte_size(utf8_text)) + {:ok, character} + end end end diff --git a/apps/language_server/test/experimental/code_unit_test.exs b/apps/language_server/test/experimental/code_unit_test.exs index 69b5ac985..06b602065 100644 --- a/apps/language_server/test/experimental/code_unit_test.exs +++ b/apps/language_server/test/experimental/code_unit_test.exs @@ -64,6 +64,7 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do end test "handles multi-byte characters properly" do + # guitar is 2 code units in utf16 but 4 in utf8 line = "b🎸abc" assert 0 == utf16_offset(line, 0) assert 1 == utf16_offset(line, 1) @@ -77,19 +78,21 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do describe "converting to utf8" do test "bounds are respected" do - assert {:error, :out_of_bounds} = to_utf16("h", 1) + assert {:error, :out_of_bounds} = to_utf16("h", 2) end test "with a multi-byte character" do line = "🏳️‍🌈" + code_unit_count = count_utf8_code_units(line) - assert to_utf8(line, 0) == {:error, :misaligned} - assert to_utf8(line, 1) == {:ok, 3} - assert to_utf8(line, 2) == {:ok, 6} - assert to_utf8(line, 3) == {:ok, 9} - assert to_utf8(line, 4) == {:error, :misaligned} - assert to_utf8(line, 5) == {:ok, code_unit_count - 1} + assert to_utf8(line, 0) == {:ok, 0} + assert to_utf8(line, 1) == {:error, :misaligned} + assert to_utf8(line, 2) == {:ok, 4} + assert to_utf8(line, 3) == {:ok, 7} + assert to_utf8(line, 4) == {:ok, 10} + assert to_utf8(line, 5) == {:error, :misaligned} + assert to_utf8(line, 6) == {:ok, code_unit_count} end test "after a unicode character" do @@ -99,8 +102,8 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do assert to_utf8(line, 1) == {:ok, 1} assert to_utf8(line, 4) == {:ok, 4} assert to_utf8(line, 5) == {:ok, 5} - assert to_utf8(line, 6) == {:error, :misaligned} - assert to_utf8(line, 7) == {:ok, 9} + assert to_utf8(line, 6) == {:ok, 6} + assert to_utf8(line, 7) == {:error, :misaligned} # after the guitar character assert to_utf8(line, 8) == {:ok, 10} assert to_utf8(line, 9) == {:ok, 11} @@ -114,24 +117,27 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do describe "converting to utf16" do test "respects bounds" do - assert {:error, :out_of_bounds} = to_utf16("h", 1) + assert {:error, :out_of_bounds} = to_utf16("h", 2) end test "with a multi-byte character" do line = "🏳️‍🌈" + code_unit_count = count_utf16_code_units(line) utf8_code_unit_count = count_utf8_code_units(line) - assert to_utf16(line, 0) == {:error, :misaligned} + assert to_utf16(line, 0) == {:ok, 0} assert to_utf16(line, 1) == {:error, :misaligned} assert to_utf16(line, 2) == {:error, :misaligned} - assert to_utf16(line, 3) == {:ok, 1} - assert to_utf16(line, 4) == {:error, :misaligned} - assert to_utf16(line, utf8_code_unit_count - 1) == {:ok, code_unit_count - 1} + assert to_utf16(line, 3) == {:error, :misaligned} + assert to_utf16(line, 4) == {:ok, 2} + assert to_utf16(line, utf8_code_unit_count - 1) == {:error, :misaligned} + assert to_utf16(line, utf8_code_unit_count) == {:ok, code_unit_count} end test "after a multi-byte character" do line = " {\"🎸\", \"ok\"}" + utf16_code_unit_count = count_utf16_code_units(line) utf8_code_unit_count = count_utf8_code_units(line) @@ -140,11 +146,12 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do assert to_utf16(line, index) == {:ok, index} end - assert to_utf16(line, 6) == {:error, :misaligned} + assert to_utf16(line, 6) == {:ok, 6} assert to_utf16(line, 7) == {:error, :misaligned} assert to_utf16(line, 8) == {:error, :misaligned} + assert to_utf16(line, 9) == {:error, :misaligned} - for index <- 9..17 do + for index <- 10..19 do assert to_utf16(line, index) == {:ok, index - 2} end @@ -157,11 +164,11 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do utf8_code_unit_count = count_utf8_code_units(s) utf16_unit_count = count_utf16_code_units(s) - assert {:ok, utf16_unit} = to_utf16(s, utf8_code_unit_count - 1) - assert utf16_unit == utf16_unit_count - 1 + assert {:ok, utf16_unit} = to_utf16(s, utf8_code_unit_count) + assert utf16_unit == utf16_unit_count assert {:ok, utf8_unit} = to_utf8(s, utf16_unit) - assert utf8_unit == utf8_code_unit_count - 1 + assert utf8_unit == utf8_code_unit_count end end @@ -170,11 +177,11 @@ defmodule ElixirLS.LanguageServer.Experimental.CodeUnitTest do utf16_code_unit_count = count_utf16_code_units(s) utf8_code_unit_count = count_utf8_code_units(s) - assert {:ok, utf8_code_unit} = to_utf8(s, utf16_code_unit_count - 1) - assert utf8_code_unit == utf8_code_unit_count - 1 + assert {:ok, utf8_code_unit} = to_utf8(s, utf16_code_unit_count) + assert utf8_code_unit == utf8_code_unit_count assert {:ok, utf16_unit} = to_utf16(s, utf8_code_unit) - assert utf16_unit == utf16_code_unit_count - 1 + assert utf16_unit == utf16_code_unit_count end end