From 206ed215ebc5e897f6c91cf50b1823ce2728d06f Mon Sep 17 00:00:00 2001 From: Lukasz Samson Date: Fri, 13 Oct 2023 21:25:04 +0200 Subject: [PATCH] Make position translation to UTF16 more robust When diagnostics were emitted for a previous version of the file the positions may no longer be valid Fixes https://github.com/elixir-lsp/elixir-ls/issues/1002 --- .../lib/language_server/diagnostics.ex | 75 +++++++++++-------- .../lib/language_server/source_file.ex | 59 ++++++++++----- apps/language_server/test/dialyzer_test.exs | 20 ++--- apps/language_server/test/server_test.exs | 2 +- .../language_server/test/source_file_test.exs | 40 ++++++++-- 5 files changed, 127 insertions(+), 69 deletions(-) diff --git a/apps/language_server/lib/language_server/diagnostics.ex b/apps/language_server/lib/language_server/diagnostics.ex index 5e94f61c8..abe141a8c 100644 --- a/apps/language_server/lib/language_server/diagnostics.ex +++ b/apps/language_server/lib/language_server/diagnostics.ex @@ -291,24 +291,37 @@ defmodule ElixirLS.LanguageServer.Diagnostics do # https://microsoft.github.io/language-server-protocol/specifications/specification-3-16/#diagnostic # position is a 1 based line number - # we return a range of trimmed text in that line - defp range(position, source_file) - when is_integer(position) and position >= 1 and not is_nil(source_file) do + # we return a 0 length range at first non whitespace character in line + defp range(line_start, source_file) + when is_integer(line_start) and not is_nil(source_file) do # line is 1 based - line = position - 1 - text = Enum.at(SourceFile.lines(source_file), line) || "" + lines = SourceFile.lines(source_file) - start_idx = String.length(text) - String.length(String.trim_leading(text)) + 1 - length = max(String.length(String.trim(text)), 1) + {line_start_lsp, char_start_lsp} = + if line_start > 0 do + case Enum.at(lines, line_start - 1) do + nil -> + # position is outside file range - this will return end of the file + SourceFile.elixir_position_to_lsp(lines, {line_start, 1}) + + line -> + # find first non whitespace character in line + start_idx = String.length(line) - String.length(String.trim_leading(line)) + 1 + {line_start - 1, SourceFile.elixir_character_to_lsp(line, start_idx)} + end + else + # return begin of the file + {0, 0} + end %{ "start" => %{ - "line" => line, - "character" => SourceFile.elixir_character_to_lsp(text, start_idx) + "line" => line_start_lsp, + "character" => char_start_lsp }, "end" => %{ - "line" => line, - "character" => SourceFile.elixir_character_to_lsp(text, start_idx + length) + "line" => line_start_lsp, + "character" => char_start_lsp } } end @@ -316,21 +329,20 @@ defmodule ElixirLS.LanguageServer.Diagnostics do # position is a 1 based line number and 0 based character cursor (UTF8) # we return a 0 length range exactly at that location defp range({line_start, char_start}, source_file) - when line_start >= 1 and not is_nil(source_file) do + when not is_nil(source_file) do lines = SourceFile.lines(source_file) - # line is 1 based - start_line = Enum.at(lines, line_start - 1) - # SourceFile.elixir_character_to_lsp assumes char to be 1 based but it's 0 based here - character = SourceFile.elixir_character_to_lsp(start_line, char_start + 1) + # elixir_position_to_lsp will handle positions outside file range + {line_start_lsp, char_start_lsp} = + SourceFile.elixir_position_to_lsp(lines, {line_start, char_start - 1}) %{ "start" => %{ - "line" => line_start - 1, - "character" => character + "line" => line_start_lsp, + "character" => char_start_lsp }, "end" => %{ - "line" => line_start - 1, - "character" => character + "line" => line_start_lsp, + "character" => char_start_lsp } } end @@ -338,29 +350,28 @@ defmodule ElixirLS.LanguageServer.Diagnostics do # position is a range defined by 1 based line numbers and 0 based character cursors (UTF8) # we return exactly that range defp range({line_start, char_start, line_end, char_end}, source_file) - when line_start >= 1 and line_end >= 1 and not is_nil(source_file) do + when not is_nil(source_file) do lines = SourceFile.lines(source_file) - # line is 1 based - start_line = Enum.at(lines, line_start - 1) - end_line = Enum.at(lines, line_end - 1) + # elixir_position_to_lsp will handle positions outside file range + {line_start_lsp, char_start_lsp} = + SourceFile.elixir_position_to_lsp(lines, {line_start, char_start - 1}) - # SourceFile.elixir_character_to_lsp assumes char to be 1 based but it's 0 based here - start_char = SourceFile.elixir_character_to_lsp(start_line, char_start + 1) - end_char = SourceFile.elixir_character_to_lsp(end_line, char_end + 1) + {line_end_lsp, char_end_lsp} = + SourceFile.elixir_position_to_lsp(lines, {line_end, char_end - 1}) %{ "start" => %{ - "line" => line_start - 1, - "character" => start_char + "line" => line_start_lsp, + "character" => char_start_lsp }, "end" => %{ - "line" => line_end - 1, - "character" => end_char + "line" => line_end_lsp, + "character" => char_end_lsp } } end - # source file is unknown, position is 0 or invalid + # source file is unknown # we discard any position information as it is meaningless # unfortunately LSP does not allow `null` range so we need to return something defp range(_, _) do diff --git a/apps/language_server/lib/language_server/source_file.ex b/apps/language_server/lib/language_server/source_file.ex index 04e7049d8..eab8cf708 100644 --- a/apps/language_server/lib/language_server/source_file.ex +++ b/apps/language_server/lib/language_server/source_file.ex @@ -286,24 +286,29 @@ defmodule ElixirLS.LanguageServer.SourceFile do utf8_character + 1 end - def lsp_position_to_elixir(_urf8_text, {lsp_line, _lsp_character}) when lsp_line < 0, + def lsp_position_to_elixir(_urf8_text_or_lines, {lsp_line, _lsp_character}) when lsp_line < 0, do: {1, 1} - def lsp_position_to_elixir(_urf8_text, {lsp_line, lsp_character}) when lsp_character <= 0, - do: {max(lsp_line + 1, 1), 1} + def lsp_position_to_elixir(_urf8_text_or_lines, {lsp_line, lsp_character}) + when lsp_character <= 0, + do: {max(lsp_line + 1, 1), 1} - def lsp_position_to_elixir(urf8_text, {lsp_line, lsp_character}) do - source_file_lines = lines(urf8_text) - total_lines = length(source_file_lines) + def lsp_position_to_elixir(urf8_text, {lsp_line, lsp_character}) when is_binary(urf8_text) do + lsp_position_to_elixir(lines(urf8_text), {lsp_line, lsp_character}) + end + + def lsp_position_to_elixir([_ | _] = urf8_lines, {lsp_line, lsp_character}) + when lsp_line >= 0 do + total_lines = length(urf8_lines) if lsp_line > total_lines - 1 do # sanitize to position after last char in last line - {total_lines, String.length(source_file_lines |> Enum.at(total_lines - 1)) + 1} + last_line = Enum.at(urf8_lines, total_lines - 1) + elixir_last_character = String.length(last_line) + 1 + {total_lines, elixir_last_character} else - utf8_character = - source_file_lines - |> Enum.at(lsp_line) - |> lsp_character_to_elixir(lsp_character) + line = Enum.at(urf8_lines, lsp_line) + utf8_character = lsp_character_to_elixir(line, lsp_character) {lsp_line + 1, utf8_character} end @@ -319,17 +324,35 @@ defmodule ElixirLS.LanguageServer.SourceFile do |> div(2) end - def elixir_position_to_lsp(_urf8_text, {elixir_line, elixir_character}) + def elixir_position_to_lsp(_urf8_text_or_lines, {elixir_line, _elixir_character}) + when elixir_line < 1, + do: {0, 0} + + def elixir_position_to_lsp(_urf8_text_or_lines, {elixir_line, elixir_character}) when elixir_character <= 1, do: {max(elixir_line - 1, 0), 0} - def elixir_position_to_lsp(urf8_text, {elixir_line, elixir_character}) do - line = - lines(urf8_text) - |> Enum.at(max(elixir_line - 1, 0)) + def elixir_position_to_lsp(urf8_text, {elixir_line, elixir_character}) + when is_binary(urf8_text) do + elixir_position_to_lsp(lines(urf8_text), {elixir_line, elixir_character}) + end - utf16_character = elixir_character_to_lsp(line || "", elixir_character) + def elixir_position_to_lsp([_ | _] = urf8_lines, {elixir_line, elixir_character}) + when elixir_line >= 1 do + total_lines = length(urf8_lines) - {elixir_line - 1, utf16_character} + if elixir_line > total_lines do + # sanitize to position after last char in last line + last_line = Enum.at(urf8_lines, total_lines - 1) + elixir_last_character = String.length(last_line) + 1 + + utf16_character = elixir_character_to_lsp(last_line, elixir_last_character) + {total_lines - 1, utf16_character} + else + line = Enum.at(urf8_lines, max(elixir_line - 1, 0)) + utf16_character = elixir_character_to_lsp(line, elixir_character) + + {elixir_line - 1, utf16_character} + end end end diff --git a/apps/language_server/test/dialyzer_test.exs b/apps/language_server/test/dialyzer_test.exs index fe3052f92..8a8e8bf2c 100644 --- a/apps/language_server/test/dialyzer_test.exs +++ b/apps/language_server/test/dialyzer_test.exs @@ -59,7 +59,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message1, "range" => %{ - "end" => %{"character" => 12, "line" => 1}, + "end" => %{"character" => 2, "line" => 1}, "start" => %{"character" => 2, "line" => 1} }, "severity" => 2, @@ -68,7 +68,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message2, "range" => %{ - "end" => %{"character" => 17, "line" => 2}, + "end" => %{"character" => 4, "line" => 2}, "start" => %{"character" => 4, "line" => 2} }, "severity" => 2, @@ -171,7 +171,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message1, "range" => %{ - "end" => %{"character" => 12, "line" => 1}, + "end" => %{"character" => 2, "line" => 1}, "start" => %{"character" => 2, "line" => 1} }, "severity" => 2, @@ -180,7 +180,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message2, "range" => %{ - "end" => %{"character" => 17, "line" => 2}, + "end" => %{"character" => 4, "line" => 2}, "start" => %{"character" => 4, "line" => 2} }, "severity" => 2, @@ -219,7 +219,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message1, "range" => %{ - "end" => %{"character" => 12, "line" => 1}, + "end" => %{"character" => 2, "line" => 1}, "start" => %{"character" => 2, "line" => 1} }, "severity" => 2, @@ -228,7 +228,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message2, "range" => %{ - "end" => %{"character" => 17, "line" => 2}, + "end" => %{"character" => 4, "line" => 2}, "start" => %{"character" => 4, "line" => 2} }, "severity" => 2, @@ -257,7 +257,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message1, "range" => %{ - "end" => %{"character" => 12, "line" => 1}, + "end" => %{"character" => 2, "line" => 1}, "start" => %{"character" => 2, "line" => 1} }, "severity" => 2, @@ -266,7 +266,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => _error_message2, "range" => %{ - "end" => %{"character" => 17, "line" => 2}, + "end" => %{"character" => 4, "line" => 2}, "start" => %{"character" => 4, "line" => 2} }, "severity" => 2, @@ -296,7 +296,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message1, "range" => %{ - "end" => %{"character" => 22, "line" => 1}, + "end" => %{"character" => 2, "line" => 1}, "start" => %{"character" => 2, "line" => 1} }, "severity" => 2, @@ -305,7 +305,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do %{ "message" => error_message2, "range" => %{ - "end" => %{"character" => 22, "line" => 2}, + "end" => %{"character" => 4, "line" => 2}, "start" => %{"character" => 4, "line" => 2} }, "severity" => 2, diff --git a/apps/language_server/test/server_test.exs b/apps/language_server/test/server_test.exs index f622e4cbc..52c86c1bc 100644 --- a/apps/language_server/test/server_test.exs +++ b/apps/language_server/test/server_test.exs @@ -1663,7 +1663,7 @@ defmodule ElixirLS.LanguageServer.ServerTest do %{ "message" => "undefined function does_not_exist/0" <> _, "range" => %{ - "end" => %{"character" => 20, "line" => 3}, + "end" => %{"character" => 4, "line" => 3}, "start" => %{"character" => 4, "line" => 3} }, "severity" => 1, diff --git a/apps/language_server/test/source_file_test.exs b/apps/language_server/test/source_file_test.exs index 9d58f4c17..a564b06ca 100644 --- a/apps/language_server/test/source_file_test.exs +++ b/apps/language_server/test/source_file_test.exs @@ -655,6 +655,14 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do assert {1, 2} == SourceFile.lsp_position_to_elixir("abcde", {0, 1}) end + test "lsp_position_to_elixir single line utf8" do + assert {1, 2} == SourceFile.lsp_position_to_elixir("🏳️‍🌈abcde", {0, 6}) + end + + test "lsp_position_to_elixir multi line" do + assert {2, 2} == SourceFile.lsp_position_to_elixir("abcde\n1234", {1, 1}) + end + # This is not specified in LSP but some clients fail to synchronize text properly test "lsp_position_to_elixir single line before line start" do assert {1, 1} == SourceFile.lsp_position_to_elixir("abcde", {0, -1}) @@ -667,14 +675,6 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do assert {1, 1} == SourceFile.lsp_position_to_elixir("", {0, 15}) end - test "lsp_position_to_elixir single line utf8" do - assert {1, 2} == SourceFile.lsp_position_to_elixir("🏳️‍🌈abcde", {0, 6}) - end - - test "lsp_position_to_elixir multi line" do - assert {2, 2} == SourceFile.lsp_position_to_elixir("abcde\n1234", {1, 1}) - end - # This is not specified in LSP but some clients fail to synchronize text properly test "lsp_position_to_elixir multi line before first line" do assert {1, 1} == SourceFile.lsp_position_to_elixir("abcde\n1234", {-1, 2}) @@ -705,6 +705,30 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do assert {1, 1} == SourceFile.elixir_position_to_lsp("abcde\n1234", {2, 2}) end + # This is not specified in LSP but some clients fail to synchronize text properly + test "elixir_position_to_lsp single line before line start" do + assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde", {1, -1}) + assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde", {1, 0}) + end + + # LSP spec 3.17 https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position + # position character If the character value is greater than the line length it defaults back to the line length + test "elixir_position_to_lsp single line after line end" do + assert {0, 5} == SourceFile.elixir_position_to_lsp("abcde", {1, 15}) + assert {0, 0} == SourceFile.elixir_position_to_lsp("", {1, 15}) + end + + # This is not specified in LSP but some clients fail to synchronize text properly + test "elixir_position_to_lsp multi line before first line" do + assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde\n1234", {-1, 2}) + assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde\n1234", {0, 2}) + end + + # This is not specified in LSP but some clients fail to synchronize text properly + test "elixir_position_to_lsp multi line after last line" do + assert {1, 4} == SourceFile.elixir_position_to_lsp("abcde\n1234", {8, 2}) + end + test "sanity check" do text = "aąłsd🏳️‍🌈abcde"