Skip to content

Commit

Permalink
Make position translation to UTF16 more robust
Browse files Browse the repository at this point in the history
When diagnostics were emitted for a previous version of the file the positions may no longer be valid
Fixes #1002
  • Loading branch information
lukaszsamson committed Oct 13, 2023
1 parent 42f16b8 commit 206ed21
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 69 deletions.
75 changes: 43 additions & 32 deletions apps/language_server/lib/language_server/diagnostics.ex
Original file line number Diff line number Diff line change
Expand Up @@ -291,76 +291,87 @@ defmodule ElixirLS.LanguageServer.Diagnostics do
# https://microsoft.github.io/language-server-protocol/specifications/specification-3-16/#diagnostic

# position is a 1 based line number
# we return a range of trimmed text in that line
defp range(position, source_file)
when is_integer(position) and position >= 1 and not is_nil(source_file) do
# we return a 0 length range at first non whitespace character in line
defp range(line_start, source_file)
when is_integer(line_start) and not is_nil(source_file) do
# line is 1 based
line = position - 1
text = Enum.at(SourceFile.lines(source_file), line) || ""
lines = SourceFile.lines(source_file)

start_idx = String.length(text) - String.length(String.trim_leading(text)) + 1
length = max(String.length(String.trim(text)), 1)
{line_start_lsp, char_start_lsp} =
if line_start > 0 do
case Enum.at(lines, line_start - 1) do
nil ->
# position is outside file range - this will return end of the file
SourceFile.elixir_position_to_lsp(lines, {line_start, 1})

line ->
# find first non whitespace character in line
start_idx = String.length(line) - String.length(String.trim_leading(line)) + 1
{line_start - 1, SourceFile.elixir_character_to_lsp(line, start_idx)}
end
else
# return begin of the file
{0, 0}
end

%{
"start" => %{
"line" => line,
"character" => SourceFile.elixir_character_to_lsp(text, start_idx)
"line" => line_start_lsp,
"character" => char_start_lsp
},
"end" => %{
"line" => line,
"character" => SourceFile.elixir_character_to_lsp(text, start_idx + length)
"line" => line_start_lsp,
"character" => char_start_lsp
}
}
end

# position is a 1 based line number and 0 based character cursor (UTF8)
# we return a 0 length range exactly at that location
defp range({line_start, char_start}, source_file)
when line_start >= 1 and not is_nil(source_file) do
when not is_nil(source_file) do
lines = SourceFile.lines(source_file)
# line is 1 based
start_line = Enum.at(lines, line_start - 1)
# SourceFile.elixir_character_to_lsp assumes char to be 1 based but it's 0 based here
character = SourceFile.elixir_character_to_lsp(start_line, char_start + 1)
# elixir_position_to_lsp will handle positions outside file range
{line_start_lsp, char_start_lsp} =
SourceFile.elixir_position_to_lsp(lines, {line_start, char_start - 1})

%{
"start" => %{
"line" => line_start - 1,
"character" => character
"line" => line_start_lsp,
"character" => char_start_lsp
},
"end" => %{
"line" => line_start - 1,
"character" => character
"line" => line_start_lsp,
"character" => char_start_lsp
}
}
end

# position is a range defined by 1 based line numbers and 0 based character cursors (UTF8)
# we return exactly that range
defp range({line_start, char_start, line_end, char_end}, source_file)
when line_start >= 1 and line_end >= 1 and not is_nil(source_file) do
when not is_nil(source_file) do
lines = SourceFile.lines(source_file)
# line is 1 based
start_line = Enum.at(lines, line_start - 1)
end_line = Enum.at(lines, line_end - 1)
# elixir_position_to_lsp will handle positions outside file range
{line_start_lsp, char_start_lsp} =
SourceFile.elixir_position_to_lsp(lines, {line_start, char_start - 1})

# SourceFile.elixir_character_to_lsp assumes char to be 1 based but it's 0 based here
start_char = SourceFile.elixir_character_to_lsp(start_line, char_start + 1)
end_char = SourceFile.elixir_character_to_lsp(end_line, char_end + 1)
{line_end_lsp, char_end_lsp} =
SourceFile.elixir_position_to_lsp(lines, {line_end, char_end - 1})

%{
"start" => %{
"line" => line_start - 1,
"character" => start_char
"line" => line_start_lsp,
"character" => char_start_lsp
},
"end" => %{
"line" => line_end - 1,
"character" => end_char
"line" => line_end_lsp,
"character" => char_end_lsp
}
}
end

# source file is unknown, position is 0 or invalid
# source file is unknown
# we discard any position information as it is meaningless
# unfortunately LSP does not allow `null` range so we need to return something
defp range(_, _) do
Expand Down
59 changes: 41 additions & 18 deletions apps/language_server/lib/language_server/source_file.ex
Original file line number Diff line number Diff line change
Expand Up @@ -286,24 +286,29 @@ defmodule ElixirLS.LanguageServer.SourceFile do
utf8_character + 1
end

def lsp_position_to_elixir(_urf8_text, {lsp_line, _lsp_character}) when lsp_line < 0,
def lsp_position_to_elixir(_urf8_text_or_lines, {lsp_line, _lsp_character}) when lsp_line < 0,
do: {1, 1}

def lsp_position_to_elixir(_urf8_text, {lsp_line, lsp_character}) when lsp_character <= 0,
do: {max(lsp_line + 1, 1), 1}
def lsp_position_to_elixir(_urf8_text_or_lines, {lsp_line, lsp_character})
when lsp_character <= 0,
do: {max(lsp_line + 1, 1), 1}

def lsp_position_to_elixir(urf8_text, {lsp_line, lsp_character}) do
source_file_lines = lines(urf8_text)
total_lines = length(source_file_lines)
def lsp_position_to_elixir(urf8_text, {lsp_line, lsp_character}) when is_binary(urf8_text) do
lsp_position_to_elixir(lines(urf8_text), {lsp_line, lsp_character})
end

def lsp_position_to_elixir([_ | _] = urf8_lines, {lsp_line, lsp_character})
when lsp_line >= 0 do
total_lines = length(urf8_lines)

if lsp_line > total_lines - 1 do
# sanitize to position after last char in last line
{total_lines, String.length(source_file_lines |> Enum.at(total_lines - 1)) + 1}
last_line = Enum.at(urf8_lines, total_lines - 1)
elixir_last_character = String.length(last_line) + 1
{total_lines, elixir_last_character}
else
utf8_character =
source_file_lines
|> Enum.at(lsp_line)
|> lsp_character_to_elixir(lsp_character)
line = Enum.at(urf8_lines, lsp_line)
utf8_character = lsp_character_to_elixir(line, lsp_character)

{lsp_line + 1, utf8_character}
end
Expand All @@ -319,17 +324,35 @@ defmodule ElixirLS.LanguageServer.SourceFile do
|> div(2)
end

def elixir_position_to_lsp(_urf8_text, {elixir_line, elixir_character})
def elixir_position_to_lsp(_urf8_text_or_lines, {elixir_line, _elixir_character})
when elixir_line < 1,
do: {0, 0}

def elixir_position_to_lsp(_urf8_text_or_lines, {elixir_line, elixir_character})
when elixir_character <= 1,
do: {max(elixir_line - 1, 0), 0}

def elixir_position_to_lsp(urf8_text, {elixir_line, elixir_character}) do
line =
lines(urf8_text)
|> Enum.at(max(elixir_line - 1, 0))
def elixir_position_to_lsp(urf8_text, {elixir_line, elixir_character})
when is_binary(urf8_text) do
elixir_position_to_lsp(lines(urf8_text), {elixir_line, elixir_character})
end

utf16_character = elixir_character_to_lsp(line || "", elixir_character)
def elixir_position_to_lsp([_ | _] = urf8_lines, {elixir_line, elixir_character})
when elixir_line >= 1 do
total_lines = length(urf8_lines)

{elixir_line - 1, utf16_character}
if elixir_line > total_lines do
# sanitize to position after last char in last line
last_line = Enum.at(urf8_lines, total_lines - 1)
elixir_last_character = String.length(last_line) + 1

utf16_character = elixir_character_to_lsp(last_line, elixir_last_character)
{total_lines - 1, utf16_character}
else
line = Enum.at(urf8_lines, max(elixir_line - 1, 0))
utf16_character = elixir_character_to_lsp(line, elixir_character)

{elixir_line - 1, utf16_character}
end
end
end
20 changes: 10 additions & 10 deletions apps/language_server/test/dialyzer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message1,
"range" => %{
"end" => %{"character" => 12, "line" => 1},
"end" => %{"character" => 2, "line" => 1},
"start" => %{"character" => 2, "line" => 1}
},
"severity" => 2,
Expand All @@ -68,7 +68,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message2,
"range" => %{
"end" => %{"character" => 17, "line" => 2},
"end" => %{"character" => 4, "line" => 2},
"start" => %{"character" => 4, "line" => 2}
},
"severity" => 2,
Expand Down Expand Up @@ -171,7 +171,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message1,
"range" => %{
"end" => %{"character" => 12, "line" => 1},
"end" => %{"character" => 2, "line" => 1},
"start" => %{"character" => 2, "line" => 1}
},
"severity" => 2,
Expand All @@ -180,7 +180,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message2,
"range" => %{
"end" => %{"character" => 17, "line" => 2},
"end" => %{"character" => 4, "line" => 2},
"start" => %{"character" => 4, "line" => 2}
},
"severity" => 2,
Expand Down Expand Up @@ -219,7 +219,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message1,
"range" => %{
"end" => %{"character" => 12, "line" => 1},
"end" => %{"character" => 2, "line" => 1},
"start" => %{"character" => 2, "line" => 1}
},
"severity" => 2,
Expand All @@ -228,7 +228,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message2,
"range" => %{
"end" => %{"character" => 17, "line" => 2},
"end" => %{"character" => 4, "line" => 2},
"start" => %{"character" => 4, "line" => 2}
},
"severity" => 2,
Expand Down Expand Up @@ -257,7 +257,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message1,
"range" => %{
"end" => %{"character" => 12, "line" => 1},
"end" => %{"character" => 2, "line" => 1},
"start" => %{"character" => 2, "line" => 1}
},
"severity" => 2,
Expand All @@ -266,7 +266,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => _error_message2,
"range" => %{
"end" => %{"character" => 17, "line" => 2},
"end" => %{"character" => 4, "line" => 2},
"start" => %{"character" => 4, "line" => 2}
},
"severity" => 2,
Expand Down Expand Up @@ -296,7 +296,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message1,
"range" => %{
"end" => %{"character" => 22, "line" => 1},
"end" => %{"character" => 2, "line" => 1},
"start" => %{"character" => 2, "line" => 1}
},
"severity" => 2,
Expand All @@ -305,7 +305,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
%{
"message" => error_message2,
"range" => %{
"end" => %{"character" => 22, "line" => 2},
"end" => %{"character" => 4, "line" => 2},
"start" => %{"character" => 4, "line" => 2}
},
"severity" => 2,
Expand Down
2 changes: 1 addition & 1 deletion apps/language_server/test/server_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -1663,7 +1663,7 @@ defmodule ElixirLS.LanguageServer.ServerTest do
%{
"message" => "undefined function does_not_exist/0" <> _,
"range" => %{
"end" => %{"character" => 20, "line" => 3},
"end" => %{"character" => 4, "line" => 3},
"start" => %{"character" => 4, "line" => 3}
},
"severity" => 1,
Expand Down
40 changes: 32 additions & 8 deletions apps/language_server/test/source_file_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,14 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do
assert {1, 2} == SourceFile.lsp_position_to_elixir("abcde", {0, 1})
end

test "lsp_position_to_elixir single line utf8" do
assert {1, 2} == SourceFile.lsp_position_to_elixir("🏳️‍🌈abcde", {0, 6})
end

test "lsp_position_to_elixir multi line" do
assert {2, 2} == SourceFile.lsp_position_to_elixir("abcde\n1234", {1, 1})
end

# This is not specified in LSP but some clients fail to synchronize text properly
test "lsp_position_to_elixir single line before line start" do
assert {1, 1} == SourceFile.lsp_position_to_elixir("abcde", {0, -1})
Expand All @@ -667,14 +675,6 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do
assert {1, 1} == SourceFile.lsp_position_to_elixir("", {0, 15})
end

test "lsp_position_to_elixir single line utf8" do
assert {1, 2} == SourceFile.lsp_position_to_elixir("🏳️‍🌈abcde", {0, 6})
end

test "lsp_position_to_elixir multi line" do
assert {2, 2} == SourceFile.lsp_position_to_elixir("abcde\n1234", {1, 1})
end

# This is not specified in LSP but some clients fail to synchronize text properly
test "lsp_position_to_elixir multi line before first line" do
assert {1, 1} == SourceFile.lsp_position_to_elixir("abcde\n1234", {-1, 2})
Expand Down Expand Up @@ -705,6 +705,30 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do
assert {1, 1} == SourceFile.elixir_position_to_lsp("abcde\n1234", {2, 2})
end

# This is not specified in LSP but some clients fail to synchronize text properly
test "elixir_position_to_lsp single line before line start" do
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde", {1, -1})
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde", {1, 0})
end

# LSP spec 3.17 https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
# position character If the character value is greater than the line length it defaults back to the line length
test "elixir_position_to_lsp single line after line end" do
assert {0, 5} == SourceFile.elixir_position_to_lsp("abcde", {1, 15})
assert {0, 0} == SourceFile.elixir_position_to_lsp("", {1, 15})
end

# This is not specified in LSP but some clients fail to synchronize text properly
test "elixir_position_to_lsp multi line before first line" do
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde\n1234", {-1, 2})
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde\n1234", {0, 2})
end

# This is not specified in LSP but some clients fail to synchronize text properly
test "elixir_position_to_lsp multi line after last line" do
assert {1, 4} == SourceFile.elixir_position_to_lsp("abcde\n1234", {8, 2})
end

test "sanity check" do
text = "aąłsd🏳️‍🌈abcde"

Expand Down

0 comments on commit 206ed21

Please sign in to comment.