-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Simplify DocumentUri implementation #81951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,7 @@ | |
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| using System; | ||
| using System.Diagnostics.CodeAnalysis; | ||
| using Microsoft.CodeAnalysis; | ||
|
|
||
| namespace Roslyn.LanguageServer.Protocol; | ||
|
|
||
|
|
@@ -12,31 +12,19 @@ namespace Roslyn.LanguageServer.Protocol; | |
| /// see https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri | ||
| /// </summary> | ||
| /// <remarks> | ||
| /// While .NET has a type represent URIs (System.Uri), we do not want to use this type directly in | ||
| /// serialization and deserialization. System.Uri does full parsing and validation on the URI upfront, so | ||
| /// any issues in the uri format will cause deserialization to fail and bypass any of our error recovery. | ||
| /// While .NET has a type represent URIs (System.Uri), we do not want to use this type directly in serialization and | ||
| /// deserialization. System.Uri does full parsing and validation on the URI upfront, so any issues in the uri format | ||
| /// will cause deserialization to fail and bypass any of our error recovery. | ||
| /// | ||
| /// Compounding this problem, System.Uri will fail to parse various RFC spec valid URIs. | ||
| /// In order to gracefully handle these issues, we defer the parsing of the URI until someone | ||
| /// actually asks for it (and can handle the failure). | ||
| /// Compounding this problem, System.Uri will fail to parse various RFC spec valid URIs. In order to gracefully handle | ||
| /// these issues, we defer the parsing of the URI until someone actually asks for it (and can handle the failure). | ||
| /// </remarks> | ||
| internal sealed class DocumentUri : IEquatable<DocumentUri> | ||
| internal sealed record class DocumentUri(string UriString) | ||
| { | ||
| private readonly Lazy<Uri?> _parsedUriLazy; | ||
| private Optional<Uri> _parsedUri; | ||
|
|
||
| public DocumentUri(string uriString) | ||
| { | ||
| UriString = uriString; | ||
| _parsedUriLazy = new(() => ParseUri(UriString)); | ||
| } | ||
|
|
||
| public DocumentUri(Uri parsedUri) | ||
| { | ||
| UriString = parsedUri.AbsoluteUri; | ||
| _parsedUriLazy = new(() => parsedUri); | ||
| } | ||
|
|
||
| public string UriString { get; } | ||
| public DocumentUri(Uri parsedUri) : this(parsedUri.AbsoluteUri) | ||
| => _parsedUri = parsedUri; | ||
|
|
||
| /// <summary> | ||
| /// Gets the parsed System.Uri for the URI string. | ||
|
|
@@ -45,13 +33,20 @@ public DocumentUri(Uri parsedUri) | |
| /// Null if the URI string is not parse-able with System.Uri. | ||
| /// </returns> | ||
| /// <remarks> | ||
| /// Invalid RFC spec URI strings are not parse-able as so will return null here. | ||
| /// However, System.Uri can also fail to parse certain valid RFC spec URI strings. | ||
| /// Invalid RFC spec URI strings are not parse-able as so will return null here. However, System.Uri can also fail | ||
| /// to parse certain valid RFC spec URI strings. | ||
| /// | ||
| /// For example, any URI containing a 'sub-delims' character in the host name | ||
| /// is a valid RFC spec URI, but will fail with System.Uri | ||
| /// For example, any URI containing a 'sub-delims' character in the host name is a valid RFC spec URI, but will fail | ||
| /// with System.Uri | ||
| /// </remarks> | ||
| public Uri? ParsedUri => _parsedUriLazy.Value; | ||
| public Uri? ParsedUri | ||
| { | ||
| get | ||
| { | ||
| _parsedUri = _parsedUri.HasValue ? _parsedUri : ParseUri(UriString); | ||
| return _parsedUri.Value; | ||
| } | ||
| } | ||
|
|
||
| private static Uri? ParseUri(string uriString) | ||
| { | ||
|
|
@@ -68,83 +63,59 @@ public DocumentUri(Uri parsedUri) | |
|
|
||
| public override string ToString() => UriString; | ||
|
|
||
| public override bool Equals([NotNullWhen(true)] object? obj) => obj is DocumentUri other && this.Equals(other); | ||
|
|
||
| public bool Equals(DocumentUri otherUri) | ||
| { | ||
| // 99% of the time the equivalent URIs will have equivalent URI strings, as the client is expected to be consistent in how it sends the URIs to the server, | ||
| // either always encoded or always unencoded. | ||
| // See https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri | ||
| if (otherUri is null) | ||
| return false; | ||
|
|
||
| // 99% of the time the equivalent URIs will have equivalent URI strings, as the client is expected to be | ||
| // consistent in how it sends the URIs to the server, either always encoded or always unencoded. See | ||
| // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri | ||
| if (this.UriString == otherUri.UriString) | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| // If either of the URIs cannot be parsed | ||
| // Bail if we cannot parse either of the URIs. We already determined the URI strings are not equal and we need | ||
| // to be able to parse the URIs to do deeper equivalency checks. | ||
| if (otherUri.ParsedUri is null || this.ParsedUri is null) | ||
| { | ||
| // Bail if we cannot parse either of the URIs. We already determined the URI strings are not equal | ||
| // and we need to be able to parse the URIs to do deeper equivalency checks. | ||
| return false; | ||
| } | ||
|
|
||
| // Next we compare the parsed URIs to handle various casing and encoding scenarios (for example - different schemes may handle casing differently). | ||
| // Next we compare the parsed URIs to handle various casing and encoding scenarios (for example - different | ||
| // schemes may handle casing differently). | ||
|
|
||
| // Uri.Equals will not always consider a percent encoded URI equal to an unencoded URI, even if they point to the same resource. | ||
| // As above, the client is supposed to be consistent in which kind of URI they send. | ||
| // Uri.Equals will not always consider a percent encoded URI equal to an unencoded URI, even if they point to | ||
| // the same resource. As above, the client is supposed to be consistent in which kind of URI they send. | ||
| // | ||
| // However, there are rare cases where we are comparing an unencoded URI to an encoded URI and should consider them | ||
| // equivalent if they point to the same file path. | ||
| // For example - say the client generally sends us the unencoded URI. When we serialize URIs back to the client, we always serialize the AbsoluteUri property (see FromUri). | ||
| // The AbsoluteUri property is *always* percent encoded - if this URI gets sent back to us as part of a data object on a request (e.g. codelens/resolve), the client will leave | ||
| // the URI untouched, even if they generally send unencoded URIs. In such cases we need to consider the encoded and unencoded URI equivalent. | ||
| // However, there are rare cases where we are comparing an unencoded URI to an encoded URI and should consider | ||
| // them equivalent if they point to the same file path. For example - say the client generally sends us the | ||
| // unencoded URI. When we serialize URIs back to the client, we always serialize the AbsoluteUri property (see | ||
| // FromUri). The AbsoluteUri property is *always* percent encoded - if this URI gets sent back to us as part of | ||
| // a data object on a request (e.g. codelens/resolve), the client will leave the URI untouched, even if they | ||
| // generally send unencoded URIs. In such cases we need to consider the encoded and unencoded URI equivalent. | ||
| // | ||
| // To handle that, we first compare the AbsoluteUri properties on both, which are always percent encoded. | ||
| if (this.ParsedUri.IsAbsoluteUri && otherUri.ParsedUri.IsAbsoluteUri && this.ParsedUri.AbsoluteUri == otherUri.ParsedUri.AbsoluteUri) | ||
| { | ||
| return true; | ||
| } | ||
| else | ||
| { | ||
| return Uri.Equals(this.ParsedUri, otherUri.ParsedUri); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note Uri.Equals doesn't exist. it calls through to object.Equals
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am pretty sure Uri.Equals exists and I am pretty sure we need to use it in some cases to handle casing - https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.Uri/src/System/Uri.cs#L1745 |
||
| } | ||
| return (this.ParsedUri.IsAbsoluteUri && otherUri.ParsedUri.IsAbsoluteUri && this.ParsedUri.AbsoluteUri == otherUri.ParsedUri.AbsoluteUri) || | ||
| Equals(this.ParsedUri, otherUri.ParsedUri); | ||
| } | ||
|
|
||
| public override int GetHashCode() | ||
| { | ||
| // We can't do anything better than the uri string hash code if we cannot parse the URI. | ||
| if (this.ParsedUri is null) | ||
| { | ||
| // We can't do anything better than the uri string hash code if we cannot parse the URI. | ||
| return this.UriString.GetHashCode(); | ||
| } | ||
|
|
||
| if (this.ParsedUri.IsAbsoluteUri) | ||
| { | ||
| // Since the Uri type does not consider an encoded Uri equal to an unencoded Uri, we need to handle this ourselves. | ||
| // The AbsoluteUri property is always encoded, so we can use this to compare the URIs (see Equals above). | ||
| // | ||
| // However, depending on the kind of URI, case sensitivity in AbsoluteUri should be ignored. | ||
| // Uri.GetHashCode normally handles this internally, but the parameters it uses to determine which comparison to use are not exposed. | ||
| // | ||
| // Instead, we will always create the hash code ignoring case, and will rely on the Equals implementation | ||
| // to handle collisions (between two Uris with different casing). This should be very rare in practice. | ||
| // Collisions can happen for non UNC URIs (e.g. `git:/blah` vs `git:/Blah`). | ||
| return StringComparer.OrdinalIgnoreCase.GetHashCode(this.ParsedUri.AbsoluteUri); | ||
| } | ||
| else | ||
| { | ||
| return this.ParsedUri.GetHashCode(); | ||
| } | ||
| // Since the Uri type does not consider an encoded Uri equal to an unencoded Uri, we need to handle this | ||
| // ourselves. The AbsoluteUri property is always encoded, so we can use this to compare the URIs (see Equals | ||
| // above). | ||
| // | ||
| // However, depending on the kind of URI, case sensitivity in AbsoluteUri should be ignored. Uri.GetHashCode | ||
| // normally handles this internally, but the parameters it uses to determine which comparison to use are not | ||
| // exposed. | ||
| // | ||
| // Instead, we will always create the hash code ignoring case, and will rely on the Equals implementation to | ||
| // handle collisions (between two Uris with different casing). This should be very rare in practice. Collisions | ||
| // can happen for non UNC URIs (e.g. `git:/blah` vs `git:/Blah`). | ||
| return this.ParsedUri.IsAbsoluteUri | ||
| ? StringComparer.OrdinalIgnoreCase.GetHashCode(this.ParsedUri.AbsoluteUri) | ||
| : this.ParsedUri.GetHashCode(); | ||
| } | ||
|
|
||
| public static bool operator ==(DocumentUri? uri1, DocumentUri? uri2) | ||
| => (uri1, uri2) switch | ||
| { | ||
| (null, null) => true, | ||
| (null, _) or (_, null) => false, | ||
| _ => uri1.Equals(uri2) | ||
| }; | ||
|
|
||
| public static bool operator !=(DocumentUri? uri1, DocumentUri? uri2) | ||
| => !(uri1 == uri2); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is always correct. Nothing is constraining the
System.Urito be an absolute uri,, it could be relative andAbsoluteUriwill throwI think that also even round tripping an absolute URI like this could change how it is parsed - for example possibly percent encoding the Uri twice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i def agree. though that was the existing logic:
Any thoughts on what we should do?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
edit - the first part of my statement was wrong. it looks like its already doing
AbsoluteUribelow. But I believe the second still applies. I'm not sure there is a guarantee thatUri.AbsoluteUri == new Uri(Uri.AbsoluteUr)).AbsoluteUriThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to keep the
DocumentUri(Uri parsedUri)ctor, and assign theUrito theOptional<Uri>like we did before?For the absolute part, it must be the callers that guarantee an absolute URI. I would be an ok with an explicit throw if not absolute here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ooh. very good catch. i actually had that, but accidentally deleted it. it def should remain!