From 5ef93322d8d5fb5f5dba4d59f41034644cd3a9fd Mon Sep 17 00:00:00 2001 From: Jan Provaznik Date: Tue, 3 Feb 2026 12:18:31 +0100 Subject: [PATCH] Add GetCanonicalForm with span-based detection - Implement GetCanonicalForm() to match Path.GetFullPath behavior - Use span-based scanning for efficient relative segment detection - Handle all separator combinations on Windows (/, \, mixed) - Detect consecutive separators for normalization - Avoid false positives on hidden files (.nuget, .git, .hidden, etc.) - Add comprehensive unit tests for all scenarios --- src/Framework.UnitTests/AbsolutePath_Tests.cs | 196 ++++++++++++++++++ src/Framework/PathHelpers/AbsolutePath.cs | 117 +++++++++++ 2 files changed, 313 insertions(+) diff --git a/src/Framework.UnitTests/AbsolutePath_Tests.cs b/src/Framework.UnitTests/AbsolutePath_Tests.cs index b890d9938bc..f6d9f867e89 100644 --- a/src/Framework.UnitTests/AbsolutePath_Tests.cs +++ b/src/Framework.UnitTests/AbsolutePath_Tests.cs @@ -213,5 +213,201 @@ public void AbsolutePath_UnixPathValidation_ShouldAcceptOnlyTrueAbsolutePaths(st { ValidatePathAcceptance(path, shouldBeAccepted); } + + #region GetCanonicalForm Tests + + [Fact] + public void GetCanonicalForm_DefaultInstance_ShouldReturnSameInstance() + { + var absolutePath = default(AbsolutePath); + var result = absolutePath.GetCanonicalForm(); + + result.ShouldBe(absolutePath); + } + + [Fact] + public void GetCanonicalForm_EmptyPath_ShouldReturnSameInstance() + { + var absolutePath = new AbsolutePath(string.Empty, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + + result.ShouldBe(absolutePath); + } + + [WindowsOnlyTheory] + // Current directory segments with pure separators + [InlineData("C:\\foo\\.\\bar")] // Backslash: current directory + [InlineData("C:/foo/./bar")] // Forward slash: current directory + // Parent directory segments with pure separators + [InlineData("C:\\foo\\..\\bar")] // Backslash: parent directory + [InlineData("C:/foo/../bar")] // Forward slash: parent directory + // Mixed separators with relative segments + [InlineData("C:\\foo\\./bar")] // Backslash then forward: current + [InlineData("C:/foo/.\\bar")] // Forward then backslash: current + [InlineData("C:\\foo\\../bar")] // Backslash then forward: parent + [InlineData("C:/foo/..\\bar")] // Forward then backslash: parent + // Trailing relative segments + [InlineData("C:\\foo\\bar\\.")] // Trailing current directory + [InlineData("C:\\foo\\bar\\..")] // Trailing parent directory + [InlineData("C:/foo/bar/.")] // Trailing current (forward slash) + [InlineData("C:/foo/bar/..")] // Trailing parent (forward slash) + // Root-level relative segments + [InlineData("C:\\.")] // Current dir at root + [InlineData("C:\\..")] // Parent dir at root + [InlineData("C:/.")] // Current dir at root (forward slash) + [InlineData("C:/..")] // Parent dir at root (forward slash) + // Separator normalization only (no relative segments) + [InlineData("C:/foo/bar")] // Forward slashes need normalization + public void GetCanonicalForm_WindowsPathNormalization_ShouldMatchPathGetFullPath(string inputPath) + { + ValidateGetCanonicalFormMatchesSystem(inputPath); + } + + [WindowsOnlyTheory] + // Hidden files/folders - should NOT trigger normalization (false positive prevention) + [InlineData("C:\\.hidden")] // Hidden file at root + [InlineData("C:\\foo\\.hidden")] // Hidden file in folder + [InlineData("C:\\foo\\.hidden\\bar")] // Hidden folder + [InlineData("C:\\.nuget\\packages")] // .nuget folder + [InlineData("C:\\.config\\settings")] // .config folder + [InlineData("C:\\foo\\.git\\config")] // .git folder + [InlineData("C:\\foo\\.vs\\settings")] // .vs folder + // Files starting with dots but not relative segments + [InlineData("C:\\foo\\.gitignore")] // .gitignore file + [InlineData("C:\\foo\\.editorconfig")] // .editorconfig file + [InlineData("C:\\foo\\...")] // Triple dot (not relative) + [InlineData("C:\\foo\\....")] // Quad dot (not relative) + [InlineData("C:\\foo\\.hidden.txt")] // Hidden file with extension + public void GetCanonicalForm_WindowsHiddenFiles_ShouldReturnSameInstance(string inputPath) + { + var absolutePath = new AbsolutePath(inputPath, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + + // Should return the exact same instance (no normalization needed) + ReferenceEquals(result.Value, absolutePath.Value).ShouldBeTrue( + $"Path '{inputPath}' should not trigger normalization but GetCanonicalForm returned a different instance"); + result.Value.ShouldBe(inputPath); + } + + [WindowsOnlyTheory] + // Simple paths already in canonical form + [InlineData("C:\\foo\\bar")] // Standard Windows path + [InlineData("C:\\")] // Root only + [InlineData("D:\\folder\\subfolder\\file.txt")] // Deep path + public void GetCanonicalForm_WindowsAlreadyCanonical_ShouldReturnSameInstance(string inputPath) + { + var absolutePath = new AbsolutePath(inputPath, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + + ReferenceEquals(result.Value, absolutePath.Value).ShouldBeTrue( + $"Path '{inputPath}' is already canonical but GetCanonicalForm returned a different instance"); + } + + [UnixOnlyTheory] + // Current directory segments + [InlineData("/foo/./bar")] // Current directory reference + // Parent directory segments + [InlineData("/foo/../bar")] // Parent directory reference + // Trailing relative segments + [InlineData("/foo/bar/.")] // Trailing current directory + [InlineData("/foo/bar/..")] // Trailing parent directory + // Root-level relative segments + [InlineData("/.")] // Current dir at root + [InlineData("/..")] // Parent dir at root + // Multiple relative segments + [InlineData("/foo/./bar/../baz")] // Mixed current and parent + public void GetCanonicalForm_UnixPathNormalization_ShouldMatchPathGetFullPath(string inputPath) + { + ValidateGetCanonicalFormMatchesSystem(inputPath); + } + + [UnixOnlyTheory] + // Hidden files/folders - should NOT trigger normalization + [InlineData("/.hidden")] // Hidden file at root + [InlineData("/foo/.hidden")] // Hidden file in folder + [InlineData("/foo/.hidden/bar")] // Hidden folder + [InlineData("/.nuget/packages")] // .nuget folder + [InlineData("/.config/settings")] // .config folder + [InlineData("/foo/.git/config")] // .git folder + [InlineData("/foo/.local/share")] // .local folder + // Files starting with dots but not relative segments + [InlineData("/foo/.gitignore")] // .gitignore file + [InlineData("/foo/.bashrc")] // .bashrc file + [InlineData("/foo/...")] // Triple dot (not relative) + [InlineData("/foo/....")] // Quad dot (not relative) + [InlineData("/foo/.hidden.txt")] // Hidden file with extension + // Backslash in Unix paths (part of filename, not separator) + [InlineData("/foo/bar\\baz")] // Backslash is part of name + [InlineData("/foo/.\\hidden")] // Backslash after dot (not a separator on Unix) + public void GetCanonicalForm_UnixHiddenFiles_ShouldReturnSameInstance(string inputPath) + { + var absolutePath = new AbsolutePath(inputPath, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + + ReferenceEquals(result.Value, absolutePath.Value).ShouldBeTrue( + $"Path '{inputPath}' should not trigger normalization but GetCanonicalForm returned a different instance"); + result.Value.ShouldBe(inputPath); + } + + [UnixOnlyTheory] + // Simple paths already in canonical form + [InlineData("/foo/bar")] // Standard Unix path + [InlineData("/")] // Root only + [InlineData("/home/user/documents/file.txt")] // Deep path + public void GetCanonicalForm_UnixAlreadyCanonical_ShouldReturnSameInstance(string inputPath) + { + var absolutePath = new AbsolutePath(inputPath, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + + ReferenceEquals(result.Value, absolutePath.Value).ShouldBeTrue( + $"Path '{inputPath}' is already canonical but GetCanonicalForm returned a different instance"); + } + + [Fact] + public void GetCanonicalForm_ShouldPreserveOriginalValue() + { + string originalValue = "original/relative/path"; + string absoluteValue = NativeMethods.IsWindows ? "C:\\foo\\.\\bar" : "/foo/./bar"; + + var absolutePath = new AbsolutePath(absoluteValue, originalValue, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + + // Original value should be preserved even after canonicalization + result.OriginalValue.ShouldBe(originalValue); + } + + [WindowsOnlyTheory] + // UNC paths + [InlineData("\\\\server\\share\\path")] // Basic UNC path + [InlineData("\\\\server\\share\\.\\path")] // UNC with current dir segment + [InlineData("\\\\server\\share\\..\\path")] // UNC with parent dir segment + public void GetCanonicalForm_WindowsUNCPaths_ShouldMatchPathGetFullPath(string inputPath) + { + ValidateGetCanonicalFormMatchesSystem(inputPath); + } + + [WindowsOnlyTheory] + // Multiple consecutive separators + [InlineData("C:\\foo\\\\bar")] // Double backslash + [InlineData("C://foo//bar")] // Double forward slash + public void GetCanonicalForm_MultipleConsecutiveSeparators_ShouldMatchPathGetFullPath(string inputPath) + { + ValidateGetCanonicalFormMatchesSystem(inputPath); + } + + private static void ValidateGetCanonicalFormMatchesSystem(string inputPath) + { + var absolutePath = new AbsolutePath(inputPath, ignoreRootedCheck: true); + var result = absolutePath.GetCanonicalForm(); + var systemResult = Path.GetFullPath(inputPath); + + // Should match Path.GetFullPath behavior exactly + result.Value.ShouldBe(systemResult); + + // Should preserve original value + result.OriginalValue.ShouldBe(absolutePath.OriginalValue); + } + + #endregion } } diff --git a/src/Framework/PathHelpers/AbsolutePath.cs b/src/Framework/PathHelpers/AbsolutePath.cs index 8a625ad6a4f..e1ace8bf87b 100644 --- a/src/Framework/PathHelpers/AbsolutePath.cs +++ b/src/Framework/PathHelpers/AbsolutePath.cs @@ -126,6 +126,123 @@ public AbsolutePath(string path, AbsolutePath basePath) /// The path to convert. public static implicit operator string(AbsolutePath path) => path.Value; + /// + /// Returns the canonical form of this path. + /// + /// + /// An representing the canonical form of the path. + /// + /// + /// + /// The canonical form of a path is exactly what would produce, + /// with the following properties: + /// + /// All relative path segments ("." and "..") are resolved. + /// Directory separators are normalized to the platform convention (backslash on Windows). + /// + /// + /// + /// If the path is already in canonical form, returns the current instance to avoid unnecessary allocations. + /// Preserves the OriginalValue of the current instance. + /// + /// + internal AbsolutePath GetCanonicalForm() + { + if (string.IsNullOrEmpty(Value)) + { + return this; + } + + bool needsNormalization = HasRelativeSegmentOrConsecutiveSeparators(Value.AsSpan(), out bool hasAltSeparator); + + // Check if directory separator normalization is required (only on Windows: "/" to "\"). + // On Unix "\" is not a valid path separator, but is a part of the file/directory name, so no normalization is needed. + if (!needsNormalization && NativeMethods.IsWindows && hasAltSeparator) + { + needsNormalization = true; + } + + if (!needsNormalization) + { + return this; + } + + // Use Path.GetFullPath to resolve relative segments and normalize separators. + // Skip validation since Path.GetFullPath already ensures the result is absolute. + return new AbsolutePath(Path.GetFullPath(Value), OriginalValue, ignoreRootedCheck: true); + } + + /// + /// Scans for path segments that are exactly "." or ".." (relative segments), + /// or consecutive directory separators. + /// Handles all separator combinations on Windows (/, \, and mixed). + /// + /// The path to scan. + /// Set to true if an alternate separator (/) is found on Windows. + /// True if the path needs normalization. + private static bool HasRelativeSegmentOrConsecutiveSeparators(ReadOnlySpan path, out bool hasAltSeparator) + { + hasAltSeparator = false; + bool previousWasSeparator = false; + + for (int i = 0; i < path.Length; i++) + { + char c = path[i]; + bool isSeparator = IsSeparator(c); + + if (isSeparator) + { + // Track if we've seen an alternate separator (for Windows normalization) + if (c == Path.AltDirectorySeparatorChar) + { + hasAltSeparator = true; + } + + // Check for consecutive separators (but skip UNC path prefix \\server) + if (previousWasSeparator && i > 1) + { + return true; + } + + // Check for "/." or "/..": separator followed by one or two dots, then separator or end. + int nextPos = i + 1; + if (nextPos < path.Length && path[nextPos] == '.') + { + int afterDots = nextPos + 1; + + // Check for "/." (single dot segment) + if (afterDots == path.Length || IsSeparator(path[afterDots])) + { + return true; + } + + // Check for "/.." (double dot segment) + if (path[afterDots] == '.') + { + int afterTwoDots = afterDots + 1; + if (afterTwoDots == path.Length || IsSeparator(path[afterTwoDots])) + { + return true; + } + } + } + } + + previousWasSeparator = isSeparator; + } + + return false; + } + + /// + /// Checks if a character is a directory separator. + /// On Windows, both '/' and '\' are separators. On Unix, only '/' is a separator. + /// + private static bool IsSeparator(char c) + { + return c == Path.DirectorySeparatorChar || c == Path.AltDirectorySeparatorChar; + } + /// /// Determines whether two instances are equal. ///