Skip to content

Commit

Permalink
Improve performance of Tar library (dotnet#74281)
Browse files Browse the repository at this point in the history
* Avoid unnecessary byte[] allocations

* Remove unnecessary use of FileStreamOptions

* Clean up Dispose{Async} implementations

* Clean up unnecessary consts

Not a perf thing, just readability.

* Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary async variants, and overhaul GenerateExtendedAttributesDataStream

* Avoid string allocations in ReadMagicAttribute

* Avoid allocation in WriteAsOctal

* Improve handling of octal

* Avoid allocation for version string

* Removing boxing and char string allocation in GenerateExtendedAttributeName

* Fix a couple unnecessary dictionary lookups

* Replace Enum.HasFlag usage

* Remove allocations from Write{Posix}Name

* Replace ArrayPool use with string.Create

* Replace more superfluous ArrayPool usage

* Remove ArrayPool use from System.IO.Compression.ZipFile

* Fix inverted condition

* Use generic math to parse octal

* Remove allocations from StringReader and string.Split

* Remove magic string allocation for Ustar when not V7

* Remove file name and directory name allocation in GenerateExtendedAttributeName
  • Loading branch information
stephentoub authored and carlossanlop committed Aug 23, 2022
1 parent abc9acd commit 987d39f
Show file tree
Hide file tree
Showing 12 changed files with 511 additions and 576 deletions.
41 changes: 25 additions & 16 deletions src/libraries/Common/src/System/IO/Archiving.Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ internal static partial class ArchivingUtils
private const char PathSeparatorChar = '/';
private const string PathSeparatorString = "/";

public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false)
public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false)
{
Debug.Assert(length <= entry.Length - offset);
Debug.Assert(buffer != null);

// Remove any leading slashes from the entry name:
while (length > 0)
Expand All @@ -32,26 +31,36 @@ public static string EntryFromPath(string entry, int offset, int length, ref cha
}

if (length == 0)
{
return appendPathSeparator ? PathSeparatorString : string.Empty;
}

int resultLength = appendPathSeparator ? length + 1 : length;
EnsureCapacity(ref buffer, resultLength);
entry.CopyTo(offset, buffer, 0, length);

// '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux)
// We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is
// explicitly trying to standardize to '/'
for (int i = 0; i < length; i++)
if (appendPathSeparator)
{
char ch = buffer[i];
if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar)
buffer[i] = PathSeparatorChar;
length++;
}

if (appendPathSeparator)
buffer[length] = PathSeparatorChar;
return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) =>
{
state.entry.AsSpan(state.offset).CopyTo(dest);
// '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux)
// We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is
// explicitly trying to standardize to '/'
for (int i = 0; i < dest.Length; i++)
{
char ch = dest[i];
if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar)
{
dest[i] = PathSeparatorChar;
}
}
return new string(buffer, 0, resultLength);
if (state.appendPathSeparator)
{
dest[^1] = PathSeparatorChar;
}
});
}

public static void EnsureCapacity(ref char[] buffer, int min)
Expand Down
3 changes: 3 additions & 0 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -255,4 +255,7 @@
<data name="IO_SeekBeforeBegin" xml:space="preserve">
<value>An attempt was made to move the position before the beginning of the stream.</value>
</data>
<data name="TarInvalidNumber" xml:space="preserve">
<value>Unable to parse number.</value>
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) =>
// the substream is just 'a chunk' of the super-stream
protected override void Dispose(bool disposing)
{
if (disposing && !_isDisposed)
{
_isDisposed = true;
}
_isDisposed = true;
base.Dispose(disposing);
}
}
Expand Down
53 changes: 15 additions & 38 deletions src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@ namespace System.Formats.Tar
/// </summary>
public static class TarFile
{
// Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely
// to be greater than the length of typical entry names from the file system, even
// on non-Windows platforms. The capacity will be increased, if needed.
private const int DefaultCapacity = 260;

/// <summary>
/// Creates a tar stream that contains all the filesystem entries from the specified directory.
/// </summary>
Expand Down Expand Up @@ -283,23 +278,14 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre
DirectoryInfo di = new(sourceDirectoryName);
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);

char[] entryNameBuffer = ArrayPool<char>.Shared.Rent(DefaultCapacity);

try
if (includeBaseDirectory)
{
if (includeBaseDirectory)
{
writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer));
}

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer));
}
writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name));
}
finally

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
ArrayPool<char>.Shared.Return(entryNameBuffer);
writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length));
}
}
}
Expand Down Expand Up @@ -339,23 +325,14 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector
DirectoryInfo di = new(sourceDirectoryName);
string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory);

char[] entryNameBuffer = ArrayPool<char>.Shared.Rent(DefaultCapacity);

try
if (includeBaseDirectory)
{
if (includeBaseDirectory)
{
await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false);
}

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false);
}
await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false);
}
finally

foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories))
{
ArrayPool<char>.Shared.Return(entryNameBuffer);
await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false);
}
}
}
Expand All @@ -365,18 +342,18 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i
includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName;

// Constructs the entry name used for a filesystem entry when creating an archive.
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer)
private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength)
{
int entryNameLength = file.FullName.Length - basePathLength;
Debug.Assert(entryNameLength > 0);

bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory);
return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory);
bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0;
return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory);
}

private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer)
private static string GetEntryNameForBaseDirectory(string name)
{
return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true);
return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true);
}

// Extracts an archive into the specified directory.
Expand Down
Loading

0 comments on commit 987d39f

Please sign in to comment.