From d52d83689ff77f65290bb5ecc045361087120d4e Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 11:15:28 -0400 Subject: [PATCH 01/21] Avoid unnecessary byte[] allocations --- .../src/System/Formats/Tar/TarHeader.Read.cs | 92 +++++++++---------- .../src/System/Formats/Tar/TarHeader.Write.cs | 24 ++++- .../src/System/Formats/Tar/TarWriter.cs | 15 ++- 3 files changed, 77 insertions(+), 54 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e89c9d4579dd3..01bbfbe0b376d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -518,11 +518,16 @@ private void ReadUstarAttributes(Span buffer) // Throws if end of stream is reached or if an attribute is malformed. private void ReadExtendedAttributesBlock(Stream archiveStream) { - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadExtendedAttributesFromBuffer(buffer, _name); + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Span span = buffer.AsSpan(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadExtendedAttributesFromBuffer(span, _name); + + ArrayPool.Shared.Return(buffer); } } @@ -531,32 +536,30 @@ private void ReadExtendedAttributesBlock(Stream archiveStream) private async ValueTask ReadExtendedAttributesBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadExtendedAttributesFromBuffer(buffer, _name); - } - } + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); - // Return a byte array if the size field has a valid value for extended attributes. Otherwise, return null, or throw. - private byte[]? CreateExtendedAttributesBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadExtendedAttributesFromBuffer(memory.Span, _name); - // It is not expected that the extended attributes data section will be longer than Array.MaxLength, considering - // the size field is 12 bytes long, which fits a number with a value under int.MaxValue. - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); + ArrayPool.Shared.Return(buffer); } + } - if (_size == 0) + private void ValidateSize() + { + if ((uint)_size > (uint)Array.MaxLength) { - return null; + ThrowSizeFieldTooLarge(); } - return new byte[(int)_size]; + [DoesNotReturn] + void ThrowSizeFieldTooLarge() => + throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); } // Returns a dictionary containing the extended attributes collected from the provided byte buffer. @@ -581,11 +584,16 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string // Throws if end of stream is reached. private void ReadGnuLongPathDataBlock(Stream archiveStream) { - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Span span = buffer.AsSpan(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadGnuLongPathDataFromBuffer(span); + + ArrayPool.Shared.Return(buffer); } } @@ -595,11 +603,17 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream) private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); + + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadGnuLongPathDataFromBuffer(memory.Span); + + ArrayPool.Shared.Return(buffer); } } @@ -618,24 +632,6 @@ private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) } } - // Return a byte array if the size field has a valid value for GNU long metadata entry data. Otherwise, return null, or throw. - private byte[]? CreateGnuLongDataBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.LongLink or TarEntryType.LongPath); - - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); - } - - if (_size == 0) - { - return null; - } - - return new byte[(int)_size]; - } - // Tries to collect the next extended attribute from the string wrapped by the specified reader. // Extended attributes are saved in the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index ad5cdae22d031..825c320446868 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; @@ -524,8 +525,18 @@ private int WriteGnuFields(Span buffer) private static void WriteData(Stream archiveStream, Stream dataStream, long actualLength) { dataStream.CopyTo(archiveStream); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - archiveStream.Write(new byte[paddingAfterData]); + if (paddingAfterData != 0) + { + Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); + + Span padding = stackalloc byte[TarHelpers.RecordSize]; + padding = padding.Slice(0, paddingAfterData); + padding.Clear(); + + archiveStream.Write(padding); + } } // Asynchronously writes the current header's data stream into the archive stream. @@ -534,8 +545,17 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream cancellationToken.ThrowIfCancellationRequested(); await dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - await archiveStream.WriteAsync(new byte[paddingAfterData], cancellationToken).ConfigureAwait(false); + if (paddingAfterData != 0) + { + byte[] buffer = ArrayPool.Shared.Rent(paddingAfterData); + + Array.Clear(buffer, 0, paddingAfterData); + await archiveStream.WriteAsync(buffer.AsMemory(0, paddingAfterData), cancellationToken).ConfigureAwait(false); + + ArrayPool.Shared.Return(buffer); + } } // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 9469231684023..52624f5df84e9 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -364,7 +364,9 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can // by two records consisting entirely of zero bytes. private void WriteFinalRecords() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; + Span emptyRecord = stackalloc byte[TarHelpers.RecordSize]; + emptyRecord.Clear(); + _archiveStream.Write(emptyRecord); _archiveStream.Write(emptyRecord); } @@ -374,9 +376,14 @@ private void WriteFinalRecords() // This method is called from DisposeAsync, so we don't want to propagate a cancelled CancellationToken. private async ValueTask WriteFinalRecordsAsync() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); + const int TwoRecordSize = TarHelpers.RecordSize * 2; + + byte[] twoEmptyRecords = ArrayPool.Shared.Rent(TwoRecordSize); + Array.Clear(twoEmptyRecords, 0, TwoRecordSize); + + await _archiveStream.WriteAsync(twoEmptyRecords.AsMemory(0, TwoRecordSize), cancellationToken: default).ConfigureAwait(false); + + ArrayPool.Shared.Return(twoEmptyRecords); } private (string, string) ValidateWriteEntryArguments(string fileName, string? entryName) From 1e5020e42de6cc9bbab582486ce40c139e02dc11 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 11:25:48 -0400 Subject: [PATCH 02/21] Remove unnecessary use of FileStreamOptions --- .../src/System/Formats/Tar/TarWriter.Unix.cs | 11 +---------- .../src/System/Formats/Tar/TarWriter.Windows.cs | 10 +--------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs index 7e678bcd91871..28eaf1e332323 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs @@ -94,16 +94,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { Debug.Assert(entry._header._dataStream == null); - - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs index bfb6cf17f1107..6b3b2d2cfc307 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs @@ -63,16 +63,8 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - Debug.Assert(entry._header._dataStream == null); - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; From 6938c772e5cf28c500ae2809e312b1ce01bd9e64 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 11:33:04 -0400 Subject: [PATCH 03/21] Clean up Dispose{Async} implementations --- .../src/System/Formats/Tar/SubReadStream.cs | 5 +- .../src/System/Formats/Tar/TarReader.cs | 74 ++++++---------- .../src/System/Formats/Tar/TarWriter.cs | 84 +++++++------------ 3 files changed, 53 insertions(+), 110 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index c6b9b6afc8d6d..014165939ac1c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) => // the substream is just 'a chunk' of the super-stream protected override void Dispose(bool disposing) { - if (disposing && !_isDisposed) - { - _isDisposed = true; - } + _isDisposed = true; base.Dispose(disposing); } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 7e951c5243e35..137acbcfbd2d0 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -54,8 +54,18 @@ public TarReader(Stream archiveStream, bool leaveOpen = false) /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + s.Dispose(); + } + } + } } /// @@ -64,8 +74,18 @@ public void Dispose() /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + await s.DisposeAsync().ConfigureAwait(false); + } + } + } } /// @@ -249,52 +269,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel } } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - s.Dispose(); - } - } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - await s.DisposeAsync().ConfigureAwait(false); - } - } - } - finally - { - _isDisposed = true; - } - } - } - // Asynchronously retrieves the next entry if one is found. private async ValueTask GetNextEntryInternalAsync(bool copyData, CancellationToken cancellationToken) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 52624f5df84e9..cba1a63943ba5 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -84,8 +84,20 @@ public TarWriter(Stream archiveStream, TarEntryFormat format = TarEntryFormat.Pa /// public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + WriteFinalRecords(); + } + + if (!_leaveOpen) + { + _archiveStream.Dispose(); + } + } } /// @@ -93,8 +105,20 @@ public void Dispose() /// public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + await WriteFinalRecordsAsync().ConfigureAwait(false); + } + + if (!_leaveOpen) + { + await _archiveStream.DisposeAsync().ConfigureAwait(false); + } + } } /// @@ -241,58 +265,6 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken return WriteEntryAsyncInternal(entry, cancellationToken); } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (_wroteEntries) - { - WriteFinalRecords(); - } - - - if (!_leaveOpen) - { - _archiveStream.Dispose(); - } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (_wroteEntries) - { - await WriteFinalRecordsAsync().ConfigureAwait(false); - } - - - if (!_leaveOpen) - { - await _archiveStream.DisposeAsync().ConfigureAwait(false); - } - } - finally - { - _isDisposed = true; - } - } - } - // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. private void WriteEntryInternal(TarEntry entry) { From 5fa03e9e178ef3aa3eb7606239053670f0586456 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 11:40:59 -0400 Subject: [PATCH 04/21] Clean up unnecessary consts Not a perf thing, just readability. --- .../src/System/Formats/Tar/TarHeader.Write.cs | 16 ++++++++-------- .../src/System/Formats/Tar/TarHelpers.cs | 5 ----- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 825c320446868..f21cea5c3ea3b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -663,11 +663,11 @@ private static byte[] GenerateExtendedAttributeKeyValuePairAsByteArray(byte[] ke List bytesList = new(); bytesList.AddRange(finalTotalCharCountBytes); - bytesList.Add(TarHelpers.SpaceChar); + bytesList.Add((byte)' '); bytesList.AddRange(keyBytes); - bytesList.Add(TarHelpers.EqualsChar); + bytesList.Add((byte)'='); bytesList.AddRange(valueBytes); - bytesList.Add(TarHelpers.NewLineChar); + bytesList.Add((byte)'\n'); Debug.Assert(bytesList.Count == (realTotalCharCount + suffixByteCount)); @@ -681,7 +681,7 @@ internal int WriteChecksum(int checksum, Span buffer) { // The checksum field is also counted towards the total sum // but as an array filled with spaces - checksum += TarHelpers.SpaceChar * 8; + checksum += (byte)' ' * 8; Span converted = stackalloc byte[FieldLengths.Checksum]; WriteAsOctal(checksum, converted, 0, converted.Length); @@ -689,8 +689,8 @@ internal int WriteChecksum(int checksum, Span buffer) Span destination = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); // Checksum field ends with a null and a space - destination[^1] = TarHelpers.SpaceChar; // ' ' - destination[^2] = 0; // '\0' + destination[^1] = (byte)' '; + destination[^2] = (byte)'\0'; int i = destination.Length - 3; int j = converted.Length - 1; @@ -704,7 +704,7 @@ internal int WriteChecksum(int checksum, Span buffer) } else { - destination[i] = TarHelpers.ZeroChar; // Leading zero chars '0' + destination[i] = (byte)'0'; // Leading zero chars } i--; } @@ -749,7 +749,7 @@ private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytes } else { - destination[i] = TarHelpers.ZeroChar; // leading zeros + destination[i] = (byte)'0'; // leading zeros } checksum += destination[i]; i--; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index d4592eb85d4b5..ec8cd04e3504c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -17,11 +17,6 @@ internal static partial class TarHelpers internal const short RecordSize = 512; internal const int MaxBufferLength = 4096; - internal const int ZeroChar = 0x30; - internal const byte SpaceChar = 0x20; - internal const byte EqualsChar = 0x3d; - internal const byte NewLineChar = 0xa; - // Default mode for TarEntry created for a file-type. private const UnixFileMode DefaultFileMode = UnixFileMode.UserRead | UnixFileMode.UserWrite | From 8dd0ac1108db7d32675c23648d5a6dae6fec3b98 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 12:24:58 -0400 Subject: [PATCH 05/21] Remove MemoryStream/Encoding.UTF8.GetBytes allocations, unnecessary async variants, and overhaul GenerateExtendedAttributesDataStream --- .../src/System/Formats/Tar/TarHeader.Write.cs | 148 +++++++----------- 1 file changed, 58 insertions(+), 90 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index f21cea5c3ea3b..f0b1a38ec1577 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -2,10 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -216,7 +218,7 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Second, we determine if we need a preceding LongPath, and write it if needed if (_name.Length > FieldLengths.Name) { - TarHeader longPathHeader = await GetGnuLongMetadataHeaderAsync(TarEntryType.LongPath, _name, cancellationToken).ConfigureAwait(false); + TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name); await longPathHeader.WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it } @@ -228,34 +230,8 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Creates and returns a GNU long metadata header, with the specified long text written into its data stream. private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText) { - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - longMetadataHeader._dataStream.Write(Encoding.UTF8.GetBytes(longText)); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Asynchronously creates and returns a GNU long metadata header, with the specified long text written into its data stream. - private static async Task GetGnuLongMetadataHeaderAsync(TarEntryType entryType, string longText, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - await longMetadataHeader._dataStream.WriteAsync(Encoding.UTF8.GetBytes(longText), cancellationToken).ConfigureAwait(false); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Constructs a GNU metadata header with default values for the specified entry type. - private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, TarEntryType entryType) - { - Debug.Assert((entryType is TarEntryType.LongPath && longTextLength > FieldLengths.Name) || - (entryType is TarEntryType.LongLink && longTextLength > FieldLengths.LinkName)); + Debug.Assert((entryType is TarEntryType.LongPath && longText.Length > FieldLengths.Name) || + (entryType is TarEntryType.LongLink && longText.Length > FieldLengths.LinkName)); TarHeader longMetadataHeader = new(TarEntryFormat.Gnu); @@ -265,7 +241,7 @@ private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, Tar longMetadataHeader._gid = 0; longMetadataHeader._mTime = DateTimeOffset.MinValue; // 0 longMetadataHeader._typeFlag = entryType; - longMetadataHeader._dataStream = new MemoryStream(); + longMetadataHeader._dataStream = new MemoryStream(Encoding.UTF8.GetBytes(longText)); return longMetadataHeader; } @@ -321,13 +297,13 @@ private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffe } // Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream and returns the value of the final checksum. - private async Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) + private Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber); - _dataStream = await GenerateExtendedAttributesDataStreamAsync(extendedAttributes, cancellationToken).ConfigureAwait(false); - await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + _dataStream = GenerateExtendedAttributesDataStream(extendedAttributes); + return WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken); } // Initializes the name, mode and type flag of a PAX extended attributes entry. @@ -561,37 +537,69 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. private static Stream? GenerateExtendedAttributesDataStream(Dictionary extendedAttributes) { + byte[]? buffer = null; MemoryStream? dataStream = null; + if (extendedAttributes.Count > 0) { dataStream = new MemoryStream(); + foreach ((string attribute, string value) in extendedAttributes) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - dataStream.Write(entryBytes); + // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html + + // The format is: + // "XX attribute=value\n" + // where "XX" is the number of characters in the entry, including those required for the count itself. + int length = 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value); + length += CountDigits(length); + + // Get a large enough buffer if we don't already have one. + if (buffer is null || buffer.Length < length) + { + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } + buffer = ArrayPool.Shared.Rent(length); + } + + // Format the contents. + bool formatted = Utf8Formatter.TryFormat(length, buffer, out int bytesWritten); + Debug.Assert(formatted); + buffer[bytesWritten++] = (byte)' '; + bytesWritten += Encoding.UTF8.GetBytes(attribute, buffer.AsSpan(bytesWritten)); + buffer[bytesWritten++] = (byte)'='; + bytesWritten += Encoding.UTF8.GetBytes(value, buffer.AsSpan(bytesWritten)); + buffer[bytesWritten++] = (byte)'\n'; + + // Write it to the stream. + dataStream.Write(buffer.AsSpan(0, bytesWritten)); } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + + dataStream.Position = 0; // Ensure it gets written into the archive from the beginning } - return dataStream; - } - // Asynchronously dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. - private static async Task GenerateExtendedAttributesDataStreamAsync(Dictionary extendedAttributes, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } - MemoryStream? dataStream = null; - if (extendedAttributes.Count > 0) + return dataStream; + + static int CountDigits(int value) { - dataStream = new MemoryStream(); - foreach ((string attribute, string value) in extendedAttributes) + Debug.Assert(value >= 0); + int digits = 1; + while (true) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - await dataStream.WriteAsync(entryBytes, cancellationToken).ConfigureAwait(false); + value /= 10; + if (value == 0) break; + digits++; } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + return digits; } - return dataStream; } // Some fields that have a reserved spot in the header, may not fit in such field anymore, but they can fit in the @@ -634,46 +642,6 @@ static void TryAddStringField(Dictionary extendedAttributes, str } } - // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html - private static byte[] GenerateExtendedAttributeKeyValuePairAsByteArray(byte[] keyBytes, byte[] valueBytes) - { - // Assuming key="ab" and value="cdef" - - // The " ab=cdef\n" attribute string has a length of 9 chars - int suffixByteCount = 3 + // leading space, equals sign and trailing newline - keyBytes.Length + valueBytes.Length; - - // The count string "9" has a length of 1 char - string suffixByteCountString = suffixByteCount.ToString(); - int firstTotalByteCount = Encoding.ASCII.GetByteCount(suffixByteCountString); - - // If we prepend the count string length to the attribute string, - // the total length increases to 10, which has one more digit - // "9 abc=def\n" - int firstPrefixAndSuffixByteCount = firstTotalByteCount + suffixByteCount; - - // The new count string "10" has an increased length of 2 chars - string prefixAndSuffixByteCountString = firstPrefixAndSuffixByteCount.ToString(); - int realTotalCharCount = Encoding.ASCII.GetByteCount(prefixAndSuffixByteCountString); - - byte[] finalTotalCharCountBytes = Encoding.ASCII.GetBytes(prefixAndSuffixByteCountString); - - // The final string should contain the correct total length now - List bytesList = new(); - - bytesList.AddRange(finalTotalCharCountBytes); - bytesList.Add((byte)' '); - bytesList.AddRange(keyBytes); - bytesList.Add((byte)'='); - bytesList.AddRange(valueBytes); - bytesList.Add((byte)'\n'); - - Debug.Assert(bytesList.Count == (realTotalCharCount + suffixByteCount)); - - return bytesList.ToArray(); - } - // The checksum accumulator first adds up the byte values of eight space chars, then the final number // is written on top of those spaces on the specified span as ascii. // At the end, it's saved in the header field and the final value returned. From 5be57ad948a6a99d635c4d5d458d2d4606de59b8 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 12:33:18 -0400 Subject: [PATCH 06/21] Avoid string allocations in ReadMagicAttribute --- .../src/System/Formats/Tar/TarHeader.Read.cs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 01bbfbe0b376d..763af89d4b598 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -425,17 +425,21 @@ private void ReadMagicAttribute(Span buffer) } // When the magic field is set, the archive is newer than v7. - _magic = Encoding.ASCII.GetString(magic); - - if (_magic == GnuMagic) + if (magic.SequenceEqual(GnuMagicBytes)) { + _magic = GnuMagic; _format = TarEntryFormat.Gnu; } - else if (_format == TarEntryFormat.V7 && _magic == UstarMagic) + else if (_format == TarEntryFormat.V7 && magic.SequenceEqual(PaxMagicBytes)) { // Important: Only change to ustar if we had not changed the format to pax already + _magic = UstarMagic; _format = TarEntryFormat.Ustar; } + else + { + _magic = Encoding.ASCII.GetString(magic); + } } // Reads the version string and determines the format depending on its value. From ab71e6cf63a209a7bab677c3b5f4b2fbd06e8020 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 13:32:06 -0400 Subject: [PATCH 07/21] Avoid allocation in WriteAsOctal --- .../src/System/Formats/Tar/TarHeader.Write.cs | 53 +++++++++++-------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index f0b1a38ec1577..f1557e3696d7c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -390,27 +390,27 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (_mode > 0) { - checksum += WriteAsOctal(_mode, buffer, FieldLocations.Mode, FieldLengths.Mode); + checksum += WriteAsOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); } if (_uid > 0) { - checksum += WriteAsOctal(_uid, buffer, FieldLocations.Uid, FieldLengths.Uid); + checksum += WriteAsOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); } if (_gid > 0) { - checksum += WriteAsOctal(_gid, buffer, FieldLocations.Gid, FieldLengths.Gid); + checksum += WriteAsOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); } _size = actualLength; if (_size > 0) { - checksum += WriteAsOctal(_size, buffer, FieldLocations.Size, FieldLengths.Size); + checksum += WriteAsOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size)); } - checksum += WriteAsTimestamp(_mTime, buffer, FieldLocations.MTime, FieldLengths.MTime); + checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)); char typeFlagChar = (char)actualEntryType; buffer[FieldLocations.TypeFlag] = (byte)typeFlagChar; @@ -418,7 +418,7 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (!string.IsNullOrEmpty(_linkName)) { - checksum += WriteAsAsciiString(_linkName, buffer, FieldLocations.LinkName, FieldLengths.LinkName); + checksum += WriteAsAsciiString(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)); } return checksum; @@ -462,22 +462,22 @@ private int WritePosixAndGnuSharedFields(Span buffer) if (!string.IsNullOrEmpty(_uName)) { - checksum += WriteAsAsciiString(_uName, buffer, FieldLocations.UName, FieldLengths.UName); + checksum += WriteAsAsciiString(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName)); } if (!string.IsNullOrEmpty(_gName)) { - checksum += WriteAsAsciiString(_gName, buffer, FieldLocations.GName, FieldLengths.GName); + checksum += WriteAsAsciiString(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName)); } if (_devMajor > 0) { - checksum += WriteAsOctal(_devMajor, buffer, FieldLocations.DevMajor, FieldLengths.DevMajor); + checksum += WriteAsOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); } if (_devMinor > 0) { - checksum += WriteAsOctal(_devMinor, buffer, FieldLocations.DevMinor, FieldLengths.DevMinor); + checksum += WriteAsOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } return checksum; @@ -486,8 +486,8 @@ private int WritePosixAndGnuSharedFields(Span buffer) // Saves the gnu-specific fields into the specified spans. private int WriteGnuFields(Span buffer) { - int checksum = WriteAsTimestamp(_aTime, buffer, FieldLocations.ATime, FieldLengths.ATime); - checksum += WriteAsTimestamp(_cTime, buffer, FieldLocations.CTime, FieldLengths.CTime); + int checksum = WriteAsTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + checksum += WriteAsTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); if (_gnuUnusedBytes != null) { @@ -652,7 +652,8 @@ internal int WriteChecksum(int checksum, Span buffer) checksum += (byte)' ' * 8; Span converted = stackalloc byte[FieldLengths.Checksum]; - WriteAsOctal(checksum, converted, 0, converted.Length); + converted.Clear(); + WriteAsOctal(checksum, converted); Span destination = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); @@ -727,25 +728,35 @@ private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytes } // Writes the specified decimal number as a right-aligned octal number and returns its checksum. - internal static int WriteAsOctal(long tenBaseNumber, Span destination, int location, int length) + internal static int WriteAsOctal(long tenBaseNumber, Span destination) { - long octal = TarHelpers.ConvertDecimalToOctal(tenBaseNumber); - byte[] bytes = Encoding.ASCII.GetBytes(octal.ToString()); - return WriteRightAlignedBytesAndGetChecksum(bytes.AsSpan(), destination.Slice(location, length)); + ulong value = (ulong)tenBaseNumber; + Span digits = stackalloc byte[32]; // longest possible octal representation of a ulong is 21 digits + + int i = digits.Length - 1; + while (true) + { + digits[i] = (byte)('0' + (value % 8)); + value /= 8; + if (value == 0) break; + i--; + } + + return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination); } // Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum. - private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination, int location, int length) + private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination) { long unixTimeSeconds = timestamp.ToUnixTimeSeconds(); - return WriteAsOctal(unixTimeSeconds, destination, location, length); + return WriteAsOctal(unixTimeSeconds, destination); } // Writes the specified text as an ASCII string aligned to the left, and returns its checksum. - private static int WriteAsAsciiString(string str, Span buffer, int location, int length) + private static int WriteAsAsciiString(string str, Span buffer) { byte[] bytes = Encoding.ASCII.GetBytes(str); - return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer.Slice(location, length)); + return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer); } // Gets the special name for the 'name' field in an extended attribute entry. From df2d74220c2f8a35df1df6db3c5675ba2e03b2f7 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 15:20:18 -0400 Subject: [PATCH 08/21] Improve handling of octal --- .../src/Resources/Strings.resx | 3 + .../src/System/Formats/Tar/TarHeader.Read.cs | 20 ++-- .../src/System/Formats/Tar/TarHeader.Write.cs | 26 +++--- .../src/System/Formats/Tar/TarHelpers.cs | 91 ++++++++++--------- 4 files changed, 75 insertions(+), 65 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 79e3188410c3c..001d85b764bca 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -255,4 +255,7 @@ An attempt was made to move the position before the beginning of the stream. + + Unable to parse number. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 763af89d4b598..d2c8d2146e320 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -357,14 +357,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { return null; } - int checksum = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(spanChecksum); + int checksum = TarHelpers.ParseOctalAsInt32(spanChecksum); // Zero checksum means the whole header is empty if (checksum == 0) { return null; } - long size = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + long size = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); if (size < 0) { throw new FormatException(string.Format(SR.TarSizeFieldNegative)); @@ -373,14 +373,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), - mode: TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), - mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), + mode: TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), + mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.ParseOctalAsInt64(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) { _checksum = checksum, _size = size, - _uid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), - _gid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), + _uid = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), + _gid = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; @@ -481,10 +481,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice) { // Major number for a character device or block device entry. - _devMajor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); + _devMajor = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); // Minor number for a character device or block device entry. - _devMinor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); + _devMinor = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } } @@ -493,10 +493,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) private void ReadGnuAttributes(Span buffer) { // Convert byte arrays - long aTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + long aTime = TarHelpers.ParseOctalAsInt64(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); _aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime); - long cTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); + long cTime = TarHelpers.ParseOctalAsInt64(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); _cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime); // TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230 diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index f1557e3696d7c..3f6c44ded16d0 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -390,24 +390,24 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (_mode > 0) { - checksum += WriteAsOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); + checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); } if (_uid > 0) { - checksum += WriteAsOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); + checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); } if (_gid > 0) { - checksum += WriteAsOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); + checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); } _size = actualLength; if (_size > 0) { - checksum += WriteAsOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size)); } checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)); @@ -472,12 +472,12 @@ private int WritePosixAndGnuSharedFields(Span buffer) if (_devMajor > 0) { - checksum += WriteAsOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); + checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); } if (_devMinor > 0) { - checksum += WriteAsOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); + checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } return checksum; @@ -653,7 +653,7 @@ internal int WriteChecksum(int checksum, Span buffer) Span converted = stackalloc byte[FieldLengths.Checksum]; converted.Clear(); - WriteAsOctal(checksum, converted); + FormatOctal(checksum, converted); Span destination = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); @@ -728,17 +728,17 @@ private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytes } // Writes the specified decimal number as a right-aligned octal number and returns its checksum. - internal static int WriteAsOctal(long tenBaseNumber, Span destination) + internal static int FormatOctal(long value, Span destination) { - ulong value = (ulong)tenBaseNumber; + ulong remaining = (ulong)value; Span digits = stackalloc byte[32]; // longest possible octal representation of a ulong is 21 digits int i = digits.Length - 1; while (true) { - digits[i] = (byte)('0' + (value % 8)); - value /= 8; - if (value == 0) break; + digits[i] = (byte)('0' + (remaining % 8)); + remaining /= 8; + if (remaining == 0) break; i--; } @@ -749,7 +749,7 @@ internal static int WriteAsOctal(long tenBaseNumber, Span destination) private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination) { long unixTimeSeconds = timestamp.ToUnixTimeSeconds(); - return WriteAsOctal(unixTimeSeconds, destination); + return FormatOctal(unixTimeSeconds, destination); } // Writes the specified text as an ASCII string aligned to the left, and returns its checksum. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index ec8cd04e3504c..6038e6427283a 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -3,6 +3,7 @@ using System.Buffers; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; using System.Text; @@ -112,36 +113,6 @@ internal static int CalculatePadding(long size) return padding; } - // Returns the specified 8-base number as a 10-base number. - internal static int ConvertDecimalToOctal(int value) - { - int multiplier = 1; - int accum = value; - int actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - - // Returns the specified 10-base number as an 8-base number. - internal static long ConvertDecimalToOctal(long value) - { - long multiplier = 1; - long accum = value; - long actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - // Returns true if all the bytes in the specified array are nulls, false otherwise. internal static bool IsAllNullBytes(Span buffer) => buffer.IndexOfAnyExcept((byte)0) < 0; @@ -186,9 +157,10 @@ internal static bool TryGetStringAsBaseTenInteger(IReadOnlyDictionary buffer) + internal static int ParseOctalAsInt32(ReadOnlySpan buffer) { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt32(str, fromBase: 8); + buffer = TrimEndingNullsAndSpaces(buffer); + + uint value = 0; + foreach (byte b in buffer) + { + uint digit = (uint)(b - '0'); + if (digit >= 8) + { + ThrowInvalidNumber(); + } + + value = checked((value * 8u) + digit); + } + + return (int)value; } // Receives a byte array that represents an ASCII string containing a number in octal base. // Converts the array to an octal base number, then transforms it to ten base and returns it. - internal static long GetTenBaseLongFromOctalAsciiChars(Span buffer) + internal static long ParseOctalAsInt64(ReadOnlySpan buffer) { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt64(str, fromBase: 8); + buffer = TrimEndingNullsAndSpaces(buffer); + + ulong value = 0; + foreach (byte b in buffer) + { + ulong digit = (ulong)(b - '0'); + if (digit >= 8) + { + ThrowInvalidNumber(); + } + + value = checked((value * 8u) + digit); + } + + return (long)value; } + [DoesNotReturn] + private static void ThrowInvalidNumber() => + throw new FormatException(SR.Format(SR.TarInvalidNumber)); + // Returns the string contained in the specified buffer of bytes, // in the specified encoding, removing the trailing null or space chars. private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding) + { + buffer = TrimEndingNullsAndSpaces(buffer); + return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); + } + + private static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) { int trimmedLength = buffer.Length; - while (trimmedLength > 0 && IsByteNullOrSpace(buffer[trimmedLength - 1])) + while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) { trimmedLength--; } - return trimmedLength == 0 ? string.Empty : encoding.GetString(buffer.Slice(0, trimmedLength)); - - static bool IsByteNullOrSpace(byte c) => c is 0 or 32; + return buffer.Slice(0, trimmedLength); } // Returns the ASCII string contained in the specified buffer of bytes, From c6058bd1d9b39a443f6f84a27d0b8d05ad7e14af Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 15:39:01 -0400 Subject: [PATCH 09/21] Avoid allocation for version string --- .../src/System/Formats/Tar/TarHeader.Read.cs | 34 ++++++++++++------- .../src/System/Formats/Tar/TarHeader.Write.cs | 8 ++--- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index d2c8d2146e320..39bb54a2eff18 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -430,7 +430,7 @@ private void ReadMagicAttribute(Span buffer) _magic = GnuMagic; _format = TarEntryFormat.Gnu; } - else if (_format == TarEntryFormat.V7 && magic.SequenceEqual(PaxMagicBytes)) + else if (_format == TarEntryFormat.V7 && magic.SequenceEqual(UstarMagicBytes)) { // Important: Only change to ustar if we had not changed the format to pax already _magic = UstarMagic; @@ -452,19 +452,29 @@ private void ReadVersionAttribute(Span buffer) } Span version = buffer.Slice(FieldLocations.Version, FieldLengths.Version); - - _version = Encoding.ASCII.GetString(version); - - // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" - if ((_format is TarEntryFormat.Ustar or TarEntryFormat.Pax) && _version != UstarVersion) + switch (_format) { - throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); - } + case TarEntryFormat.Ustar or TarEntryFormat.Pax: + // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" + if (!version.SequenceEqual(UstarVersionBytes)) + { + throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); + } + _version = UstarVersion; + break; - // The GNU format has a Magic+Version 8 byte string "ustar \0" - if (_format == TarEntryFormat.Gnu && _version != GnuVersion) - { - throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + case TarEntryFormat.Gnu: + // The GNU format has a Magic+Version 8 byte string "ustar \0" + if (!version.SequenceEqual(GnuVersionBytes)) + { + throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + } + _version = GnuVersion; + break; + + default: + _version = Encoding.ASCII.GetString(version); + break; } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 3f6c44ded16d0..7d97bc20e2d4f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -17,8 +17,8 @@ namespace System.Formats.Tar // Writes header attributes of a tar archive entry. internal sealed partial class TarHeader { - private static ReadOnlySpan PaxMagicBytes => "ustar\0"u8; - private static ReadOnlySpan PaxVersionBytes => "00"u8; + private static ReadOnlySpan UstarMagicBytes => "ustar\0"u8; + private static ReadOnlySpan UstarVersionBytes => "00"u8; private static ReadOnlySpan GnuMagicBytes => "ustar "u8; private static ReadOnlySpan GnuVersionBytes => " \0"u8; @@ -442,8 +442,8 @@ private long GetTotalDataBytesToWrite() // Writes the magic and version fields of a ustar or pax entry into the specified spans. private static int WritePosixMagicAndVersion(Span buffer) { - int checksum = WriteLeftAlignedBytesAndGetChecksum(PaxMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); - checksum += WriteLeftAlignedBytesAndGetChecksum(PaxVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); + int checksum = WriteLeftAlignedBytesAndGetChecksum(UstarMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); + checksum += WriteLeftAlignedBytesAndGetChecksum(UstarVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); return checksum; } From 5756a8c449d2bd76246e17a43e3a87d7c83a2d93 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 15:45:18 -0400 Subject: [PATCH 10/21] Removing boxing and char string allocation in GenerateExtendedAttributeName --- .../src/System/Formats/Tar/TarHeader.Write.cs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 7d97bc20e2d4f..1a6ce11de6e2c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -23,10 +23,6 @@ internal sealed partial class TarHeader private static ReadOnlySpan GnuMagicBytes => "ustar "u8; private static ReadOnlySpan GnuVersionBytes => " \0"u8; - // Extended Attribute entries have a special format in the Name field: - // "{dirName}/PaxHeaders.{processId}/{fileName}{trailingSeparator}" - private const string PaxHeadersFormat = "{0}/PaxHeaders.{1}/{2}{3}"; - // Predefined text for the Name field of a GNU long metadata entry. Applies for both LongPath ('L') and LongLink ('K'). private const string GnuLongMetadataName = "././@LongLink"; @@ -769,15 +765,12 @@ private string GenerateExtendedAttributeName() string? dirName = Path.GetDirectoryName(_name); dirName = string.IsNullOrEmpty(dirName) ? "." : dirName; - int processId = Environment.ProcessId; - string? fileName = Path.GetFileName(_name); fileName = string.IsNullOrEmpty(fileName) ? "." : fileName; - string trailingSeparator = (_typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList) ? - $"{Path.DirectorySeparatorChar}" : string.Empty; - - return string.Format(PaxHeadersFormat, dirName, processId, fileName, trailingSeparator); + return _typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList ? + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}" : + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}"; } // Gets the special name for the 'name' field in a global extended attribute entry. From 9539a4a42fd7c06003e79b8a8af8e6bafd372e88 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 15:55:02 -0400 Subject: [PATCH 11/21] Fix a couple unnecessary dictionary lookups --- .../src/System/Formats/Tar/TarHeader.Read.cs | 3 +-- .../src/System/Formats/Tar/TarHeader.Write.cs | 3 ++- .../src/System/Formats/Tar/TarHelpers.Unix.cs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 39bb54a2eff18..682b75743ba6b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -585,11 +585,10 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string while (TryGetNextExtendedAttribute(reader, out string? key, out string? value)) { - if (ExtendedAttributes.ContainsKey(key)) + if (!ExtendedAttributes.TryAdd(key, value)) { throw new FormatException(string.Format(SR.TarDuplicateExtendedAttribute, name)); } - ExtendedAttributes.Add(key, value); } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 1a6ce11de6e2c..9df1a5d1bde97 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -608,10 +608,12 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() { ExtendedAttributes.Add(PaxEaMTime, TarHelpers.GetTimestampStringFromDateTimeOffset(_mTime)); } + if (!string.IsNullOrEmpty(_gName)) { TryAddStringField(ExtendedAttributes, PaxEaGName, _gName, FieldLengths.GName); } + if (!string.IsNullOrEmpty(_uName)) { TryAddStringField(ExtendedAttributes, PaxEaUName, _uName, FieldLengths.UName); @@ -627,7 +629,6 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() ExtendedAttributes.Add(PaxEaSize, _size.ToString()); } - // Adds the specified string to the dictionary if it's longer than the specified max byte length. static void TryAddStringField(Dictionary extendedAttributes, string key, string value, int maxLength) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs index f684dd78fde1c..525720d57a008 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.Unix.cs @@ -92,7 +92,7 @@ internal static void CreateDirectory(string fullPath, UnixFileMode? mode, bool o } else if (overwriteMetadata || pendingModes.ContainsKey(fullPath)) { - pendingModes[fullPath] = mode.Value; + pendingModes[fullPath] = mode.Value; } } return; From 74bbc9ce4d35da27ce4cdae72d6c24effc3610f5 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 15:59:38 -0400 Subject: [PATCH 12/21] Replace Enum.HasFlag usage --- .../System.Formats.Tar/src/System/Formats/Tar/TarFile.cs | 2 +- .../src/System/Formats/Tar/TarWriter.Windows.cs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index a77cb2c36a65e..72e27b6030852 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -370,7 +370,7 @@ private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int bas int entryNameLength = file.FullName.Length - basePathLength; Debug.Assert(entryNameLength > 0); - bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory); + bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory); } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs index 6b3b2d2cfc307..d3fa19a5b12ea 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs @@ -22,15 +22,15 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil FileAttributes attributes = File.GetAttributes(fullPath); TarEntryType entryType; - if (attributes.HasFlag(FileAttributes.ReparsePoint)) + if ((attributes & FileAttributes.ReparsePoint) != 0) { entryType = TarEntryType.SymbolicLink; } - else if (attributes.HasFlag(FileAttributes.Directory)) + else if ((attributes & FileAttributes.Directory) != 0) { entryType = TarEntryType.Directory; } - else if (attributes.HasFlag(FileAttributes.Normal) || attributes.HasFlag(FileAttributes.Archive)) + else if ((attributes & (FileAttributes.Normal | FileAttributes.Archive)) != 0) { entryType = Format is TarEntryFormat.V7 ? TarEntryType.V7RegularFile : TarEntryType.RegularFile; } @@ -48,7 +48,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil _ => throw new FormatException(string.Format(SR.TarInvalidFormat, Format)), }; - FileSystemInfo info = attributes.HasFlag(FileAttributes.Directory) ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); + FileSystemInfo info = (attributes & FileAttributes.Directory) != 0 ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); entry._header._mTime = info.LastWriteTimeUtc; entry._header._aTime = info.LastAccessTimeUtc; From 46e0855df266f34565ed774d8a6d5ad9c6a16f9f Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 17:49:34 -0400 Subject: [PATCH 13/21] Remove allocations from Write{Posix}Name --- .../src/System/Formats/Tar/TarHeader.Write.cs | 89 ++++++++++--------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 9df1a5d1bde97..57d5ced6eb0f4 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -60,7 +60,7 @@ private long WriteV7FieldsToBuffer(Span buffer) long actualLength = GetTotalDataBytesToWrite(); TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType); _checksum = WriteChecksum(tmpChecksum, buffer); @@ -275,7 +275,7 @@ private void WriteAsGnuSharedInternal(Span buffer, out long actualLength) { actualLength = GetTotalDataBytesToWrite(); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag)); tmpChecksum += WriteGnuMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); @@ -358,24 +358,33 @@ private void WriteAsPaxSharedInternal(Span buffer, out long actualLength) _checksum = WriteChecksum(tmpChecksum, buffer); } - // All formats save in the name byte array only the ASCII bytes that fit. The full string is returned in the out byte array. - private int WriteName(Span buffer, out byte[] fullNameBytes) + // All formats save in the name byte array only the ASCII bytes that fit. + private int WriteName(Span buffer) { - fullNameBytes = Encoding.ASCII.GetBytes(_name); - int nameBytesLength = Math.Min(fullNameBytes.Length, FieldLengths.Name); - int checksum = WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(0, nameBytesLength), buffer.Slice(FieldLocations.Name, FieldLengths.Name)); - return checksum; + ReadOnlySpan src = _name.AsSpan(0, Math.Min(_name.Length, FieldLengths.Name)); + Span dest = buffer.Slice(FieldLocations.Name, FieldLengths.Name); + int encoded = Encoding.ASCII.GetBytes(src, dest); + return Checksum(dest.Slice(0, encoded)); } // Ustar and PAX save in the name byte array only the ASCII bytes that fit, and the rest of that string is saved in the prefix field. private int WritePosixName(Span buffer) { - int checksum = WriteName(buffer, out byte[] fullNameBytes); - if (fullNameBytes.Length > FieldLengths.Name) + int checksum = WriteName(buffer); + + if (_name.Length > FieldLengths.Name) { - int prefixBytesLength = Math.Min(fullNameBytes.Length - FieldLengths.Name, FieldLengths.Name); - checksum += WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(FieldLengths.Name, prefixBytesLength), buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); + int prefixBytesLength = Math.Min(_name.Length - FieldLengths.Name, FieldLengths.Name); + Span remaining = prefixBytesLength <= 256 ? + stackalloc byte[prefixBytesLength] : + new byte[prefixBytesLength]; + + int encoded = Encoding.ASCII.GetBytes(_name.AsSpan(FieldLengths.Name), remaining); + Debug.Assert(encoded == remaining.Length); + + checksum += WriteLeftAlignedBytesAndGetChecksum(remaining, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); } + return checksum; } @@ -642,7 +651,7 @@ static void TryAddStringField(Dictionary extendedAttributes, str // The checksum accumulator first adds up the byte values of eight space chars, then the final number // is written on top of those spaces on the specified span as ascii. // At the end, it's saved in the header field and the final value returned. - internal int WriteChecksum(int checksum, Span buffer) + internal static int WriteChecksum(int checksum, Span buffer) { // The checksum field is also counted towards the total sum // but as an array filled with spaces @@ -683,44 +692,42 @@ private static int WriteLeftAlignedBytesAndGetChecksum(ReadOnlySpan bytesT { Debug.Assert(destination.Length > 1); - int checksum = 0; - - for (int i = 0, j = 0; i < destination.Length && j < bytesToWrite.Length; i++, j++) - { - destination[i] = bytesToWrite[j]; - checksum += destination[i]; - } + // Copy as many bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + bytesToWrite.CopyTo(destination); - return checksum; + return Checksum(bytesToWrite); } // Writes the specified bytes aligned to the right, filling all the leading bytes with the zero char 0x30, // ensuring a null terminator is included at the end of the specified span. private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytesToWrite, Span destination) { - int checksum = 0; - int i = destination.Length - 1; - int j = bytesToWrite.Length - 1; + Debug.Assert(destination.Length > 1); - while (i >= 0) + // Null terminated + destination[^1] = (byte)'\0'; + + // Copy as many input bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length - 1); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + int copyPos = destination.Length - 1 - bytesToWrite.Length; + bytesToWrite.CopyTo(destination.Slice(copyPos)); + + // Fill all leading bytes with zeros + destination.Slice(0, copyPos).Fill((byte)'0'); + + return Checksum(destination); + } + + private static int Checksum(ReadOnlySpan bytes) + { + int checksum = 0; + foreach (byte b in bytes) { - if (i == destination.Length - 1) - { - destination[i] = 0; // null terminated - } - else if (j >= 0) - { - destination[i] = bytesToWrite[j]; - j--; - } - else - { - destination[i] = (byte)'0'; // leading zeros - } - checksum += destination[i]; - i--; + checksum += b; } - return checksum; } From 02ca7da9dc4f2b2c74ef3687559ca6c359e4310d Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 17:56:54 -0400 Subject: [PATCH 14/21] Replace ArrayPool use with string.Create --- .../Common/src/System/IO/Archiving.Utils.cs | 41 +++++++++------ .../src/System/Formats/Tar/TarFile.cs | 51 +++++-------------- 2 files changed, 39 insertions(+), 53 deletions(-) diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.cs index 8335dbd26edbc..80d633228cc0c 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.cs @@ -15,10 +15,9 @@ internal static partial class ArchivingUtils private const char PathSeparatorChar = '/'; private const string PathSeparatorString = "/"; - public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false) + public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false) { Debug.Assert(length <= entry.Length - offset); - Debug.Assert(buffer != null); // Remove any leading slashes from the entry name: while (length > 0) @@ -32,26 +31,36 @@ public static string EntryFromPath(string entry, int offset, int length, ref cha } if (length == 0) + { return appendPathSeparator ? PathSeparatorString : string.Empty; + } - int resultLength = appendPathSeparator ? length + 1 : length; - EnsureCapacity(ref buffer, resultLength); - entry.CopyTo(offset, buffer, 0, length); - - // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) - // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is - // explicitly trying to standardize to '/' - for (int i = 0; i < length; i++) + if (appendPathSeparator) { - char ch = buffer[i]; - if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) - buffer[i] = PathSeparatorChar; + length++; } - if (appendPathSeparator) - buffer[length] = PathSeparatorChar; + return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) => + { + state.entry.AsSpan(state.offset).CopyTo(dest); + + // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) + // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is + // explicitly trying to standardize to '/' + for (int i = 0; i < dest.Length; i++) + { + char ch = dest[i]; + if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) + { + dest[i] = PathSeparatorChar; + } + } - return new string(buffer, 0, resultLength); + if (state.appendPathSeparator) + { + dest[^1] = PathSeparatorChar; + } + }); } public static void EnsureCapacity(ref char[] buffer, int min) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index 72e27b6030852..9953045a5840d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -15,11 +15,6 @@ namespace System.Formats.Tar /// public static class TarFile { - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - private const int DefaultCapacity = 260; - /// /// Creates a tar stream that contains all the filesystem entries from the specified directory. /// @@ -283,23 +278,14 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer)); - } - - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer)); - } + writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name)); } - finally + + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - ArrayPool.Shared.Return(entryNameBuffer); + writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length)); } } } @@ -339,23 +325,14 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } - - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } + await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false); } - finally + + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - ArrayPool.Shared.Return(entryNameBuffer); + await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false); } } } @@ -365,18 +342,18 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName; // Constructs the entry name used for a filesystem entry when creating an archive. - private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer) + private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength) { int entryNameLength = file.FullName.Length - basePathLength; Debug.Assert(entryNameLength > 0); bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; - return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory); + return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory); } - private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer) + private static string GetEntryNameForBaseDirectory(string name) { - return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true); + return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true); } // Extracts an archive into the specified directory. From f9eb99f6d10079b4b0af9464d7c1d0fea5582f21 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 18:09:36 -0400 Subject: [PATCH 15/21] Replace more superfluous ArrayPool usage --- .../src/System/Formats/Tar/TarHeader.Read.cs | 26 ++++++-- .../src/System/Formats/Tar/TarHeader.Write.cs | 23 ++++--- .../src/System/Formats/Tar/TarWriter.cs | 63 +++++++++---------- 3 files changed, 63 insertions(+), 49 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 682b75743ba6b..a02d60bc89d3c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -535,13 +535,20 @@ private void ReadExtendedAttributesBlock(Stream archiveStream) if (_size != 0) { ValidateSize(); - byte[] buffer = ArrayPool.Shared.Rent((int)_size); - Span span = buffer.AsSpan(0, (int)_size); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); archiveStream.ReadExactly(span); ReadExtendedAttributesFromBuffer(span, _name); - ArrayPool.Shared.Return(buffer); + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -600,13 +607,20 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream) if (_size != 0) { ValidateSize(); - byte[] buffer = ArrayPool.Shared.Rent((int)_size); - Span span = buffer.AsSpan(0, (int)_size); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); archiveStream.ReadExactly(span); ReadGnuLongPathDataFromBuffer(span); - ArrayPool.Shared.Return(buffer); + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 57d5ced6eb0f4..08df7eeaa965f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -531,8 +531,8 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream if (paddingAfterData != 0) { byte[] buffer = ArrayPool.Shared.Rent(paddingAfterData); - Array.Clear(buffer, 0, paddingAfterData); + await archiveStream.WriteAsync(buffer.AsMemory(0, paddingAfterData), cancellationToken).ConfigureAwait(false); ArrayPool.Shared.Return(buffer); @@ -542,9 +542,11 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. private static Stream? GenerateExtendedAttributesDataStream(Dictionary extendedAttributes) { - byte[]? buffer = null; MemoryStream? dataStream = null; + byte[]? buffer = null; + Span span = stackalloc byte[512]; + if (extendedAttributes.Count > 0) { dataStream = new MemoryStream(); @@ -561,26 +563,27 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream length += CountDigits(length); // Get a large enough buffer if we don't already have one. - if (buffer is null || buffer.Length < length) + if (span.Length < length) { if (buffer is not null) { ArrayPool.Shared.Return(buffer); } buffer = ArrayPool.Shared.Rent(length); + span = buffer; } // Format the contents. - bool formatted = Utf8Formatter.TryFormat(length, buffer, out int bytesWritten); + bool formatted = Utf8Formatter.TryFormat(length, span, out int bytesWritten); Debug.Assert(formatted); - buffer[bytesWritten++] = (byte)' '; - bytesWritten += Encoding.UTF8.GetBytes(attribute, buffer.AsSpan(bytesWritten)); - buffer[bytesWritten++] = (byte)'='; - bytesWritten += Encoding.UTF8.GetBytes(value, buffer.AsSpan(bytesWritten)); - buffer[bytesWritten++] = (byte)'\n'; + span[bytesWritten++] = (byte)' '; + bytesWritten += Encoding.UTF8.GetBytes(attribute, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'='; + bytesWritten += Encoding.UTF8.GetBytes(value, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'\n'; // Write it to the stream. - dataStream.Write(buffer.AsSpan(0, bytesWritten)); + dataStream.Write(span.Slice(0, bytesWritten)); } dataStream.Position = 0; // Ensure it gets written into the archive from the beginning diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index cba1a63943ba5..7d3977eda68df 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -268,40 +268,37 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. private void WriteEntryInternal(TarEntry entry) { - byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); - Span buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger - buffer.Clear(); // Rented arrays aren't clean - try - { - switch (entry.Format) - { - case TarEntryFormat.V7: - entry._header.WriteAsV7(_archiveStream, buffer); - break; - case TarEntryFormat.Ustar: - entry._header.WriteAsUstar(_archiveStream, buffer); - break; - case TarEntryFormat.Pax: - if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) - { - entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); - } - else - { - entry._header.WriteAsPax(_archiveStream, buffer); - } - break; - case TarEntryFormat.Gnu: - entry._header.WriteAsGnu(_archiveStream, buffer); - break; - default: - Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); - throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); - } - } - finally + Span buffer = stackalloc byte[TarHelpers.RecordSize]; + buffer.Clear(); + + switch (entry.Format) { - ArrayPool.Shared.Return(rented); + case TarEntryFormat.V7: + entry._header.WriteAsV7(_archiveStream, buffer); + break; + + case TarEntryFormat.Ustar: + entry._header.WriteAsUstar(_archiveStream, buffer); + break; + + case TarEntryFormat.Pax: + if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) + { + entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); + } + else + { + entry._header.WriteAsPax(_archiveStream, buffer); + } + break; + + case TarEntryFormat.Gnu: + entry._header.WriteAsGnu(_archiveStream, buffer); + break; + + default: + Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); + throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); } _wroteEntries = true; From add6179cba50414bf524356379b56bd5df223446 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 19 Aug 2022 20:59:16 -0400 Subject: [PATCH 16/21] Remove ArrayPool use from System.IO.Compression.ZipFile --- .../System/IO/Compression/ZipFile.Create.cs | 58 +++++++------------ 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs index 4370cd16878b1..33923d91b9b8f 100644 --- a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs +++ b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs @@ -375,49 +375,35 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des if (includeBaseDirectory && di.Parent != null) basePath = di.Parent.FullName; - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - const int DefaultCapacity = 260; - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - directoryIsEmpty = false; + directoryIsEmpty = false; - int entryNameLength = file.FullName.Length - basePath.Length; - Debug.Assert(entryNameLength > 0); + int entryNameLength = file.FullName.Length - basePath.Length; + Debug.Assert(entryNameLength > 0); - if (file is FileInfo) - { - // Create entry for file: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer); - ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); - } - else + if (file is FileInfo) + { + // Create entry for file: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength); + ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); + } + else + { + // Entry marking an empty dir: + if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) { - // Entry marking an empty dir: - if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) - { - // FullName never returns a directory separator character on the end, - // but Zip archives require it to specify an explicit directory: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer, appendPathSeparator: true); - archive.CreateEntry(entryName); - } + // FullName never returns a directory separator character on the end, + // but Zip archives require it to specify an explicit directory: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, appendPathSeparator: true); + archive.CreateEntry(entryName); } - } // foreach - - // If no entries create an empty root directory entry: - if (includeBaseDirectory && directoryIsEmpty) - archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, ref entryNameBuffer, appendPathSeparator: true)); - } - finally - { - ArrayPool.Shared.Return(entryNameBuffer); + } } + // If no entries create an empty root directory entry: + if (includeBaseDirectory && directoryIsEmpty) + archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, appendPathSeparator: true)); } } } From 6f8cb75d054d702722eac63de2ad2920d2228c26 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 20 Aug 2022 07:58:49 -0400 Subject: [PATCH 17/21] Fix inverted condition --- .../src/System/Formats/Tar/TarHeader.Write.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 08df7eeaa965f..b769429fdba35 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -780,8 +780,8 @@ private string GenerateExtendedAttributeName() fileName = string.IsNullOrEmpty(fileName) ? "." : fileName; return _typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList ? - $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}" : - $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}"; + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}" : + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}"; } // Gets the special name for the 'name' field in a global extended attribute entry. From 827a5882f2bc36b54f7177eaee445b5bbf998ae2 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 20 Aug 2022 08:09:40 -0400 Subject: [PATCH 18/21] Use generic math to parse octal --- .../src/System/Formats/Tar/TarHeader.Read.cs | 20 +++++------ .../src/System/Formats/Tar/TarHelpers.cs | 34 ++++--------------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index a02d60bc89d3c..aa3eecf5f9baa 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -357,14 +357,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { return null; } - int checksum = TarHelpers.ParseOctalAsInt32(spanChecksum); + int checksum = (int)TarHelpers.ParseOctal(spanChecksum); // Zero checksum means the whole header is empty if (checksum == 0) { return null; } - long size = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + long size = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); if (size < 0) { throw new FormatException(string.Format(SR.TarSizeFieldNegative)); @@ -373,14 +373,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), - mode: TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), - mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.ParseOctalAsInt64(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), + mode: (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), + mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) { _checksum = checksum, _size = size, - _uid = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), - _gid = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), + _uid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), + _gid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; @@ -491,10 +491,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice) { // Major number for a character device or block device entry. - _devMajor = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); + _devMajor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); // Minor number for a character device or block device entry. - _devMinor = TarHelpers.ParseOctalAsInt32(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); + _devMinor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } } @@ -503,10 +503,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) private void ReadGnuAttributes(Span buffer) { // Convert byte arrays - long aTime = TarHelpers.ParseOctalAsInt64(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + long aTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); _aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime); - long cTime = TarHelpers.ParseOctalAsInt64(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); + long cTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); _cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime); // TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230 diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index 6038e6427283a..6aac3ca771e1d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -6,6 +6,7 @@ using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -198,13 +199,13 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format, return entryType; } - // Receives a byte array that represents an ASCII string containing a number in octal base. - // Converts the array to an octal base number, then transforms it to ten base and returns it. - internal static int ParseOctalAsInt32(ReadOnlySpan buffer) + /// Parses a byte span that represents an ASCII string containing a number in octal base. + internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { buffer = TrimEndingNullsAndSpaces(buffer); - uint value = 0; + T octalFactor = T.CreateTruncating(8u); + T value = T.Zero; foreach (byte b in buffer) { uint digit = (uint)(b - '0'); @@ -213,31 +214,10 @@ internal static int ParseOctalAsInt32(ReadOnlySpan buffer) ThrowInvalidNumber(); } - value = checked((value * 8u) + digit); + value = checked((value * octalFactor) + T.CreateTruncating(digit)); } - return (int)value; - } - - // Receives a byte array that represents an ASCII string containing a number in octal base. - // Converts the array to an octal base number, then transforms it to ten base and returns it. - internal static long ParseOctalAsInt64(ReadOnlySpan buffer) - { - buffer = TrimEndingNullsAndSpaces(buffer); - - ulong value = 0; - foreach (byte b in buffer) - { - ulong digit = (ulong)(b - '0'); - if (digit >= 8) - { - ThrowInvalidNumber(); - } - - value = checked((value * 8u) + digit); - } - - return (long)value; + return value; } [DoesNotReturn] From ae2147895fb0c868cf814793bb004d5522e8edfb Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 20 Aug 2022 12:49:39 -0400 Subject: [PATCH 19/21] Remove allocations from StringReader and string.Split --- .../src/System/Formats/Tar/TarHeader.Read.cs | 49 +++++++++++++------ .../src/System/Formats/Tar/TarHeader.Write.cs | 2 +- .../src/System/Formats/Tar/TarHelpers.cs | 2 +- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index aa3eecf5f9baa..fca364f1c754d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -586,11 +586,9 @@ void ThrowSizeFieldTooLarge() => // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - string dataAsString = TarHelpers.GetTrimmedUtf8String(buffer); + buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer); - using StringReader reader = new(dataAsString); - - while (TryGetNextExtendedAttribute(reader, out string? key, out string? value)) + while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { if (!ExtendedAttributes.TryAdd(key, value)) { @@ -659,43 +657,64 @@ private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) } } - // Tries to collect the next extended attribute from the string wrapped by the specified reader. + // Tries to collect the next extended attribute from the string. // Extended attributes are saved in the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html // "LENGTH KEY=VALUE\n" // Where LENGTH is the total number of bytes of that line, from LENGTH itself to the endline, inclusive. // Throws if end of stream is reached or if an attribute is malformed. private static bool TryGetNextExtendedAttribute( - StringReader reader, + ref ReadOnlySpan buffer, [NotNullWhen(returnValue: true)] out string? key, [NotNullWhen(returnValue: true)] out string? value) { key = null; value = null; - string? nextLine = reader.ReadLine(); - if (string.IsNullOrWhiteSpace(nextLine)) + // Slice off the next line. + int newlinePos = buffer.IndexOf((byte)'\n'); + if (newlinePos < 0) { return false; } + ReadOnlySpan line = buffer.Slice(0, newlinePos); + + // Update buffer to point to the next line for the next call + buffer = buffer.Slice(newlinePos + 1); - StringSplitOptions splitOptions = StringSplitOptions.RemoveEmptyEntries; + // Find the end of the length and remove everything up through it. + int spacePos = line.IndexOf((byte)' '); + if (spacePos < 0) + { + return false; + } + line = line.Slice(spacePos + 1).TrimStart((byte)' '); - string[] attributeArray = nextLine.Split(' ', 2, splitOptions); - if (attributeArray.Length != 2) + // If there are any more spaces, it's malformed. + if (line.IndexOf((byte)' ') >= 0) { return false; } - string[] keyAndValueArray = attributeArray[1].Split('=', 2, splitOptions); - if (keyAndValueArray.Length != 2) + // Find the equal separator. + int equalPos = line.IndexOf((byte)'='); + if (equalPos < 0) { return false; } - key = keyAndValueArray[0]; - value = keyAndValueArray[1]; + ReadOnlySpan keySlice = line.Slice(0, equalPos); + ReadOnlySpan valueSlice = line.Slice(equalPos + 1); + + // If the value contains an =, it's malformed. + if (valueSlice.IndexOf((byte)'=') >= 0) + { + return false; + } + // Return the parsed key and value. + key = Encoding.UTF8.GetString(keySlice); + value = Encoding.UTF8.GetString(valueSlice); return true; } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index b769429fdba35..1c476d3c49c7d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -738,7 +738,7 @@ private static int Checksum(ReadOnlySpan bytes) internal static int FormatOctal(long value, Span destination) { ulong remaining = (ulong)value; - Span digits = stackalloc byte[32]; // longest possible octal representation of a ulong is 21 digits + Span digits = stackalloc byte[32]; // longer than any possible octal formatting of a ulong int i = digits.Length - 1; while (true) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index 6aac3ca771e1d..898daf1e6c3ed 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -232,7 +232,7 @@ private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encod return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); } - private static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) + internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) { int trimmedLength = buffer.Length; while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) From d6b6727e0f476aab4c9855b960506d2fe7d4e2cd Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 20 Aug 2022 12:59:56 -0400 Subject: [PATCH 20/21] Remove magic string allocation for Ustar when not V7 --- .../src/System/Formats/Tar/TarHeader.Read.cs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index fca364f1c754d..f826a2c1e1b09 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -430,11 +430,14 @@ private void ReadMagicAttribute(Span buffer) _magic = GnuMagic; _format = TarEntryFormat.Gnu; } - else if (_format == TarEntryFormat.V7 && magic.SequenceEqual(UstarMagicBytes)) + else if (magic.SequenceEqual(UstarMagicBytes)) { - // Important: Only change to ustar if we had not changed the format to pax already _magic = UstarMagic; - _format = TarEntryFormat.Ustar; + if (_format == TarEntryFormat.V7) + { + // Important: Only change to ustar if we had not changed the format to pax already + _format = TarEntryFormat.Ustar; + } } else { From 480af5cc8de861452778ab35b285412a25643a43 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 20 Aug 2022 13:01:56 -0400 Subject: [PATCH 21/21] Remove file name and directory name allocation in GenerateExtendedAttributeName --- .../src/System/Formats/Tar/TarHeader.Write.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index 1c476d3c49c7d..724274d1689d9 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -773,11 +773,11 @@ private static int WriteAsAsciiString(string str, Span buffer) // - %f: The filename of the file, equivalent to the result of the basename utility on the translated pathname. private string GenerateExtendedAttributeName() { - string? dirName = Path.GetDirectoryName(_name); - dirName = string.IsNullOrEmpty(dirName) ? "." : dirName; + ReadOnlySpan dirName = Path.GetDirectoryName(_name.AsSpan()); + dirName = dirName.IsEmpty ? "." : dirName; - string? fileName = Path.GetFileName(_name); - fileName = string.IsNullOrEmpty(fileName) ? "." : fileName; + ReadOnlySpan fileName = Path.GetFileName(_name.AsSpan()); + fileName = fileName.IsEmpty ? "." : fileName; return _typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList ? $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}" :