diff --git a/src/libraries/Common/src/System/IO/Archiving.Utils.cs b/src/libraries/Common/src/System/IO/Archiving.Utils.cs index 8335dbd26edbc..80d633228cc0c 100644 --- a/src/libraries/Common/src/System/IO/Archiving.Utils.cs +++ b/src/libraries/Common/src/System/IO/Archiving.Utils.cs @@ -15,10 +15,9 @@ internal static partial class ArchivingUtils private const char PathSeparatorChar = '/'; private const string PathSeparatorString = "/"; - public static string EntryFromPath(string entry, int offset, int length, ref char[] buffer, bool appendPathSeparator = false) + public static string EntryFromPath(string entry, int offset, int length, bool appendPathSeparator = false) { Debug.Assert(length <= entry.Length - offset); - Debug.Assert(buffer != null); // Remove any leading slashes from the entry name: while (length > 0) @@ -32,26 +31,36 @@ public static string EntryFromPath(string entry, int offset, int length, ref cha } if (length == 0) + { return appendPathSeparator ? PathSeparatorString : string.Empty; + } - int resultLength = appendPathSeparator ? length + 1 : length; - EnsureCapacity(ref buffer, resultLength); - entry.CopyTo(offset, buffer, 0, length); - - // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) - // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is - // explicitly trying to standardize to '/' - for (int i = 0; i < length; i++) + if (appendPathSeparator) { - char ch = buffer[i]; - if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) - buffer[i] = PathSeparatorChar; + length++; } - if (appendPathSeparator) - buffer[length] = PathSeparatorChar; + return string.Create(length, (appendPathSeparator, offset, entry), static (dest, state) => + { + state.entry.AsSpan(state.offset).CopyTo(dest); + + // '/' is a more broadly recognized directory separator on all platforms (eg: mac, linux) + // We don't use Path.DirectorySeparatorChar or AltDirectorySeparatorChar because this is + // explicitly trying to standardize to '/' + for (int i = 0; i < dest.Length; i++) + { + char ch = dest[i]; + if (ch == Path.DirectorySeparatorChar || ch == Path.AltDirectorySeparatorChar) + { + dest[i] = PathSeparatorChar; + } + } - return new string(buffer, 0, resultLength); + if (state.appendPathSeparator) + { + dest[^1] = PathSeparatorChar; + } + }); } public static void EnsureCapacity(ref char[] buffer, int min) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 79e3188410c3c..001d85b764bca 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -255,4 +255,7 @@ An attempt was made to move the position before the beginning of the stream. + + Unable to parse number. + diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs index c6b9b6afc8d6d..014165939ac1c 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs @@ -188,10 +188,7 @@ public override Task FlushAsync(CancellationToken cancellationToken) => // the substream is just 'a chunk' of the super-stream protected override void Dispose(bool disposing) { - if (disposing && !_isDisposed) - { - _isDisposed = true; - } + _isDisposed = true; base.Dispose(disposing); } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs index a77cb2c36a65e..9953045a5840d 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarFile.cs @@ -15,11 +15,6 @@ namespace System.Formats.Tar /// public static class TarFile { - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - private const int DefaultCapacity = 260; - /// /// Creates a tar stream that contains all the filesystem entries from the specified directory. /// @@ -283,23 +278,14 @@ private static void CreateFromDirectoryInternal(string sourceDirectoryName, Stre DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer)); - } - - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer)); - } + writer.WriteEntry(di.FullName, GetEntryNameForBaseDirectory(di.Name)); } - finally + + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - ArrayPool.Shared.Return(entryNameBuffer); + writer.WriteEntry(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length)); } } } @@ -339,23 +325,14 @@ private static async Task CreateFromDirectoryInternalAsync(string sourceDirector DirectoryInfo di = new(sourceDirectoryName); string basePath = GetBasePathForCreateFromDirectory(di, includeBaseDirectory); - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + if (includeBaseDirectory) { - if (includeBaseDirectory) - { - await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } - - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length, ref entryNameBuffer), cancellationToken).ConfigureAwait(false); - } + await writer.WriteEntryAsync(di.FullName, GetEntryNameForBaseDirectory(di.Name), cancellationToken).ConfigureAwait(false); } - finally + + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - ArrayPool.Shared.Return(entryNameBuffer); + await writer.WriteEntryAsync(file.FullName, GetEntryNameForFileSystemInfo(file, basePath.Length), cancellationToken).ConfigureAwait(false); } } } @@ -365,18 +342,18 @@ private static string GetBasePathForCreateFromDirectory(DirectoryInfo di, bool i includeBaseDirectory && di.Parent != null ? di.Parent.FullName : di.FullName; // Constructs the entry name used for a filesystem entry when creating an archive. - private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength, ref char[] entryNameBuffer) + private static string GetEntryNameForFileSystemInfo(FileSystemInfo file, int basePathLength) { int entryNameLength = file.FullName.Length - basePathLength; Debug.Assert(entryNameLength > 0); - bool isDirectory = file.Attributes.HasFlag(FileAttributes.Directory); - return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, ref entryNameBuffer, appendPathSeparator: isDirectory); + bool isDirectory = (file.Attributes & FileAttributes.Directory) != 0; + return ArchivingUtils.EntryFromPath(file.FullName, basePathLength, entryNameLength, appendPathSeparator: isDirectory); } - private static string GetEntryNameForBaseDirectory(string name, ref char[] entryNameBuffer) + private static string GetEntryNameForBaseDirectory(string name) { - return ArchivingUtils.EntryFromPath(name, 0, name.Length, ref entryNameBuffer, appendPathSeparator: true); + return ArchivingUtils.EntryFromPath(name, 0, name.Length, appendPathSeparator: true); } // Extracts an archive into the specified directory. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e89c9d4579dd3..f826a2c1e1b09 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -357,14 +357,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca { return null; } - int checksum = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(spanChecksum); + int checksum = (int)TarHelpers.ParseOctal(spanChecksum); // Zero checksum means the whole header is empty if (checksum == 0) { return null; } - long size = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + long size = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); if (size < 0) { throw new FormatException(string.Format(SR.TarSizeFieldNegative)); @@ -373,14 +373,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca // Continue with the rest of the fields that require no special checks TarHeader header = new(initialFormat, name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), - mode: TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), - mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), + mode: (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), + mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) { _checksum = checksum, _size = size, - _uid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), - _gid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), + _uid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), + _gid = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) }; @@ -425,16 +425,23 @@ private void ReadMagicAttribute(Span buffer) } // When the magic field is set, the archive is newer than v7. - _magic = Encoding.ASCII.GetString(magic); - - if (_magic == GnuMagic) + if (magic.SequenceEqual(GnuMagicBytes)) { + _magic = GnuMagic; _format = TarEntryFormat.Gnu; } - else if (_format == TarEntryFormat.V7 && _magic == UstarMagic) + else if (magic.SequenceEqual(UstarMagicBytes)) + { + _magic = UstarMagic; + if (_format == TarEntryFormat.V7) + { + // Important: Only change to ustar if we had not changed the format to pax already + _format = TarEntryFormat.Ustar; + } + } + else { - // Important: Only change to ustar if we had not changed the format to pax already - _format = TarEntryFormat.Ustar; + _magic = Encoding.ASCII.GetString(magic); } } @@ -448,19 +455,29 @@ private void ReadVersionAttribute(Span buffer) } Span version = buffer.Slice(FieldLocations.Version, FieldLengths.Version); - - _version = Encoding.ASCII.GetString(version); - - // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" - if ((_format is TarEntryFormat.Ustar or TarEntryFormat.Pax) && _version != UstarVersion) + switch (_format) { - throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); - } + case TarEntryFormat.Ustar or TarEntryFormat.Pax: + // The POSIX formats have a 6 byte Magic "ustar\0", followed by a 2 byte Version "00" + if (!version.SequenceEqual(UstarVersionBytes)) + { + throw new FormatException(string.Format(SR.TarPosixFormatExpected, _name)); + } + _version = UstarVersion; + break; - // The GNU format has a Magic+Version 8 byte string "ustar \0" - if (_format == TarEntryFormat.Gnu && _version != GnuVersion) - { - throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + case TarEntryFormat.Gnu: + // The GNU format has a Magic+Version 8 byte string "ustar \0" + if (!version.SequenceEqual(GnuVersionBytes)) + { + throw new FormatException(string.Format(SR.TarGnuFormatExpected, _name)); + } + _version = GnuVersion; + break; + + default: + _version = Encoding.ASCII.GetString(version); + break; } } @@ -477,10 +494,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice) { // Major number for a character device or block device entry. - _devMajor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); + _devMajor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); // Minor number for a character device or block device entry. - _devMinor = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); + _devMinor = (int)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } } @@ -489,10 +506,10 @@ private void ReadPosixAndGnuSharedAttributes(Span buffer) private void ReadGnuAttributes(Span buffer) { // Convert byte arrays - long aTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + long aTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); _aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime); - long cTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); + long cTime = (long)TarHelpers.ParseOctal(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); _cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime); // TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230 @@ -518,11 +535,23 @@ private void ReadUstarAttributes(Span buffer) // Throws if end of stream is reached or if an attribute is malformed. private void ReadExtendedAttributesBlock(Stream archiveStream) { - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadExtendedAttributesFromBuffer(buffer, _name); + ValidateSize(); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadExtendedAttributesFromBuffer(span, _name); + + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -531,48 +560,43 @@ private void ReadExtendedAttributesBlock(Stream archiveStream) private async ValueTask ReadExtendedAttributesBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadExtendedAttributesFromBuffer(buffer, _name); - } - } + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); - // Return a byte array if the size field has a valid value for extended attributes. Otherwise, return null, or throw. - private byte[]? CreateExtendedAttributesBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadExtendedAttributesFromBuffer(memory.Span, _name); - // It is not expected that the extended attributes data section will be longer than Array.MaxLength, considering - // the size field is 12 bytes long, which fits a number with a value under int.MaxValue. - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); + ArrayPool.Shared.Return(buffer); } + } - if (_size == 0) + private void ValidateSize() + { + if ((uint)_size > (uint)Array.MaxLength) { - return null; + ThrowSizeFieldTooLarge(); } - return new byte[(int)_size]; + [DoesNotReturn] + void ThrowSizeFieldTooLarge() => + throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); } // Returns a dictionary containing the extended attributes collected from the provided byte buffer. private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - string dataAsString = TarHelpers.GetTrimmedUtf8String(buffer); - - using StringReader reader = new(dataAsString); + buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer); - while (TryGetNextExtendedAttribute(reader, out string? key, out string? value)) + while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value)) { - if (ExtendedAttributes.ContainsKey(key)) + if (!ExtendedAttributes.TryAdd(key, value)) { throw new FormatException(string.Format(SR.TarDuplicateExtendedAttribute, name)); } - ExtendedAttributes.Add(key, value); } } @@ -581,11 +605,23 @@ private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string // Throws if end of stream is reached. private void ReadGnuLongPathDataBlock(Stream archiveStream) { - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + if (_size != 0) { - archiveStream.ReadExactly(buffer); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + + byte[]? buffer = null; + Span span = _size <= 256 ? + stackalloc byte[256] : + (buffer = ArrayPool.Shared.Rent((int)_size)); + span = span.Slice(0, (int)_size); + + archiveStream.ReadExactly(span); + ReadGnuLongPathDataFromBuffer(span); + + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } } } @@ -595,11 +631,17 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream) private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); - if (buffer != null) + + if (_size != 0) { - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - ReadGnuLongPathDataFromBuffer(buffer); + ValidateSize(); + byte[] buffer = ArrayPool.Shared.Rent((int)_size); + Memory memory = buffer.AsMemory(0, (int)_size); + + await archiveStream.ReadExactlyAsync(memory, cancellationToken).ConfigureAwait(false); + ReadGnuLongPathDataFromBuffer(memory.Span); + + ArrayPool.Shared.Return(buffer); } } @@ -618,61 +660,64 @@ private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) } } - // Return a byte array if the size field has a valid value for GNU long metadata entry data. Otherwise, return null, or throw. - private byte[]? CreateGnuLongDataBufferIfSizeIsValid() - { - Debug.Assert(_typeFlag is TarEntryType.LongLink or TarEntryType.LongPath); - - if (_size > Array.MaxLength) - { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); - } - - if (_size == 0) - { - return null; - } - - return new byte[(int)_size]; - } - - // Tries to collect the next extended attribute from the string wrapped by the specified reader. + // Tries to collect the next extended attribute from the string. // Extended attributes are saved in the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html // "LENGTH KEY=VALUE\n" // Where LENGTH is the total number of bytes of that line, from LENGTH itself to the endline, inclusive. // Throws if end of stream is reached or if an attribute is malformed. private static bool TryGetNextExtendedAttribute( - StringReader reader, + ref ReadOnlySpan buffer, [NotNullWhen(returnValue: true)] out string? key, [NotNullWhen(returnValue: true)] out string? value) { key = null; value = null; - string? nextLine = reader.ReadLine(); - if (string.IsNullOrWhiteSpace(nextLine)) + // Slice off the next line. + int newlinePos = buffer.IndexOf((byte)'\n'); + if (newlinePos < 0) { return false; } + ReadOnlySpan line = buffer.Slice(0, newlinePos); - StringSplitOptions splitOptions = StringSplitOptions.RemoveEmptyEntries; + // Update buffer to point to the next line for the next call + buffer = buffer.Slice(newlinePos + 1); - string[] attributeArray = nextLine.Split(' ', 2, splitOptions); - if (attributeArray.Length != 2) + // Find the end of the length and remove everything up through it. + int spacePos = line.IndexOf((byte)' '); + if (spacePos < 0) { return false; } + line = line.Slice(spacePos + 1).TrimStart((byte)' '); - string[] keyAndValueArray = attributeArray[1].Split('=', 2, splitOptions); - if (keyAndValueArray.Length != 2) + // If there are any more spaces, it's malformed. + if (line.IndexOf((byte)' ') >= 0) { return false; } - key = keyAndValueArray[0]; - value = keyAndValueArray[1]; + // Find the equal separator. + int equalPos = line.IndexOf((byte)'='); + if (equalPos < 0) + { + return false; + } + + ReadOnlySpan keySlice = line.Slice(0, equalPos); + ReadOnlySpan valueSlice = line.Slice(equalPos + 1); + + // If the value contains an =, it's malformed. + if (valueSlice.IndexOf((byte)'=') >= 0) + { + return false; + } + // Return the parsed key and value. + key = Encoding.UTF8.GetString(keySlice); + value = Encoding.UTF8.GetString(valueSlice); return true; } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index ad5cdae22d031..724274d1689d9 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -1,10 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -14,16 +17,12 @@ namespace System.Formats.Tar // Writes header attributes of a tar archive entry. internal sealed partial class TarHeader { - private static ReadOnlySpan PaxMagicBytes => "ustar\0"u8; - private static ReadOnlySpan PaxVersionBytes => "00"u8; + private static ReadOnlySpan UstarMagicBytes => "ustar\0"u8; + private static ReadOnlySpan UstarVersionBytes => "00"u8; private static ReadOnlySpan GnuMagicBytes => "ustar "u8; private static ReadOnlySpan GnuVersionBytes => " \0"u8; - // Extended Attribute entries have a special format in the Name field: - // "{dirName}/PaxHeaders.{processId}/{fileName}{trailingSeparator}" - private const string PaxHeadersFormat = "{0}/PaxHeaders.{1}/{2}{3}"; - // Predefined text for the Name field of a GNU long metadata entry. Applies for both LongPath ('L') and LongLink ('K'). private const string GnuLongMetadataName = "././@LongLink"; @@ -61,7 +60,7 @@ private long WriteV7FieldsToBuffer(Span buffer) long actualLength = GetTotalDataBytesToWrite(); TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType); _checksum = WriteChecksum(tmpChecksum, buffer); @@ -215,7 +214,7 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Second, we determine if we need a preceding LongPath, and write it if needed if (_name.Length > FieldLengths.Name) { - TarHeader longPathHeader = await GetGnuLongMetadataHeaderAsync(TarEntryType.LongPath, _name, cancellationToken).ConfigureAwait(false); + TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name); await longPathHeader.WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it } @@ -227,34 +226,8 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, C // Creates and returns a GNU long metadata header, with the specified long text written into its data stream. private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText) { - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - longMetadataHeader._dataStream.Write(Encoding.UTF8.GetBytes(longText)); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Asynchronously creates and returns a GNU long metadata header, with the specified long text written into its data stream. - private static async Task GetGnuLongMetadataHeaderAsync(TarEntryType entryType, string longText, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); - - TarHeader longMetadataHeader = GetDefaultGnuLongMetadataHeader(longText.Length, entryType); - Debug.Assert(longMetadataHeader._dataStream != null); - - await longMetadataHeader._dataStream.WriteAsync(Encoding.UTF8.GetBytes(longText), cancellationToken).ConfigureAwait(false); - longMetadataHeader._dataStream.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning - - return longMetadataHeader; - } - - // Constructs a GNU metadata header with default values for the specified entry type. - private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, TarEntryType entryType) - { - Debug.Assert((entryType is TarEntryType.LongPath && longTextLength > FieldLengths.Name) || - (entryType is TarEntryType.LongLink && longTextLength > FieldLengths.LinkName)); + Debug.Assert((entryType is TarEntryType.LongPath && longText.Length > FieldLengths.Name) || + (entryType is TarEntryType.LongLink && longText.Length > FieldLengths.LinkName)); TarHeader longMetadataHeader = new(TarEntryFormat.Gnu); @@ -264,7 +237,7 @@ private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, Tar longMetadataHeader._gid = 0; longMetadataHeader._mTime = DateTimeOffset.MinValue; // 0 longMetadataHeader._typeFlag = entryType; - longMetadataHeader._dataStream = new MemoryStream(); + longMetadataHeader._dataStream = new MemoryStream(Encoding.UTF8.GetBytes(longText)); return longMetadataHeader; } @@ -302,7 +275,7 @@ private void WriteAsGnuSharedInternal(Span buffer, out long actualLength) { actualLength = GetTotalDataBytesToWrite(); - int tmpChecksum = WriteName(buffer, out _); + int tmpChecksum = WriteName(buffer); tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag)); tmpChecksum += WriteGnuMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); @@ -320,13 +293,13 @@ private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffe } // Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream and returns the value of the final checksum. - private async Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) + private Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber); - _dataStream = await GenerateExtendedAttributesDataStreamAsync(extendedAttributes, cancellationToken).ConfigureAwait(false); - await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + _dataStream = GenerateExtendedAttributesDataStream(extendedAttributes); + return WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken); } // Initializes the name, mode and type flag of a PAX extended attributes entry. @@ -385,24 +358,33 @@ private void WriteAsPaxSharedInternal(Span buffer, out long actualLength) _checksum = WriteChecksum(tmpChecksum, buffer); } - // All formats save in the name byte array only the ASCII bytes that fit. The full string is returned in the out byte array. - private int WriteName(Span buffer, out byte[] fullNameBytes) + // All formats save in the name byte array only the ASCII bytes that fit. + private int WriteName(Span buffer) { - fullNameBytes = Encoding.ASCII.GetBytes(_name); - int nameBytesLength = Math.Min(fullNameBytes.Length, FieldLengths.Name); - int checksum = WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(0, nameBytesLength), buffer.Slice(FieldLocations.Name, FieldLengths.Name)); - return checksum; + ReadOnlySpan src = _name.AsSpan(0, Math.Min(_name.Length, FieldLengths.Name)); + Span dest = buffer.Slice(FieldLocations.Name, FieldLengths.Name); + int encoded = Encoding.ASCII.GetBytes(src, dest); + return Checksum(dest.Slice(0, encoded)); } // Ustar and PAX save in the name byte array only the ASCII bytes that fit, and the rest of that string is saved in the prefix field. private int WritePosixName(Span buffer) { - int checksum = WriteName(buffer, out byte[] fullNameBytes); - if (fullNameBytes.Length > FieldLengths.Name) + int checksum = WriteName(buffer); + + if (_name.Length > FieldLengths.Name) { - int prefixBytesLength = Math.Min(fullNameBytes.Length - FieldLengths.Name, FieldLengths.Name); - checksum += WriteLeftAlignedBytesAndGetChecksum(fullNameBytes.AsSpan(FieldLengths.Name, prefixBytesLength), buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); + int prefixBytesLength = Math.Min(_name.Length - FieldLengths.Name, FieldLengths.Name); + Span remaining = prefixBytesLength <= 256 ? + stackalloc byte[prefixBytesLength] : + new byte[prefixBytesLength]; + + int encoded = Encoding.ASCII.GetBytes(_name.AsSpan(FieldLengths.Name), remaining); + Debug.Assert(encoded == remaining.Length); + + checksum += WriteLeftAlignedBytesAndGetChecksum(remaining, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix)); } + return checksum; } @@ -413,27 +395,27 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (_mode > 0) { - checksum += WriteAsOctal(_mode, buffer, FieldLocations.Mode, FieldLengths.Mode); + checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); } if (_uid > 0) { - checksum += WriteAsOctal(_uid, buffer, FieldLocations.Uid, FieldLengths.Uid); + checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); } if (_gid > 0) { - checksum += WriteAsOctal(_gid, buffer, FieldLocations.Gid, FieldLengths.Gid); + checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); } _size = actualLength; if (_size > 0) { - checksum += WriteAsOctal(_size, buffer, FieldLocations.Size, FieldLengths.Size); + checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size)); } - checksum += WriteAsTimestamp(_mTime, buffer, FieldLocations.MTime, FieldLengths.MTime); + checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)); char typeFlagChar = (char)actualEntryType; buffer[FieldLocations.TypeFlag] = (byte)typeFlagChar; @@ -441,7 +423,7 @@ private int WriteCommonFields(Span buffer, long actualLength, TarEntryType if (!string.IsNullOrEmpty(_linkName)) { - checksum += WriteAsAsciiString(_linkName, buffer, FieldLocations.LinkName, FieldLengths.LinkName); + checksum += WriteAsAsciiString(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)); } return checksum; @@ -465,8 +447,8 @@ private long GetTotalDataBytesToWrite() // Writes the magic and version fields of a ustar or pax entry into the specified spans. private static int WritePosixMagicAndVersion(Span buffer) { - int checksum = WriteLeftAlignedBytesAndGetChecksum(PaxMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); - checksum += WriteLeftAlignedBytesAndGetChecksum(PaxVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); + int checksum = WriteLeftAlignedBytesAndGetChecksum(UstarMagicBytes, buffer.Slice(FieldLocations.Magic, FieldLengths.Magic)); + checksum += WriteLeftAlignedBytesAndGetChecksum(UstarVersionBytes, buffer.Slice(FieldLocations.Version, FieldLengths.Version)); return checksum; } @@ -485,22 +467,22 @@ private int WritePosixAndGnuSharedFields(Span buffer) if (!string.IsNullOrEmpty(_uName)) { - checksum += WriteAsAsciiString(_uName, buffer, FieldLocations.UName, FieldLengths.UName); + checksum += WriteAsAsciiString(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName)); } if (!string.IsNullOrEmpty(_gName)) { - checksum += WriteAsAsciiString(_gName, buffer, FieldLocations.GName, FieldLengths.GName); + checksum += WriteAsAsciiString(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName)); } if (_devMajor > 0) { - checksum += WriteAsOctal(_devMajor, buffer, FieldLocations.DevMajor, FieldLengths.DevMajor); + checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor)); } if (_devMinor > 0) { - checksum += WriteAsOctal(_devMinor, buffer, FieldLocations.DevMinor, FieldLengths.DevMinor); + checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor)); } return checksum; @@ -509,8 +491,8 @@ private int WritePosixAndGnuSharedFields(Span buffer) // Saves the gnu-specific fields into the specified spans. private int WriteGnuFields(Span buffer) { - int checksum = WriteAsTimestamp(_aTime, buffer, FieldLocations.ATime, FieldLengths.ATime); - checksum += WriteAsTimestamp(_cTime, buffer, FieldLocations.CTime, FieldLengths.CTime); + int checksum = WriteAsTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime)); + checksum += WriteAsTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime)); if (_gnuUnusedBytes != null) { @@ -524,8 +506,18 @@ private int WriteGnuFields(Span buffer) private static void WriteData(Stream archiveStream, Stream dataStream, long actualLength) { dataStream.CopyTo(archiveStream); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - archiveStream.Write(new byte[paddingAfterData]); + if (paddingAfterData != 0) + { + Debug.Assert(paddingAfterData <= TarHelpers.RecordSize); + + Span padding = stackalloc byte[TarHelpers.RecordSize]; + padding = padding.Slice(0, paddingAfterData); + padding.Clear(); + + archiveStream.Write(padding); + } } // Asynchronously writes the current header's data stream into the archive stream. @@ -534,44 +526,88 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream cancellationToken.ThrowIfCancellationRequested(); await dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position + int paddingAfterData = TarHelpers.CalculatePadding(actualLength); - await archiveStream.WriteAsync(new byte[paddingAfterData], cancellationToken).ConfigureAwait(false); + if (paddingAfterData != 0) + { + byte[] buffer = ArrayPool.Shared.Rent(paddingAfterData); + Array.Clear(buffer, 0, paddingAfterData); + + await archiveStream.WriteAsync(buffer.AsMemory(0, paddingAfterData), cancellationToken).ConfigureAwait(false); + + ArrayPool.Shared.Return(buffer); + } } // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. private static Stream? GenerateExtendedAttributesDataStream(Dictionary extendedAttributes) { MemoryStream? dataStream = null; + + byte[]? buffer = null; + Span span = stackalloc byte[512]; + if (extendedAttributes.Count > 0) { dataStream = new MemoryStream(); + foreach ((string attribute, string value) in extendedAttributes) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - dataStream.Write(entryBytes); + // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html + + // The format is: + // "XX attribute=value\n" + // where "XX" is the number of characters in the entry, including those required for the count itself. + int length = 3 + Encoding.UTF8.GetByteCount(attribute) + Encoding.UTF8.GetByteCount(value); + length += CountDigits(length); + + // Get a large enough buffer if we don't already have one. + if (span.Length < length) + { + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } + buffer = ArrayPool.Shared.Rent(length); + span = buffer; + } + + // Format the contents. + bool formatted = Utf8Formatter.TryFormat(length, span, out int bytesWritten); + Debug.Assert(formatted); + span[bytesWritten++] = (byte)' '; + bytesWritten += Encoding.UTF8.GetBytes(attribute, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'='; + bytesWritten += Encoding.UTF8.GetBytes(value, span.Slice(bytesWritten)); + span[bytesWritten++] = (byte)'\n'; + + // Write it to the stream. + dataStream.Write(span.Slice(0, bytesWritten)); } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + + dataStream.Position = 0; // Ensure it gets written into the archive from the beginning } - return dataStream; - } - // Asynchronously dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. - private static async Task GenerateExtendedAttributesDataStreamAsync(Dictionary extendedAttributes, CancellationToken cancellationToken) - { - cancellationToken.ThrowIfCancellationRequested(); + if (buffer is not null) + { + ArrayPool.Shared.Return(buffer); + } - MemoryStream? dataStream = null; - if (extendedAttributes.Count > 0) + return dataStream; + + static int CountDigits(int value) { - dataStream = new MemoryStream(); - foreach ((string attribute, string value) in extendedAttributes) + Debug.Assert(value >= 0); + int digits = 1; + while (true) { - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - await dataStream.WriteAsync(entryBytes, cancellationToken).ConfigureAwait(false); + value /= 10; + if (value == 0) break; + digits++; } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning + return digits; } - return dataStream; } // Some fields that have a reserved spot in the header, may not fit in such field anymore, but they can fit in the @@ -584,10 +620,12 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() { ExtendedAttributes.Add(PaxEaMTime, TarHelpers.GetTimestampStringFromDateTimeOffset(_mTime)); } + if (!string.IsNullOrEmpty(_gName)) { TryAddStringField(ExtendedAttributes, PaxEaGName, _gName, FieldLengths.GName); } + if (!string.IsNullOrEmpty(_uName)) { TryAddStringField(ExtendedAttributes, PaxEaUName, _uName, FieldLengths.UName); @@ -603,7 +641,6 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded() ExtendedAttributes.Add(PaxEaSize, _size.ToString()); } - // Adds the specified string to the dictionary if it's longer than the specified max byte length. static void TryAddStringField(Dictionary extendedAttributes, string key, string value, int maxLength) { @@ -614,63 +651,24 @@ static void TryAddStringField(Dictionary extendedAttributes, str } } - // Generates an extended attribute key value pair string saved into a byte array, following the ISO/IEC 10646-1:2000 standard UTF-8 encoding format. - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html - private static byte[] GenerateExtendedAttributeKeyValuePairAsByteArray(byte[] keyBytes, byte[] valueBytes) - { - // Assuming key="ab" and value="cdef" - - // The " ab=cdef\n" attribute string has a length of 9 chars - int suffixByteCount = 3 + // leading space, equals sign and trailing newline - keyBytes.Length + valueBytes.Length; - - // The count string "9" has a length of 1 char - string suffixByteCountString = suffixByteCount.ToString(); - int firstTotalByteCount = Encoding.ASCII.GetByteCount(suffixByteCountString); - - // If we prepend the count string length to the attribute string, - // the total length increases to 10, which has one more digit - // "9 abc=def\n" - int firstPrefixAndSuffixByteCount = firstTotalByteCount + suffixByteCount; - - // The new count string "10" has an increased length of 2 chars - string prefixAndSuffixByteCountString = firstPrefixAndSuffixByteCount.ToString(); - int realTotalCharCount = Encoding.ASCII.GetByteCount(prefixAndSuffixByteCountString); - - byte[] finalTotalCharCountBytes = Encoding.ASCII.GetBytes(prefixAndSuffixByteCountString); - - // The final string should contain the correct total length now - List bytesList = new(); - - bytesList.AddRange(finalTotalCharCountBytes); - bytesList.Add(TarHelpers.SpaceChar); - bytesList.AddRange(keyBytes); - bytesList.Add(TarHelpers.EqualsChar); - bytesList.AddRange(valueBytes); - bytesList.Add(TarHelpers.NewLineChar); - - Debug.Assert(bytesList.Count == (realTotalCharCount + suffixByteCount)); - - return bytesList.ToArray(); - } - // The checksum accumulator first adds up the byte values of eight space chars, then the final number // is written on top of those spaces on the specified span as ascii. // At the end, it's saved in the header field and the final value returned. - internal int WriteChecksum(int checksum, Span buffer) + internal static int WriteChecksum(int checksum, Span buffer) { // The checksum field is also counted towards the total sum // but as an array filled with spaces - checksum += TarHelpers.SpaceChar * 8; + checksum += (byte)' ' * 8; Span converted = stackalloc byte[FieldLengths.Checksum]; - WriteAsOctal(checksum, converted, 0, converted.Length); + converted.Clear(); + FormatOctal(checksum, converted); Span destination = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); // Checksum field ends with a null and a space - destination[^1] = TarHelpers.SpaceChar; // ' ' - destination[^2] = 0; // '\0' + destination[^1] = (byte)' '; + destination[^2] = (byte)'\0'; int i = destination.Length - 3; int j = converted.Length - 1; @@ -684,7 +682,7 @@ internal int WriteChecksum(int checksum, Span buffer) } else { - destination[i] = TarHelpers.ZeroChar; // Leading zero chars '0' + destination[i] = (byte)'0'; // Leading zero chars } i--; } @@ -697,67 +695,75 @@ private static int WriteLeftAlignedBytesAndGetChecksum(ReadOnlySpan bytesT { Debug.Assert(destination.Length > 1); - int checksum = 0; - - for (int i = 0, j = 0; i < destination.Length && j < bytesToWrite.Length; i++, j++) - { - destination[i] = bytesToWrite[j]; - checksum += destination[i]; - } + // Copy as many bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + bytesToWrite.CopyTo(destination); - return checksum; + return Checksum(bytesToWrite); } // Writes the specified bytes aligned to the right, filling all the leading bytes with the zero char 0x30, // ensuring a null terminator is included at the end of the specified span. private static int WriteRightAlignedBytesAndGetChecksum(ReadOnlySpan bytesToWrite, Span destination) { - int checksum = 0; - int i = destination.Length - 1; - int j = bytesToWrite.Length - 1; + Debug.Assert(destination.Length > 1); - while (i >= 0) + // Null terminated + destination[^1] = (byte)'\0'; + + // Copy as many input bytes as will fit + int numToCopy = Math.Min(bytesToWrite.Length, destination.Length - 1); + bytesToWrite = bytesToWrite.Slice(0, numToCopy); + int copyPos = destination.Length - 1 - bytesToWrite.Length; + bytesToWrite.CopyTo(destination.Slice(copyPos)); + + // Fill all leading bytes with zeros + destination.Slice(0, copyPos).Fill((byte)'0'); + + return Checksum(destination); + } + + private static int Checksum(ReadOnlySpan bytes) + { + int checksum = 0; + foreach (byte b in bytes) { - if (i == destination.Length - 1) - { - destination[i] = 0; // null terminated - } - else if (j >= 0) - { - destination[i] = bytesToWrite[j]; - j--; - } - else - { - destination[i] = TarHelpers.ZeroChar; // leading zeros - } - checksum += destination[i]; - i--; + checksum += b; } - return checksum; } // Writes the specified decimal number as a right-aligned octal number and returns its checksum. - internal static int WriteAsOctal(long tenBaseNumber, Span destination, int location, int length) + internal static int FormatOctal(long value, Span destination) { - long octal = TarHelpers.ConvertDecimalToOctal(tenBaseNumber); - byte[] bytes = Encoding.ASCII.GetBytes(octal.ToString()); - return WriteRightAlignedBytesAndGetChecksum(bytes.AsSpan(), destination.Slice(location, length)); + ulong remaining = (ulong)value; + Span digits = stackalloc byte[32]; // longer than any possible octal formatting of a ulong + + int i = digits.Length - 1; + while (true) + { + digits[i] = (byte)('0' + (remaining % 8)); + remaining /= 8; + if (remaining == 0) break; + i--; + } + + return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination); } // Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum. - private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination, int location, int length) + private static int WriteAsTimestamp(DateTimeOffset timestamp, Span destination) { long unixTimeSeconds = timestamp.ToUnixTimeSeconds(); - return WriteAsOctal(unixTimeSeconds, destination, location, length); + return FormatOctal(unixTimeSeconds, destination); } // Writes the specified text as an ASCII string aligned to the left, and returns its checksum. - private static int WriteAsAsciiString(string str, Span buffer, int location, int length) + private static int WriteAsAsciiString(string str, Span buffer) { byte[] bytes = Encoding.ASCII.GetBytes(str); - return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer.Slice(location, length)); + return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer); } // Gets the special name for the 'name' field in an extended attribute entry. @@ -767,18 +773,15 @@ private static int WriteAsAsciiString(string str, Span buffer, int locatio // - %f: The filename of the file, equivalent to the result of the basename utility on the translated pathname. private string GenerateExtendedAttributeName() { - string? dirName = Path.GetDirectoryName(_name); - dirName = string.IsNullOrEmpty(dirName) ? "." : dirName; - - int processId = Environment.ProcessId; - - string? fileName = Path.GetFileName(_name); - fileName = string.IsNullOrEmpty(fileName) ? "." : fileName; + ReadOnlySpan dirName = Path.GetDirectoryName(_name.AsSpan()); + dirName = dirName.IsEmpty ? "." : dirName; - string trailingSeparator = (_typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList) ? - $"{Path.DirectorySeparatorChar}" : string.Empty; + ReadOnlySpan fileName = Path.GetFileName(_name.AsSpan()); + fileName = fileName.IsEmpty ? "." : fileName; - return string.Format(PaxHeadersFormat, dirName, processId, fileName, trailingSeparator); + return _typeFlag is TarEntryType.Directory or TarEntryType.DirectoryList ? + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}{Path.DirectorySeparatorChar}" : + $"{dirName}/PaxHeaders.{Environment.ProcessId}/{fileName}"; } // Gets the special name for the 'name' field in a global extended attribute entry. diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs index d4592eb85d4b5..898daf1e6c3ed 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs @@ -3,8 +3,10 @@ using System.Buffers; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; +using System.Numerics; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -17,11 +19,6 @@ internal static partial class TarHelpers internal const short RecordSize = 512; internal const int MaxBufferLength = 4096; - internal const int ZeroChar = 0x30; - internal const byte SpaceChar = 0x20; - internal const byte EqualsChar = 0x3d; - internal const byte NewLineChar = 0xa; - // Default mode for TarEntry created for a file-type. private const UnixFileMode DefaultFileMode = UnixFileMode.UserRead | UnixFileMode.UserWrite | @@ -117,36 +114,6 @@ internal static int CalculatePadding(long size) return padding; } - // Returns the specified 8-base number as a 10-base number. - internal static int ConvertDecimalToOctal(int value) - { - int multiplier = 1; - int accum = value; - int actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - - // Returns the specified 10-base number as an 8-base number. - internal static long ConvertDecimalToOctal(long value) - { - long multiplier = 1; - long accum = value; - long actual = 0; - while (accum != 0) - { - actual += (accum % 8) * multiplier; - accum /= 8; - multiplier *= 10; - } - return actual; - } - // Returns true if all the bytes in the specified array are nulls, false otherwise. internal static bool IsAllNullBytes(Span buffer) => buffer.IndexOfAnyExcept((byte)0) < 0; @@ -191,9 +158,10 @@ internal static bool TryGetStringAsBaseTenInteger(IReadOnlyDictionary buffer) + /// Parses a byte span that represents an ASCII string containing a number in octal base. + internal static T ParseOctal(ReadOnlySpan buffer) where T : struct, INumber { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt32(str, fromBase: 8); - } + buffer = TrimEndingNullsAndSpaces(buffer); - // Receives a byte array that represents an ASCII string containing a number in octal base. - // Converts the array to an octal base number, then transforms it to ten base and returns it. - internal static long GetTenBaseLongFromOctalAsciiChars(Span buffer) - { - string str = GetTrimmedAsciiString(buffer); - return string.IsNullOrEmpty(str) ? 0 : Convert.ToInt64(str, fromBase: 8); + T octalFactor = T.CreateTruncating(8u); + T value = T.Zero; + foreach (byte b in buffer) + { + uint digit = (uint)(b - '0'); + if (digit >= 8) + { + ThrowInvalidNumber(); + } + + value = checked((value * octalFactor) + T.CreateTruncating(digit)); + } + + return value; } + [DoesNotReturn] + private static void ThrowInvalidNumber() => + throw new FormatException(SR.Format(SR.TarInvalidNumber)); + // Returns the string contained in the specified buffer of bytes, // in the specified encoding, removing the trailing null or space chars. private static string GetTrimmedString(ReadOnlySpan buffer, Encoding encoding) + { + buffer = TrimEndingNullsAndSpaces(buffer); + return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer); + } + + internal static ReadOnlySpan TrimEndingNullsAndSpaces(ReadOnlySpan buffer) { int trimmedLength = buffer.Length; - while (trimmedLength > 0 && IsByteNullOrSpace(buffer[trimmedLength - 1])) + while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32) { trimmedLength--; } - return trimmedLength == 0 ? string.Empty : encoding.GetString(buffer.Slice(0, trimmedLength)); - - static bool IsByteNullOrSpace(byte c) => c is 0 or 32; + return buffer.Slice(0, trimmedLength); } // Returns the ASCII string contained in the specified buffer of bytes, diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 7e951c5243e35..137acbcfbd2d0 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -54,8 +54,18 @@ public TarReader(Stream archiveStream, bool leaveOpen = false) /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + s.Dispose(); + } + } + } } /// @@ -64,8 +74,18 @@ public void Dispose() /// The property of any entry can be replaced with a new stream. If the user decides to replace it on a instance that was obtained using a , the underlying stream gets disposed immediately, freeing the of origin from the responsibility of having to dispose it. public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) + { + foreach (Stream s in _dataStreamsToDispose) + { + await s.DisposeAsync().ConfigureAwait(false); + } + } + } } /// @@ -249,52 +269,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel } } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - s.Dispose(); - } - } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (!_leaveOpen && _dataStreamsToDispose?.Count > 0) - { - foreach (Stream s in _dataStreamsToDispose) - { - await s.DisposeAsync().ConfigureAwait(false); - } - } - } - finally - { - _isDisposed = true; - } - } - } - // Asynchronously retrieves the next entry if one is found. private async ValueTask GetNextEntryInternalAsync(bool copyData, CancellationToken cancellationToken) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs index 7e678bcd91871..28eaf1e332323 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Unix.cs @@ -94,16 +94,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { Debug.Assert(entry._header._dataStream == null); - - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs index bfb6cf17f1107..d3fa19a5b12ea 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.Windows.cs @@ -22,15 +22,15 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil FileAttributes attributes = File.GetAttributes(fullPath); TarEntryType entryType; - if (attributes.HasFlag(FileAttributes.ReparsePoint)) + if ((attributes & FileAttributes.ReparsePoint) != 0) { entryType = TarEntryType.SymbolicLink; } - else if (attributes.HasFlag(FileAttributes.Directory)) + else if ((attributes & FileAttributes.Directory) != 0) { entryType = TarEntryType.Directory; } - else if (attributes.HasFlag(FileAttributes.Normal) || attributes.HasFlag(FileAttributes.Archive)) + else if ((attributes & (FileAttributes.Normal | FileAttributes.Archive)) != 0) { entryType = Format is TarEntryFormat.V7 ? TarEntryType.V7RegularFile : TarEntryType.RegularFile; } @@ -48,7 +48,7 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil _ => throw new FormatException(string.Format(SR.TarInvalidFormat, Format)), }; - FileSystemInfo info = attributes.HasFlag(FileAttributes.Directory) ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); + FileSystemInfo info = (attributes & FileAttributes.Directory) != 0 ? new DirectoryInfo(fullPath) : new FileInfo(fullPath); entry._header._mTime = info.LastWriteTimeUtc; entry._header._aTime = info.LastAccessTimeUtc; @@ -63,16 +63,8 @@ private TarEntry ConstructEntryForWriting(string fullPath, string entryName, Fil if (entry.EntryType is TarEntryType.RegularFile or TarEntryType.V7RegularFile) { - FileStreamOptions options = new() - { - Mode = FileMode.Open, - Access = FileAccess.Read, - Share = FileShare.Read, - Options = fileOptions - }; - Debug.Assert(entry._header._dataStream == null); - entry._header._dataStream = new FileStream(fullPath, options); + entry._header._dataStream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, fileOptions); } return entry; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 9469231684023..7d3977eda68df 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -84,8 +84,20 @@ public TarWriter(Stream archiveStream, TarEntryFormat format = TarEntryFormat.Pa /// public void Dispose() { - Dispose(disposing: true); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + WriteFinalRecords(); + } + + if (!_leaveOpen) + { + _archiveStream.Dispose(); + } + } } /// @@ -93,8 +105,20 @@ public void Dispose() /// public async ValueTask DisposeAsync() { - await DisposeAsync(disposing: true).ConfigureAwait(false); - GC.SuppressFinalize(this); + if (!_isDisposed) + { + _isDisposed = true; + + if (_wroteEntries) + { + await WriteFinalRecordsAsync().ConfigureAwait(false); + } + + if (!_leaveOpen) + { + await _archiveStream.DisposeAsync().ConfigureAwait(false); + } + } } /// @@ -241,95 +265,40 @@ public Task WriteEntryAsync(TarEntry entry, CancellationToken cancellationToken return WriteEntryAsyncInternal(entry, cancellationToken); } - // Disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private void Dispose(bool disposing) + // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. + private void WriteEntryInternal(TarEntry entry) { - if (disposing && !_isDisposed) + Span buffer = stackalloc byte[TarHelpers.RecordSize]; + buffer.Clear(); + + switch (entry.Format) { - try - { - if (_wroteEntries) - { - WriteFinalRecords(); - } + case TarEntryFormat.V7: + entry._header.WriteAsV7(_archiveStream, buffer); + break; + case TarEntryFormat.Ustar: + entry._header.WriteAsUstar(_archiveStream, buffer); + break; - if (!_leaveOpen) + case TarEntryFormat.Pax: + if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) { - _archiveStream.Dispose(); + entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); } - } - finally - { - _isDisposed = true; - } - } - } - - // Asynchronously disposes the current instance. - // If 'disposing' is 'false', the method was called from the finalizer. - private async ValueTask DisposeAsync(bool disposing) - { - if (disposing && !_isDisposed) - { - try - { - if (_wroteEntries) + else { - await WriteFinalRecordsAsync().ConfigureAwait(false); + entry._header.WriteAsPax(_archiveStream, buffer); } + break; + case TarEntryFormat.Gnu: + entry._header.WriteAsGnu(_archiveStream, buffer); + break; - if (!_leaveOpen) - { - await _archiveStream.DisposeAsync().ConfigureAwait(false); - } - } - finally - { - _isDisposed = true; - } - } - } - - // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. - private void WriteEntryInternal(TarEntry entry) - { - byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); - Span buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger - buffer.Clear(); // Rented arrays aren't clean - try - { - switch (entry.Format) - { - case TarEntryFormat.V7: - entry._header.WriteAsV7(_archiveStream, buffer); - break; - case TarEntryFormat.Ustar: - entry._header.WriteAsUstar(_archiveStream, buffer); - break; - case TarEntryFormat.Pax: - if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) - { - entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); - } - else - { - entry._header.WriteAsPax(_archiveStream, buffer); - } - break; - case TarEntryFormat.Gnu: - entry._header.WriteAsGnu(_archiveStream, buffer); - break; - default: - Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); - throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); - } - } - finally - { - ArrayPool.Shared.Return(rented); + default: + Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); + throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); } _wroteEntries = true; @@ -364,7 +333,9 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can // by two records consisting entirely of zero bytes. private void WriteFinalRecords() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; + Span emptyRecord = stackalloc byte[TarHelpers.RecordSize]; + emptyRecord.Clear(); + _archiveStream.Write(emptyRecord); _archiveStream.Write(emptyRecord); } @@ -374,9 +345,14 @@ private void WriteFinalRecords() // This method is called from DisposeAsync, so we don't want to propagate a cancelled CancellationToken. private async ValueTask WriteFinalRecordsAsync() { - byte[] emptyRecord = new byte[TarHelpers.RecordSize]; - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); - await _archiveStream.WriteAsync(emptyRecord, cancellationToken: default).ConfigureAwait(false); + const int TwoRecordSize = TarHelpers.RecordSize * 2; + + byte[] twoEmptyRecords = ArrayPool.Shared.Rent(TwoRecordSize); + Array.Clear(twoEmptyRecords, 0, TwoRecordSize); + + await _archiveStream.WriteAsync(twoEmptyRecords.AsMemory(0, TwoRecordSize), cancellationToken: default).ConfigureAwait(false); + + ArrayPool.Shared.Return(twoEmptyRecords); } private (string, string) ValidateWriteEntryArguments(string fileName, string? entryName) diff --git a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs index 4370cd16878b1..33923d91b9b8f 100644 --- a/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs +++ b/src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs @@ -375,49 +375,35 @@ private static void DoCreateFromDirectory(string sourceDirectoryName, string des if (includeBaseDirectory && di.Parent != null) basePath = di.Parent.FullName; - // Windows' MaxPath (260) is used as an arbitrary default capacity, as it is likely - // to be greater than the length of typical entry names from the file system, even - // on non-Windows platforms. The capacity will be increased, if needed. - const int DefaultCapacity = 260; - char[] entryNameBuffer = ArrayPool.Shared.Rent(DefaultCapacity); - - try + foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) { - foreach (FileSystemInfo file in di.EnumerateFileSystemInfos("*", SearchOption.AllDirectories)) - { - directoryIsEmpty = false; + directoryIsEmpty = false; - int entryNameLength = file.FullName.Length - basePath.Length; - Debug.Assert(entryNameLength > 0); + int entryNameLength = file.FullName.Length - basePath.Length; + Debug.Assert(entryNameLength > 0); - if (file is FileInfo) - { - // Create entry for file: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer); - ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); - } - else + if (file is FileInfo) + { + // Create entry for file: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength); + ZipFileExtensions.DoCreateEntryFromFile(archive, file.FullName, entryName, compressionLevel); + } + else + { + // Entry marking an empty dir: + if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) { - // Entry marking an empty dir: - if (file is DirectoryInfo possiblyEmpty && ArchivingUtils.IsDirEmpty(possiblyEmpty)) - { - // FullName never returns a directory separator character on the end, - // but Zip archives require it to specify an explicit directory: - string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, ref entryNameBuffer, appendPathSeparator: true); - archive.CreateEntry(entryName); - } + // FullName never returns a directory separator character on the end, + // but Zip archives require it to specify an explicit directory: + string entryName = ArchivingUtils.EntryFromPath(file.FullName, basePath.Length, entryNameLength, appendPathSeparator: true); + archive.CreateEntry(entryName); } - } // foreach - - // If no entries create an empty root directory entry: - if (includeBaseDirectory && directoryIsEmpty) - archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, ref entryNameBuffer, appendPathSeparator: true)); - } - finally - { - ArrayPool.Shared.Return(entryNameBuffer); + } } + // If no entries create an empty root directory entry: + if (includeBaseDirectory && directoryIsEmpty) + archive.CreateEntry(ArchivingUtils.EntryFromPath(di.Name, 0, di.Name.Length, appendPathSeparator: true)); } } }