// NOTE: patch metadata and hunks for partially visible files are kept verbatim as comments.
// diff --git a/src/SharpCompress/Common/CompressionType.cs b/src/SharpCompress/Common/CompressionType.cs
// index 595834233..f9b638550 100644
// --- a/src/SharpCompress/Common/CompressionType.cs
// +++ b/src/SharpCompress/Common/CompressionType.cs
// @@ -9,6 +9,7 @@ public enum CompressionType
//      Deflate,
//      Rar,
//      LZMA,
// +    LZMA2,
//      BCJ,
//      BCJ2,
//      LZip,
// diff --git a/src/SharpCompress/Common/SevenZip/ArchiveWriter.cs b/src/SharpCompress/Common/SevenZip/ArchiveWriter.cs
// new file mode 100644
// index 000000000..d9f90e584
// --- /dev/null
// +++ b/src/SharpCompress/Common/SevenZip/ArchiveWriter.cs
// @@ -0,0 +1,52 @@
using System.IO;
using SharpCompress.Compressors.LZMA.Utilities;

namespace SharpCompress.Common.SevenZip;

/// <summary>
/// Entry points for emitting the top-level 7z header, either as a raw header block
/// (Header marker followed by MainStreamsInfo and FilesInfo) or as an EncodedHeader
/// block that only carries the StreamsInfo describing a packed header.
/// </summary>
internal static class ArchiveHeaderWriter
{
    /// <summary>
    /// Emits an uncompressed header: the Header marker, then each section that was
    /// supplied (preceded by its own marker byte), then the End marker.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="mainStreamsInfo">Optional MainStreamsInfo section; skipped when null.</param>
    /// <param name="filesInfo">Optional FilesInfo section; skipped when null.</param>
    public static void WriteRawHeader(
        Stream stream,
        SevenZipStreamsInfoWriter? mainStreamsInfo,
        SevenZipFilesInfoWriter? filesInfo
    )
    {
        Emit(BlockType.Header);

        if (mainStreamsInfo is { } streamsSection)
        {
            Emit(BlockType.MainStreamsInfo);
            streamsSection.Write(stream);
        }

        if (filesInfo is { } filesSection)
        {
            Emit(BlockType.FilesInfo);
            filesSection.Write(stream);
        }

        Emit(BlockType.End);

        // Writes a single block-type marker byte.
        void Emit(BlockType marker) => stream.WriteByte((byte)marker);
    }

    /// <summary>
    /// Emits the EncodedHeader marker followed by the given StreamsInfo section.
    /// No End marker is added here; the StreamsInfo writer terminates itself.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="headerStreamsInfo">StreamsInfo describing the packed header data.</param>
    public static void WriteEncodedHeader(
        Stream stream,
        SevenZipStreamsInfoWriter headerStreamsInfo
    )
    {
        var marker = (byte)BlockType.EncodedHeader;
        stream.WriteByte(marker);
        headerStreamsInfo.Write(stream);
    }
}
// diff --git a/src/SharpCompress/Common/SevenZip/SevenZipFilesInfo.cs b/src/SharpCompress/Common/SevenZip/SevenZipFilesInfo.cs
// new file mode 100644
// index 000000000..f5fd410e2
// --- /dev/null
// +++ b/src/SharpCompress/Common/SevenZip/SevenZipFilesInfo.cs
// @@ -0,0 +1,226 @@
using System;
using System.IO;
using System.Text;
using SharpCompress.Compressors.LZMA.Utilities;

namespace SharpCompress.Common.SevenZip;

/// <summary>
/// Per-entry metadata gathered while writing; consumed when the FilesInfo header is built.
/// </summary>
internal sealed class SevenZipWriteEntry
{
    /// <summary>Entry name as stored in the archive (defaults to the empty string).</summary>
    public string Name { get; init; } = string.Empty;

    /// <summary>Last-modified timestamp, or null when none is recorded.</summary>
    public DateTime? ModificationTime { get; init; }

    /// <summary>Attribute bits written as a 32-bit value, or null when none are recorded.</summary>
    public uint? Attributes { get; init; }

    /// <summary>True when the entry is a directory.</summary>
    public bool IsDirectory { get; init; }

    /// <summary>True when the entry is flagged as empty (treated as having no data stream).</summary>
    public bool IsEmpty { get; init; }
}

///
/// Writes the FilesInfo section of a 7z header, including all file properties
/// (names, timestamps, attributes, empty stream/file markers).
///
internal sealed class SevenZipFilesInfoWriter
{
    /// <summary>Entries to describe, in the order they are written.</summary>
    public SevenZipWriteEntry[] Entries { get; init; } = [];

    /// <summary>
    /// Writes the entry count, then the property blocks in this order: EmptyStream
    /// (and EmptyFile) when at least one entry has no stream, Name, MTime and
    /// WinAttributes (the last two only when at least one entry defines them),
    /// followed by the End marker.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    public void Write(Stream stream)
    {
        stream.WriteEncodedUInt64((ulong)Entries.Length);

        // Directories and zero-length files carry no data stream.
        var emptyStreamCount = 0;
        foreach (var entry in Entries)
        {
            if (HasNoStream(entry))
            {
                emptyStreamCount++;
            }
        }

        if (emptyStreamCount > 0)
        {
            WriteEmptyStreamProperty(stream, emptyStreamCount);
        }

        WriteNameProperty(stream);
        WriteMTimeProperty(stream);
        WriteAttributesProperty(stream);

        stream.WriteByte((byte)BlockType.End);
    }

    /// <summary>True for directories and for entries flagged as empty.</summary>
    private static bool HasNoStream(SevenZipWriteEntry entry) =>
        entry.IsEmpty || entry.IsDirectory;

    /// <summary>
    /// Writes the EmptyStream bit vector (one bit per entry) and, when any stream-less
    /// entry is a file rather than a directory, the EmptyFile bit vector (one bit per
    /// stream-less entry).
    /// </summary>
    private void WriteEmptyStreamProperty(Stream stream, int emptyStreamCount)
    {
        var emptyStreams = Array.ConvertAll(Entries, e => HasNoStream(e));
        var emptyFiles = new bool[emptyStreamCount];
        var hasEmptyFile = false;
        var emptyIndex = 0;

        foreach (var entry in Entries)
        {
            if (!HasNoStream(entry))
            {
                continue;
            }

            var isEmptyFile = !entry.IsDirectory;
            emptyFiles[emptyIndex++] = isEmptyFile;
            hasEmptyFile |= isEmptyFile;
        }

        // kEmptyStream
        WriteFileProperty(stream, BlockType.EmptyStream, s => s.WriteBoolVector(emptyStreams));

        // kEmptyFile (only if there are actual empty files, not just directories)
        if (hasEmptyFile)
        {
            WriteFileProperty(stream, BlockType.EmptyFile, s => s.WriteBoolVector(emptyFiles));
        }
    }

    /// <summary>
    /// Writes the Name property: an "external" byte of 0 followed by every entry name
    /// as UTF-16LE with a two-byte null terminator.
    /// </summary>
    private void WriteNameProperty(Stream stream)
    {
        WriteFileProperty(
            stream,
            BlockType.Name,
            s =>
            {
                // External = 0 (inline)
                s.WriteByte(0);

                foreach (var entry in Entries)
                {
                    var nameBytes = Encoding.Unicode.GetBytes(entry.Name);
                    s.Write(nameBytes);
                    // null terminator (2 bytes for UTF-16)
                    s.WriteByte(0);
                    s.WriteByte(0);
                }
            }
        );
    }

    /// <summary>
    /// Writes the MTime property when at least one entry has a modification time:
    /// a defined-vector, an "external" byte of 0, then one little-endian 64-bit
    /// FILETIME per entry that defines a time.
    /// </summary>
    private void WriteMTimeProperty(Stream stream)
    {
        if (!Array.Exists(Entries, e => e.ModificationTime != null))
        {
            return;
        }

        WriteFileProperty(
            stream,
            BlockType.MTime,
            s =>
            {
                s.WriteOptionalBoolVector(
                    Array.ConvertAll(Entries, e => e.ModificationTime != null)
                );

                // External = 0 (inline)
                s.WriteByte(0);

                var buf = new byte[8];
                foreach (var entry in Entries)
                {
                    if (entry.ModificationTime is { } mtime)
                    {
                        var fileTime = (ulong)mtime.ToUniversalTime().ToFileTimeUtc();
                        System.Buffers.Binary.BinaryPrimitives.WriteUInt64LittleEndian(
                            buf,
                            fileTime
                        );
                        s.Write(buf, 0, 8);
                    }
                }
            }
        );
    }

    /// <summary>
    /// Writes the WinAttributes property when at least one entry has attributes:
    /// a defined-vector, an "external" byte of 0, then one little-endian 32-bit value
    /// per entry that defines attributes.
    /// </summary>
    private void WriteAttributesProperty(Stream stream)
    {
        if (!Array.Exists(Entries, e => e.Attributes != null))
        {
            return;
        }

        WriteFileProperty(
            stream,
            BlockType.WinAttributes,
            s =>
            {
                s.WriteOptionalBoolVector(Array.ConvertAll(Entries, e => e.Attributes != null));

                // External = 0 (inline)
                s.WriteByte(0);

                var buf = new byte[4];
                foreach (var entry in Entries)
                {
                    if (entry.Attributes is { } attrs)
                    {
                        System.Buffers.Binary.BinaryPrimitives.WriteUInt32LittleEndian(buf, attrs);
                        s.Write(buf, 0, 4);
                    }
                }
            }
        );
    }

    /// <summary>
    /// Writes one property block: property id, encoded payload size, payload.
    /// The payload is rendered into a temporary buffer first so its size is known
    /// before anything is written to <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    /// <param name="propertyId">Block type identifying the property.</param>
    /// <param name="writeData">Callback that writes the payload to the stream it is given.</param>
    private static void WriteFileProperty(
        Stream stream,
        BlockType propertyId,
        Action<Stream> writeData
    )
    {
        using var dataStream = new MemoryStream();
        writeData(dataStream);

        stream.WriteByte((byte)propertyId);
        stream.WriteEncodedUInt64((ulong)dataStream.Length);
        dataStream.Position = 0;
        dataStream.CopyTo(stream);
    }
}
// diff --git a/src/SharpCompress/Common/SevenZip/SevenZipHeaderStructures.cs b/src/SharpCompress/Common/SevenZip/SevenZipHeaderStructures.cs
// new file mode 100644
// index 000000000..24e86bfd0
// --- /dev/null
// +++ b/src/SharpCompress/Common/SevenZip/SevenZipHeaderStructures.cs
// @@ -0,0 +1,306 @@
using System;
using System.IO;
using SharpCompress.Compressors.LZMA.Utilities;

namespace SharpCompress.Common.SevenZip;

/// <summary>
/// Writes a digest list: an optional-bool "defined" vector followed by the
/// little-endian 32-bit CRC of every defined element.
/// </summary>
internal sealed class SevenZipDigestsWriter(uint?[] crcs)
{
    /// <summary>CRC values; a null element means "not defined".</summary>
    public uint?[] CRCs { get; } = crcs;

    /// <summary>Writes the defined-vector and then the defined CRC values in order.</summary>
    /// <param name="stream">Destination stream.</param>
    public void Write(Stream stream)
    {
        stream.WriteOptionalBoolVector(Array.ConvertAll(CRCs, crc => crc.HasValue));

        var buf = new byte[4];
        foreach (var crc in CRCs)
        {
            if (crc is { } crcValue)
            {
                System.Buffers.Binary.BinaryPrimitives.WriteUInt32LittleEndian(buf, crcValue);
                stream.Write(buf, 0, 4);
            }
        }
    }

    /// <summary>Returns true when at least one CRC is defined.</summary>
    public bool HasAnyDefined() => Array.Exists(CRCs, crc => crc.HasValue);
}

///
/// Writes PackInfo section: packed stream positions, sizes, and CRCs.
///
internal sealed class SevenZipPackInfoWriter
{
    /// <summary>Value written first, as the pack position.</summary>
    public ulong PackPos { get; init; }

    /// <summary>Packed stream sizes; the element count is written as the number of pack streams.</summary>
    public ulong[] Sizes { get; init; } = [];

    /// <summary>Optional packed-stream CRCs; a Crc block is emitted only if one is defined.</summary>
    public uint?[] CRCs { get; init; } = [];

    /// <summary>
    /// Writes pack position, stream count, the Size block, an optional Crc block and the End marker.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    public void Write(Stream stream)
    {
        stream.WriteEncodedUInt64(PackPos);
        stream.WriteEncodedUInt64((ulong)Sizes.Length);

        // Sizes
        stream.WriteByte((byte)BlockType.Size);
        foreach (var packedSize in Sizes)
        {
            stream.WriteEncodedUInt64(packedSize);
        }

        // CRCs (optional)
        var crcSection = new SevenZipDigestsWriter(CRCs);
        if (crcSection.HasAnyDefined())
        {
            stream.WriteByte((byte)BlockType.Crc);
            crcSection.Write(stream);
        }

        stream.WriteByte((byte)BlockType.End);
    }
}

/// <summary>
/// Writes the UnPackInfo section: folder definitions (coders, bind pairs, packed
/// stream indices), the coders' unpack sizes and optional per-folder CRCs.
/// </summary>
internal sealed class SevenZipUnPackInfoWriter
{
    /// <summary>Folders to describe, in order.</summary>
    public CFolder[] Folders { get; init; } = [];

    /// <summary>
    /// Writes the Folder block (count, "external" byte 0, each folder), the
    /// CodersUnpackSize block, a Crc block when any folder has an unpack CRC,
    /// and the End marker.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    public void Write(Stream stream)
    {
        stream.WriteByte((byte)BlockType.Folder);
        stream.WriteEncodedUInt64((ulong)Folders.Length);

        // External = 0 (inline)
        stream.WriteByte(0);

        foreach (var folder in Folders)
        {
            WriteFolder(stream, folder);
        }

        // CodersUnPackSize: every unpack size of every folder, in order.
        stream.WriteByte((byte)BlockType.CodersUnpackSize);
        foreach (var folder in Folders)
        {
            var sizes = folder._unpackSizes;
            for (var s = 0; s < sizes.Count; s++)
            {
                stream.WriteEncodedUInt64((ulong)sizes[s]);
            }
        }

        // UnPackDigests: one slot per folder, written only if some folder has a CRC.
        if (Array.Exists(Folders, f => f._unpackCrc != null))
        {
            stream.WriteByte((byte)BlockType.Crc);
            var folderCrcs = Array.ConvertAll<CFolder, uint?>(Folders, f => f._unpackCrc);
            new SevenZipDigestsWriter(folderCrcs).Write(stream);
        }

        stream.WriteByte((byte)BlockType.End);
    }

    /// <summary>
    /// Writes one folder: coder count, each coder, every bind pair (in-index then
    /// out-index) and, only when the folder has more than one packed stream, the
    /// packed stream indices.
    /// </summary>
    private static void WriteFolder(Stream stream, CFolder folder)
    {
        var coders = folder._coders;
        stream.WriteEncodedUInt64((ulong)coders.Count);
        for (var c = 0; c < coders.Count; c++)
        {
            WriteCoder(stream, coders[c]);
        }

        var bindPairs = folder._bindPairs;
        for (var b = 0; b < bindPairs.Count; b++)
        {
            stream.WriteEncodedUInt64((ulong)bindPairs[b]._inIndex);
            stream.WriteEncodedUInt64((ulong)bindPairs[b]._outIndex);
        }

        // A single packed stream is implicit and therefore not listed.
        var packStreams = folder._packStreams;
        if (packStreams.Count <= 1)
        {
            return;
        }

        for (var p = 0; p < packStreams.Count; p++)
        {
            stream.WriteEncodedUInt64((ulong)packStreams[p]);
        }
    }

    /// <summary>
    /// Writes one coder: an attribute byte (low nibble = codec id length, 0x10 = coder
    /// has other than one input/one output stream, 0x20 = properties present), the
    /// codec id bytes most-significant first, the stream counts for complex coders,
    /// and the length-prefixed properties when present.
    /// </summary>
    private static void WriteCoder(Stream stream, CCoderInfo coder)
    {
        const byte ComplexCoderFlag = 0x10;
        const byte HasPropertiesFlag = 0x20;

        var idLength = coder._methodId.GetLength();
        var complex = !(coder._numInStreams == 1 && coder._numOutStreams == 1);
        var withProps = coder._props != null && coder._props.Length > 0;

        var header = (byte)(idLength & 0x0F);
        if (complex)
        {
            header |= ComplexCoderFlag;
        }
        if (withProps)
        {
            header |= HasPropertiesFlag;
        }
        stream.WriteByte(header);

        // Fill the id buffer from the last byte backwards so the most significant
        // byte ends up first.
        var idBytes = new byte[idLength];
        var remainingId = coder._methodId._id;
        for (var pos = idLength; pos-- > 0; )
        {
            idBytes[pos] = (byte)(remainingId & 0xFF);
            remainingId >>= 8;
        }
        stream.Write(idBytes, 0, idLength);

        if (complex)
        {
            stream.WriteEncodedUInt64((ulong)coder._numInStreams);
            stream.WriteEncodedUInt64((ulong)coder._numOutStreams);
        }

        if (withProps)
        {
            stream.WriteEncodedUInt64((ulong)coder._props!.Length);
            stream.Write(coder._props);
        }
    }
}

///
/// Writes SubStreamsInfo section: per-file unpack sizes and CRCs within folders.
///
internal sealed class SevenZipSubStreamsInfoWriter
{
    /// <summary>Folders the substreams belong to (not read by <see cref="Write"/>).</summary>
    public CFolder[] Folders { get; init; } = [];

    /// <summary>Number of substreams in each folder.</summary>
    public ulong[] NumUnPackStreamsInFolders { get; init; } = [];

    /// <summary>
    /// Unpack sizes of all substreams, folder by folder. A folder with zero substreams
    /// contributes no element.
    /// </summary>
    public ulong[] UnPackSizes { get; init; } = [];

    /// <summary>Substream CRCs; a Crc block is emitted only if one is defined.</summary>
    public uint?[] CRCs { get; init; } = [];

    /// <summary>
    /// Writes the NumUnpackStream and Size blocks (both skipped when every folder has
    /// exactly one substream), an optional Crc block and the End marker.
    /// </summary>
    /// <param name="stream">Destination stream.</param>
    public void Write(Stream stream)
    {
        // When every folder holds exactly one stream the counts are implicit.
        var allSingle = Array.TrueForAll(NumUnPackStreamsInFolders, count => count == 1UL);

        if (!allSingle)
        {
            stream.WriteByte((byte)BlockType.NumUnpackStream);
            foreach (var count in NumUnPackStreamsInFolders)
            {
                stream.WriteEncodedUInt64(count);
            }
        }

        // UnPackSizes - write all except the last per folder (it's implicit from folder unpack size).
        // Only emit the Size block when at least one folder has multiple substreams.
        if (UnPackSizes.Length > 0 && !allSingle)
        {
            stream.WriteByte((byte)BlockType.Size);

            var sizeIndex = 0;
            foreach (var numStreams in NumUnPackStreamsInFolders)
            {
                // A folder without substreams has no entry in UnPackSizes, so there is
                // no implicit "last" size to skip; advancing the index here would
                // misalign every following folder.
                if (numStreams == 0)
                {
                    continue;
                }

                for (var j = 1UL; j < numStreams; j++)
                {
                    stream.WriteEncodedUInt64(UnPackSizes[sizeIndex++]);
                }
                sizeIndex++; // skip the last (implicit)
            }
        }

        // Digests for streams with unknown CRCs
        var digests = new SevenZipDigestsWriter(CRCs);
        if (digests.HasAnyDefined())
        {
            stream.WriteByte((byte)BlockType.Crc);
            digests.Write(stream);
        }

        stream.WriteByte((byte)BlockType.End);
    }
}

/// <summary>
/// Writes a complete StreamsInfo section: each of PackInfo, UnPackInfo and
/// SubStreamsInfo that is present (preceded by its marker byte), then the End marker.
/// </summary>
internal sealed class SevenZipStreamsInfoWriter
{
    /// <summary>Optional PackInfo section.</summary>
    public SevenZipPackInfoWriter? PackInfo { get; init; }

    /// <summary>Optional UnPackInfo section.</summary>
    public SevenZipUnPackInfoWriter? UnPackInfo { get; init; }

    /// <summary>Optional SubStreamsInfo section.</summary>
    public SevenZipSubStreamsInfoWriter? SubStreamsInfo { get; init; }

    /// <summary>Writes the present sections in order, then the End marker.</summary>
    /// <param name="stream">Destination stream.</param>
    public void Write(Stream stream)
    {
        if (PackInfo is { } packInfo)
        {
            stream.WriteByte((byte)BlockType.PackInfo);
            packInfo.Write(stream);
        }

        if (UnPackInfo is { } unPackInfo)
        {
            stream.WriteByte((byte)BlockType.UnpackInfo);
            unPackInfo.Write(stream);
        }

        if (SubStreamsInfo is { } subStreamsInfo)
        {
            stream.WriteByte((byte)BlockType.SubStreamsInfo);
            subStreamsInfo.Write(stream);
        }

        stream.WriteByte((byte)BlockType.End);
    }
}
// diff --git a/src/SharpCompress/Common/SevenZip/SevenZipSignatureHeader.cs b/src/SharpCompress/Common/SevenZip/SevenZipSignatureHeader.cs
// new file mode 100644
// index 000000000..3f64a72ec
// --- /dev/null
// +++ b/src/SharpCompress/Common/SevenZip/SevenZipSignatureHeader.cs
// @@ -0,0 +1,91 @@
using System;
using System.Buffers.Binary;
using System.IO;
using SharpCompress.Crypto;

namespace SharpCompress.Common.SevenZip;

/// <summary>
/// Handles writing the 7z signature header (32 bytes at position 0 of the archive).
/// Layout: [6 bytes magic] [2 bytes version] [4 bytes StartHeaderCRC] [20 bytes StartHeader]
/// </summary>
internal static class SevenZipSignatureHeaderWriter
{
    /// <summary>
    /// 7z file magic signature bytes.
    /// </summary>
    private static readonly byte[] Signature = [(byte)'7', (byte)'z', 0xBC, 0xAF, 0x27, 0x1C];

    /// <summary>
    /// Total size of the signature header in bytes (6+2+4+8+8+4 = 32).
    /// </summary>
    public const int HeaderSize = 32;

    // Field layout inside the 32-byte header.
    private const int VersionOffset = 6;
    private const int StartHeaderCrcOffset = 8;
    private const int StartHeaderOffset = 12;
    private const int StartHeaderSize = 20;

    // Format version written into the header (major, minor).
    private const byte MajorVersion = 0;
    private const byte MinorVersion = 2;

    /// <summary>
    /// Writes a placeholder signature header: magic and version, with the CRC and
    /// StartHeader fields left as zeros. Call this at the start of archive creation
    /// to reserve space.
    /// </summary>
    /// <param name="stream">Destination stream; written at its current position.</param>
    public static void WritePlaceholder(Stream stream)
    {
        var header = CreateHeaderPrefix();
        stream.Write(header, 0, header.Length);
    }

    /// <summary>
    /// Writes the final signature header with correct offsets and CRCs.
    /// The stream must be seekable; this method sets its position to 0 and leaves it
    /// just past the header afterwards.
    /// </summary>
    /// <param name="stream">The archive output stream (seekable).</param>
    /// <param name="nextHeaderOffset">Offset from end of signature header to start of metadata header.</param>
    /// <param name="nextHeaderSize">Size of the metadata header in bytes.</param>
    /// <param name="nextHeaderCrc">CRC32 of the metadata header bytes.</param>
    public static void WriteFinal(
        Stream stream,
        ulong nextHeaderOffset,
        ulong nextHeaderSize,
        uint nextHeaderCrc
    )
    {
        var header = CreateHeaderPrefix();

        // StartHeader (20 bytes): NextHeaderOffset(8) + NextHeaderSize(8) + NextHeaderCRC(4)
        var startHeader = header.AsSpan(StartHeaderOffset, StartHeaderSize);
        BinaryPrimitives.WriteUInt64LittleEndian(startHeader.Slice(0, 8), nextHeaderOffset);
        BinaryPrimitives.WriteUInt64LittleEndian(startHeader.Slice(8, 8), nextHeaderSize);
        BinaryPrimitives.WriteUInt32LittleEndian(startHeader.Slice(16, 4), nextHeaderCrc);

        // StartHeaderCRC covers exactly the 20 StartHeader bytes.
        var startHeaderCrc = Crc32Stream.Compute(
            Crc32Stream.DEFAULT_POLYNOMIAL,
            Crc32Stream.DEFAULT_SEED,
            startHeader
        );
        BinaryPrimitives.WriteUInt32LittleEndian(
            header.AsSpan(StartHeaderCrcOffset, 4),
            startHeaderCrc
        );

        // Write at position 0
        stream.Position = 0;
        stream.Write(header, 0, header.Length);
    }

    /// <summary>
    /// Creates a zero-filled header buffer with the magic signature and version set.
    /// </summary>
    private static byte[] CreateHeaderPrefix()
    {
        var header = new byte[HeaderSize];
        Signature.CopyTo(header, 0);
        header[VersionOffset] = MajorVersion;
        header[VersionOffset + 1] = MinorVersion;
        return header;
    }
}
// diff --git a/src/SharpCompress/Common/SevenZip/SevenZipStreamsCompressor.cs
// b/src/SharpCompress/Common/SevenZip/SevenZipStreamsCompressor.cs
// new file mode 100644
// index 000000000..052cce49f
// --- /dev/null
// +++ b/src/SharpCompress/Common/SevenZip/SevenZipStreamsCompressor.cs
// @@ -0,0 +1,155 @@
using System;
using System.IO;
using SharpCompress.Common;
using SharpCompress.Compressors.LZMA;
using SharpCompress.Crypto;

namespace SharpCompress.Common.SevenZip;

/// <summary>
/// Outcome of compressing one stream: the folder metadata plus the packed sizes and CRCs.
/// </summary>
internal sealed class PackedStream
{
    /// <summary>Folder describing the coder, unpack size and unpack CRC.</summary>
    public CFolder Folder { get; init; } = new();

    /// <summary>Sizes of the packed streams.</summary>
    public ulong[] Sizes { get; init; } = [];

    /// <summary>CRCs of the packed streams (null when not available).</summary>
    public uint?[] CRCs { get; init; } = [];
}

/// <summary>
/// Compresses one input stream with LZMA or LZMA2 into the archive stream and builds
/// the folder metadata describing that compression.
/// </summary>
internal sealed class SevenZipStreamsCompressor(Stream outputStream)
{
    /// <summary>
    /// Compresses <paramref name="inputStream"/> into the output stream. LZMA2 is used
    /// when <paramref name="compressionType"/> is LZMA2; any other value takes the LZMA path.
    /// The packed size is derived from the output stream's Position before and after.
    /// </summary>
    /// <param name="inputStream">Uncompressed data to compress.</param>
    /// <param name="compressionType">Compression method (LZMA or LZMA2).</param>
    /// <param name="encoderProperties">LZMA encoder properties (null for defaults).</param>
    /// <returns>Folder metadata, compressed size and CRCs for the written data.</returns>
    public PackedStream Compress(
        Stream inputStream,
        CompressionType compressionType,
        LzmaEncoderProperties? encoderProperties = null
    )
    {
        var useLzma2 = compressionType == CompressionType.LZMA2;
        var settings = encoderProperties ?? new LzmaEncoderProperties(eos: !useLzma2);

        var packStart = outputStream.Position;

        // Everything the encoder emits passes through the CRC calculator.
        using var crcOutput = new Crc32Stream(outputStream);

        if (!useLzma2)
        {
            // LZMA
            using var lzma = LzmaStream.Create(settings, false, crcOutput);
            var lzmaProps = lzma.Properties;

            CopyWithCrc(inputStream, lzma, out var lzmaSourceCrc, out var lzmaSourceLength);
            lzma.Dispose();

            return Finish(false, lzmaProps, lzmaSourceLength, lzmaSourceCrc);
        }

        // LZMA2: chunk-based framing
        using var lzma2 = new Lzma2EncoderStream(
            crcOutput,
            settings.DictionarySize,
            settings.NumFastBytes
        );

        CopyWithCrc(inputStream, lzma2, out var sourceCrc, out var sourceLength);
        // Dispose first so the final chunk and end marker are counted below.
        lzma2.Dispose();

        return Finish(true, lzma2.Properties, sourceLength, sourceCrc);

        // Reads the packed size and packed CRC at the moment it is called.
        PackedStream Finish(bool lzma2Used, byte[] coderProps, long rawLength, uint rawCrc) =>
            BuildPackedStream(
                isLzma2: lzma2Used,
                coderProps,
                (ulong)(outputStream.Position - packStart),
                (ulong)rawLength,
                rawCrc,
                crcOutput.Crc
            );
    }

    /// <summary>
    /// Pumps <paramref name="source"/> into <paramref name="destination"/> in 81920-byte
    /// reads while accumulating the CRC32 of the bytes read and their total count.
    /// </summary>
    private static void CopyWithCrc(
        Stream source,
        Stream destination,
        out uint crc,
        out long bytesRead
    )
    {
        var chunk = new byte[81920];
        var runningSeed = Crc32Stream.DEFAULT_SEED;
        long copied = 0;

        while (true)
        {
            var got = source.Read(chunk, 0, chunk.Length);
            if (got <= 0)
            {
                break;
            }

            // Crc32Stream.Compute returns the complemented CRC state, so complementing
            // its result yields the seed for the next chunk.
            runningSeed = ~Crc32Stream.Compute(
                Crc32Stream.DEFAULT_POLYNOMIAL,
                runningSeed,
                chunk.AsSpan(0, got)
            );
            destination.Write(chunk, 0, got);
            copied += got;
        }

        crc = ~runningSeed;
        bytesRead = copied;
    }

    /// <summary>
    /// Builds the result for a single-coder folder (one input, one output stream) using
    /// the LZMA or LZMA2 method id, the coder properties, the unpack size and CRC.
    /// </summary>
    private static PackedStream BuildPackedStream(
        bool isLzma2,
        byte[] properties,
        ulong compressedSize,
        ulong uncompressedSize,
        uint inputCrc,
        uint? outputCrc
    )
    {
        var coder = new CCoderInfo
        {
            _methodId = isLzma2 ? CMethodId.K_LZMA2 : CMethodId.K_LZMA,
            _numInStreams = 1,
            _numOutStreams = 1,
            _props = properties,
        };

        var folder = new CFolder();
        folder._coders.Add(coder);
        folder._packStreams.Add(0);
        folder._unpackSizes.Add((long)uncompressedSize);
        folder._unpackCrc = inputCrc;

        return new PackedStream
        {
            Folder = folder,
            Sizes = [compressedSize],
            CRCs = [outputCrc],
        };
    }
}
// diff --git a/src/SharpCompress/Common/SevenZip/SevenZipWriteExtensions.cs b/src/SharpCompress/Common/SevenZip/SevenZipWriteExtensions.cs
// new file mode 100644
// index 000000000..26b8692c2
// --- /dev/null
// +++ b/src/SharpCompress/Common/SevenZip/SevenZipWriteExtensions.cs
// @@ -0,0 +1,97 @@
using System;
using System.IO;

namespace SharpCompress.Common.SevenZip;

/// <summary>
/// Stream extension methods for writing 7z binary format primitives
/// (variable-length numbers and packed bool vectors).
/// </summary>
internal static class SevenZipWriteExtensions
{
    /// <summary>
    /// Writes a 64-bit unsigned value in the 7z variable-length form: the count of
    /// leading 1-bits in the first byte gives the number of extra bytes, the remaining
    /// bits of the first byte hold the high part of the value, and the extra bytes hold
    /// the low part, least significant first.
    /// </summary>
    /// <returns>The number of bytes written (1 to 9).</returns>
    public static int WriteEncodedUInt64(this Stream stream, ulong value)
    {
        var encoded = new byte[9];
        // Stays 0xFF when all eight payload bytes are needed.
        encoded[0] = 0xFF;
        var used = 1;
        var remaining = value;

        for (byte threshold = 0x80; threshold != 0; threshold >>= 1)
        {
            if (remaining < threshold)
            {
                // Leading 1-bits for the bytes already emitted, then the high bits.
                var lengthPrefix = (byte)((0xFF ^ threshold) ^ (threshold - 1u));
                encoded[0] = (byte)(remaining | lengthPrefix);
                break;
            }

            encoded[used++] = (byte)(remaining & 0xFF);
            remaining >>= 8;
        }

        stream.Write(encoded, 0, used);
        return used;
    }

    /// <summary>
    /// Writes a bool array as packed bits, most significant bit first; the final byte
    /// is zero-padded when the length is not a multiple of eight.
    /// </summary>
    /// <returns>The number of bytes written.</returns>
    public static ulong WriteBoolVector(this Stream stream, bool[] vector)
    {
        ulong written = 0;
        byte current = 0;
        var bit = 0;

        foreach (var flag in vector)
        {
            if (flag)
            {
                current |= (byte)(0x80 >> bit);
            }

            if (++bit == 8)
            {
                stream.WriteByte(current);
                written++;
                current = 0;
                bit = 0;
            }
        }

        // Flush a partially filled last byte.
        if (bit != 0)
        {
            stream.WriteByte(current);
            written++;
        }

        return written;
    }

    /// <summary>
    /// Writes an optional bool vector: a single 0x01 byte when every element is true
    /// (including an empty array), otherwise 0x00 followed by the packed bits.
    /// </summary>
    public static void WriteOptionalBoolVector(this Stream stream, bool[] vector)
    {
        var allDefined = true;
        foreach (var flag in vector)
        {
            if (!flag)
            {
                allDefined = false;
                break;
            }
        }

        if (allDefined)
        {
            stream.WriteByte(1);
        }
        else
        {
            stream.WriteByte(0);
            stream.WriteBoolVector(vector);
        }
    }
}
// diff --git a/src/SharpCompress/Compressors/LZMA/Lzma2EncoderStream.cs b/src/SharpCompress/Compressors/LZMA/Lzma2EncoderStream.cs
// new file mode 100644
// index 000000000..1642be449
// --- /dev/null
// +++ b/src/SharpCompress/Compressors/LZMA/Lzma2EncoderStream.cs
// @@ -0,0 +1,292 @@
using System;
using System.IO;

namespace SharpCompress.Compressors.LZMA;

///
/// Write-only stream that compresses data using the LZMA2 framing format.
/// Buffers input, compresses in chunks, and writes LZMA2-framed output to the underlying stream.
/// Each chunk is independently compressed with a fresh LZMA encoder.
///
internal sealed class Lzma2EncoderStream : Stream
{
    // Max uncompressed chunk size per LZMA2 spec: (0x1F << 16) + 0xFFFF + 1 = 2MB
    private const int MAX_UNCOMPRESSED_CHUNK_SIZE = (0x1F << 16) + 0xFFFF + 1;

    // Max compressed payload per LZMA2 chunk header: 0xFFFF + 1 = 64KB
    private const int MAX_COMPRESSED_CHUNK_SIZE = 0xFFFF + 1;

    // Max uncompressed sub-chunk for raw (uncompressed) chunks: 0xFFFF + 1 = 64KB
    private const int MAX_UNCOMPRESSED_SUBCHUNK_SIZE = 0xFFFF + 1;

    private readonly Stream _output;
    private readonly int _dictionarySize;
    private readonly int _numFastBytes;
    private readonly byte[] _buffer;
    private int _bufferPosition;
    private bool _isFirstChunk = true;
    private bool _isDisposed;
    private byte _lzmaPropertiesByte;
    private bool _lzmaPropertiesKnown;

    /// <summary>
    /// Creates a new LZMA2 encoder stream.
    /// </summary>
    /// <param name="output">The stream to write LZMA2-framed compressed data to.</param>
    /// <param name="dictionarySize">Dictionary size for LZMA compression.</param>
    /// <param name="numFastBytes">Number of fast bytes for LZMA compression.</param>
    public Lzma2EncoderStream(Stream output, int dictionarySize, int numFastBytes)
    {
        _output = output;
        _dictionarySize = dictionarySize;
        _numFastBytes = numFastBytes;
        _buffer = new byte[MAX_UNCOMPRESSED_CHUNK_SIZE];
        _bufferPosition = 0;
    }

    /// <summary>
    /// Gets the 1-byte LZMA2 properties (encoded dictionary size).
    /// </summary>
    public byte[] Properties => [EncodeDictionarySize(_dictionarySize)];

    public override bool CanRead => false;
    public override bool CanSeek => false;
    public override bool CanWrite => true;
    public override long Length => throw new NotSupportedException();

    public override long Position
    {
        get => throw new NotSupportedException();
        set => throw new NotSupportedException();
    }

    /// <summary>
    /// Appends data to the internal chunk buffer; each time the buffer fills up,
    /// the chunk is encoded and written to the output stream.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The stream has already been disposed.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        // After Dispose the end marker is already written; anything buffered now
        // would never be flushed, so fail loudly instead of dropping data.
        if (_isDisposed)
        {
            throw new ObjectDisposedException(nameof(Lzma2EncoderStream));
        }

        while (count > 0)
        {
            var toCopy = Math.Min(count, _buffer.Length - _bufferPosition);
            Buffer.BlockCopy(buffer, offset, _buffer, _bufferPosition, toCopy);
            _bufferPosition += toCopy;
            offset += toCopy;
            count -= toCopy;

            if (_bufferPosition == _buffer.Length)
            {
                FlushChunk();
            }
        }
    }

    /// <summary>Does nothing: partial chunks are only emitted on Dispose.</summary>
    public override void Flush() { }

    public override int Read(byte[] buffer, int offset, int count) =>
        throw new NotSupportedException();

    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

    public override void SetLength(long value) => throw new NotSupportedException();

    /// <summary>
    /// On the first disposing call, flushes any buffered data as a final chunk and
    /// writes the LZMA2 end marker (0x00). The output stream is not disposed here.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing && !_isDisposed)
        {
            _isDisposed = true;

            // Flush remaining buffered data
            if (_bufferPosition > 0)
            {
                FlushChunk();
            }

            // Write LZMA2 end marker
            _output.WriteByte(0x00);
        }
        base.Dispose(disposing);
    }

    /// <summary>
    /// Encodes the buffered data as one compressed chunk when the result fits the
    /// 64KB compressed-size field and is smaller than the input; otherwise (or when
    /// compression throws) writes the data as uncompressed sub-chunks.
    /// </summary>
    private void FlushChunk()
    {
        if (_bufferPosition == 0)
        {
            return;
        }

        var uncompressedData = _buffer.AsSpan(0, _bufferPosition);
        _bufferPosition = 0;

        // Try compressing the data
        byte[] compressed;
        try
        {
            compressed = CompressBlock(uncompressedData);
        }
        catch (Exception ex) when (ex is not OutOfMemoryException and not StackOverflowException)
        {
            // Deliberate best-effort: if compression fails, store the data uncompressed.
            WriteUncompressedChunks(uncompressedData);
            return;
        }

        // Check if compressed output fits in a single chunk and is actually smaller
        if (
            compressed.Length <= MAX_COMPRESSED_CHUNK_SIZE
            && compressed.Length < uncompressedData.Length
        )
        {
            WriteCompressedChunk(uncompressedData.Length, compressed);
        }
        else
        {
            WriteUncompressedChunks(uncompressedData);
        }
    }

    /// <summary>
    /// Compresses one block with a fresh LZMA encoder and returns only the bytes a
    /// decoder consumes for it. Also captures the LZMA properties byte on first use.
    /// </summary>
    private byte[] CompressBlock(ReadOnlySpan<byte> data)
    {
        var encoderProps = new LzmaEncoderProperties(eos: false, _dictionarySize, _numFastBytes);

        var encoder = new Encoder();
        encoder.SetCoderProperties(encoderProps.PropIDs, encoderProps.Properties);

        // Capture the LZMA properties byte (pb/lp/lc encoding) for the chunk header
        if (!_lzmaPropertiesKnown)
        {
            var propBytes = new byte[5];
            encoder.WriteCoderProperties(propBytes);
            _lzmaPropertiesByte = propBytes[0];
            _lzmaPropertiesKnown = true;
        }

        using var inputMs = new MemoryStream(data.ToArray(), writable: false);
        using var outputMs = new MemoryStream();

        encoder.Code(inputMs, outputMs, data.Length, -1, null);

        var fullCompressed = outputMs.ToArray();

        // The LZMA range encoder flush writes trailing bytes the decoder doesn't consume.
        // Trial-decode to find the exact byte count the decoder needs, so the LZMA2
        // chunk header reports a compressed size that matches what the decoder reads.
        var consumed = FindConsumedBytes(fullCompressed, data.Length);
        if (consumed < fullCompressed.Length)
        {
            return fullCompressed.AsSpan(0, consumed).ToArray();
        }

        return fullCompressed;
    }

    /// <summary>
    /// Decodes <paramref name="compressedData"/> back to <paramref name="uncompressedSize"/>
    /// bytes and returns the input position reached, i.e. the number of bytes consumed.
    /// </summary>
    private int FindConsumedBytes(byte[] compressedData, int uncompressedSize)
    {
        // Build 5-byte LZMA property header: [pb/lp/lc byte] [dictSize as LE int32]
        var props = new byte[5];
        props[0] = _lzmaPropertiesByte;
        props[1] = (byte)_dictionarySize;
        props[2] = (byte)(_dictionarySize >> 8);
        props[3] = (byte)(_dictionarySize >> 16);
        props[4] = (byte)(_dictionarySize >> 24);

        var decoder = new Decoder();
        decoder.SetDecoderProperties(props);

        using var input = new MemoryStream(compressedData);
        using var output = new MemoryStream();
        decoder.Code(input, output, compressedData.Length, uncompressedSize, null);

        return (int)input.Position;
    }

    /// <summary>
    /// Writes a compressed LZMA2 chunk.
    /// Header: [control] [uncompSize_hi] [uncompSize_lo] [compSize_hi] [compSize_lo] [props]
    /// </summary>
    private void WriteCompressedChunk(int uncompressedSize, byte[] compressedData)
    {
        var uncompSizeMinus1 = uncompressedSize - 1;
        var compSizeMinus1 = compressedData.Length - 1;

        // Each chunk is compressed independently with a fresh LZMA encoder,
        // so we must use 0xE0 (full reset: dictionary + state + properties) every time.
        // The low five control bits carry bits 16..20 of (uncompressed size - 1).
        var control = (byte)(0xE0 | ((uncompSizeMinus1 >> 16) & 0x1F));
        _isFirstChunk = false;

        _output.WriteByte(control);
        _output.WriteByte((byte)((uncompSizeMinus1 >> 8) & 0xFF));
        _output.WriteByte((byte)(uncompSizeMinus1 & 0xFF));
        _output.WriteByte((byte)((compSizeMinus1 >> 8) & 0xFF));
        _output.WriteByte((byte)(compSizeMinus1 & 0xFF));

        // 0xE0 (>= 0xC0) requires properties byte
        _output.WriteByte(_lzmaPropertiesByte);

        _output.Write(compressedData, 0, compressedData.Length);
    }

    /// <summary>
    /// Writes data as uncompressed LZMA2 sub-chunks (max 64KB each).
    /// Header: [control] [size_hi] [size_lo]
    /// </summary>
    private void WriteUncompressedChunks(ReadOnlySpan<byte> data)
    {
        var offset = 0;
        while (offset < data.Length)
        {
            var chunkSize = Math.Min(data.Length - offset, MAX_UNCOMPRESSED_SUBCHUNK_SIZE);
            var sizeMinus1 = chunkSize - 1;

            byte control;
            if (_isFirstChunk)
            {
                // 0x01: uncompressed with dictionary reset
                control = 0x01;
                _isFirstChunk = false;
            }
            else
            {
                // 0x02: uncompressed without dictionary reset
                control = 0x02;
            }

            _output.WriteByte(control);
            _output.WriteByte((byte)((sizeMinus1 >> 8) & 0xFF));
            _output.WriteByte((byte)(sizeMinus1 & 0xFF));

            _output.Write(data.Slice(offset, chunkSize));
            offset += chunkSize;
        }
    }

    /// <summary>
    /// Encodes a dictionary size into the 1-byte LZMA2 properties format.
    /// Reverse of the decoder formula: dictSize = (2 | (p and 1)) shl ((p shr 1) + 11).
    /// Returns the smallest p whose decoded size is at least <paramref name="dictSize"/>;
    /// sizes of 4096 or less (including non-positive values) map to 0.
    /// </summary>
    internal static byte EncodeDictionarySize(int dictSize)
    {
        // Special case: very small dictionary sizes
        if (dictSize <= (2 << 11))
        {
            return 0;
        }

        for (byte p = 0; p < 40; p++)
        {
            // Evaluate in 64 bits: at shift 30 (p = 38, 39) the candidate is 2 GiB or
            // 3 GiB, which does not fit in an int and would wrap negative.
            var size = (long)(2 | (p & 1)) << ((p >> 1) + 11);
            if (size >= dictSize)
            {
                return p;
            }
        }

        // Not reachable for int input (p = 38 already covers int.MaxValue); kept as
        // the format's "maximum dictionary" value to satisfy definite return.
        return 40;
    }
}
// diff --git a/src/SharpCompress/Compressors/LZMA/LzmaEncoderProperties.cs b/src/SharpCompress/Compressors/LZMA/LzmaEncoderProperties.cs
// index 198ba471c..8076ac08b 100644
// --- a/src/SharpCompress/Compressors/LZMA/LzmaEncoderProperties.cs
// +++ b/src/SharpCompress/Compressors/LZMA/LzmaEncoderProperties.cs
// @@ -12,6 +12,16 @@ public class LzmaEncoderProperties
//      internal ReadOnlySpan Properties => _properties;
//      private readonly object[] _properties;
//
// +    ///
// +    /// The dictionary size configured for this encoder.
// +    ///
// +    internal int DictionarySize { get; }
// +
// +    ///
// +    /// The number of fast bytes configured for this encoder.
// +    ///
// +    internal int NumFastBytes { get; }
// +
//      public LzmaEncoderProperties()
//          : this(false) { }
//
// @@ -23,6 +33,8 @@ public LzmaEncoderProperties(bool eos, int dictionary)
//
//      public LzmaEncoderProperties(bool eos, int dictionary, int numFastBytes)
//      {
// +        DictionarySize = dictionary;
// +        NumFastBytes = numFastBytes;
//          var posStateBits = 2;
//          var litContextBits = 3;
//          var litPosBits = 0;
// diff --git a/src/SharpCompress/Factories/SevenZipFactory.cs b/src/SharpCompress/Factories/SevenZipFactory.cs
// index e4942041d..adc26074d 100644
// --- a/src/SharpCompress/Factories/SevenZipFactory.cs
// +++ b/src/SharpCompress/Factories/SevenZipFactory.cs
// @@ -1,3 +1,4 @@
// +using System;
//  using System.Collections.Generic;
//  using System.IO;
//  using System.Threading;
// @@ -5,15 +6,18 @@ using SharpCompress.Archives;
//  using SharpCompress.Archives.SevenZip;
//  using SharpCompress.Common;
// +using SharpCompress.Common.Options;
//  using SharpCompress.IO;
//  using SharpCompress.Readers;
// +using SharpCompress.Writers;
// +using SharpCompress.Writers.SevenZip;
namespace SharpCompress.Factories;

/// <summary>
/// Represents the foundation factory of 7Zip archive.
/// </summary>
// (pre-image of the next line: public class SevenZipFactory : Factory, IArchiveFactory, IMultiArchiveFactory)
public class SevenZipFactory : Factory, IArchiveFactory, IMultiArchiveFactory, IWriterFactory
{
    #region IFactory

    // @@ -131,4 +135,35 @@ out IReader? reader
    }

    #endregion

    #region IWriterFactory

    /// <summary>
    /// Creates a <see cref="SevenZipWriter"/> over <paramref name="stream"/>.
    /// A <see cref="SevenZipWriterOptions"/> instance is used as-is; a plain
    /// <see cref="WriterOptions"/> is converted through the copying constructor.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// <paramref name="writerOptions"/> is neither of the two supported option types.
    /// </exception>
    public IWriter OpenWriter(Stream stream, IWriterOptions writerOptions)
    {
        SevenZipWriterOptions sevenZipOptions = writerOptions switch
        {
            SevenZipWriterOptions szo => szo,
            WriterOptions wo => new SevenZipWriterOptions(wo),
            _ => throw new ArgumentException(
                $"Expected WriterOptions or SevenZipWriterOptions, got {writerOptions.GetType().Name}",
                nameof(writerOptions)
            ),
        };
        return new SevenZipWriter(stream, sevenZipOptions);
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="OpenWriter"/>. The writer is created
    /// synchronously and returned in an already-completed task; cancellation is only
    /// observed before creation starts.
    /// </summary>
    public ValueTask<IAsyncWriter> OpenAsyncWriter(
        Stream stream,
        IWriterOptions writerOptions,
        CancellationToken cancellationToken = default
    )
    {
        cancellationToken.ThrowIfCancellationRequested();
        var writer = OpenWriter(stream, writerOptions);
        return new((IAsyncWriter)writer);
    }

    #endregion
}

// diff --git a/src/SharpCompress/Writers/SevenZip/SevenZipWriter.Async.cs b/src/SharpCompress/Writers/SevenZip/SevenZipWriter.Async.cs
// new file mode 100644
// index 000000000..6fd59d877
// --- /dev/null
// +++ b/src/SharpCompress/Writers/SevenZip/SevenZipWriter.Async.cs
// @@ -0,0 +1,39 @@
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace SharpCompress.Writers.SevenZip;

public partial class SevenZipWriter
{
    /// <summary>
    /// Writes a file entry to the 7z archive through the asynchronous API.
    /// Note: the work is performed synchronously by delegating to <c>Write</c>;
    /// the returned task is already completed and the token is only checked up front.
    /// </summary>
    public override ValueTask WriteAsync(
        string filename,
        Stream source,
        DateTime? modificationTime,
        CancellationToken cancellationToken = default
    )
    {
        cancellationToken.ThrowIfCancellationRequested();
        Write(filename, source, modificationTime);
        return default;
    }

    /// <summary>
    /// Writes a directory entry to the 7z archive through the asynchronous API.
    /// Delegates synchronously to <c>WriteDirectory</c> and returns a completed task.
    /// </summary>
    public override ValueTask WriteDirectoryAsync(
        string directoryName,
        DateTime? modificationTime,
        CancellationToken cancellationToken = default
    )
    {
        cancellationToken.ThrowIfCancellationRequested();
        WriteDirectory(directoryName, modificationTime);
        return default;
    }
}

// diff --git a/src/SharpCompress/Writers/SevenZip/SevenZipWriter.Factory.cs b/src/SharpCompress/Writers/SevenZip/SevenZipWriter.Factory.cs
// new file mode 100644
// index 000000000..89e80414a
// --- /dev/null
// +++ b/src/SharpCompress/Writers/SevenZip/SevenZipWriter.Factory.cs
// @@ -0,0 +1,62 @@
#if NET8_0_OR_GREATER
using System.IO;

namespace SharpCompress.Writers.SevenZip;

// NOTE(review): any generic argument on IWriterOpenable was lost when this text was
// extracted — confirm the exact interface against the repository.
public partial class SevenZipWriter : IWriterOpenable
{
    /// <summary>
    /// Opens a new SevenZipWriter for the specified file path.
    /// </summary>
    public static IWriter OpenWriter(string filePath, SevenZipWriterOptions writerOptions)
    {
        filePath.NotNullOrEmpty(nameof(filePath));
        return OpenWriter(new FileInfo(filePath), writerOptions);
    }

    /// <summary>
    /// Opens a new SevenZipWriter for the specified file.
    /// The file is created, or truncated if it already exists, so no bytes of a previous,
    /// longer file can trail the new archive. The file stream is created here and is
    /// therefore owned by the writer: it is closed when the writer is disposed,
    /// regardless of the LeaveStreamOpen value in <paramref name="writerOptions"/>.
    /// </summary>
    public static IWriter OpenWriter(FileInfo fileInfo, SevenZipWriterOptions writerOptions)
    {
        fileInfo.NotNull(nameof(fileInfo));

        var fileStream = fileInfo.Open(FileMode.Create, FileAccess.Write, FileShare.None);
        try
        {
            return new SevenZipWriter(fileStream, writerOptions with { LeaveStreamOpen = false });
        }
        catch
        {
            // Construction failed, so no writer exists to release the handle later.
            fileStream.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Opens a new SevenZipWriter for the specified stream.
    /// </summary>
    public static IWriter OpenWriter(Stream stream, SevenZipWriterOptions writerOptions)
    {
        stream.NotNull(nameof(stream));
        return new SevenZipWriter(stream, writerOptions);
    }

    ///
    /// Opens a new async SevenZipWriter for the specified file path.
+ /// + public static IAsyncWriter OpenAsyncWriter(string filePath, SevenZipWriterOptions writerOptions) + { + return (IAsyncWriter)OpenWriter(filePath, writerOptions); + } + + /// + /// Opens a new async SevenZipWriter for the specified stream. + /// + public static IAsyncWriter OpenAsyncWriter(Stream stream, SevenZipWriterOptions writerOptions) + { + return (IAsyncWriter)OpenWriter(stream, writerOptions); + } + + /// + /// Opens a new async SevenZipWriter for the specified file. + /// + public static IAsyncWriter OpenAsyncWriter( + FileInfo fileInfo, + SevenZipWriterOptions writerOptions + ) + { + return (IAsyncWriter)OpenWriter(fileInfo, writerOptions); + } +} +#endif diff --git a/src/SharpCompress/Writers/SevenZip/SevenZipWriter.cs b/src/SharpCompress/Writers/SevenZip/SevenZipWriter.cs new file mode 100644 index 000000000..a785aaf2c --- /dev/null +++ b/src/SharpCompress/Writers/SevenZip/SevenZipWriter.cs @@ -0,0 +1,361 @@ +using System; +using System.Collections.Generic; +using System.IO; +using SharpCompress.Common; +using SharpCompress.Common.SevenZip; +using SharpCompress.Compressors.LZMA; +using SharpCompress.Crypto; +using SharpCompress.IO; + +namespace SharpCompress.Writers.SevenZip; + +/// +/// Writes 7z archives in non-solid mode (each file compressed independently). +/// Requires a seekable output stream for back-patching the signature header. +/// TODO: solid mode support in a future iteration. +/// TODO: IWritableArchive support in a future iteration. +/// +public partial class SevenZipWriter : AbstractWriter +{ + private readonly SevenZipWriterOptions sevenZipOptions; + private readonly List entries = []; + private readonly List packedStreams = []; + private bool finalized; + + /// + /// Creates a new SevenZipWriter writing to the specified stream. + /// + /// Seekable output stream. + /// Writer options. 
public SevenZipWriter(Stream destination, SevenZipWriterOptions options)
        : base(ArchiveType.SevenZip, options)
    {
        if (!destination.CanSeek)
        {
            throw new ArchiveOperationException(
                "7z writing requires a seekable stream for header back-patching."
            );
        }

        sevenZipOptions = options;

        if (options.LeaveStreamOpen)
        {
            // Shield the caller's stream from being disposed together with the writer
            destination = SharpCompressStream.CreateNonDisposing(destination);
        }

        InitializeStream(destination);

        // Write placeholder signature header (32 bytes) - will be back-patched on finalize
        SevenZipSignatureHeaderWriter.WritePlaceholder(OutputStream.NotNull());
    }

    /// <summary>
    /// Writes a file entry to the archive.
    /// A source that yields no bytes is recorded as an empty file with no packed stream.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The archive has already been finalized.</exception>
    public override void Write(string filename, Stream source, DateTime? modificationTime)
    {
        ThrowIfFinalized();

        filename = NormalizeFilename(filename);
        var progressStream = WrapWithProgress(source, filename);

        // A seekable source reporting zero length is known to be empty up front;
        // anything else has to go through the compressor to find out.
        var knownEmpty = source.CanSeek && source.Length == 0;
        var hasData = !knownEmpty && CompressEntry(progressStream);

        entries.Add(
            new SevenZipWriteEntry
            {
                Name = filename,
                ModificationTime = modificationTime,
                IsDirectory = false,
                IsEmpty = !hasData,
            }
        );
    }

    /// <summary>
    /// Compresses <paramref name="input"/> to the output stream as its own folder.
    /// Returns false, leaving the output exactly as it was, when the input had no data.
    /// </summary>
    private bool CompressEntry(Stream input)
    {
        var output = OutputStream.NotNull();
        var outputPosBefore = output.Position;
        var compressor = new SevenZipStreamsCompressor(output);
        var packed = compressor.Compress(
            input,
            sevenZipOptions.CompressionType,
            sevenZipOptions.LzmaProperties
        );

        if (packed.Folder.GetUnpackSize() != 0)
        {
            packedStreams.Add(packed);
            return true;
        }

        // The stream was actually empty (e.g. a non-seekable stream with no data):
        // rewind output to erase orphaned encoder header/end-marker bytes
        // so they don't shift subsequent pack stream offsets
        output.Position = outputPosBefore;
        output.SetLength(outputPosBefore);
        return false;
    }

    /// <summary>
    /// Writes a directory entry to the archive.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The archive has already been finalized.</exception>
    public override void WriteDirectory(string directoryName, DateTime? modificationTime)
    {
        ThrowIfFinalized();

        directoryName = NormalizeFilename(directoryName);
        directoryName = directoryName.TrimEnd('/');

        entries.Add(
            new SevenZipWriteEntry
            {
                Name = directoryName,
                ModificationTime = modificationTime,
                IsDirectory = true,
                IsEmpty = true,
                Attributes = 0x10, // FILE_ATTRIBUTE_DIRECTORY
            }
        );
    }

    /// <summary>
    /// Rejects further entries once the archive headers have been written.
    /// </summary>
    private void ThrowIfFinalized()
    {
        if (finalized)
        {
            throw new ObjectDisposedException(
                nameof(SevenZipWriter),
                "Cannot write to a finalized archive."
            );
        }
    }

    /// <summary>
    /// Finalizes the archive - writes metadata headers and back-patches the signature header.
    /// The base class is disposed even when finalization fails, so the underlying
    /// stream is not leaked on error.
    /// </summary>
    protected override void Dispose(bool isDisposing)
    {
        try
        {
            if (isDisposing && !finalized)
            {
                finalized = true;
                FinalizeArchive();
            }
        }
        finally
        {
            base.Dispose(isDisposing);
        }
    }

    /// <summary>
    /// Builds the archive header from the collected entries and packed streams and
    /// appends it, compressed or raw depending on the CompressHeader option.
    /// </summary>
    private void FinalizeArchive()
    {
        var output = OutputStream.NotNull();

        // Current position = end of packed data streams
        var endOfPackedData = output.Position;

        // Build the header structures
        var mainStreamsInfo = BuildStreamsInfo();
        var filesInfo = new SevenZipFilesInfoWriter { Entries = entries.ToArray() };

        // Write header to a temporary stream first
        using var headerStream = new MemoryStream();
        ArchiveHeaderWriter.WriteRawHeader(headerStream, mainStreamsInfo, filesInfo);

        // Optionally compress the header
        if (sevenZipOptions.CompressHeader && headerStream.Length > 0)
        {
            WriteCompressedHeader(headerStream, endOfPackedData);
        }
        else
        {
            WriteRawHeaderToOutput(headerStream, endOfPackedData);
        }
    }

    /// <summary>
    /// Compresses the raw header into the output as one more packed stream, then writes
    /// an EncodedHeader block describing how to decompress it.
    /// </summary>
    private void WriteCompressedHeader(MemoryStream rawHeaderStream, long endOfPackedData)
    {
        var output = OutputStream.NotNull();

        // Compress header using LZMA (always LZMA, not LZMA2, matching 7-Zip standard behavior)
        rawHeaderStream.Position = 0;
        var headerCompressor = new SevenZipStreamsCompressor(output);
        var headerPacked = headerCompressor.Compress(
            rawHeaderStream,
            CompressionType.LZMA,
            sevenZipOptions.LzmaProperties
        );

        // Build EncodedHeader StreamsInfo (describes how to decompress the header)
        var headerPackPos = (ulong)(endOfPackedData - SevenZipSignatureHeaderWriter.HeaderSize);
        var headerStreamsInfo = new SevenZipStreamsInfoWriter
        {
            PackInfo = new SevenZipPackInfoWriter
            {
                PackPos = headerPackPos,
                Sizes = headerPacked.Sizes,
                CRCs = headerPacked.CRCs,
            },
            UnPackInfo = new SevenZipUnPackInfoWriter { Folders = [headerPacked.Folder] },
        };

        // Serialize the encoded header to a second temporary stream so its CRC can be computed
        using var encodedHeaderStream = new MemoryStream();
        ArchiveHeaderWriter.WriteEncodedHeader(encodedHeaderStream, headerStreamsInfo);

        AppendHeaderAndPatchSignature(encodedHeaderStream);
    }

    /// <summary>
    /// Appends the raw (uncompressed) header to the output.
    /// <paramref name="endOfPackedData"/> is not needed for a raw header and is unused.
    /// </summary>
    private void WriteRawHeaderToOutput(MemoryStream rawHeaderStream, long endOfPackedData)
    {
        AppendHeaderAndPatchSignature(rawHeaderStream);
    }

    /// <summary>
    /// Copies the serialized header to the current output position, computes its CRC and
    /// back-patches the signature header with the header's offset, size and CRC.
    /// Leaves the output positioned at its end.
    /// </summary>
    private void AppendHeaderAndPatchSignature(MemoryStream serializedHeader)
    {
        var output = OutputStream.NotNull();

        var headerStartPos = output.Position;
        serializedHeader.Position = 0;
        serializedHeader.CopyTo(output);

        var headerCrc = Crc32Stream.Compute(
            Crc32Stream.DEFAULT_POLYNOMIAL,
            Crc32Stream.DEFAULT_SEED,
            serializedHeader.GetBuffer().AsSpan(0, (int)serializedHeader.Length)
        );

        // The offset is stored relative to the end of the signature header
        var nextHeaderOffset = (ulong)(headerStartPos - SevenZipSignatureHeaderWriter.HeaderSize);
        var nextHeaderSize = (ulong)serializedHeader.Length;

        SevenZipSignatureHeaderWriter.WriteFinal(
            output,
            nextHeaderOffset,
            nextHeaderSize,
            headerCrc
        );

        // Seek to end
        output.Seek(0, SeekOrigin.End);
    }

    /// <summary>
    /// Builds the MainStreamsInfo description of all packed streams, or returns null
    /// when no entry produced packed data.
    /// Note: moves each folder's unpack CRC into SubStreamsInfo and clears it on the folder.
    /// </summary>
    private SevenZipStreamsInfoWriter? BuildStreamsInfo()
    {
        var folderCount = packedStreams.Count;
        if (folderCount == 0)
        {
            return null;
        }

        // Collect all packed sizes and CRCs across all folders
        var totalPackStreams = 0;
        foreach (var ps in packedStreams)
        {
            totalPackStreams += ps.Sizes.Length;
        }

        var allSizes = new ulong[totalPackStreams];
        var allCRCs = new uint?[totalPackStreams];
        var folders = new CFolder[folderCount];

        // Per-file unpack sizes and CRCs for SubStreamsInfo.
        // In non-solid mode, each folder has exactly 1 file
        var numUnPackStreamsPerFolder = new ulong[folderCount];
        var unpackSizes = new ulong[folderCount];
        var fileCRCs = new uint?[folderCount];

        var sizeIndex = 0;
        for (var i = 0; i < folderCount; i++)
        {
            var ps = packedStreams[i];
            for (var j = 0; j < ps.Sizes.Length; j++)
            {
                allSizes[sizeIndex] = ps.Sizes[j];
                allCRCs[sizeIndex] = ps.CRCs[j];
                sizeIndex++;
            }

            var folder = ps.Folder;
            folders[i] = folder;
            numUnPackStreamsPerFolder[i] = 1;
            unpackSizes[i] = (ulong)folder.GetUnpackSize();
            fileCRCs[i] = folder._unpackCrc;

            // Clear folder-level CRC (it's moved to SubStreamsInfo)
            folder._unpackCrc = null;
        }

        return new SevenZipStreamsInfoWriter
        {
            PackInfo = new SevenZipPackInfoWriter
            {
                PackPos = 0,
                Sizes = allSizes,
                CRCs = allCRCs,
            },
            UnPackInfo = new SevenZipUnPackInfoWriter { Folders = folders },
            SubStreamsInfo = new SevenZipSubStreamsInfoWriter
            {
                Folders = folders,
                NumUnPackStreamsInFolders = numUnPackStreamsPerFolder,
                UnPackSizes = unpackSizes,
                CRCs = fileCRCs,
            },
        };
    }

    /// <summary>
    /// Normalizes a filename for 7z archive storage.
    /// Converts backslashes to forward slashes, strips a leading drive prefix of the
    /// form "X:/" and removes leading slashes.
    /// </summary>
    private static string NormalizeFilename(string filename)
    {
        filename = filename.Replace('\\', '/');

        // Remove drive letter prefix (e.g., "C:/")
        if (filename.Length >= 3 && filename[1] == ':' && filename[2] == '/')
        {
            filename = filename.Substring(3);
        }

        // Remove leading slashes
        return filename.TrimStart('/');
    }
}

// diff --git a/src/SharpCompress/Writers/SevenZip/SevenZipWriterOptions.cs b/src/SharpCompress/Writers/SevenZip/SevenZipWriterOptions.cs
// new file mode 100644
// index 000000000..033f07fdd
// --- /dev/null
// +++ b/src/SharpCompress/Writers/SevenZip/SevenZipWriterOptions.cs
// @@ -0,0 +1,128 @@
using System;
using SharpCompress.Common;
using SharpCompress.Common.Options;
using SharpCompress.Compressors.LZMA;
using SharpCompress.Providers;

namespace SharpCompress.Writers.SevenZip;

/// <summary>
/// Options for configuring 7z writer behavior.
/// </summary>
public sealed record SevenZipWriterOptions : IWriterOptions
{
    private CompressionType _compressionType;
    private int _compressionLevel;

    /// <summary>
    /// The compression type to use. Supported: LZMA and LZMA2 (default).
    /// </summary>
    /// <exception cref="ArgumentException">Any other compression type is assigned.</exception>
    public CompressionType CompressionType
    {
        get => _compressionType;
        init
        {
            if (value != CompressionType.LZMA && value != CompressionType.LZMA2)
            {
                throw new ArgumentException(
                    $"SevenZipWriter only supports CompressionType.LZMA and CompressionType.LZMA2. Got: {value}",
                    nameof(value)
                );
            }
            _compressionType = value;
        }
    }

    ///
    /// Compression level (not used for LZMA in this implementation; reserved for future use).
+ /// + public int CompressionLevel + { + get => _compressionLevel; + init => _compressionLevel = value; + } + + /// + /// SharpCompress will keep the supplied streams open. Default is true. + /// + public bool LeaveStreamOpen { get; init; } = true; + + /// + /// Encoding to use for archive entry names. + /// + public IArchiveEncoding ArchiveEncoding { get; init; } = new ArchiveEncoding(); + + /// + /// An optional progress reporter for tracking compression operations. + /// + public IProgress? Progress { get; init; } + + /// + /// Registry of compression providers. + /// Defaults to but can be replaced with custom implementations. + /// + public CompressionProviderRegistry Providers { get; init; } = + CompressionProviderRegistry.Default; + + /// + /// Whether to compress the archive header itself using LZMA. + /// Default is true, matching standard 7-Zip behavior. + /// + public bool CompressHeader { get; init; } = true; + + /// + /// Custom LZMA encoder properties. Null uses defaults (1MB dictionary, 32 fast bytes). + /// + public LzmaEncoderProperties? LzmaProperties { get; init; } + + /// + /// Creates a new SevenZipWriterOptions instance with LZMA2 compression (default). + /// + public SevenZipWriterOptions() + { + CompressionType = CompressionType.LZMA2; + } + + /// + /// Creates a new SevenZipWriterOptions instance with the specified compression type. + /// + /// The compression type for the archive. + public SevenZipWriterOptions(CompressionType compressionType) + { + CompressionType = compressionType; + } + + /// + /// Creates a new SevenZipWriterOptions instance from an existing WriterOptions instance. + /// + /// The WriterOptions to copy values from. 
public SevenZipWriterOptions(WriterOptions options)
    {
        CompressionType = options.CompressionType;
        CompressionLevel = options.CompressionLevel;
        LeaveStreamOpen = options.LeaveStreamOpen;
        ArchiveEncoding = options.ArchiveEncoding;
        Progress = options.Progress;
        Providers = options.Providers;
    }

    /// <summary>
    /// Creates a new SevenZipWriterOptions from an existing IWriterOptions instance.
    /// The copied CompressionType goes through the validating initializer, so a source
    /// with any type other than LZMA or LZMA2 is rejected with an ArgumentException.
    /// </summary>
    /// <param name="options">The IWriterOptions to copy values from.</param>
    public SevenZipWriterOptions(IWriterOptions options)
    {
        CompressionType = options.CompressionType;
        CompressionLevel = options.CompressionLevel;
        LeaveStreamOpen = options.LeaveStreamOpen;
        ArchiveEncoding = options.ArchiveEncoding;
        Progress = options.Progress;
        Providers = options.Providers;
    }

    /// <summary>
    /// Implicit conversion from CompressionType to SevenZipWriterOptions.
    /// </summary>
    public static implicit operator SevenZipWriterOptions(CompressionType compressionType) =>
        new(compressionType);
}

// diff --git a/src/SharpCompress/Writers/WriterOptionsExtensions.cs b/src/SharpCompress/Writers/WriterOptionsExtensions.cs
// index e1bdf988d..964d91876 100644
// --- a/src/SharpCompress/Writers/WriterOptionsExtensions.cs
// +++ b/src/SharpCompress/Writers/WriterOptionsExtensions.cs
// @@ -4,6 +4,7 @@
using SharpCompress.Compressors;
using SharpCompress.Providers;
using SharpCompress.Writers.GZip;
using SharpCompress.Writers.SevenZip;
using SharpCompress.Writers.Tar;
using SharpCompress.Writers.Zip;

// @@ -42,6 +43,10 @@ bool leaveStreamOpen
            ZipWriterOptions zipOptions => zipOptions with { LeaveStreamOpen = leaveStreamOpen },
            TarWriterOptions tarOptions => tarOptions with { LeaveStreamOpen = leaveStreamOpen },
            GZipWriterOptions gzipOptions => gzipOptions with { LeaveStreamOpen = leaveStreamOpen },
            SevenZipWriterOptions sevenZipOptions => sevenZipOptions with
            {
                LeaveStreamOpen = leaveStreamOpen,
            },
            _ => throw new NotSupportedException(
                $"Cannot set LeaveStreamOpen on options of type {options.GetType().Name}. "
                    + "Options must be a record type implementing IWriterOptions."

// diff --git a/tests/SharpCompress.Test/SevenZip/SevenZipWriterTests.cs b/tests/SharpCompress.Test/SevenZip/SevenZipWriterTests.cs
// new file mode 100644
// index 000000000..8209db4cc
// --- /dev/null
// +++ b/tests/SharpCompress.Test/SevenZip/SevenZipWriterTests.cs
// @@ -0,0 +1,463 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using SharpCompress.Archives.SevenZip;
using SharpCompress.Common;
using SharpCompress.Writers;
using SharpCompress.Writers.SevenZip;
using Xunit;

namespace SharpCompress.Test.SevenZip;

/// <summary>
/// Round-trip tests for SevenZipWriter: every archive is written to a MemoryStream and
/// read back through SevenZipArchive.
/// </summary>
public class SevenZipWriterTests : TestBase
{
    [Fact]
    public void SevenZipWriter_SingleFile_RoundTrip()
    {
        var content = "Hello, 7z world! This is a test of the SevenZipWriter."u8.ToArray();
        using var archiveStream = new MemoryStream();

        WriteFiles(archiveStream, new SevenZipWriterOptions(), ("test.txt", content));

        AssertSingleFile(archiveStream, content, expectedKey: "test.txt", checkSize: true);
    }

    [Fact]
    public void SevenZipWriter_MultipleFiles_RoundTrip()
    {
        var files = new[]
        {
            ("file1.txt", "Content of file 1"),
            ("subdir/file2.txt", "Content of file 2 in subdirectory"),
            ("file3.bin", "Some binary-ish content with special bytes"),
        };
        using var archiveStream = new MemoryStream();

        WriteTextFiles(archiveStream, new SevenZipWriterOptions(), files);

        AssertTextFiles(archiveStream, files);
    }

    [Fact]
    public void SevenZipWriter_WithDirectory_RoundTrip()
    {
        using var archiveStream = new MemoryStream();

        // Write archive with directory and file
        using (var writer = new SevenZipWriter(archiveStream, new SevenZipWriterOptions()))
        {
            writer.WriteDirectory("mydir", DateTime.UtcNow);

            using var source = new MemoryStream("file inside dir"u8.ToArray());
            writer.Write("mydir/data.txt", source, DateTime.UtcNow);
        }

        // Read back and verify
        archiveStream.Position = 0;
        using var archive = (SevenZipArchive)SevenZipArchive.OpenArchive(archiveStream);
        var allEntries = archive.Entries.ToList();
        Assert.Equal(2, allEntries.Count);

        var dirEntry = allEntries.FirstOrDefault(e => e.IsDirectory);
        Assert.NotNull(dirEntry);

        var fileEntry = allEntries.FirstOrDefault(e => !e.IsDirectory);
        Assert.NotNull(fileEntry);
        Assert.Equal("mydir/data.txt", fileEntry!.Key);
        Assert.Equal(
            "file inside dir",
            Encoding.UTF8.GetString(ReadToEnd(fileEntry.OpenEntryStream()))
        );
    }

    [Fact]
    public void SevenZipWriter_EmptyFile_RoundTrip()
    {
        using var archiveStream = new MemoryStream();

        // Write archive with an empty file followed by a non-empty one
        WriteFiles(
            archiveStream,
            new SevenZipWriterOptions(),
            ("empty.txt", Array.Empty<byte>()),
            ("notempty.txt", "not empty"u8.ToArray())
        );

        // Read back and verify
        archiveStream.Position = 0;
        using var archive = (SevenZipArchive)SevenZipArchive.OpenArchive(archiveStream);
        var entries = archive.Entries.Where(e => !e.IsDirectory).ToList();
        Assert.Equal(2, entries.Count);

        var emptyEntry = entries.First(e => e.Key == "empty.txt");
        Assert.Equal(0, (int)emptyEntry.Size);

        var nonEmptyEntry = entries.First(e => e.Key == "notempty.txt");
        Assert.Equal(
            "not empty",
            Encoding.UTF8.GetString(ReadToEnd(nonEmptyEntry.OpenEntryStream()))
        );
    }

    [Fact]
    public void SevenZipWriter_LZMA2_SingleFile_RoundTrip()
    {
        var content =
            "Hello, LZMA2 world! This is a test of LZMA2 encoding in the SevenZipWriter."u8.ToArray();
        using var archiveStream = new MemoryStream();

        WriteFiles(
            archiveStream,
            new SevenZipWriterOptions(CompressionType.LZMA2),
            ("test.txt", content)
        );

        AssertSingleFile(archiveStream, content, expectedKey: "test.txt", checkSize: true);
    }

    [Fact]
    public void SevenZipWriter_LZMA2_MultipleFiles_RoundTrip()
    {
        var files = new[]
        {
            ("file1.txt", "Content of file 1 for LZMA2 testing"),
            ("subdir/file2.txt", "Content of file 2 in subdirectory for LZMA2"),
            ("file3.bin", "Some binary-ish content with special bytes for LZMA2 testing"),
        };
        using var archiveStream = new MemoryStream();

        WriteTextFiles(archiveStream, new SevenZipWriterOptions(CompressionType.LZMA2), files);

        AssertTextFiles(archiveStream, files);
    }

    [Fact]
    public void SevenZipWriter_LZMA2_LargerFile_RoundTrip()
    {
        // Create 3MB of repeating pattern data - forces multi-chunk in LZMA2
        var content = RepeatPattern(
            "This is a repeating pattern for LZMA2 compression testing. ",
            3 * 1024 * 1024
        );
        using var archiveStream = new MemoryStream();

        WriteFiles(
            archiveStream,
            new SevenZipWriterOptions(CompressionType.LZMA2),
            ("large.bin", content)
        );

        Assert.True(
            archiveStream.Length < content.Length,
            "Archive should be smaller than uncompressed data"
        );
        AssertSingleFile(archiveStream, content, expectedKey: null, checkSize: true);
    }

    [Fact]
    public void SevenZipWriter_LZMA2_IncompressibleData_RoundTrip()
    {
        // Random bytes - forces uncompressed fallback in LZMA2
        var content = new byte[100 * 1024];
        var rng = new Random(42);
        rng.NextBytes(content);
        using var archiveStream = new MemoryStream();

        WriteFiles(
            archiveStream,
            new SevenZipWriterOptions(CompressionType.LZMA2),
            ("random.bin", content)
        );

        AssertSingleFile(archiveStream, content, expectedKey: null, checkSize: true);
    }

    [Fact]
    public void SevenZipWriter_UnsupportedCompressionType_Throws()
    {
        Assert.Throws<ArgumentException>(
            () => new SevenZipWriterOptions(CompressionType.Deflate)
        );
    }

    [Fact]
    public void SevenZipWriter_UncompressedHeader_RoundTrip()
    {
        var content = "Testing with uncompressed header"u8.ToArray();
        using var archiveStream = new MemoryStream();

        // Write archive with uncompressed header
        WriteFiles(
            archiveStream,
            new SevenZipWriterOptions { CompressHeader = false },
            ("rawheader.txt", content)
        );

        AssertSingleFile(archiveStream, content, expectedKey: null, checkSize: false);
    }

    [Fact]
    public void SevenZipWriter_ViaWriterFactory()
    {
        var content = "Factory-created archive"u8.ToArray();
        using var archiveStream = new MemoryStream();

        // Write via WriterFactory
        using (
            var writer = WriterFactory.OpenWriter(
                archiveStream,
                ArchiveType.SevenZip,
                new SevenZipWriterOptions()
            )
        )
        {
            using var source = new MemoryStream(content);
            writer.Write("factory.txt", source, DateTime.UtcNow);
        }

        AssertSingleFile(archiveStream, content, expectedKey: null, checkSize: false);
    }

    [Fact]
    public void SevenZipWriter_LargerFile_RoundTrip()
    {
        // Create 100KB of repeating pattern data (compresses well)
        var content = RepeatPattern(
            "This is a repeating pattern for compression testing. ",
            100 * 1024
        );
        using var archiveStream = new MemoryStream();

        WriteFiles(archiveStream, new SevenZipWriterOptions(), ("large.bin", content));

        // Verify compressed size is smaller than original
        Assert.True(
            archiveStream.Length < content.Length,
            "Archive should be smaller than uncompressed data"
        );
        AssertSingleFile(archiveStream, content, expectedKey: null, checkSize: true);
    }

    [Fact]
    public void SevenZipWriter_RequiresSeekableStream()
    {
        var nonSeekable = new NonSeekableStream();
        Assert.Throws<ArchiveOperationException>(
            () => new SevenZipWriter(nonSeekable, new SevenZipWriterOptions())
        );
    }

    /// <summary>
    /// Writes the given files, in order, into a new archive on <paramref name="archiveStream"/>
    /// and finalizes it by disposing the writer.
    /// </summary>
    private static void WriteFiles(
        Stream archiveStream,
        SevenZipWriterOptions options,
        params (string Name, byte[] Content)[] files
    )
    {
        using var writer = new SevenZipWriter(archiveStream, options);
        foreach (var (name, content) in files)
        {
            using var source = new MemoryStream(content);
            writer.Write(name, source, DateTime.UtcNow);
        }
    }

    /// <summary>
    /// Same as <see cref="WriteFiles"/>, with each file's text encoded as UTF-8.
    /// </summary>
    private static void WriteTextFiles(
        Stream archiveStream,
        SevenZipWriterOptions options,
        (string, string)[] files
    ) =>
        WriteFiles(
            archiveStream,
            options,
            files.Select(f => (f.Item1, Encoding.UTF8.GetBytes(f.Item2))).ToArray()
        );

    /// <summary>
    /// Reads the archive back and asserts that it holds exactly one file entry with the
    /// expected content; the key and size are verified only when requested.
    /// </summary>
    private static void AssertSingleFile(
        Stream archiveStream,
        byte[] expectedContent,
        string? expectedKey,
        bool checkSize
    )
    {
        archiveStream.Position = 0;
        using var archive = (SevenZipArchive)SevenZipArchive.OpenArchive(archiveStream);
        var entries = archive.Entries.Where(e => !e.IsDirectory).ToList();
        Assert.Single(entries);

        if (expectedKey is not null)
        {
            Assert.Equal(expectedKey, entries[0].Key);
        }
        if (checkSize)
        {
            Assert.Equal(expectedContent.Length, (int)entries[0].Size);
        }

        Assert.Equal(expectedContent, ReadToEnd(entries[0].OpenEntryStream()));
    }

    /// <summary>
    /// Reads the archive back and asserts that it holds exactly the given files, matching
    /// each by key and comparing its UTF-8 decoded content.
    /// </summary>
    private static void AssertTextFiles(Stream archiveStream, (string, string)[] files)
    {
        archiveStream.Position = 0;
        using var archive = (SevenZipArchive)SevenZipArchive.OpenArchive(archiveStream);
        var entries = archive.Entries.Where(e => !e.IsDirectory).ToList();
        Assert.Equal(files.Length, entries.Count);

        foreach (var (name, text) in files)
        {
            var entry = entries.First(e => e.Key == name);
            var extractedText = Encoding.UTF8.GetString(ReadToEnd(entry.OpenEntryStream()));
            Assert.Equal(text, extractedText);
        }
    }

    /// <summary>
    /// Drains <paramref name="entryStream"/> into a byte array and disposes it.
    /// </summary>
    private static byte[] ReadToEnd(Stream entryStream)
    {
        using (entryStream)
        {
            using var buffer = new MemoryStream();
            entryStream.CopyTo(buffer);
            return buffer.ToArray();
        }
    }

    /// <summary>
    /// Builds <paramref name="length"/> bytes by cycling through the UTF-8 bytes of
    /// <paramref name="pattern"/>.
    /// </summary>
    private static byte[] RepeatPattern(string pattern, int length)
    {
        var patternBytes = Encoding.UTF8.GetBytes(pattern);
        var data = new byte[length];
        for (var i = 0; i < data.Length; i++)
        {
            data[i] = patternBytes[i % patternBytes.Length];
        }
        return data;
    }

    private class NonSeekableStream : MemoryStream
    {
        public override bool CanSeek => false;
    }
}