From f15cb90463f58c248f9aef79102a22785f83cbe5 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 8 Sep 2022 11:16:48 +0800 Subject: [PATCH 1/5] Initial support for AIX big archive --- src/archive.rs | 56 +++++++++++++++ src/read/archive.rs | 172 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 209 insertions(+), 19 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index d4b419be..7ad1cdef 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -8,6 +8,9 @@ use crate::pod::Pod; /// File identification bytes stored at the beginning of the file. pub const MAGIC: [u8; 8] = *b"!<arch>\n"; +/// File identification bytes at the beginning of AIX big archive. +pub const AIX_BIG_MAGIC: [u8; 8] = *b"<bigaf>\n"; + /// File identification bytes stored at the beginning of a thin archive. /// /// A thin archive only contains a symbol table and file names. @@ -36,4 +39,57 @@ pub struct Header { pub terminator: [u8; 2], } +/// The header at the start of an AIX big archive member, without name. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct AixHeader { + /// Member size in decimal. + pub size: [u8; 20], + /// Offset of next member in decimal. + pub next_member: [u8; 20], + /// Offset of previous member in decimal. + pub prev_member: [u8; 20], + /// File modification timestamp in decimal. + pub date: [u8; 12], + /// User ID in decimal. + pub uid: [u8; 12], + /// Group ID in decimal. + pub gid: [u8; 12], + /// File mode in octal. + pub mode: [u8; 12], + /// Name length in decimal. + pub name_length: [u8; 4], +} + +/// Discriminated union for multiple type headers +#[derive(Debug, Clone, Copy)] +pub enum MemberHeader { + /// GNU or BSD style header + SystemV(Header), + /// AIX style big archive header + AixBig(AixHeader), +} + unsafe_impl_pod!(Header); +unsafe_impl_pod!(AixHeader); + +/// The AIX big archive fixed len header. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct AIXBigFixedHeader { + /// We read the magic number in advance , so don't put this in struct. 
+ /// Offset to member table + pub memoffset: [u8; 20], + /// Offset to Global offset + pub globsymoffset: [u8; 20], + /// Offset to 64 bit Sym + pub globsym64offset: [u8; 20], + /// Offset to first Child + pub firstchildoffset: [u8; 20], + /// Offset to last child + pub lastchildoffset: [u8; 20], + /// Offset to free list + pub freeoffset: [u8; 20], +} + +unsafe_impl_pod!(AIXBigFixedHeader); diff --git a/src/read/archive.rs b/src/read/archive.rs index 0208878e..db79e6c3 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -23,6 +23,8 @@ pub enum ArchiveKind { Bsd64, /// The Windows COFF archive format. Coff, + /// The AIX big archive format. + AixBig, } /// A partially parsed archive file. @@ -44,10 +46,48 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { let magic = data .read_bytes(&mut tail, archive::MAGIC.len() as u64) .read_error("Invalid archive size")?; - if magic != &archive::MAGIC[..] { + let mut kind_by_header = ArchiveKind::Unknown; + if magic == &archive::AIX_BIG_MAGIC[..] { + kind_by_header = ArchiveKind::AixBig; + } else if magic != &archive::MAGIC[..] { return Err(Error("Unsupported archive identifier")); } + if kind_by_header == ArchiveKind::AixBig { + // Parse the Fix Header to get member offset + let fixedheader = data + .read::(&mut tail) + .read_error("Invalid AIX big archive fixed header")?; + let firstchildoff = parse_u64_digits(&fixedheader.firstchildoffset, 10) + .read_error("Invalid first child offset")?; + + // Move to firstchild + tail = firstchildoff; + + let mut file = ArchiveFile { + data, + offset: tail, + len, + kind: kind_by_header, + symbols: (0, 0), + names: &[], + }; + + // Both the member table and the global symbol table exist as members of the archive and + // are kept at the end of the archive file. + let mut gst64off = parse_u64_digits(&fixedheader.globsym64offset, 10) + .read_error("Invalid global symbol64 table offset")?; + if gst64off == 0 { + // Empty archive has 0 for globsym64offset. 
+ return Ok(file); + } + + let member = ArchiveMember::parse(data, &mut gst64off, &[], file.kind)?; + file.symbols = member.file_range(); + + return Ok(file); + } + let mut file = ArchiveFile { data, offset: tail, @@ -72,7 +112,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { // BSD may use the extended name for the symbol table. This is handled // by `ArchiveMember::parse`. if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], file.kind)?; if member.name == b"/" { // GNU symbol table (unless we later determine this is COFF). file.kind = ArchiveKind::Gnu; @@ -80,7 +120,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { file.offset = tail; if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], file.kind)?; if member.name == b"/" { // COFF linker member. file.kind = ArchiveKind::Coff; @@ -88,7 +128,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { file.offset = tail; if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], file.kind)?; if member.name == b"//" { // COFF names table. file.names = member.data(data)?; @@ -108,7 +148,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { file.offset = tail; if tail < len { - let member = ArchiveMember::parse(data, &mut tail, &[])?; + let member = ArchiveMember::parse(data, &mut tail, &[], file.kind)?; if member.name == b"//" { // GNU names table. 
file.names = member.data(data)?; @@ -153,6 +193,7 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { offset: self.offset, len: self.len, names: self.names, + kind: self.kind, } } } @@ -164,6 +205,7 @@ pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> { offset: u64, len: u64, names: &'data [u8], + kind: ArchiveKind, } impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { @@ -173,7 +215,7 @@ impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { if self.offset >= self.len { return None; } - let member = ArchiveMember::parse(self.data, &mut self.offset, self.names); + let member = ArchiveMember::parse(self.data, &mut self.offset, self.names, self.kind); if member.is_err() { self.offset = self.len; } @@ -184,17 +226,60 @@ impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { /// A partially parsed archive member. #[derive(Debug)] pub struct ArchiveMember<'data> { - header: &'data archive::Header, + header: archive::MemberHeader, name: &'data [u8], offset: u64, size: u64, } impl<'data> ArchiveMember<'data> { - /// Parse the archive member header, name, and file data. - /// - /// This reads the extended name (if any) and adjusts the file size. - fn parse>( + /// Parse with AIX big archive style. 
+ fn parse_aixbig>( + data: R, + offset: &mut u64, + _names: &'data [u8], + ) -> read::Result { + // The format was described at + // https://www.ibm.com/docs/en/aix/7.3?topic=formats-ar-file-format-big + let header = data + .read::(offset) + .read_error("Invalid AIX big archive member header")?; + let name_length = parse_u64_digits(&header.name_length, 10) + .read_error("Invalid archive member name length")?; + let name = data + .read_bytes(offset, name_length) + .read_error("Invalid archive member name")?; + + // The actual data for a file member begins at the first even-byte boundary beyond the + // member header and continues for the number of bytes specified by the ar_size field. The + // ar command inserts null bytes for padding where necessary. + if *offset & 1 != 0 { + *offset = offset.saturating_add(1); + } + let terminator = data + .read_bytes(offset, 2) + .read_error("Invalid archive head terminator")?; + if terminator != archive::TERMINATOR { + return Err(Error("Invalid archive terminator")); + } + let file_offset = *offset; + let nextmbroff = + parse_u64_digits(&header.next_member, 10).read_error("Invalid next member offset")?; + + // Move the offset to next member offset + *offset = nextmbroff; + let file_size = + parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?; + Ok(ArchiveMember { + header: archive::MemberHeader::AixBig(*header), + name, + offset: file_offset, + size: file_size, + }) + } + + /// Parse with SystemV style. + fn parse_systemv>( data: R, offset: &mut u64, names: &'data [u8], @@ -212,6 +297,7 @@ impl<'data> ArchiveMember<'data> { *offset = offset .checked_add(file_size) .read_error("Archive member size is too large")?; + // Entries are padded to an even number of bytes. 
if (file_size & 1) != 0 { *offset = offset.saturating_add(1); @@ -236,18 +322,33 @@ impl<'data> ArchiveMember<'data> { }; Ok(ArchiveMember { - header, + header: archive::MemberHeader::SystemV(*header), name, offset: file_offset, size: file_size, }) } + /// Parse the archive member header, name, and file data. + /// + /// This reads the extended name (if any) and adjusts the file size. + fn parse>( + data: R, + offset: &mut u64, + names: &'data [u8], + kind: ArchiveKind, + ) -> read::Result { + match kind { + ArchiveKind::AixBig => Self::parse_aixbig(data, offset, &names), + _ => Self::parse_systemv(data, offset, &names), + } + } + /// Return the raw header. #[inline] - pub fn header(&self) -> &'data archive::Header { - self.header - } + pub fn header(&self) -> &archive::MemberHeader { + &self.header + } /// Return the parsed file name. /// @@ -260,25 +361,37 @@ impl<'data> ArchiveMember<'data> { /// Parse the file modification timestamp from the header. #[inline] pub fn date(&self) -> Option { - parse_u64_digits(&self.header.date, 10) + match &self.header { + archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.date, 10), + archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.date, 10), + } } /// Parse the user ID from the header. #[inline] pub fn uid(&self) -> Option { - parse_u64_digits(&self.header.uid, 10) + match &self.header { + archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.uid, 10), + archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.uid, 10), + } } /// Parse the group ID from the header. #[inline] pub fn gid(&self) -> Option { - parse_u64_digits(&self.header.gid, 10) + match &self.header { + archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.gid, 10), + archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.gid, 10), + } } /// Parse the file mode from the header. 
#[inline] pub fn mode(&self) -> Option { - parse_u64_digits(&self.header.mode, 8) + match &self.header { + archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.mode, 8), + archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.mode, 8), + } } /// Return the offset and size of the file data. @@ -442,6 +555,27 @@ mod tests { 0000"; let archive = ArchiveFile::parse(&data[..]).unwrap(); assert_eq!(archive.kind(), ArchiveKind::Coff); + + let data = b"\ + \n\ + 0\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x200\x20\x20\x20\ + \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ + 0\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x200\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x20\x20\x20\x20\x200\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20128\x20\ + \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ + 6\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x20\x30\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x20\x20\x20\x20\x20\x30\x20\x20\x20\x20\x20\x20\x20\ + \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\0\0\0\0\ + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ + \0\0\0\0\0\0\0\0"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::AixBig); } #[test] From 2a4f97e9960f8f5058ca32cbe8b61a3ac32db612 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Fri, 9 Sep 2022 15:38:04 +0800 Subject: [PATCH 2/5] Add more tests and remove fixed header def --- src/archive.rs | 21 ------ src/read/archive.rs | 151 +++++++++++++++++++++++--------------------- 2 files changed, 80 insertions(+), 92 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 7ad1cdef..e92ee591 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -72,24 +72,3 @@ pub enum MemberHeader { 
unsafe_impl_pod!(Header); unsafe_impl_pod!(AixHeader); - -/// The AIX big archive fixed len header. -#[derive(Debug, Clone, Copy)] -#[repr(C)] -pub struct AIXBigFixedHeader { - /// We read the magic number in advance , so don't put this in struct. - /// Offset to member table - pub memoffset: [u8; 20], - /// Offset to Global offset - pub globsymoffset: [u8; 20], - /// Offset to 64 bit Sym - pub globsym64offset: [u8; 20], - /// Offset to first Child - pub firstchildoffset: [u8; 20], - /// Offset to last child - pub lastchildoffset: [u8; 20], - /// Offset to free list - pub freeoffset: [u8; 20], -} - -unsafe_impl_pod!(AIXBigFixedHeader); diff --git a/src/read/archive.rs b/src/read/archive.rs index db79e6c3..b306f8d8 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -46,72 +46,57 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { let magic = data .read_bytes(&mut tail, archive::MAGIC.len() as u64) .read_error("Invalid archive size")?; - let mut kind_by_header = ArchiveKind::Unknown; - if magic == &archive::AIX_BIG_MAGIC[..] { - kind_by_header = ArchiveKind::AixBig; - } else if magic != &archive::MAGIC[..] { + let kind = if magic == &archive::MAGIC { + ArchiveKind::Unknown + } else if magic == &archive::AIX_BIG_MAGIC { + ArchiveKind::AixBig + } else { return Err(Error("Unsupported archive identifier")); - } - - if kind_by_header == ArchiveKind::AixBig { - // Parse the Fix Header to get member offset - let fixedheader = data - .read::(&mut tail) - .read_error("Invalid AIX big archive fixed header")?; - let firstchildoff = parse_u64_digits(&fixedheader.firstchildoffset, 10) - .read_error("Invalid first child offset")?; - - // Move to firstchild - tail = firstchildoff; - - let mut file = ArchiveFile { - data, - offset: tail, - len, - kind: kind_by_header, - symbols: (0, 0), - names: &[], - }; - - // Both the member table and the global symbol table exist as members of the archive and - // are kept at the end of the archive file. 
- let mut gst64off = parse_u64_digits(&fixedheader.globsym64offset, 10) - .read_error("Invalid global symbol64 table offset")?; - if gst64off == 0 { - // Empty archive has 0 for globsym64offset. - return Ok(file); - } - - let member = ArchiveMember::parse(data, &mut gst64off, &[], file.kind)?; - file.symbols = member.file_range(); - - return Ok(file); - } + }; let mut file = ArchiveFile { data, offset: tail, len, - kind: ArchiveKind::Unknown, + kind, symbols: (0, 0), names: &[], }; - // The first few members may be special, so parse them. - // GNU has: - // - "/" or "/SYM64/": symbol table (optional) - // - "//": names table (optional) - // COFF has: - // - "/": first linker member - // - "/": second linker member - // - "//": names table - // BSD has: - // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional) - // BSD 64-bit has: - // - "__.SYMDEF_64" or "__.SYMDEF_64 SORTED": symbol table (optional) - // BSD may use the extended name for the symbol table. This is handled - // by `ArchiveMember::parse`. - if tail < len { + if file.kind == ArchiveKind::AixBig { + // Structure after magic number (fixed header): + // Offset of member table - 20 bytes + // Offset of global symbol table - 20 bytes + // Offset of global symbol table for 64-bit objects - 20 bytes + // Offset of first member - 20 bytes + // Offset of last member - 20 bytes + // Offset of first member on free list - 20 bytes + let fixed_header = data.read_bytes(&mut tail, 120) + .read_error("Invalid fixed header")?; + file.offset = parse_u64_digits(&fixed_header[60..80], 10) + .read_error("Invalid offset for first archive member")?; + // Member table is located just after all archive members. + file.len = parse_u64_digits(&fixed_header[0..20], 10) + .read_error("Invalid offset for member table")?; + + // TODO: Parse symbol table according to the offset. + + return Ok(file); + } else if tail < len { + // The first few members may be special, so parse them. 
+ // GNU has: + // - "/" or "/SYM64/": symbol table (optional) + // - "//": names table (optional) + // COFF has: + // - "/": first linker member + // - "/": second linker member + // - "//": names table + // BSD has: + // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional) + // BSD 64-bit has: + // - "__.SYMDEF_64" or "__.SYMDEF_64 SORTED": symbol table (optional) + // BSD may use the extended name for the symbol table. This is handled + // by `ArchiveMember::parse`. let member = ArchiveMember::parse(data, &mut tail, &[], file.kind)?; if member.name == b"/" { // GNU symbol table (unless we later determine this is COFF). @@ -348,7 +333,7 @@ impl<'data> ArchiveMember<'data> { #[inline] pub fn header(&self) -> &archive::MemberHeader { &self.header - } + } /// Return the parsed file name. /// @@ -558,22 +543,14 @@ mod tests { let data = b"\ \n\ - 0\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x200\x20\x20\x20\ - \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ - 0\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x200\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x20\x20\x20\x20\x200\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20128\x20\ - \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ - 6\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x20\x30\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x20\x20\x20\x20\x20\x30\x20\x20\x20\x20\x20\x20\x20\ - \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\0\0\0\0\ + 0 0 \ + 0 0 \ + 0 128 \ + 6 0 \ + 0 \0\0\0\0\0\0\0\0\0\0\0\0\ \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ - \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ - \0\0\0\0\0\0\0\0"; + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; let archive = ArchiveFile::parse(&data[..]).unwrap(); assert_eq!(archive.kind(), ArchiveKind::AixBig); } @@ -633,4 +610,36 @@ mod tests { 
assert!(members.next().is_none()); } + + #[test] + fn aix_names() { + let data = b"\ + \n\ + 396 0 0 \ + 128 262 0 \ + 4 262 0 \ + 1662610370 223 1 644 16 \ + 0123456789abcdef`\nord\n\ + 4 396 128 \ + 1662610374 223 1 644 16 \ + fedcba9876543210`\nrev\n\ + 94 0 262 \ + 0 0 0 0 0 \ + `\n2 128 \ + 262 0123456789abcdef\0fedcba9876543210\0"; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::AixBig); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcdef"); + assert_eq!(member.data(data).unwrap(), &b"ord\n"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"fedcba9876543210"); + assert_eq!(member.data(data).unwrap(), &b"rev\n"[..]); + + assert!(members.next().is_none()); + } } From 6c8ff13493542758e2747e58e81f878bce1c45ea Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Sat, 10 Sep 2022 00:35:21 +0800 Subject: [PATCH 3/5] Refactor comments --- src/archive.rs | 22 +++++++++++----------- src/read/archive.rs | 5 +++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index e92ee591..d81d0ba5 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -43,22 +43,22 @@ pub struct Header { #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct AixHeader { - /// Member size in decimal. + /// File member size in decimal. pub size: [u8; 20], - /// Offset of next member in decimal. - pub next_member: [u8; 20], - /// Offset of previous member in decimal. - pub prev_member: [u8; 20], - /// File modification timestamp in decimal. + /// Next member offset in decimal. + pub nxtmem: [u8; 20], + /// Previous member offset in decimal. + pub prvmem: [u8; 20], + /// File member date in decimal. pub date: [u8; 12], - /// User ID in decimal. + /// File member user id in decimal. pub uid: [u8; 12], - /// Group ID in decimal. + /// File member group id in decimal. 
pub gid: [u8; 12], - /// File mode in octal. + /// File member mode in octal. pub mode: [u8; 12], - /// Name length in decimal. - pub name_length: [u8; 4], + /// File member name length in decimal. + pub namlen: [u8; 4], } /// Discriminated union for multiple type headers diff --git a/src/read/archive.rs b/src/read/archive.rs index b306f8d8..ca2fdc68 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -229,7 +229,7 @@ impl<'data> ArchiveMember<'data> { let header = data .read::(offset) .read_error("Invalid AIX big archive member header")?; - let name_length = parse_u64_digits(&header.name_length, 10) + let name_length = parse_u64_digits(&header.namlen, 10) .read_error("Invalid archive member name length")?; let name = data .read_bytes(offset, name_length) @@ -241,6 +241,7 @@ impl<'data> ArchiveMember<'data> { if *offset & 1 != 0 { *offset = offset.saturating_add(1); } + // Because of the even-byte boundary, we have to read and check terminator after header. let terminator = data .read_bytes(offset, 2) .read_error("Invalid archive head terminator")?; @@ -249,7 +250,7 @@ impl<'data> ArchiveMember<'data> { } let file_offset = *offset; let nextmbroff = - parse_u64_digits(&header.next_member, 10).read_error("Invalid next member offset")?; + parse_u64_digits(&header.nxtmem, 10).read_error("Invalid next member offset")?; // Move the offset to next member offset *offset = nextmbroff; From 5682bf1f86cc22ae25c66ded7fc727976e9338f3 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 15 Sep 2022 10:57:26 +0800 Subject: [PATCH 4/5] Make MemberHeader private --- src/archive.rs | 9 --- src/read/archive.rs | 167 ++++++++++++++++++++++++++------------------ 2 files changed, 98 insertions(+), 78 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index d81d0ba5..c29c1f4e 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -61,14 +61,5 @@ pub struct AixHeader { pub namlen: [u8; 4], } -/// Discriminated union for multiple type headers -#[derive(Debug, Clone, Copy)] 
-pub enum MemberHeader { - /// GNU or BSD style header - SystemV(Header), - /// AIX style big archive header - AixBig(AixHeader), -} - unsafe_impl_pod!(Header); unsafe_impl_pod!(AixHeader); diff --git a/src/read/archive.rs b/src/read/archive.rs index ca2fdc68..2850e0cc 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -5,6 +5,15 @@ use core::convert::TryInto; use crate::archive; use crate::read::{self, Error, ReadError, ReadRef}; +/// Discriminated union for multiple type headers +#[derive(Debug, Clone, Copy)] +enum MemberHeader<'data> { + /// GNU or BSD style header + SystemV(&'data archive::Header), + /// AIX style big archive header + AixBig(&'data archive::AixHeader), +} + /// The kind of archive format. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] @@ -72,15 +81,24 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { // Offset of last member - 20 bytes // Offset of first member on free list - 20 bytes let fixed_header = data.read_bytes(&mut tail, 120) - .read_error("Invalid fixed header")?; + .read_error("Invalid AIX big archive fixed header")?; file.offset = parse_u64_digits(&fixed_header[60..80], 10) - .read_error("Invalid offset for first archive member")?; + .read_error("Invalid offset for first archive member in AIX big archive")?; + // Member table is located just after all archive members. file.len = parse_u64_digits(&fixed_header[0..20], 10) - .read_error("Invalid offset for member table")?; - - // TODO: Parse symbol table according to the offset. 
- + .read_error("Invalid offset for member table of AIX big archive")?; + let symtbl64 = parse_u64_digits(&fixed_header[40..60], 10) + .read_error("Invalid offset to 64-bit symbol table in AIX big archive")?; + if symtbl64 > 0 { + file.symbols = (symtbl64, len); + } else { + let symtbl = parse_u64_digits(&fixed_header[20..40], 10) + .read_error("Invalid offset to symbol table in AIX big archive")?; + if symtbl > 0 { + file.symbols = (symtbl, len); + } + } return Ok(file); } else if tail < len { // The first few members may be special, so parse them. @@ -211,57 +229,26 @@ impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { /// A partially parsed archive member. #[derive(Debug)] pub struct ArchiveMember<'data> { - header: archive::MemberHeader, + header: MemberHeader<'data>, name: &'data [u8], offset: u64, size: u64, } impl<'data> ArchiveMember<'data> { - /// Parse with AIX big archive style. - fn parse_aixbig>( + /// Parse the archive member header, name, and file data. + /// + /// This reads the extended name (if any) and adjusts the file size. + fn parse>( data: R, offset: &mut u64, - _names: &'data [u8], + names: &'data [u8], + kind: ArchiveKind, ) -> read::Result { - // The format was described at - // https://www.ibm.com/docs/en/aix/7.3?topic=formats-ar-file-format-big - let header = data - .read::(offset) - .read_error("Invalid AIX big archive member header")?; - let name_length = parse_u64_digits(&header.namlen, 10) - .read_error("Invalid archive member name length")?; - let name = data - .read_bytes(offset, name_length) - .read_error("Invalid archive member name")?; - - // The actual data for a file member begins at the first even-byte boundary beyond the - // member header and continues for the number of bytes specified by the ar_size field. The - // ar command inserts null bytes for padding where necessary. 
- if *offset & 1 != 0 { - *offset = offset.saturating_add(1); - } - // Because of the even-byte boundary, we have to read and check terminator after header. - let terminator = data - .read_bytes(offset, 2) - .read_error("Invalid archive head terminator")?; - if terminator != archive::TERMINATOR { - return Err(Error("Invalid archive terminator")); + match kind { + ArchiveKind::AixBig => Self::parse_aixbig(data, offset, &names), + _ => Self::parse_systemv(data, offset, &names), } - let file_offset = *offset; - let nextmbroff = - parse_u64_digits(&header.nxtmem, 10).read_error("Invalid next member offset")?; - - // Move the offset to next member offset - *offset = nextmbroff; - let file_size = - parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?; - Ok(ArchiveMember { - header: archive::MemberHeader::AixBig(*header), - name, - offset: file_offset, - size: file_size, - }) } /// Parse with SystemV style. @@ -283,7 +270,6 @@ impl<'data> ArchiveMember<'data> { *offset = offset .checked_add(file_size) .read_error("Archive member size is too large")?; - // Entries are padded to an even number of bytes. if (file_size & 1) != 0 { *offset = offset.saturating_add(1); @@ -308,32 +294,75 @@ impl<'data> ArchiveMember<'data> { }; Ok(ArchiveMember { - header: archive::MemberHeader::SystemV(*header), + header: MemberHeader::SystemV(header), name, offset: file_offset, size: file_size, }) } - /// Parse the archive member header, name, and file data. - /// - /// This reads the extended name (if any) and adjusts the file size. - fn parse>( + /// Parse with AIX big archive style. 
+ fn parse_aixbig>( data: R, offset: &mut u64, - names: &'data [u8], - kind: ArchiveKind, + _names: &'data [u8], ) -> read::Result { - match kind { - ArchiveKind::AixBig => Self::parse_aixbig(data, offset, &names), - _ => Self::parse_systemv(data, offset, &names), + // The format was described at + // https://www.ibm.com/docs/en/aix/7.3?topic=formats-ar-file-format-big + let header = data + .read::(offset) + .read_error("Invalid AIX big archive member header")?; + let name_length = parse_u64_digits(&header.namlen, 10) + .read_error("Invalid AIX big archive member name length")?; + let name = data + .read_bytes(offset, name_length) + .read_error("Invalid AIX big archive member name")?; + + // The actual data for a file member begins at the first even-byte boundary beyond the + // member header and continues for the number of bytes specified by the ar_size field. The + // ar command inserts null bytes for padding where necessary. + if *offset & 1 != 0 { + *offset = offset.saturating_add(1); + } + // Because of the even-byte boundary, we have to read and check terminator after header. + let terminator = data + .read_bytes(offset, 2) + .read_error("Invalid AIX big archive head terminator")?; + if terminator != archive::TERMINATOR { + return Err(Error("Invalid AIX big archive terminator")); + } + let file_offset = *offset; + let nextmbroff = parse_u64_digits(&header.nxtmem, 10) + .read_error("Invalid next member offset in AIX big archive")?; + + // Move the offset to next member offset + *offset = nextmbroff; + let file_size = parse_u64_digits(&header.size, 10) + .read_error("Invalid archive member size in AIX big archive")?; + Ok(ArchiveMember { + header: MemberHeader::AixBig(header), + name, + offset: file_offset, + size: file_size, + }) + } + + /// Return the raw header for SystemV archives. 
+ #[inline] + pub fn header(&self) -> Option<&'data archive::Header> { + match self.header { + MemberHeader::SystemV(head) => Some(&head), + _ => None, } } - /// Return the raw header. + /// Return the raw header for AIX big archives. #[inline] - pub fn header(&self) -> &archive::MemberHeader { - &self.header + pub fn aix_header(&self) -> Option<&'data archive::AixHeader> { + match self.header { + MemberHeader::AixBig(head) => Some(&head), + _ => None, + } } /// Return the parsed file name. @@ -348,8 +377,8 @@ impl<'data> ArchiveMember<'data> { #[inline] pub fn date(&self) -> Option { match &self.header { - archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.date, 10), - archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.date, 10), + MemberHeader::AixBig(head) => parse_u64_digits(&head.date, 10), + MemberHeader::SystemV(head) => parse_u64_digits(&head.date, 10), } } @@ -357,8 +386,8 @@ impl<'data> ArchiveMember<'data> { #[inline] pub fn uid(&self) -> Option { match &self.header { - archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.uid, 10), - archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.uid, 10), + MemberHeader::AixBig(head) => parse_u64_digits(&head.uid, 10), + MemberHeader::SystemV(head) => parse_u64_digits(&head.uid, 10), } } @@ -366,8 +395,8 @@ impl<'data> ArchiveMember<'data> { #[inline] pub fn gid(&self) -> Option { match &self.header { - archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.gid, 10), - archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.gid, 10), + MemberHeader::AixBig(head) => parse_u64_digits(&head.gid, 10), + MemberHeader::SystemV(head) => parse_u64_digits(&head.gid, 10), } } @@ -375,8 +404,8 @@ impl<'data> ArchiveMember<'data> { #[inline] pub fn mode(&self) -> Option { match &self.header { - archive::MemberHeader::AixBig(head) => parse_u64_digits(&head.mode, 8), - archive::MemberHeader::SystemV(head) => parse_u64_digits(&head.mode, 8), + MemberHeader::AixBig(head) => 
parse_u64_digits(&head.mode, 8), + MemberHeader::SystemV(head) => parse_u64_digits(&head.mode, 8), } } From 8db15e0c13231e1eea3c0769af0e4e0e1741ca40 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Fri, 16 Sep 2022 17:17:40 +0800 Subject: [PATCH 5/5] Move AIX fixed length header definition back Use 'AixFileHeader' to avoid confusion. --- src/archive.rs | 20 ++++++++++++++++++++ src/read/archive.rs | 20 +++++++------------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index c29c1f4e..1b38b763 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -61,5 +61,25 @@ pub struct AixHeader { pub namlen: [u8; 4], } +/// The AIX big archive's fixed length header at file beginning. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct AIXFileHeader { + /// Offset of member table + pub memoff: [u8; 20], + /// Offset of global symbol table + pub gstoff: [u8; 20], + /// Offset of global symbol table for 64-bit objects + pub gst64off: [u8; 20], + /// Offset of first member + pub fstmoff: [u8; 20], + /// Offset of last member + pub lstmoff: [u8; 20], + /// Offset of first member on free list + pub freeoff: [u8; 20], +} + + unsafe_impl_pod!(Header); unsafe_impl_pod!(AixHeader); +unsafe_impl_pod!(AIXFileHeader); diff --git a/src/read/archive.rs b/src/read/archive.rs index 2850e0cc..879d84fd 100644 --- a/src/read/archive.rs +++ b/src/read/archive.rs @@ -73,27 +73,21 @@ impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { }; if file.kind == ArchiveKind::AixBig { - // Structure after magic number (fixed header): - // Offset of member table - 20 bytes - // Offset of global symbol table - 20 bytes - // Offset of global symbol table for 64-bit objects - 20 bytes - // Offset of first member - 20 bytes - // Offset of last member - 20 bytes - // Offset of first member on free list - 20 bytes - let fixed_header = data.read_bytes(&mut tail, 120) - .read_error("Invalid AIX big archive fixed header")?; - file.offset = 
parse_u64_digits(&fixed_header[60..80], 10) + // The fixed length header is located just after magic number. + let file_header = data.read::<archive::AIXFileHeader>(&mut tail) + .read_error("Invalid AIX big archive file header")?; + file.offset = parse_u64_digits(&file_header.fstmoff, 10) .read_error("Invalid offset for first archive member in AIX big archive")?; // Member table is located just after all archive members. - file.len = parse_u64_digits(&fixed_header[0..20], 10) + file.len = parse_u64_digits(&file_header.memoff, 10) .read_error("Invalid offset for member table of AIX big archive")?; - let symtbl64 = parse_u64_digits(&fixed_header[40..60], 10) + let symtbl64 = parse_u64_digits(&file_header.gst64off, 10) .read_error("Invalid offset to 64-bit symbol table in AIX big archive")?; if symtbl64 > 0 { file.symbols = (symtbl64, len); } else { - let symtbl = parse_u64_digits(&fixed_header[20..40], 10) + let symtbl = parse_u64_digits(&file_header.gstoff, 10) .read_error("Invalid offset to symbol table in AIX big archive")?; if symtbl > 0 { file.symbols = (symtbl, len);