diff --git a/src/libutil-tests/meson.build b/src/libutil-tests/meson.build index eb4844ef37b..bcc9a607a17 100644 --- a/src/libutil-tests/meson.build +++ b/src/libutil-tests/meson.build @@ -78,6 +78,7 @@ sources = files( 'position.cc', 'processes.cc', 'ref.cc', + 'serialise.cc', 'sort.cc', 'source-accessor.cc', 'spawn.cc', diff --git a/src/libutil-tests/serialise.cc b/src/libutil-tests/serialise.cc new file mode 100644 index 00000000000..13b285e2b44 --- /dev/null +++ b/src/libutil-tests/serialise.cc @@ -0,0 +1,25 @@ +#include "nix/util/serialise.hh" + +#include + +namespace nix { + +template + requires std::is_integral_v +auto makeNumSource(T num) +{ + return sinkToSource([num](Sink & writer) { writer << num; }); +} + +TEST(readNum, negativeValuesSerialiseWellDefined) +{ + EXPECT_THROW(readNum(*makeNumSource(int64_t(-1))), SerialisationError); + EXPECT_THROW(readNum(*makeNumSource(int16_t(-1))), SerialisationError); + EXPECT_EQ(readNum(*makeNumSource(int64_t(-1))), std::numeric_limits::max()); + EXPECT_EQ(readNum(*makeNumSource(int64_t(-2))), std::numeric_limits::max() - 1); + /* The result doesn't depend on the source type - only the destination matters. */ + EXPECT_EQ(readNum(*makeNumSource(int32_t(-1))), std::numeric_limits::max()); + EXPECT_EQ(readNum(*makeNumSource(int16_t(-1))), std::numeric_limits::max()); +} + +} // namespace nix diff --git a/src/libutil/include/nix/util/serialise.hh b/src/libutil/include/nix/util/serialise.hh index 9a155aa9738..f8b545f49b9 100644 --- a/src/libutil/include/nix/util/serialise.hh +++ b/src/libutil/include/nix/util/serialise.hh @@ -521,6 +521,27 @@ sinkToSource(fun writer, fun eof = []() { throw EndOfFile( void writePadding(size_t len, Sink & sink); void writeString(std::string_view s, Sink & sink); +/** + * Write a serialisation of an integer to the sink in little endian order. + * + * Types other than uint64_t (including signed types) get implicitly converted to uint64_t.A + * + * Negative number to unsigned conversion is actually well-defined in C++: + * + * [n4950] 7.3.9 Integral conversions: + * the result is the unique value of the destination type that is congruent to the source integer + * modulo 2^N, where N is the width of the destination type. + * + * [n4950] 6.8.2 Fundamental types: + * An unsigned integer type has the same object representation, value + * representation, and alignment requirements (6.7.6) as the corresponding signed + * integer type. For each value x of a signed integer type, the value of the + * corresponding unsigned integer type congruent to x modulo 2 N has the same value + * of corresponding bits in its value representation. + * This is also known as two's complement representation. + * + * @todo Should we even allow negative values to get serialised? + */ inline Sink & operator<<(Sink & sink, uint64_t n) { unsigned char buf[8]; diff --git a/src/libutil/include/nix/util/util.hh b/src/libutil/include/nix/util/util.hh index adbb9e19ad1..144c83cc0c9 100644 --- a/src/libutil/include/nix/util/util.hh +++ b/src/libutil/include/nix/util/util.hh @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -175,8 +176,25 @@ template T readLittleEndian(unsigned char * p) { T x = 0; - for (size_t i = 0; i < sizeof(x); ++i, ++p) { - x |= ((T) *p) << (i * 8); + /* Byte types such as char/unsigned char/std::byte are a bit special because + they are allowed to alias anything else. Thus a raw loop iterating + over the bytes here would be quite inefficient and iterate byte-by-byte + (the compiler cannot optimise anything because the pointer might alias + something). Use a memcpy + byteswap here as needed. */ + std::memcpy(&x, p, sizeof(T)); + /* Don't need to do anything if we are not on a big endian machine. */ + if constexpr (std::endian::native != std::endian::little) { + if constexpr (std::is_same_v) { + x = __builtin_bswap64(x); + } else if constexpr (std::is_same_v) { + x = __builtin_bswap32(x); + } else if constexpr (std::is_same_v) { + x = __builtin_bswap16(x); + } else { + /* Signed types don't make their way here. Though it would be fine + since C++20 mandates 2's complement representation. */ + static_assert(false); + } } return x; }