From f077afafdaf3a91874f7e69d1d07914a30f09fd2 Mon Sep 17 00:00:00 2001
From: Joyee Cheung <joyeec9h3@gmail.com>
Date: Fri, 22 Mar 2024 19:51:49 +0100
Subject: [PATCH] src: add utilities to help debugging reproducibility of
 snapshots

- Print offsets in blob serializer
- Add a special node:generate_default_snapshot ID to generate
  the built-in snapshot.
- Improve logging
- Add a test to check the reproducibility of the snapshot

PR-URL: https://github.com/nodejs/node/pull/50983
Refs: https://github.com/nodejs/build/issues/3043
Reviewed-By: Daniel Lemire <daniel@lemire.me>
Reviewed-By: James M Snell <jasnell@gmail.com>
---
 src/blob_serializer_deserializer-inl.h      | 24 +++++--
 src/node.cc                                 | 24 ++++---
 src/node_snapshotable.cc                    | 11 ++--
 test/parallel/test-snapshot-reproducible.js | 70 +++++++++++++++++++++
 4 files changed, 110 insertions(+), 19 deletions(-)
 create mode 100644 test/parallel/test-snapshot-reproducible.js

diff --git a/src/blob_serializer_deserializer-inl.h b/src/blob_serializer_deserializer-inl.h
index f47a1e0cdf8a44..ecf664fc807b68 100644
--- a/src/blob_serializer_deserializer-inl.h
+++ b/src/blob_serializer_deserializer-inl.h
@@ -238,7 +238,8 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
   if (is_debug) {
     std::string str = std::is_arithmetic_v<T> ? "" : ToStr(data);
     std::string name = GetName<T>();
-    Debug("\nWriteVector<%s>() (%d-byte), count=%d: %s\n",
+    Debug("\nAt 0x%x: WriteVector<%s>() (%d-byte), count=%d: %s\n",
+          sink.size(),
           name.c_str(),
           sizeof(T),
           data.size(),
@@ -270,7 +271,10 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
 template <typename Impl>
 size_t BlobSerializer<Impl>::WriteStringView(std::string_view data,
                                              StringLogMode mode) {
-  Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data());
+  Debug("At 0x%x: WriteStringView(), length=%zu: %p\n",
+        sink.size(),
+        data.size(),
+        data.data());
   size_t written_total = WriteArithmetic<size_t>(data.size());
 
   size_t length = data.size();
@@ -294,6 +298,8 @@ size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
   return WriteStringView(data, StringLogMode::kAddressAndContent);
 }
 
+static constexpr size_t kPreviewCount = 16;  // Max elements in a debug preview.
+
 // Helper for writing an array of numeric types.
 template <typename Impl>
 template <typename T>
@@ -301,10 +307,18 @@ size_t BlobSerializer<Impl>::WriteArithmetic(const T* data, size_t count) {
   static_assert(std::is_arithmetic_v<T>, "Arithmetic type");
   DCHECK_GT(count, 0);  // Should not write contents for vectors of size 0.
   if (is_debug) {
-    std::string str =
-        "{ " + std::to_string(data[0]) + (count > 1 ? ", ... }" : " }");
+    size_t preview_count = count < kPreviewCount ? count : kPreviewCount;
+    std::string str = "{ ";
+    for (size_t i = 0; i < preview_count; ++i) {
+      str += (std::to_string(data[i]) + ",");
+    }
+    if (count > preview_count) {
+      str += "...";
+    }
+    str += "}";
     std::string name = GetName<T>();
-    Debug("Write<%s>() (%zu-byte), count=%zu: %s",
+    Debug("At 0x%x: Write<%s>() (%zu-byte), count=%zu: %s",
+          sink.size(),
           name.c_str(),
           sizeof(T),
           count,
diff --git a/src/node.cc b/src/node.cc
index b43368715cc056..cfda35d4b9eb27 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -1337,18 +1337,24 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr,
       return exit_code;
     }
   } else {
+    std::optional<std::string> builder_script_content;
     // Otherwise, load and run the specified builder script.
     std::unique_ptr<SnapshotData> generated_data =
         std::make_unique<SnapshotData>();
-    std::string builder_script_content;
-    int r = ReadFileSync(&builder_script_content, builder_script.c_str());
-    if (r != 0) {
-      FPrintF(stderr,
-              "Cannot read builder script %s for building snapshot. %s: %s",
-              builder_script,
-              uv_err_name(r),
-              uv_strerror(r));
-      return ExitCode::kGenericUserError;
+    if (builder_script != "node:generate_default_snapshot") {
+      builder_script_content = std::string();
+      int r = ReadFileSync(&(builder_script_content.value()),
+                           builder_script.c_str());
+      if (r != 0) {
+        FPrintF(stderr,
+                "Cannot read builder script %s for building snapshot. %s: %s\n",
+                builder_script,
+                uv_err_name(r),
+                uv_strerror(r));
+        return ExitCode::kGenericUserError;
+      }
+    } else {
+      snapshot_config.builder_script_path = std::nullopt;
     }
 
     exit_code = node::SnapshotBuilder::Generate(generated_data.get(),
diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc
index f665d9dad01feb..eaeafd8e16a91c 100644
--- a/src/node_snapshotable.cc
+++ b/src/node_snapshotable.cc
@@ -599,16 +599,17 @@ std::vector<char> SnapshotData::ToBlob() const {
   size_t written_total = 0;
 
   // Metadata
-  w.Debug("Write magic %" PRIx32 "\n", kMagic);
+  w.Debug("0x%x: Write magic %" PRIx32 "\n", w.sink.size(), kMagic);
   written_total += w.WriteArithmetic<uint32_t>(kMagic);
-  w.Debug("Write metadata\n");
+  w.Debug("0x%x: Write metadata\n", w.sink.size());
   written_total += w.Write<SnapshotMetadata>(metadata);
-
+  w.Debug("0x%x: Write snapshot blob\n", w.sink.size());
   written_total += w.Write<v8::StartupData>(v8_snapshot_blob_data);
-  w.Debug("Write isolate_data_indices\n");
+  w.Debug("0x%x: Write IsolateDataSerializeInfo\n", w.sink.size());
   written_total += w.Write<IsolateDataSerializeInfo>(isolate_data_info);
+  w.Debug("0x%x: Write EnvSerializeInfo\n", w.sink.size());
   written_total += w.Write<EnvSerializeInfo>(env_info);
-  w.Debug("Write code_cache\n");
+  w.Debug("0x%x: Write CodeCacheInfo\n", w.sink.size());
   written_total += w.WriteVector<builtins::CodeCacheInfo>(code_cache);
   w.Debug("SnapshotData::ToBlob() Wrote %d bytes\n", written_total);
 
diff --git a/test/parallel/test-snapshot-reproducible.js b/test/parallel/test-snapshot-reproducible.js
new file mode 100644
index 00000000000000..f9392a7fb4adfc
--- /dev/null
+++ b/test/parallel/test-snapshot-reproducible.js
@@ -0,0 +1,70 @@
+'use strict';
+
+require('../common');
+const { spawnSyncAndAssert } = require('../common/child_process');
+const tmpdir = require('../common/tmpdir');
+const fs = require('fs');
+const assert = require('assert');
+
+// Sink for the debug lines forwarded by generateSnapshot(). Swap the body out
+// to emit them differently when investigating a failure of this test.
+const log = (line) => {
+  console.log(line);
+};
+
+// Spawns this node binary to build the default snapshot inside a refreshed
+// tmpdir, with NODE_DEBUG_NATIVE=SNAPSHOT_SERDES set, and returns the blob.
+function generateSnapshot() {
+  tmpdir.refresh();
+
+  const args = [
+    '--random_seed=42',
+    '--predictable',
+    '--build-snapshot',
+    'node:generate_default_snapshot',
+  ];
+  const options = {
+    env: { ...process.env, NODE_DEBUG_NATIVE: 'SNAPSHOT_SERDES' },
+    cwd: tmpdir.path
+  };
+  const expectations = {
+    // Only forward the stderr lines that begin with a '0x' offset prefix.
+    stderr(output) {
+      output
+        .split('\n')
+        .filter((entry) => entry.startsWith('0x'))
+        .forEach((entry) => log(entry));
+    },
+  };
+  spawnSyncAndAssert(process.execPath, args, options, expectations);
+
+  // The child ran with tmpdir as cwd, so the blob is read back from there.
+  return fs.readFileSync(tmpdir.resolve('snapshot.blob'));
+}
+
+// Two independent builds with identical flags must yield identical blobs.
+const buf1 = generateSnapshot();
+const buf2 = generateSnapshot();
+
+// Compare in 16-byte rows, recording each mismatching row by hex offset.
+const diff = [];
+let offset = 0;
+const step = 16;
+do {
+  const length = Math.min(buf1.length - offset, step);
+  const slice1 = buf1.slice(offset, offset + length).toString('hex');
+  const slice2 = buf2.slice(offset, offset + length).toString('hex');
+  if (slice1 !== slice2) {
+    diff.push({ offset: '0x' + (offset).toString(16), slice1, slice2 });
+  }
+  offset += length;
+} while (offset < buf1.length);
+assert.strictEqual(offset, buf1.length);
+
+// Rows that only exist in buf2 (it is longer than buf1) are differences too.
+for (; offset < buf2.length; offset += step) {
+  const slice2 = buf2.slice(offset, offset + step).toString('hex');
+  diff.push({ offset: '0x' + (offset).toString(16), slice1: '', slice2 });
+}
+assert.deepStrictEqual(diff, []);
+assert.strictEqual(buf1.length, buf2.length);