diff --git a/README.md b/README.md
index d55e8ef94b..44f56edd01 100644
--- a/README.md
+++ b/README.md
@@ -203,6 +203,7 @@ gitnexus analyze --skip-git        # Index folders that are not Git repositories
 gitnexus analyze --embeddings    # Enable embedding generation (slower, better search)
 gitnexus analyze --verbose       # Log skipped files when parsers are unavailable
 gitnexus analyze --worker-timeout 60  # Increase worker idle timeout for slow parses
+gitnexus analyze --wal-checkpoint-threshold 67108864  # 64 MiB. Control LadybugDB WAL auto-checkpoint threshold (default: 67108864 = 64 MiB; -1 keeps Ladybug stock ~16 MiB)
 gitnexus analyze --workers <n>        # Parse worker pool size (default: cores-1, capped at 16; 0 = sequential)
 gitnexus mcp                     # Start MCP server (stdio) — serves all indexed repos
 gitnexus serve                   # Start local HTTP server (multi-repo) for web UI connection
@@ -241,6 +242,7 @@ Most `analyze` knobs are also CLI flags (`--workers`, `--worker-timeout`, `--max
 | `GITNEXUS_PROFILE_DEFERRED_SLOW_MS`    | `3000` (verbose) / `5000` | Per-file threshold in ms above which `processCallsFromExtracted` emits a `slow file …` log line. Parsed via `Number()`: accepts integers (`5000`), scientific notation (`2.5e3`), decimals (`.5`), and hex (`0x10`). Non-finite or non-positive values fall back to the default. | Hunting a few outlier files dominating the deferred call-resolution stage; lower to surface more, raise to focus only on the worst.          |
 | `GITNEXUS_MAX_FILE_SIZE`               | `512` (KB)                | Walker skip threshold in KB. Hard cap is `32768` (tree-sitter buffer ceiling). Equivalent to `--max-file-size <kb>`.                                       | Indexing repos with intentionally-large source files (generated parsers, vendored bundles) that should still be parsed.                     |
 | `GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS` | `30000`                   | Worker idle timeout in milliseconds before retry/fallback. Equivalent to `--worker-timeout <seconds>` × 1000.                                              | Slow-parsing files (large minified JS, deeply-nested TS types) that legitimately need more than 30s.                                        |
+| `GITNEXUS_WAL_CHECKPOINT_THRESHOLD`       | `67108864` (64 MiB)       | LadybugDB WAL auto-checkpoint threshold in bytes. Equivalent to `--wal-checkpoint-threshold <bytes>`. `-1` keeps LadybugDB's stock threshold (~16 MiB). Larger thresholds reduce checkpoint frequency but increase the WAL size at rotation time — choose a smaller value on disk-constrained environments. | You need a larger or smaller WAL auto-checkpoint threshold for your analyze workload.                                                         |
 | `GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES`  | `8388608` (8 MB)          | Per-job byte budget the pool will send to a worker in one `postMessage`.                                                                                   | Very large individual files; mostly diagnostic — bumping past 8 MB risks structured-clone memory pressure.                                  |
 | `GITNEXUS_WORKER_MAX_RESPAWNS_PER_SLOT`        | `3`                       | Max replacement spawns per worker slot before the slot is dropped from the active rotation. Bounds respawn loops on a chronically-crashing slot.           | Hosts where a flaky worker should retry more (raise) or fail-fast (lower) before the slot is dropped.                                       |
 | `GITNEXUS_WORKER_MAX_CUMULATIVE_TIMEOUT_MS`    | `5 × subBatchTimeoutMs`   | Total retry wall-time budget per job before quarantining. Combined with `timeoutBackoffFactor`, prevents exponentially-growing retries from stalling for hours. | Slow files that legitimately need long total retry windows; lower to fail-fast on stalls.                                                    |
diff --git a/gitnexus/README.md b/gitnexus/README.md
index 61ae131ace..9b18e1dbbe 100644
--- a/gitnexus/README.md
+++ b/gitnexus/README.md
@@ -158,6 +158,7 @@ gitnexus analyze --skip-agents-md  # Preserve custom AGENTS.md/CLAUDE.md gitnexu
 gitnexus analyze --verbose       # Log skipped files when parsers are unavailable
 gitnexus analyze --max-file-size 1024  # Skip files larger than N KB (default: 512, cap: 32768)
 gitnexus analyze --worker-timeout 60  # Increase worker idle timeout for slow parses
+gitnexus analyze --wal-checkpoint-threshold 67108864  # 64 MiB. Control LadybugDB WAL auto-checkpoint threshold (default: 67108864 = 64 MiB; -1 keeps Ladybug stock ~16 MiB)
 gitnexus mcp                     # Start MCP server (stdio) — serves all indexed repos
 gitnexus serve                   # Start local HTTP server (multi-repo) for web UI
 gitnexus index                   # Register an existing .gitnexus/ folder into the global registry
@@ -307,6 +308,7 @@ Configure the behavior with two environment variables:
 |----------|--------|---------|--------|
 | `GITNEXUS_LBUG_EXTENSION_INSTALL` | `auto`, `load-only`, `never` | `auto` | `auto` runs one bounded INSTALL if LOAD fails. `load-only` only uses already-installed extensions (recommended for offline / firewalled environments). `never` skips optional extensions entirely. |
 | `GITNEXUS_LBUG_EXTENSION_INSTALL_TIMEOUT_MS` | positive integer | `15000` | Wall-clock budget for the out-of-process `INSTALL` child before it is killed. |
+| `GITNEXUS_WAL_CHECKPOINT_THRESHOLD` | integer `>= -1` | `67108864` (64 MiB) | LadybugDB WAL auto-checkpoint threshold during analyze (bytes). Auto-checkpoint remains enabled; `-1` keeps Ladybug's stock ~16 MiB. Larger thresholds reduce checkpoint frequency but increase the WAL size at rotation time — choose a smaller value on disk-constrained environments. |
 
 ```bash
 # Offline/airgapped: never reach the network for extensions
diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts
index 55995b76bd..32ceaca623 100644
--- a/gitnexus/src/cli/analyze.ts
+++ b/gitnexus/src/cli/analyze.ts
@@ -13,7 +13,12 @@ import { spawn } from 'child_process';
 import v8 from 'v8';
 import cliProgress from 'cli-progress';
 import { closeLbug } from '../core/lbug/lbug-adapter.js';
-import { isWalCorruptionError, WAL_RECOVERY_SUGGESTION } from '../core/lbug/lbug-config.js';
+import {
+  isLbugCheckpointIoError,
+  isWalCorruptionError,
+  parseWalCheckpointThreshold,
+  WAL_RECOVERY_SUGGESTION,
+} from '../core/lbug/lbug-config.js';
 import {
   getStoragePaths,
   getGlobalRegistryPath,
@@ -415,6 +420,15 @@ const forceHeapOOMForTestIfEnabled = (): void => {
   for (;;) chunks.push('x'.repeat(1024 * 1024));
 };
 
+// 64 MiB keeps auto-checkpoint enabled but triggers less frequently than
+// Ladybug's stock ~16 MiB threshold, reducing rename/remove churn on large
+// runs. Also matches the GitNexus default in `lbug-config.ts`.
+//
+// IMPORTANT: keep README examples (`README.md`, `gitnexus/README.md`) and
+// the `DEFAULT_WAL_CHECKPOINT_THRESHOLD` constant in
+// `gitnexus/src/core/lbug/lbug-config.ts` in sync with this value.
+const RECOMMENDED_WAL_CHECKPOINT_THRESHOLD = 64 * 1024 * 1024;
+
 /** Re-exec the process with a 16GB heap and larger stack if we're currently below that. */
 async function ensureHeap(): Promise<boolean> {
   const nodeOpts = process.env.NODE_OPTIONS || '';
@@ -477,6 +491,8 @@ const ANALYZE_CLI_ENV_KEYS = [
   'GITNEXUS_PROFILE_DEFERRED_SLOW_MS',
   'GITNEXUS_MAX_FILE_SIZE',
   'GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS',
+  'GITNEXUS_WAL_CHECKPOINT_THRESHOLD',
+  'GITNEXUS_WAL_MANUAL_CHECKPOINT',
   'GITNEXUS_EMBEDDING_THREADS',
   'GITNEXUS_EMBEDDING_BATCH_SIZE',
   'GITNEXUS_EMBEDDING_SUB_BATCH_SIZE',
@@ -562,6 +578,8 @@ export interface AnalyzeOptions {
   maxFileSize?: string;
   /** Override worker sub-batch idle timeout in seconds. */
   workerTimeout?: string;
+  /** Control LadybugDB WAL auto-checkpoint threshold during analyze. */
+  walCheckpointThreshold?: string;
   /** Parse worker pool size; 0 disables workers (sequential fallback). */
   workers?: string;
   embeddingThreads?: string;
@@ -633,6 +651,16 @@ const analyzeCommandImpl = async (inputPath?: string, options?: AnalyzeOptions):
     );
   }
 
+  if (options?.walCheckpointThreshold !== undefined) {
+    const parsed = parseWalCheckpointThreshold(options.walCheckpointThreshold);
+    if (parsed === undefined) {
+      cliError('  --wal-checkpoint-threshold must be an integer >= -1.\n');
+      process.exitCode = 1;
+      return;
+    }
+    process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD = String(parsed);
+  }
+
   // `--workers` is threaded through `runFullAnalysis` options → PipelineOptions
   // → createWorkerPool, intentionally bypassing the GITNEXUS_WORKER_POOL_SIZE
   // env channel so this CLI surface never mutates `process.env` for pool size.
@@ -1130,6 +1158,20 @@ const analyzeCommandImpl = async (inputPath?: string, options?: AnalyzeOptions):
       return;
     }
 
+    if (isLbugCheckpointIoError(err)) {
+      cliError(
+        `  LadybugDB failed while rotating/removing WAL checkpoint files.\n` +
+          `  This can happen when auto-checkpoint runs at the default threshold (~16MB).\n` +
+          `  Retry with a larger checkpoint threshold to reduce checkpoint frequency:\n` +
+          `    gitnexus analyze --wal-checkpoint-threshold ${RECOMMENDED_WAL_CHECKPOINT_THRESHOLD}\n` +
+          `    (or set GITNEXUS_WAL_CHECKPOINT_THRESHOLD=${RECOMMENDED_WAL_CHECKPOINT_THRESHOLD})\n` +
+          `    (Try 33554432 = 32 MiB on small-disk / CI runners.)\n`,
+        { recoveryHint: 'wal-checkpoint-threshold' },
+      );
+      process.exitCode = 1;
+      return;
+    }
+
     // HF download failure — show clean guidance without the raw stack trace.
     // Checked before writeFatalToStderr so the user sees one focused message
     // rather than a stack-trace dump followed by a second remediation block.
diff --git a/gitnexus/src/cli/cli-message.ts b/gitnexus/src/cli/cli-message.ts
index db5f51fab5..1f1eb9521e 100644
--- a/gitnexus/src/cli/cli-message.ts
+++ b/gitnexus/src/cli/cli-message.ts
@@ -29,6 +29,38 @@
  */
 import { logger } from '../core/logger.js';
 
+/**
+ * String-literal union of all `recoveryHint` tags emitted by the CLI.
+ *
+ * Centralized so a new recovery branch added in `analyze.ts` cannot land
+ * without updating this union — TypeScript will reject the unknown literal
+ * passed via `cliError({ recoveryHint: '...' })`. To add a new hint:
+ *   1. Add the tag string to this union.
+ *   2. Pass it as the `recoveryHint` field at the relevant `cliError`
+ *      call site.
+ *
+ * Consumers can import this type to narrow log-record `recoveryHint`
+ * fields without restating the literal list.
+ */
+export type RecoveryHint =
+  | 'wal-corruption'
+  | 'wal-checkpoint-threshold'
+  | 'heap-oom-respawn'
+  | 'native-worker-abort'
+  | 'hf-endpoint-unreachable'
+  | 'large-repo'
+  | 'npm-resolution'
+  | 'module-not-found';
+
+/**
+ * Common shape for the optional structured-field bag passed to
+ * `cliError`/`cliWarn`/`cliInfo`. Typed so the `recoveryHint` slot is
+ * checked against the {@link RecoveryHint} union.
+ */
+export interface CliMessageFields extends Record<string, unknown> {
+  recoveryHint?: RecoveryHint;
+}
+
 function writeStderr(msg: string): void {
   // Direct write — bypassing `console.*` so it cannot be intercepted by
   // progress-bar redirection (see `cli/analyze.ts:barLog`) or other
@@ -41,7 +73,7 @@ function writeStderr(msg: string): void {
  * User-facing informational message. Use for banners, listening URLs,
  * and any message the user expects to read in plain text.
  */
-export function cliInfo(msg: string, fields?: Record<string, unknown>): void {
+export function cliInfo(msg: string, fields?: CliMessageFields): void {
   writeStderr(msg);
   logger.info(fields ?? {}, msg);
 }
@@ -50,7 +82,7 @@ export function cliInfo(msg: string, fields?: Record<string, unknown>): void {
  * User-facing warning. Operator-actionable but non-fatal — `cliWarn`
  * indicates the command can still proceed in some form.
  */
-export function cliWarn(msg: string, fields?: Record<string, unknown>): void {
+export function cliWarn(msg: string, fields?: CliMessageFields): void {
   writeStderr(msg);
   logger.warn(fields ?? {}, msg);
 }
@@ -59,7 +91,7 @@ export function cliWarn(msg: string, fields?: Record<string, unknown>): void {
  * User-facing error. Indicates the command cannot proceed; usually
  * paired with a non-zero exit code at the call site.
  */
-export function cliError(msg: string, fields?: Record<string, unknown>): void {
+export function cliError(msg: string, fields?: CliMessageFields): void {
   writeStderr(msg);
   logger.error(fields ?? {}, msg);
 }
diff --git a/gitnexus/src/cli/index.ts b/gitnexus/src/cli/index.ts
index 31446cf6fb..17fab20685 100644
--- a/gitnexus/src/cli/index.ts
+++ b/gitnexus/src/cli/index.ts
@@ -71,6 +71,11 @@ program
     '--worker-timeout <seconds>',
     'Worker sub-batch idle timeout before retry/fallback. Default: 30.',
   )
+  .option(
+    '--wal-checkpoint-threshold <bytes>',
+    'LadybugDB WAL auto-checkpoint threshold in bytes during analyze ' +
+      '(integer >= -1; default: 67108864 = 64 MiB; -1 keeps Ladybug stock ~16 MiB).',
+  )
   .option(
     '--workers <n>',
     'Parse worker pool size. Default: cores-1 capped at 16. Pass 0 to disable workers (sequential).',
@@ -85,6 +90,7 @@ program
       '  GITNEXUS_NO_GITIGNORE=1   Skip .gitignore parsing (still reads .gitnexusignore)\n' +
       '  GITNEXUS_MAX_FILE_SIZE=N  Override large-file skip threshold (KB). Default 512, max 32768.\n' +
       '  GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS=N  Worker idle timeout in milliseconds. Default 30000.\n' +
+      '  GITNEXUS_WAL_CHECKPOINT_THRESHOLD=N  LadybugDB WAL auto-checkpoint threshold in bytes (default 67108864 = 64 MiB; -1 keeps Ladybug stock ~16 MiB).\n' +
       '  GITNEXUS_WORKER_SUB_BATCH_MAX_BYTES=N  Worker job byte budget. Default 8388608.\n' +
       '  GITNEXUS_WORKER_POOL_SIZE=N  Parse worker count override. Default cores-1 capped at 16.\n' +
       '  GITNEXUS_PARSE_CHUNK_CONCURRENCY=N  Concurrent in-flight parse chunks. Default 2.\n' +
@@ -93,6 +99,7 @@ program
       '  GITNEXUS_WORKER_CONSECUTIVE_FAILURE_THRESHOLD=N  Per-slot deaths to trip circuit breaker. Default max(3, poolSize).\n' +
       '  GITNEXUS_EMBEDDING_THREADS=N  Limit local ONNX CPU threads for --embeddings.\n' +
       '  GITNEXUS_SEMANTIC_EXACT_SCAN_LIMIT=N  Max embedding chunks for exact-scan fallback. Default 10000.\n' +
+      '\nFlags override the corresponding env vars when both are provided.\n' +
       '\nTip: `.gitnexusignore` supports `.gitignore`-style negation. Add e.g.\n' +
       '     `!__tests__/` to index a directory that is auto-filtered by default (#771).',
   )
diff --git a/gitnexus/src/core/lbug/lbug-adapter.ts b/gitnexus/src/core/lbug/lbug-adapter.ts
index da9809dbae..5e2a346019 100644
--- a/gitnexus/src/core/lbug/lbug-adapter.ts
+++ b/gitnexus/src/core/lbug/lbug-adapter.ts
@@ -1527,6 +1527,27 @@ export const flushWAL = async (): Promise<void> => {
   }
 };
 
+/**
+ * Issue a manual `CHECKPOINT` against the current connection and surface
+ * any engine error to the caller. Unlike {@link flushWAL}, this variant
+ * does NOT swallow Ladybug rename/remove IO failures — the manual
+ * checkpoint driver (`wal-checkpoint-driver.ts`) relies on the rejection
+ * to drive its bounded retry loop. Returns `false` when no connection is
+ * open (the caller treats this as a no-op success — there is no WAL to
+ * flush). Returns `true` after a successful CHECKPOINT + drain.
+ *
+ * The split from `flushWAL` is deliberate: every other CHECKPOINT site
+ * (server flush, safeClose) is best-effort and prefers a silent skip;
+ * the manual driver, by contrast, must observe failures to decide
+ * whether to retry.
+ */
+export const tryFlushWAL = async (): Promise<boolean> => {
+  if (!conn) return false;
+  const checkpointResult = await conn.query('CHECKPOINT');
+  await drainQueryResult(checkpointResult);
+  return true;
+};
+
 /**
  * Flush the WAL and close the connection and database handles.
  *
diff --git a/gitnexus/src/core/lbug/lbug-config.ts b/gitnexus/src/core/lbug/lbug-config.ts
index 22f2d3b184..a71b25930a 100644
--- a/gitnexus/src/core/lbug/lbug-config.ts
+++ b/gitnexus/src/core/lbug/lbug-config.ts
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
 import os from 'os';
 import path from 'path';
 import type lbug from '@ladybugdb/core';
+import { logger } from '../logger.js';
 
 /**
  * Shared configuration for `@ladybugdb/core` `Database` construction.
@@ -45,6 +46,44 @@ export const LBUG_MAX_DB_SIZE: number = (() => {
   return 16 * 1024 * 1024 * 1024;
 })();
 
+export const parseWalCheckpointThreshold = (raw: string | undefined): number | undefined => {
+  if (raw === undefined) return undefined;
+  const normalized = raw.trim();
+  if (normalized.length === 0) return undefined;
+  const parsed = Number(normalized);
+  if (!Number.isInteger(parsed) || parsed < -1) return undefined;
+  return parsed;
+};
+
+/**
+ * Default GitNexus WAL auto-checkpoint threshold in bytes (64 MiB).
+ *
+ * Larger than Ladybug's stock ~16 MiB to reduce checkpoint rename/remove
+ * churn under heavy analyze write load — the original race that motivated
+ * issue #1741 triggered at the stock threshold. README examples in
+ * `README.md` and `gitnexus/README.md` and the recovery hint in
+ * `analyze.ts` MUST stay in sync with this value.
+ */
+const DEFAULT_WAL_CHECKPOINT_THRESHOLD = 64 * 1024 * 1024;
+
+const resolveCheckpointThreshold = (): number => {
+  const raw = process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD;
+  if (raw === undefined) return DEFAULT_WAL_CHECKPOINT_THRESHOLD;
+  const parsed = parseWalCheckpointThreshold(raw);
+  if (parsed !== undefined) return parsed;
+  // Non-empty but unparseable input: warn the operator and fall back. Mirrors
+  // the CLI's `--wal-checkpoint-threshold` validation (which hard-errors)
+  // but the env-var path stays soft to preserve "set once in your shell"
+  // ergonomics across mixed-version invocations.
+  if (raw.trim().length > 0) {
+    logger.warn(
+      { rawValue: raw, fallback: DEFAULT_WAL_CHECKPOINT_THRESHOLD },
+      `Ignoring invalid GITNEXUS_WAL_CHECKPOINT_THRESHOLD=${raw}; expected integer >= -1; falling back to default (${DEFAULT_WAL_CHECKPOINT_THRESHOLD}).`,
+    );
+  }
+  return DEFAULT_WAL_CHECKPOINT_THRESHOLD;
+};
+
 /** Matches WAL corruption errors from the LadybugDB engine. */
 const WAL_CORRUPTION_RE = /corrupt(ed)?\s+wal|invalid\s+wal\s+record|wal.*corrupt|checksum.*wal/i;
 
@@ -57,6 +96,50 @@ export function isWalCorruptionError(err: unknown): boolean {
   return WAL_CORRUPTION_RE.test(msg);
 }
 
+// ─── Ladybug WAL checkpoint IO error matchers ───────────────────────────────
+//
+// Matched against LadybugDB v0.16.1 (see `gitnexus/package.json`
+// @ladybugdb/core). Strict regexes encode local_file_system.cpp wording
+// verified at that version. Two-tier strategy: strict matchers first so we
+// only fire on real checkpoint-rotation shapes; a permissive fallback
+// catches future Ladybug message drift so the recovery hint keeps surfacing
+// even if upstream wording changes.
+//
+// From Ladybug native LocalFileSystem exceptions (`local_file_system.cpp`),
+// surfaced in Node as:
+// "Runtime exception: IO exception: Error renaming file ..."
+// "Runtime exception: IO exception: Error removing directory or file ..."
+// We only match checkpoint-rotation shapes:
+//   - "<db>.wal -> <db>.wal.checkpoint" rename failures
+//   - "<db>.wal.checkpoint" remove failures
+// Example matches:
+//   "Runtime exception: IO exception: Error renaming file /x/lbug.wal to /x/lbug.wal.checkpoint. ErrorMessage: Permission denied"
+//   "Runtime exception: IO exception: Error removing directory or file /x/lbug.wal.checkpoint.  Error Message: Permission denied"
+// Matching is case-insensitive to remain robust across wrappers/platforms.
+const LBUG_CHECKPOINT_RENAME_RE =
+  /^runtime exception: io exception:\s*error renaming file\s+.+?\.wal\s+to\s+.+?\.wal\.checkpoint(?:\.|\s|$)/i;
+const LBUG_CHECKPOINT_REMOVE_RE =
+  /^runtime exception: io exception:\s*error removing directory or file\s+.+?\.wal\.checkpoint(?:\.|\s|$)/i;
+/**
+ * Permissive fallback: any IO-exception-shaped message that mentions a
+ * `.wal.checkpoint` path. Catches future Ladybug message drift (different
+ * verb, additional preamble, locale variation) so the recovery hint keeps
+ * surfacing even if the strict regexes go stale.
+ */
+const LBUG_CHECKPOINT_PERMISSIVE_RE = /io exception.*\.wal\.checkpoint/i;
+
+/**
+ * True when `err` looks like a Ladybug WAL-checkpoint rotation/remove IO
+ * failure. Tries strict matchers first (renames + removes), then falls
+ * back to the permissive matcher.
+ */
+export const isLbugCheckpointIoError = (err: unknown): boolean => {
+  if (!err) return false;
+  const msg = err instanceof Error ? err.message : String(err);
+  if (LBUG_CHECKPOINT_RENAME_RE.test(msg) || LBUG_CHECKPOINT_REMOVE_RE.test(msg)) return true;
+  return LBUG_CHECKPOINT_PERMISSIVE_RE.test(msg);
+};
+
 type LbugModule = typeof lbug;
 
 export interface LbugDatabaseOptions {
@@ -103,8 +186,8 @@ export function createLbugDatabase(
     false, // enableCompression (pinned for v0.16.0)
     options.readOnly ?? false,
     LBUG_MAX_DB_SIZE,
-    true, // autoCheckpoint
-    -1, // checkpointThreshold
+    true, // autoCheckpoint (always on)
+    resolveCheckpointThreshold(), // checkpointThreshold (default 64 MiB; override with GITNEXUS_WAL_CHECKPOINT_THRESHOLD; -1 keeps Ladybug stock ~16 MiB)
     options.throwOnWalReplayFailure ?? true,
     true, // enableChecksums
   ) as lbug.Database;
diff --git a/gitnexus/src/core/lbug/wal-checkpoint-driver.ts b/gitnexus/src/core/lbug/wal-checkpoint-driver.ts
new file mode 100644
index 0000000000..57dc6b2d68
--- /dev/null
+++ b/gitnexus/src/core/lbug/wal-checkpoint-driver.ts
@@ -0,0 +1,232 @@
+/**
+ * Manual WAL checkpoint driver with bounded retry (#1741 follow-up).
+ *
+ * Background
+ * ----------
+ * LadybugDB's native auto-checkpoint runs from inside the C++ engine on a
+ * background path that has no JS-side hook for mid-write rotation. When
+ * the rename of `<db>.wal` → `<db>.wal.checkpoint` races a transient file
+ * lock (Windows Defender, AV scanner, NTFS shadow copy) the engine raises
+ * a `Runtime exception: IO exception: Error renaming file …` that aborts
+ * the in-flight write. There is no engine-level retry.
+ *
+ * The auto-checkpoint cannot be made retryable from JS, but a *manual*
+ * `CHECKPOINT` query that the JS layer issues itself CAN be wrapped in a
+ * bounded retry. By draining the WAL on a tight cadence — more often than
+ * the native threshold — the auto-checkpoint almost never has work left
+ * to do, so the un-retriable native rename race is moved into the
+ * JS-controlled path where this module's retry absorbs it.
+ *
+ * Design contract
+ * ---------------
+ * - `autoCheckpoint` stays on (maintainer requirement). This driver is
+ *   additive: it preempts the native checkpoint, it does not replace it.
+ * - The driver runs ONLY during analyze (callers opt-in explicitly). MCP
+ *   and other long-lived flows continue to rely on the close-time
+ *   CHECKPOINT in `safeClose`.
+ * - Opt-out is via `GITNEXUS_WAL_MANUAL_CHECKPOINT=0`. Default is on.
+ * - Retries only fire on `isLbugCheckpointIoError` — every other error
+ *   surfaces immediately. The retry budget is small (3 attempts) with
+ *   jittered backoff so a chronic rename failure escalates fast.
+ * - Retry attempts log at `debug`; only the final, exhausted failure
+ *   surfaces to the caller (and is logged at `warn` here for operators).
+ */
+
+import { logger } from '../logger.js';
+import { tryFlushWAL } from './lbug-adapter.js';
+import { isLbugCheckpointIoError } from './lbug-config.js';
+
+/**
+ * Bounded retry budget. Total worst-case wall time is dominated by the
+ * three sleeps below (~750 ms before jitter) plus three CHECKPOINT round
+ * trips — small enough to stay invisible during a large analyze, large
+ * enough to ride out a single AV scanner sweep on Windows.
+ */
+const CHECKPOINT_RETRY_ATTEMPTS = 3;
+
+/**
+ * Base back-off in ms. Each attempt waits `BASE_DELAYS[attempt-1]`
+ * milliseconds before the next try, plus a small jitter to avoid
+ * synchronized retries when multiple analyzers ever share a host.
+ */
+const BASE_DELAYS_MS: readonly number[] = [50, 200, 500];
+
+/** Maximum jitter added on top of each base delay. */
+const JITTER_MAX_MS = 50;
+
+const sleep = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms));
+
+/**
+ * Run a single CHECKPOINT with bounded retry on
+ * `isLbugCheckpointIoError`. Returns the number of attempts actually
+ * spent (1-`CHECKPOINT_RETRY_ATTEMPTS`) on success, or rethrows the last
+ * checkpoint error after exhausting the budget. Non-checkpoint errors
+ * (e.g. WAL corruption, lock-busy) propagate immediately on the first
+ * attempt — those are not what this retry is designed to absorb.
+ *
+ * The split from `flushWAL` is deliberate: `flushWAL` is the swallow-and-
+ * log helper used by `safeClose` and the server's best-effort flush,
+ * which by contract cannot fail the surrounding operation. The manual
+ * driver MUST observe failures to decide whether to retry, and that is
+ * the role of `tryFlushWAL`.
+ *
+ * Exported for direct unit testing — production callers use
+ * {@link startWalCheckpointDriver} or {@link checkpointOnce}.
+ */
+export const runCheckpointWithRetry = async (
+  options: {
+    /** Override the sleep implementation for tests. */
+    sleepFn?: (ms: number) => Promise<void>;
+    /** Override the CHECKPOINT call for tests. */
+    checkpointFn?: () => Promise<boolean>;
+    /** Override the jitter source for tests. Returns a value in [0, 1). */
+    randomFn?: () => number;
+  } = {},
+): Promise<{ attempts: number; flushed: boolean }> => {
+  const sleepImpl = options.sleepFn ?? sleep;
+  const checkpointImpl = options.checkpointFn ?? tryFlushWAL;
+  const randomImpl = options.randomFn ?? Math.random;
+
+  let lastError: unknown;
+  for (let attempt = 1; attempt <= CHECKPOINT_RETRY_ATTEMPTS; attempt++) {
+    try {
+      const flushed = await checkpointImpl();
+      return { attempts: attempt, flushed };
+    } catch (err) {
+      lastError = err;
+      if (!isLbugCheckpointIoError(err)) {
+        // Non-checkpoint error — propagate immediately. Examples:
+        // WAL corruption, missing connection, query syntax failure.
+        // Retrying these would only mask the real signal.
+        throw err;
+      }
+      if (attempt === CHECKPOINT_RETRY_ATTEMPTS) break;
+      const base = BASE_DELAYS_MS[Math.min(attempt - 1, BASE_DELAYS_MS.length - 1)] ?? 500;
+      // randomImpl defaults to Math.random — non-cryptographic by design; jitter only avoids
+      // synchronized retries between concurrent analyzers.
+      const delayMs = base + Math.floor(randomImpl() * JITTER_MAX_MS);
+      logger.debug(
+        { attempt, totalAttempts: CHECKPOINT_RETRY_ATTEMPTS, delayMs },
+        'GitNexus: WAL checkpoint IO error — retrying',
+      );
+      await sleepImpl(delayMs);
+    }
+  }
+
+  logger.warn(
+    { attempts: CHECKPOINT_RETRY_ATTEMPTS },
+    'GitNexus: manual WAL checkpoint exhausted retry budget — surfacing IO error to caller',
+  );
+  throw lastError;
+};
+
+/**
+ * Single-shot manual checkpoint. Use this when the caller drives the
+ * cadence itself (e.g. a phase boundary in `runFullAnalysis`).
+ *
+ * Honors the `GITNEXUS_WAL_MANUAL_CHECKPOINT=0` opt-out so operators can
+ * disable the manual path if it ever interacts badly with a future
+ * Ladybug release.
+ */
+export const checkpointOnce = async (): Promise<void> => {
+  if (!isManualCheckpointEnabled()) return;
+  await runCheckpointWithRetry();
+};
+
+/** Default cadence (ms) for the periodic driver. */
+const DEFAULT_PERIOD_MS = 5_000;
+
+/**
+ * Start a periodic manual checkpoint driver. The returned handle has a
+ * `stop()` method that resolves once the in-flight checkpoint (if any)
+ * settles, so callers can `await driver.stop()` before close-time
+ * `safeClose` and avoid racing the final flush.
+ *
+ * The first checkpoint fires after `periodMs` (not immediately) so a
+ * cold analyze does not pay a CHECKPOINT round trip before any writes
+ * have happened.
+ */
+export interface WalCheckpointDriver {
+  /** Stop the driver and await any in-flight checkpoint. Idempotent. */
+  stop(): Promise<void>;
+}
+
+export const startWalCheckpointDriver = (
+  options: { periodMs?: number } = {},
+): WalCheckpointDriver => {
+  if (!isManualCheckpointEnabled()) {
+    return { stop: async () => undefined };
+  }
+
+  const periodMs = options.periodMs ?? DEFAULT_PERIOD_MS;
+  let stopped = false;
+  let inflight: Promise<void> | null = null;
+
+  const tick = async (): Promise<void> => {
+    if (stopped) return;
+    inflight = runCheckpointWithRetry()
+      .then(() => undefined)
+      .catch((err) => {
+        // The retry budget exhausted. The caller's surrounding write
+        // will see the same engine error on its next operation and the
+        // `analyzeCommand` catch block will emit the recovery hint.
+        // Logging here keeps the operator-visible trail without
+        // double-logging the user-facing message.
+        logger.warn(
+          { err: err instanceof Error ? err.message : String(err) },
+          'GitNexus: manual WAL checkpoint failed after retries',
+        );
+      });
+    try {
+      await inflight;
+    } finally {
+      inflight = null;
+    }
+  };
+
+  const handle = setInterval(() => {
+    // Fire-and-forget: setInterval cannot await directly. The next tick
+    // is guarded by `stopped` and the `inflight` reference.
+    void tick();
+  }, periodMs);
+  // `setInterval` returned by Node is a `Timeout` object with `.unref()`
+  // so a hung driver never prevents process exit.
+  if (typeof (handle as NodeJS.Timeout).unref === 'function') {
+    (handle as NodeJS.Timeout).unref();
+  }
+
+  return {
+    stop: async () => {
+      if (stopped) {
+        if (inflight) await inflight;
+        return;
+      }
+      stopped = true;
+      clearInterval(handle);
+      if (inflight) {
+        try {
+          await inflight;
+        } catch {
+          /* swallowed in tick() — surface path is the surrounding write */
+        }
+      }
+    },
+  };
+};
+
+/**
+ * Reading `GITNEXUS_WAL_MANUAL_CHECKPOINT` at every call site (rather
+ * than caching at module load) keeps `analyzeCommand` env restoration
+ * honest: tests that toggle the flag between invocations see the live
+ * value, matching the `ANALYZE_CLI_ENV_KEYS` snapshot/restore contract
+ * in `analyze.ts`.
+ *
+ * Accepted opt-out values: '0', 'false', 'off', 'no' (case-insensitive).
+ * Anything else — including undefined — leaves the driver enabled.
+ */
+export const isManualCheckpointEnabled = (): boolean => {
+  const raw = process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
+  if (raw === undefined) return true;
+  const normalized = raw.trim().toLowerCase();
+  return !['0', 'false', 'off', 'no'].includes(normalized);
+};
diff --git a/gitnexus/src/core/run-analyze.ts b/gitnexus/src/core/run-analyze.ts
index 79601a9a22..c28504c4c3 100644
--- a/gitnexus/src/core/run-analyze.ts
+++ b/gitnexus/src/core/run-analyze.ts
@@ -26,6 +26,10 @@ import {
   queryImporters,
 } from './lbug/lbug-adapter.js';
 import { createSearchFTSIndexes, verifySearchFTSIndexes } from './search/fts-indexes.js';
+import {
+  startWalCheckpointDriver,
+  type WalCheckpointDriver,
+} from './lbug/wal-checkpoint-driver.js';
 import {
   getStoragePaths,
   saveMeta,
@@ -521,6 +525,16 @@ export async function runFullAnalysis(
   }
 
   await initLbug(lbugPath);
+
+  // Manual WAL checkpoint driver (#1741): periodically drain the WAL
+  // from JS so the un-retriable native auto-checkpoint almost never
+  // has work left to do. Failures of the manual CHECKPOINT are absorbed
+  // by the driver's bounded retry; the final un-recoverable error still
+  // surfaces via the surrounding write that follows the failed flush.
+  // Opt-out via `GITNEXUS_WAL_MANUAL_CHECKPOINT=0` (the driver itself
+  // returns a no-op handle when disabled). Analyze-only: MCP and serve
+  // paths continue to rely on the close-time CHECKPOINT in `safeClose`.
+  const walCheckpointDriver: WalCheckpointDriver = startWalCheckpointDriver();
   try {
     // All work after initLbug is wrapped in try/finally to ensure closeLbug()
     // is called even if an error occurs — the module-level singleton DB handle
@@ -961,6 +975,9 @@ export async function runFullAnalysis(
     }
 
     // ── Close LadybugDB ──────────────────────────────────────────────
+    // Stop the manual checkpoint driver before closeLbug so its
+    // in-flight CHECKPOINT cannot race the `safeClose` CHECKPOINT.
+    await walCheckpointDriver.stop();
     await closeLbug();
 
     progress('done', 100, 'Done');
@@ -972,7 +989,13 @@ export async function runFullAnalysis(
       pipelineResult,
     };
   } catch (err) {
-    // Ensure LadybugDB is closed even on error
+    // Ensure LadybugDB is closed even on error. Stop the driver first
+    // so its retry loop cannot extend an already-failing analyze.
+    try {
+      await walCheckpointDriver.stop();
+    } catch {
+      /* swallow — surface path is the rethrow below */
+    }
     try {
       await closeLbug();
     } catch {
diff --git a/gitnexus/test/integration/analyze-wal-checkpoint-failure.test.ts b/gitnexus/test/integration/analyze-wal-checkpoint-failure.test.ts
new file mode 100644
index 0000000000..c517263dfd
--- /dev/null
+++ b/gitnexus/test/integration/analyze-wal-checkpoint-failure.test.ts
@@ -0,0 +1,133 @@
+/**
+ * Integration test: WAL auto-checkpoint rename failure (#1741 / #1772).
+ *
+ * Drives the real `analyzeCommand` against a real LadybugDB instance and
+ * provokes a genuine Ladybug-engine `IO exception: Error renaming file
+ * <db>.wal to <db>.wal.checkpoint` by pre-planting a *directory* at the
+ * `<db>.wal.checkpoint` rename target. `fs.rename` (which Ladybug's native
+ * `LocalFileSystem` ultimately invokes) cannot overwrite a non-empty
+ * directory with a file on either POSIX or Windows, and Ladybug's
+ * `doInitLbug` orphan-cleanup uses `fs.unlink` which fails on a directory
+ * — so the blocker survives initialization and the next auto-checkpoint
+ * fires the natural rename failure that motivated PR #1772.
+ *
+ * No test-only hooks, no env-var fault toggles in production code: we use
+ * the same `GITNEXUS_WAL_CHECKPOINT_THRESHOLD=1` knob that real users have
+ * available to force checkpointing on every write, then arrange a real
+ * filesystem state that makes the rename impossible.
+ *
+ * Verifies that:
+ *   1. The CLI exits non-zero.
+ *   2. stderr contains the actionable recovery hint pointing at
+ *      `--wal-checkpoint-threshold 67108864` (the
+ *      `RECOMMENDED_WAL_CHECKPOINT_THRESHOLD` constant in `analyze.ts`).
+ *   3. The recovery message references the
+ *      `GITNEXUS_WAL_CHECKPOINT_THRESHOLD` env var as a parallel route.
+ *
+ * Empirically confirmed portable on Windows; the same mechanism is
+ * expected to work on POSIX (`rename(2)` fails with `EISDIR`/`ENOTEMPTY`
+ * when the target is a non-empty directory). If a future Ladybug release
+ * changes the rename ordering, the loose match on the recovery hint
+ * (rather than the exact engine error wording) keeps this test stable.
+ */
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { spawnSync } from 'child_process';
+import path from 'path';
+import fs from 'fs';
+import os from 'os';
+import { createRequire } from 'module';
+import { fileURLToPath, pathToFileURL } from 'url';
+import { cleanupTempDirSync } from '../helpers/test-db.js';
+
+const testDir = path.dirname(fileURLToPath(import.meta.url));
+const repoRoot = path.resolve(testDir, '../..');
+const cliEntry = path.join(repoRoot, 'src/cli/index.ts');
+const FIXTURE_SRC = path.resolve(testDir, '..', 'fixtures', 'mini-repo');
+
+const _require = createRequire(import.meta.url);
+const tsxPkgDir = path.dirname(_require.resolve('tsx/package.json'));
+const tsxImportUrl = pathToFileURL(path.join(tsxPkgDir, 'dist', 'loader.mjs')).href;
+
+let tmpParent: string;
+let suiteGitnexusHome: string;
+let repoPath: string;
+
+beforeAll(() => {
+  tmpParent = fs.mkdtempSync(path.join(os.tmpdir(), 'gn-wal-checkpoint-e2e-'));
+  suiteGitnexusHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gn-wal-checkpoint-home-'));
+  repoPath = path.join(tmpParent, 'mini-repo');
+  fs.cpSync(FIXTURE_SRC, repoPath, { recursive: true });
+
+  spawnSync('git', ['init'], { cwd: repoPath, stdio: 'pipe' });
+  spawnSync('git', ['add', '-A'], { cwd: repoPath, stdio: 'pipe' });
+  spawnSync('git', ['commit', '-m', 'initial commit'], {
+    cwd: repoPath,
+    stdio: 'pipe',
+    env: {
+      ...process.env,
+      GIT_AUTHOR_NAME: 'test',
+      GIT_AUTHOR_EMAIL: 'test@test',
+      GIT_COMMITTER_NAME: 'test',
+      GIT_COMMITTER_EMAIL: 'test@test',
+    },
+  });
+});
+
+afterAll(() => {
+  if (tmpParent) cleanupTempDirSync(tmpParent);
+  if (suiteGitnexusHome) cleanupTempDirSync(suiteGitnexusHome);
+});
+
+describe('analyze WAL auto-checkpoint rename failure (real lbug, no mocks)', () => {
+  it('surfaces the --wal-checkpoint-threshold recovery hint when the rename target is blocked', () => {
+    // Plant a non-empty directory at the path Ladybug's auto-checkpoint
+    // will try to rename `<db>.wal` over. `fs.rename` cannot overwrite a
+    // non-empty directory, and the adapter's orphan-sidecar cleanup uses
+    // `fs.unlink` (which fails on directories) — so the blocker persists
+    // through `doInitLbug` and trips the very first auto-checkpoint that
+    // a `GITNEXUS_WAL_CHECKPOINT_THRESHOLD=1` setting forces.
+    const storageDir = path.join(repoPath, '.gitnexus');
+    fs.mkdirSync(storageDir, { recursive: true });
+    const blockerDir = path.join(storageDir, 'lbug.wal.checkpoint');
+    fs.mkdirSync(blockerDir, { recursive: true });
+    fs.writeFileSync(path.join(blockerDir, 'blocker'), 'cannot-be-renamed-over');
+
+    const result = spawnSync(
+      process.execPath,
+      ['--import', tsxImportUrl, cliEntry, 'analyze', '--skip-skills'],
+      {
+        cwd: repoPath,
+        encoding: 'utf8',
+        // Generous timeout: the test does real CSV/COPY work before the
+        // first failing checkpoint, and CI runners are slow.
+        timeout: process.env.CI ? 120_000 : 60_000,
+        stdio: ['pipe', 'pipe', 'pipe'],
+        env: {
+          ...process.env,
+          GITNEXUS_HOME: suiteGitnexusHome,
+          // Skip ensureHeap re-exec (which drops the tsx loader).
+          NODE_OPTIONS: `${process.env.NODE_OPTIONS || ''} --max-old-space-size=8192`.trim(),
+          // Tiny threshold forces auto-checkpoint on every write so the
+          // first write into the WAL trips the planted rename blocker.
+          GITNEXUS_WAL_CHECKPOINT_THRESHOLD: '1',
+          CI: '1',
+        },
+      },
+    );
+
+    const combined = `${result.stderr}\n${result.stdout}`;
+
+    // The CLI must exit non-zero. status === null means the timeout fired
+    // without a clean exit — also a failure for this assertion.
+    expect(result.status === null ? 'timeout' : result.status).not.toBe(0);
+
+    // Recovery hint must reference the CLI flag and the recommended
+    // 64 MiB threshold (67_108_864 bytes). Both come from the
+    // RECOMMENDED_WAL_CHECKPOINT_THRESHOLD constant in analyze.ts; keep
+    // those values in sync with this assertion if the constant changes.
+    expect(combined).toContain('gitnexus analyze --wal-checkpoint-threshold');
+    expect(combined).toContain('67108864');
+    // The env-var route should be advertised alongside the flag.
+    expect(combined).toContain('GITNEXUS_WAL_CHECKPOINT_THRESHOLD');
+  }, 180_000);
+});
diff --git a/gitnexus/test/unit/analyze-lbug-checkpoint-threshold.test.ts b/gitnexus/test/unit/analyze-lbug-checkpoint-threshold.test.ts
new file mode 100644
index 0000000000..de55d4e712
--- /dev/null
+++ b/gitnexus/test/unit/analyze-lbug-checkpoint-threshold.test.ts
@@ -0,0 +1,99 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+const runFullAnalysisMock = vi.fn();
+
+vi.mock('../../src/core/run-analyze.js', () => ({
+  runFullAnalysis: runFullAnalysisMock,
+}));
+
+vi.mock('../../src/core/lbug/lbug-adapter.js', () => ({
+  closeLbug: vi.fn(async () => undefined),
+}));
+
+vi.mock('../../src/storage/repo-manager.js', () => ({
+  getStoragePaths: vi.fn(() => ({ storagePath: '.gitnexus', lbugPath: '.gitnexus/lbug' })),
+  getGlobalRegistryPath: vi.fn(() => 'registry.json'),
+  RegistryNameCollisionError: class RegistryNameCollisionError extends Error {},
+  AnalysisNotFinalizedError: class AnalysisNotFinalizedError extends Error {},
+  assertAnalysisFinalized: vi.fn(async () => undefined),
+}));
+
+vi.mock('../../src/storage/git.js', () => ({
+  getGitRoot: vi.fn(() => '/repo'),
+  hasGitDir: vi.fn(() => true),
+}));
+
+vi.mock('../../src/core/ingestion/utils/max-file-size.js', () => ({
+  getMaxFileSizeBannerMessage: vi.fn(() => null),
+}));
+
+describe('analyzeCommand --wal-checkpoint-threshold parsing', () => {
+  const ORIGINAL_NODE_OPTIONS = process.env.NODE_OPTIONS;
+  const ORIGINAL_THRESHOLD = process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD;
+
+  beforeEach(() => {
+    vi.resetModules();
+    runFullAnalysisMock.mockReset();
+    process.exitCode = undefined;
+    process.env.NODE_OPTIONS = `${process.env.NODE_OPTIONS ?? ''} --max-old-space-size=8192`.trim();
+  });
+
+  afterEach(() => {
+    if (ORIGINAL_NODE_OPTIONS === undefined) {
+      delete process.env.NODE_OPTIONS;
+    } else {
+      process.env.NODE_OPTIONS = ORIGINAL_NODE_OPTIONS;
+    }
+    if (ORIGINAL_THRESHOLD === undefined) {
+      delete process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD;
+    } else {
+      process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD = ORIGINAL_THRESHOLD;
+    }
+  });
+
+  it.each(['maybe', '-2', '1.5', ''])(
+    'rejects invalid --wal-checkpoint-threshold value %s before analysis starts',
+    async (walCheckpointThreshold) => {
+      const { _captureLogger } = await import('../../src/core/logger.js');
+      const cap = _captureLogger();
+      const { analyzeCommand } = await import('../../src/cli/analyze.js');
+
+      await analyzeCommand(undefined, { walCheckpointThreshold });
+
+      expect(process.exitCode).toBe(1);
+      expect(runFullAnalysisMock).not.toHaveBeenCalled();
+      expect(
+        cap
+          .records()
+          .some((r) => r.msg === '  --wal-checkpoint-threshold must be an integer >= -1.\n'),
+      ).toBe(true);
+      cap.restore();
+    },
+  );
+
+  it.each([
+    ['-1', '-1'],
+    ['0', '0'],
+    ['1024', '1024'],
+  ])(
+    'sets GITNEXUS_WAL_CHECKPOINT_THRESHOLD=%s during runFullAnalysis and restores afterwards',
+    async (cliValue, expectedEnv) => {
+      const { analyzeCommand } = await import('../../src/cli/analyze.js');
+      let envAtCallTime: string | undefined;
+      runFullAnalysisMock.mockImplementation(async () => {
+        envAtCallTime = process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD;
+        return {
+          repoName: 'repo',
+          repoPath: '/repo',
+          stats: {},
+          alreadyUpToDate: true,
+        };
+      });
+
+      await analyzeCommand(undefined, { walCheckpointThreshold: cliValue });
+
+      expect(envAtCallTime).toBe(expectedEnv);
+      expect(process.env.GITNEXUS_WAL_CHECKPOINT_THRESHOLD).toBe(ORIGINAL_THRESHOLD);
+    },
+  );
+});
diff --git a/gitnexus/test/unit/analyze-wal-error.test.ts b/gitnexus/test/unit/analyze-wal-error.test.ts
index 1b4ed5101b..5264dfd3aa 100644
--- a/gitnexus/test/unit/analyze-wal-error.test.ts
+++ b/gitnexus/test/unit/analyze-wal-error.test.ts
@@ -134,4 +134,82 @@ describe('analyzeCommand WAL corruption error handling', () => {
 
     cap.restore();
   });
+
+  it('recommends --wal-checkpoint-threshold on Ladybug checkpoint I/O failures', async () => {
+    runFullAnalysisMock.mockRejectedValue(
+      new Error(
+        'Runtime exception: IO exception: Error renaming file /repo/.gitnexus/lbug.wal to /repo/.gitnexus/lbug.wal.checkpoint. ErrorMessage: Permission denied',
+      ),
+    );
+
+    const { _captureLogger } = await import('../../src/core/logger.js');
+    const cap = _captureLogger();
+    const { analyzeCommand } = await import('../../src/cli/analyze.js');
+
+    await analyzeCommand(undefined, {});
+
+    expect(process.exitCode).toBe(1);
+    const records = cap.records();
+    expect(
+      records.some(
+        (r) =>
+          typeof r.msg === 'string' &&
+          r.msg.includes('gitnexus analyze --wal-checkpoint-threshold'),
+      ),
+    ).toBe(true);
+
+    cap.restore();
+  });
+
+  it('also recommends threshold on .wal.checkpoint remove failures', async () => {
+    runFullAnalysisMock.mockRejectedValue(
+      new Error(
+        'Runtime exception: IO exception: Error removing directory or file /repo/.gitnexus/lbug.wal.checkpoint.  Error Message: Permission denied',
+      ),
+    );
+
+    const { _captureLogger } = await import('../../src/core/logger.js');
+    const cap = _captureLogger();
+    const { analyzeCommand } = await import('../../src/cli/analyze.js');
+
+    await analyzeCommand(undefined, {});
+
+    expect(process.exitCode).toBe(1);
+    const records = cap.records();
+    expect(
+      records.some(
+        (r) =>
+          typeof r.msg === 'string' &&
+          r.msg.includes('gitnexus analyze --wal-checkpoint-threshold'),
+      ),
+    ).toBe(true);
+
+    cap.restore();
+  });
+
+  it('does not recommend threshold for non-checkpoint IO exceptions', async () => {
+    runFullAnalysisMock.mockRejectedValue(
+      new Error(
+        'Runtime exception: IO exception: Error renaming file /repo/.gitnexus/data.tmp to /repo/.gitnexus/data.tmp.bak. ErrorMessage: Permission denied',
+      ),
+    );
+
+    const { _captureLogger } = await import('../../src/core/logger.js');
+    const cap = _captureLogger();
+    const { analyzeCommand } = await import('../../src/cli/analyze.js');
+
+    await analyzeCommand(undefined, {});
+
+    expect(process.exitCode).toBe(1);
+    const records = cap.records();
+    expect(
+      records.some(
+        (r) =>
+          typeof r.msg === 'string' &&
+          r.msg.includes('gitnexus analyze --wal-checkpoint-threshold'),
+      ),
+    ).toBe(false);
+
+    cap.restore();
+  });
 });
diff --git a/gitnexus/test/unit/lbug-checkpoint.test.ts b/gitnexus/test/unit/lbug-checkpoint.test.ts
index 5b9603997a..6eac7e73a0 100644
--- a/gitnexus/test/unit/lbug-checkpoint.test.ts
+++ b/gitnexus/test/unit/lbug-checkpoint.test.ts
@@ -53,9 +53,19 @@ describe('flushWAL / safeClose — consolidation guard (#1376)', () => {
     expect(closeLbugBlock).not.toMatch(/db\.close\(\)/);
   });
 
-  it('flushWAL is the only place that issues conn.query(CHECKPOINT)', () => {
+  it('CHECKPOINT is issued only by flushWAL (best-effort) and tryFlushWAL (rethrows for the retry driver)', () => {
     const matches = adapterSource.match(/conn\.query\('CHECKPOINT'\)/g) ?? [];
-    expect(matches.length).toBe(1);
+    // Two authorized sites: `flushWAL` (swallows errors — used by
+    // `safeClose` and the server's best-effort flush) and `tryFlushWAL`
+    // (rethrows so the manual checkpoint driver in `wal-checkpoint-driver.ts`
+    // can apply its bounded retry). Any third occurrence is a regression —
+    // a CHECKPOINT outside these two helpers will be invisible to the
+    // retry/error policy.
+    expect(matches.length).toBe(2);
+  });
+
+  it('exports tryFlushWAL (CHECKPOINT-with-rethrow for the manual retry driver)', () => {
+    expect(adapterSource).toMatch(/export const tryFlushWAL/);
   });
 
   it('flushWAL drains and closes the CHECKPOINT result before returning', () => {
diff --git a/gitnexus/test/unit/lbug-config-wal.test.ts b/gitnexus/test/unit/lbug-config-wal.test.ts
index 6baea3621b..626e5d2357 100644
--- a/gitnexus/test/unit/lbug-config-wal.test.ts
+++ b/gitnexus/test/unit/lbug-config-wal.test.ts
@@ -1,5 +1,12 @@
 import { describe, expect, it, vi } from 'vitest';
-import { createLbugDatabase, isWalCorruptionError } from '../../src/core/lbug/lbug-config.js';
+import {
+  createLbugDatabase,
+  isLbugCheckpointIoError,
+  isWalCorruptionError,
+} from '../../src/core/lbug/lbug-config.js';
+import { _captureLogger } from '../../src/core/logger.js';
+
+const DEFAULT_THRESHOLD = 64 * 1024 * 1024;
 
 describe('isWalCorruptionError', () => {
   it.each([
@@ -35,6 +42,101 @@ describe('isWalCorruptionError', () => {
 });
 
 describe('createLbugDatabase WAL replay option', () => {
+  it('enables auto-checkpoint by default and uses default threshold (64 MiB)', () => {
+    const Database = vi.fn(function (this: any) {});
+    const lbugModule = { Database } as any;
+
+    createLbugDatabase(lbugModule, '/tmp/lbug-default');
+
+    expect(Database).toHaveBeenCalledWith(
+      '/tmp/lbug-default',
+      0,
+      false,
+      false,
+      expect.any(Number),
+      true,
+      DEFAULT_THRESHOLD,
+      true,
+      true,
+    );
+  });
+
+  it.each([
+    ['0', 0],
+    ['1024', 1024],
+    ['-1', -1],
+    ['invalid', DEFAULT_THRESHOLD],
+    ['', DEFAULT_THRESHOLD],
+  ])('respects GITNEXUS_WAL_CHECKPOINT_THRESHOLD=%s', (raw, expectedCheckpointThreshold) => {
+    try {
+      vi.stubEnv('GITNEXUS_WAL_CHECKPOINT_THRESHOLD', raw);
+      const Database = vi.fn(function (this: any) {});
+      const lbugModule = { Database } as any;
+
+      createLbugDatabase(lbugModule, '/tmp/lbug-env');
+
+      expect(Database).toHaveBeenCalledWith(
+        '/tmp/lbug-env',
+        0,
+        false,
+        false,
+        expect.any(Number),
+        true,
+        expectedCheckpointThreshold,
+        true,
+        true,
+      );
+    } finally {
+      vi.unstubAllEnvs();
+    }
+  });
+
+  it('warns and falls back to default when GITNEXUS_WAL_CHECKPOINT_THRESHOLD is invalid', () => {
+    const cap = _captureLogger();
+    try {
+      vi.stubEnv('GITNEXUS_WAL_CHECKPOINT_THRESHOLD', 'invalid');
+      const Database = vi.fn(function (this: any) {});
+      const lbugModule = { Database } as any;
+
+      createLbugDatabase(lbugModule, '/tmp/lbug-invalid');
+
+      const warn = cap
+        .records()
+        .find(
+          (r) =>
+            typeof r.msg === 'string' &&
+            r.msg.includes('Ignoring invalid GITNEXUS_WAL_CHECKPOINT_THRESHOLD'),
+        );
+      expect(warn).toBeDefined();
+    } finally {
+      vi.unstubAllEnvs();
+      cap.restore();
+    }
+  });
+
+  it('does NOT warn when GITNEXUS_WAL_CHECKPOINT_THRESHOLD is empty (treated as unset)', () => {
+    const cap = _captureLogger();
+    try {
+      vi.stubEnv('GITNEXUS_WAL_CHECKPOINT_THRESHOLD', '');
+      const Database = vi.fn(function (this: any) {});
+      const lbugModule = { Database } as any;
+
+      createLbugDatabase(lbugModule, '/tmp/lbug-empty');
+
+      const warn = cap
+        .records()
+        .find(
+          (r) =>
+            typeof r.msg === 'string' &&
+            r.msg.includes('Ignoring invalid GITNEXUS_WAL_CHECKPOINT_THRESHOLD'),
+        );
+      expect(warn).toBeUndefined();
+    } finally {
+      vi.unstubAllEnvs();
+      cap.restore();
+    }
+  });
+
   it('passes throwOnWalReplayFailure and checksum constructor args explicitly', () => {
     const Database = vi.fn(function (this: any) {});
     const lbugModule = { Database } as any;
@@ -51,9 +153,43 @@ describe('createLbugDatabase WAL replay option', () => {
       true,
       expect.any(Number),
       true,
-      -1,
+      DEFAULT_THRESHOLD,
       false,
       true,
     );
   });
 });
+
+// ─── Finding 8: strict + permissive checkpoint IO matchers ─────────────────
+describe('isLbugCheckpointIoError', () => {
+  it.each([
+    [
+      'native rename failure (v0.16.x exact)',
+      'Runtime exception: IO exception: Error renaming file /repo/.gitnexus/lbug.wal to /repo/.gitnexus/lbug.wal.checkpoint. ErrorMessage: Permission denied',
+    ],
+    [
+      'native remove failure (v0.16.x exact)',
+      'Runtime exception: IO exception: Error removing directory or file /repo/.gitnexus/lbug.wal.checkpoint.  Error Message: Permission denied',
+    ],
+  ])('matches strict %s', (_label, msg) => {
+    expect(isLbugCheckpointIoError(msg)).toBe(true);
+    expect(isLbugCheckpointIoError(new Error(msg))).toBe(true);
+  });
+
+  it('matches permissive fallback for hypothetical message drift', () => {
+    // Permissive matcher accepts any IO-exception-shaped message mentioning .wal.checkpoint.
+    const drift =
+      'Some new wrapper preamble :: IO exception when finalizing /repo/.gitnexus/lbug.wal.checkpoint';
+    expect(isLbugCheckpointIoError(drift)).toBe(true);
+  });
+
+  it('does NOT match unrelated IO errors', () => {
+    expect(
+      isLbugCheckpointIoError(
+        'Runtime exception: IO exception: Error renaming file /repo/data.tmp to /repo/data.tmp.bak',
+      ),
+    ).toBe(false);
+    expect(isLbugCheckpointIoError('Some other error')).toBe(false);
+    expect(isLbugCheckpointIoError(undefined)).toBe(false);
+  });
+});
diff --git a/gitnexus/test/unit/wal-checkpoint-driver.test.ts b/gitnexus/test/unit/wal-checkpoint-driver.test.ts
new file mode 100644
index 0000000000..2079c74dcb
--- /dev/null
+++ b/gitnexus/test/unit/wal-checkpoint-driver.test.ts
@@ -0,0 +1,159 @@
+/**
+ * Unit tests for the manual WAL checkpoint driver (#1741 follow-up).
+ *
+ * The driver wraps a CHECKPOINT call in a bounded retry that fires only
+ * on `isLbugCheckpointIoError` shapes. These tests inject a fake
+ * `checkpointFn`, fake `sleepFn`, and fake `randomFn` to exercise the
+ * retry policy deterministically without touching a real LadybugDB.
+ *
+ * Integration-level coverage that the driver actually runs against a
+ * native engine lives in `test/integration/analyze-wal-checkpoint-failure.test.ts`.
+ */
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+  isManualCheckpointEnabled,
+  runCheckpointWithRetry,
+  startWalCheckpointDriver,
+} from '../../src/core/lbug/wal-checkpoint-driver.js';
+
+const makeCheckpointError = () =>
+  // Matches the strict rename matcher in lbug-config.ts.
+  new Error(
+    'Runtime exception: IO exception: Error renaming file /tmp/lbug.wal to /tmp/lbug.wal.checkpoint. ErrorMessage: Permission denied',
+  );
+
+describe('runCheckpointWithRetry — retry policy', () => {
+  it('returns on first success with attempts=1 and no sleeps', async () => {
+    const checkpointFn = vi.fn().mockResolvedValue(true);
+    const sleepFn = vi.fn().mockResolvedValue(undefined);
+    const randomFn = vi.fn().mockReturnValue(0);
+
+    const result = await runCheckpointWithRetry({ checkpointFn, sleepFn, randomFn });
+
+    expect(result.attempts).toBe(1);
+    expect(result.flushed).toBe(true);
+    expect(checkpointFn).toHaveBeenCalledTimes(1);
+    expect(sleepFn).toHaveBeenCalledTimes(0);
+  });
+
+  it('retries up to 3 times on checkpoint IO errors and succeeds on the final attempt', async () => {
+    const checkpointFn = vi
+      .fn()
+      .mockRejectedValueOnce(makeCheckpointError())
+      .mockRejectedValueOnce(makeCheckpointError())
+      .mockResolvedValueOnce(true);
+    const sleepFn = vi.fn().mockResolvedValue(undefined);
+    // Fixed random returns 0, so jitter contributes 0 ms and we can
+    // assert exact delays against BASE_DELAYS_MS = [50, 200, 500].
+    const randomFn = vi.fn().mockReturnValue(0);
+
+    const result = await runCheckpointWithRetry({ checkpointFn, sleepFn, randomFn });
+
+    expect(result.attempts).toBe(3);
+    expect(result.flushed).toBe(true);
+    expect(checkpointFn).toHaveBeenCalledTimes(3);
+    // Sleeps happen between attempts: after attempt 1 (50 ms) and after
+    // attempt 2 (200 ms). No sleep after the final attempt.
+    expect(sleepFn).toHaveBeenCalledTimes(2);
+    expect(sleepFn).toHaveBeenNthCalledWith(1, 50);
+    expect(sleepFn).toHaveBeenNthCalledWith(2, 200);
+  });
+
+  it('rethrows the last error after exhausting all retries on persistent IO failures', async () => {
+    const persistent = makeCheckpointError();
+    const checkpointFn = vi.fn().mockRejectedValue(persistent);
+    const sleepFn = vi.fn().mockResolvedValue(undefined);
+    const randomFn = vi.fn().mockReturnValue(0);
+
+    await expect(runCheckpointWithRetry({ checkpointFn, sleepFn, randomFn })).rejects.toBe(
+      persistent,
+    );
+
+    expect(checkpointFn).toHaveBeenCalledTimes(3);
+    // Two backoffs (50 ms, 200 ms) but no sleep after the final attempt.
+    expect(sleepFn).toHaveBeenCalledTimes(2);
+    expect(sleepFn).toHaveBeenNthCalledWith(1, 50);
+    expect(sleepFn).toHaveBeenNthCalledWith(2, 200);
+  });
+
+  it('does NOT retry non-checkpoint errors (e.g. WAL corruption surfaces immediately)', async () => {
+    const corruption = new Error('Runtime exception: Corrupted wal file.');
+    const checkpointFn = vi.fn().mockRejectedValue(corruption);
+    const sleepFn = vi.fn().mockResolvedValue(undefined);
+
+    await expect(runCheckpointWithRetry({ checkpointFn, sleepFn })).rejects.toBe(corruption);
+
+    expect(checkpointFn).toHaveBeenCalledTimes(1);
+    expect(sleepFn).toHaveBeenCalledTimes(0);
+  });
+
+  it('jitter is bounded: 0 <= jitter < 50 ms regardless of random source', async () => {
+    const checkpointFn = vi
+      .fn()
+      .mockRejectedValueOnce(makeCheckpointError())
+      .mockResolvedValueOnce(true);
+    const sleepFn = vi.fn().mockResolvedValue(undefined);
+    // Random returns 0.999... — jitter should still be <50 ms (floor).
+    const randomFn = vi.fn().mockReturnValue(0.9999);
+
+    await runCheckpointWithRetry({ checkpointFn, sleepFn, randomFn });
+
+    expect(sleepFn).toHaveBeenCalledTimes(1);
+    const delay = sleepFn.mock.calls[0][0] as number;
+    expect(delay).toBe(50 + Math.floor(0.9999 * 50)); // == 99
+  });
+});
+
+describe('isManualCheckpointEnabled — env var parsing', () => {
+  let originalEnv: string | undefined;
+  beforeEach(() => {
+    originalEnv = process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
+  });
+  afterEach(() => {
+    if (originalEnv === undefined) delete process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
+    else process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT = originalEnv;
+  });
+
+  it('defaults to enabled when the env var is unset', () => {
+    delete process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
+    expect(isManualCheckpointEnabled()).toBe(true);
+  });
+
+  it.each(['0', 'false', 'FALSE', 'off', 'no', ' 0 '])(
+    'returns false for opt-out value %s',
+    (value) => {
+      process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT = value;
+      expect(isManualCheckpointEnabled()).toBe(false);
+    },
+  );
+
+  it.each(['1', 'true', 'on', 'yes', ''])('returns true for non-opt-out value %s', (value) => {
+    process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT = value;
+    expect(isManualCheckpointEnabled()).toBe(true);
+  });
+});
+
+describe('startWalCheckpointDriver — lifecycle', () => {
+  let originalEnv: string | undefined;
+  beforeEach(() => {
+    originalEnv = process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
+  });
+  afterEach(() => {
+    if (originalEnv === undefined) delete process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT;
+    else process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT = originalEnv;
+  });
+
+  it('returns a no-op handle when manual checkpoint is disabled', async () => {
+    process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT = '0';
+    const driver = startWalCheckpointDriver({ periodMs: 10 });
+    // stop() must resolve cleanly even when no interval was scheduled.
+    await expect(driver.stop()).resolves.toBeUndefined();
+  });
+
+  it('stop() is idempotent (second call resolves without throwing)', async () => {
+    process.env.GITNEXUS_WAL_MANUAL_CHECKPOINT = '0';
+    const driver = startWalCheckpointDriver({ periodMs: 10 });
+    await driver.stop();
+    await expect(driver.stop()).resolves.toBeUndefined();
+  });
+});