diff --git a/op-challenger/cmd/run_trace.go b/op-challenger/cmd/run_trace.go
index e2fee2e800f3c..588b0894751d7 100644
--- a/op-challenger/cmd/run_trace.go
+++ b/op-challenger/cmd/run_trace.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"strings"
+	"time"
 
 	"github.com/ethereum-optimism/optimism/op-challenger/flags"
 	gameTypes "github.com/ethereum-optimism/optimism/op-challenger/game/types"
@@ -43,11 +44,12 @@ func RunTrace(ctx *cli.Context, _ context.CancelCauseFunc) (cliapp.Lifecycle, er
 			runConfigs = append(runConfigs, runner.RunConfig{GameType: gameType})
 		}
 	}
-	return runner.NewRunner(logger, cfg, runConfigs), nil
+	vmTimeout := ctx.Duration(VMTimeoutFlag.Name)
+	return runner.NewRunner(logger, cfg, runConfigs, vmTimeout), nil
 }
 
 func runTraceFlags() []cli.Flag {
-	return append(flags.Flags, RunTraceRunFlag)
+	return append(flags.Flags, RunTraceRunFlag, VMTimeoutFlag)
 }
 
 var RunTraceCommand = &cli.Command{
@@ -58,6 +60,8 @@ var RunTraceCommand = &cli.Command{
 	Flags: runTraceFlags(),
 }
 
+const DefaultVMTimeout = 3 * time.Hour
+
 var (
 	RunTraceRunFlag = &cli.StringSliceFlag{
 		Name: "run",
@@ -69,6 +73,12 @@ var (
 			"If the prestateHash is omitted, the absolute prestate hash used for new games on-chain.",
 		EnvVars: opservice.PrefixEnvVar(flags.EnvVarPrefix, "RUN"),
 	}
+	VMTimeoutFlag = &cli.DurationFlag{
+		Name:    "vm-timeout",
+		Usage:   fmt.Sprintf("Maximum duration for VM execution per run. Default is %s. Set to 0 to disable timeout.", DefaultVMTimeout),
+		EnvVars: opservice.PrefixEnvVar(flags.EnvVarPrefix, "VM_TIMEOUT"),
+		Value:   DefaultVMTimeout,
+	}
 )
 
 func parseRunArgs(args []string) ([]runner.RunConfig, error) {
diff --git a/op-challenger/runner/metrics.go b/op-challenger/runner/metrics.go
index 8aa7494fa2c04..9b0fd98a56846 100644
--- a/op-challenger/runner/metrics.go
+++ b/op-challenger/runner/metrics.go
@@ -20,16 +20,22 @@ type Metrics struct {
 	*metrics.VmMetrics
 	opmetrics.RPCMetrics
 
-	up                         prometheus.Gauge
-	vmLastExecutionTime        *prometheus.GaugeVec
-	vmLastMemoryUsed           *prometheus.GaugeVec
-	successTotal               *prometheus.CounterVec
-	failuresTotal              *prometheus.CounterVec
-	consecutiveFailuresCurrent *prometheus.GaugeVec
-	panicsTotal                *prometheus.CounterVec
-	invalidTotal               *prometheus.CounterVec
+	up                              prometheus.Gauge
+	vmLastExecutionTime             *prometheus.GaugeVec
+	vmLastMemoryUsed                *prometheus.GaugeVec
+	successTotal                    *prometheus.CounterVec
+	setupFailuresTotal              *prometheus.CounterVec
+	consecutiveSetupFailuresCurrent *prometheus.GaugeVec
+	vmFailuresTotal                 *prometheus.CounterVec
 }
 
+// Reason labels for vmFailuresTotal metric
+const (
+	ReasonIncorrectStatus = "incorrect_status"
+	ReasonPanic           = "panic"
+	ReasonTimeout         = "timeout"
+)
+
 var _ Metricer = (*Metrics)(nil)
 
 // Metrics implementation must implement RegistryMetricer to allow the metrics server to work.
@@ -68,34 +74,30 @@ func NewMetrics(runConfigs []RunConfig) *Metrics {
 			Name:      "success_total",
 			Help:      "Number of VM executions that successfully verified the output root",
 		}, []string{"type"}),
-		failuresTotal: factory.NewCounterVec(prometheus.CounterOpts{
+		setupFailuresTotal: factory.NewCounterVec(prometheus.CounterOpts{
 			Namespace: Namespace,
-			Name:      "failures_total",
-			Help:      "Number of failures to execute a VM",
+			Name:      "setup_failures_total",
+			Help:      "Number of setup failures before VM execution",
 		}, []string{"type"}),
-		consecutiveFailuresCurrent: factory.NewGaugeVec(prometheus.GaugeOpts{
+		consecutiveSetupFailuresCurrent: factory.NewGaugeVec(prometheus.GaugeOpts{
 			Namespace: Namespace,
-			Name:      "consecutive_failures_current",
+			Name:      "consecutive_setup_failures_current",
 			Help:      "Number of consecutive setup failures by VM type. Resets to 0 on any complete run.",
 		}, []string{"type"}),
-		panicsTotal: factory.NewCounterVec(prometheus.CounterOpts{
-			Namespace: Namespace,
-			Name:      "panics_total",
-			Help:      "Number of times the VM panicked",
-		}, []string{"type"}),
-		invalidTotal: factory.NewCounterVec(prometheus.CounterOpts{
+		vmFailuresTotal: factory.NewCounterVec(prometheus.CounterOpts{
 			Namespace: Namespace,
-			Name:      "invalid_total",
-			Help:      "Number of runs that determined the output root was invalid",
-		}, []string{"type"}),
+			Name:      "vm_failures_total",
+			Help:      "Number of VM execution failures by type and reason (incorrect_status, panic, timeout)",
+		}, []string{"type", "reason"}),
 	}
 
 	for _, runConfig := range runConfigs {
 		metrics.successTotal.WithLabelValues(runConfig.Name).Add(0)
-		metrics.failuresTotal.WithLabelValues(runConfig.Name).Add(0)
-		metrics.consecutiveFailuresCurrent.WithLabelValues(runConfig.Name).Set(0)
-		metrics.panicsTotal.WithLabelValues(runConfig.Name).Add(0)
-		metrics.invalidTotal.WithLabelValues(runConfig.Name).Add(0)
+		metrics.setupFailuresTotal.WithLabelValues(runConfig.Name).Add(0)
+		metrics.consecutiveSetupFailuresCurrent.WithLabelValues(runConfig.Name).Set(0)
+		metrics.vmFailuresTotal.WithLabelValues(runConfig.Name, ReasonIncorrectStatus).Add(0)
+		metrics.vmFailuresTotal.WithLabelValues(runConfig.Name, ReasonPanic).Add(0)
+		metrics.vmFailuresTotal.WithLabelValues(runConfig.Name, ReasonTimeout).Add(0)
 		metrics.RecordUp()
 	}
 
@@ -123,22 +125,15 @@ func (m *Metrics) RecordVmMemoryUsed(vmType string, memoryUsed uint64) {
 
 func (m *Metrics) RecordSuccess(vmType string) {
 	m.successTotal.WithLabelValues(vmType).Inc()
-	m.consecutiveFailuresCurrent.WithLabelValues(vmType).Set(0)
-}
-
-func (m *Metrics) RecordFailure(vmType string) {
-	m.failuresTotal.WithLabelValues(vmType).Inc()
-	m.consecutiveFailuresCurrent.WithLabelValues(vmType).Inc()
+	m.consecutiveSetupFailuresCurrent.WithLabelValues(vmType).Set(0)
 }
 
-func (m *Metrics) RecordPanic(vmType string) {
-	m.panicsTotal.WithLabelValues(vmType).Inc()
-	// The result was bad, but we still completed setup successfully
-	m.consecutiveFailuresCurrent.WithLabelValues(vmType).Set(0)
+func (m *Metrics) RecordSetupFailure(vmType string) {
+	m.setupFailuresTotal.WithLabelValues(vmType).Inc()
+	m.consecutiveSetupFailuresCurrent.WithLabelValues(vmType).Inc()
 }
 
-func (m *Metrics) RecordInvalid(vmType string) {
-	m.invalidTotal.WithLabelValues(vmType).Inc()
-	// The result was bad, but we still completed setup successfully
-	m.consecutiveFailuresCurrent.WithLabelValues(vmType).Set(0)
+func (m *Metrics) RecordVmFailure(vmType string, reason string) {
+	m.vmFailuresTotal.WithLabelValues(vmType, reason).Inc()
+	m.consecutiveSetupFailuresCurrent.WithLabelValues(vmType).Set(0)
 }
diff --git a/op-challenger/runner/runner.go b/op-challenger/runner/runner.go
index 796a9da1897a3..da3b06a26c6e5 100644
--- a/op-challenger/runner/runner.go
+++ b/op-challenger/runner/runner.go
@@ -36,6 +36,7 @@ import (
 
 var (
 	ErrUnexpectedStatusCode = errors.New("unexpected status code")
+	ErrVMTimeout            = errors.New("VM execution timed out")
 )
 
 type Metricer interface {
@@ -43,9 +44,8 @@ type Metricer interface {
 	metrics.VmMetricer
 	opmetrics.RPCMetricer
 
-	RecordFailure(vmType string)
-	RecordPanic(vmType string)
-	RecordInvalid(vmType string)
+	RecordSetupFailure(vmType string)
+	RecordVmFailure(vmType string, reason string)
 	RecordSuccess(vmType string)
 }
 
@@ -56,11 +56,24 @@ type RunConfig struct {
 	PrestateFilename string
 }
 
+type TraceProviderCreator func(
+	ctx context.Context,
+	logger log.Logger,
+	m vm.Metricer,
+	cfg *config.Config,
+	prestateSource prestateFetcher,
+	gameType gameTypes.GameType,
+	localInputs utils.LocalGameInputs,
+	dir string,
+) (types.TraceProvider, error)
+
 type Runner struct {
-	log        log.Logger
-	cfg        *config.Config
-	runConfigs []RunConfig
-	m          Metricer
+	log                  log.Logger
+	cfg                  *config.Config
+	runConfigs           []RunConfig
+	m                    Metricer
+	vmTimeout            time.Duration
+	traceProviderCreator TraceProviderCreator
 
 	running atomic.Bool
 	ctx     context.Context
@@ -69,12 +82,14 @@ type Runner struct {
 	metricsSrv *httputil.HTTPServer
 }
 
-func NewRunner(logger log.Logger, cfg *config.Config, runConfigs []RunConfig) *Runner {
+func NewRunner(logger log.Logger, cfg *config.Config, runConfigs []RunConfig, vmTimeout time.Duration) *Runner {
 	return &Runner{
-		log:        logger,
-		cfg:        cfg,
-		runConfigs: runConfigs,
-		m:          NewMetrics(runConfigs),
+		log:                  logger,
+		cfg:                  cfg,
+		runConfigs:           runConfigs,
+		m:                    NewMetrics(runConfigs),
+		vmTimeout:            vmTimeout,
+		traceProviderCreator: createTraceProvider,
 	}
 }
 
@@ -142,13 +157,16 @@ func (r *Runner) runAndRecordOnce(ctx context.Context, rlog log.Logger, runConfi
 	recordError := func(err error, configName string, m Metricer, log log.Logger) {
 		if errors.Is(err, ErrUnexpectedStatusCode) {
 			log.Error("Incorrect status code", "type", runConfig.Name, "err", err)
-			m.RecordInvalid(configName)
+			m.RecordVmFailure(configName, ReasonIncorrectStatus)
 		} else if errors.Is(err, trace.ErrVMPanic) {
 			log.Error("VM panicked", "type", runConfig.Name)
-			m.RecordPanic(configName)
+			m.RecordVmFailure(configName, ReasonPanic)
+		} else if errors.Is(err, ErrVMTimeout) {
+			log.Error("VM execution timed out", "type", runConfig.Name, "timeout", r.vmTimeout)
+			m.RecordVmFailure(configName, ReasonTimeout)
 		} else if err != nil {
 			log.Error("Failed to run", "type", runConfig.Name, "err", err)
-			m.RecordFailure(configName)
+			m.RecordSetupFailure(configName)
 		} else {
 			log.Info("Successfully verified output root", "type", runConfig.Name)
 			m.RecordSuccess(configName)
@@ -195,12 +213,20 @@
 }
 
 func (r *Runner) runOnce(ctx context.Context, logger log.Logger, name string, gameType gameTypes.GameType, prestateSource prestateFetcher, localInputs utils.LocalGameInputs, dir string) error {
-	provider, err := createTraceProvider(ctx, logger, metrics.NewTypedVmMetrics(r.m, name), r.cfg, prestateSource, gameType, localInputs, dir)
+	if r.vmTimeout > 0 {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, r.vmTimeout)
+		defer cancel()
+	}
+	provider, err := r.traceProviderCreator(ctx, logger, metrics.NewTypedVmMetrics(r.m, name), r.cfg, prestateSource, gameType, localInputs, dir)
 	if err != nil {
 		return fmt.Errorf("failed to create trace provider: %w", err)
 	}
 	hash, err := provider.Get(ctx, types.RootPosition)
 	if err != nil {
+		if errors.Is(err, context.DeadlineExceeded) {
+			return fmt.Errorf("%w: %w", ErrVMTimeout, err)
+		}
 		return fmt.Errorf("failed to execute trace provider: %w", err)
 	}
 	if hash[0] != mipsevm.VMStatusValid {
diff --git a/op-challenger/runner/runner_test.go b/op-challenger/runner/runner_test.go
new file mode 100644
index 0000000000000..2af0cea0b9065
--- /dev/null
+++ b/op-challenger/runner/runner_test.go
@@ -0,0 +1,138 @@
+package runner
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/ethereum-optimism/optimism/cannon/mipsevm"
+	"github.com/ethereum-optimism/optimism/op-challenger/config"
+	"github.com/ethereum-optimism/optimism/op-challenger/game/fault/trace/utils"
+	"github.com/ethereum-optimism/optimism/op-challenger/game/fault/trace/vm"
+	"github.com/ethereum-optimism/optimism/op-challenger/game/fault/types"
+	gameTypes "github.com/ethereum-optimism/optimism/op-challenger/game/types"
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/stretchr/testify/require"
+)
+
+func TestNewRunnerSetsTimeout(t *testing.T) {
+	timeout := 5 * time.Minute
+	r := NewRunner(nil, nil, nil, timeout)
+	require.Equal(t, timeout, r.vmTimeout)
+}
+
+func TestRunOnceAppliesTimeout(t *testing.T) {
+	tests := []struct {
+		name           string
+		vmTimeout      time.Duration
+		expectDeadline bool
+	}{
+		{
+			name:           "timeout applied when set",
+			vmTimeout:      100 * time.Millisecond,
+			expectDeadline: true,
+		},
+		{
+			name:           "no deadline when timeout is zero",
+			vmTimeout:      0,
+			expectDeadline: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var capturedCtx context.Context
+
+			mockCreator := func(
+				ctx context.Context,
+				_ log.Logger,
+				_ vm.Metricer,
+				_ *config.Config,
+				_ prestateFetcher,
+				_ gameTypes.GameType,
+				_ utils.LocalGameInputs,
+				_ string,
+			) (types.TraceProvider, error) {
+				capturedCtx = ctx
+				return &stubTraceProvider{}, nil
+			}
+
+			r := &Runner{
+				vmTimeout:            tt.vmTimeout,
+				traceProviderCreator: mockCreator,
+			}
+
+			err := r.runOnce(context.Background(), log.New(), "test", gameTypes.CannonGameType, nil, utils.LocalGameInputs{}, "")
+			require.NoError(t, err)
+
+			_, hasDeadline := capturedCtx.Deadline()
+			require.Equal(t, tt.expectDeadline, hasDeadline)
+		})
+	}
+}
+
+type stubTraceProvider struct{}
+
+func (s *stubTraceProvider) Get(_ context.Context, _ types.Position) (common.Hash, error) {
+	// Return a hash with VMStatusValid as the first byte
+	var hash common.Hash
+	hash[0] = mipsevm.VMStatusValid
+	return hash, nil
+}
+
+func (s *stubTraceProvider) GetStepData(_ context.Context, _ types.Position) ([]byte, []byte, *types.PreimageOracleData, error) {
+	return nil, nil, nil, nil
+}
+
+func (s *stubTraceProvider) AbsolutePreStateCommitment(_ context.Context) (common.Hash, error) {
+	return common.Hash{}, nil
+}
+
+func (s *stubTraceProvider) GetL2BlockNumberChallenge(_ context.Context) (*types.InvalidL2BlockNumberChallenge, error) {
+	return nil, types.ErrL2BlockNumberValid
+}
+
+// slowTraceProvider blocks until context is done, simulating a slow VM
+type slowTraceProvider struct{}
+
+func (s *slowTraceProvider) Get(ctx context.Context, _ types.Position) (common.Hash, error) {
+	<-ctx.Done()
+	return common.Hash{}, ctx.Err()
+}
+
+func (s *slowTraceProvider) GetStepData(_ context.Context, _ types.Position) ([]byte, []byte, *types.PreimageOracleData, error) {
+	return nil, nil, nil, nil
+}
+
+func (s *slowTraceProvider) AbsolutePreStateCommitment(_ context.Context) (common.Hash, error) {
+	return common.Hash{}, nil
+}
+
+func (s *slowTraceProvider) GetL2BlockNumberChallenge(_ context.Context) (*types.InvalidL2BlockNumberChallenge, error) {
+	return nil, types.ErrL2BlockNumberValid
+}
+
+func TestRunOnceReturnsTimeoutError(t *testing.T) {
+	mockCreator := func(
+		_ context.Context,
+		_ log.Logger,
+		_ vm.Metricer,
+		_ *config.Config,
+		_ prestateFetcher,
+		_ gameTypes.GameType,
+		_ utils.LocalGameInputs,
+		_ string,
+	) (types.TraceProvider, error) {
+		return &slowTraceProvider{}, nil
+	}
+
+	r := &Runner{
+		vmTimeout:            50 * time.Millisecond,
+		traceProviderCreator: mockCreator,
+	}
+
+	err := r.runOnce(context.Background(), log.New(), "test", gameTypes.CannonGameType, nil, utils.LocalGameInputs{}, "")
+	require.ErrorIs(t, err, ErrVMTimeout)
+	require.ErrorIs(t, err, context.DeadlineExceeded)
+}