
Commit 6c782eb

Support new compile API from autoparallel PR #77
1 parent 60f5f11 commit 6c782eb

File tree

1 file changed: +1 / -5 lines changed

torchtitan/experiments/auto_parallel/parallelize_llama.py

Lines changed: 1 addition & 5 deletions
@@ -64,7 +64,7 @@ def input_fn():
     param_dtype = TORCH_DTYPE_MAP[job_config.training.mixed_precision_param]
     reduce_dtype = TORCH_DTYPE_MAP[job_config.training.mixed_precision_reduce]
     mp_policy = MixedPrecisionPolicy(param_dtype=param_dtype, reduce_dtype=reduce_dtype)
-    with AutoParallel(model, input_fn, world_mesh, mp_policy=mp_policy) as autop:
+    with AutoParallel(model, input_fn, world_mesh, mp_policy=mp_policy, compile=job_config.training.compile) as autop:
         autop.add_parameter_memory_constraint(low=None, high=None)

         possible_input_shardings = {
@@ -87,8 +87,4 @@ def input_fn():
     logger.info(f"AutoParallel took {t1 - t0} seconds")
     parallel_mod = autop.apply_placement(sharding_placement)

-    if job_config.training.compile:
-        torch._inductor.config.reorder_for_peak_memory = False
-        parallel_mod.compile(fullgraph=True)
-
     return parallel_mod
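
For reference, a minimal sketch of the call site after this commit. All names below (model, input_fn, world_mesh, job_config, TORCH_DTYPE_MAP, MixedPrecisionPolicy, AutoParallel, sharding_placement) are assumed to be defined elsewhere in parallelize_llama.py; the point is only that compilation is now requested through AutoParallel's compile argument instead of a follow-up parallel_mod.compile(fullgraph=True) call.

    # Sketch only: surrounding setup comes from the rest of parallelize_llama.py.
    param_dtype = TORCH_DTYPE_MAP[job_config.training.mixed_precision_param]
    reduce_dtype = TORCH_DTYPE_MAP[job_config.training.mixed_precision_reduce]
    mp_policy = MixedPrecisionPolicy(param_dtype=param_dtype, reduce_dtype=reduce_dtype)

    # Compilation is handled by AutoParallel itself; the old
    # torch._inductor.config.reorder_for_peak_memory workaround and the explicit
    # parallel_mod.compile(fullgraph=True) call are no longer needed.
    with AutoParallel(model, input_fn, world_mesh, mp_policy=mp_policy,
                      compile=job_config.training.compile) as autop:
        autop.add_parameter_memory_constraint(low=None, high=None)
        ...
        parallel_mod = autop.apply_placement(sharding_placement)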
