diff --git a/sky/utils/kubernetes/gpu_labeler.py b/sky/utils/kubernetes/gpu_labeler.py
index 50938f9b1cd..3a4ea946252 100644
--- a/sky/utils/kubernetes/gpu_labeler.py
+++ b/sky/utils/kubernetes/gpu_labeler.py
@@ -183,9 +183,17 @@ def wait_for_jobs_completion(jobs_to_node_names: Dict[str, str],
     batch_v1 = kubernetes.batch_api(context=context)
     w = kubernetes.watch()
     completed_jobs = []
+    # Use resource_version="0" to start from the oldest available version.
+    # In multi-replica API server environments, replicas may be at different
+    # resource versions due to replication lag. Without specifying this, the
+    # watch may get version X from one replica but connect to another replica
+    # that only has up to version Y < X, causing "Too large resource version"
+    # errors. Using "0" ensures all replicas can serve the request from their
+    # oldest available version, avoiding version mismatches.
     for event in w.stream(func=batch_v1.list_namespaced_job,
                           namespace=namespace,
-                          timeout_seconds=timeout):
+                          timeout_seconds=timeout,
+                          resource_version='0'):
         job = event['object']
         job_name = job.metadata.name
         if job_name in jobs_to_node_names:
@@ -212,7 +220,7 @@ def wait_for_jobs_completion(jobs_to_node_names: Dict[str, str],
         _format_string(
             f'Timed out after waiting {timeout} seconds '
             'for job to complete', colorama.Style.DIM))
-    return False  #Timed out
+    return False  # Timed out
 
 
 def main():