diff --git a/docs/reference/api.md b/docs/reference/api.md
index c7d9e46ffda..dc621718f0a 100644
--- a/docs/reference/api.md
+++ b/docs/reference/api.md
@@ -55,6 +55,25 @@ _Appears in:_
+#### ClusterUpgradeOptions
+
+
+
+These options are currently only supported for the NewClusterWithIncrementalUpgrade type.
+
+
+
+_Appears in:_
+- [RayServiceUpgradeStrategy](#rayserviceupgradestrategy)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `maxSurgePercent` _integer_ | The percentage of Serve request capacity that the upgraded cluster should scale to handle each interval. Defaults to 100%. | 100 | |
+| `stepSizePercent` _integer_ | The percentage of traffic to switch to the upgraded RayCluster at a set interval after scaling by MaxSurgePercent. | | |
+| `intervalSeconds` _integer_ | The interval in seconds between transferring StepSizePercent traffic from the old to the new RayCluster. | | |
+| `gatewayClassName` _string_ | The name of the Gateway Class installed by the Kubernetes Cluster admin. | | |
+
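+An illustrative `upgradeStrategy` snippet (the values below are placeholders, and `istio` assumes a
+GatewayClass of that name has been installed by the cluster admin):
+
+```yaml
+upgradeStrategy:
+  type: NewClusterWithIncrementalUpgrade
+  clusterUpgradeOptions:
+    gatewayClassName: istio   # hypothetical GatewayClass name
+    maxSurgePercent: 20       # scale the new cluster's capacity by 20% per interval
+    stepSizePercent: 10       # shift 10% of traffic per interval
+    intervalSeconds: 30       # seconds between traffic shifts
+```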
+
#### DeletionCondition
@@ -377,6 +396,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
-| `type` _[RayServiceUpgradeType](#rayserviceupgradetype)_ | Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. | | |
+| `type` _[RayServiceUpgradeType](#rayserviceupgradetype)_ | Type represents the strategy used when upgrading the RayService. Currently supports `NewClusterWithIncrementalUpgrade`, `NewCluster`, and `None`. | | |
+| `clusterUpgradeOptions` _[ClusterUpgradeOptions](#clusterupgradeoptions)_ | ClusterUpgradeOptions defines the behavior of an upgrade using the NewClusterWithIncrementalUpgrade type. The RayServiceIncrementalUpgrade feature gate must be enabled to set ClusterUpgradeOptions. | | |
#### RayServiceUpgradeType
diff --git a/go.mod b/go.mod
index 472e6d593df..e93dc132eda 100644
--- a/go.mod
+++ b/go.mod
@@ -73,7 +73,7 @@ require (
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
- github.com/mattn/go-isatty v0.0.19 // indirect
+ github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/moby/spdystream v0.5.0 // indirect
github.com/moby/term v0.5.0 // indirect
@@ -95,12 +95,12 @@ require (
go.uber.org/automaxprocs v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
- golang.org/x/net v0.38.0 // indirect
+ golang.org/x/net v0.39.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
- golang.org/x/sync v0.12.0 // indirect
+ golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
- golang.org/x/term v0.30.0 // indirect
- golang.org/x/text v0.23.0 // indirect
+ golang.org/x/term v0.31.0 // indirect
+ golang.org/x/text v0.24.0 // indirect
golang.org/x/time v0.10.0 // indirect
golang.org/x/tools v0.31.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
@@ -112,6 +112,7 @@ require (
k8s.io/component-base v0.33.1 // indirect
k8s.io/component-helpers v0.33.1 // indirect
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
+ sigs.k8s.io/gateway-api v1.3.0 // indirect
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
sigs.k8s.io/kustomize/api v0.19.0 // indirect
sigs.k8s.io/kustomize/kyaml v0.19.0 // indirect
diff --git a/go.sum b/go.sum
index dddab9f7e86..22e4f1113d9 100644
--- a/go.sum
+++ b/go.sum
@@ -139,8 +139,9 @@ github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUt
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=
github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
@@ -263,8 +264,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
-golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
-golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
+golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
@@ -274,8 +275,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
-golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
+golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -292,12 +293,12 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
-golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
+golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o=
+golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
-golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
+golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
+golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4=
golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -380,6 +381,8 @@ k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8=
sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM=
+sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M=
+sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/kustomize/api v0.19.0 h1:F+2HB2mU1MSiR9Hp1NEgoU2q9ItNOaBJl0I4Dlus5SQ=
diff --git a/helm-chart/kuberay-operator/README.md b/helm-chart/kuberay-operator/README.md
index 2a50677e9e1..ecc0f8cf988 100644
--- a/helm-chart/kuberay-operator/README.md
+++ b/helm-chart/kuberay-operator/README.md
@@ -174,6 +174,8 @@ spec:
| featureGates[1].enabled | bool | `false` | |
| featureGates[2].name | string | `"RayMultiHostIndexing"` | |
| featureGates[2].enabled | bool | `false` | |
+| featureGates[3].name | string | `"RayServiceIncrementalUpgrade"` | |
+| featureGates[3].enabled | bool | `false` | |
| metrics.enabled | bool | `true` | Whether KubeRay operator should emit control plane metrics. |
| metrics.serviceMonitor.enabled | bool | `false` | Enable a prometheus ServiceMonitor |
| metrics.serviceMonitor.interval | string | `"30s"` | Prometheus ServiceMonitor interval |
diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml
index e2d61172a3c..267de9a20f8 100644
--- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml
+++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml
@@ -8241,6 +8241,25 @@ spec:
type: integer
upgradeStrategy:
properties:
+ clusterUpgradeOptions:
+ properties:
+ gatewayClassName:
+ type: string
+ intervalSeconds:
+ format: int32
+ type: integer
+ maxSurgePercent:
+ default: 100
+ format: int32
+ type: integer
+ stepSizePercent:
+ format: int32
+ type: integer
+ required:
+ - gatewayClassName
+ - intervalSeconds
+ - stepSizePercent
+ type: object
type:
type: string
type: object
@@ -8269,6 +8288,9 @@ spec:
type: string
type: object
type: object
+ lastTrafficMigratedTime:
+ format: date-time
+ type: string
rayClusterName:
type: string
rayClusterStatus:
@@ -8383,6 +8405,12 @@ spec:
type: string
type: object
type: object
+ targetCapacity:
+ format: int32
+ type: integer
+ trafficRoutedPercent:
+ format: int32
+ type: integer
type: object
conditions:
items:
@@ -8452,6 +8480,9 @@ spec:
type: string
type: object
type: object
+ lastTrafficMigratedTime:
+ format: date-time
+ type: string
rayClusterName:
type: string
rayClusterStatus:
@@ -8566,6 +8597,12 @@ spec:
type: string
type: object
type: object
+ targetCapacity:
+ format: int32
+ type: integer
+ trafficRoutedPercent:
+ format: int32
+ type: integer
type: object
serviceStatus:
type: string
diff --git a/helm-chart/kuberay-operator/templates/_helpers.tpl b/helm-chart/kuberay-operator/templates/_helpers.tpl
index 5d14510a61b..d5e0e7352d0 100644
--- a/helm-chart/kuberay-operator/templates/_helpers.tpl
+++ b/helm-chart/kuberay-operator/templates/_helpers.tpl
@@ -222,6 +222,17 @@ rules:
- patch
- update
- watch
+- apiGroups:
+ - gateway.networking.k8s.io
+ resources:
+ - gateways
+ - httproutes
+ verbs:
+ - create
+ - get
+ - list
+ - update
+ - watch
- apiGroups:
- networking.k8s.io
resources:
diff --git a/helm-chart/kuberay-operator/values.yaml b/helm-chart/kuberay-operator/values.yaml
index 3bc1d2765c4..1b7b46020b0 100644
--- a/helm-chart/kuberay-operator/values.yaml
+++ b/helm-chart/kuberay-operator/values.yaml
@@ -119,6 +119,8 @@ featureGates:
enabled: false
- name: RayMultiHostIndexing
enabled: false
+- name: RayServiceIncrementalUpgrade
+ enabled: false
# Configurations for KubeRay operator metrics.
metrics:
diff --git a/ray-operator/Makefile b/ray-operator/Makefile
index faab31894b5..1ef2ad7e6db 100644
--- a/ray-operator/Makefile
+++ b/ray-operator/Makefile
@@ -75,8 +75,16 @@ test-e2e-autoscaler: WHAT ?= ./test/e2eautoscaler
test-e2e-autoscaler: manifests fmt vet ## Run e2e autoscaler tests.
go test -timeout 30m -v $(WHAT)
+test-e2e-rayservice: WHAT ?= ./test/e2erayservice
+test-e2e-rayservice: manifests fmt vet ## Run e2e RayService tests.
+ go test -timeout 30m -v $(WHAT)
+
test-e2e-upgrade: WHAT ?= ./test/e2eupgrade
-test-e2e-upgrade: manifests fmt vet ## Run e2e tests.
+test-e2e-upgrade: manifests fmt vet ## Run e2e operator upgrade tests.
+ go test -timeout 30m -v $(WHAT)
+
+test-e2e-incremental-upgrade: WHAT ?= ./test/e2eincrementalupgrade
+test-e2e-incremental-upgrade: manifests fmt vet ## Run e2e RayService incremental upgrade tests.
go test -timeout 30m -v $(WHAT)
test-e2e-rayjob-submitter: WHAT ?= ./test/e2erayjobsubmitter
diff --git a/ray-operator/apis/ray/v1/rayservice_types.go b/ray-operator/apis/ray/v1/rayservice_types.go
index e7d73e07d8e..b8fb29a81ae 100644
--- a/ray-operator/apis/ray/v1/rayservice_types.go
+++ b/ray-operator/apis/ray/v1/rayservice_types.go
@@ -22,6 +22,9 @@ const (
type RayServiceUpgradeType string
const (
+ // During upgrade, the NewClusterWithIncrementalUpgrade strategy creates an upgraded cluster and
+ // gradually scales it up and migrates traffic to it using the Gateway API.
+ NewClusterWithIncrementalUpgrade RayServiceUpgradeType = "NewClusterWithIncrementalUpgrade"
// During upgrade, NewCluster strategy will create new upgraded cluster and switch to it when it becomes ready
NewCluster RayServiceUpgradeType = "NewCluster"
// No new cluster will be created while the strategy is set to None
@@ -57,10 +60,27 @@ var DeploymentStatusEnum = struct {
UNHEALTHY: "UNHEALTHY",
}
+// These options are currently only supported for the NewClusterWithIncrementalUpgrade type.
+type ClusterUpgradeOptions struct {
+ // The percentage of Serve request capacity that the upgraded cluster should scale to handle each interval.
+ // Defaults to 100%.
+ // +kubebuilder:default:=100
+ MaxSurgePercent *int32 `json:"maxSurgePercent,omitempty"`
+ // The percentage of traffic to switch to the upgraded RayCluster at a set interval after scaling by MaxSurgePercent.
+ StepSizePercent *int32 `json:"stepSizePercent"`
+ // The interval in seconds between transferring StepSizePercent traffic from the old to the new RayCluster.
+ IntervalSeconds *int32 `json:"intervalSeconds"`
+ // The name of the Gateway Class installed by the Kubernetes Cluster admin.
+ GatewayClassName string `json:"gatewayClassName"`
+}
+
type RayServiceUpgradeStrategy struct {
- // Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`.
+ // Type represents the strategy used when upgrading the RayService. Currently supports `NewClusterWithIncrementalUpgrade`, `NewCluster`, and `None`.
// +optional
Type *RayServiceUpgradeType `json:"type,omitempty"`
+ // ClusterUpgradeOptions defines the behavior of an upgrade using the NewClusterWithIncrementalUpgrade type.
+ // The RayServiceIncrementalUpgrade feature gate must be enabled to set ClusterUpgradeOptions.
+ ClusterUpgradeOptions *ClusterUpgradeOptions `json:"clusterUpgradeOptions,omitempty"`
}
// RayServiceSpec defines the desired state of RayService
@@ -129,6 +149,20 @@ type RayServiceStatus struct {
// Important: Run "make" to regenerate code after modifying this file
// +optional
Applications map[string]AppStatus `json:"applicationStatuses,omitempty"`
+ // TargetCapacity is the `target_capacity` percentage for all Serve replicas
+ // across the cluster for this RayService. The `num_replicas`, `min_replicas`, `max_replicas`,
+ // and `initial_replicas` for each deployment will be scaled by this percentage.
+ // +optional
+ TargetCapacity *int32 `json:"targetCapacity,omitempty"`
+ // TrafficRoutedPercent is the percentage of traffic that is routed to the Serve service
+ // for this RayService. TrafficRoutedPercent is updated to reflect the weight on the HTTPRoute
+ // created for this RayService during incremental upgrades to a new cluster.
+ // +optional
+ TrafficRoutedPercent *int32 `json:"trafficRoutedPercent,omitempty"`
+ // LastTrafficMigratedTime is the last time that TrafficRoutedPercent was updated to a new value
+ // for this RayService.
+ // +optional
+ LastTrafficMigratedTime *metav1.Time `json:"lastTrafficMigratedTime,omitempty"`
// +optional
RayClusterName string `json:"rayClusterName,omitempty"`
// +optional
@@ -184,8 +218,7 @@ const (
type RayService struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
-
- Spec RayServiceSpec `json:"spec,omitempty"`
+ Spec RayServiceSpec `json:"spec,omitempty"`
// +optional
Status RayServiceStatuses `json:"status,omitempty"`
}
diff --git a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go
index 5a6ce86bc10..8deb750000c 100644
--- a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go
+++ b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go
@@ -103,6 +103,36 @@ func (in *AutoscalerOptions) DeepCopy() *AutoscalerOptions {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterUpgradeOptions) DeepCopyInto(out *ClusterUpgradeOptions) {
+ *out = *in
+ if in.MaxSurgePercent != nil {
+ in, out := &in.MaxSurgePercent, &out.MaxSurgePercent
+ *out = new(int32)
+ **out = **in
+ }
+ if in.StepSizePercent != nil {
+ in, out := &in.StepSizePercent, &out.StepSizePercent
+ *out = new(int32)
+ **out = **in
+ }
+ if in.IntervalSeconds != nil {
+ in, out := &in.IntervalSeconds, &out.IntervalSeconds
+ *out = new(int32)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterUpgradeOptions.
+func (in *ClusterUpgradeOptions) DeepCopy() *ClusterUpgradeOptions {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterUpgradeOptions)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeletionCondition) DeepCopyInto(out *DeletionCondition) {
*out = *in
@@ -721,6 +751,20 @@ func (in *RayServiceStatus) DeepCopyInto(out *RayServiceStatus) {
(*out)[key] = *val.DeepCopy()
}
}
+ if in.TargetCapacity != nil {
+ in, out := &in.TargetCapacity, &out.TargetCapacity
+ *out = new(int32)
+ **out = **in
+ }
+ if in.TrafficRoutedPercent != nil {
+ in, out := &in.TrafficRoutedPercent, &out.TrafficRoutedPercent
+ *out = new(int32)
+ **out = **in
+ }
+ if in.LastTrafficMigratedTime != nil {
+ in, out := &in.LastTrafficMigratedTime, &out.LastTrafficMigratedTime
+ *out = (*in).DeepCopy()
+ }
in.RayClusterStatus.DeepCopyInto(&out.RayClusterStatus)
}
@@ -770,6 +814,11 @@ func (in *RayServiceUpgradeStrategy) DeepCopyInto(out *RayServiceUpgradeStrategy
*out = new(RayServiceUpgradeType)
**out = **in
}
+ if in.ClusterUpgradeOptions != nil {
+ in, out := &in.ClusterUpgradeOptions, &out.ClusterUpgradeOptions
+ *out = new(ClusterUpgradeOptions)
+ (*in).DeepCopyInto(*out)
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceUpgradeStrategy.
diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml
index e2d61172a3c..267de9a20f8 100644
--- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml
+++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml
@@ -8241,6 +8241,25 @@ spec:
type: integer
upgradeStrategy:
properties:
+ clusterUpgradeOptions:
+ properties:
+ gatewayClassName:
+ type: string
+ intervalSeconds:
+ format: int32
+ type: integer
+ maxSurgePercent:
+ default: 100
+ format: int32
+ type: integer
+ stepSizePercent:
+ format: int32
+ type: integer
+ required:
+ - gatewayClassName
+ - intervalSeconds
+ - stepSizePercent
+ type: object
type:
type: string
type: object
@@ -8269,6 +8288,9 @@ spec:
type: string
type: object
type: object
+ lastTrafficMigratedTime:
+ format: date-time
+ type: string
rayClusterName:
type: string
rayClusterStatus:
@@ -8383,6 +8405,12 @@ spec:
type: string
type: object
type: object
+ targetCapacity:
+ format: int32
+ type: integer
+ trafficRoutedPercent:
+ format: int32
+ type: integer
type: object
conditions:
items:
@@ -8452,6 +8480,9 @@ spec:
type: string
type: object
type: object
+ lastTrafficMigratedTime:
+ format: date-time
+ type: string
rayClusterName:
type: string
rayClusterStatus:
@@ -8566,6 +8597,12 @@ spec:
type: string
type: object
type: object
+ targetCapacity:
+ format: int32
+ type: integer
+ trafficRoutedPercent:
+ format: int32
+ type: integer
type: object
serviceStatus:
type: string
diff --git a/ray-operator/config/rbac/role.yaml b/ray-operator/config/rbac/role.yaml
index ba840f0c27f..9ea1db93190 100644
--- a/ray-operator/config/rbac/role.yaml
+++ b/ray-operator/config/rbac/role.yaml
@@ -107,6 +107,17 @@ rules:
- patch
- update
- watch
+- apiGroups:
+ - gateway.networking.k8s.io
+ resources:
+ - gateways
+ - httproutes
+ verbs:
+ - create
+ - get
+ - list
+ - update
+ - watch
- apiGroups:
- networking.k8s.io
resources:
diff --git a/ray-operator/controllers/ray/common/association.go b/ray-operator/controllers/ray/common/association.go
index 63eefa94bc4..1539e49aa88 100644
--- a/ray-operator/controllers/ray/common/association.go
+++ b/ray-operator/controllers/ray/common/association.go
@@ -203,3 +203,17 @@ func RayClusterNetworkResourcesOptions(instance *rayv1.RayCluster) AssociationOp
},
}
}
+
+func RayServiceGatewayNamespacedName(rayService *rayv1.RayService) types.NamespacedName {
+ return types.NamespacedName{
+ Name: fmt.Sprintf("%s-gateway", rayService.Name),
+ Namespace: rayService.Namespace,
+ }
+}
+
+func RayServiceHTTPRouteNamespacedName(rayService *rayv1.RayService) types.NamespacedName {
+ return types.NamespacedName{
+ Name: fmt.Sprintf("%s-httproute", rayService.Name),
+ Namespace: rayService.Namespace,
+ }
+}
diff --git a/ray-operator/controllers/ray/common/job.go b/ray-operator/controllers/ray/common/job.go
index 3cb070be168..05025a3e86e 100644
--- a/ray-operator/controllers/ray/common/job.go
+++ b/ray-operator/controllers/ray/common/job.go
@@ -91,7 +91,7 @@ func BuildJobSubmitCommand(rayJobInstance *rayv1.RayJob, submissionMode rayv1.Jo
// The sidecar submitter shares the same network namespace as the Ray dashboard,
// so it uses 127.0.0.1 to connect to the Ray dashboard.
rayHeadContainer := rayJobInstance.Spec.RayClusterSpec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex]
- port = utils.FindContainerPort(&rayHeadContainer, utils.DashboardPortName, utils.DefaultDashboardPort)
+ port = int(utils.FindContainerPort(&rayHeadContainer, utils.DashboardPortName, utils.DefaultDashboardPort))
address = "http://127.0.0.1:" + strconv.Itoa(port)
case rayv1.K8sJobMode:
// Submitter is a separate K8s Job; use cluster dashboard address.
diff --git a/ray-operator/controllers/ray/common/service.go b/ray-operator/controllers/ray/common/service.go
index 71cea97c005..545b3a6ae98 100644
--- a/ray-operator/controllers/ray/common/service.go
+++ b/ray-operator/controllers/ray/common/service.go
@@ -10,6 +10,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
ctrl "sigs.k8s.io/controller-runtime"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
@@ -184,7 +185,10 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus
namespace := rayCluster.Namespace
crdType := utils.RayClusterCRD
if isRayService {
- name = rayService.Name
+ // For an incremental upgrade, the Serve service name is based on the unique RayCluster name.
+ if !utils.IsIncrementalUpgradeEnabled(&rayService.Spec) {
+ name = rayService.Name
+ }
namespace = rayService.Namespace
crdType = utils.RayServiceCRD
}
@@ -225,7 +229,7 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus
"otherwise, the Kubernetes service for Ray Serve will not be created.")
}
- if rayService.Spec.ServeService != nil {
+ if rayService.Spec.ServeService != nil && !utils.IsIncrementalUpgradeEnabled(&rayService.Spec) {
// Use the provided "custom" ServeService.
// Deep copy the ServeService to avoid modifying the original object
serveService := rayService.Spec.ServeService.DeepCopy()
@@ -317,6 +321,26 @@ func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) *corev1.Serv
return headlessService
}
+// GetServePort finds the container port named "serve" in the RayCluster's head group spec.
+// It returns the default Ray Serve port 8000 if not explicitly defined.
+func GetServePort(cluster *rayv1.RayCluster) gwv1.PortNumber {
+ if cluster == nil || len(cluster.Spec.HeadGroupSpec.Template.Spec.Containers) == 0 {
+ return gwv1.PortNumber(utils.DefaultServingPort)
+ }
+
+ // Get the head container
+ headContainer := &cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex]
+
+ // Find the port named "serve" in the head group's container spec.
+ port := utils.FindContainerPort(
+ headContainer,
+ utils.ServingPortName,
+ utils.DefaultServingPort,
+ )
+
+ return gwv1.PortNumber(port)
+}
+
func setServiceTypeForUserProvidedService(ctx context.Context, service *corev1.Service, defaultType corev1.ServiceType) {
log := ctrl.LoggerFrom(ctx)
// If the user has not specified a service type, use the default service type
diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go
index 7a1a50a36f6..26ff10a6373 100644
--- a/ray-operator/controllers/ray/rayservice_controller.go
+++ b/ray-operator/controllers/ray/rayservice_controller.go
@@ -6,6 +6,7 @@ import (
"fmt"
"math"
"os"
+ "reflect"
"strconv"
"strings"
"time"
@@ -21,6 +22,7 @@ import (
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/tools/record"
"k8s.io/utils/lru"
+ "k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -28,6 +30,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/common"
@@ -90,6 +93,8 @@ func NewRayServiceReconciler(_ context.Context, mgr manager.Manager, provider ut
// +kubebuilder:rbac:groups=core,resources=services/proxy,verbs=get;update;patch
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update
// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;delete
+// +kubebuilder:rbac:groups="gateway.networking.k8s.io",resources=gateways,verbs=get;list;watch;create;update;
+// +kubebuilder:rbac:groups="gateway.networking.k8s.io",resources=httproutes,verbs=get;list;watch;create;update;
// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=get;list;watch;create;delete;update
// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=rolebindings,verbs=get;list;watch;create;delete
@@ -146,6 +151,8 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
// 1. If there is a pending cluster, reconcile serve applications for the pending cluster.
// 2. If there are both active and pending clusters, reconcile serve applications for the pending cluster only.
// 3. If there is no pending cluster, reconcile serve applications for the active cluster.
+ // 4. During NewClusterWithIncrementalUpgrade, reconcileServe will reconcile either the pending or active cluster
+ // based on total TargetCapacity.
var isActiveClusterReady, isPendingClusterReady bool = false, false
var activeClusterServeApplications, pendingClusterServeApplications map[string]rayv1.AppStatus = nil, nil
if pendingRayClusterInstance != nil {
@@ -162,6 +169,35 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
if isActiveClusterReady, activeClusterServeApplications, err = r.reconcileServe(ctx, rayServiceInstance, activeRayClusterInstance); err != nil {
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err
}
+ } else if activeRayClusterInstance != nil && pendingRayClusterInstance != nil && utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) {
+ logger.Info("Reconciling the Serve applications for active cluster during NewClusterWithIncrementalUpgrade", "clusterName", activeRayClusterInstance.Name)
+ if isActiveClusterReady, activeClusterServeApplications, err = r.reconcileServe(ctx, rayServiceInstance, activeRayClusterInstance); err != nil {
+ return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err
+ }
+ }
+
+ // Check if NewClusterWithIncrementalUpgrade is enabled; if so, reconcile Gateway objects.
+ var httpRouteInstance *gwv1.HTTPRoute
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) {
+ // Ensure per-cluster Serve service exists for the active and pending RayClusters.
+ if err = r.reconcilePerClusterServeService(ctx, rayServiceInstance, activeRayClusterInstance); err != nil {
+ return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err
+ }
+ if err = r.reconcilePerClusterServeService(ctx, rayServiceInstance, pendingRayClusterInstance); err != nil {
+ return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err
+ }
+ // Creates or updates a Gateway CR that points to the Serve services of
+ // the active and pending (if it exists) RayClusters. For incremental upgrades,
+ // the Gateway endpoint is used rather than the Serve service.
+ err = r.reconcileGateway(ctx, rayServiceInstance)
+ if err != nil {
+ return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, client.IgnoreNotFound(err)
+ }
+ // Create or update the HTTPRoute for the Gateway, passing in the pending cluster readiness status.
+ httpRouteInstance, err = r.reconcileHTTPRoute(ctx, rayServiceInstance, isPendingClusterReady)
+ if err != nil {
+ return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, client.IgnoreNotFound(err)
+ }
}
// Reconcile K8s services and make sure it points to the correct RayCluster.
@@ -170,7 +206,10 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
targetCluster := activeRayClusterInstance
logMsg := "Reconciling K8s services to point to the active Ray cluster."
- if isPendingClusterReady {
+ isIncrementalUpgradeInProgress := utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) && meta.IsStatusConditionTrue(rayServiceInstance.Status.Conditions, string(rayv1.UpgradeInProgress))
+ if isPendingClusterReady && !isIncrementalUpgradeInProgress {
+ // This switch is skipped during an incremental upgrade: the pending cluster becomes ready while the
+ // upgrade is still in progress, and traffic reaches it through its own per-cluster Serve service.
targetCluster = pendingRayClusterInstance
logMsg = "Reconciling K8s services to point to the pending Ray cluster to switch traffic because it is ready."
}
@@ -197,6 +236,7 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
pendingRayClusterInstance,
activeClusterServeApplications,
pendingClusterServeApplications,
+ httpRouteInstance,
); err != nil {
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err
}
@@ -224,7 +264,70 @@ func (r *RayServiceReconciler) reconcileServicesToReadyCluster(ctx context.Conte
return headSvc, serveSvc, nil
}
-func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceInstance *rayv1.RayService, headSvc, serveSvc *corev1.Service, activeCluster, pendingCluster *rayv1.RayCluster, activeClusterServeApplications, pendingClusterServeApplications map[string]rayv1.AppStatus) error {
+// reconcilePromotionAndServingStatus handles the promotion logic after an upgrade. It returns
+// isPendingClusterServing, which is true if the main Kubernetes services point to the pending cluster.
+func reconcilePromotionAndServingStatus(ctx context.Context, headSvc, serveSvc *corev1.Service, rayServiceInstance *rayv1.RayService, pendingCluster *rayv1.RayCluster) (isPendingClusterServing bool) {
+ logger := ctrl.LoggerFrom(ctx)
+
+ // Step 1: Service Consistency Check. Ensure head and serve services point to the
+ // same cluster (active or pending).
+ clusterSvcPointsTo := utils.GetRayClusterNameFromService(headSvc)
+ if clusterSvcPointsTo != utils.GetRayClusterNameFromService(serveSvc) {
+ // This indicates a broken state that the controller cannot recover from automatically.
+ panic("headSvc and serveSvc are not pointing to the same cluster")
+ }
+
+ // Step 2: Cluster Switching Logic. Determine which cluster the services are currently pointing to and
+ // determine if promotion should occur.
+ pendingClusterName := rayServiceInstance.Status.PendingServiceStatus.RayClusterName
+ activeClusterName := rayServiceInstance.Status.ActiveServiceStatus.RayClusterName
+
+ // Verify that the service points to a known cluster (either active or pending).
+ if clusterSvcPointsTo != pendingClusterName && clusterSvcPointsTo != activeClusterName {
+ panic("clusterName from services is not equal to pendingCluster or activeCluster")
+ }
+
+ var shouldPromote bool
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) {
+ // An incremental upgrade is complete when the active cluster has 0% capacity and the pending cluster has
+ // 100% of the traffic. We can't promote the pending cluster until traffic has been fully migrated.
+ if meta.IsStatusConditionTrue(rayServiceInstance.Status.Conditions, string(rayv1.UpgradeInProgress)) {
+ if utils.IsIncrementalUpgradeComplete(rayServiceInstance, pendingCluster) {
+ shouldPromote = true
+ logger.Info("Incremental upgrade completed, triggering promotion.", "rayService", rayServiceInstance.Name)
+ }
+ } else if activeClusterName == "" && pendingClusterName != "" {
+ // The Active cluster is empty when the RayCluster is first scaling up.
+ shouldPromote = true
+ }
+ } else {
+ // For traditional blue/green upgrade, promotion is complete when the Service selector has switched.
+ if activeClusterName != clusterSvcPointsTo {
+ shouldPromote = true
+ }
+ }
+
+ // Step 3: Promote the pending cluster if prior conditions are met.
+ if shouldPromote {
+ logger.Info("Promoting pending cluster to active.",
+ "oldCluster", rayServiceInstance.Status.ActiveServiceStatus.RayClusterName,
+ "newCluster", rayServiceInstance.Status.PendingServiceStatus.RayClusterName)
+
+ rayServiceInstance.Status.ActiveServiceStatus = rayServiceInstance.Status.PendingServiceStatus
+ rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{}
+ }
+
+ return (clusterSvcPointsTo == pendingClusterName)
+}
+
+func (r *RayServiceReconciler) calculateStatus(
+ ctx context.Context,
+ rayServiceInstance *rayv1.RayService,
+ headSvc, serveSvc *corev1.Service,
+ activeCluster, pendingCluster *rayv1.RayCluster,
+ activeClusterServeApplications, pendingClusterServeApplications map[string]rayv1.AppStatus,
+ httpRoute *gwv1.HTTPRoute,
+) error {
logger := ctrl.LoggerFrom(ctx)
rayServiceInstance.Status.ObservedGeneration = rayServiceInstance.ObjectMeta.Generation
@@ -244,32 +347,35 @@ func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceIn
rayServiceInstance.Status.ActiveServiceStatus.Applications = activeClusterServeApplications
rayServiceInstance.Status.PendingServiceStatus.Applications = pendingClusterServeApplications
- isPendingClusterServing := false
+ var isPendingClusterServing bool
if headSvc != nil && serveSvc != nil {
- pendingClusterName := rayServiceInstance.Status.PendingServiceStatus.RayClusterName
- activeClusterName := rayServiceInstance.Status.ActiveServiceStatus.RayClusterName
-
- // Promote the pending cluster to the active cluster if both RayService's head and serve services
- // have already pointed to the pending cluster.
- clusterName := utils.GetRayClusterNameFromService(headSvc)
- if clusterName != utils.GetRayClusterNameFromService(serveSvc) {
- panic("headSvc and serveSvc are not pointing to the same cluster")
- }
- // Verify cluster name matches either pending or active cluster
- if clusterName != pendingClusterName && clusterName != activeClusterName {
- panic("clusterName is not equal to pendingCluster or activeCluster")
- }
- isPendingClusterServing = clusterName == pendingClusterName
-
- // If services point to a different cluster than the active one, promote pending to active
- logger.Info("calculateStatus", "clusterSvcPointingTo", clusterName, "pendingClusterName", pendingClusterName, "activeClusterName", activeClusterName)
- if activeClusterName != clusterName {
- logger.Info("Promoting pending cluster to active",
- "oldCluster", rayServiceInstance.Status.ActiveServiceStatus.RayClusterName,
- "newCluster", clusterName)
- rayServiceInstance.Status.ActiveServiceStatus = rayServiceInstance.Status.PendingServiceStatus
- rayServiceInstance.Status.PendingServiceStatus = rayv1.RayServiceStatus{}
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) {
+ logger.Info("Processing NewClusterWithIncrementalUpgrade strategy.", "rayService", rayServiceInstance.Name)
+ oldActivePercent := ptr.Deref(rayServiceInstance.Status.ActiveServiceStatus.TrafficRoutedPercent, -1)
+ oldPendingPercent := ptr.Deref(rayServiceInstance.Status.PendingServiceStatus.TrafficRoutedPercent, -1)
+
+ // Update TrafficRoutedPercent to each RayService based on current weights from HTTPRoute.
+ activeWeight, pendingWeight := utils.GetWeightsFromHTTPRoute(httpRoute, rayServiceInstance)
+ now := metav1.Time{Time: time.Now()}
+ if activeWeight >= 0 {
+ rayServiceInstance.Status.ActiveServiceStatus.TrafficRoutedPercent = ptr.To(activeWeight)
+ logger.Info("Updated active TrafficRoutedPercent from HTTPRoute", "activeClusterWeight", activeWeight)
+ if activeWeight != oldActivePercent {
+ rayServiceInstance.Status.ActiveServiceStatus.LastTrafficMigratedTime = &now
+ logger.Info("Updated LastTrafficMigratedTime of Active Service.")
+ }
+ }
+ if pendingWeight >= 0 {
+ rayServiceInstance.Status.PendingServiceStatus.TrafficRoutedPercent = ptr.To(pendingWeight)
+ logger.Info("Updated pending TrafficRoutedPercent from HTTPRoute", "pendingClusterWeight", pendingWeight)
+ if pendingWeight != oldPendingPercent {
+ rayServiceInstance.Status.PendingServiceStatus.LastTrafficMigratedTime = &now
+ logger.Info("Updated LastTrafficMigratedTime of Pending Service.")
+ }
+ }
}
+ // Reconcile serving status and promotion logic for all upgrade strategies.
+ isPendingClusterServing = reconcilePromotionAndServingStatus(ctx, headSvc, serveSvc, rayServiceInstance, pendingCluster)
}
if shouldPrepareNewCluster(ctx, rayServiceInstance, activeCluster, pendingCluster, isPendingClusterServing) {
@@ -278,10 +384,34 @@ func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceIn
}
logger.Info("Preparing a new pending RayCluster instance by setting RayClusterName",
"clusterName", rayServiceInstance.Status.PendingServiceStatus.RayClusterName)
+
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) {
+ // Set IncrementalUpgrade related Status fields for new pending RayCluster if enabled.
+ if rayServiceInstance.Status.ActiveServiceStatus.RayClusterName == "" {
+ // If no active RayCluster exists, default to starting with 100% TargetCapacity.
+ // This is the case when a RayCluster is first starting for a RayService, so we should
+ // immediately scale it to full target capacity.
+ if rayServiceInstance.Status.ActiveServiceStatus.TargetCapacity == nil {
+ rayServiceInstance.Status.PendingServiceStatus.TargetCapacity = ptr.To(int32(100))
+ }
+ } else if meta.IsStatusConditionTrue(rayServiceInstance.Status.Conditions, string(rayv1.UpgradeInProgress)) {
+ // Pending RayCluster during an upgrade should start with 0% TargetCapacity, since
+ // traffic will be gradually migrated to the new cluster.
+ if rayServiceInstance.Status.PendingServiceStatus.TargetCapacity == nil {
+ rayServiceInstance.Status.PendingServiceStatus.TargetCapacity = ptr.To(int32(0))
+ }
+ }
+ }
}
serveEndPoints := &corev1.Endpoints{}
- if err := r.Get(ctx, common.RayServiceServeServiceNamespacedName(rayServiceInstance), serveEndPoints); err != nil && !errors.IsNotFound(err) {
+ serveServiceName := common.RayServiceServeServiceNamespacedName(rayServiceInstance)
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) && activeCluster != nil {
+ // The Serve service name is based on the unique RayCluster name, since we use the
+ // per-cluster Serve services for traffic routing during an incremental upgrade.
+ serveServiceName.Name = utils.GenerateServeServiceName(activeCluster.Name)
+ }
+ if err := r.Get(ctx, serveServiceName, serveEndPoints); err != nil && !errors.IsNotFound(err) {
return err
}
@@ -291,9 +421,22 @@ func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceIn
for _, subset := range serveEndPoints.Subsets {
numServeEndpoints += len(subset.Addresses)
}
+
+ // During NewClusterWithIncrementalUpgrade, the pending RayCluster is also serving.
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) && pendingCluster != nil {
+ pendingServeServiceName := common.RayClusterServeServiceNamespacedName(pendingCluster)
+ if err := r.Get(ctx, pendingServeServiceName, serveEndPoints); err != nil && !errors.IsNotFound(err) {
+ return err
+ }
+ for _, subset := range serveEndPoints.Subsets {
+ numServeEndpoints += len(subset.Addresses)
+ }
+ }
+
if numServeEndpoints > math.MaxInt32 {
return errstd.New("numServeEndpoints exceeds math.MaxInt32")
}
+
rayServiceInstance.Status.NumServeEndpoints = int32(numServeEndpoints) //nolint:gosec // This is a false positive from gosec. See https://github.com/securego/gosec/issues/1212 for more details.
calculateConditions(rayServiceInstance)
@@ -302,6 +445,7 @@ func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceIn
if meta.IsStatusConditionTrue(rayServiceInstance.Status.Conditions, string(rayv1.RayServiceReady)) {
rayServiceInstance.Status.ServiceStatus = rayv1.Running
}
+
return nil
}
@@ -392,7 +536,12 @@ func isZeroDowntimeUpgradeEnabled(ctx context.Context, upgradeStrategy *rayv1.Ra
if upgradeStrategy != nil {
upgradeType := upgradeStrategy.Type
if upgradeType != nil {
- if *upgradeType != rayv1.NewCluster {
+ if features.Enabled(features.RayServiceIncrementalUpgrade) {
+ if *upgradeType != rayv1.NewCluster && *upgradeType != rayv1.NewClusterWithIncrementalUpgrade {
+ logger.Info("Zero-downtime upgrade is disabled because UpgradeStrategy.Type is not set to %s or %s.", string(rayv1.NewCluster), string(rayv1.NewClusterWithIncrementalUpgrade))
+ return false
+ }
+ } else if *upgradeType != rayv1.NewCluster {
logger.Info("Zero-downtime upgrade is disabled because UpgradeStrategy.Type is not set to NewCluster.")
return false
}
@@ -407,6 +556,286 @@ func isZeroDowntimeUpgradeEnabled(ctx context.Context, upgradeStrategy *rayv1.Ra
return true
}
+// `createGateway` constructs the desired Gateway object for a RayService; `reconcileGateway` creates or updates it.
+func (r *RayServiceReconciler) createGateway(rayServiceInstance *rayv1.RayService) (*gwv1.Gateway, error) {
+ options := utils.GetRayServiceClusterUpgradeOptions(&rayServiceInstance.Spec)
+ if options == nil {
+ return nil, errstd.New("Missing RayService ClusterUpgradeOptions during upgrade.")
+ }
+
+ gatewayName := rayServiceInstance.Name + "-gateway"
+ // Define the desired Gateway object
+ rayServiceGateway := &gwv1.Gateway{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: gatewayName,
+ Namespace: rayServiceInstance.Namespace,
+ },
+ Spec: gwv1.GatewaySpec{
+ GatewayClassName: gwv1.ObjectName(options.GatewayClassName),
+ Listeners: []gwv1.Listener{
+ {
+ Name: gwv1.SectionName(utils.GatewayListenerPortName),
+ Protocol: gwv1.HTTPProtocolType,
+ Port: utils.DefaultGatewayListenerPort,
+ },
+ },
+ },
+ }
+
+ return rayServiceGateway, nil
+}
+
+// `reconcileGateway` reconciles a Gateway resource for a RayService. The possible cases are:
+// (1) Create a new Gateway instance. (2) Update the Gateway instance if RayService has updated. (3) Do nothing.
+func (r *RayServiceReconciler) reconcileGateway(ctx context.Context, rayServiceInstance *rayv1.RayService) error {
+ logger := ctrl.LoggerFrom(ctx)
+ var err error
+
+ // Construct desired Gateway object for RayService
+ desiredGateway, err := r.createGateway(rayServiceInstance)
+ if err != nil {
+ logger.Error(err, "Failed to build Gateway object for Rayservice")
+ return err
+ }
+ if desiredGateway == nil {
+ logger.Info("Skipping Gateway reconciliation: desired Gateway is nil")
+ return nil
+ }
+
+ // Check for existing RayService Gateway, create the desired Gateway if none is found
+ existingGateway := &gwv1.Gateway{}
+ if err := r.Get(ctx, common.RayServiceGatewayNamespacedName(rayServiceInstance), existingGateway); err != nil {
+ if errors.IsNotFound(err) {
+ // Set the ownership in order to do the garbage collection by k8s.
+ if err := ctrl.SetControllerReference(rayServiceInstance, desiredGateway, r.Scheme); err != nil {
+ return err
+ }
+ logger.Info("Creating a new Gateway instance", "Gateway Listeners", desiredGateway.Spec.Listeners)
+ if err := r.Create(ctx, desiredGateway); err != nil {
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.FailedToCreateGateway), "Failed to create Gateway for RayService %s/%s: %v", desiredGateway.Namespace, desiredGateway.Name, err)
+ return err
+ }
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeNormal, string(utils.CreatedGateway), "Created Gateway for RayService %s/%s", desiredGateway.Namespace, desiredGateway.Name)
+ return nil
+ }
+ return err
+ }
+
+ // If Gateway already exists, check if update is needed to reach desired state
+ if !reflect.DeepEqual(existingGateway.Spec, desiredGateway.Spec) {
+ logger.Info("Updating existing Gateway", "name", existingGateway.Name)
+ existingGateway.Spec = desiredGateway.Spec
+ if err := r.Update(ctx, existingGateway); err != nil {
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.FailedToUpdateGateway), "Failed to update the Gateway %s/%s: %v", existingGateway.Namespace, existingGateway.Name, err)
+ return err
+ }
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeNormal, string(utils.UpdatedGateway), "Updated the Gateway %s/%s", existingGateway.Namespace, existingGateway.Name)
+ }
+
+ return nil
+}
+
+// calculateTrafficRoutedPercent determines the HTTPRoute traffic split between the active and pending RayClusters.
+//
+// The new weights are calculated using:
+// - Current TrafficRoutedPercent values
+// - Time-based migration using StepSizePercent and IntervalSeconds
+// - TargetCapacity constraints
+//
+// Returns the active cluster traffic weight, pending cluster traffic weight, and an error if any.
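+//
+// Illustrative walk-through (values are assumptions, not defaults): with StepSizePercent=25,
+// IntervalSeconds=60, and a pending TargetCapacity of 100, the pending weight advances
+// 0 -> 25 -> 50 -> 75 -> 100, stepping at most once per 60s interval, while the active
+// weight is always 100 minus the pending weight.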
+func (r *RayServiceReconciler) calculateTrafficRoutedPercent(ctx context.Context, rayServiceInstance *rayv1.RayService, isPendingClusterReady bool) (activeClusterWeight, pendingClusterWeight int32, err error) {
+ logger := ctrl.LoggerFrom(ctx)
+ activeServiceStatus := &rayServiceInstance.Status.ActiveServiceStatus
+ pendingServiceStatus := &rayServiceInstance.Status.PendingServiceStatus
+
+ // Default to 100% traffic on the active cluster.
+ activeClusterWeight = ptr.Deref(activeServiceStatus.TrafficRoutedPercent, 100)
+ pendingClusterWeight = ptr.Deref(pendingServiceStatus.TrafficRoutedPercent, 0)
+
+ if isPendingClusterReady {
+ // Zero-downtime upgrade in progress.
+ options := utils.GetRayServiceClusterUpgradeOptions(&rayServiceInstance.Spec)
+ if options == nil {
+ return 0, 0, errstd.New("ClusterUpgradeOptions are not set during upgrade")
+ }
+
+ // Check that target_capacity has been updated before migrating traffic.
+ pendingClusterTargetCapacity := ptr.Deref(pendingServiceStatus.TargetCapacity, 0)
+
+ if pendingClusterWeight == pendingClusterTargetCapacity {
+ // Stop traffic migration because the pending cluster's current traffic weight has reached its target capacity limit.
+ return activeClusterWeight, pendingClusterWeight, nil
+ }
+
+ // If IntervalSeconds has passed since LastTrafficMigratedTime, migrate StepSizePercent traffic
+ // from the active RayCluster to the pending RayCluster.
+ intervalSeconds := time.Duration(*options.IntervalSeconds) * time.Second
+ lastTrafficMigratedTime := pendingServiceStatus.LastTrafficMigratedTime
+ if lastTrafficMigratedTime == nil || time.Since(lastTrafficMigratedTime.Time) >= intervalSeconds {
+ // Gradually shift traffic from the active to the pending cluster.
+ logger.Info("Upgrade in progress. Migrating traffic by StepSizePercent.", "stepSize", *options.StepSizePercent)
+ proposedPendingWeight := pendingClusterWeight + *options.StepSizePercent
+ pendingClusterWeight = min(100, proposedPendingWeight, pendingClusterTargetCapacity)
+ activeClusterWeight = 100 - pendingClusterWeight
+ }
+ }
+
+ return activeClusterWeight, pendingClusterWeight, nil
+}
+
+// createHTTPRoute creates a desired HTTPRoute object for RayService incremental upgrade.
+//
+// The function performs the following operations:
+// 1. Retrieves Gateway instance to attach the HTTPRoute
+// 2. Gets active and pending RayCluster instances and their Serve services
+// 3. Calls `calculateTrafficRoutedPercent` to calculate the new traffic weights
+// 4. Configures HTTPRoute with appropriate backend references and weights
+//
+// Returns the configured HTTPRoute object or error if any step fails.
+func (r *RayServiceReconciler) createHTTPRoute(ctx context.Context, rayServiceInstance *rayv1.RayService, isPendingClusterReady bool) (*gwv1.HTTPRoute, error) {
+ logger := ctrl.LoggerFrom(ctx)
+
+ // Retrieve Gateway instance to attach this HTTPRoute to.
+ gatewayInstance := &gwv1.Gateway{}
+ if err := r.Get(ctx, common.RayServiceGatewayNamespacedName(rayServiceInstance), gatewayInstance); err != nil {
+ return nil, err
+ }
+
+ // Retrieve the active RayCluster
+ activeRayCluster, err := r.getRayClusterByNamespacedName(ctx, common.RayServiceActiveRayClusterNamespacedName(rayServiceInstance))
+ if err != nil && !errors.IsNotFound(err) {
+ logger.Error(err, "Failed to retrieve active RayCluster")
+ return nil, err
+ }
+ if activeRayCluster == nil {
+ logger.Info("Active RayCluster not found, skipping HTTPRoute creation.")
+ return nil, nil
+ }
+
+ // Attempt to retrieve pending RayCluster
+ pendingRayCluster, err := r.getRayClusterByNamespacedName(ctx, common.RayServicePendingRayClusterNamespacedName(rayServiceInstance))
+ if err != nil && !errors.IsNotFound(err) {
+ logger.Error(err, "Failed to retrieve pending RayCluster.")
+ return nil, err
+ }
+
+ activeClusterWeight, pendingClusterWeight, err := r.calculateTrafficRoutedPercent(ctx, rayServiceInstance, isPendingClusterReady)
+ if err != nil {
+ logger.Info("Failed to reconcile TrafficRoutedPercent for active and pending clusters.")
+ return nil, err
+ }
+
+ activeClusterServeSvcName := utils.GenerateServeServiceName(activeRayCluster.Name)
+ activeServePort := common.GetServePort(activeRayCluster)
+
+ backendRefs := []gwv1.HTTPBackendRef{
+ {
+ BackendRef: gwv1.BackendRef{
+ BackendObjectReference: gwv1.BackendObjectReference{
+ Name: gwv1.ObjectName(activeClusterServeSvcName),
+ Namespace: ptr.To(gwv1.Namespace(gatewayInstance.Namespace)),
+ Port: ptr.To(activeServePort),
+ },
+ Weight: ptr.To(activeClusterWeight),
+ },
+ },
+ }
+
+ if pendingRayCluster != nil {
+ logger.Info("Pending RayCluster exists. Including it in HTTPRoute.", "RayCluster", pendingRayCluster.Name)
+ pendingClusterServeSvcName := utils.GenerateServeServiceName(pendingRayCluster.Name)
+ pendingServePort := common.GetServePort(pendingRayCluster)
+
+ backendRefs = append(backendRefs, gwv1.HTTPBackendRef{
+ BackendRef: gwv1.BackendRef{
+ BackendObjectReference: gwv1.BackendObjectReference{
+ Name: gwv1.ObjectName(pendingClusterServeSvcName),
+ Namespace: ptr.To(gwv1.Namespace(gatewayInstance.Namespace)),
+ Port: ptr.To(pendingServePort),
+ },
+ Weight: ptr.To(pendingClusterWeight),
+ },
+ })
+ }
+
+ httpRouteName := rayServiceInstance.Name + "-httproute"
+ desiredHTTPRoute := &gwv1.HTTPRoute{
+ ObjectMeta: metav1.ObjectMeta{Name: httpRouteName, Namespace: gatewayInstance.Namespace},
+ Spec: gwv1.HTTPRouteSpec{
+ CommonRouteSpec: gwv1.CommonRouteSpec{
+ ParentRefs: []gwv1.ParentReference{
+ {
+ Name: gwv1.ObjectName(gatewayInstance.Name),
+ Namespace: ptr.To(gwv1.Namespace(gatewayInstance.Namespace)),
+ },
+ },
+ },
+ Rules: []gwv1.HTTPRouteRule{
+ {
+ Matches: []gwv1.HTTPRouteMatch{
+ {
+ Path: &gwv1.HTTPPathMatch{
+ Type: ptr.To(gwv1.PathMatchPathPrefix),
+ Value: ptr.To("/"),
+ },
+ },
+ },
+ BackendRefs: backendRefs,
+ },
+ },
+ },
+ }
+
+ return desiredHTTPRoute, nil
+}
+
+// reconcileHTTPRoute reconciles an HTTPRoute resource for a RayService to route traffic during a NewClusterWithIncrementalUpgrade.
+func (r *RayServiceReconciler) reconcileHTTPRoute(ctx context.Context, rayServiceInstance *rayv1.RayService, isPendingClusterReady bool) (*gwv1.HTTPRoute, error) {
+ logger := ctrl.LoggerFrom(ctx)
+ var err error
+
+ desiredHTTPRoute, err := r.createHTTPRoute(ctx, rayServiceInstance, isPendingClusterReady)
+ if err != nil {
+ logger.Error(err, "Failed to build HTTPRoute for RayService upgrade")
+ return nil, err
+ }
+ if desiredHTTPRoute == nil {
+ logger.Info("Skipping HTTPRoute reconciliation: desired HTTPRoute is nil")
+ return nil, nil
+ }
+
+ // Check for existing HTTPRoute for RayService
+ existingHTTPRoute := &gwv1.HTTPRoute{}
+ if err := r.Get(ctx, common.RayServiceHTTPRouteNamespacedName(rayServiceInstance), existingHTTPRoute); err != nil {
+ if errors.IsNotFound(err) {
+ // Set the ownership in order to do the garbage collection by k8s.
+ if err := ctrl.SetControllerReference(rayServiceInstance, desiredHTTPRoute, r.Scheme); err != nil {
+ return nil, err
+ }
+ if err = r.Create(ctx, desiredHTTPRoute); err != nil {
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.FailedToCreateHTTPRoute), "Failed to create the HTTPRoute for RayService %s/%s: %v", desiredHTTPRoute.Namespace, desiredHTTPRoute.Name, err)
+ return nil, err
+ }
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeNormal, string(utils.CreatedHTTPRoute), "Created HTTPRoute for RayService %s/%s", desiredHTTPRoute.Namespace, desiredHTTPRoute.Name)
+ return desiredHTTPRoute, nil
+ }
+ return nil, err
+ }
+
+ // If HTTPRoute already exists, check if update is needed
+ if !reflect.DeepEqual(existingHTTPRoute.Spec, desiredHTTPRoute.Spec) {
+ logger.Info("Updating existing HTTPRoute", "name", desiredHTTPRoute.Name)
+ existingHTTPRoute.Spec = desiredHTTPRoute.Spec
+ if err := r.Update(ctx, existingHTTPRoute); err != nil {
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.FailedToUpdateHTTPRoute), "Failed to update the HTTPRoute %s/%s: %v", existingHTTPRoute.Namespace, existingHTTPRoute.Name, err)
+ return nil, err
+ }
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeNormal, string(utils.UpdatedHTTPRoute), "Updated the HTTPRoute %s/%s", existingHTTPRoute.Namespace, existingHTTPRoute.Name)
+ }
+
+ return existingHTTPRoute, nil
+}
+
// `reconcileRayCluster` reconciles the active and pending Ray clusters. There are 4 possible cases:
// (1) Create a new pending cluster. (2) Update the active cluster. (3) Update the pending cluster. (4) Do nothing.
func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServiceInstance *rayv1.RayService) (*rayv1.RayCluster, *rayv1.RayCluster, error) {
@@ -700,6 +1129,17 @@ func constructRayClusterForRayService(rayService *rayv1.RayService, rayClusterNa
// set the KubeRay version used to create the RayCluster
rayClusterAnnotations[utils.KubeRayVersion] = utils.KUBERAY_VERSION
+ clusterSpec := rayService.Spec.RayClusterSpec.DeepCopy()
+ isPendingClusterForUpgrade := utils.IsIncrementalUpgradeEnabled(&rayService.Spec) &&
+ rayService.Status.ActiveServiceStatus.RayClusterName != ""
+ if isPendingClusterForUpgrade {
+ // For an incremental upgrade, start the pending cluster without a replicas value so
+ // that it autoscales up from MinReplicas based on the current value of target_capacity.
+ for i := range clusterSpec.WorkerGroupSpecs {
+ clusterSpec.WorkerGroupSpecs[i].Replicas = nil
+ }
+ }
+
rayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Labels: rayClusterLabel,
@@ -707,7 +1147,7 @@ func constructRayClusterForRayService(rayService *rayv1.RayService, rayClusterNa
Name: rayClusterName,
Namespace: rayService.Namespace,
},
- Spec: rayService.Spec.RayClusterSpec,
+ Spec: *clusterSpec,
}
// Set the ownership in order to do the garbage collection by k8s.
@@ -748,6 +1188,24 @@ func (r *RayServiceReconciler) updateServeDeployment(ctx context.Context, raySer
return err
}
+ if utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) {
+ // For incremental upgrades, set target_capacity (if specified) to avoid
+ // immediately scaling the initial Serve deployment to 100%.
+ var targetCapacity *int32
+ activeStatus := rayServiceInstance.Status.ActiveServiceStatus
+ pendingStatus := rayServiceInstance.Status.PendingServiceStatus
+
+ if clusterName == activeStatus.RayClusterName && activeStatus.TargetCapacity != nil {
+ targetCapacity = activeStatus.TargetCapacity
+ } else if clusterName == pendingStatus.RayClusterName && pendingStatus.TargetCapacity != nil {
+ targetCapacity = pendingStatus.TargetCapacity
+ }
+ if targetCapacity != nil {
+ logger.Info("Setting target_capacity from status in Serve config.", "target_capacity", *targetCapacity)
+ serveConfig["target_capacity"] = *targetCapacity
+ }
+ }
+
configJson, err := json.Marshal(serveConfig)
if err != nil {
return fmt.Errorf("failed to marshal converted serve config into bytes: %w", err)
@@ -767,6 +1225,194 @@ func (r *RayServiceReconciler) updateServeDeployment(ctx context.Context, raySer
return nil
}
+// checkIfNeedTargetCapacityUpdate returns whether the controller should adjust the target_capacity
+// of the Serve config associated with a RayCluster during NewClusterWithIncrementalUpgrade.
+//
+// This function implements the incremental upgrade state machine as defined in the design document:
+// https://github.com/ray-project/enhancements/blob/main/reps/2024-12-4-ray-service-incr-upgrade.md
+//
+// The upgrade process follows these phases:
+// 1. Phase 1 (Steps 7-8): New cluster scales up to target capacity
+// - pendingTargetCapacity: 0% → 100%
+// - Returns true: "Pending RayCluster has not finished scaling up."
+//
+// 2. Phase 2 (Step 9): Traffic gradually migrates to new cluster
+// - pendingTrafficRoutedPercent: 0% → 100%
+// - Returns true: "Pending RayCluster has not finished scaling up."
+//
+// 3. Phase 3 (Step 10): Old cluster scales down after new cluster is ready
+// - activeTargetCapacity: 100% → 0%
+// - Returns true: "Active RayCluster TargetCapacity has not finished scaling down."
+//
+// 4. Phase 4 (Step 11): Upgrade completion
+// - Both clusters reach final state: active=0%, pending=100%
+//   - Returns false: "All traffic has migrated to the upgraded cluster and NewClusterWithIncrementalUpgrade
+//     is complete."
+//
+// The function ensures that traffic migration only proceeds when the target cluster has reached
+// its capacity limit, preventing resource conflicts and ensuring upgrade stability.
+func (r *RayServiceReconciler) checkIfNeedTargetCapacityUpdate(ctx context.Context, rayServiceInstance *rayv1.RayService) (bool, string) {
+ activeRayServiceStatus := rayServiceInstance.Status.ActiveServiceStatus
+ pendingRayServiceStatus := rayServiceInstance.Status.PendingServiceStatus
+
+ if activeRayServiceStatus.RayClusterName == "" || pendingRayServiceStatus.RayClusterName == "" {
+ return false, "Both active and pending RayCluster instances are required for NewClusterWithIncrementalUpgrade."
+ }
+
+ // Validate Gateway and HTTPRoute objects are ready
+ gatewayInstance := &gwv1.Gateway{}
+ if err := r.Get(ctx, common.RayServiceGatewayNamespacedName(rayServiceInstance), gatewayInstance); err != nil {
+ return false, fmt.Sprintf("Failed to retrieve Gateway for RayService: %v", err)
+ }
+ if !utils.IsGatewayReady(gatewayInstance) {
+ return false, "Gateway for RayService NewClusterWithIncrementalUpgrade is not ready."
+ }
+
+ httpRouteInstance := &gwv1.HTTPRoute{}
+ if err := r.Get(ctx, common.RayServiceHTTPRouteNamespacedName(rayServiceInstance), httpRouteInstance); err != nil {
+ return false, fmt.Sprintf("Failed to retrieve HTTPRoute for RayService: %v", err)
+ }
+ if !utils.IsHTTPRouteReady(gatewayInstance, httpRouteInstance) {
+ return false, "HTTPRoute for RayService NewClusterWithIncrementalUpgrade is not ready."
+ }
+
+	// Retrieve the currently observed NewClusterWithIncrementalUpgrade status fields for the active and pending clusters.
+ if activeRayServiceStatus.TargetCapacity == nil || activeRayServiceStatus.TrafficRoutedPercent == nil {
+ return true, "Active RayServiceStatus missing TargetCapacity or TrafficRoutedPercent."
+ }
+ if pendingRayServiceStatus.TargetCapacity == nil || pendingRayServiceStatus.TrafficRoutedPercent == nil {
+ return true, "Pending RayServiceStatus missing TargetCapacity or TrafficRoutedPercent."
+ }
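+	// Returning true when these fields are missing lets reconcileServeTargetCapacity initialize them to their defaults.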
+ activeTargetCapacity := int(*activeRayServiceStatus.TargetCapacity)
+ pendingTargetCapacity := int(*pendingRayServiceStatus.TargetCapacity)
+ pendingTrafficRoutedPercent := int(*pendingRayServiceStatus.TrafficRoutedPercent)
+
+ if activeTargetCapacity == 0 && pendingTargetCapacity == 100 {
+ return false, "All traffic has migrated to the upgraded cluster and NewClusterWithIncrementalUpgrade is complete."
+ } else if pendingTargetCapacity < 100 || pendingTrafficRoutedPercent < 100 {
+ return true, "Pending RayCluster has not finished scaling up."
+ }
+ return true, "Active RayCluster TargetCapacity has not finished scaling down."
+}
+
+// applyServeTargetCapacity updates the target_capacity for a given RayCluster's Serve applications.
+func (r *RayServiceReconciler) applyServeTargetCapacity(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster, rayDashboardClient dashboardclient.RayDashboardClientInterface, goalTargetCapacity int32) error {
+ logger := ctrl.LoggerFrom(ctx).WithValues("RayCluster", rayClusterInstance.Name)
+
+ // Retrieve cached ServeConfig from last reconciliation for cluster to update
+ cachedConfig := r.getServeConfigFromCache(rayServiceInstance, rayClusterInstance.Name)
+ if cachedConfig == "" {
+ cachedConfig = rayServiceInstance.Spec.ServeConfigV2
+ }
+
+ serveConfig := make(map[string]interface{})
+ if err := yaml.Unmarshal([]byte(cachedConfig), &serveConfig); err != nil {
+ return err
+ }
+
+ // Check if ServeConfig requires update
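+	// Note: unmarshalling decodes numeric values into float64, which is why the assertion below targets float64.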
+ if currentTargetCapacity, ok := serveConfig["target_capacity"].(float64); ok {
+ if int32(currentTargetCapacity) == goalTargetCapacity {
+ logger.Info("target_capacity already updated on RayCluster", "target_capacity", currentTargetCapacity)
+ // No update required, return early
+ return nil
+ }
+ }
+
+ serveConfig["target_capacity"] = goalTargetCapacity
+ configJson, err := json.Marshal(serveConfig)
+ if err != nil {
+ return fmt.Errorf("failed to marshal serve config: %w", err)
+ }
+
+ logger.Info("Applying new target_capacity to Ray cluster.", "goal", goalTargetCapacity)
+ if err := rayDashboardClient.UpdateDeployments(ctx, configJson); err != nil {
+ err = fmt.Errorf(
+ "fail to create / update Serve applications. If you observe this error consistently, "+
+ "please check \"Issue 5: Fail to create / update Serve applications.\" in "+
+ "https://docs.ray.io/en/master/cluster/kubernetes/troubleshooting/rayservice-troubleshooting.html#kuberay-raysvc-troubleshoot for more details. "+
+ "err: %v", err)
+ return err
+ }
+
+ // Update the status fields and cache new Serve config.
+ if rayClusterInstance.Name == rayServiceInstance.Status.ActiveServiceStatus.RayClusterName {
+ rayServiceInstance.Status.ActiveServiceStatus.TargetCapacity = ptr.To(goalTargetCapacity)
+ } else if rayClusterInstance.Name == rayServiceInstance.Status.PendingServiceStatus.RayClusterName {
+ rayServiceInstance.Status.PendingServiceStatus.TargetCapacity = ptr.To(goalTargetCapacity)
+ }
+ r.cacheServeConfig(rayServiceInstance, rayClusterInstance.Name)
+
+ return nil
+}
+
+// reconcileServeTargetCapacity reconciles the target_capacity of the ServeConfig for a given RayCluster during
+// a NewClusterWithIncrementalUpgrade while also updating the Status.TargetCapacity of the Active and Pending RayServices.
+func (r *RayServiceReconciler) reconcileServeTargetCapacity(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster, rayDashboardClient dashboardclient.RayDashboardClientInterface) error {
+ logger := ctrl.LoggerFrom(ctx)
+ logger.Info("reconcileServeTargetCapacity", "RayService", rayServiceInstance.Name)
+
+ activeRayServiceStatus := &rayServiceInstance.Status.ActiveServiceStatus
+ pendingRayServiceStatus := &rayServiceInstance.Status.PendingServiceStatus
+
+ // Set initial TargetCapacity values if unset
+ if activeRayServiceStatus.TargetCapacity == nil {
+ activeRayServiceStatus.TargetCapacity = ptr.To(int32(100))
+ }
+ if pendingRayServiceStatus.TargetCapacity == nil {
+ pendingRayServiceStatus.TargetCapacity = ptr.To(int32(0))
+ }
+
+ // Retrieve the current observed Status fields for NewClusterWithIncrementalUpgrade
+ activeTargetCapacity := *activeRayServiceStatus.TargetCapacity
+ pendingTargetCapacity := *pendingRayServiceStatus.TargetCapacity
+ pendingTrafficRoutedPercent := ptr.Deref(pendingRayServiceStatus.TrafficRoutedPercent, 0)
+
+	// Retrieve MaxSurgePercent - the maximum amount by which to change TargetCapacity per step
+ options := utils.GetRayServiceClusterUpgradeOptions(&rayServiceInstance.Spec)
+ if options == nil {
+		return errstd.New("missing RayService ClusterUpgradeOptions during upgrade")
+ }
+ maxSurgePercent := ptr.Deref(options.MaxSurgePercent, 100)
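+	// MaxSurgePercent defaults to 100 (the API default), in which case capacity moves in a single step.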
+
+ // Defer updating the target_capacity until traffic weights are updated
+ if pendingTargetCapacity != pendingTrafficRoutedPercent {
+ logger.Info("Traffic is currently being migrated to pending cluster", "RayCluster", pendingRayServiceStatus.RayClusterName, "TargetCapacity", pendingTargetCapacity, "TrafficRoutedPercent", pendingTrafficRoutedPercent)
+ return nil
+ }
+
+ // There are two cases:
+ // 1. The total target_capacity is greater than 100. This means the pending RayCluster has
+ // scaled up traffic and the active RayCluster can be scaled down by MaxSurgePercent.
+ // 2. The total target_capacity is equal to 100. This means the pending RayCluster can
+ // increase its target_capacity by MaxSurgePercent.
+ // If the rayClusterInstance passed into this function is not the cluster to update based
+ // on the above conditions, we return without doing anything.
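+	// For example, with maxSurgePercent=20: active=100/pending=0 (total 100) scales pending up to 20;
+	// once 20% of traffic has been routed, the total becomes 120, so active scales down to 80, and so on.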
+ var goalTargetCapacity int32
+ shouldUpdate := false
+ if rayClusterInstance.Name == activeRayServiceStatus.RayClusterName {
+ if activeTargetCapacity+pendingTargetCapacity > 100 {
+ // Scale down the Active RayCluster TargetCapacity on this iteration.
+ goalTargetCapacity = max(int32(0), activeTargetCapacity-maxSurgePercent)
+ shouldUpdate = true
+			logger.Info("Setting target_capacity for active RayCluster", "RayCluster", rayClusterInstance.Name, "target_capacity", goalTargetCapacity)
+ }
+ } else if rayClusterInstance.Name == pendingRayServiceStatus.RayClusterName {
+ if activeTargetCapacity+pendingTargetCapacity <= 100 {
+ // Scale up the Pending RayCluster TargetCapacity on this iteration.
+ goalTargetCapacity = min(int32(100), pendingTargetCapacity+maxSurgePercent)
+ shouldUpdate = true
+			logger.Info("Setting target_capacity for pending RayCluster", "RayCluster", rayClusterInstance.Name, "target_capacity", goalTargetCapacity)
+ }
+ }
+
+ if !shouldUpdate {
+ return nil
+ }
+
+ return r.applyServeTargetCapacity(ctx, rayServiceInstance, rayClusterInstance, rayDashboardClient, goalTargetCapacity)
+}
+
// `getAndCheckServeStatus` gets Serve applications' and deployments' statuses and check whether the
// Serve applications are ready to serve incoming traffic or not. It returns three values:
//
@@ -950,6 +1596,18 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns
return false, serveApplications, err
}
+ skipConfigUpdate := false
+ isActiveCluster := rayClusterInstance.Name == rayServiceInstance.Status.ActiveServiceStatus.RayClusterName
+ isIncrementalUpgradeInProgress := utils.IsIncrementalUpgradeEnabled(&rayServiceInstance.Spec) &&
+ meta.IsStatusConditionTrue(rayServiceInstance.Status.Conditions, string(rayv1.UpgradeInProgress))
+
+ if isActiveCluster && isIncrementalUpgradeInProgress {
+ // Skip updating the Serve config for the Active cluster during NewClusterWithIncrementalUpgrade. The updated
+ // Serve config is applied to the pending RayService's RayCluster.
+ skipConfigUpdate = true
+ logger.Info("Blocking new Serve config submission for Active cluster during upgrade.", "clusterName", rayClusterInstance.Name)
+ }
+
cachedServeConfigV2 := r.getServeConfigFromCache(rayServiceInstance, rayClusterInstance.Name)
isReady, serveApplications, err := getAndCheckServeStatus(ctx, rayDashboardClient)
if err != nil {
@@ -958,13 +1616,26 @@ func (r *RayServiceReconciler) reconcileServe(ctx context.Context, rayServiceIns
shouldUpdate, reason := checkIfNeedSubmitServeApplications(cachedServeConfigV2, rayServiceInstance.Spec.ServeConfigV2, serveApplications)
logger.Info("checkIfNeedSubmitServeApplications", "shouldUpdate", shouldUpdate, "reason", reason)
- if shouldUpdate {
+ if shouldUpdate && !skipConfigUpdate {
if err = r.updateServeDeployment(ctx, rayServiceInstance, rayDashboardClient, rayClusterInstance.Name); err != nil {
r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.FailedToUpdateServeApplications), "Failed to update serve applications to the RayCluster %s/%s: %v", rayClusterInstance.Namespace, rayClusterInstance.Name, err)
return false, serveApplications, err
}
r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeNormal, string(utils.UpdatedServeApplications), "Updated serve applications to the RayCluster %s/%s", rayClusterInstance.Namespace, rayClusterInstance.Name)
}
+ if isIncrementalUpgradeInProgress {
+ incrementalUpgradeUpdate, reason := r.checkIfNeedTargetCapacityUpdate(ctx, rayServiceInstance)
+ logger.Info("checkIfNeedTargetCapacityUpdate", "incrementalUpgradeUpdate", incrementalUpgradeUpdate, "reason", reason)
+ if incrementalUpgradeUpdate {
+ if err := r.reconcileServeTargetCapacity(ctx, rayServiceInstance, rayClusterInstance, rayDashboardClient); err != nil {
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeWarning, string(utils.FailedToUpdateTargetCapacity), "Failed to update target_capacity of serve applications to the RayCluster %s/%s: %v", rayClusterInstance.Namespace, rayClusterInstance.Name, err)
+ return false, serveApplications, err
+ }
+ r.Recorder.Eventf(rayServiceInstance, corev1.EventTypeNormal, string(utils.UpdatedServeTargetCapacity),
+				"Updated target_capacity of serve applications to the RayCluster %s/%s", rayClusterInstance.Namespace, rayClusterInstance.Name)
+ }
+ }
+
return isReady, serveApplications, nil
}
@@ -986,7 +1657,7 @@ func (r *RayServiceReconciler) updateHeadPodServeLabel(ctx context.Context, rayS
}
rayContainer := headPod.Spec.Containers[utils.RayContainerIndex]
- servingPort := utils.FindContainerPort(&rayContainer, utils.ServingPortName, utils.DefaultServingPort)
+ servingPort := int(utils.FindContainerPort(&rayContainer, utils.ServingPortName, utils.DefaultServingPort))
client := r.httpProxyClientFunc(headPod.Status.PodIP, headPod.Namespace, headPod.Name, servingPort)
if headPod.Labels == nil {
@@ -1041,3 +1712,34 @@ func (r *RayServiceReconciler) isHeadPodRunningAndReady(ctx context.Context, ins
}
return utils.IsRunningAndReady(headPod), nil
}
+
+// reconcilePerClusterServeService reconciles a load-balancing serve service for a given RayCluster.
+func (r *RayServiceReconciler) reconcilePerClusterServeService(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster) error {
+ if rayClusterInstance == nil {
+ return nil
+ }
+
+ logger := ctrl.LoggerFrom(ctx).WithValues("RayCluster", rayClusterInstance.Name)
+
+	logger.Info("Building per-cluster serve service")
+
+ // Create a serve service for the RayCluster associated with this RayService. During an incremental
+ // upgrade, this will be called for the pending RayCluster instance.
+ desiredSvc, err := common.BuildServeService(ctx, *rayServiceInstance, *rayClusterInstance, true)
+ if err != nil {
+ logger.Error(err, "Failed to build per-cluster serve service spec")
+ return err
+ }
+ if err := ctrl.SetControllerReference(rayClusterInstance, desiredSvc, r.Scheme); err != nil {
+ return err
+ }
+
+ existingSvc := &corev1.Service{}
+ err = r.Get(ctx, client.ObjectKey{Name: desiredSvc.Name, Namespace: desiredSvc.Namespace}, existingSvc)
+ if errors.IsNotFound(err) {
+ logger.Info("Creating new per-cluster serve service for incremental upgrade.", "Service", desiredSvc.Name)
+ return r.Create(ctx, desiredSvc)
+ }
+
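+	// If the Get succeeded, the service already exists and err is nil; any other error is surfaced to the caller.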
+ return err
+}
diff --git a/ray-operator/controllers/ray/rayservice_controller_unit_test.go b/ray-operator/controllers/ray/rayservice_controller_unit_test.go
index 638af6b26fb..169e6d2bc5d 100644
--- a/ray-operator/controllers/ray/rayservice_controller_unit_test.go
+++ b/ray-operator/controllers/ray/rayservice_controller_unit_test.go
@@ -13,13 +13,16 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
+ "k8s.io/utils/lru"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
clientFake "sigs.k8s.io/controller-runtime/pkg/client/fake"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/common"
@@ -27,6 +30,7 @@ import (
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils/dashboardclient"
utiltypes "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils/types"
"github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned/scheme"
+ "github.com/ray-project/kuberay/ray-operator/pkg/features"
"github.com/ray-project/kuberay/ray-operator/test/support"
)
@@ -1319,3 +1323,906 @@ func TestRayClusterDeletionDelaySeconds(t *testing.T) {
})
}
}
+
+// Helper function to create a RayService object undergoing an incremental upgrade.
+func makeIncrementalUpgradeRayService(
+ withOptions bool,
+ gatewayClassName string,
+ stepSizePercent *int32,
+ intervalSeconds *int32,
+ routedPercent *int32,
+ lastTrafficMigratedTime *metav1.Time,
+) *rayv1.RayService {
+ spec := rayv1.RayServiceSpec{
+ ServeService: &corev1.Service{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "serve-service",
+ Namespace: "test-ns",
+ },
+ Spec: corev1.ServiceSpec{
+ Ports: []corev1.ServicePort{
+ {
+ Name: "http",
+ Port: 8000,
+ },
+ },
+ },
+ },
+ }
+ if withOptions {
+ spec.UpgradeStrategy = &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ ClusterUpgradeOptions: &rayv1.ClusterUpgradeOptions{
+ GatewayClassName: gatewayClassName,
+ StepSizePercent: stepSizePercent,
+ IntervalSeconds: intervalSeconds,
+ },
+ }
+ }
+
+ return &rayv1.RayService{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "incremental-ray-service",
+ Namespace: "test-ns",
+ },
+ Spec: spec,
+ Status: rayv1.RayServiceStatuses{
+ ActiveServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: "active-ray-cluster",
+ RayClusterStatus: rayv1.RayClusterStatus{
+ Head: rayv1.HeadInfo{ServiceName: "active-service"},
+ },
+ TrafficRoutedPercent: routedPercent,
+ LastTrafficMigratedTime: lastTrafficMigratedTime,
+ },
+ PendingServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: "pending-ray-cluster",
+ RayClusterStatus: rayv1.RayClusterStatus{
+ Head: rayv1.HeadInfo{ServiceName: "pending-service"},
+ },
+ TrafficRoutedPercent: ptr.To(int32(100) - *routedPercent),
+ LastTrafficMigratedTime: lastTrafficMigratedTime,
+ },
+ },
+ }
+}
+
+func TestCreateGateway(t *testing.T) {
+ serveService := &corev1.Service{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "serve-service",
+ Namespace: "test-ns",
+ },
+ Spec: corev1.ServiceSpec{
+ Ports: []corev1.ServicePort{
+ {
+ Port: 8000,
+ },
+ },
+ },
+ }
+ newScheme := runtime.NewScheme()
+ _ = corev1.AddToScheme(newScheme)
+
+ fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(serveService).Build()
+ reconciler := &RayServiceReconciler{
+ Client: fakeClient,
+ }
+
+ tests := []struct {
+ rayService *rayv1.RayService
+ name string
+ expectedGatewayName string
+ expectedClass string
+ expectedListeners int
+ expectErr bool
+ }{
+ {
+ name: "valid gateway creation",
+ expectedGatewayName: "incremental-ray-service-gateway",
+ rayService: makeIncrementalUpgradeRayService(true, "gateway-class", ptr.To(int32(50)), ptr.To(int32(10)), ptr.To(int32(80)), &metav1.Time{Time: time.Now()}),
+ expectErr: false,
+ expectedClass: "gateway-class",
+ expectedListeners: 1,
+ },
+ {
+ name: "missing ClusterUpgradeOptions",
+ rayService: makeIncrementalUpgradeRayService(false, "gateway-class", ptr.To(int32(0)), ptr.To(int32(0)), ptr.To(int32(0)), &metav1.Time{Time: time.Now()}),
+ expectErr: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ gw, err := reconciler.createGateway(tt.rayService)
+ if tt.expectErr {
+ require.Error(t, err)
+ assert.Nil(t, gw)
+ } else {
+ require.NoError(t, err)
+ require.NotNil(t, gw)
+ assert.Equal(t, tt.expectedGatewayName, gw.Name)
+ assert.Equal(t, tt.rayService.Namespace, gw.Namespace)
+ assert.Equal(t, gwv1.ObjectName(tt.expectedClass), gw.Spec.GatewayClassName)
+ assert.Len(t, gw.Spec.Listeners, tt.expectedListeners)
+ }
+ })
+ }
+}
+
+func TestCreateHTTPRoute(t *testing.T) {
+ ctx := context.TODO()
+ namespace := "test-ns"
+ stepSize := int32(10)
+ interval := int32(30)
+
+ activeCluster := &rayv1.RayCluster{ObjectMeta: metav1.ObjectMeta{Name: "rayservice-active", Namespace: namespace}}
+ pendingCluster := &rayv1.RayCluster{ObjectMeta: metav1.ObjectMeta{Name: "rayservice-pending", Namespace: namespace}}
+ gateway := &gwv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: "test-rayservice-gateway", Namespace: namespace}}
+ activeServeService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: utils.GenerateServeServiceName(activeCluster.Name), Namespace: namespace}}
+ pendingServeService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: utils.GenerateServeServiceName(pendingCluster.Name), Namespace: namespace}}
+
+ baseRayService := &rayv1.RayService{
+ ObjectMeta: metav1.ObjectMeta{Name: "test-rayservice", Namespace: namespace},
+ Spec: rayv1.RayServiceSpec{
+ UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ ClusterUpgradeOptions: &rayv1.ClusterUpgradeOptions{
+ StepSizePercent: &stepSize,
+ IntervalSeconds: &interval,
+ GatewayClassName: "istio",
+ },
+ },
+ },
+ Status: rayv1.RayServiceStatuses{
+ ActiveServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: activeCluster.Name,
+ TrafficRoutedPercent: ptr.To(int32(100)),
+ TargetCapacity: ptr.To(int32(100)),
+ },
+ PendingServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: pendingCluster.Name,
+ TrafficRoutedPercent: ptr.To(int32(0)),
+ TargetCapacity: ptr.To(int32(30)),
+ },
+ },
+ }
+
+ tests := []struct {
+ name string
+ modifier func(rs *rayv1.RayService)
+ runtimeObjects []runtime.Object
+ expectError bool
+ expectedActiveWeight int32
+ expectedPendingWeight int32
+ isPendingClusterReady bool
+ }{
+ {
+ name: "NewClusterWithIncrementalUpgrade, but pending cluster is not ready, so no traffic shift.",
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus.LastTrafficMigratedTime = &metav1.Time{Time: time.Now().Add(-time.Duration(interval+1) * time.Second)}
+ },
+ runtimeObjects: []runtime.Object{activeCluster, pendingCluster, gateway, activeServeService, pendingServeService},
+ isPendingClusterReady: false,
+ expectedActiveWeight: 100,
+ expectedPendingWeight: 0,
+ },
+ {
+ name: "NewClusterWithIncrementalUpgrade, time since LastTrafficMigratedTime < IntervalSeconds.",
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus.LastTrafficMigratedTime = &metav1.Time{Time: time.Now()}
+ },
+ runtimeObjects: []runtime.Object{activeCluster, pendingCluster, gateway, activeServeService, pendingServeService},
+ isPendingClusterReady: true,
+ expectedActiveWeight: 100,
+ expectedPendingWeight: 0,
+ },
+ {
+ name: "NewClusterWithIncrementalUpgrade, time since LastTrafficMigratedTime >= IntervalSeconds.",
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus.LastTrafficMigratedTime = &metav1.Time{Time: time.Now().Add(-time.Duration(interval+1) * time.Second)}
+ rs.Status.PendingServiceStatus.TargetCapacity = ptr.To(int32(60))
+ },
+ runtimeObjects: []runtime.Object{activeCluster, pendingCluster, gateway, activeServeService, pendingServeService},
+ isPendingClusterReady: true,
+ expectedActiveWeight: 90,
+ expectedPendingWeight: 10,
+ },
+ {
+ name: "NewClusterWithIncrementalUpgrade, TrafficRoutedPercent capped to pending TargetCapacity.",
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus.LastTrafficMigratedTime = &metav1.Time{Time: time.Now().Add(-time.Duration(interval+1) * time.Second)}
+ rs.Status.PendingServiceStatus.TargetCapacity = ptr.To(int32(5))
+ },
+ runtimeObjects: []runtime.Object{activeCluster, pendingCluster, gateway, activeServeService, pendingServeService},
+ isPendingClusterReady: true,
+ expectedActiveWeight: 95,
+ expectedPendingWeight: 5, // can only migrate 5% to pending until TargetCapacity reached
+ },
+ {
+ name: "Create HTTPRoute called with missing ClusterUpgradeOptions.",
+ modifier: func(rs *rayv1.RayService) {
+ rs.Spec.UpgradeStrategy.ClusterUpgradeOptions = nil
+ },
+ runtimeObjects: []runtime.Object{activeCluster, pendingCluster, gateway, activeServeService, pendingServeService},
+ isPendingClusterReady: true,
+ expectError: true,
+ },
+ {
+ name: "No on-going upgrade, pending cluster does not exist.",
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus = rayv1.RayServiceStatus{}
+ },
+ runtimeObjects: []runtime.Object{activeCluster, gateway, activeServeService},
+ isPendingClusterReady: false,
+ expectedActiveWeight: 100,
+ expectedPendingWeight: 0,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ rayService := baseRayService.DeepCopy()
+ tt.modifier(rayService)
+ tt.runtimeObjects = append(tt.runtimeObjects, rayService)
+
+ newScheme := runtime.NewScheme()
+ _ = rayv1.AddToScheme(newScheme)
+ _ = corev1.AddToScheme(newScheme)
+ _ = gwv1.AddToScheme(newScheme)
+ fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(tt.runtimeObjects...).Build()
+
+ reconciler := RayServiceReconciler{
+ Client: fakeClient,
+ Scheme: newScheme,
+ Recorder: record.NewFakeRecorder(1),
+ }
+
+ route, err := reconciler.createHTTPRoute(ctx, rayService, tt.isPendingClusterReady)
+
+ if tt.expectError {
+ require.Error(t, err)
+ assert.Nil(t, route)
+ } else {
+ require.NoError(t, err)
+ require.NotNil(t, route)
+
+ assert.Equal(t, "test-rayservice-httproute", route.Name)
+ assert.Equal(t, "test-ns", route.Namespace)
+
+ require.Len(t, route.Spec.Rules, 1)
+ rule := route.Spec.Rules[0]
+
+ require.GreaterOrEqual(t, len(rule.BackendRefs), 1)
+ assert.Equal(t, gwv1.ObjectName(activeServeService.Name), rule.BackendRefs[0].BackendRef.Name)
+ assert.Equal(t, tt.expectedActiveWeight, *rule.BackendRefs[0].Weight)
+
+ if len(rule.BackendRefs) > 1 {
+ assert.Equal(t, gwv1.ObjectName(pendingServeService.Name), rule.BackendRefs[1].BackendRef.Name)
+ assert.Equal(t, tt.expectedPendingWeight, *rule.BackendRefs[1].Weight)
+ } else {
+ assert.Equal(t, int32(0), tt.expectedPendingWeight)
+ }
+ }
+ })
+ }
+}
+
+func TestReconcileHTTPRoute(t *testing.T) {
+ newScheme := runtime.NewScheme()
+ _ = rayv1.AddToScheme(newScheme)
+ _ = corev1.AddToScheme(newScheme)
+ _ = gwv1.AddToScheme(newScheme)
+
+ ctx := context.TODO()
+ namespace := "test-ns"
+ stepSize := int32(10)
+ interval := int32(30)
+ gatewayName := "test-rayservice-gateway"
+ routeName := "test-rayservice-httproute"
+
+ activeCluster := &rayv1.RayCluster{ObjectMeta: metav1.ObjectMeta{Name: "active-ray-cluster", Namespace: namespace}}
+ pendingCluster := &rayv1.RayCluster{ObjectMeta: metav1.ObjectMeta{Name: "pending-ray-cluster", Namespace: namespace}}
+ activeServeService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: utils.GenerateServeServiceName(activeCluster.Name), Namespace: namespace}}
+ pendingServeService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: utils.GenerateServeServiceName(pendingCluster.Name), Namespace: namespace}}
+ gateway := &gwv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: gatewayName, Namespace: namespace}}
+
+ baseRayService := &rayv1.RayService{
+ ObjectMeta: metav1.ObjectMeta{Name: "test-rayservice", Namespace: namespace},
+ Spec: rayv1.RayServiceSpec{
+ UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ ClusterUpgradeOptions: &rayv1.ClusterUpgradeOptions{
+ StepSizePercent: &stepSize,
+ IntervalSeconds: &interval,
+ GatewayClassName: "istio",
+ },
+ },
+ },
+ Status: rayv1.RayServiceStatuses{
+ ActiveServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: activeCluster.Name,
+ TrafficRoutedPercent: ptr.To(int32(100)),
+ TargetCapacity: ptr.To(int32(100)),
+ },
+ PendingServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: pendingCluster.Name,
+ TrafficRoutedPercent: ptr.To(int32(0)),
+ TargetCapacity: ptr.To(int32(100)),
+ },
+ },
+ }
+
+ tests := []struct {
+ modifier func(rs *rayv1.RayService)
+ existingRoute *gwv1.HTTPRoute
+ name string
+ expectedActiveWeight int32
+ expectedPendingWeight int32
+ pendingClusterExists bool
+ isPendingClusterReady bool
+ }{
+ {
+ name: "Create HTTPRoute with no pending cluster.",
+ isPendingClusterReady: false,
+ pendingClusterExists: false,
+ expectedActiveWeight: 100,
+ expectedPendingWeight: 0,
+ },
+ {
+ name: "Create HTTPRoute when pending cluster exists, but is not ready.",
+ isPendingClusterReady: false,
+ pendingClusterExists: true,
+ expectedActiveWeight: 100,
+ expectedPendingWeight: 0,
+ },
+ {
+ name: "Create new HTTPRoute with existing weights.",
+ isPendingClusterReady: true,
+ pendingClusterExists: true,
+ expectedActiveWeight: 90,
+ expectedPendingWeight: 10,
+ },
+ {
+ name: "Update HTTPRoute when pending cluster is ready.",
+ isPendingClusterReady: true,
+ pendingClusterExists: true,
+ expectedActiveWeight: 90,
+ expectedPendingWeight: 10,
+ },
+ {
+ name: "Existing HTTPRoute, time since LastTrafficMigratedTime >= IntervalSeconds so updates HTTPRoute.",
+ isPendingClusterReady: true,
+ pendingClusterExists: true,
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus.LastTrafficMigratedTime = &metav1.Time{Time: time.Now().Add(-time.Duration(interval+1) * time.Second)}
+ },
+ existingRoute: &gwv1.HTTPRoute{
+ ObjectMeta: metav1.ObjectMeta{Name: routeName, Namespace: namespace},
+ Spec: gwv1.HTTPRouteSpec{},
+ },
+ expectedActiveWeight: 90,
+ expectedPendingWeight: 10,
+ },
+ {
+ name: "Existing HTTPRoute, time since LastTrafficMigratedTime < IntervalSeconds so no update.",
+ isPendingClusterReady: true,
+ pendingClusterExists: true,
+ modifier: func(rs *rayv1.RayService) {
+ rs.Status.PendingServiceStatus.LastTrafficMigratedTime = &metav1.Time{Time: time.Now()}
+ },
+ expectedActiveWeight: 100,
+ expectedPendingWeight: 0,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ rayService := baseRayService.DeepCopy()
+ if tt.modifier != nil {
+ tt.modifier(rayService)
+ }
+
+ if !tt.pendingClusterExists {
+ rayService.Status.PendingServiceStatus.RayClusterName = ""
+ }
+
+ runtimeObjects := []runtime.Object{rayService, activeCluster, pendingCluster, gateway, activeServeService, pendingServeService}
+ if tt.existingRoute != nil {
+ runtimeObjects = append(runtimeObjects, tt.existingRoute)
+ }
+
+ fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(runtimeObjects...).Build()
+ reconciler := RayServiceReconciler{Client: fakeClient, Scheme: newScheme, Recorder: record.NewFakeRecorder(10)}
+
+ reconciledRoute, err := reconciler.reconcileHTTPRoute(ctx, rayService, tt.isPendingClusterReady)
+ require.NoError(t, err)
+
+ require.Len(t, reconciledRoute.Spec.Rules, 1)
+ rule := reconciledRoute.Spec.Rules[0]
+ if tt.pendingClusterExists {
+ require.Len(t, rule.BackendRefs, 2)
+ // Assert weights are set as expected.
+ assert.Equal(t, tt.expectedActiveWeight, *rule.BackendRefs[0].Weight)
+ assert.Equal(t, tt.expectedPendingWeight, *rule.BackendRefs[1].Weight)
+ } else {
+ require.Len(t, rule.BackendRefs, 1)
+ // Assert active weight is as expected.
+ assert.Equal(t, tt.expectedActiveWeight, *rule.BackendRefs[0].Weight)
+ }
+ // Assert ParentRef namespace is correctly set.
+ parent := reconciledRoute.Spec.ParentRefs[0]
+ assert.Equal(t, gwv1.ObjectName(gatewayName), parent.Name)
+ assert.Equal(t, ptr.To(gwv1.Namespace(namespace)), parent.Namespace)
+ })
+ }
+}
+
+func TestReconcileGateway(t *testing.T) {
+ newScheme := runtime.NewScheme()
+ _ = rayv1.AddToScheme(newScheme)
+ _ = corev1.AddToScheme(newScheme)
+ _ = gwv1.AddToScheme(newScheme)
+
+ ctx := context.TODO()
+ namespace := "test-ns"
+
+ rayService := makeIncrementalUpgradeRayService(
+ true,
+ "gateway-class",
+ ptr.To(int32(20)),
+ ptr.To(int32(30)),
+ ptr.To(int32(80)),
+ ptr.To(metav1.Now()),
+ )
+ gateway := makeGateway(fmt.Sprintf("%s-gateway", rayService.Name), rayService.Namespace, true)
+
+ tests := []struct {
+ name string
+ expectedGatewayName string
+ expectedClass string
+ runtimeObjects []runtime.Object
+ expectedNumListeners int
+ }{
+ {
+ name: "creates new Gateway if missing",
+ runtimeObjects: []runtime.Object{rayService},
+ expectedGatewayName: "incremental-ray-service-gateway",
+ expectedClass: "gateway-class",
+ expectedNumListeners: 1,
+ },
+ {
+ name: "updates Gateway if spec differs",
+ runtimeObjects: []runtime.Object{rayService, gateway},
+ expectedGatewayName: "incremental-ray-service-gateway",
+ expectedClass: "gateway-class",
+ expectedNumListeners: 1,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ fakeClient := clientFake.NewClientBuilder().
+ WithScheme(newScheme).
+ WithRuntimeObjects(tt.runtimeObjects...).
+ Build()
+
+ reconciler := RayServiceReconciler{
+ Client: fakeClient,
+ Scheme: newScheme,
+ Recorder: record.NewFakeRecorder(10),
+ }
+
+ err := reconciler.reconcileGateway(ctx, rayService)
+ require.NoError(t, err)
+
+ reconciledGateway := &gwv1.Gateway{}
+ err = fakeClient.Get(ctx, client.ObjectKey{Name: tt.expectedGatewayName, Namespace: namespace}, reconciledGateway)
+ require.NoError(t, err, "Failed to get the reconciled Gateway")
+
+ assert.Equal(t, tt.expectedGatewayName, reconciledGateway.Name)
+ assert.Equal(t, namespace, reconciledGateway.Namespace)
+ assert.Equal(t, gwv1.ObjectName(tt.expectedClass), reconciledGateway.Spec.GatewayClassName)
+ assert.Len(t, reconciledGateway.Spec.Listeners, tt.expectedNumListeners)
+ })
+ }
+}
+
+func TestReconcileServeTargetCapacity(t *testing.T) {
+ features.SetFeatureGateDuringTest(t, features.RayServiceIncrementalUpgrade, true)
+
+ tests := []struct {
+ name string
+ updatedCluster string
+ activeCapacity int32
+ pendingCapacity int32
+ activeRoutedPercent int32
+ pendingRoutedPercent int32
+ maxSurgePercent int32
+ expectedActiveCapacity int32
+ expectedPendingCapacity int32
+ }{
+ {
+ name: "Scale up pending RayCluster when total TargetCapacity < 100",
+ pendingRoutedPercent: 10,
+ activeCapacity: 70,
+ pendingCapacity: 10,
+ maxSurgePercent: 20,
+ expectedActiveCapacity: 70,
+ expectedPendingCapacity: 30,
+ updatedCluster: "pending",
+ },
+ {
+ name: "Scale down active RayCluster when total TargetCapacity > 100",
+ pendingRoutedPercent: 30,
+ activeCapacity: 80,
+ pendingCapacity: 30,
+ maxSurgePercent: 20,
+ expectedActiveCapacity: 60,
+ expectedPendingCapacity: 30,
+ updatedCluster: "active",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx := context.TODO()
+ rayService := &rayv1.RayService{
+ Spec: rayv1.RayServiceSpec{
+ UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ ClusterUpgradeOptions: &rayv1.ClusterUpgradeOptions{
+ MaxSurgePercent: ptr.To(tt.maxSurgePercent),
+ },
+ },
+ ServeConfigV2: `{"target_capacity": 0}`,
+ },
+ Status: rayv1.RayServiceStatuses{
+ ActiveServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: "active",
+ TargetCapacity: ptr.To(tt.activeCapacity),
+ TrafficRoutedPercent: ptr.To(tt.activeRoutedPercent),
+ },
+ PendingServiceStatus: rayv1.RayServiceStatus{
+ RayClusterName: "pending",
+ TargetCapacity: ptr.To(tt.pendingCapacity),
+ TrafficRoutedPercent: ptr.To(tt.pendingRoutedPercent),
+ },
+ },
+ }
+
+ var rayCluster *rayv1.RayCluster
+ if tt.updatedCluster == "active" {
+ rayCluster = &rayv1.RayCluster{ObjectMeta: metav1.ObjectMeta{Name: "active"}}
+ } else {
+ rayCluster = &rayv1.RayCluster{ObjectMeta: metav1.ObjectMeta{Name: "pending"}}
+ }
+
+ fakeDashboard := &utils.FakeRayDashboardClient{}
+ reconciler := &RayServiceReconciler{
+ ServeConfigs: lru.New(10),
+ }
+
+ err := reconciler.reconcileServeTargetCapacity(ctx, rayService, rayCluster, fakeDashboard)
+ require.NoError(t, err)
+ require.NotEmpty(t, fakeDashboard.LastUpdatedConfig)
+
+ if tt.updatedCluster == "active" {
+ assert.Equal(t, tt.expectedActiveCapacity, *rayService.Status.ActiveServiceStatus.TargetCapacity)
+ assert.Equal(t, tt.pendingCapacity, *rayService.Status.PendingServiceStatus.TargetCapacity)
+ expectedServeConfig := `{"target_capacity":` + strconv.Itoa(int(tt.expectedActiveCapacity)) + `}`
+ assert.JSONEq(t, expectedServeConfig, string(fakeDashboard.LastUpdatedConfig))
+ } else {
+ assert.Equal(t, tt.expectedPendingCapacity, *rayService.Status.PendingServiceStatus.TargetCapacity)
+ assert.Equal(t, tt.activeCapacity, *rayService.Status.ActiveServiceStatus.TargetCapacity)
+ expectedServeConfig := `{"target_capacity":` + strconv.Itoa(int(tt.expectedPendingCapacity)) + `}`
+ assert.JSONEq(t, expectedServeConfig, string(fakeDashboard.LastUpdatedConfig))
+ }
+ })
+ }
+}
+
+// makeGateway is a helper function that returns a Gateway object with the given readiness conditions
+func makeGateway(name, namespace string, isReady bool) *gwv1.Gateway {
+ status := metav1.ConditionFalse
+ if isReady {
+ status = metav1.ConditionTrue
+ }
+ return &gwv1.Gateway{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ },
+ Status: gwv1.GatewayStatus{
+ Conditions: []metav1.Condition{
+ {
+ Type: string(gwv1.GatewayConditionAccepted),
+ Status: status,
+ },
+ {
+ Type: string(gwv1.GatewayConditionProgrammed),
+ Status: status,
+ },
+ },
+ },
+ }
+}
+
+// MakeHTTPRoute is a helper function to return an HTTPRoute object
+func makeHTTPRoute(name, namespace string, isReady bool) *gwv1.HTTPRoute {
+ status := metav1.ConditionFalse
+ if isReady {
+ status = metav1.ConditionTrue
+ }
+ return &gwv1.HTTPRoute{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ },
+ Status: gwv1.HTTPRouteStatus{
+ RouteStatus: gwv1.RouteStatus{
+ Parents: []gwv1.RouteParentStatus{
+ {
+ ParentRef: gwv1.ParentReference{
+ Name: gwv1.ObjectName("test-rayservice-gateway"),
+ Namespace: ptr.To(gwv1.Namespace(namespace)),
+ },
+ Conditions: []metav1.Condition{
+ {
+ Type: string(gwv1.RouteConditionAccepted),
+ Status: status,
+ },
+ {
+ Type: string(gwv1.RouteConditionResolvedRefs),
+ Status: status,
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
+func TestCheckIfNeedTargetCapacityUpdate(t *testing.T) {
+ rayServiceName := "test-rayservice"
+ gatewayName := fmt.Sprintf("%s-%s", rayServiceName, "gateway")
+ httpRouteName := fmt.Sprintf("%s-%s", rayServiceName, "httproute")
+ namespace := "test-ns"
+
+ tests := []struct {
+ name string
+ expectedReason string
+ runtimeObjects []runtime.Object
+ activeStatus rayv1.RayServiceStatus
+ pendingStatus rayv1.RayServiceStatus
+ expectedNeedsUpdate bool
+ }{
+ {
+ name: "Missing RayClusterNames",
+ expectedNeedsUpdate: false,
+ expectedReason: "Both active and pending RayCluster instances are required for NewClusterWithIncrementalUpgrade.",
+ },
+ {
+ name: "Gateway not ready",
+ activeStatus: rayv1.RayServiceStatus{RayClusterName: "active"},
+ pendingStatus: rayv1.RayServiceStatus{RayClusterName: "pending"},
+ runtimeObjects: []runtime.Object{
+ makeGateway(gatewayName, namespace, false), makeHTTPRoute(httpRouteName, namespace, true),
+ },
+ expectedNeedsUpdate: false,
+ expectedReason: "Gateway for RayService NewClusterWithIncrementalUpgrade is not ready.",
+ },
+ {
+ name: "HTTPRoute not ready",
+ activeStatus: rayv1.RayServiceStatus{RayClusterName: "active"},
+ pendingStatus: rayv1.RayServiceStatus{RayClusterName: "pending"},
+ runtimeObjects: []runtime.Object{
+ makeGateway(gatewayName, namespace, true), makeHTTPRoute(httpRouteName, namespace, false),
+ },
+ expectedNeedsUpdate: false,
+ expectedReason: "HTTPRoute for RayService NewClusterWithIncrementalUpgrade is not ready.",
+ },
+ {
+ name: "NewClusterWithIncrementalUpgrade is complete",
+ activeStatus: rayv1.RayServiceStatus{
+ RayClusterName: "active",
+ TargetCapacity: ptr.To(int32(0)),
+ TrafficRoutedPercent: ptr.To(int32(0)),
+ },
+ pendingStatus: rayv1.RayServiceStatus{
+ RayClusterName: "pending",
+ TargetCapacity: ptr.To(int32(100)),
+ TrafficRoutedPercent: ptr.To(int32(100)),
+ },
+ runtimeObjects: []runtime.Object{
+ makeGateway(gatewayName, namespace, true), makeHTTPRoute(httpRouteName, namespace, true),
+ },
+ expectedNeedsUpdate: false,
+ expectedReason: "All traffic has migrated to the upgraded cluster and NewClusterWithIncrementalUpgrade is complete.",
+ },
+ {
+ name: "Pending RayCluster is still incrementally scaling",
+ activeStatus: rayv1.RayServiceStatus{
+ RayClusterName: "active",
+ TargetCapacity: ptr.To(int32(70)),
+ TrafficRoutedPercent: ptr.To(int32(70)),
+ },
+ pendingStatus: rayv1.RayServiceStatus{
+ RayClusterName: "pending",
+ TargetCapacity: ptr.To(int32(30)),
+ TrafficRoutedPercent: ptr.To(int32(30)),
+ },
+ runtimeObjects: []runtime.Object{
+ makeGateway(gatewayName, namespace, true), makeHTTPRoute(httpRouteName, namespace, true),
+ },
+ expectedNeedsUpdate: true,
+ expectedReason: "Pending RayCluster has not finished scaling up.",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ newScheme := runtime.NewScheme()
+ _ = corev1.AddToScheme(newScheme)
+ _ = gwv1.AddToScheme(newScheme)
+ fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(tt.runtimeObjects...).Build()
+ // Initialize RayService reconciler.
+ ctx := context.TODO()
+ r := RayServiceReconciler{
+ Client: fakeClient,
+ Recorder: &record.FakeRecorder{},
+ Scheme: scheme.Scheme,
+ }
+ rayService := &rayv1.RayService{
+ ObjectMeta: metav1.ObjectMeta{Name: rayServiceName, Namespace: namespace},
+ Status: rayv1.RayServiceStatuses{
+ ActiveServiceStatus: tt.activeStatus,
+ PendingServiceStatus: tt.pendingStatus,
+ },
+ }
+ needsUpdate, reason := r.checkIfNeedTargetCapacityUpdate(ctx, rayService)
+ assert.Equal(t, tt.expectedNeedsUpdate, needsUpdate)
+ assert.Equal(t, tt.expectedReason, reason)
+ })
+ }
+}
+
+func TestReconcilePerClusterServeService(t *testing.T) {
+ features.SetFeatureGateDuringTest(t, features.RayServiceIncrementalUpgrade, true)
+
+ ctx := context.TODO()
+ namespace := "test-ns"
+
+ // Minimal RayCluster with at least one container.
+ rayCluster := &rayv1.RayCluster{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-ray-cluster",
+ Namespace: namespace,
+ UID: "test-uid",
+ },
+ Spec: rayv1.RayClusterSpec{
+ HeadGroupSpec: rayv1.HeadGroupSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{
+ {Name: "ray-head"},
+ },
+ },
+ },
+ },
+ },
+ }
+ rayService := makeIncrementalUpgradeRayService(
+ true,
+ "istio",
+ ptr.To(int32(20)),
+ ptr.To(int32(30)),
+ ptr.To(int32(80)),
+ ptr.To(metav1.Now()),
+ )
+
+ // The expected pending RayCluster serve service.
+ expectedServeSvcName := utils.GenerateServeServiceName(rayCluster.Name)
+ expectedServeService := &corev1.Service{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: expectedServeSvcName,
+ Namespace: namespace,
+ },
+ Spec: corev1.ServiceSpec{
+ Selector: map[string]string{
+ utils.RayClusterLabelKey: rayCluster.Name,
+ utils.RayClusterServingServiceLabelKey: "true",
+ },
+ },
+ }
+
+ tests := []struct {
+ name string
+ rayCluster *rayv1.RayCluster
+ runtimeObjects []runtime.Object
+ expectServiceCreated bool
+ expectError bool
+ }{
+ {
+ name: "RayCluster is nil, no-op.",
+ rayCluster: nil,
+ runtimeObjects: []runtime.Object{rayService},
+ expectServiceCreated: false,
+ expectError: false,
+ },
+ {
+ name: "Create a new Serve service for the RayCluster.",
+ rayCluster: rayCluster,
+ runtimeObjects: []runtime.Object{rayService, rayCluster},
+ expectServiceCreated: true,
+ expectError: false,
+ },
+ {
+ name: "Pending RayCluster serve service already exists, no-op.",
+ rayCluster: rayCluster,
+ runtimeObjects: []runtime.Object{rayService, rayCluster, expectedServeService},
+ expectServiceCreated: false,
+ expectError: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ newScheme := runtime.NewScheme()
+ _ = rayv1.AddToScheme(newScheme)
+ _ = corev1.AddToScheme(newScheme)
+
+ fakeClient := clientFake.NewClientBuilder().WithScheme(newScheme).WithRuntimeObjects(tt.runtimeObjects...).Build()
+ reconciler := RayServiceReconciler{
+ Client: fakeClient,
+ Scheme: newScheme,
+ Recorder: record.NewFakeRecorder(1),
+ }
+
+ err := reconciler.reconcilePerClusterServeService(ctx, rayService, tt.rayCluster)
+
+ if tt.expectError {
+ require.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+
+ reconciledSvc := &corev1.Service{}
+ err = fakeClient.Get(ctx, client.ObjectKey{Name: expectedServeSvcName, Namespace: namespace}, reconciledSvc)
+
+ // No-op case, no service should be created when RayCluster is nil.
+ if tt.rayCluster == nil {
+ assert.True(t, errors.IsNotFound(err))
+ return
+ }
+
+ // Otherwise, a valid serve service should be created for the RayCluster.
+ require.NoError(t, err, "The Serve service should exist in the client")
+
+ // Validate the expected Serve service exists for the RayCluster.
+ require.NotNil(t, reconciledSvc)
+ assert.Equal(t, expectedServeSvcName, reconciledSvc.Name)
+
+			// Verify the Serve service selector, reusing the service fetched above instead of
+			// issuing a second identical Get.
+			expectedSelector := map[string]string{
+				utils.RayClusterLabelKey:               rayCluster.Name,
+				utils.RayClusterServingServiceLabelKey: "true",
+			}
+			assert.Equal(t, expectedSelector, reconciledSvc.Spec.Selector)
+
+ // Validate owner ref is set to the expected RayCluster.
+ if tt.expectServiceCreated {
+				require.Len(t, reconciledSvc.OwnerReferences, 1)
+				ownerRef := reconciledSvc.OwnerReferences[0]
+ assert.Equal(t, rayCluster.Name, ownerRef.Name)
+ assert.Equal(t, "RayCluster", ownerRef.Kind)
+ assert.Equal(t, rayCluster.UID, ownerRef.UID)
+ }
+ })
+ }
+}
diff --git a/ray-operator/controllers/ray/utils/consistency.go b/ray-operator/controllers/ray/utils/consistency.go
index 2c2ba0fe616..4d04e9f5e3d 100644
--- a/ray-operator/controllers/ray/utils/consistency.go
+++ b/ray-operator/controllers/ray/utils/consistency.go
@@ -4,6 +4,7 @@ import (
"reflect"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+ "github.com/ray-project/kuberay/ray-operator/pkg/features"
)
// Checks whether the old and new RayClusterStatus are inconsistent by comparing different fields. If the only
@@ -74,6 +75,15 @@ func inconsistentRayServiceStatus(oldStatus rayv1.RayServiceStatus, newStatus ra
}
}
+ if features.Enabled(features.RayServiceIncrementalUpgrade) {
+		// Also check for changes in IncrementalUpgrade related Status fields. These are pointer
+		// fields, so compare the values they point to rather than the pointer addresses.
+		if !reflect.DeepEqual(oldStatus.TrafficRoutedPercent, newStatus.TrafficRoutedPercent) ||
+			!reflect.DeepEqual(oldStatus.TargetCapacity, newStatus.TargetCapacity) ||
+			!reflect.DeepEqual(oldStatus.LastTrafficMigratedTime, newStatus.LastTrafficMigratedTime) {
+ return true
+ }
+ }
+
return false
}
diff --git a/ray-operator/controllers/ray/utils/constant.go b/ray-operator/controllers/ray/utils/constant.go
index 20e44e0f888..c4988850137 100644
--- a/ray-operator/controllers/ray/utils/constant.go
+++ b/ray-operator/controllers/ray/utils/constant.go
@@ -87,6 +87,10 @@ const (
MetricsPortName = "metrics"
ServingPortName = "serve"
+ // Gateway defaults for HTTP protocol
+ GatewayListenerPortName = "http"
+ DefaultGatewayListenerPort = 80
+
// The default AppProtocol for Kubernetes service
DefaultServiceAppProtocol = "tcp"
@@ -324,12 +328,22 @@ const (
RayClusterNotFound K8sEventType = "RayClusterNotFound"
// RayService event list
+ CreatedGateway K8sEventType = "CreatedGateway"
+ CreatedHTTPRoute K8sEventType = "CreatedHTTPRoute"
InvalidRayServiceSpec K8sEventType = "InvalidRayServiceSpec"
InvalidRayServiceMetadata K8sEventType = "InvalidRayServiceMetadata"
UpdatedHeadPodServeLabel K8sEventType = "UpdatedHeadPodServeLabel"
+ UpdatedGateway K8sEventType = "UpdatedGateway"
+ UpdatedHTTPRoute K8sEventType = "UpdatedHTTPRoute"
UpdatedServeApplications K8sEventType = "UpdatedServeApplications"
+ UpdatedServeTargetCapacity K8sEventType = "UpdatedServeTargetCapacity"
FailedToUpdateHeadPodServeLabel K8sEventType = "FailedToUpdateHeadPodServeLabel"
FailedToUpdateServeApplications K8sEventType = "FailedToUpdateServeApplications"
+ FailedToUpdateTargetCapacity K8sEventType = "FailedToUpdateTargetCapacity"
+ FailedToCreateGateway K8sEventType = "FailedToCreateGateway"
+ FailedToUpdateGateway K8sEventType = "FailedToUpdateGateway"
+ FailedToCreateHTTPRoute K8sEventType = "FailedToCreateHTTPRoute"
+ FailedToUpdateHTTPRoute K8sEventType = "FailedToUpdateHTTPRoute"
// Generic Pod event list
DeletedPod K8sEventType = "DeletedPod"
diff --git a/ray-operator/controllers/ray/utils/fake_serve_httpclient.go b/ray-operator/controllers/ray/utils/fake_serve_httpclient.go
index 21a3fdb91be..1bf0588c403 100644
--- a/ray-operator/controllers/ray/utils/fake_serve_httpclient.go
+++ b/ray-operator/controllers/ray/utils/fake_serve_httpclient.go
@@ -12,9 +12,10 @@ import (
)
type FakeRayDashboardClient struct {
- multiAppStatuses map[string]*utiltypes.ServeApplicationStatus
- GetJobInfoMock atomic.Pointer[func(context.Context, string) (*utiltypes.RayJobInfo, error)]
- serveDetails utiltypes.ServeDetails
+ multiAppStatuses map[string]*utiltypes.ServeApplicationStatus
+ GetJobInfoMock atomic.Pointer[func(context.Context, string) (*utiltypes.RayJobInfo, error)]
+ serveDetails utiltypes.ServeDetails
+ LastUpdatedConfig []byte
}
var _ dashboardclient.RayDashboardClientInterface = (*FakeRayDashboardClient)(nil)
@@ -22,7 +23,8 @@ var _ dashboardclient.RayDashboardClientInterface = (*FakeRayDashboardClient)(ni
func (r *FakeRayDashboardClient) InitClient(_ *http.Client, _ string) {
}
-func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, _ []byte) error {
+func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, configJson []byte) error {
+ r.LastUpdatedConfig = configJson
fmt.Print("UpdateDeployments fake succeeds.")
return nil
}
diff --git a/ray-operator/controllers/ray/utils/util.go b/ray-operator/controllers/ray/utils/util.go
index e7406b44c99..540162bf9f4 100644
--- a/ray-operator/controllers/ray/utils/util.go
+++ b/ray-operator/controllers/ray/utils/util.go
@@ -16,17 +16,21 @@ import (
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
+ meta "k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/json"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/client-go/discovery"
+ "k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils/dashboardclient"
+ "github.com/ray-project/kuberay/ray-operator/pkg/features"
)
const (
@@ -605,10 +609,10 @@ func GenerateJsonHash(obj interface{}) (string, error) {
// FindContainerPort searches for a specific port $portName in the container.
// If the port is found in the container, the corresponding port is returned.
// If the port is not found, the $defaultPort is returned instead.
-func FindContainerPort(container *corev1.Container, portName string, defaultPort int) int {
+func FindContainerPort(container *corev1.Container, portName string, defaultPort int32) int32 {
for _, port := range container.Ports {
if port.Name == portName {
- return int(port.ContainerPort)
+ return port.ContainerPort
}
}
return defaultPort
@@ -681,6 +685,115 @@ func GetRayClusterNameFromService(svc *corev1.Service) string {
return svc.Spec.Selector[RayClusterLabelKey]
}
+// IsGatewayReady checks if a Gateway is considered "ready".
+//
+// A Gateway is "ready" only if both the `Accepted` and `Programmed` conditions
+// are set to 'True'.
+//
+// 1. 'Accepted': Signifies that the Gateway controller understands and accepts
+// the Gateway resource. If 'False', it often indicates a conflict or an invalid
+// specification.
+//
+// 2. 'Programmed': Signifies that the underlying network infrastructure for the Gateway
+// (e.g. load balancer) has been successfully provisioned and configured.
+func IsGatewayReady(gatewayInstance *gwv1.Gateway) bool {
+ if gatewayInstance == nil {
+ return false
+ }
+
+ hasAccepted := meta.IsStatusConditionTrue(gatewayInstance.Status.Conditions, string(gwv1.GatewayConditionAccepted))
+ hasProgrammed := meta.IsStatusConditionTrue(gatewayInstance.Status.Conditions, string(gwv1.GatewayConditionProgrammed))
+
+ return hasAccepted && hasProgrammed
+}
+
+// IsHTTPRouteReady checks if an HTTPRoute is considered ready for a given Gateway.
+//
+// It returns true only if the route's parent status entry matching the Gateway has both
+// the 'Accepted' and 'ResolvedRefs' conditions set to 'True'.
+//
+// 1. 'Accepted': Signifies that the Gateway controller has validated the HTTPRoute's
+// configuration (e.g. syntax, filters, matching rules). An 'Accepted' status of
+// 'False' means the route's specification is invalid.
+//
+// 2. 'ResolvedRefs': Signifies that all references within the route are valid, exist,
+// and are resolvable by the Gateway.
+func IsHTTPRouteReady(gatewayInstance *gwv1.Gateway, httpRouteInstance *gwv1.HTTPRoute) bool {
+ if httpRouteInstance == nil {
+ return false
+ }
+ for _, parent := range httpRouteInstance.Status.Parents {
+ if parent.ParentRef.Name != gwv1.ObjectName(gatewayInstance.Name) {
+ continue
+ }
+ if parent.ParentRef.Namespace != nil && *parent.ParentRef.Namespace != gwv1.Namespace(gatewayInstance.Namespace) {
+ continue
+ }
+ hasAccepted := meta.IsStatusConditionTrue(parent.Conditions, string(gwv1.RouteConditionAccepted))
+ hasResolved := meta.IsStatusConditionTrue(parent.Conditions, string(gwv1.RouteConditionResolvedRefs))
+
+ if hasAccepted && hasResolved {
+ return true
+ }
+ }
+ return false
+}
+
+func IsIncrementalUpgradeEnabled(spec *rayv1.RayServiceSpec) bool {
+ if !features.Enabled(features.RayServiceIncrementalUpgrade) {
+ return false
+ }
+	return spec != nil && spec.UpgradeStrategy != nil && spec.UpgradeStrategy.Type != nil &&
+		*spec.UpgradeStrategy.Type == rayv1.NewClusterWithIncrementalUpgrade
+}
+
+func GetRayServiceClusterUpgradeOptions(spec *rayv1.RayServiceSpec) *rayv1.ClusterUpgradeOptions {
+ if spec != nil && spec.UpgradeStrategy != nil {
+ return spec.UpgradeStrategy.ClusterUpgradeOptions
+ }
+ return nil
+}
+
+// IsIncrementalUpgradeComplete checks if the conditions for completing an incremental upgrade are met.
+func IsIncrementalUpgradeComplete(rayServiceInstance *rayv1.RayService, pendingCluster *rayv1.RayCluster) bool {
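+	// ptr.Deref with a -1 default ensures that unset status fields can never satisfy the completion checks.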
+ return pendingCluster != nil &&
+ ptr.Deref(rayServiceInstance.Status.ActiveServiceStatus.TargetCapacity, -1) == 0 &&
+ ptr.Deref(rayServiceInstance.Status.PendingServiceStatus.TrafficRoutedPercent, -1) == 100
+}
+
+// GetWeightsFromHTTPRoute parses a given HTTPRoute object and extracts the traffic weights
+// for the active and pending clusters (if present) of a RayService.
+func GetWeightsFromHTTPRoute(httpRoute *gwv1.HTTPRoute, rayServiceInstance *rayv1.RayService) (activeWeight int32, pendingWeight int32) {
+ var activeClusterName, pendingClusterName string
+ if rayServiceInstance != nil {
+ activeClusterName = rayServiceInstance.Status.ActiveServiceStatus.RayClusterName
+ pendingClusterName = rayServiceInstance.Status.PendingServiceStatus.RayClusterName
+ }
+
+	// Default to -1 when the weights can't be detected, so that TrafficRoutedPercent is not
+	// set before the HTTPRoute actually exists.
+ activeWeight = -1
+ pendingWeight = -1
+
+ if httpRoute == nil || len(httpRoute.Spec.Rules) == 0 || len(httpRoute.Spec.Rules[0].BackendRefs) == 0 {
+ return
+ }
+
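+	// Backend service names are generated from the owning cluster's name (see GenerateServeServiceName),
+	// so a substring match is enough to attribute each backendRef's weight to a cluster.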
+ for _, backendRef := range httpRoute.Spec.Rules[0].BackendRefs {
+ backendName := string(backendRef.Name)
+ weight := ptr.Deref(backendRef.Weight, 0)
+
+ if activeClusterName != "" && strings.Contains(backendName, activeClusterName) {
+ activeWeight = weight
+ }
+ if pendingClusterName != "" && strings.Contains(backendName, pendingClusterName) {
+ pendingWeight = weight
+ }
+ }
+
+ return
+}
+
// Check where we are running. We are trying to distinguish here whether
// this is vanilla kubernetes cluster or Openshift
func GetClusterType() bool {
diff --git a/ray-operator/controllers/ray/utils/util_test.go b/ray-operator/controllers/ray/utils/util_test.go
index 851e37af3ea..8bd37a2e7f8 100644
--- a/ray-operator/controllers/ray/utils/util_test.go
+++ b/ray-operator/controllers/ray/utils/util_test.go
@@ -12,9 +12,11 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils/dashboardclient"
+ "github.com/ray-project/kuberay/ray-operator/pkg/features"
)
func TestGetClusterDomainName(t *testing.T) {
@@ -486,11 +488,11 @@ func TestFindContainerPort(t *testing.T) {
},
}
port := FindContainerPort(&container, "port1", -1)
- assert.NotEqual(t, port, -1, "expect port1 found")
+ assert.NotEqual(t, port, int32(-1), "expect port1 found")
port = FindContainerPort(&container, "port2", -1)
- assert.NotEqual(t, port, -1, "expect port2 found")
+ assert.NotEqual(t, port, int32(-1), "expect port2 found")
port = FindContainerPort(&container, "port3", -1)
- assert.Equal(t, port, -1, "expect port3 not found")
+ assert.Equal(t, port, int32(-1), "expect port3 not found")
}
func TestGenerateHeadServiceName(t *testing.T) {
@@ -1248,6 +1250,235 @@ func TestCalculateResources(t *testing.T) {
}
}
+// makeGatewayWithCondition returns a Gateway whose status carries the requested conditions, for testing.
+func makeGatewayWithCondition(accepted bool, programmed bool) *gwv1.Gateway {
+ var conditions []metav1.Condition
+
+ if accepted {
+ conditions = append(conditions, metav1.Condition{
+ Type: string(gwv1.GatewayConditionAccepted),
+ Status: metav1.ConditionTrue,
+ })
+ }
+
+ if programmed {
+ conditions = append(conditions, metav1.Condition{
+ Type: string(gwv1.GatewayConditionProgrammed),
+ Status: metav1.ConditionTrue,
+ })
+ }
+
+ return &gwv1.Gateway{
+ Status: gwv1.GatewayStatus{
+ Conditions: conditions,
+ },
+ }
+}
+
+func TestIsGatewayReady(t *testing.T) {
+ tests := []struct {
+ gateway *gwv1.Gateway
+ name string
+ expected bool
+ }{
+ {
+ name: "missing Gateway instance",
+ gateway: nil,
+ expected: false,
+ },
+ {
+ name: "Gateway created with Programmed condition only",
+ gateway: makeGatewayWithCondition(false, true),
+ expected: false,
+ },
+ {
+ name: "Gateway created with Accepted condition only",
+ gateway: makeGatewayWithCondition(true, false),
+ expected: false,
+ },
+ {
+ name: "Gateway created with both Accepted and Programmed conditions",
+ gateway: makeGatewayWithCondition(true, true),
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, tt.expected, IsGatewayReady(tt.gateway))
+ })
+ }
+}
+
+// makeHTTPRouteWithParentRef returns an HTTPRoute whose status has a parent entry with the given conditions, for testing.
+func makeHTTPRouteWithParentRef(
+ parentRefName string,
+ namespace string,
+ accepted bool,
+ resolvedRefs bool,
+) *gwv1.HTTPRoute {
+ var acceptedStatus, resolvedRefsStatus metav1.ConditionStatus
+ if accepted {
+ acceptedStatus = metav1.ConditionTrue
+ } else {
+ acceptedStatus = metav1.ConditionFalse
+ }
+ if resolvedRefs {
+ resolvedRefsStatus = metav1.ConditionTrue
+ } else {
+ resolvedRefsStatus = metav1.ConditionFalse
+ }
+
+ return &gwv1.HTTPRoute{
+ Status: gwv1.HTTPRouteStatus{
+ RouteStatus: gwv1.RouteStatus{
+ Parents: []gwv1.RouteParentStatus{
+ {
+ ParentRef: gwv1.ParentReference{
+ Name: gwv1.ObjectName(parentRefName),
+ Namespace: ptr.To(gwv1.Namespace(namespace)),
+ },
+ Conditions: []metav1.Condition{
+ {
+ Type: string(gwv1.RouteConditionAccepted),
+ Status: acceptedStatus,
+ },
+ {
+ Type: string(gwv1.RouteConditionResolvedRefs),
+ Status: resolvedRefsStatus,
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
+func TestIsHTTPRouteReady(t *testing.T) {
+ gateway := &gwv1.Gateway{
+ ObjectMeta: metav1.ObjectMeta{Name: "test-gateway", Namespace: "test-ns"},
+ }
+
+ tests := []struct {
+ httpRoute *gwv1.HTTPRoute
+ name string
+ expected bool
+ }{
+ {
+ name: "missing HTTPRoute",
+ httpRoute: nil,
+ expected: false,
+ },
+ {
+ name: "ParentRef does not match",
+ httpRoute: makeHTTPRouteWithParentRef("not-a-match", "other-test-ns", true, true),
+ expected: false,
+ },
+ {
+ name: "matching ParentRef with Accepted condition but without ResolvedRefs",
+ httpRoute: makeHTTPRouteWithParentRef("test-gateway", "test-ns", true, false),
+ expected: false,
+ },
+ {
+ name: "matching ParentRef with ResolvedRefs but without Accepted",
+ httpRoute: makeHTTPRouteWithParentRef("test-gateway", "test-ns", false, true),
+ expected: false,
+ },
+ {
+ name: "ready HTTPRoute with all required conditions",
+ httpRoute: makeHTTPRouteWithParentRef("test-gateway", "test-ns", true, true),
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ assert.Equal(t, tt.expected, IsHTTPRouteReady(gateway, tt.httpRoute))
+ })
+ }
+}
+
+func TestIsIncrementalUpgradeEnabled(t *testing.T) {
+ tests := []struct {
+ spec *rayv1.RayServiceSpec
+ name string
+ featureEnabled bool
+ expected bool
+ }{
+ {
+ name: "missing UpgradeStrategy Type",
+ spec: &rayv1.RayServiceSpec{},
+ featureEnabled: true,
+ expected: false,
+ },
+ {
+ name: "UpgradeStrategy Type is NewClusterWithIncrementalUpgrade but feature disabled",
+ spec: &rayv1.RayServiceSpec{
+ UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ },
+ },
+ featureEnabled: false,
+ expected: false,
+ },
+ {
+ name: "UpgradeStrategy Type is NewClusterWithIncrementalUpgrade and feature enabled",
+ spec: &rayv1.RayServiceSpec{
+ UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ },
+ },
+ featureEnabled: true,
+ expected: true,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ features.SetFeatureGateDuringTest(t, features.RayServiceIncrementalUpgrade, tc.featureEnabled)
+ assert.Equal(t, tc.expected, IsIncrementalUpgradeEnabled(tc.spec))
+ })
+ }
+}
+
+func TestGetRayServiceClusterUpgradeOptions(t *testing.T) {
+ upgradeOptions := &rayv1.ClusterUpgradeOptions{GatewayClassName: "gateway-class"}
+
+ tests := []struct {
+ rayServiceSpec *rayv1.RayServiceSpec
+ expectedOptions *rayv1.ClusterUpgradeOptions
+ name string
+ }{
+ {
+ name: "RayServiceSpec is nil, return nil ClusterUpgradeOptions",
+ rayServiceSpec: nil,
+ expectedOptions: nil,
+ },
+ {
+ name: "UpgradeStrategy is nil, return nil ClusterUpgradeOptions",
+ rayServiceSpec: &rayv1.RayServiceSpec{},
+ expectedOptions: nil,
+ },
+ {
+ name: "Valid ClusterUpgradeOptions",
+ rayServiceSpec: &rayv1.RayServiceSpec{
+ UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{
+ ClusterUpgradeOptions: upgradeOptions,
+ },
+ },
+ expectedOptions: upgradeOptions,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ actualOptions := GetRayServiceClusterUpgradeOptions(tt.rayServiceSpec)
+ assert.Equal(t, tt.expectedOptions, actualOptions)
+ })
+ }
+}
+
func TestGetContainerCommand(t *testing.T) {
tests := []struct {
name string
@@ -1291,3 +1522,96 @@ func TestGetContainerCommand(t *testing.T) {
})
}
}
+
+func TestGetWeightsFromHTTPRoute(t *testing.T) {
+ activeClusterName := "rayservice-active"
+ pendingClusterName := "rayservice-pending"
+
+ // Helper to create a RayService with specified cluster names in its status.
+ makeRayService := func(activeName, pendingName string) *rayv1.RayService {
+ return &rayv1.RayService{
+ Status: rayv1.RayServiceStatuses{
+ ActiveServiceStatus: rayv1.RayServiceStatus{RayClusterName: activeName},
+ PendingServiceStatus: rayv1.RayServiceStatus{RayClusterName: pendingName},
+ },
+ }
+ }
+
+ // Helper to create an HTTPRoute with specified backend weights.
+ makeHTTPRoute := func(activeWeight, pendingWeight *int32) *gwv1.HTTPRoute {
+ backends := []gwv1.HTTPBackendRef{}
+ if activeWeight != nil {
+ backends = append(backends, gwv1.HTTPBackendRef{
+ BackendRef: gwv1.BackendRef{
+ BackendObjectReference: gwv1.BackendObjectReference{Name: gwv1.ObjectName(GenerateServeServiceName(activeClusterName))},
+ Weight: activeWeight,
+ },
+ })
+ }
+ if pendingWeight != nil {
+ backends = append(backends, gwv1.HTTPBackendRef{
+ BackendRef: gwv1.BackendRef{
+ BackendObjectReference: gwv1.BackendObjectReference{Name: gwv1.ObjectName(GenerateServeServiceName(pendingClusterName))},
+ Weight: pendingWeight,
+ },
+ })
+ }
+ return &gwv1.HTTPRoute{
+ Spec: gwv1.HTTPRouteSpec{
+ Rules: []gwv1.HTTPRouteRule{{BackendRefs: backends}},
+ },
+ }
+ }
+
+ tests := []struct {
+ httpRoute *gwv1.HTTPRoute
+ rayService *rayv1.RayService
+ name string
+ expectedActive int32
+ expectedPending int32
+ }{
+ {
+ name: "No HTTPRoute, return defaults for both weights",
+ httpRoute: nil,
+ rayService: makeRayService(activeClusterName, ""),
+ expectedActive: -1,
+ expectedPending: -1,
+ },
+ {
+ name: "HTTPRoute with missing backends, return defaults for both weights",
+ httpRoute: &gwv1.HTTPRoute{Spec: gwv1.HTTPRouteSpec{Rules: []gwv1.HTTPRouteRule{{}}}},
+ rayService: makeRayService(activeClusterName, pendingClusterName),
+ expectedActive: -1,
+ expectedPending: -1,
+ },
+ {
+ name: "Valid weights returned for both active and pending clusters",
+ httpRoute: makeHTTPRoute(ptr.To(int32(80)), ptr.To(int32(20))),
+ rayService: makeRayService(activeClusterName, pendingClusterName),
+ expectedActive: 80,
+ expectedPending: 20,
+ },
+ {
+ name: "Valid HTTPRoute with only active cluster backend",
+ httpRoute: makeHTTPRoute(ptr.To(int32(100)), nil),
+ rayService: makeRayService(activeClusterName, ""),
+ expectedActive: 100,
+ expectedPending: -1,
+ },
+ {
+ name: "Valid HTTPRoute with only pending cluster backend",
+ httpRoute: makeHTTPRoute(nil, ptr.To(int32(100))),
+ rayService: makeRayService("", pendingClusterName),
+ expectedActive: -1,
+ expectedPending: 100,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ active, pending := GetWeightsFromHTTPRoute(tt.httpRoute, tt.rayService)
+ assert.Equal(t, tt.expectedActive, active, "Active weight mismatch")
+ assert.Equal(t, tt.expectedPending, pending, "Pending weight mismatch")
+ })
+ }
+}
diff --git a/ray-operator/controllers/ray/utils/validation.go b/ray-operator/controllers/ray/utils/validation.go
index 7e9097dd846..e03bbd319b8 100644
--- a/ray-operator/controllers/ray/utils/validation.go
+++ b/ray-operator/controllers/ray/utils/validation.go
@@ -306,12 +306,13 @@ func ValidateRayServiceSpec(rayService *rayv1.RayService) error {
return fmt.Errorf("spec.rayClusterConfig.headGroupSpec.headService.metadata.name should not be set")
}
- // only NewCluster and None are valid upgradeType
+	// only NewClusterWithIncrementalUpgrade, NewCluster, and None are valid upgrade types
if rayService.Spec.UpgradeStrategy != nil &&
rayService.Spec.UpgradeStrategy.Type != nil &&
*rayService.Spec.UpgradeStrategy.Type != rayv1.None &&
- *rayService.Spec.UpgradeStrategy.Type != rayv1.NewCluster {
- return fmt.Errorf("Spec.UpgradeStrategy.Type value %s is invalid, valid options are %s or %s", *rayService.Spec.UpgradeStrategy.Type, rayv1.NewCluster, rayv1.None)
+ *rayService.Spec.UpgradeStrategy.Type != rayv1.NewCluster &&
+ *rayService.Spec.UpgradeStrategy.Type != rayv1.NewClusterWithIncrementalUpgrade {
+ return fmt.Errorf("Spec.UpgradeStrategy.Type value %s is invalid, valid options are %s, %s, or %s", *rayService.Spec.UpgradeStrategy.Type, rayv1.NewClusterWithIncrementalUpgrade, rayv1.NewCluster, rayv1.None)
}
if rayService.Spec.RayClusterDeletionDelaySeconds != nil &&
@@ -319,6 +320,41 @@ func ValidateRayServiceSpec(rayService *rayv1.RayService) error {
return fmt.Errorf("Spec.RayClusterDeletionDelaySeconds should be a non-negative integer, got %d", *rayService.Spec.RayClusterDeletionDelaySeconds)
}
+ // If type is NewClusterWithIncrementalUpgrade, validate the ClusterUpgradeOptions
+ if IsIncrementalUpgradeEnabled(&rayService.Spec) {
+ return ValidateClusterUpgradeOptions(rayService)
+ }
+
+ return nil
+}
+
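+// ValidateClusterUpgradeOptions validates the ClusterUpgradeOptions of a RayService using the
+// NewClusterWithIncrementalUpgrade strategy, which requires autoscaling to be enabled.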
+func ValidateClusterUpgradeOptions(rayService *rayv1.RayService) error {
+ if !IsAutoscalingEnabled(&rayService.Spec.RayClusterSpec) {
+ return fmt.Errorf("Ray Autoscaler is required for NewClusterWithIncrementalUpgrade")
+ }
+
+ options := rayService.Spec.UpgradeStrategy.ClusterUpgradeOptions
+ if options == nil {
+ return fmt.Errorf("ClusterUpgradeOptions are required for NewClusterWithIncrementalUpgrade")
+ }
+
+	// MaxSurgePercent defaults to 100% if unset, so only validate it when set.
+	if options.MaxSurgePercent != nil && (*options.MaxSurgePercent < 0 || *options.MaxSurgePercent > 100) {
+		return fmt.Errorf("maxSurgePercent must be between 0 and 100")
+ }
+
+	if options.StepSizePercent == nil || *options.StepSizePercent < 0 || *options.StepSizePercent > 100 {
+		return fmt.Errorf("stepSizePercent is required and must be between 0 and 100")
+ }
+
+	if options.IntervalSeconds == nil || *options.IntervalSeconds <= 0 {
+		return fmt.Errorf("intervalSeconds is required and must be greater than 0")
+ }
+
+ if options.GatewayClassName == "" {
+ return fmt.Errorf("gatewayClassName is required for NewClusterWithIncrementalUpgrade")
+ }
+
return nil
}
diff --git a/ray-operator/controllers/ray/utils/validation_test.go b/ray-operator/controllers/ray/utils/validation_test.go
index dbee9e612e7..2f3f7a64502 100644
--- a/ray-operator/controllers/ray/utils/validation_test.go
+++ b/ray-operator/controllers/ray/utils/validation_test.go
@@ -1664,3 +1664,112 @@ func createBasicRayClusterSpec() *rayv1.RayClusterSpec {
},
}
}
+
+func TestValidateClusterUpgradeOptions(t *testing.T) {
+ tests := []struct {
+ maxSurgePercent *int32
+ stepSizePercent *int32
+ intervalSeconds *int32
+ name string
+ gatewayClassName string
+ spec rayv1.RayServiceSpec
+ enableAutoscaling bool
+ expectError bool
+ }{
+ {
+ name: "valid config",
+ maxSurgePercent: ptr.To(int32(50)),
+ stepSizePercent: ptr.To(int32(50)),
+ intervalSeconds: ptr.To(int32(10)),
+ gatewayClassName: "istio",
+ enableAutoscaling: true,
+ expectError: false,
+ },
+ {
+ name: "missing autoscaler",
+ maxSurgePercent: ptr.To(int32(50)),
+ stepSizePercent: ptr.To(int32(50)),
+ intervalSeconds: ptr.To(int32(10)),
+ gatewayClassName: "istio",
+ enableAutoscaling: false,
+ expectError: true,
+ },
+ {
+ name: "missing options",
+ enableAutoscaling: true,
+ expectError: true,
+ },
+ {
+ name: "invalid MaxSurgePercent",
+ maxSurgePercent: ptr.To(int32(200)),
+ stepSizePercent: ptr.To(int32(50)),
+ intervalSeconds: ptr.To(int32(10)),
+ gatewayClassName: "istio",
+ enableAutoscaling: true,
+ expectError: true,
+ },
+ {
+ name: "missing StepSizePercent",
+ maxSurgePercent: ptr.To(int32(50)),
+ intervalSeconds: ptr.To(int32(10)),
+ gatewayClassName: "istio",
+ enableAutoscaling: true,
+ expectError: true,
+ },
+ {
+ name: "invalid IntervalSeconds",
+ maxSurgePercent: ptr.To(int32(50)),
+ stepSizePercent: ptr.To(int32(50)),
+ intervalSeconds: ptr.To(int32(0)),
+ gatewayClassName: "istio",
+ enableAutoscaling: true,
+ expectError: true,
+ },
+ {
+ name: "missing GatewayClassName",
+ maxSurgePercent: ptr.To(int32(50)),
+ stepSizePercent: ptr.To(int32(50)),
+ intervalSeconds: ptr.To(int32(10)),
+ enableAutoscaling: true,
+ expectError: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ var upgradeStrategy *rayv1.RayServiceUpgradeStrategy
+ if tt.maxSurgePercent != nil || tt.stepSizePercent != nil || tt.intervalSeconds != nil || tt.gatewayClassName != "" {
+ upgradeStrategy = &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ ClusterUpgradeOptions: &rayv1.ClusterUpgradeOptions{
+ MaxSurgePercent: tt.maxSurgePercent,
+ StepSizePercent: tt.stepSizePercent,
+ IntervalSeconds: tt.intervalSeconds,
+ GatewayClassName: tt.gatewayClassName,
+ },
+ }
+ } else if tt.expectError {
+ upgradeStrategy = &rayv1.RayServiceUpgradeStrategy{
+ Type: ptr.To(rayv1.NewClusterWithIncrementalUpgrade),
+ }
+ }
+
+ rayClusterSpec := *createBasicRayClusterSpec()
+ rayClusterSpec.EnableInTreeAutoscaling = ptr.To(tt.enableAutoscaling)
+
+ rayService := &rayv1.RayService{
+ Spec: rayv1.RayServiceSpec{
+ RayClusterSpec: rayClusterSpec,
+ UpgradeStrategy: upgradeStrategy,
+ },
+ }
+
+ err := ValidateClusterUpgradeOptions(rayService)
+ if tt.expectError {
+ require.Error(t, err, tt.name)
+ } else {
+ require.NoError(t, err, tt.name)
+ }
+ })
+ }
+}
diff --git a/ray-operator/go.mod b/ray-operator/go.mod
index 94d155da29f..78f3870ae24 100644
--- a/ray-operator/go.mod
+++ b/ray-operator/go.mod
@@ -4,22 +4,21 @@ go 1.24.0
require (
github.com/Masterminds/semver/v3 v3.3.1
+ github.com/coder/websocket v1.8.13
github.com/go-logr/logr v1.4.3
github.com/go-logr/zapr v1.3.0
- github.com/google/go-cmp v0.7.0
github.com/jarcoal/httpmock v1.4.0
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
github.com/openshift/api v0.0.0-20250602203052-b29811a290c7
github.com/orcaman/concurrent-map/v2 v2.0.1
- github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.22.0
+ github.com/spf13/pflag v1.0.6
github.com/stretchr/testify v1.10.0
go.uber.org/mock v0.5.2
go.uber.org/zap v1.27.0
gopkg.in/natefinch/lumberjack.v2 v2.2.1
k8s.io/api v0.33.1
- k8s.io/apiextensions-apiserver v0.33.1
k8s.io/apimachinery v0.33.1
k8s.io/apiserver v0.33.1
k8s.io/client-go v0.33.1
@@ -28,6 +27,7 @@ require (
k8s.io/klog/v2 v2.130.1
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979
sigs.k8s.io/controller-runtime v0.21.0
+ sigs.k8s.io/gateway-api v1.3.0
sigs.k8s.io/scheduler-plugins v0.31.8
sigs.k8s.io/structured-merge-diff/v4 v4.7.0
sigs.k8s.io/yaml v1.4.0
@@ -38,19 +38,19 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
- github.com/coder/websocket v1.8.13 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
- github.com/emicklei/go-restful/v3 v3.11.0 // indirect
+ github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
- github.com/go-openapi/jsonreference v0.20.2 // indirect
+ github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.9 // indirect
+ github.com/google/go-cmp v0.7.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
@@ -62,11 +62,11 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
+ github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
- github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.opentelemetry.io/otel v1.33.0 // indirect
@@ -74,19 +74,20 @@ require (
go.uber.org/automaxprocs v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/mod v0.24.0 // indirect
- golang.org/x/net v0.38.0 // indirect
+ golang.org/x/net v0.39.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
- golang.org/x/sync v0.12.0 // indirect
+ golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
- golang.org/x/term v0.30.0 // indirect
- golang.org/x/text v0.23.0 // indirect
+ golang.org/x/term v0.31.0 // indirect
+ golang.org/x/text v0.24.0 // indirect
golang.org/x/time v0.9.0 // indirect
golang.org/x/tools v0.31.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
- google.golang.org/protobuf v1.36.5 // indirect
+ google.golang.org/protobuf v1.36.6 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
+ k8s.io/apiextensions-apiserver v0.33.1 // indirect
k8s.io/gengo/v2 v2.0.0-20250207200755-1244d31929d7 // indirect
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
diff --git a/ray-operator/go.sum b/ray-operator/go.sum
index 6d6e0b27493..2d1825ab836 100644
--- a/ray-operator/go.sum
+++ b/ray-operator/go.sum
@@ -10,13 +10,12 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coder/websocket v1.8.13 h1:f3QZdXy7uGVz+4uCJy2nTZyM0yTBj8yANEHhqlXZ9FE=
github.com/coder/websocket v1.8.13/go.mod h1:LNVeNrXQZfe5qhS9ALED3uA+l5pPqvwXg3CKoDBB2gs=
-github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
-github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/emicklei/go-restful/v3 v3.12.0 h1:y2DdzBAURM29NFF94q6RaY4vjIH1rtwDapwQtU84iWk=
+github.com/emicklei/go-restful/v3 v3.12.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U=
github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
@@ -29,12 +28,10 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
-github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
-github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
-github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
-github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
+github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
@@ -67,11 +64,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
-github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
@@ -116,17 +110,12 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
-github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
-github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
+github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
@@ -158,26 +147,26 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
-golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
+golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
-golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
+golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
-golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
+golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o=
+golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
-golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
+golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
+golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -192,8 +181,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
-google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
-google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
+google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
+google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -203,7 +192,6 @@ gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.33.1 h1:tA6Cf3bHnLIrUK4IqEgb2v++/GYUtqiu9sRVk3iBXyw=
@@ -230,6 +218,8 @@ k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8=
sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM=
+sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M=
+sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
diff --git a/ray-operator/main.go b/ray-operator/main.go
index 5666a438733..ceba7d4772e 100644
--- a/ray-operator/main.go
+++ b/ray-operator/main.go
@@ -27,6 +27,7 @@ import (
k8szap "sigs.k8s.io/controller-runtime/pkg/log/zap"
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
configapi "github.com/ray-project/kuberay/ray-operator/apis/config/v1alpha1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
@@ -191,6 +192,10 @@ func main() {
}
features.LogFeatureGates(setupLog)
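+	// Gateway API types are only used by the incremental upgrade flow, so register
+	// them with the scheme only when the feature gate is enabled.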
+ if features.Enabled(features.RayServiceIncrementalUpgrade) {
+ utilruntime.Must(gwv1.AddToScheme(scheme))
+ }
+
// Manager options
options := ctrl.Options{
Cache: cache.Options{
diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/clusterupgradeoptions.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/clusterupgradeoptions.go
new file mode 100644
index 00000000000..1e43d339716
--- /dev/null
+++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/clusterupgradeoptions.go
@@ -0,0 +1,50 @@
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1
+
+// ClusterUpgradeOptionsApplyConfiguration represents a declarative configuration of the ClusterUpgradeOptions type for use
+// with apply.
+type ClusterUpgradeOptionsApplyConfiguration struct {
+ MaxSurgePercent *int32 `json:"maxSurgePercent,omitempty"`
+ StepSizePercent *int32 `json:"stepSizePercent,omitempty"`
+ IntervalSeconds *int32 `json:"intervalSeconds,omitempty"`
+ GatewayClassName *string `json:"gatewayClassName,omitempty"`
+}
+
+// ClusterUpgradeOptionsApplyConfiguration constructs a declarative configuration of the ClusterUpgradeOptions type for use with
+// apply.
+func ClusterUpgradeOptions() *ClusterUpgradeOptionsApplyConfiguration {
+ return &ClusterUpgradeOptionsApplyConfiguration{}
+}
+
+// WithMaxSurgePercent sets the MaxSurgePercent field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the MaxSurgePercent field is set to the value of the last call.
+func (b *ClusterUpgradeOptionsApplyConfiguration) WithMaxSurgePercent(value int32) *ClusterUpgradeOptionsApplyConfiguration {
+ b.MaxSurgePercent = &value
+ return b
+}
+
+// WithStepSizePercent sets the StepSizePercent field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the StepSizePercent field is set to the value of the last call.
+func (b *ClusterUpgradeOptionsApplyConfiguration) WithStepSizePercent(value int32) *ClusterUpgradeOptionsApplyConfiguration {
+ b.StepSizePercent = &value
+ return b
+}
+
+// WithIntervalSeconds sets the IntervalSeconds field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the IntervalSeconds field is set to the value of the last call.
+func (b *ClusterUpgradeOptionsApplyConfiguration) WithIntervalSeconds(value int32) *ClusterUpgradeOptionsApplyConfiguration {
+ b.IntervalSeconds = &value
+ return b
+}
+
+// WithGatewayClassName sets the GatewayClassName field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the GatewayClassName field is set to the value of the last call.
+func (b *ClusterUpgradeOptionsApplyConfiguration) WithGatewayClassName(value string) *ClusterUpgradeOptionsApplyConfiguration {
+ b.GatewayClassName = &value
+ return b
+}
diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatus.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatus.go
index b0fcd8032bb..2d7f2984cef 100644
--- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatus.go
+++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicestatus.go
@@ -2,12 +2,19 @@
package v1
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
// RayServiceStatusApplyConfiguration represents a declarative configuration of the RayServiceStatus type for use
// with apply.
type RayServiceStatusApplyConfiguration struct {
- Applications map[string]AppStatusApplyConfiguration `json:"applicationStatuses,omitempty"`
- RayClusterName *string `json:"rayClusterName,omitempty"`
- RayClusterStatus *RayClusterStatusApplyConfiguration `json:"rayClusterStatus,omitempty"`
+ Applications map[string]AppStatusApplyConfiguration `json:"applicationStatuses,omitempty"`
+ TargetCapacity *int32 `json:"targetCapacity,omitempty"`
+ TrafficRoutedPercent *int32 `json:"trafficRoutedPercent,omitempty"`
+ LastTrafficMigratedTime *metav1.Time `json:"lastTrafficMigratedTime,omitempty"`
+ RayClusterName *string `json:"rayClusterName,omitempty"`
+ RayClusterStatus *RayClusterStatusApplyConfiguration `json:"rayClusterStatus,omitempty"`
}
// RayServiceStatusApplyConfiguration constructs a declarative configuration of the RayServiceStatus type for use with
@@ -30,6 +37,30 @@ func (b *RayServiceStatusApplyConfiguration) WithApplications(entries map[string
return b
}
+// WithTargetCapacity sets the TargetCapacity field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the TargetCapacity field is set to the value of the last call.
+func (b *RayServiceStatusApplyConfiguration) WithTargetCapacity(value int32) *RayServiceStatusApplyConfiguration {
+ b.TargetCapacity = &value
+ return b
+}
+
+// WithTrafficRoutedPercent sets the TrafficRoutedPercent field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the TrafficRoutedPercent field is set to the value of the last call.
+func (b *RayServiceStatusApplyConfiguration) WithTrafficRoutedPercent(value int32) *RayServiceStatusApplyConfiguration {
+ b.TrafficRoutedPercent = &value
+ return b
+}
+
+// WithLastTrafficMigratedTime sets the LastTrafficMigratedTime field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the LastTrafficMigratedTime field is set to the value of the last call.
+func (b *RayServiceStatusApplyConfiguration) WithLastTrafficMigratedTime(value metav1.Time) *RayServiceStatusApplyConfiguration {
+ b.LastTrafficMigratedTime = &value
+ return b
+}
+
// WithRayClusterName sets the RayClusterName field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the RayClusterName field is set to the value of the last call.
diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go
index 361a98f6ac9..c8cfc02aed6 100644
--- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go
+++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go
@@ -9,7 +9,8 @@ import (
// RayServiceUpgradeStrategyApplyConfiguration represents a declarative configuration of the RayServiceUpgradeStrategy type for use
// with apply.
type RayServiceUpgradeStrategyApplyConfiguration struct {
- Type *rayv1.RayServiceUpgradeType `json:"type,omitempty"`
+ Type *rayv1.RayServiceUpgradeType `json:"type,omitempty"`
+ ClusterUpgradeOptions *ClusterUpgradeOptionsApplyConfiguration `json:"clusterUpgradeOptions,omitempty"`
}
// RayServiceUpgradeStrategyApplyConfiguration constructs a declarative configuration of the RayServiceUpgradeStrategy type for use with
@@ -25,3 +26,11 @@ func (b *RayServiceUpgradeStrategyApplyConfiguration) WithType(value rayv1.RaySe
b.Type = &value
return b
}
+
+// WithClusterUpgradeOptions sets the ClusterUpgradeOptions field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ClusterUpgradeOptions field is set to the value of the last call.
+func (b *RayServiceUpgradeStrategyApplyConfiguration) WithClusterUpgradeOptions(value *ClusterUpgradeOptionsApplyConfiguration) *RayServiceUpgradeStrategyApplyConfiguration {
+ b.ClusterUpgradeOptions = value
+ return b
+}
diff --git a/ray-operator/pkg/client/applyconfiguration/utils.go b/ray-operator/pkg/client/applyconfiguration/utils.go
index 050733b0c5e..feecbde7f06 100644
--- a/ray-operator/pkg/client/applyconfiguration/utils.go
+++ b/ray-operator/pkg/client/applyconfiguration/utils.go
@@ -20,6 +20,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} {
return &rayv1.AppStatusApplyConfiguration{}
case v1.SchemeGroupVersion.WithKind("AutoscalerOptions"):
return &rayv1.AutoscalerOptionsApplyConfiguration{}
+ case v1.SchemeGroupVersion.WithKind("ClusterUpgradeOptions"):
+ return &rayv1.ClusterUpgradeOptionsApplyConfiguration{}
case v1.SchemeGroupVersion.WithKind("DeletionCondition"):
return &rayv1.DeletionConditionApplyConfiguration{}
case v1.SchemeGroupVersion.WithKind("DeletionPolicy"):
diff --git a/ray-operator/pkg/features/features.go b/ray-operator/pkg/features/features.go
index 5aedc155c81..16b23ab83ac 100644
--- a/ray-operator/pkg/features/features.go
+++ b/ray-operator/pkg/features/features.go
@@ -30,6 +30,13 @@ const (
// alpha: v1.0
// Enables multi-host worker indexing
RayMultiHostIndexing featuregate.Feature = "RayMultiHostIndexing"
+
+ // owner: @ryanaoleary
+ // rep: N/A
+ // alpha: v1.0
+ //
+	// Enables the NewClusterWithIncrementalUpgrade type for RayService zero-downtime upgrades.
+ RayServiceIncrementalUpgrade featuregate.Feature = "RayServiceIncrementalUpgrade"
)
func init() {
@@ -37,9 +44,10 @@ func init() {
}
var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
- RayClusterStatusConditions: {Default: true, PreRelease: featuregate.Beta},
- RayJobDeletionPolicy: {Default: false, PreRelease: featuregate.Alpha},
- RayMultiHostIndexing: {Default: false, PreRelease: featuregate.Alpha},
+ RayClusterStatusConditions: {Default: true, PreRelease: featuregate.Beta},
+ RayJobDeletionPolicy: {Default: false, PreRelease: featuregate.Alpha},
+ RayMultiHostIndexing: {Default: false, PreRelease: featuregate.Alpha},
+ RayServiceIncrementalUpgrade: {Default: false, PreRelease: featuregate.Alpha},
}
// SetFeatureGateDuringTest is a helper method to override feature gates in tests.
diff --git a/ray-operator/test/e2eincrementalupgrade/rayservice_incremental_upgrade_test.go b/ray-operator/test/e2eincrementalupgrade/rayservice_incremental_upgrade_test.go
new file mode 100644
index 00000000000..e9290bedda3
--- /dev/null
+++ b/ray-operator/test/e2eincrementalupgrade/rayservice_incremental_upgrade_test.go
@@ -0,0 +1,218 @@
+package e2eincrementalupgrade
+
+import (
+ "fmt"
+ "strings"
+	"testing"
+	"time"
+
+ . "github.com/onsi/gomega"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/utils/ptr"
+
+ rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+ "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
+ rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1"
+ "github.com/ray-project/kuberay/ray-operator/pkg/features"
+ . "github.com/ray-project/kuberay/ray-operator/test/support"
+)
+
+// GetHeadServiceExternalIP returns the external IP of a RayCluster's head service, used to poll the RayService.
+func GetHeadServiceExternalIP(t *testing.T, clusterName, namespace string) (string, error) {
+ test := With(t)
+
+ svc, err := test.Client().Core().CoreV1().Services(namespace).Get(test.Ctx(), clusterName+"-head-svc", metav1.GetOptions{})
+ if err != nil {
+ return "", err
+ }
+ if len(svc.Status.LoadBalancer.Ingress) == 0 {
+ return "", fmt.Errorf("no ingress for service %s", svc.Name)
+ }
+ return svc.Status.LoadBalancer.Ingress[0].IP, nil
+}
+
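+// TestRayServiceIncrementalUpgrade exercises NewClusterWithIncrementalUpgrade end to end: it
+// creates a RayService behind a Gateway, triggers an upgrade, and verifies that traffic and
+// capacity shift to the new cluster stepwise without dropping requests.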
+func TestRayServiceIncrementalUpgrade(t *testing.T) {
+ features.SetFeatureGateDuringTest(t, features.RayServiceIncrementalUpgrade, true)
+
+ test := With(t)
+ g := NewWithT(t)
+
+ namespace := test.NewTestNamespace()
+ rayServiceName := "incremental-rayservice"
+
+ // Create a RayService with IncrementalUpgrade enabled
+ stepSize := ptr.To(int32(25))
+ interval := ptr.To(int32(5))
+ maxSurge := ptr.To(int32(50))
+
+ rayServiceAC := rayv1ac.RayService(rayServiceName, namespace.Name).
+ WithSpec(IncrementalUpgradeRayServiceApplyConfiguration(stepSize, interval, maxSurge))
+ rayService, err := test.Client().Ray().RayV1().RayServices(namespace.Name).Apply(test.Ctx(), rayServiceAC, TestApplyOptions)
+ g.Expect(err).NotTo(HaveOccurred())
+ g.Expect(rayService).NotTo(BeNil())
+
+ LogWithTimestamp(test.T(), "Waiting for RayService %s/%s to be ready", rayService.Namespace, rayService.Name)
+ g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
+ Should(WithTransform(IsRayServiceReady, BeTrue()))
+
+ rayService, err = GetRayService(test, namespace.Name, rayServiceName)
+ g.Expect(err).NotTo(HaveOccurred())
+
+ // Validate Gateway and HTTPRoute objects have been created for incremental upgrade.
+ gatewayName := fmt.Sprintf("%s-%s", rayServiceName, "gateway")
+ LogWithTimestamp(test.T(), "Waiting for Gateway %s/%s to be ready", rayService.Namespace, gatewayName)
+ g.Eventually(Gateway(test, rayService.Namespace, gatewayName), TestTimeoutMedium).
+ Should(WithTransform(utils.IsGatewayReady, BeTrue()))
+
+ // Get the Gateway endpoint to send requests to
+	gateway, err := GetGateway(test, namespace.Name, gatewayName)
+ g.Expect(err).NotTo(HaveOccurred())
+ g.Expect(gateway).NotTo(BeNil())
+
+ httpRouteName := fmt.Sprintf("%s-%s", rayServiceName, "httproute")
+ LogWithTimestamp(test.T(), "Waiting for HTTPRoute %s/%s to be ready", rayService.Namespace, httpRouteName)
+ g.Eventually(HTTPRoute(test, rayService.Namespace, httpRouteName), TestTimeoutMedium).
+ Should(Not(BeNil()))
+
+ httpRoute, err := GetHTTPRoute(test, namespace.Name, httpRouteName)
+ g.Expect(err).NotTo(HaveOccurred())
+ g.Expect(utils.IsHTTPRouteReady(gateway, httpRoute)).To(BeTrue())
+
+ // Create curl pod to test traffic routing through Gateway to RayService
+ curlPodName := "curl-pod"
+ curlContainerName := "curl-container"
+ curlPod, err := CreateCurlPod(g, test, curlPodName, curlContainerName, namespace.Name)
+ g.Expect(err).NotTo(HaveOccurred())
+
+ LogWithTimestamp(test.T(), "Waiting for Curl Pod %s to be ready", curlPodName)
+ g.Eventually(func(g Gomega) *corev1.Pod {
+ updatedPod, err := test.Client().Core().CoreV1().Pods(curlPod.Namespace).Get(test.Ctx(), curlPod.Name, metav1.GetOptions{})
+ g.Expect(err).NotTo(HaveOccurred())
+ return updatedPod
+ }, TestTimeoutShort).Should(WithTransform(IsPodRunningAndReady, BeTrue()))
+
+ gatewayIP := GetGatewayIP(gateway)
+ g.Expect(gatewayIP).NotTo(BeEmpty())
+
+ LogWithTimestamp(test.T(), "Verifying RayService is serving traffic")
+ stdout, _ := CurlRayServiceGateway(test, gatewayIP, curlPod, curlContainerName, "/fruit", `["MANGO", 2]`)
+ g.Expect(stdout.String()).To(Equal("6"))
+ stdout, _ = CurlRayServiceGateway(test, gatewayIP, curlPod, curlContainerName, "/calc", `["MUL", 3]`)
+ g.Expect(stdout.String()).To(Equal("15 pizzas please!"))
+
+ // Attempt to trigger NewClusterWithIncrementalUpgrade by updating RayService serve config and RayCluster spec
+ g.Eventually(func() error {
+ latestRayService, err := GetRayService(test, namespace.Name, rayServiceName)
+ if err != nil {
+ return err
+ }
+ latestRayService.Spec.RayClusterSpec.WorkerGroupSpecs[0].Template.Spec.Containers[0].Resources.Requests[corev1.ResourceCPU] = resource.MustParse("500m")
+ serveConfig := latestRayService.Spec.ServeConfigV2
+ serveConfig = strings.Replace(serveConfig, "price: 3", "price: 4", -1)
+ serveConfig = strings.Replace(serveConfig, "factor: 5", "factor: 3", -1)
+ latestRayService.Spec.ServeConfigV2 = serveConfig
+
+ _, err = test.Client().Ray().RayV1().RayServices(namespace.Name).Update(
+ test.Ctx(),
+ latestRayService,
+ metav1.UpdateOptions{},
+ )
+ return err
+ }, TestTimeoutShort).Should(Succeed(), "Failed to update RayService to trigger upgrade")
+
+ LogWithTimestamp(test.T(), "Waiting for RayService %s/%s UpgradeInProgress condition to be true", rayService.Namespace, rayService.Name)
+ g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutShort).Should(WithTransform(IsRayServiceUpgrading, BeTrue()))
+
+ LogWithTimestamp(test.T(), "Verifying temporary service creation and HTTPRoute backends")
+ upgradingRaySvc, err := GetRayService(test, namespace.Name, rayServiceName)
+ g.Expect(err).NotTo(HaveOccurred())
+ activeClusterName := upgradingRaySvc.Status.ActiveServiceStatus.RayClusterName
+ g.Expect(activeClusterName).NotTo(BeEmpty(), "The active cluster should be set when a RayService is ready.")
+ pendingClusterName := upgradingRaySvc.Status.PendingServiceStatus.RayClusterName
+ g.Expect(pendingClusterName).NotTo(BeEmpty(), "The controller should have created a pending cluster.")
+
+ // Validate serve service for the active cluster exists.
+ activeServeSvcName := utils.GenerateServeServiceName(activeClusterName)
+ _, err = test.Client().Core().CoreV1().Services(namespace.Name).Get(test.Ctx(), activeServeSvcName, metav1.GetOptions{})
+ g.Expect(err).NotTo(HaveOccurred(), "The serve service for the active cluster should be created.")
+
+ // Validate serve service for the pending cluster has been created for the upgrade.
+ pendingServeSvcName := utils.GenerateServeServiceName(pendingClusterName)
+ g.Eventually(func(g Gomega) {
+ _, err = test.Client().Core().CoreV1().Services(namespace.Name).Get(test.Ctx(), pendingServeSvcName, metav1.GetOptions{})
+ g.Expect(err).NotTo(HaveOccurred(), "The serve service for the pending cluster should be created.")
+ }, TestTimeoutShort).Should(Succeed())
+
+ LogWithTimestamp(test.T(), "Waiting for pending RayCluster %s to have a ready head pod", pendingClusterName)
+ g.Eventually(RayCluster(test, namespace.Name, pendingClusterName), TestTimeoutMedium).
+ Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady)))
+
+ // Wait for the HTTPRoute to reflect the two backends.
+ LogWithTimestamp(test.T(), "Waiting for HTTPRoute to have two backends")
+ g.Eventually(func(g Gomega) {
+ route, err := GetHTTPRoute(test, namespace.Name, httpRouteName)
+ g.Expect(err).NotTo(HaveOccurred())
+ g.Expect(route.Spec.Rules).To(HaveLen(1))
+ g.Expect(route.Spec.Rules[0].BackendRefs).To(HaveLen(2))
+ g.Expect(string(route.Spec.Rules[0].BackendRefs[1].Name)).To(Equal(pendingServeSvcName))
+ }, TestTimeoutShort).Should(Succeed())
+
+ LogWithTimestamp(test.T(), "Validating stepwise traffic and capacity migration")
+ intervalSeconds := *interval
+ var lastMigratedTime *metav1.Time
+ oldVersionServed := false
+ newVersionServed := false
+
+	// Validate expected behavior during an IncrementalUpgrade. The following checks ensure
+	// that no requests are dropped throughout the upgrade process.
+ upgradeSteps := generateUpgradeSteps(*stepSize, *maxSurge)
+ for _, step := range upgradeSteps {
+ LogWithTimestamp(test.T(), "%s", step.name)
+ g.Eventually(func(g Gomega) int32 {
+ // Fetch updated RayService.
+ svc, err := GetRayService(test, namespace.Name, rayServiceName)
+ g.Expect(err).NotTo(HaveOccurred())
+ return step.getValue(svc)
+ }, TestTimeoutShort).Should(Equal(step.expectedValue))
+
+ // Send a request to the RayService to validate no requests are dropped. Check that
+ // both endpoints are serving requests.
+ stdout, _ := CurlRayServiceGateway(test, gatewayIP, curlPod, curlContainerName, "/fruit", `["MANGO", 2]`)
+ response := stdout.String()
+ g.Expect(response).To(Or(Equal("6"), Equal("8")), "Response should be from the old or new app version during the upgrade")
+ if response == "6" {
+ oldVersionServed = true
+ }
+ if response == "8" {
+ newVersionServed = true
+ }
+
+ if strings.Contains(step.name, "pending traffic to shift") {
+ svc, err := GetRayService(test, namespace.Name, rayServiceName)
+ g.Expect(err).NotTo(HaveOccurred())
+
+ currentMigratedTime := svc.Status.PendingServiceStatus.LastTrafficMigratedTime
+ g.Expect(currentMigratedTime).NotTo(BeNil())
+
+			// Verify at least IntervalSeconds have elapsed since the last TrafficRoutedPercent update.
+			if lastMigratedTime != nil {
+				duration := currentMigratedTime.Sub(lastMigratedTime.Time)
+				g.Expect(duration).To(BeNumerically(">=", time.Duration(intervalSeconds)*time.Second),
+					"Time between traffic steps should be >= IntervalSeconds")
+ }
+ lastMigratedTime = currentMigratedTime
+ }
+ }
+ LogWithTimestamp(test.T(), "Verifying both old and new versions served traffic during the upgrade")
+ g.Expect(oldVersionServed).To(BeTrue(), "The old version of the service should have served traffic during the upgrade.")
+ g.Expect(newVersionServed).To(BeTrue(), "The new version of the service should have served traffic during the upgrade.")
+
+ // Check that RayService completed upgrade
+ LogWithTimestamp(test.T(), "Waiting for RayService %s/%s UpgradeInProgress condition to be false", rayService.Namespace, rayService.Name)
+ g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutShort).Should(WithTransform(IsRayServiceUpgrading, BeFalse()))
+
+ LogWithTimestamp(test.T(), "Verifying RayService uses updated ServeConfig after upgrade completes")
+ stdout, _ = CurlRayServiceGateway(test, gatewayIP, curlPod, curlContainerName, "/fruit", `["MANGO", 2]`)
+ g.Expect(stdout.String()).To(Equal("8"))
+}
diff --git a/ray-operator/test/e2eincrementalupgrade/support.go b/ray-operator/test/e2eincrementalupgrade/support.go
new file mode 100644
index 00000000000..b5e6293f491
--- /dev/null
+++ b/ray-operator/test/e2eincrementalupgrade/support.go
@@ -0,0 +1,247 @@
+package e2eincrementalupgrade
+
+import (
+ "bytes"
+ "fmt"
+
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
+ "k8s.io/utils/ptr"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
+
+ rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+ "github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
+ rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1"
+ . "github.com/ray-project/kuberay/ray-operator/test/support"
+)
+
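+// CurlRayServiceGateway sends a POST request with the given body to the RayService path
+// through the Gateway IP, executed from inside the curl pod.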
+func CurlRayServiceGateway(
+ t Test,
+ gatewayIP string,
+ curlPod *corev1.Pod,
+ curlPodContainerName,
+ rayServicePath,
+ body string,
+) (bytes.Buffer, bytes.Buffer) {
+ cmd := []string{
+ "curl",
+ "--max-time", "10",
+ "-X", "POST",
+		"-H", "Connection: close", // avoid reusing the same connection across requests
+ "-H", "Content-Type: application/json",
+ fmt.Sprintf("http://%s%s", gatewayIP, rayServicePath),
+ "-d", body,
+ }
+
+ return ExecPodCmd(t, curlPod, curlPodContainerName, cmd)
+}
+
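+// IncrementalUpgradeRayServiceApplyConfiguration returns a RayService spec configured for
+// NewClusterWithIncrementalUpgrade with the given step size, interval, and max surge.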
+func IncrementalUpgradeRayServiceApplyConfiguration(
+ stepSizePercent, intervalSeconds, maxSurgePercent *int32,
+) *rayv1ac.RayServiceSpecApplyConfiguration {
+ return rayv1ac.RayServiceSpec().
+ WithUpgradeStrategy(rayv1ac.RayServiceUpgradeStrategy().
+ WithType(rayv1.NewClusterWithIncrementalUpgrade).
+ WithClusterUpgradeOptions(
+ rayv1ac.ClusterUpgradeOptions().
+ WithGatewayClassName("istio").
+ WithStepSizePercent(*stepSizePercent).
+ WithIntervalSeconds(*intervalSeconds).
+ WithMaxSurgePercent(*maxSurgePercent),
+ )).
+ WithServeConfigV2(`applications:
+ - name: fruit_app
+ import_path: fruit.deployment_graph
+ route_prefix: /fruit
+ runtime_env:
+ working_dir: "https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip"
+ deployments:
+ - name: MangoStand
+ num_replicas: 1
+ user_config:
+ price: 3
+ ray_actor_options:
+ num_cpus: 0.1
+ - name: OrangeStand
+ num_replicas: 1
+ user_config:
+ price: 2
+ ray_actor_options:
+ num_cpus: 0.1
+ - name: FruitMarket
+ num_replicas: 1
+ ray_actor_options:
+ num_cpus: 0.1
+ - name: math_app
+ import_path: conditional_dag.serve_dag
+ route_prefix: /calc
+ runtime_env:
+ working_dir: "https://github.com/ray-project/test_dag/archive/78b4a5da38796123d9f9ffff59bab2792a043e95.zip"
+ deployments:
+ - name: Adder
+ num_replicas: 1
+ user_config:
+ increment: 3
+ ray_actor_options:
+ num_cpus: 0.1
+ - name: Multiplier
+ num_replicas: 1
+ user_config:
+ factor: 5
+ ray_actor_options:
+ num_cpus: 0.1
+ - name: Router
+ ray_actor_options:
+ num_cpus: 0.1
+ num_replicas: 1`).
+ WithRayClusterSpec(rayv1ac.RayClusterSpec().
+ WithRayVersion(GetRayVersion()).
+ WithEnableInTreeAutoscaling(true).
+ WithHeadGroupSpec(rayv1ac.HeadGroupSpec().
+ WithRayStartParams(map[string]string{"dashboard-host": "0.0.0.0"}).
+ WithTemplate(corev1ac.PodTemplateSpec().
+ WithSpec(corev1ac.PodSpec().
+ WithRestartPolicy(corev1.RestartPolicyNever).
+ WithContainers(corev1ac.Container().
+ WithName("ray-head").
+ WithImage(GetRayImage()).
+ WithEnv(corev1ac.EnvVar().WithName(utils.RAY_ENABLE_AUTOSCALER_V2).WithValue("1")).
+ WithPorts(
+ corev1ac.ContainerPort().WithName(utils.GcsServerPortName).WithContainerPort(utils.DefaultGcsServerPort),
+ corev1ac.ContainerPort().WithName(utils.ServingPortName).WithContainerPort(utils.DefaultServingPort),
+ corev1ac.ContainerPort().WithName(utils.DashboardPortName).WithContainerPort(utils.DefaultDashboardPort),
+ corev1ac.ContainerPort().WithName(utils.ClientPortName).WithContainerPort(utils.DefaultClientPort),
+ ).
+ WithResources(corev1ac.ResourceRequirements().
+ WithRequests(corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("2"),
+ corev1.ResourceMemory: resource.MustParse("3Gi"),
+ }).
+ WithLimits(corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("2"),
+ corev1.ResourceMemory: resource.MustParse("3Gi"),
+ })))))).
+ WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec().
+ WithReplicas(1).
+ WithMinReplicas(1).
+ WithMaxReplicas(4).
+ WithRayStartParams(map[string]string{"num-cpus": "1"}).
+ WithGroupName("small-group").
+ WithTemplate(corev1ac.PodTemplateSpec().
+ WithSpec(corev1ac.PodSpec().
+ WithRestartPolicy(corev1.RestartPolicyNever).
+ WithContainers(corev1ac.Container().
+ WithName("ray-worker").
+ WithImage(GetRayImage()).
+ WithResources(corev1ac.ResourceRequirements().
+ WithRequests(corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("300m"),
+ corev1.ResourceMemory: resource.MustParse("1G"),
+ }).
+ WithLimits(corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("500m"),
+ corev1.ResourceMemory: resource.MustParse("1G"),
+ })))))),
+ )
+}
+
+// GetGatewayIP returns the first IP address reported in the Gateway's status
+// (addresses with no type set are treated as IPs), or an empty string if none is found.
+func GetGatewayIP(gateway *gwv1.Gateway) string {
+ if gateway == nil {
+ return ""
+ }
+ for _, addr := range gateway.Status.Addresses {
+ if addr.Type == nil || *addr.Type == gwv1.IPAddressType {
+ return addr.Value
+ }
+ }
+
+ return ""
+}
+
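+// GetPendingCapacity returns the pending cluster's TargetCapacity, defaulting to 0 when unset.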
+func GetPendingCapacity(rs *rayv1.RayService) int32 {
+ return ptr.Deref(rs.Status.PendingServiceStatus.TargetCapacity, 0)
+}
+
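+// GetPendingTraffic returns the pending cluster's TrafficRoutedPercent, defaulting to 0 when unset.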
+func GetPendingTraffic(rs *rayv1.RayService) int32 {
+ return ptr.Deref(rs.Status.PendingServiceStatus.TrafficRoutedPercent, 0)
+}
+
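+// GetActiveCapacity returns the active cluster's TargetCapacity, defaulting to 100 when unset.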
+func GetActiveCapacity(rs *rayv1.RayService) int32 {
+ return ptr.Deref(rs.Status.ActiveServiceStatus.TargetCapacity, 100)
+}
+
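+// GetActiveTraffic returns the active cluster's TrafficRoutedPercent, defaulting to 100 when unset.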
+func GetActiveTraffic(rs *rayv1.RayService) int32 {
+ return ptr.Deref(rs.Status.ActiveServiceStatus.TrafficRoutedPercent, 100)
+}
+
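+// GetLastTrafficMigratedTime returns when traffic was last migrated away from the
+// active cluster, or nil if no traffic has been migrated yet.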
+func GetLastTrafficMigratedTime(rs *rayv1.RayService) *metav1.Time {
+ return rs.Status.ActiveServiceStatus.LastTrafficMigratedTime
+}
+
+// testStep defines a validation condition to wait for during the upgrade.
+type testStep struct {
+ getValue func(rs *rayv1.RayService) int32
+ name string
+ expectedValue int32
+}
+
+// generateUpgradeSteps builds the sequence of TargetCapacity and TrafficRoutedPercent
+// updates the controller is expected to apply during an incremental upgrade with the
+// given step size and max surge.
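+//
+// For example, with stepSize=50 and maxSurge=50 the expected sequence is:
+// pending capacity 0->50, traffic split 50/50, active capacity 100->50,
+// pending capacity 50->100, and finally traffic 100/0 before promotion.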
+func generateUpgradeSteps(stepSize, maxSurge int32) []testStep {
+ var steps []testStep
+
+ pendingCapacity := int32(0)
+ pendingTraffic := int32(0)
+ activeCapacity := int32(100)
+ activeTraffic := int32(100)
+
+ for pendingTraffic < 100 {
+ // Scale up the pending cluster's TargetCapacity.
+ if pendingTraffic == pendingCapacity {
+ nextPendingCapacity := min(pendingCapacity+maxSurge, 100)
+ if nextPendingCapacity > pendingCapacity {
+ steps = append(steps, testStep{
+ name: fmt.Sprintf("Waiting for pending capacity to scale up to %d", nextPendingCapacity),
+ getValue: GetPendingCapacity,
+ expectedValue: nextPendingCapacity,
+ })
+ pendingCapacity = nextPendingCapacity
+ }
+ }
+
+ // Shift traffic from the active to the pending cluster in StepSizePercent increments.
+ for pendingTraffic < pendingCapacity {
+ nextPendingTraffic := min(pendingTraffic+stepSize, 100)
+ steps = append(steps, testStep{
+ name: fmt.Sprintf("Waiting for pending traffic to shift to %d", nextPendingTraffic),
+ getValue: GetPendingTraffic,
+ expectedValue: nextPendingTraffic,
+ })
+ pendingTraffic = nextPendingTraffic
+
+ nextActiveTraffic := max(activeTraffic-stepSize, 0)
+ steps = append(steps, testStep{
+ name: fmt.Sprintf("Waiting for active traffic to shift down to %d", nextActiveTraffic),
+ getValue: GetActiveTraffic,
+ expectedValue: nextActiveTraffic,
+ })
+ activeTraffic = nextActiveTraffic
+ }
+
+ // Scale down the active cluster's TargetCapacity. The final scale-down to 0 is
+ // skipped here because it happens when the pending cluster is promoted to active.
+ nextActiveCapacity := max(activeCapacity-maxSurge, 0)
+ if nextActiveCapacity < activeCapacity && nextActiveCapacity > 0 {
+ steps = append(steps, testStep{
+ name: fmt.Sprintf("Waiting for active capacity to scale down to %d", nextActiveCapacity),
+ getValue: GetActiveCapacity,
+ expectedValue: nextActiveCapacity,
+ })
+ activeCapacity = nextActiveCapacity
+ }
+ }
+ return steps
+}
diff --git a/ray-operator/test/support/client.go b/ray-operator/test/support/client.go
index 2e313483966..4925184d46b 100644
--- a/ray-operator/test/support/client.go
+++ b/ray-operator/test/support/client.go
@@ -8,6 +8,7 @@ import (
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
+ gatewayclient "sigs.k8s.io/gateway-api/pkg/client/clientset/versioned"
rayclient "github.com/ray-project/kuberay/ray-operator/pkg/client/clientset/versioned"
)
@@ -17,6 +18,7 @@ type Client interface {
Ray() rayclient.Interface
Dynamic() dynamic.Interface
Config() rest.Config
+ Gateway() gatewayclient.Interface
}
type testClient struct {
@@ -24,6 +26,7 @@ type testClient struct {
ray rayclient.Interface
dynamic dynamic.Interface
config rest.Config
+ gateway gatewayclient.Interface
}
var _ Client = (*testClient)(nil)
@@ -44,6 +47,10 @@ func (t *testClient) Config() rest.Config {
return t.config
}
+func (t *testClient) Gateway() gatewayclient.Interface {
+ return t.gateway
+}
+
func newTestClient() (Client, error) {
cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
clientcmd.NewDefaultClientConfigLoadingRules(),
@@ -68,10 +75,16 @@ func newTestClient() (Client, error) {
return nil, err
}
+ gatewayClient, err := gatewayclient.NewForConfig(cfg)
+ if err != nil {
+ return nil, err
+ }
+
return &testClient{
core: kubeClient,
ray: rayClient,
dynamic: dynamicClient,
config: *cfg,
+ gateway: gatewayClient,
}, nil
}
diff --git a/ray-operator/test/support/ray.go b/ray-operator/test/support/ray.go
index ffea3c75d87..0b5c525abcf 100644
--- a/ray-operator/test/support/ray.go
+++ b/ray-operator/test/support/ray.go
@@ -9,6 +9,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ gwv1 "sigs.k8s.io/gateway-api/apis/v1"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/common"
@@ -226,3 +227,23 @@ func GetRayClusterWorkerGroupReplicaSum(cluster *rayv1.RayCluster) int32 {
}
return replicas
}
+
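+// GetHTTPRoute fetches the HTTPRoute with the given namespace and name.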
+func GetHTTPRoute(t Test, namespace, name string) (*gwv1.HTTPRoute, error) {
+ return t.Client().Gateway().GatewayV1().HTTPRoutes(namespace).Get(t.Ctx(), name, metav1.GetOptions{})
+}
+
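+// HTTPRoute returns a function that fetches the HTTPRoute, for use with gomega's Eventually.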
+func HTTPRoute(t Test, namespace, name string) func() (*gwv1.HTTPRoute, error) {
+ return func() (*gwv1.HTTPRoute, error) {
+ return GetHTTPRoute(t, namespace, name)
+ }
+}
+
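+// GetGateway fetches the Gateway with the given namespace and name.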
+func GetGateway(t Test, namespace, name string) (*gwv1.Gateway, error) {
+ return t.Client().Gateway().GatewayV1().Gateways(namespace).Get(t.Ctx(), name, metav1.GetOptions{})
+}
+
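+// Gateway returns a function that fetches the Gateway, for use with gomega's Eventually.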
+func Gateway(t Test, namespace, name string) func() (*gwv1.Gateway, error) {
+ return func() (*gwv1.Gateway, error) {
+ return GetGateway(t, namespace, name)
+ }
+}