diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml
index 3717fd6c..7b4671c2 100644
--- a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml
@@ -15,13 +15,11 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.0.0
+version: 1.1.0
 
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "2.0"
+# This is the version number of the application being deployed. Keep this aligned
+# with the operator image's MAJOR.MINOR version, and keep it quoted so it stays a string.
+appVersion: "2.1"
 
 dependencies:
 - name: aws-mountpoint-s3-csi-driver
diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml
index 7f43c89a..7616f134 100644
--- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml
@@ -696,7 +696,7 @@ spec:
                       l2CacheBackend:
                         description: L2 cache backend type. Required when L2CacheSpec
                           is provided.
-                        pattern: (?i)redis
+                        pattern: (?i)redis|tieredstorage
                         type: string
                       l2CacheLocalUrl:
                         description: Provide the L2 cache URL to local storage
@@ -721,6 +721,12 @@ spec:
                     - round_robin
                     type: string
                 type: object
+              maxDeployTimeInSeconds:
+                default: 3600
+                description: Maximum allowed time in seconds for the deployment to
+                  complete before timing out. Defaults to 1 hour (3600 seconds)
+                format: int32
+                type: integer
               metrics:
                 description: Configuration for metrics collection and exposure
                 properties:
@@ -1617,12 +1623,6 @@ spec:
                     - round_robin
                     type: string
                 type: object
-              maxDeployTimeInSeconds:
-                default: 3600
-                description: Maximum allowed time in seconds for the deployment to
-                  complete before timing out. Defaults to 1 hour (3600 seconds)
-                format: int32
-                type: integer
               metrics:
                 description: Configuration for metrics collection and exposure
                 properties:
diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml
index 68ea257e..4e1b5443 100644
--- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml
@@ -350,6 +350,349 @@ spec:
                   type: object
                 maxItems: 100
                 type: array
+              intelligentRoutingSpec:
+                description: |-
+                  Configuration for intelligent routing
+                  This feature is currently not supported for existing deployments.
+                  Adding this configuration to an existing deployment will be rejected.
+                properties:
+                  autoScalingSpec:
+                    properties:
+                      cloudWatchTrigger:
+                        description: CloudWatch metric trigger to use for autoscaling
+                        properties:
+                          activationTargetValue:
+                            default: 0
+                            description: Activation Value for CloudWatch metric to
+                              scale from 0 to 1. Only applicable if minReplicaCount
+                              = 0
+                            type: number
+                          dimensions:
+                            description: Dimensions for Cloudwatch metrics
+                            items:
+                              properties:
+                                name:
+                                  description: CloudWatch Metric dimension name
+                                  type: string
+                                value:
+                                  description: CloudWatch Metric dimension value
+                                  type: string
+                              required:
+                              - name
+                              - value
+                              type: object
+                            type: array
+                          metricCollectionPeriod:
+                            default: 300
+                            description: Defines the Period for CloudWatch query
+                            format: int32
+                            type: integer
+                          metricCollectionStartTime:
+                            default: 300
+                            description: Defines the StartTime for CloudWatch query
+                            format: int32
+                            type: integer
+                          metricName:
+                            description: Metric name to query for Cloudwatch trigger
+                            type: string
+                          metricStat:
+                            default: Average
+                            description: Statistics metric to be used by Trigger.
+                              Used to define Stat for CloudWatch query. Default is
+                              Average.
+                            type: string
+                          metricType:
+                            default: Average
+                            description: 'The type of metric to be used by HPA. Enum:
+                              Average - Uses average value of metric per pod,
+                              Value - Uses absolute metric value'
+                            enum:
+                            - Value
+                            - Average
+                            type: string
+                          minValue:
+                            default: 0
+                            description: Minimum metric value used in case of empty
+                              response from CloudWatch. Default is 0.
+                            type: number
+                          name:
+                            description: Name for the CloudWatch trigger
+                            type: string
+                          namespace:
+                            description: AWS CloudWatch namespace for metric
+                            type: string
+                          targetValue:
+                            description: TargetValue for CloudWatch metric
+                            type: number
+                          useCachedMetrics:
+                            default: true
+                            description: Enable caching of metric values during polling
+                              interval. Default is true
+                            type: boolean
+                        type: object
+                      cloudWatchTriggerList:
+                        description: Multiple CloudWatch metric triggers to use for
+                          autoscaling. Takes priority over CloudWatchTrigger if both
+                          are provided.
+                        items:
+                          properties:
+                            activationTargetValue:
+                              default: 0
+                              description: Activation Value for CloudWatch metric
+                                to scale from 0 to 1. Only applicable if minReplicaCount
+                                = 0
+                              type: number
+                            dimensions:
+                              description: Dimensions for Cloudwatch metrics
+                              items:
+                                properties:
+                                  name:
+                                    description: CloudWatch Metric dimension name
+                                    type: string
+                                  value:
+                                    description: CloudWatch Metric dimension value
+                                    type: string
+                                required:
+                                - name
+                                - value
+                                type: object
+                              type: array
+                            metricCollectionPeriod:
+                              default: 300
+                              description: Defines the Period for CloudWatch query
+                              format: int32
+                              type: integer
+                            metricCollectionStartTime:
+                              default: 300
+                              description: Defines the StartTime for CloudWatch query
+                              format: int32
+                              type: integer
+                            metricName:
+                              description: Metric name to query for Cloudwatch trigger
+                              type: string
+                            metricStat:
+                              default: Average
+                              description: Statistics metric to be used by Trigger.
+                                Used to define Stat for CloudWatch query. Default
+                                is Average.
+                              type: string
+                            metricType:
+                              default: Average
+                              description: 'The type of metric to be used by HPA.
+                                Enum: Average - Uses average value of metric
+                                per pod, Value - Uses absolute metric value'
+                              enum:
+                              - Value
+                              - Average
+                              type: string
+                            minValue:
+                              default: 0
+                              description: Minimum metric value used in case of empty
+                                response from CloudWatch. Default is 0.
+                              type: number
+                            name:
+                              description: Name for the CloudWatch trigger
+                              type: string
+                            namespace:
+                              description: AWS CloudWatch namespace for metric
+                              type: string
+                            targetValue:
+                              description: TargetValue for CloudWatch metric
+                              type: number
+                            useCachedMetrics:
+                              default: true
+                              description: Enable caching of metric values during
+                                polling interval. Default is true
+                              type: boolean
+                          type: object
+                        maxItems: 100
+                        type: array
+                      cooldownPeriod:
+                        default: 300
+                        description: The period to wait after the last trigger reported
+                          active before scaling the resource back to 0. Default 300
+                          seconds.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      initialCooldownPeriod:
+                        default: 300
+                        description: The delay before the cooldownPeriod starts after
+                          the initial creation of the ScaledObject. Default 300 seconds.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      maxReplicaCount:
+                        default: 5
+                        description: The maximum number of model pods to scale to.
+                          Default 5.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      minReplicaCount:
+                        default: 1
+                        description: The minimum number of model pods to scale down
+                          to. Default 1.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      pollingInterval:
+                        default: 30
+                        description: This is the interval to check each trigger on.
+                          Default 30 seconds.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      prometheusTrigger:
+                        description: Prometheus metric trigger to use for autoscaling
+                        properties:
+                          activationTargetValue:
+                            default: 0
+                            description: Activation Value for Prometheus metric to
+                              scale from 0 to 1. Only applicable if minReplicaCount
+                              = 0
+                            type: number
+                          customHeaders:
+                            description: Custom headers to include while querying
+                              the prometheus endpoint.
+                            type: string
+                          metricType:
+                            default: Average
+                            description: 'The type of metric to be used by HPA. Enum:
+                              Average - Uses average value of metric per pod,
+                              Value - Uses absolute metric value'
+                            enum:
+                            - Value
+                            - Average
+                            type: string
+                          name:
+                            description: Name for the Prometheus trigger
+                            type: string
+                          namespace:
+                            description: Namespace for namespaced queries
+                            type: string
+                          query:
+                            description: PromQLQuery for the metric.
+                            type: string
+                          serverAddress:
+                            description: Server address for AMP workspace
+                            pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$
+                            type: string
+                          targetValue:
+                            description: Target metric value for scaling
+                            type: number
+                          useCachedMetrics:
+                            default: true
+                            description: Enable caching of metric values during polling
+                              interval. Default is true
+                            type: boolean
+                        type: object
+                      prometheusTriggerList:
+                        description: Multiple Prometheus metric triggers to use for
+                          autoscaling. Takes priority over PrometheusTrigger if both
+                          are provided.
+                        items:
+                          properties:
+                            activationTargetValue:
+                              default: 0
+                              description: Activation Value for Prometheus metric
+                                to scale from 0 to 1. Only applicable if minReplicaCount
+                                = 0
+                              type: number
+                            customHeaders:
+                              description: Custom headers to include while querying
+                                the prometheus endpoint.
+                              type: string
+                            metricType:
+                              default: Average
+                              description: 'The type of metric to be used by HPA.
+                                Enum: Average - Uses average value of metric
+                                per pod, Value - Uses absolute metric value'
+                              enum:
+                              - Value
+                              - Average
+                              type: string
+                            name:
+                              description: Name for the Prometheus trigger
+                              type: string
+                            namespace:
+                              description: Namespace for namespaced queries
+                              type: string
+                            query:
+                              description: PromQLQuery for the metric.
+                              type: string
+                            serverAddress:
+                              description: Server address for AMP workspace
+                              pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$
+                              type: string
+                            targetValue:
+                              description: Target metric value for scaling
+                              type: number
+                            useCachedMetrics:
+                              default: true
+                              description: Enable caching of metric values during
+                                polling interval. Default is true
+                              type: boolean
+                          type: object
+                        maxItems: 100
+                        type: array
+                      scaleDownStabilizationTime:
+                        default: 300
+                        description: The time window to stabilize for HPA before scaling
+                          down. Default 300 seconds.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                      scaleUpStabilizationTime:
+                        default: 0
+                        description: The time window to stabilize for HPA before scaling
+                          up. Default 0 seconds.
+                        format: int32
+                        minimum: 0
+                        type: integer
+                    type: object
+                  enabled:
+                    default: false
+                    description: Once set, the enabled field cannot be modified
+                    type: boolean
+                  routingStrategy:
+                    default: prefixaware
+                    enum:
+                    - prefixaware
+                    - kvaware
+                    - session
+                    - roundrobin
+                    type: string
+                type: object
+              kvCacheSpec:
+                description: |-
+                  Configuration for KV Cache specification
+                  By default L1CacheOffloading will be enabled
+                properties:
+                  cacheConfigFile:
+                    description: KVCache configuration file path. If specified, override
+                      other configurations provided via spec
+                    type: string
+                  enableL1Cache:
+                    default: true
+                    description: Enable CPU offloading
+                    type: boolean
+                  enableL2Cache:
+                    default: false
+                    type: boolean
+                  l2CacheSpec:
+                    description: Configuration for providing L2 Cache offloading
+                    properties:
+                      l2CacheBackend:
+                        description: L2 cache backend type. Required when L2CacheSpec
+                          is provided.
+                        pattern: (?i)redis|tieredstorage
+                        type: string
+                      l2CacheLocalUrl:
+                        description: Provide the L2 cache URL to local storage
+                        type: string
+                    type: object
+                type: object
               loadBalancer:
                 description: Configuration for Application Load Balancer
                 properties:
@@ -477,6 +820,10 @@ spec:
                 type: object
               server:
                 properties:
+                  acceleratorPartitionType:
+                    description: MIG profile to use for GPU partitioning
+                    pattern: ^mig-.*$
+                    type: string
                   executionRole:
                     description: The Amazon Resource Name (ARN) of an IAM role that
                       will be used to deploy and manage the inference server
@@ -489,6 +836,15 @@ spec:
                       Must be one of the supported types.
                     pattern: ^ml\..*
                     type: string
+                  validations:
+                    description: Validations configuration for the server
+                    properties:
+                      acceleratorPartitionValidation:
+                        default: true
+                        description: Enable MIG validation for GPU partitioning. Default
+                          is true.
+                        type: boolean
+                    type: object
                 required:
                 - instanceType
                 type: object
diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml
index 9fe34cdb..24075cef 100644
--- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/manager/manager.yaml
@@ -48,6 +48,94 @@ spec:
         # versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
         # seccompProfile:
         #   type: RuntimeDefault
+      initContainers:
+        # Pre-flight gate: exits non-zero when a required CSIDriver or cert-manager CRD cannot be read.
+        - command:
+            - bash
+            - -lc
+            - |
+              set -euo pipefail
+
+              # Report on stderr (container logs) and in the termination log (pod status).
+              log() { echo "$1" >&2; echo "$1" > /dev/termination-log; }
+
+              KUBECTL="$(command -v kubectl || true)"
+              if [ -z "${KUBECTL}" ]; then
+                for p in /opt/bitnami/kubectl/bin/kubectl /usr/local/bin/kubectl /usr/bin/kubectl /bin/kubectl; do
+                  if [ -x "$p" ]; then KUBECTL="$p"; break; fi
+                done
+              fi
+              if [ -z "${KUBECTL}" ]; then
+                log "kubectl not found in PATH or common locations"
+                exit 2
+              fi
+
+              CHECKS="${CHECKS:-drivers crds}"
+
+              # Exits 1 when the CSIDriver is missing, 2 when the error text looks like an RBAC denial.
+              require_csidriver() {
+                local provisioner="$1"
+                local friendly="$2"
+                local err_msg
+                # Capture output on each attempt so the failure text is available without re-running kubectl.
+                if err_msg="$("${KUBECTL}" get csidriver "$provisioner" 2>&1)" || \
+                   err_msg="$("${KUBECTL}" get csidrivers.storage.k8s.io "$provisioner" 2>&1)"; then
+                  return 0
+                fi
+
+                if echo "$err_msg" | grep -qiE 'forbidden|permission|unauthorized|cannot.*get'; then
+                  log "$friendly check failed: RBAC insufficient to read CSIDriver $provisioner. kubectl said: ${err_msg}"
+                  exit 2
+                fi
+
+                log "$friendly not installed (missing CSIDriver $provisioner). kubectl said: ${err_msg}"
+                exit 1
+              }
+
+              # Exits 1 when the CRD is missing, 2 when the error text looks like an RBAC denial.
+              require_crd() {
+                local crd="$1"
+                local err
+
+                if err="$("${KUBECTL}" get crd "$crd" 2>&1)"; then
+                  return 0
+                fi
+                if echo "$err" | grep -qiE 'forbidden|permission|unauthorized|cannot.*get'; then
+                  log "CRD check failed: RBAC insufficient to read $crd. kubectl said: ${err}"
+                  exit 2
+                fi
+                log "Missing required CRD: $crd. kubectl said: ${err}"
+                exit 1
+              }
+
+              # Dispatch selected checks
+              for c in $CHECKS; do
+                case "$c" in
+                  drivers)
+                    require_csidriver "s3.csi.aws.com" "S3 CSI driver"
+                    require_csidriver "fsx.csi.aws.com" "FSx CSI driver"
+                    ;;
+                  crds)
+                    require_crd "certificaterequests.cert-manager.io"
+                    require_crd "certificates.cert-manager.io"
+                    ;;
+                  *)
+                    log "Unknown check: $c"
+                    exit 1
+                    ;;
+                esac
+              done
+
+              log "Checks passed: $CHECKS"
+              exit 0
+          env:
+            - name: CHECKS
+              value: "drivers crds"
+          image: "public.ecr.aws/bitnami/kubectl:1.30"
+          imagePullPolicy: Always
+          name: check-csi-drivers
+          resources: { }
+          terminationMessagePath: /dev/termination-log
       containers:
       - command:
         - /hyperpod-inference-manager
@@ -93,7 +181,7 @@ spec:
         resources:
           limits:
             cpu: 500m
-            memory: 128Mi
+            memory: 256Mi
           requests:
             cpu: 10m
             memory: 64Mi
@@ -125,4 +213,4 @@ spec:
       volumes:
       - name: webhook-certs
         secret:
-          secretName: webhook-server-cert
\ No newline at end of file
+          secretName: webhook-server-cert
diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml
index 868b7765..878fb183 100644
--- a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml
@@ -21,7 +21,7 @@ image:
     ap-southeast-4: 311141544681.dkr.ecr.ap-southeast-4.amazonaws.com
     ap-southeast-3: 158128612970.dkr.ecr.ap-southeast-3.amazonaws.com
     eu-south-2: 025050981094.dkr.ecr.eu-south-2.amazonaws.com
-  tag: v2.0
+  tag: v2.1
   pullPolicy: Always
   repository:
 hyperpodClusterArn: