diff --git a/.github/workflows/aws.yaml b/.github/workflows/aws.yaml new file mode 100644 index 0000000000..4991a70195 --- /dev/null +++ b/.github/workflows/aws.yaml @@ -0,0 +1,100 @@ +name: Build and Push AWS Karpenter Provider Image + +on: + workflow_dispatch: + push: + branches: + - main + - release-* + +jobs: + build: + runs-on: ubuntu-latest + + env: + KO_DOCKER_REPO: docker.io/inftyai/karpenter-provider-aws + + steps: + - name: Checkout forked karpenter + uses: actions/checkout@v4 + + - name: Set up Go 1.24 + uses: actions/setup-go@v5 + with: + go-version: "1.24" + + - name: Generate commit info and image tag + id: tag + run: | + BRANCH="${GITHUB_REF##*/}" + COMMIT=$(git rev-parse HEAD) + TIMESTAMP=$(git show -s --format=%ct "$COMMIT") + VERSION_DATE=$(date -u -d "@$TIMESTAMP" +'%Y%m%d%H%M%S') + PSEUDO_VERSION="v0.0.0-${VERSION_DATE}-${COMMIT:0:12}" + + if [[ "$BRANCH" == "main" ]]; then + TAG="latest" + IMAGE_TAG="latest" + elif [[ "$BRANCH" == release-* ]]; then + TAG="${BRANCH#release-}" # e.g. v0.36.2 + IMAGE_TAG="${TAG#v}" # e.g. 0.36.2 + else + TAG="fork-${PSEUDO_VERSION}" + IMAGE_TAG="${TAG}" # keep full tag + fi + + { + echo "commit=$COMMIT" + echo "version=$PSEUDO_VERSION" + echo "tag=$TAG" + echo "image_tag=$IMAGE_TAG" + } >> "$GITHUB_OUTPUT" + echo "✅ Using image tag: $IMAGE_TAG" + + - name: Clone karpenter-provider-aws + run: | + git clone https://github.com/aws/karpenter-provider-aws.git + cd karpenter-provider-aws + + TAG="${{ steps.tag.outputs.tag }}" + if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "🔄 Attempting to checkout provider tag: $TAG" + if git rev-parse "refs/tags/$TAG" >/dev/null 2>&1; then + git checkout "tags/$TAG" -b "build-from-tag-$TAG" + else + echo "❌ Tag '$TAG' not found in karpenter-provider-aws repo." 
+ exit 1 + fi + else + echo "🔄 Checking out provider branch: main" + git checkout main + fi + + - name: Replace karpenter module with forked commit version + run: | + cd karpenter-provider-aws + go mod edit -replace sigs.k8s.io/karpenter=github.com/InftyAI/karpenter@${{ steps.tag.outputs.version }} + go mod tidy + + - name: Install build tools via make toolchain + run: | + cd karpenter-provider-aws + make toolchain + + - name: Login to DockerHub + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 #v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push image using ko + run: | + cd karpenter-provider-aws + ko build --bare \ + --tags ${{ steps.tag.outputs.image_tag }} \ + github.com/aws/karpenter-provider-aws/cmd/controller + + - name: Show pushed image + run: | + echo "✅ Image pushed to:" + echo "${{ env.KO_DOCKER_REPO }}:${{ steps.tag.outputs.image_tag }}" diff --git a/go.mod b/go.mod index 489ec88943..34f1b92b24 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/docker/docker v28.2.2+incompatible github.com/go-logr/logr v1.4.3 github.com/imdario/mergo v0.3.16 + github.com/inftyai/llmaz v0.1.4 github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 @@ -20,12 +21,12 @@ require ( go.uber.org/zap v1.27.0 golang.org/x/text v0.26.0 golang.org/x/time v0.12.0 - k8s.io/api v0.32.3 - k8s.io/apiextensions-apiserver v0.32.3 - k8s.io/apimachinery v0.32.3 - k8s.io/client-go v0.32.3 + k8s.io/api v0.32.5 + k8s.io/apiextensions-apiserver v0.32.5 + k8s.io/apimachinery v0.32.5 + k8s.io/client-go v0.32.5 k8s.io/cloud-provider v0.32.3 - k8s.io/component-base v0.32.3 + k8s.io/component-base v0.32.5 k8s.io/csi-translation-lib v0.32.3 k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 @@ -36,12 +37,12 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect 
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/emicklei/go-restful/v3 v3.12.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-logr/zapr v1.3.0 github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -77,7 +78,7 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect sigs.k8s.io/yaml v1.4.0 ) @@ -87,6 +88,7 @@ require ( github.com/rogpeppe/go-internal v1.13.1 // indirect go.uber.org/automaxprocs v1.6.0 // indirect golang.org/x/sync v0.15.0 // indirect + sigs.k8s.io/lws v0.5.1 // indirect ) retract ( diff --git a/go.sum b/go.sum index 6c96ac4392..f83519426a 100644 --- a/go.sum +++ b/go.sum @@ -9,15 +9,14 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc 
h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.12.0 h1:y2DdzBAURM29NFF94q6RaY4vjIH1rtwDapwQtU84iWk= +github.com/emicklei/go-restful/v3 v3.12.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -30,12 +29,10 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/jsonreference v0.21.0 
h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= @@ -62,6 +59,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/inftyai/llmaz v0.1.4 h1:8loib3UMUxbETDchYkYhKrBV/6SmmNxRrJtiiBI0YE4= +github.com/inftyai/llmaz v0.1.4/go.mod h1:Em0c6giN3lPfjLbxdjjeXMHVwa6lM4XvUm6NlqKAGxg= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -70,11 +69,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= @@ -124,12 +120,7 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= @@ -196,21 +187,20 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
-k8s.io/api v0.32.3 h1:Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls= -k8s.io/api v0.32.3/go.mod h1:2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k= -k8s.io/apiextensions-apiserver v0.32.3 h1:4D8vy+9GWerlErCwVIbcQjsWunF9SUGNu7O7hiQTyPY= -k8s.io/apiextensions-apiserver v0.32.3/go.mod h1:8YwcvVRMVzw0r1Stc7XfGAzB/SIVLunqApySV5V7Dss= -k8s.io/apimachinery v0.32.3 h1:JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U= -k8s.io/apimachinery v0.32.3/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= -k8s.io/client-go v0.32.3 h1:RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU= -k8s.io/client-go v0.32.3/go.mod h1:3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY= +k8s.io/api v0.32.5 h1:uqjjsYo1kTJr5NIcoIaP9F+TgXgADH7nKQx91FDAhtk= +k8s.io/api v0.32.5/go.mod h1:bXXFU3fGCZ/eFMZvfHZC69PeGbXEL4zzjuPVzOxHF64= +k8s.io/apiextensions-apiserver v0.32.5 h1:o0aKvmzIIs8Uk54pidk32pxET+Pg2ULnh9WI1PuKTwE= +k8s.io/apiextensions-apiserver v0.32.5/go.mod h1:5fpedJa3HJJFBukAZ6ur91DEDye5gYuXISPbOiNLYpU= +k8s.io/apimachinery v0.32.5 h1:6We3aJ6crC0ap8EhsEXcgX3LpI6SEjubpiOMXLROwPM= +k8s.io/apimachinery v0.32.5/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= +k8s.io/client-go v0.32.5 h1:huFmQMzgWu0z4kbWsuZci+Gt4Fo72I4CcrvhToZ/Qp0= +k8s.io/client-go v0.32.5/go.mod h1:Qchw6f9WIVrur7DKojAHpRgGLcANT0RLIvF39Jz58xA= k8s.io/cloud-provider v0.32.3 h1:WC7KhWrqXsU4b0E4tjS+nBectGiJbr1wuc1TpWXvtZM= k8s.io/cloud-provider v0.32.3/go.mod h1:/fwBfgRPuh16n8vLHT+PPT+Bc4LAEaJYj38opO2wsYY= -k8s.io/component-base v0.32.3 h1:98WJvvMs3QZ2LYHBzvltFSeJjEx7t5+8s71P7M74u8k= -k8s.io/component-base v0.32.3/go.mod h1:LWi9cR+yPAv7cu2X9rZanTiFKB2kHA+JjmhkKjCZRpI= +k8s.io/component-base v0.32.5 h1:2HiX+m3s9Iz5CMqdCVDH2V942UqzQvjuhcXb4W+KCsg= +k8s.io/component-base v0.32.5/go.mod h1:jDsPNFFElv9m27TcYxlpEX7TZ3vdgx2g4PaqMUHpV/Y= k8s.io/csi-translation-lib v0.32.3 h1:fKdc9LMVEMk18xsgoPm1Ga8GjfhI7AM3UX8gnIeXZKs= k8s.io/csi-translation-lib v0.32.3/go.mod h1:VX6+hCKgQyFnUX3VrnXZAgYYBXkrqx4BZk9vxr9qRcE= k8s.io/klog/v2 v2.130.1 
h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= @@ -223,7 +213,11 @@ sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+ sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= -sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= +sigs.k8s.io/lws v0.5.1 h1:eaeMNkP0manRluQZLN32atoULaGrzP611gSLdFaHZs4= +sigs.k8s.io/lws v0.5.1/go.mod h1:qprXSTTFnfmPZY3V3sUfk6ZPmAodsdoKS8XVElJ9kN0= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016 h1:kXv6kKdoEtedwuqMmkqhbkgvYKeycVbC8+iPCP9j5kQ= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI= +sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/kwok/charts/templates/clusterrole.yaml b/kwok/charts/templates/clusterrole.yaml index 82a402e0f4..0b9b9a5d26 100644 --- a/kwok/charts/templates/clusterrole.yaml +++ b/kwok/charts/templates/clusterrole.yaml @@ -47,6 +47,9 @@ rules: - apiGroups: ["policy"] resources: ["poddisruptionbudgets"] verbs: ["get", "list", "watch"] + - apiGroups: ["llmaz.io"] + resources: ["openmodels"] + verbs: ["get", "list", "watch"] # Write - apiGroups: ["karpenter.sh"] resources: ["nodeclaims", "nodeclaims/status"] diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go index 2cf733967c..6844ec9dd2 
100644 --- a/pkg/controllers/provisioning/provisioner.go +++ b/pkg/controllers/provisioning/provisioner.go @@ -79,6 +79,7 @@ type Provisioner struct { kubeClient client.Client batcher *Batcher[types.UID] volumeTopology *scheduler.VolumeTopology + modelInference *scheduler.ModelInference cluster *state.Cluster recorder events.Recorder cm *pretty.ChangeMonitor @@ -94,6 +95,7 @@ func NewProvisioner(kubeClient client.Client, recorder events.Recorder, cloudProvider: cloudProvider, kubeClient: kubeClient, volumeTopology: scheduler.NewVolumeTopology(kubeClient), + modelInference: scheduler.NewModelInference(kubeClient), cluster: cluster, recorder: recorder, cm: pretty.NewChangeMonitor(), @@ -266,6 +268,12 @@ func (p *Provisioner) NewScheduler( return nil, fmt.Errorf("injecting volume topology requirements, %w", err) } + // inject model inference requirements + pods, err = p.injectInferenceFlavorRequirements(ctx, pods) + if err != nil { + return nil, fmt.Errorf("injecting model inference requirements, %w", err) + } + // Calculate cluster topology, if a context error occurs, it is wrapped and returned topology, err := scheduler.NewTopology(ctx, p.kubeClient, p.cluster, stateNodes, nodePools, instanceTypes, pods, opts...) 
if err != nil { @@ -471,6 +479,7 @@ func (p *Provisioner) Validate(ctx context.Context, pod *corev1.Pod) error { validateNodeSelector(ctx, pod), validateAffinity(ctx, pod), p.volumeTopology.ValidatePersistentVolumeClaims(ctx, pod), + p.modelInference.ValidateInferenceFlavors(ctx, pod), ) } @@ -500,6 +509,21 @@ func (p *Provisioner) injectVolumeTopologyRequirements(ctx context.Context, pods return schedulablePods, nil } +func (p *Provisioner) injectInferenceFlavorRequirements(ctx context.Context, pods []*corev1.Pod) ([]*corev1.Pod, error) { + var schedulablePods []*corev1.Pod + for _, pod := range pods { + if err := p.modelInference.Inject(ctx, pod); err != nil { + if errors.Is(err, context.DeadlineExceeded) { + return nil, err + } + log.FromContext(ctx).WithValues("Pod", klog.KObj(pod)).Error(err, "failed getting model inference requirements") + } else { + schedulablePods = append(schedulablePods, pod) + } + } + return schedulablePods, nil +} + func validateNodeSelector(ctx context.Context, p *corev1.Pod) (errs error) { terms := lo.MapToSlice(p.Spec.NodeSelector, func(k string, v string) corev1.NodeSelectorTerm { return corev1.NodeSelectorTerm{ diff --git a/pkg/controllers/provisioning/scheduling/modelinference.go b/pkg/controllers/provisioning/scheduling/modelinference.go new file mode 100644 index 0000000000..afbb45b3c4 --- /dev/null +++ b/pkg/controllers/provisioning/scheduling/modelinference.go @@ -0,0 +1,181 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduling + +import ( + "context" + "fmt" + "strings" + + "github.com/awslabs/operatorpkg/serrors" + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + "sigs.k8s.io/karpenter/pkg/utils/pretty" + + llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" + llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1" +) + +func init() { + // Add support for llmaz CRDs. + utilruntime.Must(llmazcoreapi.AddToScheme(scheme.Scheme)) + utilruntime.Must(llmazinferenceapi.AddToScheme(scheme.Scheme)) +} + +func NewModelInference(kubeClient client.Client) *ModelInference { + return &ModelInference{kubeClient: kubeClient} +} + +type ModelInference struct { + kubeClient client.Client +} + +func (m *ModelInference) Inject(ctx context.Context, pod *v1.Pod) error { + flavors, err := m.getInferenceFlavors(ctx, pod) + if err != nil { + return err + } + + kept, rejected := lo.FilterReject(flavors, func(flavor llmazcoreapi.Flavor, _ int) bool { + return len(flavor.NodeSelector) > 0 + }) + if len(rejected) > 0 || len(kept) == 0 { + return nil + } + + if pod.Spec.Affinity == nil { + pod.Spec.Affinity = &v1.Affinity{} + } + if pod.Spec.Affinity.NodeAffinity == nil { + pod.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{} + } + if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{} + } + if len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 { + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = 
[]v1.NodeSelectorTerm{{}} + } + + podCopy := pod.DeepCopy() + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = nil + + // Add the inference flavor requirements to the pod's node affinity. This causes it to be OR'd with every merged requirement, + // so that relaxation employs our flavor requirements according to the orders of the merged flavors, + // when no existing node, in-flight node claim, or node pool can satisfy the current flavor requirements. + lo.ForEach(kept, func(flavor llmazcoreapi.Flavor, _ int) { + matchExpressions := lo.MapToSlice(flavor.NodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{ + Key: key, + Operator: v1.NodeSelectorOpIn, + Values: []string{value}, + } + }) + // We add our inference requirement to every node selector term. This causes it to be AND'd with every existing + // requirement so that relaxation won't remove our inference requirement. + nodeSelectorTermsCopy := podCopy.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.DeepCopy().NodeSelectorTerms + for i := 0; i < len(nodeSelectorTermsCopy); i++ { + nodeSelectorTermsCopy[i].MatchExpressions = append(nodeSelectorTermsCopy[i].MatchExpressions, matchExpressions...) + } + log.FromContext(ctx). + WithValues("Pod", klog.KObj(pod)). + V(1).Info(fmt.Sprintf("adding requirements derived from pod's inference flavor %q, %s", flavor.Name, matchExpressions)) + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, nodeSelectorTermsCopy...) + }) + + log.FromContext(ctx). + WithValues("Pod", klog.KObj(pod)). 
+ V(1).Info(fmt.Sprintf("adding requirements derived from pod's inference flavors, %s", pretty.Concise(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution))) + + return nil +} + +func (m *ModelInference) getInferenceFlavors(ctx context.Context, pod *v1.Pod) ([]llmazcoreapi.Flavor, error) { + modelName, ok := pod.Labels[llmazcoreapi.ModelNameLabelKey] + if !ok { + // Ignore the pod that is not created via llmaz's inference service. + return nil, nil + } + + model := &llmazcoreapi.OpenModel{} + if err := m.kubeClient.Get(ctx, types.NamespacedName{Name: modelName}, model); err != nil { + return nil, fmt.Errorf("getting open model %q, %w", modelName, err) + } + modelFlavors := lo.FromPtrOr(model.Spec.InferenceConfig, llmazcoreapi.InferenceConfig{}).Flavors + + serviceFlavorRawStr, ok := pod.Annotations[llmazinferenceapi.InferenceServiceFlavorsAnnoKey] + if !ok { + // Not all inference pods specify the inference service flavors. + return modelFlavors, nil + } + + modelFlavorMap := lo.SliceToMap(modelFlavors, func(flavor llmazcoreapi.Flavor) (llmazcoreapi.FlavorName, llmazcoreapi.Flavor) { + return flavor.Name, flavor + }) + + var result []llmazcoreapi.Flavor + for _, flavorNameVal := range strings.Split(serviceFlavorRawStr, ",") { + flavor, ok := modelFlavorMap[llmazcoreapi.FlavorName(flavorNameVal)] + if !ok { + return nil, fmt.Errorf("unknown service inference flavor %q", flavorNameVal) + } + result = append(result, flavor) + } + return result, nil +} + +func (m *ModelInference) ValidateInferenceFlavors(ctx context.Context, pod *v1.Pod) (err error) { + modelName, ok := pod.Labels[llmazcoreapi.ModelNameLabelKey] + if !ok { + // Ignore the pod that is not created via llmaz's inference service. 
+ return nil + } + + model := &llmazcoreapi.OpenModel{} + if err := m.kubeClient.Get(ctx, types.NamespacedName{Name: modelName}, model); err != nil { + return serrors.Wrap(fmt.Errorf("failed to validate open model, %w", err), "OpenModel", klog.KRef("", modelName)) + } + + serviceFlavorRawStr, ok := pod.Annotations[llmazinferenceapi.InferenceServiceFlavorsAnnoKey] + if !ok { + // Not all inference pods specify the inference service flavors. + return nil + } + + // Get all flavors from the model and check if the service flavors are valid. + allFlavors := lo.SliceToMap( + lo.FromPtrOr(model.Spec.InferenceConfig, llmazcoreapi.InferenceConfig{}).Flavors, + func(flavor llmazcoreapi.Flavor) (llmazcoreapi.FlavorName, llmazcoreapi.Flavor) { + return flavor.Name, flavor + }, + ) + unknownFlavors := lo.Reject(strings.Split(serviceFlavorRawStr, ","), func(flavor string, _ int) bool { + return lo.HasKey(allFlavors, llmazcoreapi.FlavorName(flavor)) + }) + + if len(unknownFlavors) > 0 { + err = serrors.Wrap(fmt.Errorf("unknown service inference flavors, %v", unknownFlavors), "OpenModel", klog.KRef("", modelName)) + return err + } + return nil +} diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go index 2d43e51018..fa2dd6eda3 100644 --- a/pkg/controllers/provisioning/suite_test.go +++ b/pkg/controllers/provisioning/suite_test.go @@ -49,9 +49,13 @@ import ( "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/scheduling" "sigs.k8s.io/karpenter/pkg/test" + testcrds "sigs.k8s.io/karpenter/pkg/test/crds" . "sigs.k8s.io/karpenter/pkg/test/expectations" "sigs.k8s.io/karpenter/pkg/test/v1alpha1" . 
"sigs.k8s.io/karpenter/pkg/utils/testing" + + llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" + llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1" ) var ( @@ -73,7 +77,7 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func() { - env = test.NewEnvironment(test.WithCRDs(apis.CRDs...), test.WithCRDs(v1alpha1.CRDs...)) + env = test.NewEnvironment(test.WithCRDs(apis.CRDs...), test.WithCRDs(v1alpha1.CRDs...), test.WithCRDs(testcrds.CRDs...)) ctx = options.ToContext(ctx, test.Options()) cloudProvider = fake.NewCloudProvider() fakeClock = clock.NewFakeClock(time.Now()) @@ -2583,6 +2587,264 @@ var _ = Describe("Provisioning", func() { }) }) }) + + Context("Model Inference Requirements", func() { + It("should not schedule if the model is missing", func() { + ExpectApplied(ctx, env.Client, test.NodePool()) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: "unknown", + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectMetricGaugeValue(pscheduling.IgnoredPodCount, 1, nil) + ExpectNotScheduled(ctx, env.Client, pod) + }) + It("should schedule with model if the model does not have an inference flavor", func() { + model := test.OpenModel(test.OpenModelOptions{}) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + }) + It("should not schedule if the inference flavor annotation is incompatible between model and inference service", func() { + model := test.OpenModel(test.OpenModelOptions{}) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + 
Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "unknown", + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectMetricGaugeValue(pscheduling.IgnoredPodCount, 1, nil) + ExpectNotScheduled(ctx, env.Client, pod) + }) + It("should schedule to target instance type if the model has the flavor", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-instance-type")) + }) + It("should schedule to target instance type if inference service specifies the flavor and model has multiple flavors", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "test-flavor-2", + }, + }, + }) + ExpectProvisioned(ctx, 
env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + }) + It("should schedule to first available instance type if some inference flavors are not supported by node pools", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "unavailable", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + }) + It("shouldn't schedule to the in-flight node claim even if the node claim is compatible with second inference flavor when the first inference flavor is supported by node pools", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod1 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + 
llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "test-flavor-2", + }, + }, + }) + pod2 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + Expect(node1.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + node2 := ExpectScheduled(ctx, env.Client, pod2) + Expect(node2.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-instance-type")) + }) + It("should schedule to the in-flight node claim if the node claim is compatible with second inference flavor and the first inference flavor is unsupported by node pools", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "unavailable", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod1 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "test-flavor-2", + }, + }, + }) + pod2 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + Expect(node1.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + node2 := ExpectScheduled(ctx, env.Client, pod2) + 
Expect(node2.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + Expect(node2.Name).To(Equal(node1.Name)) + }) + It("should not relax an added service inference node-selector away", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + NodeRequirements: []corev1.NodeSelectorRequirement{ + { + Key: "example.com/label", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"unsupported"}, + }, + }, + }) + + // Add the second capacity type that is OR'd with the first. Previously we only added the service inference requirement + // to a single node selector term which would sometimes get relaxed away. Now we add it to all of them to AND + // it with each existing term. 
+ pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, + corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: v1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{v1.CapacityTypeOnDemand}, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-instance-type")) + }) + }) }) func ExpectNodeClaimRequirements(nodeClaim *v1.NodeClaim, requirements ...corev1.NodeSelectorRequirement) { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 3885d5ce13..c45050ebb0 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -60,6 +60,8 @@ import ( "sigs.k8s.io/karpenter/pkg/operator/logging" "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/utils/env" + + llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" ) const ( @@ -192,7 +194,7 @@ func NewOperator() (context.Context, *Operator) { return lo.Ternary(mgr.GetCache().WaitForCacheSync(req.Context()), nil, fmt.Errorf("failed to sync caches")) })) lo.Must0(mgr.AddReadyzCheck("crd", func(_ *http.Request) error { - objects := []client.Object{&v1.NodePool{}, &v1.NodeClaim{}} + objects := []client.Object{&v1.NodePool{}, &v1.NodeClaim{}, &llmazcoreapi.OpenModel{}} for _, obj := range objects { gvk, err := apiutil.GVKForObject(obj, scheme.Scheme) if err != nil { diff --git a/pkg/test/crds/apis.go b/pkg/test/crds/apis.go new file mode 100644 index 0000000000..d2e975dca4 --- /dev/null +++ b/pkg/test/crds/apis.go @@ -0,0 +1,32 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package crds + +import ( + _ "embed" + + "github.com/awslabs/operatorpkg/object" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" +) + +var ( + //go:embed openmodel-crd.yaml + OpenModelCRD []byte + CRDs = []*apiextensionsv1.CustomResourceDefinition{ + object.Unmarshal[apiextensionsv1.CustomResourceDefinition](OpenModelCRD), + } +) diff --git a/pkg/test/crds/openmodel-crd.yaml b/pkg/test/crds/openmodel-crd.yaml new file mode 100644 index 0000000000..001f81004a --- /dev/null +++ b/pkg/test/crds/openmodel-crd.yaml @@ -0,0 +1,243 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: openmodels.llmaz.io +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: llmaz-webhook-service + namespace: llmaz-system + path: /convert + conversionReviewVersions: + - v1 + group: llmaz.io + names: + kind: OpenModel + listKind: OpenModelList + plural: openmodels + shortNames: + - om + singular: openmodel + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: OpenModel is the Schema for the open models API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: ModelSpec defines the desired state of Model
+            properties:
+              familyName:
+                description: |-
+                  FamilyName represents the model type, like llama2, which will be auto injected
+                  to the labels with the key of `llmaz.io/model-family-name`.
+                type: string
+              inferenceConfig:
+                description: InferenceConfig represents the inference configurations
+                  for the model.
+                properties:
+                  flavors:
+                    description: |-
+                      Flavors represents the accelerator requirements to serve the model.
+                      Flavors are fungible following the priority represented by the slice order.
+                    items:
+                      description: |-
+                        Flavor defines the accelerator requirements for a model and the necessary parameters
+                        in autoscaling. Right now, it will be used in two places:
+                        - Pod scheduling with node selectors specified.
+                        - Cluster autoscaling with essential parameters provided.
+                      properties:
+                        limits:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Limits defines the required accelerators to serve the model for each replica,
+                            like <nvidia.com/gpu: 8>. For multi-hosts cases, the limits here indicates
+                            the resource requirements for each replica, usually equals to the TP size.
+                            Not recommended to set the cpu and memory usage here:
+                            - if using playground, you can define the cpu/mem usage at backendConfig.
+                            - if using inference service, you can define the cpu/mem at the container resources.
+                            However, if you define the same accelerator resources at playground/service as well,
+                            the resources will be overwritten by the flavor limit here.
+                          type: object
+                        name:
+                          description: Name represents the flavor name, which will
+                            be used in model claim.
+                          type: string
+                        nodeSelector:
+                          additionalProperties:
+                            type: string
+                          description: |-
+                            NodeSelector represents the node candidates for Pod placements, if a node doesn't
+                            meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin.
+                            If nodeSelector is empty, it means every node is a candidate.
+                          type: object
+                        params:
+                          additionalProperties:
+                            type: string
+                          description: |-
+                            Params stores other useful parameters and will be consumed by cluster-autoscaler / Karpenter
+                            for autoscaling or be defined as model parallelism parameters like TP or PP size.
+                            E.g. with autoscaling, when scaling up nodes with 8x Nvidia A00, the parameter can be injected
+                            with <INSTANCE-TYPE: p4d.24xlarge> for AWS.
+                            Preset parameters: TP, PP, INSTANCE-TYPE.
+                          type: object
+                      required:
+                      - name
+                      type: object
+                    maxItems: 8
+                    type: array
+                type: object
+              source:
+                description: |-
+                  Source represents the source of the model, there're several ways to load
+                  the model such as loading from huggingface, OCI registry, s3, host path and so on.
+                properties:
+                  modelHub:
+                    description: ModelHub represents the model registry for model
+                      downloads.
+                    properties:
+                      allowPatterns:
+                        description: AllowPatterns refers to files matched with at
+                          least one pattern will be downloaded.
+                        items:
+                          type: string
+                        type: array
+                      filename:
+                        description: |-
+                          Filename refers to a specified model file rather than the whole repo.
+                          This is helpful to download a specified GGUF model rather than downloading
+                          the whole repo which includes all kinds of quantized models.
+                          TODO: this is only supported with Huggingface, add support for ModelScope in the near future.
+                          Note: once filename is set, allowPatterns and ignorePatterns should be left unset.
+ type: string + ignorePatterns: + description: IgnorePatterns refers to files matched with any + of the patterns will not be downloaded. + items: + type: string + type: array + modelID: + description: |- + ModelID refers to the model identifier on model hub, + such as meta-llama/Meta-Llama-3-8B. + type: string + name: + default: Huggingface + description: Name refers to the model registry, such as huggingface. + enum: + - Huggingface + - ModelScope + type: string + revision: + default: main + description: Revision refers to a Git revision id which can + be a branch name, a tag, or a commit hash. + type: string + type: object + uri: + description: |- + URI represents a various kinds of model sources following the uri protocol, protocol://
<address>, e.g.
+                      - oss://<bucket>.<endpoint>/<path-to-your-model>
+                      - ollama://llama3.3
+                      - host://<path-to-your-model>
+                    type: string
+                type: object
+            required:
+            - familyName
+            - source
+            type: object
+          status:
+            description: ModelStatus defines the observed state of Model
+            properties:
+              conditions:
+                description: Conditions represents the Inference condition.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+
diff --git a/pkg/test/openmodel.go b/pkg/test/openmodel.go
new file mode 100644
index 0000000000..d897c02339
--- /dev/null
+++ b/pkg/test/openmodel.go
@@ -0,0 +1,54 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package test
+
+import (
+	"fmt"
+
+	"github.com/imdario/mergo"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
+)
+
+// OpenModelOptions customizes the fixture returned by OpenModel.
+type OpenModelOptions struct {
+	metav1.ObjectMeta
+
+	// Flavors becomes the model's spec.inferenceConfig.flavors.
+	Flavors []llmazcoreapi.Flavor
+}
+
+// OpenModel builds an OpenModel test fixture by merging the given option
+// structs (later overrides win) into a minimal valid spec.
+func OpenModel(overrides ...OpenModelOptions) *llmazcoreapi.OpenModel {
+	options := OpenModelOptions{}
+	for _, opts := range overrides {
+		if err := mergo.Merge(&options, opts, mergo.WithOverride); err != nil {
+			panic(fmt.Sprintf("Failed to merge options: %s", err))
+		}
+	}
+
+	return &llmazcoreapi.OpenModel{
+		ObjectMeta: ObjectMeta(options.ObjectMeta),
+		Spec: llmazcoreapi.ModelSpec{
+			InferenceConfig: &llmazcoreapi.InferenceConfig{
+				Flavors: options.Flavors,
+			},
+		},
+	}
+}