diff --git a/.github/workflows/aws.yaml b/.github/workflows/aws.yaml new file mode 100644 index 0000000000..4991a70195 --- /dev/null +++ b/.github/workflows/aws.yaml @@ -0,0 +1,100 @@ +name: Build and Push AWS Karpenter Provider Image + +on: + workflow_dispatch: + push: + branches: + - main + - release-* + +jobs: + build: + runs-on: ubuntu-latest + + env: + KO_DOCKER_REPO: docker.io/inftyai/karpenter-provider-aws + + steps: + - name: Checkout forked karpenter + uses: actions/checkout@v4 + + - name: Set up Go 1.24 + uses: actions/setup-go@v5 + with: + go-version: "1.24" + + - name: Generate commit info and image tag + id: tag + run: | + BRANCH="${GITHUB_REF##*/}" + COMMIT=$(git rev-parse HEAD) + TIMESTAMP=$(git show -s --format=%ct "$COMMIT") + VERSION_DATE=$(date -u -d "@$TIMESTAMP" +'%Y%m%d%H%M%S') + PSEUDO_VERSION="v0.0.0-${VERSION_DATE}-${COMMIT:0:12}" + + if [[ "$BRANCH" == "main" ]]; then + TAG="latest" + IMAGE_TAG="latest" + elif [[ "$BRANCH" == release-* ]]; then + TAG="${BRANCH#release-}" # e.g. v0.36.2 + IMAGE_TAG="${TAG#v}" # e.g. 0.36.2 + else + TAG="fork-${PSEUDO_VERSION}" + IMAGE_TAG="${TAG}" # keep full tag + fi + + { + echo "commit=$COMMIT" + echo "version=$PSEUDO_VERSION" + echo "tag=$TAG" + echo "image_tag=$IMAGE_TAG" + } >> "$GITHUB_OUTPUT" + echo "✅ Using image tag: $IMAGE_TAG" + + - name: Clone karpenter-provider-aws + run: | + git clone https://github.com/aws/karpenter-provider-aws.git + cd karpenter-provider-aws + + TAG="${{ steps.tag.outputs.tag }}" + if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "🔄 Attempting to checkout provider tag: $TAG" + if git rev-parse "refs/tags/$TAG" >/dev/null 2>&1; then + git checkout "tags/$TAG" -b "build-from-tag-$TAG" + else + echo "❌ Tag '$TAG' not found in karpenter-provider-aws repo." 
+ exit 1 + fi + else + echo "🔄 Checking out provider branch: main" + git checkout main + fi + + - name: Replace karpenter module with forked commit version + run: | + cd karpenter-provider-aws + go mod edit -replace sigs.k8s.io/karpenter=github.com/InftyAI/karpenter@${{ steps.tag.outputs.version }} + go mod tidy + + - name: Install build tools via make toolchain + run: | + cd karpenter-provider-aws + make toolchain + + - name: Login to DockerHub + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 #v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push image using ko + run: | + cd karpenter-provider-aws + ko build --bare \ + --tags ${{ steps.tag.outputs.image_tag }} \ + github.com/aws/karpenter-provider-aws/cmd/controller + + - name: Show pushed image + run: | + echo "✅ Image pushed to:" + echo "${{ env.KO_DOCKER_REPO }}:${{ steps.tag.outputs.image_tag }}" diff --git a/go.mod b/go.mod index 489ec88943..34f1b92b24 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/docker/docker v28.2.2+incompatible github.com/go-logr/logr v1.4.3 github.com/imdario/mergo v0.3.16 + github.com/inftyai/llmaz v0.1.4 github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.37.0 @@ -20,12 +21,12 @@ require ( go.uber.org/zap v1.27.0 golang.org/x/text v0.26.0 golang.org/x/time v0.12.0 - k8s.io/api v0.32.3 - k8s.io/apiextensions-apiserver v0.32.3 - k8s.io/apimachinery v0.32.3 - k8s.io/client-go v0.32.3 + k8s.io/api v0.32.5 + k8s.io/apiextensions-apiserver v0.32.5 + k8s.io/apimachinery v0.32.5 + k8s.io/client-go v0.32.5 k8s.io/cloud-provider v0.32.3 - k8s.io/component-base v0.32.3 + k8s.io/component-base v0.32.5 k8s.io/csi-translation-lib v0.32.3 k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 @@ -36,12 +37,12 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect 
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/emicklei/go-restful/v3 v3.12.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-logr/zapr v1.3.0 github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -77,7 +78,7 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect sigs.k8s.io/yaml v1.4.0 ) @@ -87,6 +88,7 @@ require ( github.com/rogpeppe/go-internal v1.13.1 // indirect go.uber.org/automaxprocs v1.6.0 // indirect golang.org/x/sync v0.15.0 // indirect + sigs.k8s.io/lws v0.5.1 // indirect ) retract ( diff --git a/go.sum b/go.sum index 6c96ac4392..f83519426a 100644 --- a/go.sum +++ b/go.sum @@ -9,15 +9,14 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc 
h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.12.0 h1:y2DdzBAURM29NFF94q6RaY4vjIH1rtwDapwQtU84iWk= +github.com/emicklei/go-restful/v3 v3.12.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -30,12 +29,10 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/jsonreference v0.21.0 
h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= @@ -62,6 +59,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/inftyai/llmaz v0.1.4 h1:8loib3UMUxbETDchYkYhKrBV/6SmmNxRrJtiiBI0YE4= +github.com/inftyai/llmaz v0.1.4/go.mod h1:Em0c6giN3lPfjLbxdjjeXMHVwa6lM4XvUm6NlqKAGxg= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -70,11 +69,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= @@ -124,12 +120,7 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= @@ -196,21 +187,20 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
-k8s.io/api v0.32.3 h1:Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls= -k8s.io/api v0.32.3/go.mod h1:2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k= -k8s.io/apiextensions-apiserver v0.32.3 h1:4D8vy+9GWerlErCwVIbcQjsWunF9SUGNu7O7hiQTyPY= -k8s.io/apiextensions-apiserver v0.32.3/go.mod h1:8YwcvVRMVzw0r1Stc7XfGAzB/SIVLunqApySV5V7Dss= -k8s.io/apimachinery v0.32.3 h1:JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U= -k8s.io/apimachinery v0.32.3/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= -k8s.io/client-go v0.32.3 h1:RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU= -k8s.io/client-go v0.32.3/go.mod h1:3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY= +k8s.io/api v0.32.5 h1:uqjjsYo1kTJr5NIcoIaP9F+TgXgADH7nKQx91FDAhtk= +k8s.io/api v0.32.5/go.mod h1:bXXFU3fGCZ/eFMZvfHZC69PeGbXEL4zzjuPVzOxHF64= +k8s.io/apiextensions-apiserver v0.32.5 h1:o0aKvmzIIs8Uk54pidk32pxET+Pg2ULnh9WI1PuKTwE= +k8s.io/apiextensions-apiserver v0.32.5/go.mod h1:5fpedJa3HJJFBukAZ6ur91DEDye5gYuXISPbOiNLYpU= +k8s.io/apimachinery v0.32.5 h1:6We3aJ6crC0ap8EhsEXcgX3LpI6SEjubpiOMXLROwPM= +k8s.io/apimachinery v0.32.5/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= +k8s.io/client-go v0.32.5 h1:huFmQMzgWu0z4kbWsuZci+Gt4Fo72I4CcrvhToZ/Qp0= +k8s.io/client-go v0.32.5/go.mod h1:Qchw6f9WIVrur7DKojAHpRgGLcANT0RLIvF39Jz58xA= k8s.io/cloud-provider v0.32.3 h1:WC7KhWrqXsU4b0E4tjS+nBectGiJbr1wuc1TpWXvtZM= k8s.io/cloud-provider v0.32.3/go.mod h1:/fwBfgRPuh16n8vLHT+PPT+Bc4LAEaJYj38opO2wsYY= -k8s.io/component-base v0.32.3 h1:98WJvvMs3QZ2LYHBzvltFSeJjEx7t5+8s71P7M74u8k= -k8s.io/component-base v0.32.3/go.mod h1:LWi9cR+yPAv7cu2X9rZanTiFKB2kHA+JjmhkKjCZRpI= +k8s.io/component-base v0.32.5 h1:2HiX+m3s9Iz5CMqdCVDH2V942UqzQvjuhcXb4W+KCsg= +k8s.io/component-base v0.32.5/go.mod h1:jDsPNFFElv9m27TcYxlpEX7TZ3vdgx2g4PaqMUHpV/Y= k8s.io/csi-translation-lib v0.32.3 h1:fKdc9LMVEMk18xsgoPm1Ga8GjfhI7AM3UX8gnIeXZKs= k8s.io/csi-translation-lib v0.32.3/go.mod h1:VX6+hCKgQyFnUX3VrnXZAgYYBXkrqx4BZk9vxr9qRcE= k8s.io/klog/v2 v2.130.1 
h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= @@ -223,7 +213,11 @@ sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+ sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA= -sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= +sigs.k8s.io/lws v0.5.1 h1:eaeMNkP0manRluQZLN32atoULaGrzP611gSLdFaHZs4= +sigs.k8s.io/lws v0.5.1/go.mod h1:qprXSTTFnfmPZY3V3sUfk6ZPmAodsdoKS8XVElJ9kN0= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016 h1:kXv6kKdoEtedwuqMmkqhbkgvYKeycVbC8+iPCP9j5kQ= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI= +sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/kwok/charts/templates/clusterrole.yaml b/kwok/charts/templates/clusterrole.yaml index 82a402e0f4..0b9b9a5d26 100644 --- a/kwok/charts/templates/clusterrole.yaml +++ b/kwok/charts/templates/clusterrole.yaml @@ -47,6 +47,9 @@ rules: - apiGroups: ["policy"] resources: ["poddisruptionbudgets"] verbs: ["get", "list", "watch"] + - apiGroups: ["llmaz.io"] + resources: ["openmodels"] + verbs: ["get", "list", "watch"] # Write - apiGroups: ["karpenter.sh"] resources: ["nodeclaims", "nodeclaims/status"] diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go index 2cf733967c..6844ec9dd2 
100644 --- a/pkg/controllers/provisioning/provisioner.go +++ b/pkg/controllers/provisioning/provisioner.go @@ -79,6 +79,7 @@ type Provisioner struct { kubeClient client.Client batcher *Batcher[types.UID] volumeTopology *scheduler.VolumeTopology + modelInference *scheduler.ModelInference cluster *state.Cluster recorder events.Recorder cm *pretty.ChangeMonitor @@ -94,6 +95,7 @@ func NewProvisioner(kubeClient client.Client, recorder events.Recorder, cloudProvider: cloudProvider, kubeClient: kubeClient, volumeTopology: scheduler.NewVolumeTopology(kubeClient), + modelInference: scheduler.NewModelInference(kubeClient), cluster: cluster, recorder: recorder, cm: pretty.NewChangeMonitor(), @@ -266,6 +268,12 @@ func (p *Provisioner) NewScheduler( return nil, fmt.Errorf("injecting volume topology requirements, %w", err) } + // inject model inference requirements + pods, err = p.injectInferenceFlavorRequirements(ctx, pods) + if err != nil { + return nil, fmt.Errorf("injecting model inference requirements, %w", err) + } + // Calculate cluster topology, if a context error occurs, it is wrapped and returned topology, err := scheduler.NewTopology(ctx, p.kubeClient, p.cluster, stateNodes, nodePools, instanceTypes, pods, opts...) 
if err != nil { @@ -471,6 +479,7 @@ func (p *Provisioner) Validate(ctx context.Context, pod *corev1.Pod) error { validateNodeSelector(ctx, pod), validateAffinity(ctx, pod), p.volumeTopology.ValidatePersistentVolumeClaims(ctx, pod), + p.modelInference.ValidateInferenceFlavors(ctx, pod), ) } @@ -500,6 +509,21 @@ func (p *Provisioner) injectVolumeTopologyRequirements(ctx context.Context, pods return schedulablePods, nil } +func (p *Provisioner) injectInferenceFlavorRequirements(ctx context.Context, pods []*corev1.Pod) ([]*corev1.Pod, error) { + var schedulablePods []*corev1.Pod + for _, pod := range pods { + if err := p.modelInference.Inject(ctx, pod); err != nil { + if errors.Is(err, context.DeadlineExceeded) { + return nil, err + } + log.FromContext(ctx).WithValues("Pod", klog.KObj(pod)).Error(err, "failed getting model inference requirements") + } else { + schedulablePods = append(schedulablePods, pod) + } + } + return schedulablePods, nil +} + func validateNodeSelector(ctx context.Context, p *corev1.Pod) (errs error) { terms := lo.MapToSlice(p.Spec.NodeSelector, func(k string, v string) corev1.NodeSelectorTerm { return corev1.NodeSelectorTerm{ diff --git a/pkg/controllers/provisioning/scheduling/modelinference.go b/pkg/controllers/provisioning/scheduling/modelinference.go new file mode 100644 index 0000000000..afbb45b3c4 --- /dev/null +++ b/pkg/controllers/provisioning/scheduling/modelinference.go @@ -0,0 +1,181 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduling + +import ( + "context" + "fmt" + "strings" + + "github.com/awslabs/operatorpkg/serrors" + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + "sigs.k8s.io/karpenter/pkg/utils/pretty" + + llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" + llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1" +) + +func init() { + // Add support for llmaz CRDs. + utilruntime.Must(llmazcoreapi.AddToScheme(scheme.Scheme)) + utilruntime.Must(llmazinferenceapi.AddToScheme(scheme.Scheme)) +} + +func NewModelInference(kubeClient client.Client) *ModelInference { + return &ModelInference{kubeClient: kubeClient} +} + +type ModelInference struct { + kubeClient client.Client +} + +func (m *ModelInference) Inject(ctx context.Context, pod *v1.Pod) error { + flavors, err := m.getInferenceFlavors(ctx, pod) + if err != nil { + return err + } + + kept, rejected := lo.FilterReject(flavors, func(flavor llmazcoreapi.Flavor, _ int) bool { + return len(flavor.NodeSelector) > 0 + }) + if len(rejected) > 0 || len(kept) == 0 { + return nil + } + + if pod.Spec.Affinity == nil { + pod.Spec.Affinity = &v1.Affinity{} + } + if pod.Spec.Affinity.NodeAffinity == nil { + pod.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{} + } + if pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{} + } + if len(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 { + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = 
[]v1.NodeSelectorTerm{{}} + } + + podCopy := pod.DeepCopy() + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = nil + + // Add the inference flavor requirements to the pod's node affinity. This causes it to be OR'd with every merged requirement, + // so that relaxation employs our flavor requirements according to the orders of the merged flavors, + // when no existing node, in-flight node claim, or node pool can satisfy the current flavor requirements. + lo.ForEach(kept, func(flavor llmazcoreapi.Flavor, _ int) { + matchExpressions := lo.MapToSlice(flavor.NodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{ + Key: key, + Operator: v1.NodeSelectorOpIn, + Values: []string{value}, + } + }) + // We add our inference requirement to every node selector term. This causes it to be AND'd with every existing + // requirement so that relaxation won't remove our inference requirement. + nodeSelectorTermsCopy := podCopy.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.DeepCopy().NodeSelectorTerms + for i := 0; i < len(nodeSelectorTermsCopy); i++ { + nodeSelectorTermsCopy[i].MatchExpressions = append(nodeSelectorTermsCopy[i].MatchExpressions, matchExpressions...) + } + log.FromContext(ctx). + WithValues("Pod", klog.KObj(pod)). + V(1).Info(fmt.Sprintf("adding requirements derived from pod's inference flavor %q, %s", flavor.Name, matchExpressions)) + pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, nodeSelectorTermsCopy...) + }) + + log.FromContext(ctx). + WithValues("Pod", klog.KObj(pod)). 
+ V(1).Info(fmt.Sprintf("adding requirements derived from pod's inference flavors, %s", pretty.Concise(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution))) + + return nil +} + +func (m *ModelInference) getInferenceFlavors(ctx context.Context, pod *v1.Pod) ([]llmazcoreapi.Flavor, error) { + modelName, ok := pod.Labels[llmazcoreapi.ModelNameLabelKey] + if !ok { + // Ignore the pod that is not created via llmaz's inference service. + return nil, nil + } + + model := &llmazcoreapi.OpenModel{} + if err := m.kubeClient.Get(ctx, types.NamespacedName{Name: modelName}, model); err != nil { + return nil, fmt.Errorf("getting open model %q, %w", modelName, err) + } + modelFlavors := lo.FromPtrOr(model.Spec.InferenceConfig, llmazcoreapi.InferenceConfig{}).Flavors + + serviceFlavorRawStr, ok := pod.Annotations[llmazinferenceapi.InferenceServiceFlavorsAnnoKey] + if !ok { + // Not all inference pods specify the inference service flavors. + return modelFlavors, nil + } + + modelFlavorMap := lo.SliceToMap(modelFlavors, func(flavor llmazcoreapi.Flavor) (llmazcoreapi.FlavorName, llmazcoreapi.Flavor) { + return flavor.Name, flavor + }) + + var result []llmazcoreapi.Flavor + for _, flavorNameVal := range strings.Split(serviceFlavorRawStr, ",") { + flavor, ok := modelFlavorMap[llmazcoreapi.FlavorName(flavorNameVal)] + if !ok { + return nil, fmt.Errorf("unknown service inference flavor %q", flavorNameVal) + } + result = append(result, flavor) + } + return result, nil +} + +func (m *ModelInference) ValidateInferenceFlavors(ctx context.Context, pod *v1.Pod) (err error) { + modelName, ok := pod.Labels[llmazcoreapi.ModelNameLabelKey] + if !ok { + // Ignore the pod that is not created via llmaz's inference service. 
+ return nil + } + + model := &llmazcoreapi.OpenModel{} + if err := m.kubeClient.Get(ctx, types.NamespacedName{Name: modelName}, model); err != nil { + return serrors.Wrap(fmt.Errorf("failed to validate open model, %w", err), "OpenModel", klog.KRef("", modelName)) + } + + serviceFlavorRawStr, ok := pod.Annotations[llmazinferenceapi.InferenceServiceFlavorsAnnoKey] + if !ok { + // Not all inference pods specify the inference service flavors. + return nil + } + + // Get all flavors from the model and check if the service flavors are valid. + allFlavors := lo.SliceToMap( + lo.FromPtrOr(model.Spec.InferenceConfig, llmazcoreapi.InferenceConfig{}).Flavors, + func(flavor llmazcoreapi.Flavor) (llmazcoreapi.FlavorName, llmazcoreapi.Flavor) { + return flavor.Name, flavor + }, + ) + unknownFlavors := lo.Reject(strings.Split(serviceFlavorRawStr, ","), func(flavor string, _ int) bool { + return lo.HasKey(allFlavors, llmazcoreapi.FlavorName(flavor)) + }) + + if len(unknownFlavors) > 0 { + err = serrors.Wrap(fmt.Errorf("unknown service inference flavors, %v", unknownFlavors), "OpenModel", klog.KRef("", modelName)) + return err + } + return nil +} diff --git a/pkg/controllers/provisioning/suite_test.go b/pkg/controllers/provisioning/suite_test.go index 2d43e51018..fa2dd6eda3 100644 --- a/pkg/controllers/provisioning/suite_test.go +++ b/pkg/controllers/provisioning/suite_test.go @@ -49,9 +49,13 @@ import ( "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/scheduling" "sigs.k8s.io/karpenter/pkg/test" + testcrds "sigs.k8s.io/karpenter/pkg/test/crds" . "sigs.k8s.io/karpenter/pkg/test/expectations" "sigs.k8s.io/karpenter/pkg/test/v1alpha1" . 
"sigs.k8s.io/karpenter/pkg/utils/testing" + + llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" + llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1" ) var ( @@ -73,7 +77,7 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func() { - env = test.NewEnvironment(test.WithCRDs(apis.CRDs...), test.WithCRDs(v1alpha1.CRDs...)) + env = test.NewEnvironment(test.WithCRDs(apis.CRDs...), test.WithCRDs(v1alpha1.CRDs...), test.WithCRDs(testcrds.CRDs...)) ctx = options.ToContext(ctx, test.Options()) cloudProvider = fake.NewCloudProvider() fakeClock = clock.NewFakeClock(time.Now()) @@ -2583,6 +2587,264 @@ var _ = Describe("Provisioning", func() { }) }) }) + + Context("Model Inference Requirements", func() { + It("should not schedule if the model is missing", func() { + ExpectApplied(ctx, env.Client, test.NodePool()) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: "unknown", + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectMetricGaugeValue(pscheduling.IgnoredPodCount, 1, nil) + ExpectNotScheduled(ctx, env.Client, pod) + }) + It("should schedule with model if the model does not have an inference flavor", func() { + model := test.OpenModel(test.OpenModelOptions{}) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + }) + It("should not schedule if the inference flavor annotation is incompatible between model and inference service", func() { + model := test.OpenModel(test.OpenModelOptions{}) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + 
Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "unknown", + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectMetricGaugeValue(pscheduling.IgnoredPodCount, 1, nil) + ExpectNotScheduled(ctx, env.Client, pod) + }) + It("should schedule to target instance type if the model has the flavor", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-instance-type")) + }) + It("should schedule to target instance type if inference service specifies the flavor and model has multiple flavors", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "test-flavor-2", + }, + }, + }) + ExpectProvisioned(ctx, 
env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + }) + It("should schedule to first available instance type if some inference flavors are not supported by node pools", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "unavailable", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + }) + It("shouldn't schedule to the in-flight node claim even if the node claim is compatible with second inference flavor when the first inference flavor is supported by node pools", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod1 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + 
llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "test-flavor-2", + }, + }, + }) + pod2 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + Expect(node1.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + node2 := ExpectScheduled(ctx, env.Client, pod2) + Expect(node2.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-instance-type")) + }) + It("should schedule to the in-flight node claim if the node claim is compatible with second inference flavor and the first inference flavor is unsupported by node pools", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor-1", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "unavailable", + }, + }, + { + Name: "test-flavor-2", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-b-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + pod1 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + Annotations: map[string]string{ + llmazinferenceapi.InferenceServiceFlavorsAnnoKey: "test-flavor-2", + }, + }, + }) + pod2 := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod1, pod2) + node1 := ExpectScheduled(ctx, env.Client, pod1) + Expect(node1.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + node2 := ExpectScheduled(ctx, env.Client, pod2) + 
Expect(node2.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-b-instance-type")) + Expect(node2.Name).To(Equal(node1.Name)) + }) + It("should not relax an added service inference node-selector away", func() { + model := test.OpenModel(test.OpenModelOptions{ + Flavors: []llmazcoreapi.Flavor{ + { + Name: "test-flavor", + NodeSelector: map[string]string{ + corev1.LabelInstanceTypeStable: "gpu-vendor-instance-type", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, test.NodePool(), model) + + pod := test.UnschedulablePod(test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + llmazcoreapi.ModelNameLabelKey: model.Name, + }, + }, + NodeRequirements: []corev1.NodeSelectorRequirement{ + { + Key: "example.com/label", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"unsupported"}, + }, + }, + }) + + // Add the second capacity type that is OR'd with the first. Previously we only added the service inference requirement + // to a single node selector term which would sometimes get relaxed away. Now we add it to all of them to AND + // it with each existing term. 
+ pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, + corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: v1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{v1.CapacityTypeOnDemand}, + }, + }, + }) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + node := ExpectScheduled(ctx, env.Client, pod) + Expect(node.Labels).To(HaveKeyWithValue(corev1.LabelInstanceTypeStable, "gpu-vendor-instance-type")) + }) + }) }) func ExpectNodeClaimRequirements(nodeClaim *v1.NodeClaim, requirements ...corev1.NodeSelectorRequirement) { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 3885d5ce13..c45050ebb0 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -60,6 +60,8 @@ import ( "sigs.k8s.io/karpenter/pkg/operator/logging" "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/utils/env" + + llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1" ) const ( @@ -192,7 +194,7 @@ func NewOperator() (context.Context, *Operator) { return lo.Ternary(mgr.GetCache().WaitForCacheSync(req.Context()), nil, fmt.Errorf("failed to sync caches")) })) lo.Must0(mgr.AddReadyzCheck("crd", func(_ *http.Request) error { - objects := []client.Object{&v1.NodePool{}, &v1.NodeClaim{}} + objects := []client.Object{&v1.NodePool{}, &v1.NodeClaim{}, &llmazcoreapi.OpenModel{}} for _, obj := range objects { gvk, err := apiutil.GVKForObject(obj, scheme.Scheme) if err != nil { diff --git a/pkg/test/crds/apis.go b/pkg/test/crds/apis.go new file mode 100644 index 0000000000..d2e975dca4 --- /dev/null +++ b/pkg/test/crds/apis.go @@ -0,0 +1,32 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package crds + +import ( + _ "embed" + + "github.com/awslabs/operatorpkg/object" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" +) + +var ( + //go:embed openmodel-crd.yaml + OpenModelCRD []byte + CRDs = []*apiextensionsv1.CustomResourceDefinition{ + object.Unmarshal[apiextensionsv1.CustomResourceDefinition](OpenModelCRD), + } +) diff --git a/pkg/test/crds/openmodel-crd.yaml b/pkg/test/crds/openmodel-crd.yaml new file mode 100644 index 0000000000..001f81004a --- /dev/null +++ b/pkg/test/crds/openmodel-crd.yaml @@ -0,0 +1,243 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: openmodels.llmaz.io +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: llmaz-webhook-service + namespace: llmaz-system + path: /convert + conversionReviewVersions: + - v1 + group: llmaz.io + names: + kind: OpenModel + listKind: OpenModelList + plural: openmodels + shortNames: + - om + singular: openmodel + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: OpenModel is the Schema for the open models API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: ModelSpec defines the desired state of Model
+            properties:
+              familyName:
+                description: |-
+                  FamilyName represents the model type, like llama2, which will be auto injected
+                  to the labels with the key of `llmaz.io/model-family-name`.
+                type: string
+              inferenceConfig:
+                description: InferenceConfig represents the inference configurations
+                  for the model.
+                properties:
+                  flavors:
+                    description: |-
+                      Flavors represents the accelerator requirements to serve the model.
+                      Flavors are fungible following the priority represented by the slice order.
+                    items:
+                      description: |-
+                        Flavor defines the accelerator requirements for a model and the necessary parameters
+                        in autoscaling. Right now, it will be used in two places:
+                        - Pod scheduling with node selectors specified.
+                        - Cluster autoscaling with essential parameters provided.
+                      properties:
+                        limits:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Limits defines the required accelerators to serve the model for each replica,
+                            like <nvidia.com/gpu: 8>. For multi-hosts cases, the limits here indicates
+                            the resource requirements for each replica, usually equals to the TP size.
+                            Not recommended to set the cpu and memory usage here:
+                            - if using playground, you can define the cpu/mem usage at backendConfig.
+                            - if using inference service, you can define the cpu/mem at the container resources.
+                            However, if you define the same accelerator resources at playground/service as well,
+                            the resources will be overwritten by the flavor limit here.
+                          type: object
+                        name:
+                          description: Name represents the flavor name, which will
+                            be used in model claim.
+                          type: string
+                        nodeSelector:
+                          additionalProperties:
+                            type: string
+                          description: |-
+                            NodeSelector represents the node candidates for Pod placements, if a node doesn't
+                            meet the nodeSelector, it will be filtered out in the resourceFungibility scheduler plugin.
+                            If nodeSelector is empty, it means every node is a candidate.
+                          type: object
+                        params:
+                          additionalProperties:
+                            type: string
+                          description: |-
+                            Params stores other useful parameters and will be consumed by cluster-autoscaler / Karpenter
+                            for autoscaling or be defined as model parallelism parameters like TP or PP size.
+                            E.g. with autoscaling, when scaling up nodes with 8x Nvidia A00, the parameter can be injected
+                            with <INSTANCE-TYPE: p4d.24xlarge> for AWS.
+                            Preset parameters: TP, PP, INSTANCE-TYPE.
+                          type: object
+                      required:
+                      - name
+                      type: object
+                    maxItems: 8
+                    type: array
+                type: object
+              source:
+                description: |-
+                  Source represents the source of the model, there're several ways to load
+                  the model such as loading from huggingface, OCI registry, s3, host path and so on.
+                properties:
+                  modelHub:
+                    description: ModelHub represents the model registry for model
+                      downloads.
+                    properties:
+                      allowPatterns:
+                        description: AllowPatterns refers to files matched with at
+                          least one pattern will be downloaded.
+                        items:
+                          type: string
+                        type: array
+                      filename:
+                        description: |-
+                          Filename refers to a specified model file rather than the whole repo.
+                          This is helpful to download a specified GGUF model rather than downloading
+                          the whole repo which includes all kinds of quantized models.
+                          TODO: this is only supported with Huggingface, add support for ModelScope in the near future.
+                          Note: once filename is set, allowPatterns and ignorePatterns should be left unset.
+ type: string + ignorePatterns: + description: IgnorePatterns refers to files matched with any + of the patterns will not be downloaded. + items: + type: string + type: array + modelID: + description: |- + ModelID refers to the model identifier on model hub, + such as meta-llama/Meta-Llama-3-8B. + type: string + name: + default: Huggingface + description: Name refers to the model registry, such as huggingface. + enum: + - Huggingface + - ModelScope + type: string + revision: + default: main + description: Revision refers to a Git revision id which can + be a branch name, a tag, or a commit hash. + type: string + type: object + uri: + description: |- + URI represents a various kinds of model sources following the uri protocol, protocol://
<address>, e.g.
+                      - oss://<bucket>.<endpoint>/<path-to-your-model>
+                      - ollama://llama3.3
+                      - host://<path-to-your-model>
+                    type: string
+                type: object
+            required:
+            - familyName
+            - source
+            type: object
+          status:
+            description: ModelStatus defines the observed state of Model
+            properties:
+              conditions:
+                description: Conditions represents the Inference condition.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+
diff --git a/pkg/test/openmodel.go b/pkg/test/openmodel.go
new file mode 100644
index 0000000000..d897c02339
--- /dev/null
+++ b/pkg/test/openmodel.go
@@ -0,0 +1,54 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package test
+
+import (
+	"fmt"
+
+	"github.com/imdario/mergo"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
+)
+
+// OpenModelOptions customizes the fixture returned by OpenModel.
+type OpenModelOptions struct {
+	metav1.ObjectMeta
+
+	// Flavors becomes the model's spec.inferenceConfig.flavors.
+	Flavors []llmazcoreapi.Flavor
+}
+
+// OpenModel builds an OpenModel test fixture by merging the given option
+// structs (later overrides win) into a minimal valid spec.
+func OpenModel(overrides ...OpenModelOptions) *llmazcoreapi.OpenModel {
+	options := OpenModelOptions{}
+	for _, opts := range overrides {
+		if err := mergo.Merge(&options, opts, mergo.WithOverride); err != nil {
+			panic(fmt.Sprintf("Failed to merge options: %s", err))
+		}
+	}
+
+	return &llmazcoreapi.OpenModel{
+		ObjectMeta: ObjectMeta(options.ObjectMeta),
+		Spec: llmazcoreapi.ModelSpec{
+			InferenceConfig: &llmazcoreapi.InferenceConfig{
+				Flavors: options.Flavors,
+			},
+		},
+	}
+}