diff --git a/Makefile b/Makefile index 59b1b377..4e24bc6f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,6 @@ include Makefile-deps.mk # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. ENVTEST_K8S_VERSION = 1.32.0 -ENVTEST_LWS_VERSION = v0.5.1 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) ifeq (,$(shell go env GOBIN)) @@ -84,9 +83,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \ crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \ webhook output:webhook:artifacts:config=config/webhook \ - paths="./cmd/..." - paths="./api/..." - paths="./pkg/..." + paths="./api/...;./pkg/...;./cmd/..." .PHONY: generate generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. @@ -136,7 +133,7 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests. .PHONY: test-e2e test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build - E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ENVTEST_LWS_VERSION=$(ENVTEST_LWS_VERSION) ./hack/e2e-test.sh + E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ./hack/e2e-test.sh test-deploy-with-helm: kind-image-build E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) TAG=$(GIT_TAG) ./hack/test-deploy-with-helm.sh diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go index ca98a8dc..21c8f765 100644 --- a/api/core/v1alpha1/model_types.go +++ b/api/core/v1alpha1/model_types.go @@ -71,11 +71,6 @@ type ModelHub struct { // URIProtocol represents the protocol of the URI. type URIProtocol string -// Add roles for operating leaderWorkerSet. -// -// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch - // ModelSource represents the source of the model. // Only one model source will be used. type ModelSource struct { diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml index 00ec2e20..e4e887f4 100644 --- a/config/crd/bases/inference.llmaz.io_playgrounds.yaml +++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml @@ -897,7 +897,7 @@ spec: description: |- InferenceFlavors represents a list of flavor names with fungibility supported to serve the model. - - If not set, always apply with the 0-index model by default. + - If not set, will employ the model configured flavors by default. - If set, will lookup the flavor names following the model orders. items: type: string diff --git a/config/crd/bases/inference.llmaz.io_services.yaml b/config/crd/bases/inference.llmaz.io_services.yaml index 908ef21a..1f76e4ab 100644 --- a/config/crd/bases/inference.llmaz.io_services.yaml +++ b/config/crd/bases/inference.llmaz.io_services.yaml @@ -52,7 +52,7 @@ spec: description: |- InferenceFlavors represents a list of flavor names with fungibility supported to serve the model. - - If not set, always apply with the 0-index model by default. + - If not set, will employ the model configured flavors by default. - If set, will lookup the flavor names following the model orders. items: type: string diff --git a/config/default/configmap.yaml b/config/default/configmap.yaml new file mode 100644 index 00000000..26a5c3ee --- /dev/null +++ b/config/default/configmap.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: global-config +data: + config.data: | + scheduler-name: default-scheduler + # init-container-image: inftyai/model-loader:v0.0.10 diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index e5f8ab99..4f09b691 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -22,6 +22,9 @@ resources: # crd/kustomization.yaml - ../webhook +# [customized] +- configmap.yaml + # [INTERNALCERT] - ../internalcert diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index f1b7bc3a..0c89acbf 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -21,3 +21,4 @@ spec: selector: matchLabels: app.kubernetes.io/name: llmaz + control-plane: controller-manager diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index a18ba05f..61c42763 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -4,6 +4,13 @@ kind: ClusterRole metadata: name: manager-role rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/docs/examples/envoy-ai-gateway/basic-vllm.yaml b/docs/examples/envoy-ai-gateway/basic-vllm.yaml new file mode 100644 index 00000000..e6e051d9 --- /dev/null +++ b/docs/examples/envoy-ai-gateway/basic-vllm.yaml @@ -0,0 +1,79 @@ +apiVersion: llmaz.io/v1alpha1 +kind: OpenModel +metadata: + name: qwen3-0--6b +spec: + familyName: qwen3 + source: + modelHub: + modelID: Qwen/Qwen3-0.6B + inferenceConfig: + flavors: + - name: t4 # GPU type + limits: + nvidia.com/gpu: 1 +--- +apiVersion: inference.llmaz.io/v1alpha1 +kind: Playground +metadata: + name: qwen3-0--6b +spec: + replicas: 1 + modelClaim: + modelName: qwen3-0--6b + backendRuntimeConfig: + backendName: vllm + version: v0.8.5 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + name: default-envoy-ai-gateway +spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: default-envoy-ai-gateway +spec: + gatewayClassName: default-envoy-ai-gateway + listeners: + - name: http + protocol: HTTP + port: 80 +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIGatewayRoute +metadata: + name: default-envoy-ai-gateway +spec: + schema: + name: OpenAI + targetRefs: + - name: default-envoy-ai-gateway + kind: Gateway + group: gateway.networking.k8s.io + rules: + - matches: + - headers: + - type: Exact + name: x-ai-eg-model + value: qwen3-0--6b + backendRefs: + - name: qwen3-0--6b +--- +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: AIServiceBackend +metadata: + name: qwen3-0--6b +spec: + timeouts: + request: 3m + schema: + name: OpenAI + backendRef: + name: qwen3-0--6b-lb + kind: Service + port: 8080 +--- diff --git a/docs/examples/envoy-ai-gateway/basic.yaml b/docs/examples/envoy-ai-gateway/basic.yaml index 93dd348e..0f203e96 100644 --- a/docs/examples/envoy-ai-gateway/basic.yaml +++ b/docs/examples/envoy-ai-gateway/basic.yaml @@ -82,14 +82,14 @@ spec: - headers: - type: Exact name: x-ai-eg-model - value: qwen2-0.5b + value: qwen2-0--5b backendRefs: - name: qwen2-0--5b - matches: - headers: - type: Exact name: x-ai-eg-model - value: qwen2.5-coder + value: qwen2--5-coder backendRefs: - name: qwen2--5-coder --- diff --git a/go.mod b/go.mod index 6b68a0ba..fae388a8 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/onsi/gomega v1.37.0 github.com/open-policy-agent/cert-controller v0.12.0 github.com/stretchr/testify v1.9.0 + gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.32.5 k8s.io/apiextensions-apiserver v0.32.5 k8s.io/apimachinery v0.32.5 diff --git a/go.sum b/go.sum index dbd66e6d..28b875a3 100644 --- a/go.sum +++ b/go.sum @@ -232,6 +232,8 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/hack/e2e-test.sh b/hack/e2e-test.sh index 6b47d184..bc3f9aa9 100755 --- a/hack/e2e-test.sh +++ b/hack/e2e-test.sh @@ -40,7 +40,7 @@ function deploy { $KUSTOMIZE build $CWD/test/e2e/config | $KUBECTL apply --server-side -f - } function deploy_lws { - $KUBECTL apply --server-side -f https://github.com/kubernetes-sigs/lws/releases/download/$ENVTEST_LWS_VERSION/manifests.yaml + $KUBECTL apply --server-side -f $CWD/test/config/lws/ } trap cleanup EXIT startup diff --git a/hack/test-deploy-with-helm.sh b/hack/test-deploy-with-helm.sh index 921e0f38..4ecb5c9d 100755 --- a/hack/test-deploy-with-helm.sh +++ b/hack/test-deploy-with-helm.sh @@ -25,7 +25,7 @@ function kind_load { function deploy { cd $CWD HELM_EXT_OPTS='--set controllerManager.manager.image.tag=${TAG}' make helm-install - $KUBECTL wait --timeout=30m --for=condition=ready pods --namespace=llmaz-system -l app.kubernetes.io/component!=open-webui,app!=certgen + $KUBECTL wait --timeout=10m --for=condition=ready pods --namespace=llmaz-system -l app.kubernetes.io/component!=open-webui,app!=certgen echo "all pods of llmaz-system is ready..." $KUBECTL get pod -n llmaz-system } diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go index 8688671d..12b10d80 100644 --- a/pkg/controller/inference/service_controller.go +++ b/pkg/controller/inference/service_controller.go @@ -68,6 +68,9 @@ func NewServiceReconciler(client client.Client, scheme *runtime.Scheme, record r //+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch //+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update //+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list +//+kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -83,6 +86,31 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct logger.V(10).Info("reconcile Service", "Service", klog.KObj(service)) + cm := &corev1.ConfigMap{} + if err := r.Get(ctx, types.NamespacedName{Name: "llmaz-global-config", Namespace: "llmaz-system"}, cm); err != nil { + if client.IgnoreNotFound(err) != nil { + return ctrl.Result{}, fmt.Errorf("failed to get llmaz-global-config configmap: %w", err) + } + } + configs, err := helper.ParseGlobalConfigmap(cm) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to parse global configurations: %w", err) + } + + // Set the global configurations to the service. + if configs.SchedulerName != "" { + if service.Spec.WorkloadTemplate.LeaderTemplate != nil && service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName == "" { + service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName = configs.SchedulerName + } + if service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName == "" { + service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName = configs.SchedulerName + } + + if err := r.Client.Update(ctx, service); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to update service: %w", err) + } + } + models, err := helper.FetchModelsByService(ctx, r.Client, service) if err != nil { return ctrl.Result{}, err @@ -93,8 +121,6 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, err } - // TODO: handle fungibility - if err := util.Patch(ctx, r.Client, workloadApplyConfiguration); err != nil { return ctrl.Result{}, err } diff --git a/pkg/controller_helper/configmap.go b/pkg/controller_helper/configmap.go new file mode 100644 index 00000000..bd4d9f45 --- /dev/null +++ b/pkg/controller_helper/configmap.go @@ -0,0 +1,44 @@ +/* +Copyright 2025 The InftyAI Team. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package helper + +import ( + "fmt" + + "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" +) + +type GlobalConfigs struct { + SchedulerName string `yaml:"scheduler-name"` + InitContainerImage string `yaml:"init-container-image"` +} + +func ParseGlobalConfigmap(cm *corev1.ConfigMap) (*GlobalConfigs, error) { + rawConfig, ok := cm.Data["config.data"] + if !ok { + return nil, fmt.Errorf("config.data not found in ConfigMap") + } + + var configs GlobalConfigs + err := yaml.Unmarshal([]byte(rawConfig), &configs) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal config.data: %v", err) + } + + return &configs, nil +} diff --git a/site/content/en/docs/integrations/prometheus-operator.md b/site/content/en/docs/integrations/prometheus-operator.md index 70001b35..fe8444f9 100644 --- a/site/content/en/docs/integrations/prometheus-operator.md +++ b/site/content/en/docs/integrations/prometheus-operator.md @@ -10,7 +10,7 @@ This document provides deployment steps to install and configure Prometheus Oper Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install prometheus operator or simply run the following command: ```bash -curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl delete -f - +curl -sL https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.81.0/bundle.yaml | kubectl create -f - ``` Ensure that the Prometheus Operator Pod is running successfully. diff --git a/test/config/others/global-configmap.yaml b/test/config/others/global-configmap.yaml new file mode 100644 index 00000000..12d0a650 --- /dev/null +++ b/test/config/others/global-configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: llmaz-global-config + namespace: llmaz-system +data: + config.data: | + scheduler-name: inftyai-scheduler + init-container-image: inftyai/model-loader:v0.0.10 diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go index d82bd708..58fc0704 100644 --- a/test/e2e/playground_test.go +++ b/test/e2e/playground_test.go @@ -55,7 +55,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() { Image("ollama/ollama").Version("latest"). Command([]string{"sh", "-c"}). Arg("default", []string{"ollama serve & while true;do output=$(ollama list 2>&1);if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;ollama run {{.ModelName}};while true;do sleep 60;done"}). - Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj() + Request("default", "cpu", "1").Request("default", "memory", "2Gi").Limit("default", "cpu", "2").Limit("default", "memory", "4Gi").Obj() gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed()) model := wrapper.MakeModel("qwen2-0--5b").FamilyName("qwen2").ModelSourceWithURI("ollama://qwen2:0.5b").Obj() diff --git a/test/integration/controller/inference/suit_test.go b/test/integration/controller/inference/suit_test.go index 5171c377..fd55a02b 100644 --- a/test/integration/controller/inference/suit_test.go +++ b/test/integration/controller/inference/suit_test.go @@ -31,6 +31,7 @@ import ( autoscalingv2 "k8s.io/api/autoscaling/v2" corev1 "k8s.io/api/core/v1" "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -120,7 +121,14 @@ var _ = BeforeSuite(func() { serviceController := inferencecontroller.NewServiceReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("service")) Expect(serviceController.SetupWithManager(mgr)).NotTo(HaveOccurred()) + Expect(k8sClient.Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "llmaz-system", + }, + })).ToNot(HaveOccurred()) + Expect(util.Setup(ctx, k8sClient, "../../../config/backends")).To(Succeed()) + Expect(util.Setup(ctx, k8sClient, "../../../config/others")).To(Succeed()) go func() { defer GinkgoRecover() diff --git a/test/util/validation/validate_service.go b/test/util/validation/validate_service.go index 4e7b24ca..6ed13b6c 100644 --- a/test/util/validation/validate_service.go +++ b/test/util/validation/validate_service.go @@ -43,6 +43,7 @@ import ( coreapi "github.com/inftyai/llmaz/api/core/v1alpha1" inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1" "github.com/inftyai/llmaz/pkg" + helper "github.com/inftyai/llmaz/pkg/controller_helper" modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource" pkgUtil "github.com/inftyai/llmaz/pkg/util" "github.com/inftyai/llmaz/test/util" @@ -50,6 +51,10 @@ import ( func ValidateService(ctx context.Context, k8sClient client.Client, service *inferenceapi.Service) { gomega.Eventually(func() error { + if err := k8sClient.Get(ctx, types.NamespacedName{Name: service.Name, Namespace: service.Namespace}, service); err != nil { + return errors.New("failed to get service") + } + workload := lws.LeaderWorkerSet{} if err := k8sClient.Get(ctx, types.NamespacedName{Name: service.Name, Namespace: service.Namespace}, &workload); err != nil { return errors.New("failed to get lws") @@ -58,8 +63,6 @@ func ValidateService(ctx context.Context, k8sClient client.Client, service *infe return fmt.Errorf("unexpected replicas %d, got %d", *service.Spec.Replicas, *workload.Spec.Replicas) } - // TODO: multi-host - models := []*coreapi.OpenModel{} for _, mr := range service.Spec.ModelClaims.Models { model := &coreapi.OpenModel{} @@ -100,6 +103,10 @@ func ValidateService(ctx context.Context, k8sClient client.Client, service *infe return err } + if err := ValidateConfigmap(ctx, k8sClient, service); err != nil { + return err + } + return nil }, util.IntegrationTimeout, util.Interval).Should(gomega.Succeed()) } @@ -348,3 +355,27 @@ func CheckServiceAvaliable() error { } return nil } + +func ValidateConfigmap(ctx context.Context, k8sClient client.Client, service *inferenceapi.Service) error { + cm := corev1.ConfigMap{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: "llmaz-global-config", Namespace: "llmaz-system"}, &cm); err != nil { + return err + } + + data, err := helper.ParseGlobalConfigmap(&cm) + if err != nil { + return fmt.Errorf("failed to parse global configmap: %v", err) + } + + if service.Spec.WorkloadTemplate.LeaderTemplate != nil { + if service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName != data.SchedulerName { + return fmt.Errorf("unexpected scheduler name %s, want %s", service.Spec.WorkloadTemplate.LeaderTemplate.Spec.SchedulerName, data.SchedulerName) + } + } + + if service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName != data.SchedulerName { + return fmt.Errorf("unexpected scheduler name %s, want %s", service.Spec.WorkloadTemplate.WorkerTemplate.Spec.SchedulerName, data.SchedulerName) + } + + return nil +}