Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions LICENSE-THIRD-PARTY
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

github.com/Masterminds/semver/v3,v3.4.0,https://github.com/Masterminds/semver/blob/v3.4.0/LICENSE.txt,MIT
github.com/beorn7/perks/quantile,v1.0.1,https://github.com/beorn7/perks/blob/v1.0.1/LICENSE,MIT
github.com/blang/semver,v3.5.1,https://github.com/blang/semver/blob/v3.5.1/LICENSE,MIT
github.com/cenkalti/backoff/v4,v4.3.0,https://github.com/cenkalti/backoff/blob/v4.3.0/LICENSE,MIT
Expand Down
12 changes: 12 additions & 0 deletions api/v1/search/mongodbsearch_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,18 @@ type MongoDBSearchSpec struct {
// Configure prometheus metrics endpoint in mongot. If not set, the metrics endpoint will be disabled.
// +optional
Prometheus *Prometheus `json:"prometheus,omitempty"`
// Configure MongoDB Search's automatic generation of vector embeddings using an embedding model service.
// `embedding` field of mongot config is generated using the values provided here.
// +optional
AutoEmbedding *EmbeddingConfig `json:"autoEmbedding,omitempty"`
}

// EmbeddingConfig holds the user-provided settings from which the `embedding` field of the
// mongot config is generated.
type EmbeddingConfig struct {
	// ProviderEndpoint is the endpoint of the embedding model provider. When it is empty,
	// no provider endpoint is set in the generated mongot config.
	// +optional
	ProviderEndpoint string `json:"providerEndpoint,omitempty"`
	// EmbeddingModelAPIKeySecret would have the name of the secret that has two keys
	// query-key and indexing-key for embedding model's API keys.
	// +kubebuilder:validation:Required
	EmbeddingModelAPIKeySecret corev1.LocalObjectReference `json:"embeddingModelAPIKeySecret"`
}

type MongoDBSource struct {
Expand Down
21 changes: 21 additions & 0 deletions api/v1/search/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions config/crd/bases/mongodb.com_mongodbsearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,32 @@ spec:
type: object
spec:
properties:
autoEmbedding:
description: |-
Configure MongoDB Search's automatic generation of vector embeddings using an embedding model service.
`embedding` field of mongot config is generated using the values provided here.
properties:
embeddingModelAPIKeySecret:
description: |-
EmbeddingModelAPIKeySecret would have the name of the secret that has two keys
query-key and indexing-key for embedding model's API keys.
properties:
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
type: object
x-kubernetes-map-type: atomic
providerEndpoint:
type: string
required:
- embeddingModelAPIKeySecret
type: object
logLevel:
description: Configure verbosity of mongot logs. Defaults to INFO
if not set.
Expand Down
4 changes: 4 additions & 0 deletions controllers/operator/mongodbsearch_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ func (r *MongoDBSearchReconciler) Reconcile(ctx context.Context, request reconci
r.watch.AddWatchedResourceIfNotAdded(mdbSearch.Spec.Security.TLS.CertificateKeySecret.Name, mdbSearch.Namespace, watch.Secret, mdbSearch.NamespacedName())
}

if mdbSearch.Spec.AutoEmbedding != nil {
r.watch.AddWatchedResourceIfNotAdded(mdbSearch.Spec.AutoEmbedding.EmbeddingModelAPIKeySecret.Name, mdbSearch.Namespace, watch.Secret, mdbSearch.NamespacedName())
}

reconcileHelper := searchcontroller.NewMongoDBSearchReconcileHelper(kubernetesClient.NewClient(r.kubeClient), mdbSearch, searchSource, r.operatorSearchConfig)

return reconcileHelper.Reconcile(ctx, log).ReconcileResult()
Expand Down
149 changes: 145 additions & 4 deletions controllers/searchcontroller/mongodbsearch_reconcile_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"context"
"crypto/sha256"
"encoding/base32"
"encoding/json"
"fmt"
"strings"

semver "github.com/Masterminds/semver/v3"
"github.com/ghodss/yaml"
"go.uber.org/zap"
"golang.org/x/xerrors"
Expand All @@ -27,6 +29,7 @@ import (
kubernetesClient "github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/client"
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/container"
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/podtemplatespec"
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/secret"
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/kube/service"
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/mongot"
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/tls"
Expand All @@ -42,6 +45,26 @@ const (
"The operator will ignore this resource: it will not reconcile or reconfigure the workload. " +
"Existing deployments will continue to run, but cannot be managed by the operator. " +
"To regain operator management, you must delete and recreate the MongoDBSearch resource."

// embeddingKeyFilePath is the directory referenced in the mongot config for the embedding API keys;
// this is where the query and indexing key files are made available.
embeddingKeyFilePath = "/etc/mongot/secrets"
embeddingKeyVolumeName = "auto-embedding-api-keys"

indexingKeyName = "indexing-key"
queryKeyName = "query-key"

apiKeysTempVolumeName = "api-keys-config"
// To overcome the strict requirement of api keys having 0400 permission we mount the api keys
// to a temp location apiKeysTempVolumeMount and then copy it to correct location embeddingKeyFilePath,
// changing the permission to 0400.
apiKeysTempVolumeMount = "/tmp/auto-embedding-api-keys"

// minSearchImageVersionForEmbedding is the minimum search image version required to enable auto embeddings for vector search.
minSearchImageVersionForEmbedding = "0.58.0"

// autoEmbeddingDetailsAnnKey is the annotation key added to the search pod; its value is the hash of the embedding API key secret.
autoEmbeddingDetailsAnnKey = "autoEmbeddingDetailsHash"
)

type OperatorSearchConfig struct {
Expand Down Expand Up @@ -119,8 +142,13 @@ func (r *MongoDBSearchReconcileHelper) reconcile(ctx context.Context, log *zap.S

egressTlsMongotModification, egressTlsStsModification := r.ensureEgressTlsConfig(ctx)

embeddingConfigMongotModification, embeddingConfigStsModification, err := r.ensureEmbeddingConfig(ctx, log)
if err != nil {
return workflow.Failed(err)
}

// the egress TLS modification needs to always be applied after the ingress one, because it toggles mTLS based on the mode set by the ingress modification
configHash, err := r.ensureMongotConfig(ctx, log, createMongotConfig(r.mdbSearch, r.db), ingressTlsMongotModification, egressTlsMongotModification)
configHash, err := r.ensureMongotConfig(ctx, log, createMongotConfig(r.mdbSearch, r.db), ingressTlsMongotModification, egressTlsMongotModification, embeddingConfigMongotModification)
if err != nil {
return workflow.Failed(err)
}
Expand All @@ -131,7 +159,16 @@ func (r *MongoDBSearchReconcileHelper) reconcile(ctx context.Context, log *zap.S
},
))

if err := r.createOrUpdateStatefulSet(ctx, log, CreateSearchStatefulSetFunc(r.mdbSearch, r.db, r.buildImageString()), configHashModification, keyfileStsModification, ingressTlsStsModification, egressTlsStsModification); err != nil {
image, version := r.searchImageAndVersion()
if err := r.createOrUpdateStatefulSet(ctx,
log,
CreateSearchStatefulSetFunc(r.mdbSearch, r.db, fmt.Sprintf("%s:%s", image, version)),
configHashModification,
keyfileStsModification,
ingressTlsStsModification,
egressTlsStsModification,
embeddingConfigStsModification,
); err != nil {
return workflow.Failed(err)
}

Expand Down Expand Up @@ -161,12 +198,12 @@ func (r *MongoDBSearchReconcileHelper) ensureSourceKeyfile(ctx context.Context,
), nil
}

func (r *MongoDBSearchReconcileHelper) buildImageString() string {
func (r *MongoDBSearchReconcileHelper) searchImageAndVersion() (string, string) {
imageVersion := r.mdbSearch.Spec.Version
if imageVersion == "" {
imageVersion = r.operatorSearchConfig.SearchVersion
}
return fmt.Sprintf("%s/%s:%s", r.operatorSearchConfig.SearchRepo, r.operatorSearchConfig.SearchName, imageVersion)
return fmt.Sprintf("%s/%s", r.operatorSearchConfig.SearchRepo, r.operatorSearchConfig.SearchName), imageVersion
}

func (r *MongoDBSearchReconcileHelper) createOrUpdateStatefulSet(ctx context.Context, log *zap.SugaredLogger, modifications ...statefulset.Modification) error {
Expand Down Expand Up @@ -231,6 +268,110 @@ func (r *MongoDBSearchReconcileHelper) ensureMongotConfig(ctx context.Context, l
return hashBytes(configData), nil
}

// ensureEmbeddingAPIKeySecret makes sure that the secret provided in the MongoDBSearch resource
// for the embedding model's API keys is present and contains both expected keys
// (indexingKeyName and queryKeyName).
//
// It returns a hash computed over the JSON encoding of the complete secret data, so the
// hash changes whenever any entry of the secret changes. An error is returned when the
// secret cannot be read, when one of the required keys is missing, or when the data
// cannot be marshalled.
func ensureEmbeddingAPIKeySecret(ctx context.Context, secretGetter secret.Getter, secretObj client.ObjectKey) (string, error) {
	// The getter is deliberately not named "client": that would shadow the imported
	// client package inside this function body.
	data, err := secret.ReadByteData(ctx, secretGetter, secretObj)
	if err != nil {
		return "", fmt.Errorf("reading embedding API key secret %s/%s: %w", secretObj.Namespace, secretObj.Name, err)
	}

	// Both keys are mandatory; the indexing key is checked first so the reported error
	// stays the same as before when both are missing.
	for _, requiredKey := range []string{indexingKeyName, queryKeyName} {
		if _, ok := data[requiredKey]; !ok {
			return "", fmt.Errorf(`Required key "%s" is not present in the Secret %s/%s`, requiredKey, secretObj.Namespace, secretObj.Name)
		}
	}

	encoded, err := json.Marshal(data)
	if err != nil {
		return "", fmt.Errorf("marshalling data of embedding API key secret %s/%s: %w", secretObj.Namespace, secretObj.Name, err)
	}

	return hashBytes(encoded), nil
}

// validateSearchVesionForEmbedding checks that the given search version is not older than
// minSearchImageVersionForEmbedding, the first version that supports auto embeddings.
//
// A version that cannot be parsed as semver is not rejected: a debug message is logged and
// nil is returned. An error is returned only when the version parses and is lower than the
// minimum.
func validateSearchVesionForEmbedding(version string, log *zap.SugaredLogger) error {
	parsedVersion, parseErr := semver.NewVersion(version)
	if parseErr != nil {
		log.Debugf("Failed getting semver of search image version. Version %s doesn't seem to be valid semver.", version)
		return nil
	}

	// The parse error is ignored here because the minimum version is a constant defined in this file.
	minimumVersion, _ := semver.NewVersion(minSearchImageVersionForEmbedding)
	if !parsedVersion.LessThan(minimumVersion) {
		return nil
	}

	return xerrors.Errorf("The MongoDB search version %s doesn't support auto embeddings. Please use version %s or newer.", version, minSearchImageVersionForEmbedding)
}

// ensureEmbeddingConfig returns the mongot config modification and the StatefulSet
// modification that enable auto embedding, based on Spec.AutoEmbedding of the search CR.
//
// When Spec.AutoEmbedding is nil both returned modifications are no-ops. Otherwise the
// function verifies that the referenced API key secret contains the expected keys and that
// the search version supports auto embeddings, returning an error if either check fails.
// The hash of the API key secret is added as a pod template annotation so that the pod
// template changes (and the search pod is rolled) whenever the keys change.
func (r *MongoDBSearchReconcileHelper) ensureEmbeddingConfig(ctx context.Context, log *zap.SugaredLogger) (mongot.Modification, statefulset.Modification, error) {
	autoEmbedding := r.mdbSearch.Spec.AutoEmbedding
	if autoEmbedding == nil {
		return mongot.NOOP(), statefulset.NOOP(), nil
	}

	// If AutoEmbedding is not nil, it's safe to assume that EmbeddingModelAPIKeySecret is
	// provided because it is marked as a required field.
	apiKeySecretName := autoEmbedding.EmbeddingModelAPIKeySecret.Name
	apiKeySecretHash, err := ensureEmbeddingAPIKeySecret(ctx, r.client, client.ObjectKey{
		Name:      apiKeySecretName,
		Namespace: r.mdbSearch.Namespace,
	})
	if err != nil {
		return nil, nil, err
	}

	_, version := r.searchImageAndVersion()
	if err := validateSearchVesionForEmbedding(version, log); err != nil {
		return nil, nil, err
	}

	autoEmbeddingViewWriterTrue := true
	mongotModification := func(config *mongot.Config) {
		config.Embedding = &mongot.EmbeddingConfig{
			IndexingKeyFile: fmt.Sprintf("%s/%s", embeddingKeyFilePath, indexingKeyName),
			QueryKeyFile:    fmt.Sprintf("%s/%s", embeddingKeyFilePath, queryKeyName),
		}

		// Since MCK right now installs search with one replica only it's safe to always set
		// IsAutoEmbeddingViewWriter to true. Once multiple mongot instances are supported,
		// this needs to be decided per instance and set here accordingly.
		config.Embedding.IsAutoEmbeddingViewWriter = &autoEmbeddingViewWriterTrue

		if autoEmbedding.ProviderEndpoint != "" {
			config.Embedding.ProviderEndpoint = autoEmbedding.ProviderEndpoint
		}
	}

	// File modes are octal: 0o400 is read-only for the owner. The decimal literal 400 would
	// be octal 0620 (owner read/write, group write), which is not the intended permission.
	readOnlyByOwnerPermission := int32(0o400)
	apiKeyVolume := statefulset.CreateVolumeFromSecret(embeddingKeyVolumeName, apiKeySecretName, statefulset.WithSecretDefaultMode(&readOnlyByOwnerPermission))
	// The secret is mounted at a temporary location; the container command copies the keys
	// from there into the emptyDir mounted at embeddingKeyFilePath.
	apiKeyVolumeMount := statefulset.CreateVolumeMount(embeddingKeyVolumeName, apiKeysTempVolumeMount, statefulset.WithReadOnly(true))

	emptyDirVolume := statefulset.CreateVolumeFromEmptyDir(apiKeysTempVolumeName)
	emptyDirVolumeMount := statefulset.CreateVolumeMount(apiKeysTempVolumeName, embeddingKeyFilePath)

	stsModification := statefulset.WithPodSpecTemplate(podtemplatespec.Apply(
		podtemplatespec.WithVolume(apiKeyVolume),
		podtemplatespec.WithVolumeMounts(MongotContainerName, apiKeyVolumeMount),
		podtemplatespec.WithVolume(emptyDirVolume),
		podtemplatespec.WithVolumeMounts(MongotContainerName, emptyDirVolumeMount),
		podtemplatespec.WithContainer(MongotContainerName, setupMongotContainerArgsForAPIKeys()),
		podtemplatespec.WithAnnotations(map[string]string{
			autoEmbeddingDetailsAnnKey: apiKeySecretHash,
		}),
	))
	return mongotModification, stsModification, nil
}

// setupMongotContainerArgsForAPIKeys returns the container modification built by passing the
// result of sensitiveFilePermissionsForAPIKeys (temp mount, final key path, mode "0400") to
// prependCommand.
func setupMongotContainerArgsForAPIKeys() container.Modification {
	// The API keys are expected to have 0400 permission, so the search container copies them
	// from the temp location (apiKeysTempVolumeMount) to the correct location
	// (embeddingKeyFilePath) with the correct permissions.
	// Setting the permission directly on the volume doesn't work because volumes are mounted
	// as symlinks, which would have different permissions. Using subPath partly resolves the
	// problem, but because of the fsGroup that we set, K8s makes sure the file is group
	// readable, so the permissions still don't become 0400 (they are -r--r-----).
	// That's why copying is necessary.
	return prependCommand(sensitiveFilePermissionsForAPIKeys(apiKeysTempVolumeMount, embeddingKeyFilePath, "0400"))
}

func (r *MongoDBSearchReconcileHelper) ensureIngressTlsConfig(ctx context.Context) (mongot.Modification, statefulset.Modification, error) {
if r.mdbSearch.Spec.Security.TLS == nil {
return mongot.NOOP(), statefulset.NOOP(), nil
Expand Down
Loading