Skip to content

Commit 159700f

Browse files
authored
PI-2526 Switch to cross-account SageMaker connector (#4541)
* PI-2526 Switch to cross-account SageMaker connector
* Fix embedding of text chunks
* Set endpoint name for all envs
* Switch to environment-specific endpoint names
1 parent 5787ca9 commit 159700f

File tree

9 files changed

+55
-43
lines changed

9 files changed

+55
-43
lines changed

projects/person-search-index-from-delius/container/pipelines/contact/index/bedrock-connector.json

-28
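This file was deleted.

projects/person-search-index-from-delius/container/pipelines/contact/index/sagemaker-connector.json

+27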
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"name": "sagemaker-embeddings",
3+
"description": "Generate text embeddings using a model provided by a SageMaker endpoint in an external account",
4+
"version": 1,
5+
"protocol": "aws_sigv4",
6+
"credential": {
7+
"roleArn": "${CONNECTOR_ROLE_ARN}",
8+
"externalAccountRoleArn": "${CONNECTOR_EXTERNAL_ACCOUNT_ROLE_ARN}"
9+
},
10+
"parameters": {
11+
"region": "eu-west-2",
12+
"service_name": "sagemaker"
13+
},
14+
"actions": [
15+
{
16+
"action_type": "predict",
17+
"method": "POST",
18+
"url": "https://runtime.sagemaker.eu-west-2.amazonaws.com/endpoints/${SAGEMAKER_ENDPOINT_NAME}/invocations",
19+
"headers": {
20+
"content-type": "application/json"
21+
},
22+
"request_body": "[${parameters.input}]",
23+
"pre_process_function": "connector.pre_process.default.embedding",
24+
"post_process_function": "connector.post_process.default.embedding"
25+
}
26+
]
27+
}

projects/person-search-index-from-delius/container/scripts/deploy-semantic-model.sh

+9-9
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,39 @@ eval "$(sentry-cli bash-hook --no-environ)"
55

66
## Create model group if it doesn't exist
77
echo Searching for existing model group...
8-
model_group_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/model_groups/_search" --data '{"query":{"match":{"name":"bedrock_model_group"}}}' | jq -r '.hits.hits[0]._id // ""')
8+
model_group_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/model_groups/_search" --data '{"query":{"match":{"name":"sagemaker_model_group"}}}' | jq -r '.hits.hits[0]._id // ""')
99
if [ -z "$model_group_id" ]; then
1010
echo Creating model group...
1111
model_group=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/model_groups/_register" --data '{
12-
"name": "bedrock_model_group",
13-
"description": "A model group for Bedrock models"
12+
"name": "sagemaker_model_group",
13+
"description": "A model group for SageMaker models"
1414
}')
1515
if [ "$(jq -r '.status' <<<"$model_group")" != "CREATED" ]; then fail "Failed to create model group: $model_group"; fi
1616
model_group_id=$(jq -r '.model_group_id' <<<"$model_group")
1717
else
1818
echo "Found model group with id=$model_group_id"
1919
fi
2020

21-
## Create Bedrock connector if it doesn't exist
21+
## Create SageMaker connector if it doesn't exist
2222
echo Searching for existing connector...
23-
connector_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/connectors/_search" --data "{\"query\":{\"match\":{\"name.keyword\":\"bedrock-${BEDROCK_MODEL_NAME}\"}}}" | jq -r '.hits.hits[0]._id // ""')
23+
connector_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/connectors/_search" --data "{\"query\":{\"match\":{\"name.keyword\":\"sagemaker-embeddings\"}}}" | jq -r '.hits.hits[0]._id // ""')
2424
if [ -z "$connector_id" ]; then
2525
echo Creating connector...
26-
connector_body=$(envsubst < /pipelines/contact/index/bedrock-connector.json)
26+
connector_body=$(envsubst < /pipelines/contact/index/sagemaker-connector.json)
2727
connector_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/connectors/_create" --data "$connector_body" | jq -r '.connector_id')
2828
else
2929
echo "Found connector with id=$connector_id"
3030
fi
3131

3232
## Register model if it doesn't exist
3333
model_body="{
34-
\"name\": \"bedrock-${BEDROCK_MODEL_NAME}\",
35-
\"description\": \"Bedrock embedding model\",
34+
\"name\": \"sagemaker-embeddings\",
35+
\"description\": \"SageMaker embedding model\",
3636
\"function_name\": \"remote\",
3737
\"model_group_id\": \"${model_group_id}\",
3838
\"connector_id\": \"${connector_id}\"
3939
}"
40-
model_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/models/_search" --data "{\"query\":{\"match\":{\"name.keyword\":\"bedrock-${BEDROCK_MODEL_NAME}\"}}}" | jq -r '.hits.hits[0]._id // ""')
40+
model_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/models/_search" --data "{\"query\":{\"match\":{\"name.keyword\":\"sagemaker-embeddings\"}}}" | jq -r '.hits.hits[0]._id // ""')
4141
if [ -z "$model_id" ]; then
4242
echo Registering model...
4343
curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/models/_register" --data "${model_body}"

projects/person-search-index-from-delius/container/scripts/startup.sh

+6-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@ curl_json -XPUT "${SEARCH_INDEX_HOST}/_cluster/settings" --data '{
1313
"action.auto_create_index": "false",
1414
"plugins.ml_commons.only_run_on_ml_node": "false",
1515
"plugins.ml_commons.model_access_control_enabled": "false",
16-
"plugins.ml_commons.native_memory_threshold": "90"
16+
"plugins.ml_commons.native_memory_threshold": "90",
17+
"plugins.ml_commons.trusted_connector_endpoints_regex": [
18+
"^https://bedrock-runtime\\..*[a-z0-9-]\\.amazonaws\\.com/.*$",
19+
"^https://runtime\\.sagemaker\\..*[a-z0-9-]\\.amazonaws\\.com/.*$"
20+
]
1721
}
1822
}'
1923

@@ -39,9 +43,8 @@ if grep -q 'contact' <<<"$PIPELINES_ENABLED"; then
3943
fi
4044

4145
# Setup semantic search for contacts
42-
export BEDROCK_MODEL_NAME=amazon.titan-embed-text-v2:0
4346
/scripts/deploy-semantic-model.sh
44-
model_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/models/_search" --data "{\"query\":{\"match\":{\"name.keyword\":\"bedrock-${BEDROCK_MODEL_NAME}\"}}}" | jq -r '.hits.hits[0]._id // ""')
47+
model_id=$(curl_json -XPOST "${SEARCH_INDEX_HOST}/_plugins/_ml/models/_search" --data "{\"query\":{\"match\":{\"name.keyword\":\"sagemaker-embeddings\"}}}" | jq -r '.hits.hits[0]._id // ""')
4548
export model_id
4649
echo "Deployed semantic search model. model_id=${model_id}"
4750
envsubst < /pipelines/contact/index/ingest-pipeline.tpl.json > /pipelines/contact/index/ingest-pipeline.json

projects/person-search-index-from-delius/deploy/templates/contact-reindex-cronjob.yml

+8-2
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,16 @@ spec:
9393
name: person-search-index-from-delius-sentry
9494
key: CONTACT_REINDEXING_SENTRY_MONITOR_ID
9595
optional: false
96-
- name: BEDROCK_CONNECTOR_IAM_ROLE_ARN
96+
- name: CONNECTOR_ROLE_ARN
9797
valueFrom:
9898
secretKeyRef:
9999
name: person-search-index-from-delius-opensearch
100-
key: bedrock_role_arn
100+
key: connector_role_arn
101+
optional: false
102+
- name: CONNECTOR_EXTERNAL_ACCOUNT_ROLE_ARN
103+
valueFrom:
104+
secretKeyRef:
105+
name: person-search-index-from-delius-opensearch
106+
key: connector_external_account_role_arn
101107
optional: false
102108
restartPolicy: Never

projects/person-search-index-from-delius/deploy/values-dev.yml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ generic-service:
88

99
env:
1010
SENTRY_ENVIRONMENT: dev
11+
SAGEMAKER_ENDPOINT_NAME: hmpps-probation-search-dev-sagemaker-endpoint
1112

1213
generic-prometheus-alerts:
1314
businessHoursOnly: true

projects/person-search-index-from-delius/deploy/values-preprod.yml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ generic-service:
77

88
env:
99
SENTRY_ENVIRONMENT: preprod
10+
SAGEMAKER_ENDPOINT_NAME: hmpps-probation-search-preprod-sagemaker-endpoint
1011

1112
generic-prometheus-alerts:
1213
businessHoursOnly: true

projects/person-search-index-from-delius/deploy/values-prod.yml

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ generic-service:
44

55
env:
66
SENTRY_ENVIRONMENT: prod
7+
SAGEMAKER_ENDPOINT_NAME: hmpps-probation-search-prod-sagemaker-endpoint
78

89
namespace_secrets:
910
common:

projects/person-search-index-from-delius/deploy/values.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ generic-service:
4040
JDBC_PASSWORD: DB_PASSWORD
4141
person-search-index-from-delius-opensearch:
4242
SEARCH_INDEX_HOST: url
43-
BEDROCK_CONNECTOR_IAM_ROLE_ARN: bedrock_role_arn
43+
CONNECTOR_ROLE_ARN: connector_role_arn
44+
CONNECTOR_EXTERNAL_ACCOUNT_ROLE_ARN: connector_external_account_role_arn
4445
person-search-index-from-delius-sentry:
4546
SENTRY_DSN: SENTRY_DSN
4647
person-search-index-from-delius-person-queue:

0 commit comments

Comments
 (0)