From 10a475f2e381dfce32c75d26be05927e7f9e06e8 Mon Sep 17 00:00:00 2001
From: Doruk Ozturk
Date: Fri, 8 Dec 2023 03:33:51 -0500
Subject: [PATCH] feat: Jupyterhub cognito fine grained access control (#380)

* Add cognito identity pool
* Add iam role for authenticated users
* Separate cognito to its own module
* Add python code that injects credentials
* Fix linter issues
* Add conditionals for cognito case
* Fix index error with resources
---
Reviewer notes (this area is ignored when the patch is applied):
* The line structure of this patch was restored from a whitespace-collapsed
  copy. Indentation, column alignment and blank context lines are
  reconstructed and should be verified against the target tree (or applied
  with whitespace-tolerant options) - TODO confirm.
* s3_readonly_policy_attachment now uses aws_iam_role_policy_attachment.
  The exclusive aws_iam_policy_attachment resource would detach
  AmazonS3ReadOnlyAccess from every other principal in the account.
  The hunk line count is unchanged; the blob id on the cognito.tf index
  line is therefore informational only.
* NOTE(review): no aws_lambda_permission for cognito-idp.amazonaws.com is
  visible in this patch; confirm the pre-token-generation trigger can be
  invoked by the user pool.

 ai-ml/jupyterhub/addons.tf                    |   3 +
 ai-ml/jupyterhub/cognito.tf                   | 224 ++++++++++++++++++
 .../jupyterhub/jupyterhub-values-cognito.yaml |  37 +++
 ai-ml/jupyterhub/jupyterhub.tf                |  47 ----
 ai-ml/jupyterhub/versions.tf                  |   5 +
 5 files changed, 269 insertions(+), 47 deletions(-)
 create mode 100644 ai-ml/jupyterhub/cognito.tf

diff --git a/ai-ml/jupyterhub/addons.tf b/ai-ml/jupyterhub/addons.tf
index 90959931d..277cf7889 100755
--- a/ai-ml/jupyterhub/addons.tf
+++ b/ai-ml/jupyterhub/addons.tf
@@ -297,7 +297,10 @@ module "eks_data_addons" {
         userdata_url                = try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/userInfo", "")
         client_id                   = try(aws_cognito_user_pool_client.user_pool_client[0].id, "")
         client_secret               = try(aws_cognito_user_pool_client.user_pool_client[0].client_secret, "")
+        user_pool_id                = try(aws_cognito_user_pool.pool[0].id, "")
+        identity_pool_id            = try(aws_cognito_identity_pool.identity_pool[0].id, "")
         jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name
+        region                      = var.region
       })]
     }
 
diff --git a/ai-ml/jupyterhub/cognito.tf b/ai-ml/jupyterhub/cognito.tf
new file mode 100644
index 000000000..57338986b
--- /dev/null
+++ b/ai-ml/jupyterhub/cognito.tf
@@ -0,0 +1,224 @@
+#---------------------------------------------------------------
+# Lambda function for pre token generation
+#----------------------------------------------------------------
+
+data "aws_iam_policy_document" "assume_role" {
+  statement {
+    effect = "Allow"
+    principals {
+      type        = "Service"
+      identifiers = ["lambda.amazonaws.com", "cognito-idp.amazonaws.com"]
+    }
+    actions = ["sts:AssumeRole"]
+  }
+}
+
+data "aws_iam_policy" "lambda_execution_policy" {
+  arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
+}
+
+resource "aws_iam_role" "iam_for_lambda" {
+  count              = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  name               = "iam_for_lambda"
+  assume_role_policy = data.aws_iam_policy_document.assume_role.json
+}
+
+resource "aws_iam_role_policy_attachment" "lambda_policy_attachment" {
+  count      = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  role       = aws_iam_role.iam_for_lambda[0].name
+  policy_arn = data.aws_iam_policy.lambda_execution_policy.arn
+}
+
+data "archive_file" "lambda" {
+  type        = "zip"
+  output_path = "/tmp/lambda.zip"
+  source {
+    filename = "index.mjs"
+    content  = <<-EOF
+      export const handler = async (event) => {
+        event.response = {
+          claimsOverrideDetails: {
+            claimsToAddOrOverride: {
+              department: "engineering",
+            },
+          },
+        };
+
+        return event;
+      };
+
+    EOF
+  }
+}
+
+resource "aws_lambda_function" "pretoken_trigger" {
+  count            = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  function_name    = "pretoken-trigger-function"
+  filename         = data.archive_file.lambda.output_path
+  source_code_hash = data.archive_file.lambda.output_base64sha256
+
+  runtime = "nodejs18.x"
+  handler = "index.handler"
+
+  role = aws_iam_role.iam_for_lambda[0].arn
+}
+
+#---------------------------------------------------------------
+# Cognito pool, domain and client creation.
+# This can be used for
+# Auth integration later.
+#----------------------------------------------------------------
+resource "aws_cognito_user_pool" "pool" {
+  count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  name  = "jupyterhub-userpool"
+
+  username_attributes      = ["email"]
+  auto_verified_attributes = ["email"]
+
+  password_policy {
+    minimum_length = 6
+  }
+
+  lambda_config {
+    pre_token_generation = aws_lambda_function.pretoken_trigger[0].arn
+  }
+}
+
+resource "aws_cognito_user_pool_domain" "domain" {
+  count        = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  domain       = local.cognito_custom_domain
+  user_pool_id = aws_cognito_user_pool.pool[0].id
+}
+
+resource "aws_cognito_user_pool_client" "user_pool_client" {
+  count                 = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  name                  = "jupyter-client"
+  access_token_validity = 1
+  token_validity_units {
+    access_token = "days"
+  }
+  callback_urls                        = ["https://${var.jupyterhub_domain}/hub/oauth_callback"]
+  user_pool_id                         = aws_cognito_user_pool.pool[0].id
+  allowed_oauth_flows_user_pool_client = true
+  allowed_oauth_flows                  = ["code"]
+  allowed_oauth_scopes                 = ["openid", "email"]
+  generate_secret                      = true
+  supported_identity_providers = [
+    "COGNITO"
+  ]
+
+  depends_on = [aws_cognito_user_pool_domain.domain]
+}
+
+#---------------------------------------------------------------
+# Cognito identity pool creation.
+#----------------------------------------------------------------
+resource "aws_cognito_identity_pool" "identity_pool" {
+  count                            = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  identity_pool_name               = "jupyterhub-identity-pool"
+  allow_unauthenticated_identities = false
+  cognito_identity_providers {
+    client_id               = aws_cognito_user_pool_client.user_pool_client[0].id
+    provider_name           = aws_cognito_user_pool.pool[0].endpoint
+    server_side_token_check = true
+  }
+
+  depends_on = [aws_cognito_user_pool_client.user_pool_client]
+}
+
+resource "aws_s3_bucket" "jupyterhub_bucket" {
+  count         = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  bucket_prefix = "jupyterhub-test-bucket-"
+}
+
+resource "aws_s3_object" "engineering_object" {
+  count  = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  bucket = aws_s3_bucket.jupyterhub_bucket[0].id
+  key    = "engineering/"
+}
+
+resource "aws_s3_object" "legal_object" {
+  count  = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  bucket = aws_s3_bucket.jupyterhub_bucket[0].id
+  key    = "legal/"
+}
+
+#---------------------------------------------------------------
+# IAM role for a team member from the engineering department
+# In theory there would be other departments such as "legal"
+#----------------------------------------------------------------
+resource "aws_iam_role" "cognito_authenticated_engineering_role" {
+  count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+
+  name = "EngineeringTeamRole"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17",
+    Statement = [
+      {
+        Action = ["sts:AssumeRoleWithWebIdentity", "sts:TagSession"],
+        Effect = "Allow",
+        Principal = {
+          Federated = "cognito-identity.amazonaws.com"
+        },
+        Condition = {
+          StringEquals = {
+            "cognito-identity.amazonaws.com:aud" = aws_cognito_identity_pool.identity_pool[0].id
+          },
+          "ForAnyValue:StringLike" : {
+            "cognito-identity.amazonaws.com:amr" : "authenticated"
+          }
+        }
+      }
+    ]
+  })
+}
+
+resource "aws_iam_role_policy" "s3_cognito_engineering_policy" {
+  count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  name  = "s3_cognito_engineering_policy"
+  role  = aws_iam_role.cognito_authenticated_engineering_role[0].id
+
+  policy = <<-EOF
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": ["s3:List*"],
+      "Resource": "*",
+      "Condition": {
+        "StringEquals": {
+          "s3:prefix": "$${aws:PrincipalTag/department}"
+        }
+      }
+    }
+  ]
+}
+EOF
+}
+
+resource "aws_cognito_identity_pool_provider_principal_tag" "example" {
+  count                  = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  identity_pool_id       = aws_cognito_identity_pool.identity_pool[0].id
+  identity_provider_name = aws_cognito_user_pool.pool[0].endpoint
+  use_defaults           = false
+  principal_tags = {
+    department = "department"
+  }
+}
+
+# Per-role attachment: aws_iam_policy_attachment is exclusive and would detach this managed policy from all other principals.
+resource "aws_iam_role_policy_attachment" "s3_readonly_policy_attachment" {
+  count      = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  policy_arn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
+  role       = aws_iam_role.cognito_authenticated_engineering_role[0].name
+}
+
+resource "aws_cognito_identity_pool_roles_attachment" "identity_pool_roles" {
+  count            = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
+  identity_pool_id = aws_cognito_identity_pool.identity_pool[0].id
+  roles = {
+    authenticated = aws_iam_role.cognito_authenticated_engineering_role[0].arn
+  }
+}
diff --git a/ai-ml/jupyterhub/helm/jupyterhub/jupyterhub-values-cognito.yaml b/ai-ml/jupyterhub/helm/jupyterhub/jupyterhub-values-cognito.yaml
index 5b01d6e2d..56e33efe7 100755
--- a/ai-ml/jupyterhub/helm/jupyterhub/jupyterhub-values-cognito.yaml
+++ b/ai-ml/jupyterhub/helm/jupyterhub/jupyterhub-values-cognito.yaml
@@ -4,6 +4,7 @@ hub:
       storage: 50Gi
       storageClassName: gp3
   authenticatePrometheus: false
+  command: ["sh", "-c", "pip install boto3 && jupyterhub --config /usr/local/etc/jupyterhub/jupyterhub_config.py"]
   config:
     GenericOAuthenticator:
       oauth_callback_url: ${jupyterdomain}
@@ -23,7 +24,43 @@ hub:
   extraConfig:
     jupyterhub_config.py: |-
       c.KubeSpawner.start_timeout = 1200
+      c.Authenticator.enable_auth_state = True
+    cognito_config.py: |-
+      import boto3
+      def auth_state_hook(spawner, auth_state):
+          client_idp = boto3.client('cognito-idp', region_name="${region}")
+          auth_response = client_idp.initiate_auth(
+              AuthFlow="REFRESH_TOKEN_AUTH",
+              AuthParameters={
+                  "REFRESH_TOKEN": auth_state['refresh_token'],
+                  "SECRET_HASH": "${client_secret}"
+              },
+              ClientId="${client_id}"
+          )
+          id_token = auth_response["AuthenticationResult"]["IdToken"]
+          client_identity = boto3.client("cognito-identity", region_name="${region}")
+          identity_response = client_identity.get_id(
+              IdentityPoolId="${identity_pool_id}",
+              Logins={
+                  f"cognito-idp.${region}.amazonaws.com/${user_pool_id}": id_token
+              }
+          )
+          identity_id = identity_response['IdentityId']
+          credentials = client_identity.get_credentials_for_identity(
+              IdentityId=identity_id,
+              Logins={
+                  f"cognito-idp.${region}.amazonaws.com/${user_pool_id}": id_token
+              }
+          )
+          key = credentials["Credentials"]["AccessKeyId"]
+          secret = credentials["Credentials"]["SecretKey"]
+          token = credentials["Credentials"]["SessionToken"]
+          spawner.environment['AWS_ACCESS_KEY_ID'] = key
+          spawner.environment['AWS_SECRET_ACCESS_KEY'] = secret
+          spawner.environment['AWS_SESSION_TOKEN'] = token
+
+      c.Spawner.auth_state_hook = auth_state_hook
 
 
 proxy:
   https:
diff --git a/ai-ml/jupyterhub/jupyterhub.tf b/ai-ml/jupyterhub/jupyterhub.tf
index 133b8bba2..3559d323e 100644
--- a/ai-ml/jupyterhub/jupyterhub.tf
+++ b/ai-ml/jupyterhub/jupyterhub.tf
@@ -132,50 +132,3 @@ module "efs_config" {
 
   depends_on = [kubernetes_namespace.jupyterhub]
 }
-
-#---------------------------------------------------------------
-# Cognito pool, domain and client creation.
-# This can be used
-# Auth integration later.
-#----------------------------------------------------------------
-resource "aws_cognito_user_pool" "pool" {
-  count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
-  name  = "jupyterhub-userpool"
-
-  username_attributes      = ["email"]
-  auto_verified_attributes = ["email"]
-
-  password_policy {
-    minimum_length = 6
-  }
-}
-
-resource "random_string" "random" {
-  count   = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
-  length  = 8
-  special = false
-  lower   = true
-  upper   = false
-}
-
-resource "aws_cognito_user_pool_domain" "domain" {
-  count        = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
-  domain       = "${local.cognito_custom_domain}-${random_string.random[0].result}"
-  user_pool_id = aws_cognito_user_pool.pool[0].id
-}
-
-resource "aws_cognito_user_pool_client" "user_pool_client" {
-  count                                = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
-  name                                 = "jupyter-client"
-  callback_urls                        = ["https://${var.jupyterhub_domain}/hub/oauth_callback"]
-  user_pool_id                         = aws_cognito_user_pool.pool[0].id
-  allowed_oauth_flows_user_pool_client = true
-  allowed_oauth_flows                  = ["code"]
-  allowed_oauth_scopes                 = ["openid", "email"]
-  generate_secret                      = true
-  supported_identity_providers = [
-    "COGNITO"
-  ]
-
-  depends_on = [aws_cognito_user_pool_domain.domain]
-}
diff --git a/ai-ml/jupyterhub/versions.tf b/ai-ml/jupyterhub/versions.tf
index 228869532..9b6678a5f 100755
--- a/ai-ml/jupyterhub/versions.tf
+++ b/ai-ml/jupyterhub/versions.tf
@@ -18,5 +18,10 @@ terraform {
       source  = "hashicorp/random"
       version = "3.1.0" # Replace with the appropriate version of the random provider
     }
+
+    archive = {
+      source  = "hashicorp/archive"
+      version = "2.4.0"
+    }
   }
 }