diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 3eba66936f880..5eebc2c1b8a59 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -24,19 +24,6 @@ service that makes it easier to discover, prepare, move, and integrate data from multiple sources for analytics, machine learning (ML), and application development. -Without an L2 construct, developers define Glue data sources, connections, -jobs, and workflows for their data and ETL solutions via the AWS console, -the AWS CLI, and Infrastructure as Code tools like CloudFormation and the -CDK. However, there are several challenges to defining Glue resources at -scale that an L2 construct can resolve. First, developers must reference -documentation to determine the valid combinations of job type, Glue version, -worker type, language versions, and other parameters that are required for specific -job types. Additionally, developers must already know or look up the -networking constraints for data source connections, and there is ambiguity -around how to securely store secrets for JDBC connections. Finally, -developers want prescriptive guidance via best practice defaults for -throughput parameters like number of workers and batching. - The Glue L2 construct has convenience methods working backwards from common use cases and sets required parameters to defaults that align with recommended best practices for each job type. 
It also provides customers with a balance @@ -122,25 +109,25 @@ declare const stack: cdk.Stack; declare const role: iam.IRole; declare const script: glue.Code; new glue.PySparkEtlJob(stack, 'PySparkETLJob', { - jobName: 'PySparkETLJobCustomName', - description: 'This is a description', - role, - script, - glueVersion: glue.GlueVersion.V3_0, - continuousLogging: { enabled: false }, - workerType: glue.WorkerType.G_2X, - maxConcurrentRuns: 100, - timeout: cdk.Duration.hours(2), - connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], - securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), - tags: { - FirstTagName: 'FirstTagValue', - SecondTagName: 'SecondTagValue', - XTagName: 'XTagValue', - }, - numberOfWorkers: 2, - maxRetries: 2, - }); + jobName: 'PySparkETLJobCustomName', + description: 'This is a description', + role, + script, + glueVersion: glue.GlueVersion.V3_0, + continuousLogging: { enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, +}); ``` **Streaming Jobs** @@ -369,11 +356,11 @@ declare const stack: cdk.Stack; declare const role: iam.IRole; declare const script: glue.Code; new glue.PySparkEtlJob(stack, 'PySparkETLJob', { - role, - script, - jobName: 'PySparkETLJob', - jobRunQueuingEnabled: true - }); + role, + script, + jobName: 'PySparkETLJob', + jobRunQueuingEnabled: true +}); ``` ### Uploading scripts from the CDK app repository to S3 @@ -679,20 +666,20 @@ If you have a table with a large number of partitions that grows 
over time, cons ```ts declare const myDatabase: glue.Database; new glue.S3Table(this, 'MyTable', { - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT, - }, { - name: 'month', - type: glue.Schema.SMALL_INT, - }], - dataFormat: glue.DataFormat.JSON, - enablePartitionFiltering: true, + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }, { + name: 'month', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + enablePartitionFiltering: true, }); ``` diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/code.ts b/packages/@aws-cdk/aws-glue-alpha/lib/code.ts index 88648aa5f3410..004d3ce42562b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/code.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/code.ts @@ -10,7 +10,6 @@ import * as constructs from 'constructs'; * Represents a Glue Job's Code assets (an asset can be a scripts, a jar, a python file or any other file). */ export abstract class Code { - /** * Job code as an S3 object. * @param bucket The S3 bucket diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/connection.ts b/packages/@aws-cdk/aws-glue-alpha/lib/connection.ts index 61981720abd0a..8b1ed3ddaa3c4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/connection.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/connection.ts @@ -12,7 +12,6 @@ import { CfnConnection } from 'aws-cdk-lib/aws-glue'; * @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-connection-connectioninput.html#cfn-glue-connection-connectioninput-connectiontype */ export class ConnectionType { - /** * Designates a connection to a database through Java Database Connectivity (JDBC). 
*/ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 757dace81c696..8b3cce47ba70b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -254,7 +254,6 @@ export enum JobType { * The number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. */ export enum MaxCapacity { - /** * DPU value of 1/16th */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/database.ts b/packages/@aws-cdk/aws-glue-alpha/lib/database.ts index 6b208b3fdfecd..819d95fd5511a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/database.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/database.ts @@ -56,7 +56,6 @@ export interface DatabaseProps { * A Glue database. */ export class Database extends Resource implements IDatabase { - public static fromDatabaseArn(scope: Construct, id: string, databaseArn: string): IDatabase { const stack = Stack.of(scope); diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts index 57bb1cc636442..fc27047b0be98 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -131,7 +131,6 @@ export interface ContinuousLoggingProps { * event-driven flow using the job. */ export abstract class JobBase extends cdk.Resource implements IJob { - public abstract readonly jobArn: string; public abstract readonly jobName: string; public abstract readonly grantPrincipal: iam.IPrincipal; @@ -264,7 +263,6 @@ export abstract class JobBase extends cdk.Resource implements IJob { * * @param id construct id. * @param jobState the job state. - * @private */ private metricJobStateRule(id: string, jobState: JobState): events.Rule { return this.node.tryFindChild(id) as events.Rule ?? 
this.onStateChange(id, jobState); @@ -272,8 +270,6 @@ export abstract class JobBase extends cdk.Resource implements IJob { /** * Returns the job arn - * @param scope - * @param jobName */ protected buildJobArn(scope: constructs.Construct, jobName: string) : string { return cdk.Stack.of(scope).formatArn({ @@ -308,13 +304,12 @@ export interface JobImportAttributes { * JobProperties will be used to create new Glue Jobs using this L2 Construct. */ export interface JobProperties { - /** - * Script Code Location (required) - * Script to run when the Glue job executes. Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - **/ + * Script Code Location (required) + * Script to run when the Glue job executes. Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + */ readonly script: Code; /** @@ -326,26 +321,29 @@ export interface JobProperties { * and be granted sufficient permissions. * * @see https://docs.aws.amazon.com/glue/latest/dg/getting-started-access.html - **/ + */ readonly role: iam.IRole; /** * Name of the Glue job (optional) * Developer-specified name of the Glue job + * * @default - a name is automatically generated - **/ + */ readonly jobName?: string; /** * Description (optional) * Developer-specified description of the Glue job + * * @default - no value - **/ + */ readonly description?: string; /** * Number of Workers (optional) * Number of workers for Glue to use during job execution + * * @default 10 */ readonly numberOfWorkers?: number; @@ -354,8 +352,9 @@ export interface JobProperties { * Worker Type (optional) * Type of Worker for Glue to use during job execution * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X - * @default G_1X - **/ + * + * @default WorkerType.G_1X + */ readonly workerType?: WorkerType; /** @@ -366,7 +365,7 @@ export interface JobProperties { * you can specify is controlled by a service limit. * * @default 1 - **/ + */ readonly maxConcurrentRuns?: number; /** @@ -377,7 +376,7 @@ export interface JobProperties { * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html * for a list of reserved parameters * @default - no arguments - **/ + */ readonly defaultArguments?: { [key: string]: string }; /** @@ -386,44 +385,49 @@ export interface JobProperties { * Connections are used to connect to other AWS Service or resources within a VPC. * * @default [] - no connections are added to the job - **/ + */ readonly connections?: IConnection[]; /** * Max Retries (optional) * Maximum number of retry attempts Glue performs if the job fails + * * @default 0 - **/ + */ readonly maxRetries?: number; /** * Timeout (optional) * The maximum time that a job run can consume resources before it is * terminated and enters TIMEOUT status. Specified in minutes. + * * @default 2880 (2 days for non-streaming) * - **/ + */ readonly timeout?: cdk.Duration; /** * Security Configuration (optional) * Defines the encryption options for the Glue job + * * @default - no security configuration. - **/ + */ readonly securityConfiguration?: ISecurityConfiguration; /** * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resourcex + * A list of key:value pairs of tags to apply to this Glue job resources + * * @default {} - no tags - **/ + */ readonly tags?: { [key: string]: string }; /** * Glue Version * The version of Glue to use to execute this job + * * @default 3.0 for ETL - **/ + */ readonly glueVersion?: GlueVersion; /** @@ -431,8 +435,8 @@ export interface JobProperties { * * @default - no profiling metrics emitted. 
* - * @see `--enable-metrics` at https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - **/ + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ readonly enableProfilingMetrics? :boolean; /** @@ -444,7 +448,6 @@ export interface JobProperties { * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html **/ readonly continuousLogging?: ContinuousLoggingProps; - } /** @@ -452,7 +455,6 @@ export interface JobProperties { * @resource AWS::Glue::Job */ export abstract class Job extends JobBase { - /** * Identifies an existing Glue Job from a subset of attributes that can * be referenced from within another Stack or Construct. @@ -500,7 +502,6 @@ export abstract class Job extends JobBase { * @returns String containing the args for the continuous logging command */ public setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps | undefined) : any { - // If the developer has explicitly disabled continuous logging return no args if (props && !props.enabled) { return {}; @@ -536,7 +537,6 @@ export abstract class Job extends JobBase { const s3Location = code.bind(this, this.role).s3Location; return `s3://${s3Location.bucketName}/${s3Location.objectKey}`; } - } /** diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index f61396bb1da2c..d624920c28194 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -7,23 +7,10 @@ import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '.. 
import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; import { Code } from '../code'; -/** - * PySpark ETL Jobs class - * ETL jobs support pySpark and Scala languages, for which there are separate - * but similar constructors. ETL jobs default to the G2 worker type, but you - * can override this default with other supported worker type values - * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can - * override to 3.0. The following ETL features are enabled by default: - * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * You can find more details about version, worker type and other features - * in Glue's public documentation. - */ - /** * Properties for creating a Python Spark ETL job */ export interface PySparkEtlJobProps extends JobProperties { - /** * Enables the Spark UI debugging and monitoring with the specified props. * @@ -37,6 +24,7 @@ export interface PySparkEtlJobProps extends JobProperties { /** * Extra Python Files S3 URL (optional) * S3 URL where additional python dependencies are located + * * @default - no extra files */ readonly extraPythonFiles?: Code[]; @@ -46,7 +34,7 @@ export interface PySparkEtlJobProps extends JobProperties { * * @default - no extra files specified. * - * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ readonly extraFiles?: Code[]; @@ -58,17 +46,24 @@ export interface PySparkEtlJobProps extends JobProperties { * the job run field will be used. This property must be set to false for flex jobs. * If this property is enabled, maxRetries must be set to zero. 
* - * @default - no job run queuing + * @default false */ readonly jobRunQueuingEnabled?: boolean; } /** - * A Python Spark ETL Glue Job + * PySpark ETL Jobs class + * + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. */ export class PySparkEtlJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -84,16 +79,12 @@ export class PySparkEtlJob extends Job { /** * PySparkEtlJob constructor - * - * @param scope - * @param id - * @param props */ constructor(scope: Construct, id: string, props: PySparkEtlJobProps) { - super(scope, id, { physicalName: props.jobName, }); + // Set up role and permissions for principal this.role = props.role, { assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), @@ -153,7 +144,6 @@ export class PySparkEtlJob extends Job { /** * Set the executable arguments with best practices enabled by default * - * @param props * @returns An array of arguments for Glue to use on execution */ private executableArguments(props: PySparkEtlJobProps) { @@ -171,7 +161,6 @@ export class PySparkEtlJob extends Job { } private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { - validateSparkUiPrefix(sparkUiProps.prefix); const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket', { enforceSSL: true, encryption: BucketEncryption.S3_MANAGED }); bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts index 660e547eb2815..5ef2187ea8070 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts @@ -9,21 +9,9 @@ import * as cdk from 'aws-cdk-lib/core'; import { Code } from '../code'; /** - * Flex Jobs class - * - * Flex jobs supports Python and Scala language. - * The flexible execution class is appropriate for non-urgent jobs such as - * pre-production jobs, testing, and one-time data loads. - * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or - * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) - * - * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, - * —enable-continuous-cloudwatch-log - * + * Properties for PySparkFlexEtlJob */ - export interface PySparkFlexEtlJobProps extends JobProperties { - /** * Enables the Spark UI debugging and monitoring with the specified props. * @@ -46,7 +34,7 @@ export interface PySparkFlexEtlJobProps extends JobProperties { * * @default - no extra python files specified. * - * @see `--extra-py-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ readonly extraPythonFiles?: Code[]; @@ -55,18 +43,25 @@ export interface PySparkFlexEtlJobProps extends JobProperties { * * @default - no extra files specified. 
* - * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ readonly extraFiles?: Code[]; } /** - * A Python Spark ETL Glue Job + * Flex Jobs class + * + * Flex jobs supports Python and Scala language. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads. + * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) + * + * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, + * —enable-continuous-cloudwatch-log */ export class PySparkFlexEtlJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -82,10 +77,6 @@ export class PySparkFlexEtlJob extends Job { /** * PySparkFlexEtlJob constructor - * - * @param scope - * @param id - * @param props */ constructor(scope: Construct, id: string, props: PySparkFlexEtlJobProps) { super(scope, id, { @@ -154,7 +145,6 @@ export class PySparkFlexEtlJob extends Job { /** *Set the executable arguments with best practices enabled by default * - * @param props * @returns An array of arguments for Glue to use on execution */ private executableArguments(props: PySparkFlexEtlJobProps) { @@ -174,7 +164,6 @@ export class PySparkFlexEtlJob extends Job { /** * Set the arguments for sparkUI with best practices enabled by default * - * @param sparkUiProps, role * @returns An array of arguments for enabling sparkUI */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index c4c08783d1597..bec949c7bf6f1 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -1,18 +1,3 @@ -/** - * Python Spark Streaming Jobs class - * - * A Streaming job is similar to an ETL job, except that it performs ETL on data streams - * using the Apache Spark Structured Streaming framework. - * These jobs will default to use Python 3.9. - * - * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, - * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker - * and 4.0 version for streaming jobs which developers can override. - * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * - * RFC : https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md - */ - import { CfnJob } from 'aws-cdk-lib/aws-glue'; import * as iam from 'aws-cdk-lib/aws-iam'; import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; @@ -26,7 +11,6 @@ import { Code } from '../code'; * Properties for creating a Python Spark ETL job */ export interface PySparkStreamingJobProps extends JobProperties { - /** * Enables the Spark UI debugging and monitoring with the specified props. * @@ -40,16 +24,17 @@ export interface PySparkStreamingJobProps extends JobProperties { /** * Extra Python Files S3 URL (optional) * S3 URL where additional python dependencies are located + * * @default - no extra files - */ - + */ readonly extraPythonFiles?: Code[]; + /** * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. * * @default - no extra files specified. 
* - * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ readonly extraFiles?: Code[]; @@ -64,15 +49,21 @@ export interface PySparkStreamingJobProps extends JobProperties { * @default - no job run queuing */ readonly jobRunQueuingEnabled?: boolean; - } /** - * A Python Spark Streaming Glue Job + * Python Spark Streaming Jobs class + * + * A Streaming job is similar to an ETL job, except that it performs ETL on data streams + * using the Apache Spark Structured Streaming framework. + * These jobs will default to use Python 3.9. + * + * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, + * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker + * and 4.0 version for streaming jobs which developers can override. + * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. */ export class PySparkStreamingJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -87,11 +78,7 @@ export class PySparkStreamingJob extends Job { public readonly sparkUILoggingLocation?: SparkUILoggingLocation; /** - * pySparkStreamingJob constructor - * - * @param scope - * @param id - * @param props + * PySparkStreamingJob constructor */ constructor(scope: Construct, id: string, props: PySparkStreamingJobProps) { super(scope, id, { @@ -175,7 +162,6 @@ export class PySparkStreamingJob extends Job { } private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { - validateSparkUiPrefix(sparkUiProps.prefix); const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket', { enforceSSL: true, encryption: BucketEncryption.S3_MANAGED }); bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts index 10107c0a0c6ee..71bcd566d8c41 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -4,15 +4,6 @@ import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, PythonVersion, MaxCapacity, JobLanguage } from '../constants'; -/** - * Python Shell Jobs class - * - * A Python shell job runs Python scripts as a shell and supports a Python version that - * depends on the AWS Glue version you are using. - * This can be used to schedule and run tasks that don't require an Apache Spark environment. - * - */ - /** * Properties for creating a Python Shell job */ @@ -26,6 +17,7 @@ export interface PythonShellJobProps extends JobProperties { /** * The total number of DPU to assign to the Python Job + * * @default 0.0625 */ readonly maxCapacity?: MaxCapacity; @@ -38,17 +30,19 @@ export interface PythonShellJobProps extends JobProperties { * the job run field will be used. This property must be set to false for flex jobs. * If this property is enabled, maxRetries must be set to zero. * - * @default - no job run queuing + * @default false */ readonly jobRunQueuingEnabled?: boolean; } /** - * A Python Shell Glue Job + * Python Shell Jobs class + * + * A Python shell job runs Python scripts as a shell and supports a Python version that + * depends on the AWS Glue version you are using. + * This can be used to schedule and run tasks that don't require an Apache Spark environment. 
*/ export class PythonShellJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -56,10 +50,6 @@ export class PythonShellJob extends Job { /** * PythonShellJob constructor - * - * @param scope - * @param id - * @param props */ constructor(scope: Construct, id: string, props: PythonShellJobProps) { super(scope, id, { physicalName: props.jobName }); @@ -117,7 +107,6 @@ export class PythonShellJob extends Job { /** * Set the executable arguments with best practices enabled by default * - * @param props * @returns An array of arguments for Glue to use on execution */ private executableArguments(props: PythonShellJobProps) { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 0559c447c4ab4..c18fec4405bf8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -4,15 +4,6 @@ import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, WorkerType, Runtime } from '../constants'; -/** - * Ray Jobs class - * - * Glue Ray jobs use worker type Z.2X and Glue version 4.0. - * These are not overrideable since these are the only configuration that - * Glue Ray jobs currently support. The runtime defaults to Ray2.4 and min - * workers defaults to 3. - */ - /** * Properties for creating a Ray Glue job */ @@ -38,11 +29,14 @@ export interface RayJobProps extends JobProperties { } /** - * A Ray Glue Job + * Ray Jobs class + * + * Glue Ray jobs use worker type Z.2X and Glue version 4.0. + * These are not overrideable since these are the only configuration that + * Glue Ray jobs currently support. The runtime defaults to Ray2.4 and min + * workers defaults to 3. 
*/ export class RayJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -50,12 +44,7 @@ export class RayJob extends Job { /** * RayJob constructor - * - * @param scope - * @param id - * @param props */ - constructor(scope: Construct, id: string, props: RayJobProps) { super(scope, id, { physicalName: props.jobName, @@ -113,5 +102,4 @@ export class RayJob extends Job { this.jobArn = this.buildJobArn(this, resourceName); this.jobName = resourceName; } - } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index ecb42d87c3e59..67bdf275105f7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -1,18 +1,3 @@ -/** - * Spark ETL Jobs class - * ETL jobs support pySpark and Scala languages, for which there are separate - * but similar constructors. ETL jobs default to the G2 worker type, but you - * can override this default with other supported worker type values - * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can - * override to 3.0. The following ETL features are enabled by default: - * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * You can find more details about version, worker type and other features - * in Glue's public documentation. 
- * - * RFC: https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md - * - */ - import * as iam from 'aws-cdk-lib/aws-iam'; import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; import { CfnJob } from 'aws-cdk-lib/aws-glue'; @@ -26,7 +11,6 @@ import { Code } from '../code'; * Properties for creating a Scala Spark ETL job */ export interface ScalaSparkEtlJobProps extends JobProperties { - /** * Enables the Spark UI debugging and monitoring with the specified props. * @@ -48,7 +32,7 @@ export interface ScalaSparkEtlJobProps extends JobProperties { * Extra Jars S3 URL (optional) * S3 URL where additional jar dependencies are located * @default - no extra jar files - */ + */ readonly extraJars?: Code[]; /** @@ -65,11 +49,18 @@ export interface ScalaSparkEtlJobProps extends JobProperties { } /** - * A Scala Spark ETL Glue Job + * Spark ETL Jobs class + * + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. 
*/ export class ScalaSparkEtlJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -85,10 +76,6 @@ export class ScalaSparkEtlJob extends Job { /** * ScalaSparkEtlJob constructor - * - * @param scope - * @param id - * @param props */ constructor(scope: Construct, id: string, props: ScalaSparkEtlJobProps) { super(scope, id, { @@ -162,7 +149,6 @@ export class ScalaSparkEtlJob extends Job { /** * Set the executable arguments with best practices enabled by default * - * @param props * @returns An array of arguments for Glue to use on execution */ private executableArguments(props: ScalaSparkEtlJobProps) { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts index ee3d69695c30a..7d73540787def 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -1,15 +1,3 @@ -/** - * Spark ETL Jobs class - * ETL jobs support pySpark and Scala languages, for which there are separate - * but similar constructors. ETL jobs default to the G2 worker type, but you - * can override this default with other supported worker type values - * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can - * override to 3.0. The following ETL features are enabled by default: - * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * You can find more details about version, worker type and other features - * in Glue's public documentation. 
- */ - import * as iam from 'aws-cdk-lib/aws-iam'; import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; import { CfnJob } from 'aws-cdk-lib/aws-glue'; @@ -33,9 +21,7 @@ import { Code } from '../code'; * —enable-continuous-cloudwatch-log * */ - export interface ScalaSparkFlexEtlJobProps extends JobProperties { - /** * Enables the Spark UI debugging and monitoring with the specified props. * @@ -71,12 +57,12 @@ export interface ScalaSparkFlexEtlJobProps extends JobProperties { readonly extraJars?: Code[]; /** - * Setting this value to true prioritizes the customer's extra JAR files in the classpath. - * - * @default false - priority is not given to user-provided jars - * - * @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ + * Setting this value to true prioritizes the customer's extra JAR files in the classpath. + * + * @default false - priority is not given to user-provided jars + * + * @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ readonly extraJarsFirst?: boolean; /** @@ -88,15 +74,21 @@ export interface ScalaSparkFlexEtlJobProps extends JobProperties { * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ readonly extraFiles?: Code[]; - } /** - * A Scala Spark ETL Glue Job + * Spark ETL Jobs class + * + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. 
+ * You can find more details about version, worker type and other features + * in Glue's public documentation. */ export class ScalaSparkFlexEtlJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -112,10 +104,6 @@ export class ScalaSparkFlexEtlJob extends Job { /** * ScalaSparkFlexEtlJob constructor - * - * @param scope - * @param id - * @param props */ constructor(scope: Construct, id: string, props: ScalaSparkFlexEtlJobProps) { super(scope, id, { @@ -186,7 +174,6 @@ export class ScalaSparkFlexEtlJob extends Job { /** * Set the executable arguments with best practices enabled by default * - * @param props * @returns An array of arguments for Glue to use on execution */ private executableArguments(props: ScalaSparkFlexEtlJobProps) { @@ -211,7 +198,6 @@ export class ScalaSparkFlexEtlJob extends Job { /** * Set the arguments for sparkUI with best practices enabled by default * - * @param sparkUiProps, role * @returns An array of arguments for enabling sparkUI */ private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index 0467479dc6bd2..5b4b9f9810c44 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -1,18 +1,3 @@ -/** - * Scala Streaming Jobs class - * - * A Streaming job is similar to an ETL job, except that it performs ETL on data streams - * using the Apache Spark Structured Streaming framework. - * These jobs will default to use Python 3.9. - * - * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, - * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. 
We’ll default to G2 worker - * and 4.0 version for streaming jobs which developers can override. - * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * - * RFC: https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md - */ - import { CfnJob } from 'aws-cdk-lib/aws-glue'; import * as iam from 'aws-cdk-lib/aws-iam'; import { Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; @@ -25,15 +10,14 @@ import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSpark * Properties for creating a Scala Spark ETL job */ export interface ScalaSparkStreamingJobProps extends JobProperties { - /** - * Enables the Spark UI debugging and monitoring with the specified props. - * - * @default - Spark UI debugging and monitoring is disabled. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ readonly sparkUI?: SparkUIProps; /** @@ -64,11 +48,18 @@ export interface ScalaSparkStreamingJobProps extends JobProperties { } /** - * A Scala Spark Streaming Glue Job + * Scala Streaming Jobs class + * + * A Streaming job is similar to an ETL job, except that it performs ETL on data streams + * using the Apache Spark Structured Streaming framework. + * These jobs use the Scala language. + * + * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, + * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker + * and 4.0 version for streaming jobs which developers can override.
+ * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. */ export class ScalaSparkStreamingJob extends Job { - - // Implement abstract Job attributes public readonly jobArn: string; public readonly jobName: string; public readonly role: iam.IRole; @@ -84,11 +75,7 @@ export class ScalaSparkStreamingJob extends Job { /** * ScalaSparkStreamingJob constructor - * - * @param scope - * @param id - * @param props - */ + */ constructor(scope: Construct, id: string, props: ScalaSparkStreamingJobProps) { super(scope, id, { physicalName: props.jobName, @@ -161,9 +148,8 @@ export class ScalaSparkStreamingJob extends Job { /** * Set the executable arguments with best practices enabled by default * - * @param props * @returns An array of arguments for Glue to use on execution - */ + */ private executableArguments(props: ScalaSparkStreamingJobProps) { const args: { [key: string]: string } = {}; args['--job-language'] = JobLanguage.SCALA; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts index 3cb2f5937117d..f88eb04d4d2fa 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts @@ -9,7 +9,6 @@ import { EOL } from 'os'; * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ export interface SparkUIProps { - /** * The bucket where the Glue job stores the logs. * diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts index b8ca20aded84b..626f4e8aafb57 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts @@ -41,10 +41,10 @@ export enum TableEncryption { export interface S3TableProps extends TableBaseProps { /** - * S3 bucket in which to store data. 
- * - * @default one is created for you - */ + * S3 bucket in which to store data. + * + * @default one is created for you + */ readonly bucket?: s3.IBucket; /** diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/security-configuration.ts b/packages/@aws-cdk/aws-glue-alpha/lib/security-configuration.ts index 6af34e41d6728..570a4871d9329 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/security-configuration.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/security-configuration.ts @@ -149,7 +149,6 @@ export interface SecurityConfigurationProps { * - Attach a security configuration to a development endpoint to write encrypted Amazon S3 targets. */ export class SecurityConfiguration extends cdk.Resource implements ISecurityConfiguration { - /** * Creates a Connection construct that represents an external security configuration. * diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts index c1a43438b075d..ce3428d7fe25b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts @@ -1,11 +1,3 @@ -/** - * Triggers - * - * In AWS Glue, developers can use workflows to create and visualize complex extract, - * transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. 
- * - */ - import * as cdk from 'aws-cdk-lib/core'; import { JobState, CrawlerState, ConditionLogicalOperator, PredicateLogical } from '../constants'; import { IJob } from '../jobs/job'; // Use IJob interface instead of concrete class diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts index 18fb7fd6b6251..7b918a42a7c51 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts @@ -1,39 +1,3 @@ -/** - * This module defines a construct for creating and managing AWS Glue Workflows and Triggers. - * - * AWS Glue Workflows are orchestration services that allow you to create, manage, and monitor complex extract, transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. Workflows are designed to allow you to manage interdependent jobs and crawlers as a single unit, making it easier to orchestrate and monitor complex ETL pipelines. - * - * Triggers are used to initiate an AWS Glue Workflow. You can configure different types of triggers, such as on-demand, scheduled, event-based, or conditional triggers, to start your Workflow based on specific conditions or events. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/workflows_overview.html - * @see https://docs.aws.amazon.com/glue/latest/dg/about-triggers.html - * - * ## Usage Example - * - * ```typescript - * import * as cdk from 'aws-cdk-lib'; - * import * as glue from 'aws-glue-cdk-lib'; - * - * const app = new cdk.App(); - * const stack = new cdk.Stack(app, 'TestStack'); - * - * // Create a Glue Job - * const job = new glue.Job(stack, 'TestJob', { - * // Job properties - * }); - * - * // Create a Glue Workflow - * const workflow = new glue.Workflow(stack, 'TestWorkflow', { - * // Workflow properties - * }); - * - * // Add an on-demand trigger to the Workflow - * workflow.addOnDemandTrigger('OnDemandTrigger', { - * actions: [{ job: job }], - * }); - * ``` - */ - import * as cdk from 'aws-cdk-lib/core'; import * as constructs from 'constructs'; import { CfnWorkflow, CfnTrigger } from 'aws-cdk-lib/aws-glue'; @@ -160,13 +124,13 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { public abstract readonly workflowArn: string; /** - * Add an on-demand trigger to the workflow. - * - * @param id The id of the trigger. - * @param options Additional options for the trigger. - * @throws If both job and crawler are provided, or if neither job nor crawler is provided. - * @returns The created CfnTrigger resource. - */ + * Add an on-demand trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ public addOnDemandTrigger(id: string, options: OnDemandTriggerOptions): CfnTrigger { const trigger = new CfnTrigger(this, id, { ...options, @@ -180,13 +144,13 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } /** - * Add a daily-scheduled trigger to the workflow. - * - * @param id The id of the trigger. 
- * @param options Additional options for the trigger. - * @throws If both job and crawler are provided, or if neither job nor crawler is provided. - * @returns The created CfnTrigger resource. - */ + * Add a daily-scheduled trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ public addDailyScheduledTrigger(id: string, options: DailyScheduleTriggerOptions): CfnTrigger { const dailySchedule = TriggerSchedule.cron({ minute: '0', @@ -206,13 +170,13 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } /** - * Add a weekly-scheduled trigger to the workflow. - * - * @param id The id of the trigger. - * @param options Additional options for the trigger. - * @throws If both job and crawler are provided, or if neither job nor crawler is provided. - * @returns The created CfnTrigger resource. - */ + * Add a weekly-scheduled trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ public addWeeklyScheduledTrigger(id: string, options: WeeklyScheduleTriggerOptions): CfnTrigger { const weeklySchedule = TriggerSchedule.cron({ minute: '0', @@ -233,13 +197,13 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } /** - * Add a custom-scheduled trigger to the workflow. - * - * @param id The id of the trigger. - * @param options Additional options for the trigger. - * @throws If both job and crawler are provided, or if neither job nor crawler is provided. - * @returns The created CfnTrigger resource. - */ + * Add a custom-scheduled trigger to the workflow. + * + * @param id The id of the trigger. 
+ * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ public addCustomScheduledTrigger(id: string, options: CustomScheduledTriggerOptions): CfnTrigger { const trigger = new CfnTrigger(this, id, { ...options, @@ -254,13 +218,13 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } /** - * Add an Event Bridge based trigger to the workflow. - * - * @param id The id of the trigger. - * @param options Additional options for the trigger. - * @throws If both job and crawler are provided, or if neither job nor crawler is provided. - * @returns The created CfnTrigger resource. - */ + * Add an Event Bridge based trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ public addNotifyEventTrigger(id: string, options: NotifyEventTriggerOptions): CfnTrigger { const trigger = new CfnTrigger(this, id, { ...options, @@ -275,14 +239,14 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } /** - * Add a Condition (Predicate) based trigger to the workflow. - * - * @param id The id of the trigger. - * @param options Additional options for the trigger. - * @throws If both job and crawler are provided, or if neither job nor crawler is provided for any condition. - * @throws If a job is provided without a job state, or if a crawler is provided without a crawler state for any condition. - * @returns The created CfnTrigger resource. - */ + * Add a Condition (Predicate) based trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. 
+ * @throws If both job and crawler are provided, or if neither job nor crawler is provided for any condition. + * @throws If a job is provided without a job state, or if a crawler is provided without a crawler state for any condition. + * @returns The created CfnTrigger resource. + */ public addconditionalTrigger(id: string, options: ConditionalTriggerOptions): CfnTrigger { const trigger = new CfnTrigger(this, id, { ...options, @@ -349,7 +313,6 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } private renderEventBatchingCondition(props: NotifyEventTriggerOptions): CfnTrigger.EventBatchingConditionProperty { - const defaultBatchSize = 1; const defaultBatchWindow = cdk.Duration.seconds(900).toSeconds(); @@ -376,9 +339,34 @@ export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { } /** - * A class used for defining a Glue Workflow + * This module defines a construct for creating and managing AWS Glue Workflows and Triggers. + * + * AWS Glue Workflows are orchestration services that allow you to create, manage, and monitor complex extract, transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. Workflows are designed to allow you to manage interdependent jobs and crawlers as a single unit, making it easier to orchestrate and monitor complex ETL pipelines. + * + * Triggers are used to initiate an AWS Glue Workflow. You can configure different types of triggers, such as on-demand, scheduled, event-based, or conditional triggers, to start your Workflow based on specific conditions or events. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/workflows_overview.html + * @see https://docs.aws.amazon.com/glue/latest/dg/about-triggers.html + * + * ## Usage Example + * + * ```ts + * const app = new App(); + * const stack = new Stack(app, 'TestStack'); * - * @resource AWS::Glue::Workflow + * // Create a Glue Job + * declare const role: iam.IRole; + * declare const script: glue.Code; + * const job = new glue.PySparkStreamingJob(stack, 'ImportedJob', { role, script }); + * + * // Create a Glue Workflow + * const workflow = new glue.Workflow(stack, 'TestWorkflow'); + * + * // Add an on-demand trigger to the Workflow + * workflow.addOnDemandTrigger('OnDemandTrigger', { + * actions: [{ job: job }], + * }); + * ``` */ export class Workflow extends WorkflowBase { /** diff --git a/packages/@aws-cdk/aws-glue-alpha/rosetta/default.ts-fixture b/packages/@aws-cdk/aws-glue-alpha/rosetta/default.ts-fixture index 047a3eee7b647..0567a86d9141b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/rosetta/default.ts-fixture +++ b/packages/@aws-cdk/aws-glue-alpha/rosetta/default.ts-fixture @@ -1,7 +1,7 @@ // Fixture with packages imported, but nothing else import * as path from 'path'; import { Construct } from 'constructs'; -import { Stack } from 'aws-cdk-lib'; +import { App, Stack } from 'aws-cdk-lib'; import * as glue from '@aws-cdk/aws-glue-alpha'; import * as s3 from 'aws-cdk-lib/aws-s3'; import * as ec2 from 'aws-cdk-lib/aws-ec2';