diff --git a/packages/@aws-cdk/aws-glue/README.md b/packages/@aws-cdk/aws-glue/README.md index f5e200f0465e7..069b572b6cd71 100644 --- a/packages/@aws-cdk/aws-glue/README.md +++ b/packages/@aws-cdk/aws-glue/README.md @@ -42,7 +42,8 @@ These jobs run in an Apache Spark environment managed by AWS Glue. An ETL job processes data in batches using Apache Spark. ```ts -new glue.Job(stack, 'ScalaSparkEtlJob', { +declare const bucket: s3.Bucket; +new glue.Job(this, 'ScalaSparkEtlJob', { executable: glue.JobExecutable.scalaEtl({ glueVersion: glue.GlueVersion.V2_0, script: glue.Code.fromBucket(bucket, 'src/com/example/HelloWorld.scala'), @@ -58,7 +59,7 @@ new glue.Job(stack, 'ScalaSparkEtlJob', { A Streaming job is similar to an ETL job, except that it performs ETL on data streams. It uses the Apache Spark Structured Streaming framework. Some Spark job features are not available to streaming ETL jobs. ```ts -new glue.Job(stack, 'PythonSparkStreamingJob', { +new glue.Job(this, 'PythonSparkStreamingJob', { executable: glue.JobExecutable.pythonStreaming({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -74,10 +75,11 @@ A Python shell job runs Python scripts as a shell and supports a Python version This can be used to schedule and run tasks that don't require an Apache Spark environment. ```ts -new glue.Job(stack, 'PythonShellJob', { +declare const bucket: s3.Bucket; +new glue.Job(this, 'PythonShellJob', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, - pythonVersion: PythonVersion.THREE, + pythonVersion: glue.PythonVersion.THREE, script: glue.Code.fromBucket(bucket, 'script.py'), }), description: 'an example Python Shell job', @@ -91,8 +93,10 @@ See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for A `Connection` allows Glue jobs, crawlers and development endpoints to access certain types of data stores. 
For example, to create a network connection to connect to a data source within a VPC: ```ts -new glue.Connection(stack, 'MyConnection', { - connectionType: glue.ConnectionTypes.NETWORK, +declare const securityGroup: ec2.SecurityGroup; +declare const subnet: ec2.Subnet; +new glue.Connection(this, 'MyConnection', { + type: glue.ConnectionType.NETWORK, // The security groups granting AWS Glue inbound access to the data source within the VPC securityGroups: [securityGroup], // The VPC subnet which contains the data source @@ -109,7 +113,7 @@ See [Adding a Connection to Your Data Store](https://docs.aws.amazon.com/glue/la A `SecurityConfiguration` is a set of security properties that can be used by AWS Glue to encrypt data at rest. ```ts -new glue.SecurityConfiguration(stack, 'MySecurityConfiguration', { +new glue.SecurityConfiguration(this, 'MySecurityConfiguration', { securityConfigurationName: 'name', cloudWatchEncryption: { mode: glue.CloudWatchEncryptionMode.KMS, @@ -126,7 +130,8 @@ new glue.SecurityConfiguration(stack, 'MySecurityConfiguration', { By default, a shared KMS key is created for use with the encryption configurations that require one. You can also supply your own key for each encryption config, for example, for CloudWatch encryption: ```ts -new glue.SecurityConfiguration(stack, 'MySecurityConfiguration', { +declare const key: kms.Key; +new glue.SecurityConfiguration(this, 'MySecurityConfiguration', { securityConfigurationName: 'name', cloudWatchEncryption: { mode: glue.CloudWatchEncryptionMode.KMS, @@ -142,8 +147,8 @@ See [documentation](https://docs.aws.amazon.com/glue/latest/dg/encryption-securi A `Database` is a logical grouping of `Tables` in the Glue Catalog. 
```ts -new glue.Database(stack, 'MyDatabase', { - databaseName: 'my_database' +new glue.Database(this, 'MyDatabase', { + databaseName: 'my_database', }); ``` @@ -152,7 +157,8 @@ new glue.Database(stack, 'MyDatabase', { A Glue table describes a table of data in S3: its structure (column names and types), location of data (S3 objects with a common prefix in an S3 bucket), and format for the files (Json, Avro, Parquet, etc.): ```ts -new glue.Table(stack, 'MyTable', { +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { database: myDatabase, tableName: 'my_table', columns: [{ @@ -160,20 +166,29 @@ new glue.Table(stack, 'MyTable', { type: glue.Schema.STRING, }, { name: 'col2', - type: glue.Schema.array(Schema.STRING), + type: glue.Schema.array(glue.Schema.STRING), comment: 'col2 is an array of strings' // comment is optional }], - dataFormat: glue.DataFormat.JSON + dataFormat: glue.DataFormat.JSON, }); ``` By default, an S3 bucket will be created to store the table's data but you can manually pass the `bucket` and `s3Prefix`: ```ts -new glue.Table(stack, 'MyTable', { +declare const myBucket: s3.Bucket; +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { bucket: myBucket, - s3Prefix: 'my-table/' - ... + s3Prefix: 'my-table/', + // ... + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); ``` @@ -184,21 +199,22 @@ By default, an S3 bucket will be created to store the table's data and stored in To improve query performance, a table can specify `partitionKeys` on which data is stored and queried separately. 
For example, you might partition a table by `year` and `month` to optimize queries based on a time window: ```ts -new glue.Table(stack, 'MyTable', { +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { database: myDatabase, tableName: 'my_table', columns: [{ name: 'col1', - type: glue.Schema.STRING + type: glue.Schema.STRING, }], partitionKeys: [{ name: 'year', - type: glue.Schema.SMALL_INT + type: glue.Schema.SMALL_INT, }, { name: 'month', - type: glue.Schema.SMALL_INT + type: glue.Schema.SMALL_INT, }], - dataFormat: glue.DataFormat.JSON + dataFormat: glue.DataFormat.JSON, }); ``` @@ -210,52 +226,98 @@ You can enable encryption on a Table's data: * [S3Managed](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html) - Server side encryption (`SSE-S3`) with an Amazon S3-managed key. ```ts -new glue.Table(stack, 'MyTable', { - encryption: glue.TableEncryption.S3_MANAGED - ... +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { + encryption: glue.TableEncryption.S3_MANAGED, + // ... + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); ``` * [Kms](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html) - Server-side encryption (`SSE-KMS`) with an AWS KMS Key managed by the account owner. ```ts +declare const myDatabase: glue.Database; // KMS key is created automatically -new glue.Table(stack, 'MyTable', { - encryption: glue.TableEncryption.KMS - ... +new glue.Table(this, 'MyTable', { + encryption: glue.TableEncryption.KMS, + // ... + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); // with an explicit KMS key -new glue.Table(stack, 'MyTable', { +new glue.Table(this, 'MyTable', { encryption: glue.TableEncryption.KMS, - encryptionKey: new kms.Key(stack, 'MyKey') - ... 
+ encryptionKey: new kms.Key(this, 'MyKey'), + // ... + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); ``` * [KmsManaged](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html) - Server-side encryption (`SSE-KMS`), like `Kms`, except with an AWS KMS Key managed by the AWS Key Management Service. ```ts -new glue.Table(stack, 'MyTable', { - encryption: glue.TableEncryption.KMS_MANAGED - ... +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { + encryption: glue.TableEncryption.KMS_MANAGED, + // ... + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); ``` * [ClientSideKms](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingClientSideEncryption.html#client-side-encryption-kms-managed-master-key-intro) - Client-side encryption (`CSE-KMS`) with an AWS KMS Key managed by the account owner. ```ts +declare const myDatabase: glue.Database; // KMS key is created automatically -new glue.Table(stack, 'MyTable', { - encryption: glue.TableEncryption.CLIENT_SIDE_KMS - ... +new glue.Table(this, 'MyTable', { + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + // ... + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); // with an explicit KMS key -new glue.Table(stack, 'MyTable', { +new glue.Table(this, 'MyTable', { encryption: glue.TableEncryption.CLIENT_SIDE_KMS, - encryptionKey: new kms.Key(stack, 'MyKey') - ... + encryptionKey: new kms.Key(this, 'MyKey'), + // ... 
+ database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, }); ``` @@ -266,30 +328,35 @@ new glue.Table(stack, 'MyTable', { A table's schema is a collection of columns, each of which has a `name` and a `type`. Types are recursive structures, consisting of primitive and complex types: ```ts -new glue.Table(stack, 'MyTable', { +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { columns: [{ name: 'primitive_column', - type: glue.Schema.STRING + type: glue.Schema.STRING, }, { name: 'array_column', type: glue.Schema.array(glue.Schema.INTEGER), - comment: 'array' + comment: 'array', }, { name: 'map_column', type: glue.Schema.map( glue.Schema.STRING, glue.Schema.TIMESTAMP), - comment: 'map' + comment: 'map', }, { name: 'struct_column', type: glue.Schema.struct([{ name: 'nested_column', type: glue.Schema.DATE, - comment: 'nested comment' + comment: 'nested comment', }]), - comment: "struct" + comment: "struct", }], - ... + // ... 
+ database: myDatabase, + tableName: 'my_table', + dataFormat: glue.DataFormat.JSON, +}); ``` ### Primitives diff --git a/packages/@aws-cdk/aws-glue/package.json b/packages/@aws-cdk/aws-glue/package.json index 29c41f0bd02e6..b4325ec0a648b 100644 --- a/packages/@aws-cdk/aws-glue/package.json +++ b/packages/@aws-cdk/aws-glue/package.json @@ -28,7 +28,14 @@ ] } }, - "projectReferences": true + "projectReferences": true, + "metadata": { + "jsii": { + "rosetta": { + "strict": true + } + } + } }, "repository": { "type": "git", diff --git a/packages/@aws-cdk/aws-glue/rosetta/default.ts-fixture b/packages/@aws-cdk/aws-glue/rosetta/default.ts-fixture new file mode 100644 index 0000000000000..2054032733bfc --- /dev/null +++ b/packages/@aws-cdk/aws-glue/rosetta/default.ts-fixture @@ -0,0 +1,16 @@ +// Fixture with packages imported, but nothing else +import * as path from 'path'; +import { Construct } from 'constructs'; +import { Stack } from '@aws-cdk/core'; +import * as glue from '@aws-cdk/aws-glue'; +import * as s3 from '@aws-cdk/aws-s3'; +import * as ec2 from '@aws-cdk/aws-ec2'; +import * as kms from '@aws-cdk/aws-kms'; + +class Fixture extends Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// here + } +}