diff --git a/data-collection/deploy/deploy-data-collection.yaml b/data-collection/deploy/deploy-data-collection.yaml index 4d16083c..b6b3379f 100644 --- a/data-collection/deploy/deploy-data-collection.yaml +++ b/data-collection/deploy/deploy-data-collection.yaml @@ -17,6 +17,7 @@ Metadata: - ScheduleFrequent - CFNSourceBucket - DataBucketsKmsKeysArns + - LakeFormationEnabled - Label: default: 'Available modules' Parameters: @@ -59,7 +60,9 @@ Metadata: CFNSourceBucket: default: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. Must be always 'aws-managed-cost-intelligence-dashboards'" DataBucketsKmsKeysArns: - default: "" + default: "Data Buckets Kms Keys Arns" + LakeFormationEnabled: + default: "Lake Formation Enabled" IncludeTAModule: default: 'Include AWS Trusted Advisor Data Collection Module' IncludeRightsizingModule: @@ -167,6 +170,11 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." Default: "" + LakeFormationEnabled: + Type: String + Description: Choose 'yes' if Lake Formation permission model is in place for the account. If you are not sure, leave it as 'no'. You need to install prerequisites stack first. 
+ Default: "no" + AllowedValues: ["yes", "no"] IncludeTAModule: Type: String Description: Collects AWS Trusted Advisor recommendations data @@ -298,6 +306,7 @@ Conditions: - "" ProdCFNTemplateUsed: !Equals [ !Ref CFNSourceBucket, 'aws-managed-cost-intelligence-dashboards' ] NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] + LakeFormationEnabled: !Equals [ !Ref LakeFormationEnabled, "yes" ] Resources: S3Bucket: @@ -713,6 +722,8 @@ Resources: - glue:DeleteTable - glue:TagResource - glue:UpdateTable + - lakeformation:AddLFTagsToResource + - lakeformation:RemoveLFTagsFromResource Resource: - !Sub "arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog" - !Sub "arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:database/${DatabaseName}" @@ -733,6 +744,8 @@ Resources: DATABASE_NAME: !Ref DatabaseName RESOURCE_PREFIX: !Ref ResourcePrefix BUCKET_NAME: !Sub "${DestinationBucket}${AWS::AccountId}" + LAKEFORMATION_TAG_VALUE: !If [LakeFormationEnabled, !ImportValue cid-LakeFormation-TagValue, ''] + LAKEFORMATION_TAG_KEY: !If [LakeFormationEnabled, !ImportValue cid-LakeFormation-TagKey, ''] Code: ZipFile: | import os @@ -743,7 +756,10 @@ Resources: database_name = os.environ['DATABASE_NAME'] resource_prefix = os.environ['RESOURCE_PREFIX'] + lakeformation_tag_value = os.environ.get('LAKEFORMATION_TAG_VALUE') + lakeformation_tag_key = os.environ.get('LAKEFORMATION_TAG_KEY') glue_client = boto3.client('glue') + lakeformation_client = boto3.client('lakeformation') def lambda_handler(event, context): #pylint: disable=unused-argument print(json.dumps(event)) @@ -783,12 +799,27 @@ Resources: def create_or_update(table_input): try: glue_client.create_table(DatabaseName=database_name, TableInput=table_input) - return 'SUCCESS', 'created' except glue_client.exceptions.AlreadyExistsException: glue_client.update_table(DatabaseName=database_name, TableInput=table_input) - return 'SUCCESS', 'updated' + if lakeformation_tag_key and 
lakeformation_tag_value: + try: + lakeformation_client.add_lf_tags_to_resource( + Resource={'Table': {'DatabaseName': database_name, 'Name': table_input['Name']}}, + LFTags=[{"TagKey": lakeformation_tag_key, "TagValue": lakeformation_tag_value}] + ) + except lakeformation_client.exceptions.AlreadyExistsException as exc: + print(f"Lake Formation Error: {exc}") + return 'SUCCESS', 'updated or created' def delete(table_input): + if lakeformation_tag_key and lakeformation_tag_value: + try: + lakeformation_client.remove_lf_tags_from_resource( + Resource={'Table': {'DatabaseName': database_name, 'Name': table_input['Name']}}, + LFTags=[{"TagKey": lakeformation_tag_key, "TagValue": lakeformation_tag_value}] + ) + except lakeformation_client.exceptions.EntityNotFoundException as exc: + print(f"Lake Formation Error: {exc}") try: glue_client.delete_table(DatabaseName=database_name, Name=table_input['Name']) return 'SUCCESS', 'deleted' @@ -939,6 +970,7 @@ Resources: MultiAccountRoleName: !Sub "${ResourcePrefix}${MultiAccountRoleName}" Schedule: !Ref Schedule ResourcePrefix: !Ref ResourcePrefix + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn LambdaAnalyticsARN: !GetAtt LambdaAnalytics.Arn AccountCollectorLambdaARN: !Sub "${AccountCollector.Outputs.LambdaFunctionARN}" CodeBucket: !If [ ProdCFNTemplateUsed, !FindInMap [RegionMap, !Ref "AWS::Region", CodeBucket], !Ref CFNSourceBucket ] @@ -946,7 +978,7 @@ Resources: StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn - RightsizeModule: + RightsizeModule: # TO BE DEPRECATED Type: AWS::CloudFormation::Stack Condition: DeployRightsizingModule Properties: @@ -1009,6 +1041,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + LambdaManageGlueTableARN: !GetAtt 
LambdaManageGlueTable.Arn BackupModule: Type: AWS::CloudFormation::Stack @@ -1030,6 +1063,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + # LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn #FIXME: add table management InventoryCollectorModule: Type: AWS::CloudFormation::Stack @@ -1106,7 +1140,7 @@ Resources: StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn - EcsChargebackModule: + EcsChargebackModule: # TO DEPRECATE Type: AWS::CloudFormation::Stack Condition: DeployEcsChargebackModule Properties: @@ -1132,7 +1166,7 @@ Resources: - !Sub "${AWS::Region}" - !Join [ '', !Split [ ' ', !Ref RegionsInScope ] ] # remove spaces - RDSUsageModule: + RDSUsageModule: # TO DEPRECATE Type: AWS::CloudFormation::Stack Condition: DeployRDSUtilizationModule Properties: @@ -1178,6 +1212,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn #FIXME: add table management BudgetsModule: Type: AWS::CloudFormation::Stack @@ -1199,8 +1234,9 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn - TransitGatewayModule: + TransitGatewayModule: #TO DEPRECATE? 
Type: AWS::CloudFormation::Stack Condition: DeployTransitGatewayModule Properties: @@ -1244,6 +1280,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, standalone-v1, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + #LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn #TODO HealthEventsModule: Type: AWS::CloudFormation::Stack @@ -1265,6 +1302,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn LicenseManagerModule: Type: AWS::CloudFormation::Stack @@ -1286,6 +1324,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn ServiceQuotasModule: Type: AWS::CloudFormation::Stack @@ -1312,6 +1351,7 @@ Resources: - RegionsInScopeIsEmpty - !Sub "${AWS::Region}" - !Join [ '', !Split [ ' ', !Ref RegionsInScope ] ] # remove spaces + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn QuickSightModule: Type: AWS::CloudFormation::Stack @@ -1331,6 +1371,7 @@ Resources: StepFunctionTemplate: !FindInMap [StepFunctionCode, standalone-v1, TemplatePath] StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn + LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn AccountCollector: Type: AWS::CloudFormation::Stack diff --git a/data-collection/deploy/module-budgets.yaml b/data-collection/deploy/module-budgets.yaml index 8de3e11b..91d90acf 100644 --- a/data-collection/deploy/module-budgets.yaml +++ 
b/data-collection/deploy/module-budgets.yaml @@ -51,6 +51,9 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." Default: "" + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Outputs: StepFunctionARN: @@ -271,6 +274,61 @@ Resources: S3Targets: - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/" + ModuleGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: budgets_data + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: year + Type: string + - Name: month + Type: string + StorageDescriptor: + Columns: + - Name: budgetname + Type: string + - Name: budgetlimit + Type: struct + - Name: costfilters + Type: struct> + - Name: costtypes + Type: struct + - Name: timeunit + Type: string + - Name: timeperiod + Type: struct + - Name: calculatedspend + Type: struct,ForecastedSpend:struct> + - Name: budgettype + Type: string + - Name: lastupdatedtime + Type: string + - Name: collection_time + Type: string + - Name: account_id + Type: string + - Name: account_name + Type: string + - Name: tags + Type: array> + - Name: plannedbudgetlimits_flat + Type: array + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Account_ID,Account_Name,BudgetLimit,BudgetName,BudgetType,CalculatedSpend,CostFilters,CostTypes,LastUpdatedTime,PlannedBudgetLimits_Flat,Tags,TimePeriod,TimeUnit,collection_time + Parameters: + 
UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + ModuleStepFunction: Type: AWS::StepFunctions::StateMachine Properties: diff --git a/data-collection/deploy/module-health-events.yaml b/data-collection/deploy/module-health-events.yaml index 502c99a3..4b64ed97 100644 --- a/data-collection/deploy/module-health-events.yaml +++ b/data-collection/deploy/module-health-events.yaml @@ -51,6 +51,9 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." Default: "" + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Conditions: NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] @@ -459,6 +462,83 @@ Resources: S3Targets: - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-detail-data/" + ModuleGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: health_events_detail_data + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: year + Type: string + - Name: month + Type: string + - Name: day + Type: string + StorageDescriptor: + Columns: + - Name: payer_account_id + Type: string + - Name: account_id + Type: string + - Name: event_code + Type: string + - Name: event_category + Type: string + - Name: event_scope + Type: string + - Name: status_code + Type: string + - Name: service + Type: string + - Name: region + Type: string + - Name: event_description + Type: string + - Name: affected_entity_value + Type: string + - Name: affected_entity_arn + Type: string + - Name: affected_entity_status_code + Type: string + - Name: affected_entity_last_update + Type: string + - Name: affected_entity_url + Type: string + - Name: 
availability_zone + Type: string + - Name: deprecated_versions + Type: string + - Name: tags + Type: string + - Name: start_time + Type: string + - Name: end_time + Type: string + - Name: last_updated_time + Type: string + - Name: event_metadata + Type: string + - Name: event_source + Type: string + - Name: event_arn + Type: string + - Name: ingestion_time + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-detail-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: account_id,affected_entity_arn,affected_entity_last_update,affected_entity_status_code,affected_entity_url,affected_entity_value,availability_zone,deprecated_versions,end_time,event_arn,event_category,event_code,event_description,event_metadata,event_scope,event_source,ingestion_time,last_updated_time,payer_account_id,region,service,start_time,status_code,tags + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + ModuleStepFunction: Type: AWS::StepFunctions::StateMachine Properties: diff --git a/data-collection/deploy/module-license-manager.yaml b/data-collection/deploy/module-license-manager.yaml index 4bc30b7d..8fb4900f 100644 --- a/data-collection/deploy/module-license-manager.yaml +++ b/data-collection/deploy/module-license-manager.yaml @@ -59,6 +59,9 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." 
Default: "" + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Conditions: NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] @@ -282,11 +285,117 @@ Resources: Properties: Name: !Sub '${ResourcePrefix}${CFDataName}-${LicenseDataPrefix}-Crawler' Role: !Ref GlueRoleARN DatabaseName: !Ref DatabaseName Targets: S3Targets: - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${LicenseDataPrefix}/" + GrantsGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: license_manager_grants # this must be ${CFDataName}-${GrantDataPrefix} but with underscores instead of - + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: year + Type: string + - Name: month + Type: string + - Name: day + Type: string + StorageDescriptor: + Columns: + - Name: grantarn + Type: string + - Name: grantname + Type: string + - Name: parentarn + Type: string + - Name: licensearn + Type: string + - Name: granteeprincipalarn + Type: string + - Name: homeregion + Type: string + - Name: grantstatus + Type: string + - Name: version + Type: string + - Name: grantedoperations + Type: array + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${GrantDataPrefix}/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: GrantArn,GrantName,GrantStatus,GrantedOperations,GranteePrincipalArn,HomeRegion,LicenseArn,ParentArn,Version + Parameters: + UPDATED_BY_CRAWLER: !Sub '${ResourcePrefix}${CFDataName}-${GrantDataPrefix}-Crawler' + + LicensesGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: 
license_manager_licenses # this must be ${CFDataName}-${LicenseDataPrefix} but with underscores instead of - + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: year + Type: string + - Name: month + Type: string + - Name: day + Type: string + StorageDescriptor: + Columns: + - Name: licensearn + Type: string + - Name: licensename + Type: string + - Name: productname + Type: string + - Name: productsku + Type: string + - Name: issuer + Type: struct + - Name: homeregion + Type: string + - Name: status + Type: string + - Name: validity + Type: struct + - Name: beneficiary + Type: string + - Name: entitlements + Type: array> + - Name: consumptionconfiguration + Type: struct> + - Name: licensemetadata + Type: array> + - Name: createtime + Type: string + - Name: version + Type: string + - Name: receivedmetadata + Type: struct> + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-${LicenseDataPrefix}/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Beneficiary,ConsumptionConfiguration,CreateTime,Entitlements,HomeRegion,Issuer,LicenseArn,LicenseMetadata,LicenseName,ProductName,ProductSKU,ReceivedMetadata,Status,Validity,Version + Parameters: + UPDATED_BY_CRAWLER: !Sub '${ResourcePrefix}${CFDataName}-${LicenseDataPrefix}-Crawler' + ModuleStepFunction: Type: AWS::StepFunctions::StateMachine Properties: diff --git a/data-collection/deploy/module-organization.yaml b/data-collection/deploy/module-organization.yaml index 2a06e4dd..1fd31dff 100644 --- a/data-collection/deploy/module-organization.yaml +++ b/data-collection/deploy/module-organization.yaml @@ -55,6 +55,9 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. 
Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." Default: "" + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Conditions: NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] @@ -305,6 +308,57 @@ Resources: S3Targets: - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/organization-data/" #Preserve legacy name in Athena + ModuleGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: organization_data + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + StorageDescriptor: + Columns: + - Name: id + Type: string + - Name: arn + Type: string + - Name: email + Type: string + - Name: name + Type: string + - Name: status + Type: string + - Name: joinedmethod + Type: string + - Name: joinedtimestamp + Type: string + - Name: hierarchy + Type: array> + - Name: hierarchypath + Type: string + - Name: hierarchytags + Type: array> + - Name: managementaccountid + Type: string + - Name: parent + Type: string + - Name: parentid + Type: string + - Name: parenttags + Type: array> + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/organization-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Arn,Email,Hierarchy,HierarchyPath,HierarchyTags,Id,JoinedMethod,JoinedTimestamp,ManagementAccountId,Name,Parent,ParentId,ParentTags,Status + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + ModuleStepFunction: Type: AWS::StepFunctions::StateMachine Properties: diff --git a/data-collection/deploy/module-quicksight.yaml b/data-collection/deploy/module-quicksight.yaml index 
1c75f0f8..57c05c6e 100644 --- a/data-collection/deploy/module-quicksight.yaml +++ b/data-collection/deploy/module-quicksight.yaml @@ -1,4 +1,3 @@ -#IAKOV's version AWSTemplateFormatVersion: '2010-09-09' Description: Retrieves Quicksight User, Group, Group Members data in Data Collection Account only Parameters: @@ -46,6 +45,9 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." Default: "" + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Conditions: NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] @@ -305,6 +307,109 @@ Resources: - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-group-data/" - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-user-data/" + UserGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: quicksight_user_data + TableType: EXTERNAL_TABLE + PartitionKeys: [] + StorageDescriptor: + Columns: + - Name: arn + Type: string + - Name: username + Type: string + - Name: email + Type: string + - Name: role + Type: string + - Name: identitytype + Type: string + - Name: active + Type: boolean + - Name: principalid + Type: string + - Name: account_id + Type: string + - Name: namespace + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-user-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Active,Arn,Email,IdentityType,PrincipalId,Role,UserName,account_id,namespace + Parameters: + UPDATED_BY_CRAWLER: !Sub 
"${ResourcePrefix}${CFDataName}-Crawler" + + GroupGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: quicksight_group_data + TableType: EXTERNAL_TABLE + PartitionKeys: [] + StorageDescriptor: + Columns: + - Name: arn + Type: string + - Name: groupname + Type: string + - Name: principalid + Type: string + - Name: account_id + Type: string + - Name: namespace + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-group-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Arn,GroupName,PrincipalId,account_id,namespace + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + + GroupMembershipGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: quicksight_groupmembership_data + TableType: EXTERNAL_TABLE + PartitionKeys: [] + StorageDescriptor: + Columns: + - Name: arn + Type: string + - Name: membername + Type: string + - Name: groupname + Type: string + - Name: grouparn + Type: string + - Name: account_id + Type: string + - Name: namespace + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-groupmembership-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Arn,GroupArn,GroupName,MemberName,account_id,namespace + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + ModuleStepFunction: Type: AWS::StepFunctions::StateMachine Properties: diff --git 
a/data-collection/deploy/module-service-quotas.yaml b/data-collection/deploy/module-service-quotas.yaml index 55462f44..c50b686a 100644 --- a/data-collection/deploy/module-service-quotas.yaml +++ b/data-collection/deploy/module-service-quotas.yaml @@ -54,13 +54,12 @@ Parameters: Type: String Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys." Default: "" -Outputs: - StepFunctionARN: - Description: ARN for the module's Step Function - Value: !GetAtt ModuleStepFunction.Arn + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Conditions: - NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] + NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ] Resources: LambdaRole: @@ -289,6 +288,117 @@ Resources: } } + DataGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: service_quotas_data + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: account_id + Type: string + - Name: region + Type: string + StorageDescriptor: + Columns: + - Name: servicecode + Type: string + - Name: servicename + Type: string + - Name: quotaarn + Type: string + - Name: quotacode + Type: string + - Name: quotaname + Type: string + - Name: value + Type: double + - Name: unit + Type: string + - Name: adjustable + Type: boolean + - Name: globalquota + Type: boolean + - Name: usagemetric + Type: struct,MetricStatisticRecommendation:string> + - Name: period + Type: struct + - Name: quotaappliedatlevel + Type: string + - Name: defaultvalue + Type: double + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: Adjustable,DefaultValue,GlobalQuota,Period,QuotaAppliedAtLevel,QuotaArn,QuotaCode,QuotaName,ServiceCode,ServiceName,Unit,UsageMetric,Value + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + + ModuleGlueTable: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: service_quotas_history + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: account_id + Type: string + - Name: region + Type: string + StorageDescriptor: + Columns: + - Name: id + Type: string + - Name: servicecode + Type: string + - Name: servicename + Type: string + - Name: quotacode + Type: string + - Name: quotaname + Type: string + - Name: desiredvalue + Type: double + - Name: status + Type: string + - Name: created + Type: string + - Name: lastupdated + Type: string + - Name: requester + Type: string + - Name: quotaarn + Type: string + - Name: globalquota + Type: boolean + - Name: unit + Type: string + - Name: quotarequestedatlevel + Type: string + - Name: caseid + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-history/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: CaseId,Created,DesiredValue,GlobalQuota,Id,LastUpdated,QuotaArn,QuotaCode,QuotaName,QuotaRequestedAtLevel,Requester,ServiceCode,ServiceName,Status,Unit + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + + ModuleStepFunction: Type: AWS::StepFunctions::StateMachine Properties: diff --git a/data-collection/deploy/module-support-cases.yaml 
b/data-collection/deploy/module-support-cases.yaml index 355e8eed..315b4b2a 100644 --- a/data-collection/deploy/module-support-cases.yaml +++ b/data-collection/deploy/module-support-cases.yaml @@ -50,6 +50,9 @@ Parameters: SchedulerExecutionRoleARN: Type: String Description: Common role for module Scheduler execution + LambdaManageGlueTableARN: + Type: String + Description: ARN of a Lambda for Managing GlueTable Outputs: StepFunctionARN: @@ -353,6 +356,133 @@ Resources: - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-status/" Configuration: "{\"Version\":1.0,\"Grouping\":{\"TableGroupingPolicy\":\"CombineCompatibleSchemas\"}}" + GlueTableData: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: support_cases_data + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: account_id + Type: string + - Name: year + Type: string + - Name: month + Type: string + - Name: day + Type: string + StorageDescriptor: + Columns: + - Name: caseid + Type: string + - Name: displayid + Type: string + - Name: subject + Type: string + - Name: status + Type: string + - Name: servicecode + Type: string + - Name: categorycode + Type: string + - Name: severitycode + Type: string + - Name: submittedby + Type: string + - Name: timecreated + Type: string + - Name: ccemailaddresses + Type: array + - Name: language + Type: string + - Name: accountalias + Type: string + - Name: summary + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: AccountAlias,CCEmailAddresses,CaseId,CategoryCode,DisplayId,Language,ServiceCode,SeverityCode,Status,Subject,SubmittedBy,Summary,TimeCreated + 
Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + + GlueTableStatus: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: support_cases_status + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + StorageDescriptor: + Columns: + - Name: last_read + Type: string + - Name: account_id + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-status/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: account_id,last_read + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + + GlueTableCommunications: + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + DatabaseName: !Ref DatabaseName + TableInput: + Name: support_cases_communications + TableType: EXTERNAL_TABLE + PartitionKeys: + - Name: payer_id + Type: string + - Name: account_id + Type: string + - Name: year + Type: string + - Name: month + Type: string + - Name: day + Type: string + StorageDescriptor: + Columns: + - Name: caseid + Type: string + - Name: body + Type: string + - Name: submittedby + Type: string + - Name: timecreated + Type: string + - Name: attachmentset + Type: string + - Name: accountalias + Type: string + InputFormat: org.apache.hadoop.mapred.TextInputFormat + OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-communications/" + SerdeInfo: + SerializationLibrary: org.openx.data.jsonserde.JsonSerDe + Parameters: + paths: AccountAlias,AttachmentSet,Body,CaseId,SubmittedBy,TimeCreated + Parameters: + UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler" + ModuleStepFunction: 
Type: AWS::StepFunctions::StateMachine
     Properties:
diff --git a/data-collection/deploy/module-trusted-advisor.yaml b/data-collection/deploy/module-trusted-advisor.yaml
index 6e7ea47b..d900bd47 100644
--- a/data-collection/deploy/module-trusted-advisor.yaml
+++ b/data-collection/deploy/module-trusted-advisor.yaml
@@ -50,7 +50,9 @@ Parameters:
   SchedulerExecutionRoleARN:
     Type: String
     Description: Common role for module Scheduler execution
-
+  LambdaManageGlueTableARN:
+    Type: String
+    Description: ARN of a Lambda for Managing GlueTable
 Outputs:
   StepFunctionARN:
     Description: ARN for the module's Step Function
@@ -259,6 +261,307 @@ Resources:
           - Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
       Configuration: "{\"Version\":1.0,\"Grouping\":{\"TableGroupingPolicy\":\"CombineCompatibleSchemas\"}}"
 
+  ModuleGlueTable:
+    Type: Custom::ManageGlueTable
+    Properties:
+      ServiceToken: !Ref LambdaManageGlueTableARN
+      TableInput:
+        Name: trusted_advisor_data
+        TableType: EXTERNAL_TABLE
+        PartitionKeys:
+          - Name: payer_id
+            Type: string
+          - Name: year
+            Type: string
+          - Name: month
+            Type: string
+        StorageDescriptor:
+          Columns:
+            - Name: region
+              Type: string
+            - Name: service
+              Type: string
+            - Name: limit name
+              Type: string
+            - Name: limit amount
+              Type: string
+            - Name: current usage
+              Type: string
+            - Name: status
+              Type: string
+            - Name: accountid
+              Type: string
+            - Name: accountname
+              Type: string
+            - Name: category
+              Type: string
+            - Name: datetime
+              Type: string
+            - Name: timestamp
+              Type: string
+            - Name: checkname
+              Type: string
+            - Name: checkid
+              Type: string
+            - Name: resourceid
+              Type: string
+            - Name: issuppressed
+              Type: boolean
+            - Name: resource
+              Type: string
+            - Name: aws config rule
+              Type: string
+            - Name: input parameters
+              Type: string
+            - Name: last updated time
+              Type: string
+            - Name: region/az
+              Type: string
+            - Name: function name
+              Type: string
+            - Name: function version
+              Type: string
+            - Name: memory size(mb)
+              Type: string
+            - Name: recommended memory size(mb)
+              Type: string
+            - Name: lookback period (days)
+              Type: string
+            - Name: performance risk
+              Type: string
+            - Name: db instance
+              Type: string
+            - Name: vpc id
+              Type: string
+            - Name: multi-az
+              Type: string
+            - Name: workload arn
+              Type: string
+            - Name: workload name
+              Type: string
+            - Name: reviewer name
+              Type: string
+            - Name: workload type
+              Type: string
+            - Name: workload started date
+              Type: string
+            - Name: workload last modified date
+              Type: string
+            - Name: number of identified hris for performance
+              Type: string
+            - Name: number of hris resolved for performance
+              Type: string
+            - Name: number of questions answered for performance
+              Type: string
+            - Name: total number of questions in performance pillar
+              Type: string
+            - Name: number of identified hris for security
+              Type: string
+            - Name: number of hris resolved for security
+              Type: string
+            - Name: number of questions answered for security
+              Type: string
+            - Name: total number of questions in security pillar
+              Type: string
+            - Name: number of identified hris for reliability
+              Type: string
+            - Name: number of hris resolved for reliability
+              Type: string
+            - Name: number of questions answered for reliability
+              Type: string
+            - Name: total number of questions in reliability pillar
+              Type: string
+            - Name: number of identified hris for cost optimization
+              Type: string
+            - Name: number of hris resolved for cost optimization
+              Type: string
+            - Name: number of questions answered for cost optimization
+              Type: string
+            - Name: total number of questions in cost optimization pillar
+              Type: string
+            - Name: trail name
+              Type: string
+            - Name: logging status
+              Type: string
+            - Name: bucket name
+              Type: string
+            - Name: last delivery error
+              Type: string
+            - Name: aws account id
+              Type: string
+            - Name: security group name
+              Type: string
+            - Name: security group id
+              Type: string
+            - Name: protocol
+              Type: string
+            - Name: port
+              Type: string
+            - Name: ip range
+              Type: string
+            - Name: db instance identifier
+              Type: string
+            - Name: db instance arn
+              Type: string
+            - Name: deployment type
+              Type: string
+            - Name: backup type
+              Type: string
+            - Name: reason
+              Type: string
+            - Name: ecs cluster name/ecs service name
+              Type: string
+            - Name: number of availability zones
+              Type: string
+            - Name: spread task placement strategy enabled and applied correctly
+              Type: string
+            - Name: freestoragespace metric (mb)
+              Type: string
+            - Name: db instance allocated storage (mb)
+              Type: string
+            - Name: db instance storage used percent
+              Type: string
+            - Name: db instance name
+              Type: string
+            - Name: instance type
+              Type: string
+            - Name: storage provisioned (gb)
+              Type: string
+            - Name: days since last connection
+              Type: string
+            - Name: estimated monthly savings (on demand)
+              Type: string
+            - Name: instance id
+              Type: string
+            - Name: instance name
+              Type: string
+            - Name: estimated monthly savings
+              Type: string
+            - Name: day 1
+              Type: string
+            - Name: day 2
+              Type: string
+            - Name: day 3
+              Type: string
+            - Name: day 4
+              Type: string
+            - Name: day 5
+              Type: string
+            - Name: day 6
+              Type: string
+            - Name: day 7
+              Type: string
+            - Name: day 8
+              Type: string
+            - Name: day 9
+              Type: string
+            - Name: day 10
+              Type: string
+            - Name: day 11
+              Type: string
+            - Name: day 12
+              Type: string
+            - Name: day 13
+              Type: string
+            - Name: day 14
+              Type: string
+            - Name: 14-day average cpu utilization
+              Type: string
+            - Name: 14-day average network i/o
+              Type: string
+            - Name: number of days low utilization
+              Type: string
+            - Name: engine name
+              Type: string
+            - Name: db instance class
+              Type: string
+            - Name: task definition arn
+              Type: string
+            - Name: container definition names
+              Type: string
+            - Name: password policy
+              Type: string
+            - Name: uppercase
+              Type: string
+            - Name: lowercase
+              Type: string
+            - Name: number
+              Type: string
+            - Name: non-alphanumeric
+              Type: string
+            - Name: function arn
+              Type: string
+            - Name: runtime
+              Type: string
+            - Name: days to deprecation
+              Type: string
+            - Name: deprecation date
+              Type: string
+            - Name: average daily invokes
+              Type: string
+            - Name: last refresh time
+              Type: string
+            - Name: diskqueuedepth metric
+              Type: string
+            - Name: versioning
+              Type: string
+            - Name: mfa delete enabled
+              Type: string
+            - Name: engine version current
+              Type: string
+            - Name: recommended value
+              Type: string
+            - Name: the function with version which is flagged.
+              Type: string
+            - Name: current day async requests dropped percentage.
+              Type: string
+            - Name: current day async requests.
+              Type: string
+            - Name: average daily async requests dropped percentage.
+              Type: string
+            - Name: average daily async requests.
+              Type: string
+          InputFormat: org.apache.hadoop.mapred.TextInputFormat
+          OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
+          SerdeInfo:
+            SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
+            Parameters:
+              paths: 14-day average cpu utilization,14-day average network i/o,accountid,accountname,average
+                daily async requests dropped percentage.,average daily async requests.,average
+                daily invokes,aws account id,aws config rule,backup type,bucket name,category,checkid,checkname,container
+                definition names,current day async requests dropped percentage.,current
+                day async requests.,current usage,datetime,day 1,day 10,day 11,day 12,day
+                13,day 14,day 2,day 3,day 4,day 5,day 6,day 7,day 8,day 9,days since
+                last connection,days to deprecation,db instance,db instance allocated
+                storage (mb),db instance arn,db instance class,db instance identifier,db
+                instance name,db instance storage used percent,deployment type,deprecation
+                date,diskqueuedepth metric,ecs cluster name/ecs service name,engine
+                name,engine version current,estimated monthly savings,estimated monthly
+                savings (on demand),freestoragespace metric (mb),function arn,function
+                name,function version,input parameters,instance id,instance name,instance
+                type,ip range,issuppressed,last delivery error,last refresh time,last
+                updated time,limit amount,limit name,logging status,lookback period
+                (days),lowercase,memory size(mb),mfa delete enabled,multi-az,non-alphanumeric,number,number
+                of availability zones,number of days low utilization,number of hris
+                resolved for cost optimization,number of hris resolved for performance,number
+                of hris resolved for reliability,number of hris resolved for security,number
+                of identified hris for cost optimization,number of identified hris for
+                performance,number of identified hris for reliability,number of identified
+                hris for security,number of questions answered for cost optimization,number
+                of questions answered for performance,number of questions answered for
+                reliability,number of questions answered for security,password policy,performance
+                risk,port,protocol,reason,recommended memory size(mb),recommended value,region,region/az,resource,resourceid,reviewer
+                name,runtime,security group id,security group name,service,spread task
+                placement strategy enabled and applied correctly,status,storage provisioned
+                (gb),task definition arn,the function with version which is flagged.,timestamp,total
+                number of questions in cost optimization pillar,total number of questions
+                in performance pillar,total number of questions in reliability pillar,total
+                number of questions in security pillar,trail name,uppercase,versioning,vpc
+                id,workload arn,workload last modified date,workload name,workload started
+                date,workload type
+        Parameters:
+          UPDATED_BY_CRAWLER: !Sub "${ResourcePrefix}${CFDataName}-Crawler"
   ModuleStepFunction:
     Type: AWS::StepFunctions::StateMachine