Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
{
"metaData": {
"name": "gcp.sample_staging_query.terminal_v1__0",
"team": "gcp",
"version": "0",
"outputNamespace": "sample_namespace",
"tableProperties": {
"sample_config_json": "{\"sample_key\": \"sample value\"}"
},
"sourceFile": "staging_queries/gcp/sample_staging_query.py",
"customJson": "{\"airflowDependencies\": [{\"name\": \"wf_sample_namespace_gcp_sample_staging_query_v1__0_with_offset_1\", \"spec\": \"sample_namespace.gcp_sample_staging_query_v1__0/ds={{ macros.ds_add(ds, 1) }}\"}, {\"name\": \"wf_sample_namespace_gcp_sample_staging_query_v2__0_with_offset_1\", \"spec\": \"sample_namespace.gcp_sample_staging_query_v2__0/ds={{ macros.ds_add(ds, 1) }}\"}, {\"name\": \"wf_sample_namespace_gcp_sample_staging_query_v3__0_with_offset_1\", \"spec\": \"sample_namespace.gcp_sample_staging_query_v3__0/ds={{ macros.ds_add(ds, 1) }}\"}, {\"name\": \"wf_sample_namespace_gcp_sample_staging_query_v4__0_with_offset_1\", \"spec\": \"sample_namespace.gcp_sample_staging_query_v4__0/ds={{ macros.ds_add(ds, 1) }}\"}, {\"name\": \"wf_sample_namespace_gcp_sample_staging_query_v5__0_with_offset_1\", \"spec\": \"sample_namespace.gcp_sample_staging_query_v5__0/ds={{ macros.ds_add(ds, 1) }}\"}, {\"name\": \"wf_sample_namespace_gcp_sample_staging_query_v6__0_with_offset_1\", \"spec\": \"sample_namespace.gcp_sample_staging_query_v6__0/ds={{ macros.ds_add(ds, 1) }}\"}]}",
"executionInfo": {
"env": {
"common": {
"VERSION": "latest",
"JOB_MODE": "local[*]",
"HADOOP_DIR": "[STREAMING-TODO]/path/to/folder/containing",
"CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
"CHRONON_ONLINE_ARGS": " -Ztasks=4",
"PARTITION_COLUMN": "ds",
"PARTITION_FORMAT": "yyyy-MM-dd",
"CUSTOMER_ID": "dev",
"GCP_PROJECT_ID": "canary-443022",
"GCP_REGION": "us-central1",
"GCP_DATAPROC_CLUSTER_NAME": "zipline-canary-cluster",
"GCP_BIGTABLE_INSTANCE_ID": "zipline-canary-instance",
"FLINK_STATE_URI": "gs://zipline-warehouse-canary/flink-state",
"CLOUD_PROVIDER": "gcp",
"ENABLE_PUBSUB": "true",
"ARTIFACT_PREFIX": "gs://zipline-artifacts-dev"
},
"modeEnvironments": {
"upload": {
"VERSION": "latest",
"JOB_MODE": "local[*]",
"HADOOP_DIR": "[STREAMING-TODO]/path/to/folder/containing",
"CHRONON_ONLINE_CLASS": "[ONLINE-TODO]your.online.class",
"CHRONON_ONLINE_ARGS": " -Ztasks=4",
"PARTITION_COLUMN": "ds",
"PARTITION_FORMAT": "yyyy-MM-dd",
"CUSTOMER_ID": "dev",
"GCP_PROJECT_ID": "canary-443022",
"GCP_REGION": "us-central1",
"GCP_DATAPROC_CLUSTER_NAME": "zipline-transient-upload-cluster",
"GCP_BIGTABLE_INSTANCE_ID": "zipline-canary-instance",
"FLINK_STATE_URI": "gs://zipline-warehouse-canary/flink-state",
"CLOUD_PROVIDER": "gcp",
"ENABLE_PUBSUB": "true",
"ARTIFACT_PREFIX": "gs://zipline-artifacts-dev"
}
}
},
"conf": {
"common": {
"spark.chronon.partition.column": "ds",
"spark.chronon.cloud_provider": "gcp",
"spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
"spark.chronon.partition.format": "yyyy-MM-dd",
"spark.chronon.table.gcs.temporary_gcs_bucket": "zipline-warehouse-canary",
"spark.chronon.table.gcs.connector_output_dataset": "data",
"spark.chronon.table.gcs.connector_output_project": "canary-443022",
"spark.chronon.table_write.prefix": "gs://zipline-warehouse-canary/data/tables/",
"spark.chronon.table_write.format": "iceberg",
"spark.sql.catalog.spark_catalog.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.spark_catalog.gcp_location": "us-central1",
"spark.sql.catalog.spark_catalog.gcp_project": "canary-443022",
"spark.sql.catalog.spark_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.default_iceberg.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.default_iceberg.gcp_location": "us-central1",
"spark.sql.catalog.default_iceberg.gcp_project": "canary-443022",
"spark.sql.catalog.default_iceberg.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
"spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
"spark.chronon.coalesce.factor": "10",
"spark.default.parallelism": "10",
"spark.sql.shuffle.partitions": "10"
},
"modeConfigs": {
"backfill": {
"spark.chronon.partition.column": "ds",
"spark.chronon.cloud_provider": "gcp",
"spark.chronon.table.format_provider.class": "ai.chronon.integrations.cloud_gcp.GcpFormatProvider",
"spark.chronon.partition.format": "yyyy-MM-dd",
"spark.chronon.table.gcs.temporary_gcs_bucket": "zipline-warehouse-canary",
"spark.chronon.table.gcs.connector_output_dataset": "data",
"spark.chronon.table.gcs.connector_output_project": "canary-443022",
"spark.chronon.table_write.prefix": "gs://zipline-warehouse-canary/data/tables/",
"spark.chronon.table_write.format": "iceberg",
"spark.sql.catalog.spark_catalog.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.spark_catalog.gcp_location": "us-central1",
"spark.sql.catalog.spark_catalog.gcp_project": "canary-443022",
"spark.sql.catalog.spark_catalog.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.spark_catalog.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.catalog.default_iceberg.warehouse": "gs://zipline-warehouse-canary/data/tables/",
"spark.sql.catalog.default_iceberg.gcp_location": "us-central1",
"spark.sql.catalog.default_iceberg.gcp_project": "canary-443022",
"spark.sql.catalog.default_iceberg.catalog-impl": "org.apache.iceberg.gcp.bigquery.BigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg": "ai.chronon.integrations.cloud_gcp.DelegatingBigQueryMetastoreCatalog",
"spark.sql.catalog.default_iceberg.io-impl": "org.apache.iceberg.io.ResolvingFileIO",
"spark.sql.defaultUrlStreamHandlerFactory.enabled": "false",
"spark.kryo.registrator": "ai.chronon.integrations.cloud_gcp.ChrononIcebergKryoRegistrator",
"spark.chronon.coalesce.factor": "10",
"spark.default.parallelism": "10",
"spark.sql.shuffle.partitions": "10",
"spark.chronon.backfill_cloud_provider": "gcp"
}
}
},
"clusterConf": {
"common": {},
"modeClusterConfigs": {
"upload": {
"dataproc.config": "{\"gceClusterConfig\": {\"subnetworkUri\": \"default\", \"serviceAccount\": \"[email protected]\", \"serviceAccountScopes\": [\"https://www.googleapis.com/auth/cloud-platform\", \"https://www.googleapis.com/auth/cloud.useraccounts.readonly\", \"https://www.googleapis.com/auth/devstorage.read_write\", \"https://www.googleapis.com/auth/logging.write\"], \"metadata\": {\"hive-version\": \"3.1.2\", \"SPARK_BQ_CONNECTOR_URL\": \"gs://spark-lib/bigquery/spark-3.5-bigquery-0.42.1.jar\", \"artifact_prefix\": \"gs://zipline-artifacts-canary\"}, \"tags\": []}, \"masterConfig\": {\"numInstances\": 1, \"machineTypeUri\": \"n2-highmem-8\", \"diskConfig\": {\"bootDiskType\": \"pd-standard\", \"bootDiskSizeGb\": 1024}}, \"workerConfig\": {\"numInstances\": 2, \"machineTypeUri\": \"n2-highmem-4\", \"diskConfig\": {\"bootDiskType\": \"pd-standard\", \"bootDiskSizeGb\": 64, \"numLocalSsds\": 2}}, \"softwareConfig\": {\"imageVersion\": \"2.2.50-debian12\", \"optionalComponents\": [\"FLINK\", \"JUPYTER\"], \"properties\": {}}, \"initializationActions\": [{\"executable_file\": \"gs://zipline-artifacts-canary/scripts/copy_java_security.sh\"}], \"endpointConfig\": {\"enableHttpPortAccess\": true}, \"lifecycleConfig\": {\"idleDeleteTtl\": \"7200s\"}}"
}
}
},
"scheduleCron": "@daily"
}
},
"query": "\nSELECT\n *\nFROM data.gcp_training_set_v1_test__0\nWHERE ds BETWEEN '{ start_date }' AND '{ end_date }'\n",
"startPartition": "2020-03-01",
"tableDependencies": [
{
"tableInfo": {
"table": "sample_namespace.gcp_sample_staging_query_v1__0",
"partitionColumn": "ds"
},
"startOffset": {
"length": 1,
"timeUnit": 1
},
"endOffset": {
"length": 1,
"timeUnit": 1
}
},
{
"tableInfo": {
"table": "sample_namespace.gcp_sample_staging_query_v2__0",
"partitionColumn": "ds"
},
"startOffset": {
"length": 1,
"timeUnit": 1
},
"endOffset": {
"length": 1,
"timeUnit": 1
}
},
{
"tableInfo": {
"table": "sample_namespace.gcp_sample_staging_query_v3__0",
"partitionColumn": "ds"
},
"startOffset": {
"length": 1,
"timeUnit": 1
},
"endOffset": {
"length": 1,
"timeUnit": 1
}
},
{
"tableInfo": {
"table": "sample_namespace.gcp_sample_staging_query_v4__0",
"partitionColumn": "ds"
},
"startOffset": {
"length": 1,
"timeUnit": 1
},
"endOffset": {
"length": 1,
"timeUnit": 1
}
},
{
"tableInfo": {
"table": "sample_namespace.gcp_sample_staging_query_v5__0",
"partitionColumn": "ds"
},
"startOffset": {
"length": 1,
"timeUnit": 1
},
"endOffset": {
"length": 1,
"timeUnit": 1
}
},
{
"tableInfo": {
"table": "sample_namespace.gcp_sample_staging_query_v6__0",
"partitionColumn": "ds"
},
"startOffset": {
"length": 1,
"timeUnit": 1
},
"endOffset": {
"length": 1,
"timeUnit": 1
}
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"name": "gcp.sample_staging_query.v1__0",
"team": "gcp",
"version": "0",
"outputNamespace": "data",
"outputNamespace": "sample_namespace",
"tableProperties": {
"sample_config_json": "{\"sample_key\": \"sample value\"}"
},
Expand Down
Loading
Loading