From 0d36f71a8134e2340bc3856f3275f74e6f2d6a47 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Tue, 21 Jul 2020 08:47:37 -0400 Subject: [PATCH 1/4] adding staging table to convert resource specification date to local timezone --- .../etl.d/cloud_ingest_resource_specs.json | 5 ++ .../etl.d/xdmod-migration-8_5_1-9_0_0.json | 76 ++++++++++--------- .../cloud_common/resource_specifications.json | 12 +-- .../staging_resource_specifications.json | 19 +++++ .../resource_specification_end_day.sql | 12 +-- .../cloud_common/raw_resource_specs.json | 2 +- .../staging_resource_specifications.json | 53 +++++++++++++ 7 files changed, 130 insertions(+), 49 deletions(-) create mode 100644 configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json create mode 100644 configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json diff --git a/configuration/etl/etl.d/cloud_ingest_resource_specs.json b/configuration/etl/etl.d/cloud_ingest_resource_specs.json index 4e29ab817f..0c4ea0e451 100644 --- a/configuration/etl/etl.d/cloud_ingest_resource_specs.json +++ b/configuration/etl/etl.d/cloud_ingest_resource_specs.json @@ -67,6 +67,11 @@ } } }, + { + "name": "CloudResourceSpecsStaging", + "class": "DatabaseIngestor", + "definition_file": "cloud_common/staging_resource_specifications.json" + }, { "name": "CloudResourceSpecsAddEndDay", "namespace": "ETL\\Maintenance", diff --git a/configuration/etl/etl.d/xdmod-migration-8_5_1-9_0_0.json b/configuration/etl/etl.d/xdmod-migration-8_5_1-9_0_0.json index d596add994..8dbf8b57a9 100644 --- a/configuration/etl/etl.d/xdmod-migration-8_5_1-9_0_0.json +++ b/configuration/etl/etl.d/xdmod-migration-8_5_1-9_0_0.json @@ -8,6 +8,45 @@ } }, "migration-8_5_1-9_0_0": [ + { + "name": "update-moddb-tables", + "description": "Update moddb tables", + "namespace": "ETL\\Maintenance", + "class": "ManageTables", + "options_class": "MaintenanceOptions", + "definition_file_list": [ + "xdb/batch-export-requests.json" + ], + 
"endpoints": { + "destination": { + "type": "mysql", + "name": "XDMoD Database", + "config": "database", + "schema": "moddb" + } + } + }, + { + "name": "update-mod_logger-log_table-table", + "description": "Update mod_logger.log_table table", + "namespace": "ETL\\Maintenance", + "class": "ExecuteSql", + "options_class": "MaintenanceOptions", + "sql_file_list": [ + "migrations/8.5.1-9.0.0/logger/alter-log-table.sql" + ], + "endpoints": { + "destination": { + "type": "mysql", + "name": "Logger Database", + "config": "logger", + "schema": "mod_logger" + } + } + } + ], + "#": "Cloud realm pipeline intentionally named to not match the automatically run migration pipelines", + "cloud-migration-8-5-1_9-0-0": [ { "name": "DeleteCloudAutoincrementColumns", "description": "Post-ingest updates for massaging Open Stack data", @@ -42,6 +81,7 @@ "cloud_common/session_records.json", "cloud_common/event.json", "cloud_common/raw_resource_specs.json", + "cloud_common/staging_resource_specifications.json", "cloud_common/cloud_resource_specs.json", "cloud_common/account.json", "cloud_common/instance_type.json", @@ -56,42 +96,6 @@ } } }, - { - "name": "update-moddb-tables", - "description": "Update moddb tables", - "namespace": "ETL\\Maintenance", - "class": "ManageTables", - "options_class": "MaintenanceOptions", - "definition_file_list": [ - "xdb/batch-export-requests.json" - ], - "endpoints": { - "destination": { - "type": "mysql", - "name": "XDMoD Database", - "config": "database", - "schema": "moddb" - } - } - }, - { - "name": "update-mod_logger-log_table-table", - "description": "Update mod_logger.log_table table", - "namespace": "ETL\\Maintenance", - "class": "ExecuteSql", - "options_class": "MaintenanceOptions", - "sql_file_list": [ - "migrations/8.5.1-9.0.0/logger/alter-log-table.sql" - ], - "endpoints": { - "destination": { - "type": "mysql", - "name": "Logger Database", - "config": "logger", - "schema": "mod_logger" - } - } - }, { "name": 
"OpenStackDomainToSubmissionVenueMappingStagingIngestor", "description": "Load schema version history", diff --git a/configuration/etl/etl_action_defs.d/cloud_common/resource_specifications.json b/configuration/etl/etl_action_defs.d/cloud_common/resource_specifications.json index 436b1dd6a7..118c34f604 100644 --- a/configuration/etl/etl_action_defs.d/cloud_common/resource_specifications.json +++ b/configuration/etl/etl_action_defs.d/cloud_common/resource_specifications.json @@ -8,18 +8,18 @@ "hostname": "r1.hostname", "vcpus": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, r2.vcpus, r1.vcpus)", "memory_mb": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, r2.memory_mb, r1.memory_mb)", - "start_date_ts": "UNIX_TIMESTAMP(IF(r1.memory_mb = -1 AND r1.vcpus = -1, r2.fact_date, r1.fact_date))", - "end_date_ts": "UNIX_TIMESTAMP(CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN MIN(r2.fact_date) - INTERVAL 1 DAY WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.raw_resource_specs) != MAX(r2.fact_date) THEN MAX(r1.fact_date) - INTERVAL 1 DAY ELSE CURDATE() END)", - "start_day_id": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, YEAR(r2.fact_date) * 100000 + DAYOFYEAR(r2.fact_date), YEAR(r1.fact_date) * 100000 + DAYOFYEAR(r1.fact_date))", - "end_day_id": "CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN YEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.raw_resource_specs) != MAX(r2.fact_date) THEN YEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) ELSE YEAR(CURDATE()) * 100000 + DAYOFYEAR(CURDATE()) END" + "start_date_ts": "UNIX_TIMESTAMP(IF(r1.memory_mb = -1 AND r1.vcpus = -1, MIN(r2.fact_date), MIN(r1.fact_date)))", + "end_date_ts": "UNIX_TIMESTAMP(CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb 
!= -1 AND r1.vcpus != -1) THEN MIN(r2.fact_date) - INTERVAL 1 DAY WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.staging_resource_specifications) != MAX(r2.fact_date) THEN MAX(r1.fact_date) - INTERVAL 1 DAY ELSE CURDATE() END)", + "start_day_id": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, YEAR(MIN(r2.fact_date)) * 100000 + DAYOFYEAR(MIN(r2.fact_date)), YEAR(MIN(r1.fact_date)) * 100000 + DAYOFYEAR(MIN(r1.fact_date)))", + "end_day_id": "CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN YEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.staging_resource_specifications) != MAX(r2.fact_date) THEN YEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) ELSE YEAR(CURDATE()) * 100000 + DAYOFYEAR(CURDATE()) END" }, "joins": [{ - "name": "raw_resource_specs", + "name": "staging_resource_specifications", "schema": "${SOURCE_SCHEMA}", "alias": "r1" }, { - "name": "raw_resource_specs", + "name": "staging_resource_specifications", "schema": "${SOURCE_SCHEMA}", "alias": "r2", "type": "LEFT", diff --git a/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json b/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json new file mode 100644 index 0000000000..2b766bc50c --- /dev/null +++ b/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json @@ -0,0 +1,19 @@ +{ + "table_definition": { + "$ref": "${table_definition_dir}/cloud_common/staging_resource_specifications.json#/table_definition" + }, + "source_query": { + "records": { + "resource_id": "raw.resource_id", + "hostname": "raw.hostname", + "vcpus": "raw.vcpus", + "memory_mb": "raw.memory_mb", + "fact_date": "DATE(CONVERT_TZ(raw.fact_date, '+00:00', @@session.time_zone))" + }, + "joins": [{ + 
"name": "raw_resource_specs", + "schema": "${SOURCE_SCHEMA}", + "alias": "raw" + }] + } +} diff --git a/configuration/etl/etl_sql.d/cloud_common/resource_specification_end_day.sql b/configuration/etl/etl_sql.d/cloud_common/resource_specification_end_day.sql index 1f6f8ba300..7944ff6664 100644 --- a/configuration/etl/etl_sql.d/cloud_common/resource_specification_end_day.sql +++ b/configuration/etl/etl_sql.d/cloud_common/resource_specification_end_day.sql @@ -1,7 +1,7 @@ -- This sql statement inserts -1 values for the memory_mb and vcpus for a day that a compute node has been -- removed from the most recently ingested resource specifications file. The -1 helps when setting start and -- end times of a cpu and memory configuration for a compute node. -INSERT INTO modw_cloud.raw_resource_specs (hostname, resource_id, memory_mb, vcpus, fact_date) SELECT +INSERT INTO modw_cloud.staging_resource_specifications (hostname, resource_id, memory_mb, vcpus, fact_date) SELECT rs.hostname, rs.resource_id, rs.memory_mb, @@ -20,9 +20,9 @@ FROM rs.resource_id, rs2.hostname FROM - modw_cloud.raw_resource_specs AS rs + modw_cloud.staging_resource_specifications AS rs LEFT JOIN - (SELECT r.resource_id, r.hostname, r.fact_date FROM modw_cloud.raw_resource_specs AS r GROUP BY r.resource_id, r.hostname) AS rs2 ON rs.resource_id = rs2.resource_id + (SELECT r.resource_id, r.hostname, r.fact_date FROM modw_cloud.staging_resource_specifications AS r GROUP BY r.resource_id, r.hostname) AS rs2 ON rs.resource_id = rs2.resource_id GROUP BY rs.resource_id, rs.fact_date, @@ -30,7 +30,7 @@ FROM HAVING MIN(rs2.fact_date) <= rs.fact_date) AS rss1 LEFT JOIN - `modw_cloud`.`raw_resource_specs` AS rss2 + `modw_cloud`.`staging_resource_specifications` AS rss2 ON rss1.resource_id = rss2.resource_id AND rss1.fact_date = rss2.fact_date AND rss1.hostname = rss2.hostname WHERE @@ -38,11 +38,11 @@ FROM GROUP BY rss1.resource_id, rss1.hostname, rss1.fact_date) as rs LEFT JOIN - (SELECT MAX(r.fact_date) AS 
fact_date, r.hostname, r.resource_id FROM modw_cloud.raw_resource_specs AS r GROUP BY r.resource_id, r.hostname) AS r1 + (SELECT MAX(r.fact_date) AS fact_date, r.hostname, r.resource_id FROM modw_cloud.staging_resource_specifications AS r GROUP BY r.resource_id, r.hostname) AS r1 ON rs.resource_id = r1.resource_id and rs.hostname = r1.hostname LEFT JOIN - `modw_cloud`.`raw_resource_specs` AS rs2 + `modw_cloud`.`staging_resource_specifications` AS rs2 ON r1.resource_id = rs2.resource_id AND r1.hostname = rs2.hostname AND r1.fact_date = rs2.fact_date AND rs2.memory_mb != -1 AND rs2.vcpus != -1 WHERE diff --git a/configuration/etl/etl_tables.d/cloud_common/raw_resource_specs.json b/configuration/etl/etl_tables.d/cloud_common/raw_resource_specs.json index 1e11386d39..4c213c885f 100644 --- a/configuration/etl/etl_tables.d/cloud_common/raw_resource_specs.json +++ b/configuration/etl/etl_tables.d/cloud_common/raw_resource_specs.json @@ -32,7 +32,7 @@ }, { "name": "fact_date", - "type": "date", + "type": "datetime", "nullable": false } ], diff --git a/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json b/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json new file mode 100644 index 0000000000..b1fa119f01 --- /dev/null +++ b/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json @@ -0,0 +1,53 @@ +{ + "#": "Staging table of resource specifications with fact_date converted to the local time zone", + + "table_definition": { + "name": "staging_resource_specifications", + "engine": "MyISAM", + "comment": "Record type: accounting, administrative, derived, etc.", + "columns": [ + { + "name": "hostname", + "type": "varchar(225)", + "nullable": false + }, + { + "name": "resource_id", + "type": "int(11)", + "nullable": false, + "comment": "Unknown = -1 for global dimensions" + }, + { + "name": "memory_mb", + "type": "int(11)", + "nullable": false, + "comment": "Amount of memory available on the associated node."
+ }, + { + "name": "vcpus", + "type": "int(5)", + "nullable": false, + "default": null, + "comment": "Number of vcpus available on the associated node." + }, + { + "name": "fact_date", + "type": "date", + "nullable": false + } + ], + "indexes": [ + { + "name": "PRIMARY", + "columns": [ + "resource_id", + "hostname", + "memory_mb", + "vcpus", + "fact_date" + ], + "is_unique": true + } + ] + } +} From 57b1db26d52caa2e06baa6903a6df76e589d5bfc Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Tue, 21 Jul 2020 14:51:13 -0400 Subject: [PATCH 2/4] documentation updates --- .../cloud_common/staging_resource_specifications.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json b/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json index 2b766bc50c..a65c514f98 100644 --- a/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json +++ b/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json @@ -1,4 +1,6 @@ { + "#": "@@session.time_zone is used instead of using the time zone listed in the resourcefact table to keep consistency", + "#": "with how time zone conversion works for other places in the cloud realm", "table_definition": { "$ref": "${table_definition_dir}/cloud_common/staging_resource_specifications.json#/table_definition" }, From a304b314c48f2ef8dd22e7dcde907a59c3de2051 Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Tue, 21 Jul 2020 14:53:52 -0400 Subject: [PATCH 3/4] documentation updates --- .../cloud_common/staging_resource_specifications.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json b/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json index a65c514f98..4793f4f1c6 100644 --- 
a/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json +++ b/configuration/etl/etl_action_defs.d/cloud_common/staging_resource_specifications.json @@ -1,5 +1,5 @@ { - "#": "@@session.time_zone is used instead of using the time zone listed in the resourcefact table to keep consistency", + "#": "@@session.time_zone is used instead of using the time zone listed in resources.json to keep consistency", "#": "with how time zone conversion works for other places in the cloud realm", "table_definition": { "$ref": "${table_definition_dir}/cloud_common/staging_resource_specifications.json#/table_definition" From 7de2c529b1c6eb2a336a271d7c048e8ec3820b1d Mon Sep 17 00:00:00 2001 From: Greg Dean Date: Tue, 21 Jul 2020 15:02:25 -0400 Subject: [PATCH 4/4] updating configuration.md with note about cloud resources and timezones --- docs/configuration.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index e51d0bb392..86a37955b3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -422,6 +422,9 @@ The default is that resources are assumed to not allow node sharing. If the SUPReMM module is in use and a resource does allow node sharing then this should be set to `true`. +For cloud resources the `timezone` setting is not used; times are instead +converted to the local time zone of the server. + ```json [ {