Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert date for cloud resource specifications to correct timezone from UTC #1391

Merged
5 changes: 5 additions & 0 deletions configuration/etl/etl.d/cloud_ingest_resource_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@
}
}
},
{
"name": "CloudResourceSpecsStaging",
"class": "DatabaseIngestor",
"definition_file": "cloud_common/staging_resource_specifications.json"
},
{
"name": "CloudResourceSpecsAddEndDay",
"namespace": "ETL\\Maintenance",
Expand Down
1 change: 1 addition & 0 deletions configuration/etl/etl.d/xdmod-migration-8_5_1-9_0_0.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
"cloud_common/session_records.json",
"cloud_common/event.json",
"cloud_common/raw_resource_specs.json",
"cloud_common/staging_resource_specifications.json",
"cloud_common/cloud_resource_specs.json",
"cloud_common/account.json",
"cloud_common/instance_type.json",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
"hostname": "r1.hostname",
"vcpus": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, r2.vcpus, r1.vcpus)",
"memory_mb": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, r2.memory_mb, r1.memory_mb)",
"start_date_ts": "UNIX_TIMESTAMP(IF(r1.memory_mb = -1 AND r1.vcpus = -1, r2.fact_date, r1.fact_date))",
"end_date_ts": "UNIX_TIMESTAMP(CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN MIN(r2.fact_date) - INTERVAL 1 DAY WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.raw_resource_specs) != MAX(r2.fact_date) THEN MAX(r1.fact_date) - INTERVAL 1 DAY ELSE CURDATE() END)",
"start_day_id": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, YEAR(r2.fact_date) * 100000 + DAYOFYEAR(r2.fact_date), YEAR(r1.fact_date) * 100000 + DAYOFYEAR(r1.fact_date))",
"end_day_id": "CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN YEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.raw_resource_specs) != MAX(r2.fact_date) THEN YEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) ELSE YEAR(CURDATE()) * 100000 + DAYOFYEAR(CURDATE()) END"
"start_date_ts": "UNIX_TIMESTAMP(IF(r1.memory_mb = -1 AND r1.vcpus = -1, MIN(r2.fact_date), MIN(r1.fact_date)))",
"end_date_ts": "UNIX_TIMESTAMP(CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN MIN(r2.fact_date) - INTERVAL 1 DAY WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.staging_resource_specifications) != MAX(r2.fact_date) THEN MAX(r1.fact_date) - INTERVAL 1 DAY ELSE CURDATE() END)",
"start_day_id": "IF(r1.memory_mb = -1 AND r1.vcpus = -1, YEAR(MIN(r2.fact_date)) * 100000 + DAYOFYEAR(MIN(r2.fact_date)), YEAR(MIN(r1.fact_date)) * 100000 + DAYOFYEAR(MIN(r1.fact_date)))",
"end_day_id": "CASE WHEN MIN(r2.fact_date) IS NOT NULL AND (r1.memory_mb != -1 AND r1.vcpus != -1) THEN YEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MIN(r2.fact_date) - INTERVAL 1 DAY) WHEN r1.memory_mb = -1 AND r1.vcpus = -1 AND (SELECT MAX(fact_date) FROM modw_cloud.staging_resource_specifications) != MAX(r2.fact_date) THEN YEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) * 100000 + DAYOFYEAR(MAX(r1.fact_date) - INTERVAL 1 DAY) ELSE YEAR(CURDATE()) * 100000 + DAYOFYEAR(CURDATE()) END"
},
"joins": [{
"name": "raw_resource_specs",
"name": "staging_resource_specifications",
"schema": "${SOURCE_SCHEMA}",
"alias": "r1"
},
{
"name": "raw_resource_specs",
"name": "staging_resource_specifications",
"schema": "${SOURCE_SCHEMA}",
"alias": "r2",
"type": "LEFT",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"#": "@@session.time_zone is used instead of using the time zone listed in resources.json to keep consistency",
"#": "with how time zone conversion works for other places in the cloud realm",
"table_definition": {
"$ref": "${table_definition_dir}/cloud_common/staging_resource_specifications.json#/table_definition"
},
"source_query": {
"records": {
"resource_id": "raw.resource_id",
"hostname": "raw.hostname",
"vcpus": "raw.vcpus",
"memory_mb": "raw.memory_mb",
"fact_date": "DATE(CONVERT_TZ(raw.fact_date, '+00:00', @@session.time_zone))"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to use the time zone specified in resources.json? Does this handle resources in a different time zone than the database server?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jtpalmer Anywhere else in the cloud realm where we need to convert timezones it does it this way so I'd like to keep it consistent with that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update the documentation to note that cloud resources ignore the timezone configuration option (https://github.com/ubccr/xdmod/blame/xdmod9.0/docs/configuration.md#L424).

},
"joins": [{
"name": "raw_resource_specs",
"schema": "${SOURCE_SCHEMA}",
"alias": "raw"
}]
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-- This sql statement inserts -1 values for the memory_mb and vcpus for a day that a compute node has been
-- removed from the most recently ingested resource specifications file. The -1 helps when setting start and
-- end times of a cpu and memory configuration for a compute node.
INSERT INTO modw_cloud.raw_resource_specs (hostname, resource_id, memory_mb, vcpus, fact_date) SELECT
INSERT INTO modw_cloud.staging_resource_specifications (hostname, resource_id, memory_mb, vcpus, fact_date) SELECT
rs.hostname,
rs.resource_id,
rs.memory_mb,
Expand All @@ -20,29 +20,29 @@ FROM
rs.resource_id,
rs2.hostname
FROM
modw_cloud.raw_resource_specs AS rs
modw_cloud.staging_resource_specifications AS rs
LEFT JOIN
(SELECT r.resource_id, r.hostname, r.fact_date FROM modw_cloud.raw_resource_specs AS r GROUP BY r.resource_id, r.hostname) AS rs2 ON rs.resource_id = rs2.resource_id
(SELECT r.resource_id, r.hostname, r.fact_date FROM modw_cloud.staging_resource_specifications AS r GROUP BY r.resource_id, r.hostname) AS rs2 ON rs.resource_id = rs2.resource_id
GROUP BY
rs.resource_id,
rs.fact_date,
rs2.hostname
HAVING
MIN(rs2.fact_date) <= rs.fact_date) AS rss1
LEFT JOIN
`modw_cloud`.`raw_resource_specs` AS rss2
`modw_cloud`.`staging_resource_specifications` AS rss2
ON
rss1.resource_id = rss2.resource_id AND rss1.fact_date = rss2.fact_date AND rss1.hostname = rss2.hostname
WHERE
rss2.memory_mb IS NULL AND rss2.vcpus IS NULL
GROUP BY
rss1.resource_id, rss1.hostname, rss1.fact_date) as rs
LEFT JOIN
(SELECT MAX(r.fact_date) AS fact_date, r.hostname, r.resource_id FROM modw_cloud.raw_resource_specs AS r GROUP BY r.resource_id, r.hostname) AS r1
(SELECT MAX(r.fact_date) AS fact_date, r.hostname, r.resource_id FROM modw_cloud.staging_resource_specifications AS r GROUP BY r.resource_id, r.hostname) AS r1
ON
rs.resource_id = r1.resource_id and rs.hostname = r1.hostname
LEFT JOIN
`modw_cloud`.`raw_resource_specs` AS rs2
`modw_cloud`.`staging_resource_specifications` AS rs2
ON
r1.resource_id = rs2.resource_id AND r1.hostname = rs2.hostname AND r1.fact_date = rs2.fact_date AND rs2.memory_mb != -1 AND rs2.vcpus != -1
WHERE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
},
{
"name": "fact_date",
"type": "date",
"type": "datetime",
"nullable": false
}
],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"#": "Record types are global to all cloud resources",

"table_definition": {
"name": "staging_resource_specifications",
"engine": "MyISAM",
"comment": "Record type: accounting, administrative, derived, etc.",
"columns": [
{
"name": "hostname",
"type": "varchar(225)",
"nullable": false
},
{
"name": "resource_id",
"type": "int(11)",
"nullable": false,
"comment": "Unknown = -1 for global dimensions"
},
{
"name": "memory_mb",
"type": "int(11)",
"nullable": false,
"comment": "Amount of memory available on the associated node."
},
{
"name": "vcpus",
"type": "int(5)",
"nullable": false,
"default": null,
"comment": "Number of vcpus available on the associated node."
},
{
"name": "fact_date",
"type": "date",
"nullable": false
}
],
"indexes": [
{
"name": "PRIMARY",
"columns": [
"resource_id",
"hostname",
"memory_mb",
"vcpus",
"fact_date"
],
"is_unique": true
}
]
}
}
3 changes: 3 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ The default is that resources are assumed to not allow node sharing. If
the SUPReMM module is in use and a resource does allow node sharing then
this should be set to `true`.

For cloud resources the timezone is not used and times are converted to
the local timezone that the server is in.

```json
[
{
Expand Down