Skip to content

Commit

Permalink
Merge pull request #1489 from eiffel777/change-state-cloud-resource-s…
Browse files Browse the repository at this point in the history
…pecs

Move to using custom Ingestor for cloud resource specifications
  • Loading branch information
eiffel777 authored Feb 25, 2021
2 parents 8708fab + d7f51de commit a9145bb
Show file tree
Hide file tree
Showing 8 changed files with 543 additions and 50 deletions.
140 changes: 140 additions & 0 deletions classes/ETL/Ingestor/CloudResourceSpecsStateTransformIngestor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
<?php
/* ==========================================================================================
* This class simulates a Finite State Machine to reconstruct the start and end time of a specific
* set of vcpus and memory for a cloud host. It get a set of rows that contains the vcpus and memory
* sorted by resource, host and start time of the configuration. This list is iterated over and an end
* time is set anytime a changed is detected in the number of vcpus or memory for a host.
*
* If no changes are found the current date is considered the end date of the configuration
*
* A -1 value for the vcpus and memory_mb means the host was not available on that day
*
* @author Greg Dean <[email protected]>
* @date 2021-01-27
*/

namespace ETL\Ingestor;

use ETL\aOptions;
use ETL\iAction;
use ETL\aAction;
use ETL\Configuration\EtlConfiguration;
use ETL\EtlOverseerOptions;

use Psr\Log\LoggerInterface;

class CloudResourceSpecsStateTransformIngestor extends pdoIngestor implements iAction
{

private $_instance_state;

/**
* @see ETL\Ingestor\pdoIngestor::__construct()
*/
public function __construct(aOptions $options, EtlConfiguration $etlConfig, LoggerInterface $logger = null)
{
parent::__construct($options, $etlConfig, $logger);

$this->_end_time = $etlConfig->getVariableStore()->endDate ? date('Y-m-d H:i:s', strtotime($etlConfig->getVariableStore()->endDate)) : null;

$this->resetInstance();
}

private function initInstance($srcRecord)
{
// Since we only get information for when a configuration changes we assume a configuration has an end date
// of today unless we have a row that tells us otherwise
$default_end_time = isset($this->_end_time) ? $this->_end_time : date('Y-m-d') . ' 23:59:59';

$this->_instance_state = array(
'resource_id' => $srcRecord['resource_id'],
'hostname' => $srcRecord['hostname'],
'vcpus' => $srcRecord['vcpus'],
'memory_mb' => $srcRecord['memory_mb'],
'start_date_ts' => strtotime($srcRecord['fact_date'] . " 00:00:00"),
'end_date_ts' => strtotime($default_end_time),
'start_day_id' => date('Y', strtotime($srcRecord['fact_date'])) * 100000 + date('z', strtotime($srcRecord['fact_date'])) + 1,
'end_day_id' => date('Y', strtotime($default_end_time)) * 100000 + date('z', strtotime($default_end_time)) + 1
);
}

private function resetInstance()
{
$this->_instance_state = null;
}

private function updateInstance($srcRecord)
{
// The -1 is to make sure we use the last second of the previous day
$end_date_timestamp = strtotime($srcRecord['fact_date'] . " 00:00:00") - 1;
$this->_instance_state['end_date_ts'] = $end_date_timestamp;

// date(z) is zero indexed so +1 is needed to get the correct day of the year
$this->_instance_state['end_day_id'] = date('Y', $end_date_timestamp) * 100000 + date('z', $end_date_timestamp) + 1;
}

/**
* @see ETL\Ingestor\pdoIngestor::transform()
*/
protected function transform(array $srcRecord, &$orderId)
{
// We want to just flush when we hit the dummy row
if ($srcRecord['fact_date'] === 0) {
if (isset($this->_instance_state)) {
return array($this->_instance_state);
} else {
return array();
}
}

if ($this->_instance_state === null) {
if($srcRecord['vcpus'] == -1 && $srcRecord['memory_mb'] == -1) {
return array();
}

$this->initInstance($srcRecord);
}

$transformedRecord = array();

if (($this->_instance_state['hostname'] != $srcRecord['hostname']) || ($this->_instance_state['resource_id'] != $srcRecord['resource_id']) || ($this->_instance_state['vcpus'] != $srcRecord['vcpus'] || $this->_instance_state['memory_mb'] != $srcRecord['memory_mb'])) {

// Only update the instance if the only thing that is different between $srcRecord and $this->_instance_state is that either the memory or vcpus changed
if (($this->_instance_state['vcpus'] != $srcRecord['vcpus'] || $this->_instance_state['memory_mb'] != $srcRecord['memory_mb'])
&& ($this->_instance_state['hostname'] == $srcRecord['hostname']) && ($this->_instance_state['resource_id'] == $srcRecord['resource_id'])) {
$this->updateInstance($srcRecord);
}

$transformedRecord[] = $this->_instance_state;
$this->resetInstance();

// Under most circumstances when we detect a change we want to start a new row with data from the row that has changed. This is not
// the case when the change detected is a -1 value for vcpus or memory_mb. When vcpus or memory_mb is -1 it means the host has been
// removed and we just want to end the row and not create a new row.
if($srcRecord['vcpus'] != -1 && $srcRecord['memory_mb'] != -1) {
$this->initInstance($srcRecord);
}
}

return $transformedRecord;
}

protected function getSourceQueryString()
{
$sql = parent::getSourceQueryString();

// Due to the way the Finite State Machine handles the rows in event reconstruction, the last row
// is lost. To work around this we add a dummy row filled with zeroes.
$colCount = count($this->etlSourceQuery->records);
$unionValues = array_fill(0, $colCount, 0);
$sql .= "\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 ASC, 5 ASC";

return $sql;
}

public function transformHelper(array $srcRecord)
{
$orderId = 0;
return $this->transform($srcRecord, $orderId);
}
}
4 changes: 2 additions & 2 deletions configuration/etl/etl.d/cloud_ingest_resource_specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@
"ingest-cloud-resource-specs": [
{
"name": "CloudResourceSpecsReconstructor",
"class": "DatabaseIngestor",
"definition_file": "cloud_common/resource_specifications.json",
"class": "CloudResourceSpecsStateTransformIngestor",
"definition_file": "cloud_common/resource_specifications_transformer.json",
"description": "Sets a start and end time for memory and vcpu paring for a compute node on a cloud resource"
}
],
Expand Down
93 changes: 54 additions & 39 deletions configuration/etl/etl.d/xdmod-migration-9_0_0-9_5_0.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@
"namespace": "ETL\\Maintenance",
"class": "ExecuteSql",
"options_class": "MaintenanceOptions"
},
"cloud-migration-9_0_0-9_5_0": {
},
"cloud-migration-9_0_0-9_5_0": {
"namespace": "ETL\\Ingestor",
"options_class": "IngestorOptions",
"endpoints": {
"source": {
"type": "mysql",
"name": "Cloud DB",
"config": "datawarehouse",
"schema": "modw_cloud"
},
"destination": {
"type": "mysql",
"name": "Cloud DB",
"config": "datawarehouse",
"schema": "modw_cloud"
}
}
}
"source": {
"type": "mysql",
"name": "Cloud DB",
"config": "datawarehouse",
"schema": "modw_cloud"
},
"destination": {
"type": "mysql",
"name": "Cloud DB",
"config": "datawarehouse",
"schema": "modw_cloud"
}
}
}
},
"migration-9_0_0-9_5_0": [
{
Expand Down Expand Up @@ -104,28 +104,43 @@
}
],
"cloud-migration-9_0_0-9_5_0": [
{
"#": "Asset data must be aggregated post ingestion",
"name": "CloudResourceSpecsAggregator",
"class": "SimpleAggregator",
"namespace": "ETL\\Aggregator",
"options_class": "AggregatorOptions",
"description": "Aggregate cloud records.",
"definition_file": "cloud_common/cloud_resource_specs_aggregation.json",
"table_prefix": "resourcespecsfact_by_",
"aggregation_units": [
"day", "month", "quarter", "year"
],
"endpoints": {
"destination": {
"type": "mysql",
"name": "Aggregate DB",
"config": "datawarehouse",
"schema": "modw_aggregates",
"create_schema_if_not_exists": true,
"truncate_destination": true
{
"name": "CloudResourceSpecsReconstructor",
"class": "CloudResourceSpecsStateTransformIngestor",
"definition_file": "cloud_common/resource_specifications_transformer.json",
"description": "Sets a start and end time for memory and vcpu paring for a compute node on a cloud resource",
"endpoints": {
"destination": {
"type": "mysql",
"name": "Cloud DB",
"config": "datawarehouse",
"schema": "modw_cloud",
"truncate_destination": true
}
}
}
}
]
},
{
"#": "Asset data must be aggregated post ingestion",
"name": "CloudResourceSpecsAggregator",
"class": "SimpleAggregator",
"namespace": "ETL\\Aggregator",
"options_class": "AggregatorOptions",
"description": "Aggregate cloud records.",
"definition_file": "cloud_common/cloud_resource_specs_aggregation.json",
"table_prefix": "resourcespecsfact_by_",
"aggregation_units": [
"day", "month", "quarter", "year"
],
"endpoints": {
"destination": {
"type": "mysql",
"name": "Aggregate DB",
"config": "datawarehouse",
"schema": "modw_aggregates",
"create_schema_if_not_exists": true,
"truncate_destination": true
}
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"table_definition": {
"$ref": "${table_definition_dir}/cloud_common/cloud_resource_specs.json#/table_definition"
},
"destination_record_map": {
"cloud_resource_specs": {
"resource_id": "resource_id",
"hostname": "hostname",
"vcpus": "vcpus",
"memory_mb": "memory_mb",
"start_date_ts": "start_date_ts",
"end_date_ts": "end_date_ts",
"start_day_id": "start_day_id",
"end_day_id": "end_day_id"
}
},
"source_query": {
"records": {
"resource_id": "srs.resource_id",
"hostname": "srs.hostname",
"vcpus": "srs.vcpus",
"memory_mb": "srs.memory_mb",
"fact_date": "srs.fact_date",
"start_date_ts": -1,
"end_date_ts": -1,
"start_day_id": -1,
"end_day_id": -1
},
"joins": [{
"name": "staging_resource_specifications",
"schema": "${SOURCE_SCHEMA}",
"alias": "srs"
}]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@
{
"name": "vcpus",
"type": "int(5)",
"nullable": false,
"default": null
"nullable": false
},
{
"name": "start_date_ts",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"table_definition": {
"$ref": "../etl_tables_8.0.0.d/cloud_resource_specs.json#/table_definition"
},
"destination_record_map": {
"cloud_resource_specs": {
"resource_id": "resource_id",
"hostname": "hostname",
"vcpus": "vcpus",
"memory_mb": "memory_mb",
"start_date_ts": "start_date_ts",
"end_date_ts": "end_date_ts",
"start_day_id": "start_day_id",
"end_day_id": "end_day_id"
}
},
"source_query": {
"records": {
"resource_id": "srs.resource_id",
"hostname": "srs.hostname",
"vcpus": "srs.vcpus",
"memory_mb": "srs.memory_mb",
"start_date_ts": "srs.fact_date",
"end_date_ts": -1,
"start_day_id": -1,
"end_day_id": -1
},
"joins": [{
"name": "staging_resource_specifications",
"schema": "${SOURCE_SCHEMA}",
"alias": "srs"
}]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,34 @@
{
"name": "vcpus",
"type": "int(5)",
"nullable": true,
"default": null
"nullable": false
},
{
"name": "start_date_ts",
"type": "int(12) unsigned",
"nullable": false
},
{
"name": "end_date_ts",
"type": "int(12) unsigned",
"nullable": true
},
{
"name": "start_date",
"type": "date",
"name": "start_day_id",
"type": "int(11) unsigned",
"nullable": false
},
{
"name": "end_date",
"type": "date",
"name": "end_day_id",
"type": "int(11) unsigned",
"nullable": true
},
{
"name": "last_modified",
"type": "timestamp",
"nullable": false,
"default": "CURRENT_TIMESTAMP",
"extra": "on update CURRENT_TIMESTAMP"
}
],

Expand All @@ -59,7 +75,7 @@
"hostname",
"memory_mb",
"vcpus",
"start_date"
"start_date_ts"
],
"is_unique": true
},
Expand Down
Loading

0 comments on commit a9145bb

Please sign in to comment.