From 9c435165c4077e39d31ac4ce3e755549821487b1 Mon Sep 17 00:00:00 2001 From: "Joseph P. White" Date: Tue, 17 Mar 2020 11:52:33 -0400 Subject: [PATCH] Improve performance of the timeseries queries. --- .../DataWarehouse/Access/MetricExplorer.php | 2 +- .../Data/SimpleTimeseriesData.php | 5 + .../Data/SimpleTimeseriesDataIterator.php | 153 ---- .../Data/SimpleTimeseriesDataset.php | 848 ------------------ .../DataWarehouse/Data/TimeseriesDataset.php | 377 ++++++++ .../DataWarehouse/Query/TimeseriesQuery.php | 82 ++ .../Visualization/HighChartTimeseries2.php | 126 +-- 7 files changed, 473 insertions(+), 1120 deletions(-) delete mode 100644 classes/DataWarehouse/Data/SimpleTimeseriesDataIterator.php delete mode 100644 classes/DataWarehouse/Data/SimpleTimeseriesDataset.php create mode 100644 classes/DataWarehouse/Data/TimeseriesDataset.php diff --git a/classes/DataWarehouse/Access/MetricExplorer.php b/classes/DataWarehouse/Access/MetricExplorer.php index d275b9700c..d12b05e051 100644 --- a/classes/DataWarehouse/Access/MetricExplorer.php +++ b/classes/DataWarehouse/Access/MetricExplorer.php @@ -84,7 +84,7 @@ public function get_data($user) $dataset_classname = $timeseries - ? '\DataWarehouse\Data\SimpleTimeseriesDataset' + ? 
'\DataWarehouse\Data\TimeseriesDataset' : '\DataWarehouse\Data\SimpleDataset'; $highchart_classname diff --git a/classes/DataWarehouse/Data/SimpleTimeseriesData.php b/classes/DataWarehouse/Data/SimpleTimeseriesData.php index 71fdfc720b..2f923acee1 100644 --- a/classes/DataWarehouse/Data/SimpleTimeseriesData.php +++ b/classes/DataWarehouse/Data/SimpleTimeseriesData.php @@ -22,6 +22,11 @@ public function __construct($name) parent::__construct($name); } + public function addDatum($time, $value, $error_value) { + $this->_start_ts[] = $time; + $this->_values[] = $value; + $this->_errors[] = $error_value; + } /** * JMS June 2015 */ diff --git a/classes/DataWarehouse/Data/SimpleTimeseriesDataIterator.php b/classes/DataWarehouse/Data/SimpleTimeseriesDataIterator.php deleted file mode 100644 index 97ea64cf52..0000000000 --- a/classes/DataWarehouse/Data/SimpleTimeseriesDataIterator.php +++ /dev/null @@ -1,153 +0,0 @@ -groupColumn = $groupColumn; - $this->dataset = $dataset; - $this->index = 0; - $this->column_type_and_name = $column_type_and_name; - - if ($column_type_and_name =='time') { - $this->is_dimension = true; - $this->column_name = $dataset->getAggregationUnit()->getUnitName(); - $this->where_column_name = $dataset->_query->getAggregationUnit()->getUnitName() . 
'_id'; - } else { - - $this->is_dimension = substr($column_type_and_name, 0, 3) == 'dim'; - $this->column_name = substr($column_type_and_name, 4); - - $this->where_column_name = $groupColumn->getName(); - $gpBy = $groupColumn->getGroupBy(); - if (isset( $gpBy )) $this->where_column_name .= '_id'; - } - } - - //------------------------------------------------- - // public function current() - // - // Returns the current SimpleTimeseriesDataset object from the iterator - // - // @return \DataWarehouse\Data\SimpleTimeseriesData - //------------------------------------------------- - public function current() - { - if (!$this->valid()) { return NULL; } - - $value = $this->groupColumn->getValue($this->index); - $id = $this->groupColumn->getId($this->index); - - if ( !in_array( $id, $this->limit_ids ) ) { - // TODO: instead of keeping a limit_ids array and a dataObjects array, - // just keep an assoc array of dataObjects... - - // add the current id to the iterator object's id array - // so these ids can be excluded from the summary query: - $this->limit_ids[] = $id; - - $dataObject = $this->dataset->getColumn( - $this->column_type_and_name, - null, - null, - $this->where_column_name, - $id - ); - - $dataObject->setName($value); - - $dataObject->setGroupName ( $value ); - $dataObject->setGroupId ( $this->groupColumn->getId($this->index) ); - - $dataObject->setUnit ( $this->dataset->getColumnLabel( - $this->column_name, - $this->is_dimension) - ); - - $this->dataObjects[$id] = $dataObject; - - } else { - $dataObject = $this->dataObjects[$id]; - } - - // SimpleTimeseriesData object - return $dataObject; - } - - // reset index to 0 - public function rewind() - { - $this->index = 0; - } - - // return current index - // @return integer - public function key() - { - return $this->index; - } - - // advance index then return current - // @return SimpleTimeseriesData object - public function next() - { - ++$this->index; - return $this->current(); - } - - public function valid() - 
{ - $vals = $this->groupColumn->getValues(); - return isset($vals[$this->index]); - } - - public function count() - { - return $this->groupColumn->getCount(true); - } - - // ----- accessor functions ------- - - // use an array to keep the ids of records <= $limit - // these records are not stored in the iterator - // @return array - public function getLimitIds() { - return $this->limit_ids; - } - -} // class SimpleTimeseriesDataIterator diff --git a/classes/DataWarehouse/Data/SimpleTimeseriesDataset.php b/classes/DataWarehouse/Data/SimpleTimeseriesDataset.php deleted file mode 100644 index e6bf4efa10..0000000000 --- a/classes/DataWarehouse/Data/SimpleTimeseriesDataset.php +++ /dev/null @@ -1,848 +0,0 @@ -_query->getAggregationUnit()->getUnitName(); - } - - // Handle the where clause for ids, equivalence or not-in: - $hasWhere = $wherecolumn_name != NULL && $where_value !== NULL; - - // run the query; set $this->_results - $this->getResults( - $limit, - $offset, - true, // force reexec!!! - true, // get column metadata so we can assign colnames - $hasWhere ? $wherecolumn_name : null, // where column name - $hasWhere ? "'" . $where_value . "'" : null // where values - ); - - return $this->assembleDataObject( - $column_name, - $is_dimension, - $hasWhere, - $wherecolumn_name, - $where_value - ); - } - - //------------------------------------------------- - // public function assembleDataObject - // - // @return \DataWarehouse\Data\SimpleTimeseriesData - //------------------------------------------------- - public function assembleDataObject( - $column_name, - $is_dimension, - $hasWhere, - $wherecolumn_name, - $where_value - ) { - // assign column names for returned data: - $values_column_name = null; - $sem_column_name = null; - $ids_column_name = null; - $order_ids_column_name = null; - $start_ts_column_name = null; // timeseries only - - $start_ts_column_name = $this->_query->getAggregationUnit()->getUnitName() - . 
'_start_ts'; - // standard error - $semStatId = \Realm\Realm::getStandardErrorStatisticFromStatistic( - $column_name - ); - if (isset($this->_query->_stats[$semStatId])) { - $sem_column_name = $semStatId; - } - - // create the data object - $dataObject = new \DataWarehouse\Data\SimpleTimeseriesData( - $this->getColumnLabel($column_name, $is_dimension) - ); - - $dataObject->setUnit( $this->getColumnLabel($column_name, $is_dimension) ); - if ($is_dimension) { - $dataObject->setGroupBy( $this->_query->_group_bys[$column_name] ); - $values_column_name = $column_name . '_name'; - $ids_column_name = $column_name . '_id'; - $order_ids_column_name = $column_name . '_order_id'; - - } else { - $dataObject->setStatistic( $this->_query->_stats[$column_name] ); - $values_column_name = $column_name; - } - - // accumulate the values in temp variables, then set everything at once. - $dataErrors = array(); - $dataValues = array(); - $dataIds = array(); - $dataOrderIds = array(); - $dataStartTs = array(); - - // walk through the result set and assign ... - foreach ($this->_results as $row) { - - // This section unique to TS - if ($hasWhere && $row[$wherecolumn_name] != $where_value) { - continue; - } - - if ( - $start_ts_column_name != NULL - && !isset($row[$start_ts_column_name]) - ) { - throw new \Exception( - get_class($this). ":" . __FUNCTION__ ."()" - . " start_ts_column_name=$start_ts_column_name does not" - . " exist in the dataset." - ); - } - - $start_ts = $row[$start_ts_column_name]; - $dataStartTs[] = $start_ts; - - // End of section unique to TS - - if ($values_column_name != NULL) { - if (!array_key_exists($values_column_name, $row)) { - throw new \Exception( - get_class($this). ":" . __FUNCTION__ ."()" - . " values_column_name=$values_column_name does not" - . 
" exist in the dataset."); - } else { - $dataValues[] = $this->convertSQLtoPHP( - $row[$values_column_name], - $this->_columnTypes[$values_column_name]['native_type'], - $this->_columnTypes[$values_column_name]['precision'] - ); - } - } - - if ($sem_column_name != NULL) { - if (!array_key_exists($sem_column_name, $row)) { - $dataErrors[] = 0; - } else { - $dataErrors[] = $this->convertSQLtoPHP( - $row[$sem_column_name], - $this->_columnTypes[$sem_column_name]['native_type'], - $this->_columnTypes[$sem_column_name]['precision'] - ); - } - } - - if ($ids_column_name != NULL) { - if (!array_key_exists($ids_column_name, $row)) { - throw new \Exception( - get_class($this). ":" . __FUNCTION__ ."()" - . " ids_column_name=$ids_column_name does not exist" - . " in the dataset." - ); - } else { - $dataIds[] = $row[$ids_column_name]; - } - } - - if ($order_ids_column_name != NULL) { - if (!array_key_exists($order_ids_column_name, $row)) { - throw new \Exception( - get_class($this). ":" . __FUNCTION__ ."()" - . " order_ids_column_name=$order_ids_column_name does" - . " not exist in the dataset." - ); - } else { - $dataOrderIds[] = $row[$order_ids_column_name]; - } - } - } - - $dataObject->setValues( $dataValues ); - $dataObject->setErrors( $dataErrors ); - $dataObject->setStartTs( $dataStartTs ); - - if ($is_dimension) { - $dataObject->setIds( $dataIds ); - $dataObject->setOrderIds( $dataOrderIds ); - } - - return $dataObject; - } // function assembleDataObject - - //------------------------------------------------- - // public function getSummaryOperation - // - // Use data object's statistic alias to determine - // what operation will be used for data series - // summarization beyond the top-n, for display. 
- // - // Summarization performed by database will consist - // of SUM, MIN, MAX, or AVG by time aggregation unit - // - // @return String - //------------------------------------------------- - public function getSummaryOperation($stat) - { - - $operation = "SUM"; - - // Determine operation for summarizing the dataset - if ( strpos($stat, 'min_') !== false ) { - $operation = "MIN"; - - } elseif ( strpos($stat, 'max_') !== false ) { - $operation = "MAX"; - - } // if strpos - - return $operation; - } // getSummaryOperation - - //------------------------------------------------- - // public function getUniqueCount - // - // Query for the total count of unique dimension values - // in the chosen $realm, over the selected time period. - // - // Used by HighChartTimeseries2 configure() - // - // @return int - //------------------------------------------------- - public function getUniqueCount( - $column_name, - $realm - ) { - - $agg_query = new \DataWarehouse\Query\AggregateQuery( - $realm, - $this->_query->getAggregationUnit()->getUnitName(), - $this->_query->getStartDate(), - $this->_query->getEndDate() - ); - - $agg_query->addGroupBy($column_name); - - foreach ($this->_query->_stats as $stat_name => $stat) { - $agg_query->addStat($stat_name); - } - - // we only return a count here, so remove this unneeded order by: - $agg_query->clearOrders(); - $agg_query->cloneParameters($this->_query); - - return $agg_query->getCount(); - } // getUniqueCount - - //------------------------------------------------- - // public function getColumnUniqueOrdered - // - // Query for the highest average $limit dimension values - // in the chosen $realm, over the selected time period. - // - // This is the old way to fetch, order, and return - // the "top n" values for a given dimension. - // What "top" means varies by the type of column - // we are dealing with. Some are sorted by dimension label, - // others by metric. 
- // - // Used by HighChartTimeseries2 configure() for - // fetching the top $limit examples of a metric. - // - // @return \DataWarehouse\Data\SimpleTimeseriesData - //------------------------------------------------- - public function getColumnUniqueOrdered( - $column_type_and_name, - $limit = null, - $offset = null, - $realm = null - ) { - $column_type = substr($column_type_and_name, 0, 3); - $column_name = substr($column_type_and_name, 4); - - $is_dimension = $column_type == 'dim'; - - if ($column_type_and_name == 'time') { - $is_dimension = true; - $column_name = $this->_query->getAggregationUnit()->getUnitName(); - } - - $agg_query = new \DataWarehouse\Query\AggregateQuery( - $realm, - $this->_query->getAggregationUnit()->getUnitName(), - $this->_query->getStartDate(), - $this->_query->getEndDate() - ); - - $agg_query->addGroupBy($column_name); - - foreach ($this->_query->_stats as $stat_name => $stat) { - $agg_query->addStat($stat_name); - } - - // No need to clear orders as there is no order by time; this is an Aggregate query. Keep - // the ordering, and we'll actually match the dataset ordering that aggregate achieves. - // JMS 12 Nov 15 - - // Note: only one item can be stored in Query::sortInfo array at present - // @refer Query member function addOrderByAndSetSortInfo() - if (isset($this->_query->sortInfo)) { - foreach ($this->_query->sortInfo as $sort) { - $agg_query->addOrderBy( - $sort['column_name'], - $sort['direction'] - ); - } - } - - $agg_query->cloneParameters($this->_query); - - $dataObject = new \DataWarehouse\Data\SimpleTimeseriesData($column_name); - - if ($is_dimension) { - $dataObject->setGroupBy( $agg_query->_group_bys[$column_name] ); - - $values_column_name = $column_name . '_name'; - $ids_column_name = $column_name . '_id'; - $order_ids_column_name = $column_name . 
'_order_id'; - - $dataObject->setUnit( $agg_query->_group_bys[$column_name] ); - } else { - $dataObject->setStatistic ( $agg_query->_stats[$column_name] ); - $values_column_name = $column_name; - - $dataObject->setUnit( $agg_query->_stats[$column_name] ); - } - - $query_string = $agg_query->getQueryString($limit, $offset); - - $statement = DB::factory($agg_query->_db_profile)->query( - $query_string, - array(), - true - ); - $statement->execute(); - - $columnTypes = array(); - - for ($end = $statement->columnCount(), $i = 0; $i < $end; $i++) { - $raw_meta = $statement->getColumnMeta($i); - $columnTypes[$raw_meta['name']] = $raw_meta; - } - - // accumulate the values in a temp variable, then set everything - // at once. - $dataErrors = array(); - $dataValues = array(); - $dataIds = array(); - $dataOrderIds = array(); - $dataStartTs = array(); - - while ( - $row = $statement->fetch(\PDO::FETCH_ASSOC, \PDO::FETCH_ORI_NEXT) - ) { - if ($values_column_name != NULL) { - if (!array_key_exists($values_column_name, $row)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " values_column_name=$values_column_name does not" - . " exist in the dataset." - ); - } else { - $dataValues[] = $this->convertSQLtoPHP( - $row[$values_column_name], - $columnTypes[$values_column_name]['native_type'], - $columnTypes[$values_column_name]['precision'] - ); - } - - $sem_column_name = \Realm\Realm::getStandardErrorStatisticFromStatistic( - $values_column_name - ); - - if (!array_key_exists($sem_column_name, $row)) { - $dataErrors[] = 0; - } else { - $dataErrors[] = $this->convertSQLtoPHP( - $row[$sem_column_name], - $columnTypes[$sem_column_name]['native_type'], - $columnTypes[$sem_column_name]['precision'] - ); - } - } - - if ($ids_column_name != NULL) { - if (!array_key_exists($ids_column_name, $row)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " ids_column_name=$ids_column_name does not exist" - . " in the dataset." 
- ); - } else { - $dataIds[] = $row[$ids_column_name]; - } - } - - if ($order_ids_column_name != NULL) { - if (!array_key_exists($order_ids_column_name, $row)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " order_ids_column_name=$order_ids_column_name does" - . " not exist in the dataset." - ); - } else { - $dataOrderIds[] = $row[$order_ids_column_name]; - } - } - } - - $dataObject->setValues( $dataValues ); - $dataObject->setErrors( $dataErrors ); - $dataObject->setStartTs( $dataStartTs); - - // Generalize for class type: TODO JMS - if ($is_dimension) { - $dataObject->setIds( $dataIds ); - $dataObject->setOrderIds( $dataOrderIds ); - } - - return $dataObject; - } - - - //------------------------------------------------- - // public function getTimestamps - // - // @return \DataWarehouse\Data\SimpleTimeseriesData - //------------------------------------------------- - public function getTimestamps() - { - $raw_timetamps = $this->_query->getTimestamps(); - $column_name = $this->_query->getAggregationUnit()->getUnitName(); - - $timestampsDataObject = new \DataWarehouse\Data\SimpleTimeseriesData( - $this->getColumnLabel($column_name, true) - ); - - $values_column_name = 'short_name'; - $ids_column_name = 'id'; - $order_ids_column_name = 'id'; - $start_ts_column_name = 'start_ts'; - - // JMS: accumulate the values in a temp variable, then set everything - // at once. - $dataErrors = array(); - $dataValues = array(); - $dataIds = array(); - $dataOrderIds = array(); - $dataStartTs = array(); - - foreach ($raw_timetamps as $raw_timetamp) { - if (!array_key_exists($start_ts_column_name, $raw_timetamp)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " start_ts_column_name=$start_ts_column_name does not" - . " exist in the dataset." 
- ); - } - - $start_ts = $raw_timetamp[$start_ts_column_name]; - $dataStartTs[] = $start_ts; - - if (!array_key_exists($values_column_name, $raw_timetamp)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " values_column_name=$values_column_name does not exist" - . " in the dataset." - ); - } else { - $dataValues[] - = $raw_timetamp[$values_column_name]; - } - - $dataErrors[] = 0; - - if (!array_key_exists($ids_column_name, $raw_timetamp)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " ids_column_name=$ids_column_name does not exist in" - . " the dataset." - ); - } else { - $dataIds[] - = $raw_timetamp[$ids_column_name]; - } - - if (!array_key_exists($order_ids_column_name, $raw_timetamp)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " order_ids_column_name=$order_ids_column_name does not" - . " exist in the dataset." - ); - } else { - $dataOrderIds[] - = $raw_timetamp[$order_ids_column_name]; - } - } - - $timestampsDataObject->setValues( $dataValues ); - $timestampsDataObject->setErrors( $dataErrors ); - $timestampsDataObject->setStartTs( $dataStartTs); - - // Generalize for class type: TODO JMS may need to test for this - //if ($is_dimension) { - $timestampsDataObject->setIds( $dataIds ); - $timestampsDataObject->setOrderIds( $dataOrderIds ); - //} - return $timestampsDataObject; - } // function getTimestamps() - - //------------------------------------------------- - // public function getColumnIteratorBy - // - // @param string description of column as metric - // @param SimpleTimeseriesData column - // - // @return SimpleTimeseriesDataIterator - //------------------------------------------------- - public function getColumnIteratorBy( - $column_type_and_name, - $datagroup_type_and_name - ) { - return new SimpleTimeseriesDataIterator( - $this, - $column_type_and_name, - $datagroup_type_and_name - ); - } // function getColumnIteratorBy - - 
//------------------------------------------------- - // public function export() - // - // @param title of export - // - // @see SimpleDataset::export - //------------------------------------------------- - public function export($export_title = 'title') - { - $exportData = parent::export($export_title); - - // Organize the rows by dimension and get all dates used. - $dateSet = array(); - $dimensionValuesSet = array(); - foreach ($exportData['rows'] as $row) { - $rowDate = reset($row); - $rowDimension = next($row); - $rowValue = next($row); - - $dateSet[$rowDate] = true; - $dimensionValuesSet[$rowDimension][$rowDate] = $rowValue; - } - - // Convert the date set into an ordered list. - $dates = array_keys($dateSet); - sort($dates); - - // Order the dimensions as requested. - $queryGroupById = 'none'; - foreach ($this->_query->getGroupBys() as $groupBy) { - $groupById = $groupBy->getId(); - if ( - $groupById !== 'day' - && $groupById !== 'month' - && $groupById !== 'quarter' - && $groupById !== 'year' - ) { - $queryGroupById = $groupById; - break; - } - } - if ($queryGroupById !== 'none') { - $datasetIterator = $this->getColumnIteratorBy( - 'met_' . reset($this->_query->getStats())->getId(), - $this->getColumnUniqueOrdered( - 'dim_' . 
$queryGroupById, - null, - null, - $this->_query->getRealmName() - ) - ); - - $dimensionNames = array_map(function ($dataset) { - return $dataset->getName(); - }, iterator_to_array($datasetIterator)); - - $orderedDimensionValuesSet = array(); - foreach ($dimensionNames as $dimensionName) { - if (!array_key_exists($dimensionName, $dimensionValuesSet)) { - continue; - } - - $orderedDimensionValuesSet[$dimensionName] = $dimensionValuesSet[$dimensionName]; - } - foreach ($dimensionValuesSet as $dimension => $dimensionValues) { - if (array_key_exists($dimension, $orderedDimensionValuesSet)) { - continue; - } - - $orderedDimensionValuesSet[$dimension] = $dimensionValues; - } - - $dimensionValuesSet = $orderedDimensionValuesSet; - } - - // Change the set of rows and their headers so that there is one - // column per dimension. - $newHeaders = array_slice($exportData['headers'], 0, 1); - $seriesName = $exportData['headers'][2]; - foreach ($dimensionValuesSet as $dimension => $dimensionValues) { - $newHeaders[] = "[$dimension] $seriesName"; - } - - $dateOrderedRows = array(); - foreach ($dates as $date) { - $dateRow = array( - $date, - ); - - foreach ($dimensionValuesSet as $dimensionValues) { - $dateRow[] = \xd_utilities\array_get($dimensionValues, $date, 0); - } - - $dateOrderedRows[] = $dateRow; - } - - $exportData['headers'] = $newHeaders; - $exportData['rows'] = $dateOrderedRows; - - return $exportData; - } - - //------------------------------------------------- - // public function getSummarizedColumn - // - // Query to summarize the non-top $limit timeseries metrics - // for $column_name in the chosen $realm, over the selected - // time period. - // - // Error values are not retained as they are not meaningful - // here. This is consistent with the previous version of this - // functionality. - // - // Used by HighChartTimeseries2 configure() for - // fetching and summarizing the "other" examples of a metric. 
- // - // @param type and name of column being summarized - // @param type and name of where clause column - // @param count of values we are averaging over, if operation is AVG. - // @param array of ids corresponding to top n. Exclude these in where clause - // @param name of data's realm - // - // @return \DataWarehouse\Data\SimpleTimeseriesData - // @author J.M. Sperhac - //------------------------------------------------- - public function getSummarizedColumn( - $column_name, - $where_name, - $normalizeBy, // should we report the mean for the summarized column? - // if so, normalize by this total. - $whereExcludeArray, // array of top-n ids to exclude from query - $realm = null - ) { - - // determine the selected time aggregation unit - $aggunit_name = $this->_query->getAggregationUnit()->getUnitName(); - - // assign column names for returned data: - $values_column_name = $column_name; - $start_ts_column_name = $aggunit_name . '_start_ts'; - - // Construct a TS query using the selected time agg unit - // Group by the nothing in constructor call, so you *dont* roll up by time; - // later, add the where column name for the group by - $q = new \DataWarehouse\Query\TimeseriesQuery( - $realm, - $aggunit_name, - $this->_query->getStartDate(), - $this->_query->getEndDate() - ); - - // add the stats - foreach ($this->_query->_stats as $stat_name => $stat) { - $q->addStat($stat_name); - } - - // if we have additional parameters: - $q->cloneParameters($this->_query); - - // group on the where clause column, which will be enforced after time agg. unit - $q->addGroupBy($where_name); - - // add a where condition on the array of excluded ids. 
These are the top-n - if (!empty($whereExcludeArray)) { - $q->addWhereAndJoin($where_name, "NOT IN", $whereExcludeArray); - } - - // set up data object for return - $dataObject = new \DataWarehouse\Data\SimpleTimeseriesData($column_name); - $dataObject->setStatistic ( $q->_stats[$column_name] ); - // set unit part of label on dataseries' legend - $dataObject->setUnit( $this->getColumnLabel( $column_name, false) ); - - // perform the summarization right in the database - - // Take AVG, MIN, MAX, or SUM of the column_name, grouped by time aggregation unit - $statAlias = $dataObject->getStatistic()->getId(); - $operation = $this->getSummaryOperation($statAlias); - - $useMean = $operation === 'SUM' && (strpos($statAlias, 'avg_') !== false - || strpos($statAlias, 'count') !== false - || strpos($statAlias, 'utilization') !== false - || strpos($statAlias, 'rate') !== false - || strpos($statAlias, 'expansion_factor') !== false); - - // Now perform the summarization, making use of the Query class query string, fetch: - // * the timeseries unit appropriate to the time aggregation level, - // * the actual count of values being summarized over (for normalizing averaging) - // * the averaged/min/max/summed data over the time aggregation unit. - $query_string = "SELECT t.$start_ts_column_name AS $start_ts_column_name, - $operation( t.$column_name ) AS $column_name " - . " FROM ( " - . $q->getQueryString() - . " ) t " - . " GROUP BY t.$start_ts_column_name"; - - $statement = DB::factory($q->_db_profile)->query( - $query_string, - array(), - true - ); - $statement->execute(); - - $columnTypes = array(); - - for ($end = $statement->columnCount(), $i = 0; $i < $end; $i++) { - $raw_meta = $statement->getColumnMeta($i); - $columnTypes[$raw_meta['name']] = $raw_meta; - } - - // accumulate the values in a temp variable, then set everything - // at once. 
- $dataValues = array(); - $dataStartTs = array(); - - while ( $row = $statement->fetch(\PDO::FETCH_ASSOC, \PDO::FETCH_ORI_NEXT)) { - - if ( - $start_ts_column_name != NULL - && !isset($row[$start_ts_column_name]) - ) { - throw new \Exception( - get_class($this). ":" . __FUNCTION__ ."()" - . " start_ts_column_name=$start_ts_column_name does not" - . " exist in the dataset." - ); - } - - $start_ts = $row[$start_ts_column_name]; - $dataStartTs[] = $start_ts; - - // populate the values - if ($values_column_name != NULL) { - if (!array_key_exists($values_column_name, $row)) { - throw new \Exception( - get_class($this) .":". __FUNCTION__ ."()" - . " values_column_name=$values_column_name does not" - . " exist in the dataset." - ); - } else { - - - // get the data value - $dataCurrentValue = $this->convertSQLtoPHP( - $row[$values_column_name], - $columnTypes[$values_column_name]['native_type'], - $columnTypes[$values_column_name]['precision'] - ); - - if ($useMean) { - $dataCurrentValue /= $normalizeBy; - } - - // stuff it onto the array - $dataValues[] = $dataCurrentValue; - - } // if (!array_key_exists($values_column_name, $row)) - } // if ($values_column_name != NULL) - } // while - - $dataObject->setValues( $dataValues ); - $dataObject->setStartTs( $dataStartTs); - - // Prevent drilldown from this summarized data series - // @refer html/gui/js/DrillDownMenu.js - // this.groupByIdParam < -9999 - $dataSummaryGroupVal = -99999; - $dataObject->setGroupId( $dataSummaryGroupVal ); - $dataObject->setGroupName( $dataSummaryGroupVal ); - - return $dataObject; - } // public function getSummarizedColumn -} diff --git a/classes/DataWarehouse/Data/TimeseriesDataset.php b/classes/DataWarehouse/Data/TimeseriesDataset.php new file mode 100644 index 0000000000..8b8d97350e --- /dev/null +++ b/classes/DataWarehouse/Data/TimeseriesDataset.php @@ -0,0 +1,377 @@ +query = $query; + $this->agg_query = $query->getAggregateQuery(); + + } + + /** + * Get the ordered list of data series 
identifiers based on the + * aggregate query. This is used to order the datasets that are returned + * from the timeseries query. + */ + protected function getSeriesIds($limit, $offset) + { + $statement = $this->agg_query->getRawStatement($limit, $offset); + $statement->execute(); + + $groupInstance = reset($this->agg_query->getGroupBys()); + $groupIdColumn = $groupInstance->getId() . '_id'; + + $seriesIds = array(); + + while($row = $statement->fetch(\PDO::FETCH_ASSOC, \PDO::FETCH_ORI_NEXT)) { + $seriesIds[] = "${row[$groupIdColumn]}"; + } + + return $seriesIds; + } + + /** + * Get the time-based and space-based groupby class instances from the underlying + * query class. Note this class only supports a single space-based group by + * class per query. + */ + protected function getGroupByClasses() + { + $timeGroup = null; + $spaceGroup = null; + + foreach ($this->query->getGroupBys() as $name => $groupBy) { + if ($name === $this->query->getAggregationUnit()->getUnitName()) { + $timeGroup = $groupBy; + } else { + $spaceGroup = $groupBy; + } + } + + return array($timeGroup, $spaceGroup); + } + + /** + * return an array of timeseries datasets. If the summarize flag is set true and there + * are more data series that the $limit then $limit + 1 datasets will be returned with + * the last one being the summarized version of the remainder. 
+ */ + public function getDatasets($limit, $offset, $summarize) + { + $summaryDataset = null; + + list($timeGroup, $spaceGroup) = $this->getGroupByClasses(); + + $statObj = reset($this->query->getStats()); + $seriesIds = $this->getSeriesIds($limit, $offset); + + if (!empty($seriesIds)) { + if ($summarize && $limit < $this->getUniqueCount()) { + $summaryDataset = $this->getSummarizedColumn( + $statObj->getId(), + $spaceGroup->getId(), + $this->getUniqueCount() - $limit, + $seriesIds + ); + } + + $this->query->addWhereAndJoin($spaceGroup->getId(), 'IN', $seriesIds); + } + + $statement = $this->query->getRawStatement(); + $statement->execute(); + + $columnTypes = array(); + for ($end = $statement->columnCount(), $i = 0; $i < $end; $i++) { + $raw_meta = $statement->getColumnMeta($i); + $columnTypes[$raw_meta['name']] = $raw_meta; + } + + $dataSets = array(); + foreach ($seriesIds as $seriesId) { + $dataSets[$seriesId] = null; + } + + while($row = $statement->fetch(\PDO::FETCH_ASSOC, \PDO::FETCH_ORI_NEXT)) { + + $seriesId = $row[$spaceGroup->getId() . '_id']; + $dimension = $row[$spaceGroup->getId() . '_name']; + + $dataSet = $dataSets[$seriesId]; + if ($dataSet === null) { + $dataSet = $dataSets[$seriesId] = new SimpleTimeseriesData($dimension); + + $dataSet->setUnit($statObj->getName()); // <- check this is correct + $dataSet->setStatistic($statObj); + $dataSet->setGroupName($dimension); + $dataSet->setGroupId($row[$spaceGroup->getId() . '_id']); // <- check this is correct + } + + $value_col = $statObj->getId(); + + $start_ts = $row[$timeGroup->getId() . 
'_start_ts']; + $value = SimpleDataset::convertSQLtoPHP( + $row[$value_col], + $columnTypes[$value_col]['native_type'], + $columnTypes[$value_col]['precision'] + ); + + $error = null; + $semStatId = \Realm\Realm::getStandardErrorStatisticFromStatistic($value_col); + + if (isset($this->query->_stats[$semStatId])) { + $error = SimpleDataset::convertSQLtoPHP( + $row[$semStatId], + $columnTypes[$semStatId]['native_type'], + $columnTypes[$semStatId]['precision'] + ); + } + + $dataSet->addDatum($start_ts, $value, $error); + } + + $retVal = array_values($dataSets); + + if ($summaryDataset !== null) { + $retVal[] = $summaryDataset; + } + + return $retVal; + } + + /** + * The choice of summary algorithm is determined based on the alias name + * for the statistic. + */ + protected function getSummaryOp($column_name, $normalizeBy) + { + $series_name = "All $normalizeBy Others"; + $sql = "SUM(t.$column_name)"; + $type = 'sum'; + + if (strpos($column_name, 'min_') !== false) + { + $series_name = "Minimum over all $normalizeBy others"; + $sql = "MIN(t.$column_name)"; + $type = 'min'; + } + elseif (strpos($column_name, 'max_') !== false) + { + $series_name = "Maximum over all $normalizeBy others"; + $sql = "MAX(t.$column_name)"; + $type = 'max'; + } + elseif (strpos($column_name, 'avg_') !== false + || strpos($column_name, 'count') !== false + || strpos($column_name, 'utilization') !== false + || strpos($column_name, 'rate') !== false + || strpos($column_name, 'expansion_factor') !== false) + { + $series_name = "Avg of $normalizeBy Others"; + $sql = "SUM(t.$column_name)"; + $type = 'avg'; + } + + return array($sql, $series_name, $type); + } + + protected function getSummarizedColumn( + $column_name, + $where_name, + $normalizeBy, + array $whereExcludeArray + ) { + // determine the selected time aggregation unit + $aggunit_name = $this->query->getAggregationUnit()->getUnitName(); + + // assign column names for returned data: + $start_ts_column_name = $aggunit_name . 
'_start_ts'; + + $query = clone $this->query; + + // group on the where clause column, which will be enforced after time agg. unit + $query->addGroupBy($where_name); + $query->addWhereAndJoin($where_name, "NOT IN", $whereExcludeArray); + + list($sql, $series_name, $type) = $this->getSummaryOp($column_name, $normalizeBy); + + $dataObject = new \DataWarehouse\Data\SimpleTimeseriesData($series_name); + $dataObject->setStatistic($query->_stats[$column_name]); + $dataObject->setUnit($query->_stats[$column_name]->getName()); + $dataObject->setGroupId(self::SUMMARY_GROUP_ID); + $dataObject->setGroupName($series_name); + + $query_string = "SELECT t.$start_ts_column_name AS $start_ts_column_name, + $sql AS $column_name " + . " FROM ( " + . $query->getQueryString() + . " ) t " + . " GROUP BY t.$start_ts_column_name"; + + $statement = DB::factory($query->_db_profile)->query($query_string, array(), true); + $statement->execute(); + + $columnTypes = array(); + for ($end = $statement->columnCount(), $i = 0; $i < $end; $i++) { + $raw_meta = $statement->getColumnMeta($i); + $columnTypes[$raw_meta['name']] = $raw_meta; + } + + while ( $row = $statement->fetch(\PDO::FETCH_ASSOC, \PDO::FETCH_ORI_NEXT)) { + $start_ts = $row[$start_ts_column_name]; + $value = SimpleDataset::convertSQLtoPHP( + $row[$column_name], + $columnTypes[$column_name]['native_type'], + $columnTypes[$column_name]['precision'] + ); + if ($type === 'avg') { + $value = $value / $normalizeBy; + } + $dataObject->addDatum($start_ts, $value, null); + } + + return $dataObject; + } + + /** + * Build a SimpleTimeseriesData object containing the timeseries data. 
+ */ + public function getTimestamps() + { + $dataStartTs = array(); + + foreach ($this->query->getTimestamps() as $raw_timetamp) { + $dataStartTs[] = $raw_timetamp['start_ts']; + } + + $column_name = $this->query->getAggregationUnit()->getUnitName(); + + $timestampsDataObject = new \DataWarehouse\Data\SimpleTimeseriesData( + $this->query->_group_bys[$column_name]->getId() + ); + + $timestampsDataObject->setStartTs($dataStartTs); + + return $timestampsDataObject; + } + + /** + * Returns the number of data series in this dataset. The count is determined from the + * aggregate version of the supplied timeseries query. + */ + public function getUniqueCount() + { + if ($this->series_count === null) { + $this->series_count = $this->agg_query->getCount(); + } + return $this->series_count; + } + + public function export($export_title = 'title') + { + $exportData = array( + 'title' => array( + 'title' => $export_title + ), + 'title2' => array( + 'parameters' => $this->query->roleParameterDescriptions + ), + 'duration' => array( + 'start' => $this->query->getStartDate(), + 'end' => $this->query->getEndDate(), + ), + 'headers' => array(), + 'rows' => array() + ); + + list($timeGroup, $spaceGroup) = $this->getGroupByClasses(); + + $exportData['headers'][] = $timeGroup->getName(); + + $stat = reset($this->query->getStats()); + $stat_unit = $stat->getUnit(); + + $seriesName = $stat->getName(); + if ( $seriesName != $stat_unit && strpos($seriesName, $stat_unit) === false) { + $seriesName .= ' (' . $stat_unit . ')'; + } + if (count($this->query->filterParameterDescriptions) > 0) { + $seriesName .= ' {' . implode(', ', $this->query->filterParameterDescriptions) . 
'}'; + } + + $dimensions = $this->getSeriesIds(null, null); + + $dimensionNames = array(); + $timeData = array(); + $timestamps = array(); + + $statement = $this->query->getRawStatement(); + $statement->execute(); + while($row = $statement->fetch(\PDO::FETCH_ASSOC, \PDO::FETCH_ORI_NEXT)) { + + $dimension = $row[$spaceGroup->getId() . '_id']; + + if (!isset($dimensionNames[$dimension])) { + $dimensionNames[$dimension] = $row[$spaceGroup->getId() . '_name']; + } + + $timeTs = $row[$timeGroup->getId() . '_start_ts']; + + if (!isset($timestamps[$timeTs]) ) { + $timestamps[$timeTs] = $row[$timeGroup->getId() . '_name']; + $timeData[$timeTs] = array(); + } + + $timeData[$timeTs][$dimension] = $row[$stat->getId()]; + } + + // Build header + foreach ($dimensions as $dimension) { + $exportData['headers'][] = "[{$dimensionNames[$dimension]}] $seriesName"; + } + + // Data are returned in time order, but every dimension may not have all timestamps + // so the timestamps array may not be in time order + ksort($timestamps); + + foreach ($timestamps as $timeTs => $timeName) { + $values = array($timeName); + + foreach ($dimensions as $dimension) { + if (isset($timeData[$timeTs][$dimension])) { + $values[] = $timeData[$timeTs][$dimension]; + } else { + $values[] = 0; + } + } + $exportData['rows'][] = $values; + } + + return $exportData; + } +} diff --git a/classes/DataWarehouse/Query/TimeseriesQuery.php b/classes/DataWarehouse/Query/TimeseriesQuery.php index e3767e5ea1..74e73ad190 100644 --- a/classes/DataWarehouse/Query/TimeseriesQuery.php +++ b/classes/DataWarehouse/Query/TimeseriesQuery.php @@ -94,6 +94,88 @@ public function __construct( $this->addGroupBy($aggregationUnitName); } + /** + * returns the equivalent aggregate query instance that has + * identical statistics, groupbys and where conditions. 
+ */ + public function getAggregateQuery() + { + $agg_query = new \DataWarehouse\Query\AggregateQuery( + $this->getRealmName(), + $this->_aggregation_unit->getUnitName(), + $this->_start_date, + $this->_end_date + ); + + foreach ($this->_group_bys as $groupBy) { + if ($groupBy->getId() === $this->_aggregation_unit->getUnitName()) { + // skip the time-based groupby + continue; + } + $agg_query->addGroupBy($groupBy->getId()); + } + + foreach ($this->_stats as $stat_name => $stat) { + $agg_query->addStat($stat_name); + } + + if (isset($this->sortInfo)) { + foreach ($this->sortInfo as $sort) { + $agg_query->addOrderBy($sort['column_name'], $sort['direction']); + } + } + + $agg_query->cloneParameters($this); + + return $agg_query; + } + + /** + * This call does not change the sort order of the timeseries query + * itself, rather it is used to change the sort order of the associated + * aggregate query that is returned by the getAggregateQuery() function. + */ + public function addOrderByAndSetSortInfo($data_description) + { + switch ($data_description->sort_type) { + case 'value_asc': + $this->sortInfo = array( + array( + 'column_name' => $data_description->metric, + 'direction' => 'asc' + ) + ); + break; + + case 'value_desc': + $this->sortInfo = array( + array( + 'column_name' => $data_description->metric, + 'direction' => 'desc' + ) + ); + break; + + case 'label_asc': + $this->sortInfo = array( + array( + 'column_name' => $data_description->group_by, + 'direction' => 'asc' + ) + ); + break; + + case 'label_desc': + $this->sortInfo = array( + array( + 'column_name' => $data_description->group_by, + 'direction' => 'desc' + ) + ); + break; + } + } + protected function setDuration( $start_date, $end_date diff --git a/classes/DataWarehouse/Visualization/HighChartTimeseries2.php b/classes/DataWarehouse/Visualization/HighChartTimeseries2.php index bfee47a01b..ba36217f31 100644 --- a/classes/DataWarehouse/Visualization/HighChartTimeseries2.php +++
b/classes/DataWarehouse/Visualization/HighChartTimeseries2.php @@ -2,6 +2,7 @@ namespace DataWarehouse\Visualization; use DataWarehouse; +use DataWarehouse\Data\TimeseriesDataset; /* * @@ -82,43 +83,6 @@ private function useMean($stat) { return $retVal; } - //------------------------------------------------- - // getDataname( $stat, $remainder_count ) - // - // Based on inspection of the Statistic Alias for the dataset, - // what operation should be reported for the remainder set? - // - // @param is $dataObj->getStatistic()->getAlias(); or $data_description->metric - // @param is number of full datasets to represent in chart - //------------------------------------------------- - private function getDataname($stat, $remainder_count) { - - $useMean = $this->useMean($stat ); - - $isMin = strpos($stat, 'min_') !== false ; - $isMax = strpos($stat, 'max_') !== false ; - - // Determine label for the summary dataset - $dataname - = ($useMean ? 'Avg of ' : 'All ') - . $remainder_count - . ' Others'; - - if ($isMin) { - $dataname - = 'Minimum over all ' - . $remainder_count - . ' others'; - } elseif ($isMax) { - $dataname - = 'Maximum over all ' - . $remainder_count - . ' others'; - } // if $isMin - - return $dataname; - } - // --------------------------------------------------------- // configure() // @@ -247,14 +211,6 @@ public function configure( $query->setFilters($data_description->filters); - // TODO JMS: special case for Timeseries. Handle after init() - if($data_description->filters->total === 1 && $data_description->group_by === 'none') - { - $data_description->group_by = $data_description->filters->data[0]->dimension_id; - } - // end TODO JMS: special case for Timeseries. 
Handle after init() - - // @refer ComplexDataset::init() $dataSources[$query->getDataSource()] = 1; $group_by = $query->addGroupBy($data_description->group_by); $this->_chart['dimensions'][$group_by->getName()] = $group_by->getHtmlDescription(); @@ -279,24 +235,14 @@ public function configure( $query->addOrderByAndSetSortInfo($data_description); - // JMS: - // and here we instantiate the dataset. - // Note that while this var name is a ComplexDataset in the parent class - // the item it contains is a Simple*Dataset. - // so when we iterate over datasets in parent class we are fiddling with SImple*Datsets - - $dataset = new \DataWarehouse\Data\SimpleTimeseriesDataset($query); - - // JMS: to here we have the ComplexDataset::init() function - $statisticObject = $query->_stats[$data_description->metric]; $decimals = $statisticObject->getPrecision(); $defaultYAxisLabel = 'yAxis'.$yAxisIndex; $yAxisLabel = ($data_description->combine_type=='percent'? '% of ':'').( ($this->_hasLegend && $dataSeriesCount > 1) - ? $dataset->getColumnUnit($data_description->metric, false) - : $dataset->getColumnLabel($data_description->metric, false) + ? $statisticObject->getUnit() + : $statisticObject->getName() ); if($this->_shareYAxis) { @@ -370,6 +316,8 @@ public function configure( $this->_chart['yAxis'][] = $yAxis; } // if($yAxis == null) + $dataset = new TimeseriesDataset($query); + $xAxisData = $dataset->getTimestamps(); $start_ts_array = array(); @@ -454,71 +402,13 @@ public function configure( $semDecimals = $semStatisticObject->getPrecision(); } - // get the full dataset count: how many unique values in the dimension we group by? - $datagroupFullCount = $dataset->getUniqueCount( - $data_description->group_by, - $data_description->realm - ); - $this->_total = max($this->_total, $datagroupFullCount); - - // If summarizeDataseries (Usage), use $limit to minimize the number of sorted - // queries we must execute. If ME, use the $limit (default 10) to enable paging - // thru dataset. 
- - // Query for the top $limit categories in the realm, over the selected time period - $datagroupDataObject = $dataset->getColumnUniqueOrdered( - 'dim_'.$data_description->group_by, - $limit, - $offset, - $data_description->realm - ); - - // Use the top $limit categories to build an iterator with $limit objects inside: - $yAxisDataObjectsIterator = $dataset->getColumnIteratorBy( - 'met_'.$data_description->metric, - $datagroupDataObject - ); - - // --- Set up dataset truncation for Usage tab support: ---- - // Populate an array with our iterator contents, but only up to $limit. - // (implicitly) run the queries and populate the array, but only up to $limit: - // that is taken care of by the limit on datagroupDataObject above. - $yAxisDataObjectsArray = array(); - foreach($yAxisDataObjectsIterator as $yAxisDataObject) - { - $yAxisDataObjectsArray[] = $yAxisDataObject; - } - - $dataTruncated = false; - if ( $summarizeDataseries && $datagroupFullCount > $limit ) - { - // Run one more query containing everything that was NOT in the top n. 
- // Populate SimpleTimeseriesData object yAxisTruncateObj with this - // summary information - $yAxisTruncateObj = $dataset->getSummarizedColumn( - $data_description->metric, - $data_description->group_by, - $this->_total - $limit, - $yAxisDataObjectsIterator->getLimitIds(), - $data_description->realm - ); + $this->_total = max($this->_total, $dataset->getUniqueCount()); - // set the remainder dataset label for plotting - $dataname = $this->getDataname($data_description->metric, $datagroupFullCount - $limit ); - $yAxisTruncateObj->setGroupName($dataname ); - // set label on the dataseries' legend - $yAxisTruncateObj->setName($dataname ); - - $yAxisDataObjectsArray[] = $yAxisTruncateObj; - $dataTruncated = true; - } - unset($yAxisDataObjectsIterator); + $yAxisDataObjectsArray = $dataset->getDatasets($limit, $offset, $summarizeDataseries); // operate on each yAxisDataObject, a SimpleTimeseriesData object // @refer HighChart2 line 866 - $numYAxisDataObjects = count($yAxisDataObjectsArray); - foreach($yAxisDataObjectsArray as $yIndex => $yAxisDataObject) { if( $yAxisDataObject != null) @@ -574,7 +464,7 @@ public function configure( ($values_count < 21 && $this->_width > \DataWarehouse\Visualization::$thumbnail_width) || $y_values_count == 1; - $isRemainder = $dataTruncated && ($yIndex === $numYAxisDataObjects - 1); + $isRemainder = $yAxisDataObject->getGroupId() === TimeseriesDataset::SUMMARY_GROUP_ID; $filterParametersTitle = $data_description->long_legend == 1?$query->getFilterParametersTitle():''; if($filterParametersTitle != '')