From ef1f6c87a88cf1819c68c1c8957ba657d49057e3 Mon Sep 17 00:00:00 2001 From: Aleksandr Solovev Date: Fri, 22 Dec 2023 16:35:11 +0100 Subject: [PATCH] feature: adding new parameters in pca (#2601) --- cpp/daal/include/algorithms/pca/pca_types.h | 2 + cpp/daal/include/services/error_indexes.h | 2 + .../src/algorithms/pca/pca_baseparameter.cpp | 2 +- cpp/daal/src/algorithms/pca/pca_dense_base.h | 3 + .../algorithms/pca/pca_dense_base_fpt_cpu.cpp | 2 +- .../src/algorithms/pca/pca_dense_base_impl.i | 33 +++ .../pca/pca_dense_correlation_base.h | 3 + .../pca/pca_dense_correlation_base_iface.h | 5 +- .../pca/pca_dense_correlation_base_impl.i | 48 +++- .../pca/pca_dense_correlation_batch_impl.i | 30 ++- .../pca/pca_dense_correlation_batch_kernel.h | 17 +- .../pca/pca_dense_correlation_online_kernel.h | 6 + .../src/algorithms/pca/pca_dense_svd_base.h | 26 +++ .../algorithms/pca/pca_dense_svd_batch_impl.i | 28 ++- .../pca/pca_dense_svd_batch_kernel.h | 8 + .../pca/pca_dense_svd_online_kernel.h | 4 + .../backend/cpu/finalize_train_kernel_cov.cpp | 67 +++++- .../backend/cpu/finalize_train_kernel_svd.cpp | 39 +++- .../dal/algo/pca/backend/cpu/infer_kernel.cpp | 65 +++++- .../backend/cpu/partial_train_kernel_cov.cpp | 4 + .../backend/cpu/partial_train_kernel_svd.cpp | 6 +- .../algo/pca/backend/cpu/train_kernel_cov.cpp | 51 +++- .../backend/cpu/train_kernel_precomputed.cpp | 41 ++-- .../algo/pca/backend/cpu/train_kernel_svd.cpp | 74 ++++-- .../gpu/finalize_train_kernel_cov_dpc.cpp | 221 +++++++++++++++--- .../algo/pca/backend/gpu/infer_kernel_dpc.cpp | 145 ++++++++++-- .../backend/gpu/train_kernel_cov_impl_dpc.cpp | 128 +++++++--- .../gpu/train_kernel_precomputed_impl_dpc.cpp | 3 +- cpp/oneapi/dal/algo/pca/common.cpp | 80 ++++++- cpp/oneapi/dal/algo/pca/common.hpp | 77 +++++- cpp/oneapi/dal/algo/pca/test/batch.cpp | 32 ++- cpp/oneapi/dal/algo/pca/test/fixture.hpp | 4 +- .../dal/algo/pca/test/serialization.cpp | 3 +- cpp/oneapi/dal/algo/pca/train_types.cpp | 57 ++++- 
cpp/oneapi/dal/algo/pca/train_types.hpp | 24 ++ .../api/algorithms/decomposition/pca.rst | 4 + .../pca/principal-component-analysis.rst | 2 +- .../includes/decomposition/pca-examples.rst | 2 +- ...ense_batch.cpp => pca_cor_dense_batch.cpp} | 7 +- ...se_online.cpp => pca_cor_dense_online.cpp} | 9 +- .../cpp/source/pca/pca_cov_dense_batch.cpp | 62 +++++ .../cpp/source/pca/pca_cov_dense_online.cpp | 70 ++++++ .../pca/pca_precomputed_dense_batch.cpp | 8 +- .../cpp/source/pca/pca_svd_dense_batch.cpp | 50 ++++ .../cpp/source/pca/pca_svd_dense_online.cpp | 54 +++++ examples/oneapi/data/pca_non_normalized.csv | 100 ++++++++ .../dpc/source/pca/pca_cor_dense_batch.cpp | 30 ++- .../dpc/source/pca/pca_cor_dense_online.cpp | 36 ++- .../dpc/source/pca/pca_cov_dense_batch.cpp | 74 ++++++ .../dpc/source/pca/pca_cov_dense_online.cpp | 81 +++++++ .../pca/pca_precomputed_cor_dense_batch.cpp | 8 +- .../pca/pca_precomputed_cov_dense_batch.cpp | 8 +- 52 files changed, 1720 insertions(+), 225 deletions(-) rename examples/oneapi/cpp/source/pca/{pca_dense_batch.cpp => pca_cor_dense_batch.cpp} (87%) rename examples/oneapi/cpp/source/pca/{pca_dense_online.cpp => pca_cor_dense_online.cpp} (87%) create mode 100644 examples/oneapi/cpp/source/pca/pca_cov_dense_batch.cpp create mode 100644 examples/oneapi/cpp/source/pca/pca_cov_dense_online.cpp create mode 100644 examples/oneapi/cpp/source/pca/pca_svd_dense_batch.cpp create mode 100644 examples/oneapi/cpp/source/pca/pca_svd_dense_online.cpp create mode 100644 examples/oneapi/data/pca_non_normalized.csv create mode 100644 examples/oneapi/dpc/source/pca/pca_cov_dense_batch.cpp create mode 100644 examples/oneapi/dpc/source/pca/pca_cov_dense_online.cpp diff --git a/cpp/daal/include/algorithms/pca/pca_types.h b/cpp/daal/include/algorithms/pca/pca_types.h index 8816cdf5505..8c8472b140b 100644 --- a/cpp/daal/include/algorithms/pca/pca_types.h +++ b/cpp/daal/include/algorithms/pca/pca_types.h @@ -672,6 +672,8 @@ class DAAL_EXPORT BaseBatchParameter : 
public daal::algorithms::Parameter DAAL_UINT64 resultsToCompute; /*!< 64 bit integer flag that indicates the results to compute */ size_t nComponents; /*!< number of components for reduced implementation */ bool isDeterministic; /*!< sign flip if required */ + bool doScale; /*!< scaling if required */ + bool isCorrelation; /*!< correlation is provided */ }; /** diff --git a/cpp/daal/include/services/error_indexes.h b/cpp/daal/include/services/error_indexes.h index 9c9c634cd08..1d5fa5124ce 100644 --- a/cpp/daal/include/services/error_indexes.h +++ b/cpp/daal/include/services/error_indexes.h @@ -267,6 +267,8 @@ enum ErrorID computation modes */ ErrorIncorrectNComponents = -7805, /*!< Incorrect nComponents parameter: nComponents should be less or equal to number of columns in testing dataset */ + ErrorIncorrectEigenValuesSum = -7806, /*!< The sum of eigenvalues is less or equal to zero */ + ErrorIncorrectSingularValuesDenominator = -7807, /*!< The denominator of eigenvalues is less or equal to zero */ // QR errors: -8000..-8199 ErrorQRInternal = -8000, /*!< QR internal error */ diff --git a/cpp/daal/src/algorithms/pca/pca_baseparameter.cpp b/cpp/daal/src/algorithms/pca/pca_baseparameter.cpp index e174f1b3cf9..05566fb00c2 100644 --- a/cpp/daal/src/algorithms/pca/pca_baseparameter.cpp +++ b/cpp/daal/src/algorithms/pca/pca_baseparameter.cpp @@ -31,7 +31,7 @@ namespace pca { namespace interface3 { -BaseBatchParameter::BaseBatchParameter() : resultsToCompute(none), nComponents(0), isDeterministic(false) {} +BaseBatchParameter::BaseBatchParameter() : resultsToCompute(none), nComponents(0), isDeterministic(false), doScale(true), isCorrelation(false) {} } // namespace interface3 } // namespace pca } // namespace algorithms diff --git a/cpp/daal/src/algorithms/pca/pca_dense_base.h b/cpp/daal/src/algorithms/pca/pca_dense_base.h index 4133b410e55..e7a4443f1e7 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_base.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_base.h @@ -42,6 
+42,9 @@ class PCADenseBase : public Kernel services::Status signFlipEigenvectors(NumericTable & eigenvectors) const; services::Status fillTable(NumericTable & table, algorithmFPType val) const; services::Status copyTable(NumericTable & source, NumericTable & dest) const; + services::Status computeExplainedVariancesRatio(const data_management::NumericTable & eigenvalues, + const data_management::NumericTable & variances, + data_management::NumericTable & explained_variances_ratio); private: void signFlipArray(size_t size, algorithmFPType * source) const; diff --git a/cpp/daal/src/algorithms/pca/pca_dense_base_fpt_cpu.cpp b/cpp/daal/src/algorithms/pca/pca_dense_base_fpt_cpu.cpp index 8954d2b77bd..921115764e1 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_base_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/pca/pca_dense_base_fpt_cpu.cpp @@ -25,7 +25,7 @@ namespace pca { namespace internal { -template class PCADenseBase; +template class DAAL_EXPORT PCADenseBase; } // namespace internal } // namespace pca } // namespace algorithms diff --git a/cpp/daal/src/algorithms/pca/pca_dense_base_impl.i b/cpp/daal/src/algorithms/pca/pca_dense_base_impl.i index 54424fd14e8..1876d4ba655 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_base_impl.i +++ b/cpp/daal/src/algorithms/pca/pca_dense_base_impl.i @@ -27,6 +27,39 @@ namespace internal { using namespace daal::internal; +template +services::Status PCADenseBase::computeExplainedVariancesRatio(const data_management::NumericTable & eigenvalues, + const data_management::NumericTable & variances, + data_management::NumericTable & explained_variances_ratio) +{ + const size_t nComponents = eigenvalues.getNumberOfColumns(); + const size_t nColumns = variances.getNumberOfColumns(); + + ReadRows eigenValuesBlock(const_cast(eigenvalues), 0, 1); + DAAL_CHECK_BLOCK_STATUS(eigenValuesBlock); + const algorithmFPType * const eigenValuesArray = eigenValuesBlock.get(); + ReadRows variancesBlock(const_cast(variances), 0, 1); + 
DAAL_CHECK_BLOCK_STATUS(variancesBlock); + const algorithmFPType * const variancesBlockArray = variancesBlock.get(); + WriteRows explainedVariancesRatioBlock(explained_variances_ratio, 0, 1); + DAAL_CHECK_MALLOC(explainedVariancesRatioBlock.get()); + algorithmFPType * explainedVariancesRatioArray = explainedVariancesRatioBlock.get(); + algorithmFPType sum = 0; + for (size_t i = 0; i < nColumns; i++) + { + sum += variancesBlockArray[i]; + } + if (sum <= algorithmFPType(0)) + { + return services::Status(services::ErrorIncorrectEigenValuesSum); + } + for (size_t i = 0; i < nComponents; i++) + { + explainedVariancesRatioArray[i] = eigenValuesArray[i] / sum; + } + return services::Status(); +} + template services::Status PCADenseBase::copyTable(NumericTable & source, NumericTable & dest) const { diff --git a/cpp/daal/src/algorithms/pca/pca_dense_correlation_base.h b/cpp/daal/src/algorithms/pca/pca_dense_correlation_base.h index 11890ea1698..271162199b3 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_correlation_base.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_correlation_base.h @@ -47,6 +47,9 @@ class PCACorrelationBase : public PCACorrelationBaseIface, publ data_management::NumericTable & eigenvalues) DAAL_C11_OVERRIDE; services::Status computeEigenvectorsInplace(size_t nFeatures, algorithmFPType * eigenvectors, algorithmFPType * eigenvalues); services::Status sortEigenvectorsDescending(size_t nFeatures, algorithmFPType * eigenvectors, algorithmFPType * eigenvalues); + services::Status computeSingularValues(const data_management::NumericTable & eigenvalues, data_management::NumericTable & variances, + size_t nRows); + services::Status computeVariancesFromCov(const data_management::NumericTable & correlation, data_management::NumericTable & variances); services::Status signFlipEigenvectors(NumericTable & eigenvectors) const DAAL_C11_OVERRIDE; services::Status fillTable(NumericTable & table, algorithmFPType val) const DAAL_C11_OVERRIDE; services::Status 
copyVarianceFromCovarianceTable(NumericTable & source, NumericTable & dest) const; diff --git a/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_iface.h b/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_iface.h index 0299cdb3ba6..f9a8294bf0d 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_iface.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_iface.h @@ -41,9 +41,8 @@ class PCACorrelationBaseIface virtual services::Status computeCorrelationEigenvalues(const data_management::NumericTable & correlation, data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues) = 0; - - virtual services::Status signFlipEigenvectors(NumericTable & eigenvectors) const = 0; - virtual services::Status fillTable(NumericTable & table, algorithmFPType val) const = 0; + virtual services::Status signFlipEigenvectors(NumericTable & eigenvectors) const = 0; + virtual services::Status fillTable(NumericTable & table, algorithmFPType val) const = 0; }; } // namespace internal diff --git a/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_impl.i b/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_impl.i index e409e130451..e62deb2f96b 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_impl.i +++ b/cpp/daal/src/algorithms/pca/pca_dense_correlation_base_impl.i @@ -53,6 +53,27 @@ void PCACorrelationBase::copyArray(size_t size, const algo } } +template +services::Status PCACorrelationBase::computeVariancesFromCov(const data_management::NumericTable & covariance, + data_management::NumericTable & variances) +{ + size_t nFeatures = covariance.getNumberOfRows(); + DAAL_OVERFLOW_CHECK_BY_MULTIPLICATION(size_t, nFeatures, sizeof(algorithmFPType)); + ReadRows covarianceBlock(const_cast(covariance), 0, nFeatures); + DAAL_CHECK_BLOCK_STATUS(covarianceBlock); + const algorithmFPType * covarianceArray = covarianceBlock.get(); + + WriteRows variancesBlock(variances, 0, 1); + 
DAAL_CHECK_MALLOC(variancesBlock.get()); + algorithmFPType * variancesArray = variancesBlock.get(); + + for (size_t i = 0ul; i < nFeatures; ++i) + { + variancesArray[i] = covarianceArray[i * nFeatures + i]; + } + return services::Status(); +} + template services::Status PCACorrelationBase::correlationFromCovarianceTable(NumericTable & covariance) const { @@ -67,12 +88,12 @@ services::Status PCACorrelationBase::correlationFromCovari algorithmFPType * covarianceArray = covarianceBlock.get(); algorithmFPType * diagInvSqrts = diagInvSqrtsArray.get(); - for (size_t i = 0; i < nFeatures; i++) + for (size_t i = 0ul; i < nFeatures; ++i) { diagInvSqrts[i] = 1.0 / daal::internal::MathInst::sSqrt(covarianceArray[i * nFeatures + i]); } - for (size_t i = 0; i < nFeatures; i++) + for (size_t i = 0ul; i < nFeatures; ++i) { for (size_t j = 0; j < i; j++) { @@ -82,7 +103,7 @@ services::Status PCACorrelationBase::correlationFromCovari } /* Copy results into symmetric upper triangle */ - for (size_t i = 0; i < nFeatures; i++) + for (size_t i = 0ul; i < nFeatures; ++i) { for (size_t j = 0; j < i; j++) { @@ -113,6 +134,27 @@ services::Status PCACorrelationBase::copyVarianceFromCovar return services::Status(); } +template +services::Status PCACorrelationBase::computeSingularValues(const data_management::NumericTable & eigenvalues, + data_management::NumericTable & singular_values, size_t nRows) +{ + typedef daal::internal::MathInst Math; + const size_t nComponents = eigenvalues.getNumberOfColumns(); + ReadRows eigenValuesBlock(const_cast(eigenvalues), 0, 1); + DAAL_CHECK_BLOCK_STATUS(eigenValuesBlock); + const algorithmFPType * const eigenValuesArray = eigenValuesBlock.get(); + WriteRows singularValuesBlock(singular_values, 0, 1); + DAAL_CHECK_MALLOC(singularValuesBlock.get()); + algorithmFPType * singularValuesArray = singularValuesBlock.get(); + const algorithmFPType factor = nRows - 1; + for (size_t i = 0ul; i < nComponents; ++i) + { + singularValuesArray[i] = factor * 
eigenValuesArray[i]; + } + Math::vSqrt(nComponents, singularValuesArray, singularValuesArray); + return services::Status(); +} + template services::Status PCACorrelationBase::computeCorrelationEigenvalues(const data_management::NumericTable & correlation, data_management::NumericTable & eigenvectors, diff --git a/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_impl.i b/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_impl.i index 0c2f430dcba..abd78acfc47 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_impl.i +++ b/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_impl.i @@ -43,6 +43,7 @@ namespace internal using namespace daal::services::internal; using namespace daal::data_management; using namespace daal::internal; + template services::Status PCACorrelationKernel::compute(bool isCorrelation, const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, @@ -62,7 +63,7 @@ template services::Status PCACorrelationKernel::compute( bool isCorrelation, bool isDeterministic, const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, DAAL_UINT64 resultsToCompute, data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues, - data_management::NumericTable & means, data_management::NumericTable & variances) + data_management::NumericTable & means, data_management::NumericTable & variances, bool doScale) { DAAL_ITTNOTIFY_SCOPED_TASK(compute); @@ -108,7 +109,7 @@ services::Status PCACorrelationKernel::compute( DAAL_ITTNOTIFY_SCOPED_TASK(compute.full.copyVariances); DAAL_CHECK_STATUS(status, this->copyVarianceFromCovarianceTable(covarianceTable, variances)); } - + if (doScale) { DAAL_ITTNOTIFY_SCOPED_TASK(compute.full.correlationFromCovariance); DAAL_CHECK_STATUS(status, this->correlationFromCovarianceTable(covarianceTable)); @@ -129,6 +130,31 @@ services::Status PCACorrelationKernel::compute( return status; } +template +services::Status 
PCACorrelationKernel::compute( + const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, data_management::NumericTable & eigenvectors, + data_management::NumericTable & eigenvalues, data_management::NumericTable & means, data_management::NumericTable & variances, + data_management::NumericTable * singular_values, data_management::NumericTable * explained_variances_ratio, const BaseBatchParameter * parameter) +{ + DAAL_ITTNOTIFY_SCOPED_TASK(compute); + + services::Status status; + DAAL_CHECK_STATUS(status, this->compute(parameter->isCorrelation, parameter->isDeterministic, dataTable, covarianceAlg, parameter->resultsToCompute, eigenvectors, + eigenvalues, means, variances, parameter->doScale)); /* stop here if the base eigen-decomposition failed; its outputs feed the steps below */ + + if (singular_values != nullptr) + { + DAAL_ITTNOTIFY_SCOPED_TASK(compute.correlation.computeSingularValues); + DAAL_CHECK_STATUS(status, this->computeSingularValues(eigenvalues, *singular_values, dataTable.getNumberOfRows())); + } + if (explained_variances_ratio != nullptr) + { + DAAL_ITTNOTIFY_SCOPED_TASK(compute.correlation.computeExplainedVariancesRatio); + DAAL_CHECK_STATUS(status, this->computeExplainedVariancesRatio(eigenvalues, variances, *explained_variances_ratio)); + } + return status; +} + } // namespace internal } // namespace pca } // namespace algorithms diff --git a/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_kernel.h b/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_kernel.h index 9f5d4c5f2b1..9a5b26c1be0 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_kernel.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_correlation_batch_kernel.h @@ -42,13 +42,28 @@ class PCACorrelationKernel : public PCACorrelationB { public: explicit PCACorrelationKernel() {}; + + using PCACorrelationBase::computeCorrelationEigenvalues; + + using PCACorrelationBase::computeSingularValues; + + using PCACorrelationBase::computeVariancesFromCov; + + using PCADenseBase::computeExplainedVariancesRatio; + services::Status compute(bool 
isCorrelation, const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues); services::Status compute(bool isCorrelation, bool isDeterministic, const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, DAAL_UINT64 resultsToCompute, data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues, data_management::NumericTable & means, - data_management::NumericTable & variances); + data_management::NumericTable & variances, bool doScale = true); + + services::Status compute(const data_management::NumericTable & dataTable, covariance::BatchImpl * covarianceAlg, + data_management::NumericTable & eigenvectors, data_management::NumericTable & eigenvalues, + data_management::NumericTable & means, data_management::NumericTable & variances, + data_management::NumericTable * singular_values, data_management::NumericTable * explained_variances_ratio, + const BaseBatchParameter * parameter); }; } // namespace internal diff --git a/cpp/daal/src/algorithms/pca/pca_dense_correlation_online_kernel.h b/cpp/daal/src/algorithms/pca/pca_dense_correlation_online_kernel.h index 6bce44890ab..aa40f64010f 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_correlation_online_kernel.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_correlation_online_kernel.h @@ -45,6 +45,12 @@ class PCACorrelationKernel : public PCACorrelation using PCACorrelationBase::computeCorrelationEigenvalues; + using PCACorrelationBase::computeSingularValues; + + using PCACorrelationBase::computeVariancesFromCov; + + using PCADenseBase::computeExplainedVariancesRatio; + services::Status compute(const data_management::NumericTablePtr & pData, PartialResult * partialResult, const OnlineParameter * parameter); diff --git a/cpp/daal/src/algorithms/pca/pca_dense_svd_base.h b/cpp/daal/src/algorithms/pca/pca_dense_svd_base.h index db713564cad..04c540c47eb 
100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_svd_base.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_svd_base.h @@ -35,6 +35,10 @@ namespace pca { namespace internal { +using namespace daal::services::internal; +using namespace daal::data_management; +using namespace daal::internal; + enum InputDataType { nonNormalizedDataset = 0, /*!< Original, non-normalized data set */ @@ -47,12 +51,34 @@ class PCASVDKernelBase : public PCADenseBase { public: PCASVDKernelBase() {} + using PCADenseBase::computeExplainedVariancesRatio; virtual ~PCASVDKernelBase() {} protected: + services::Status computeEigenValues(const data_management::NumericTable & singular_values, data_management::NumericTable & eigenvalues, + size_t nRows); services::Status scaleSingularValues(data_management::NumericTable & eigenvaluesTable, size_t nVectors); }; +template +services::Status PCASVDKernelBase::computeEigenValues(const data_management::NumericTable & singular_values, + data_management::NumericTable & eigenvalues, size_t nRows) +{ + const size_t nComponents = singular_values.getNumberOfColumns(); + ReadRows SingularValuesBlock(const_cast(singular_values), 0, 1); + DAAL_CHECK_BLOCK_STATUS(SingularValuesBlock); + const algorithmFPType * const SingularValuesArray = SingularValuesBlock.get(); + WriteRows EigenValuesBlock(eigenvalues, 0, 1); + DAAL_CHECK_MALLOC(EigenValuesBlock.get()); + algorithmFPType * EigenValuesArray = EigenValuesBlock.get(); + if (nRows <= 1) return services::Status(services::ErrorIncorrectSingularValuesDenominator); /* nRows is unsigned: compare before subtracting so nRows == 0 cannot wrap around */ + for (size_t i = 0; i < nComponents; i++) + { + EigenValuesArray[i] = SingularValuesArray[i] * SingularValuesArray[i] / (nRows - 1); + } + return services::Status(); +} + template services::Status PCASVDKernelBase::scaleSingularValues(NumericTable & eigenvaluesTable, size_t nVectors) { diff --git a/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_impl.i b/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_impl.i index e9bffbfc841..352dd0b229c 100644 --- 
a/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_impl.i +++ b/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_impl.i @@ -102,7 +102,10 @@ services::Status PCASVDBatchKernel::compute } DAAL_CHECK_STATUS(status, this->decompose(normalizedData, eigenvalues, eigenvectors)); - DAAL_CHECK_STATUS(status, this->scaleSingularValues(eigenvalues, data.getNumberOfRows())); + if (parameter->doScale) + { + DAAL_CHECK_STATUS(status, this->scaleSingularValues(eigenvalues, data.getNumberOfRows())); + } if (parameter->isDeterministic) { DAAL_CHECK_STATUS(status, this->signFlipEigenvectors(eigenvectors)); @@ -111,6 +114,29 @@ services::Status PCASVDBatchKernel::compute return status; } +template +services::Status PCASVDBatchKernel::compute( + InputDataType type, data_management::NumericTable & data, data_management::NumericTable & eigenvectors, + data_management::NumericTable & singular_values, data_management::NumericTable & means, data_management::NumericTable & variances, + data_management::NumericTable * eigenvalues, data_management::NumericTable * explained_variances_ratio, const ParameterType * parameter) +{ + Status status; + DAAL_CHECK_STATUS(status, this->compute(type, data, parameter, singular_values, eigenvectors, means, variances)); /* stop here if the decomposition failed */ + if (eigenvalues != nullptr && parameter->doScale == false) + { + DAAL_CHECK_STATUS(status, this->computeEigenValues(singular_values, *eigenvalues, data.getNumberOfRows())); + } + else if (eigenvalues != nullptr) /* eigenvalues is an optional output: never dereference it when absent */ + { + DAAL_CHECK_STATUS(status, this->copyTable(singular_values, *eigenvalues)); + } + if (eigenvalues != nullptr && explained_variances_ratio != nullptr) /* the ratio is derived from the eigenvalues table */ + { + DAAL_CHECK_STATUS(status, this->computeExplainedVariancesRatio(*eigenvalues, variances, *explained_variances_ratio)); + } + return status; +} + /********************* tls_data_t class *******************************************************/ template struct tls_data_t diff --git a/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_kernel.h b/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_kernel.h index 9b353af6d92..0e0a639d986 100644 --- 
a/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_kernel.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_svd_batch_kernel.h @@ -43,6 +43,9 @@ class PCASVDBatchKernel : public PCASVDKernelBase { public: PCASVDBatchKernel() {}; + using PCASVDKernelBase::computeEigenValues; + + using PCADenseBase::computeExplainedVariancesRatio; services::Status compute(InputDataType type, const data_management::NumericTablePtr & data, data_management::NumericTable & eigenvalues, data_management::NumericTable & eigenvectors); @@ -51,6 +54,11 @@ class PCASVDBatchKernel : public PCASVDKernelBase data_management::NumericTable & eigenvalues, data_management::NumericTable & eigenvectors, data_management::NumericTable & means, data_management::NumericTable & variances); + services::Status compute(InputDataType type, data_management::NumericTable & data, data_management::NumericTable & eigenvectors, + data_management::NumericTable & singular_values, data_management::NumericTable & means, + data_management::NumericTable & variances, data_management::NumericTable * eigenvalues, + data_management::NumericTable * explained_variances_ratio, const ParameterType * parameter); + protected: services::Status normalizeDataset(const data_management::NumericTablePtr & data, data_management::NumericTablePtr & normalizedData); diff --git a/cpp/daal/src/algorithms/pca/pca_dense_svd_online_kernel.h b/cpp/daal/src/algorithms/pca/pca_dense_svd_online_kernel.h index af43d8db012..712f5c48515 100644 --- a/cpp/daal/src/algorithms/pca/pca_dense_svd_online_kernel.h +++ b/cpp/daal/src/algorithms/pca/pca_dense_svd_online_kernel.h @@ -44,6 +44,10 @@ class PCASVDOnlineKernel : public PCASVDKernelBase public: PCASVDOnlineKernel() {} + using PCASVDKernelBase::computeEigenValues; + + using PCASVDKernelBase::computeExplainedVariancesRatio; + services::Status compute(InputDataType type, const data_management::NumericTablePtr & data, data_management::NumericTable & nObservations, data_management::NumericTable & auxiliaryTable, 
data_management::NumericTable & sumSVD, data_management::NumericTable & sumSquaresSVD); diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp index d13c70e5626..b72836d655e 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp @@ -87,9 +87,20 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx auto arr_cor_matrix = array::empty(column_count * column_count); const auto daal_cor_matrix = interop::convert_to_daal_homogen_table(arr_cor_matrix, column_count, column_count); + auto arr_singular_values = array::empty(1 * component_count); + const auto daal_singular_values = + interop::convert_to_daal_homogen_table(arr_singular_values, 1, component_count); + auto arr_vars = array::empty(1 * column_count); + const auto daal_variances = interop::convert_to_daal_homogen_table(arr_vars, 1, column_count); + auto arr_explained_variances_ratio = array::empty(1 * component_count); + const auto daal_explained_variances_ratio = + interop::convert_to_daal_homogen_table(arr_explained_variances_ratio, 1, component_count); /* table shape must match the component_count-sized buffer */ daal_cov::Parameter daal_parameter; daal_parameter.outputMatrixType = daal_cov::correlationMatrix; + if (desc.get_normalization_mode() == normalization::mean_center) { + daal_parameter.outputMatrixType = daal_cov::covarianceMatrix; + } interop::status_to_exception( interop::call_daal_kernel_finalize_compute( ctx, @@ -101,12 +112,11 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx &daal_parameter, &daal_hyperparameter)); - const auto data_to_compute = daal_cor_matrix; { const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { constexpr auto cpu_type = interop::to_daal_cpu_type::value; return daal_pca_cor_kernel_t().computeCorrelationEigenvalues( - *data_to_compute, + *daal_cor_matrix, *daal_eigenvectors, 
*daal_eigenvalues); }); @@ -114,15 +124,62 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx interop::status_to_exception(status); } + { + const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + constexpr auto cpu_type = interop::to_daal_cpu_type::value; + return daal_pca_cor_kernel_t().computeSingularValues( + *daal_eigenvalues, + *daal_singular_values, + rows_count_global); + }); + + interop::status_to_exception(status); + } + + { + const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + constexpr auto cpu_type = interop::to_daal_cpu_type::value; + return daal_pca_cor_kernel_t().computeVariancesFromCov( + *daal_cor_matrix, + *daal_variances); + }); + interop::status_to_exception(status); + } + + { + const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + constexpr auto cpu_type = interop::to_daal_cpu_type::value; + return daal_pca_cor_kernel_t().computeExplainedVariancesRatio( + *daal_eigenvalues, + *daal_variances, + *daal_explained_variances_ratio); + }); + + interop::status_to_exception(status); + } + if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto mdl = model_t{}.set_eigenvectors( - homogen_table::wrap(arr_eigvec, component_count, column_count)); - result.set_model(mdl); + result.set_eigenvectors(homogen_table::wrap(arr_eigvec, component_count, column_count)); } if (desc.get_result_options().test(result_options::eigenvalues)) { result.set_eigenvalues(homogen_table::wrap(arr_eigval, 1, component_count)); } + if (desc.get_result_options().test(result_options::singular_values)) { + result.set_singular_values(homogen_table::wrap(arr_singular_values, 1, component_count)); + } + + if (desc.get_result_options().test(result_options::vars)) { + result.set_variances(homogen_table::wrap(arr_vars, 1, column_count)); + } + if (desc.get_result_options().test(result_options::explained_variances_ratio)) { + result.set_explained_variances_ratio( + 
homogen_table::wrap(arr_explained_variances_ratio, 1, component_count)); + } + + if (desc.get_result_options().test(result_options::means)) { + result.set_means(homogen_table::wrap(arr_means, 1, column_count)); + } return result; } diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp index ada6b7786db..d88ad53ec01 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp @@ -45,12 +45,13 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx const std::int64_t component_count = get_component_count(desc, input.get_partial_crossproduct()); const std::int64_t column_count = input.get_partial_crossproduct().get_column_count(); - + auto rows_count_global = + row_accessor(input.get_partial_n_rows()).pull({ 0, -1 })[0]; auto result = train_result{}.set_result_options(desc.get_result_options()); daal::services::SharedPtr DataCollectionPtr; auto arr_eigvec = array::empty(column_count * column_count); auto arr_eigval = array::empty(1 * column_count); - + auto reshaped_eigvec = array::empty(1 * component_count); const auto daal_eigenvectors = interop::convert_to_daal_homogen_table(arr_eigvec, column_count, column_count); const auto daal_eigenvalues = @@ -65,7 +66,12 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx interop::copy_to_daal_homogen_table(input.get_auxiliary_table(i)); decomposeCollection->push_back(daal_crossproduct); } + daal_pca::internal::InputDataType dtype = daal_pca::internal::nonNormalizedDataset; + + if (desc.get_data_normalization() == normalization::zscore) { + dtype = daal_pca::internal::normalizedDataset; + } interop::status_to_exception( interop::call_daal_kernel_finalize_merge(ctx, dtype, @@ -73,16 +79,35 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx *daal_eigenvalues, 
*daal_eigenvectors, decomposeCollection)); - if (desc.get_result_options().test(result_options::eigenvectors)) { - auto reshaped_eigvec = arr_eigvec.get_slice(0, component_count * column_count); - const auto mdl = model_t{}.set_eigenvectors( + reshaped_eigvec = arr_eigvec.get_slice(0, component_count * column_count); + result.set_eigenvectors( homogen_table::wrap(reshaped_eigvec, component_count, column_count)); - result.set_model(mdl); } + auto reshaped_eigval = arr_eigval.get_slice(0, component_count); if (desc.get_result_options().test(result_options::eigenvalues)) { - auto reshaped_eigval = arr_eigval.get_slice(0, component_count); + const auto daal_singular_values = + interop::convert_to_daal_homogen_table(reshaped_eigval, 1, component_count); + result.set_singular_values(homogen_table::wrap(reshaped_eigval, 1, component_count)); + if (desc.get_normalization_mode() == normalization::mean_center) { + const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + constexpr auto cpu_type = interop::to_daal_cpu_type::value; + return daal_svd_kernel_t().computeEigenValues( + *daal_singular_values, + *daal_eigenvalues, + rows_count_global); + }); + + interop::status_to_exception(status); + result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); + } + else { + result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); + } + } + + if (desc.whiten()) { result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); } diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp index 7c495ef41c2..468f84e77cc 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp @@ -45,24 +45,69 @@ static result_t call_daal_kernel(const context_cpu& ctx, const model_t& model) { const std::int64_t row_count = data.get_row_count(); const std::int64_t component_count = 
get_component_count(desc, data); - + const auto sklearn_behavior = desc.get_normalization_mode() == normalization::mean_center; + const auto sklearn_behavior_whiten = sklearn_behavior && desc.whiten(); + const auto daal_behavior_whiten = + desc.get_normalization_mode() == normalization::zscore && desc.whiten(); dal::detail::check_mul_overflow(row_count, component_count); auto arr_result = array::empty(row_count * component_count); const auto daal_data = interop::convert_to_daal_table(data); const auto daal_eigenvectors = interop::convert_to_daal_table(model.get_eigenvectors()); + const auto daal_result = interop::convert_to_daal_homogen_table(arr_result, row_count, component_count); + if (sklearn_behavior_whiten) { + const auto daal_means = interop::convert_to_daal_table(model.get_means()); + const auto daal_eigenvalues = + interop::convert_to_daal_table(model.get_eigenvalues()); + interop::status_to_exception( + interop::call_daal_kernel(ctx, + *daal_data, + *daal_eigenvectors, + daal_means.get(), + nullptr, + daal_eigenvalues.get(), + *daal_result)); + } + else if (sklearn_behavior) { + const auto daal_means = interop::convert_to_daal_table(model.get_means()); + interop::status_to_exception( + interop::call_daal_kernel(ctx, + *daal_data, + *daal_eigenvectors, + daal_means.get(), + nullptr, + nullptr, + *daal_result)); + } + else if (daal_behavior_whiten) { + const auto daal_means = interop::convert_to_daal_table(model.get_means()); + const auto daal_eigenvalues = + interop::convert_to_daal_table(model.get_eigenvalues()); + const auto daal_variances = interop::convert_to_daal_table(model.get_variances()); - interop::status_to_exception( - interop::call_daal_kernel(ctx, - *daal_data, - *daal_eigenvectors, - nullptr, - nullptr, - nullptr, - *daal_result)); - + interop::status_to_exception( + interop::call_daal_kernel(ctx, + *daal_data, + *daal_eigenvectors, + daal_means.get(), + daal_variances.get(), + daal_eigenvalues.get(), + *daal_result)); + } + else { + const 
auto daal_means = interop::convert_to_daal_table(model.get_means()); + const auto daal_variances = interop::convert_to_daal_table(model.get_variances()); + interop::status_to_exception( + interop::call_daal_kernel(ctx, + *daal_data, + *daal_eigenvectors, + daal_means.get(), + daal_variances.get(), + nullptr, + *daal_result)); + } return result_t{}.set_transformed_data( dal::detail::homogen_table_builder{}.reset(arr_result, row_count, component_count).build()); } diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp index 03fc0c3c030..ec2622ff6cd 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_cov.cpp @@ -51,8 +51,12 @@ static partial_train_result call_daal_kernel_partial_train( const std::int64_t component_count = input.get_data().get_column_count(); const auto input_ = input.get_prev(); daal_cov::Parameter daal_parameter; + daal_parameter.outputMatrixType = daal_cov::correlationMatrix; + if (desc.get_normalization_mode() == normalization::mean_center) { + daal_parameter.outputMatrixType = daal_cov::covarianceMatrix; + } const auto data = input.get_data(); ONEDAL_ASSERT(data.has_data()); const auto daal_data = interop::convert_to_daal_table(data); diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_svd.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_svd.cpp index 21d00cce314..518406633a0 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_svd.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/partial_train_kernel_svd.cpp @@ -55,7 +55,6 @@ static partial_train_result call_daal_kernel_partial_train( const descriptor_t& desc, const partial_train_input& input) { const std::int64_t component_count = input.get_data().get_column_count(); - const auto input_ = input.get_prev(); const auto data = input.get_data(); @@ -65,6 +64,11 @@ static 
partial_train_result call_daal_kernel_partial_train( const bool has_nobs_data = input_.get_partial_n_rows().has_data(); daal_pca::internal::InputDataType dtype = daal_pca::internal::nonNormalizedDataset; + + if (desc.get_data_normalization() == normalization::zscore) { + dtype = daal_pca::internal::normalizedDataset; + } + if (has_nobs_data) { auto result = update_tables(input); auto daal_crossproduct_svd = diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp index d089c5c9b9b..d27cb10dfd3 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_cov.cpp @@ -44,16 +44,22 @@ template static result_t call_daal_kernel(const context_cpu& ctx, const descriptor_t& desc, const table& data) { + const std::int64_t row_count = data.get_row_count(); + ONEDAL_ASSERT(row_count > 0); const std::int64_t column_count = data.get_column_count(); ONEDAL_ASSERT(column_count > 0); const std::int64_t component_count = get_component_count(desc, data); ONEDAL_ASSERT(component_count > 0); + auto result = train_result{}.set_result_options(desc.get_result_options()); - dal::detail::check_mul_overflow(column_count, component_count); + auto arr_eigvec = array::empty(column_count * component_count); auto arr_eigval = array::empty(1 * component_count); auto arr_means = array::empty(1 * column_count); auto arr_vars = array::empty(1 * column_count); + auto arr_singular_values = array::empty(1 * component_count); + auto arr_explained_variances_ratio = array::empty(1 * component_count); + const auto daal_data = interop::convert_to_daal_table(data); const auto daal_eigenvectors = interop::convert_to_daal_homogen_table(arr_eigvec, component_count, column_count); @@ -61,6 +67,10 @@ static result_t call_daal_kernel(const context_cpu& ctx, interop::convert_to_daal_homogen_table(arr_eigval, 1, component_count); const auto daal_means = 
interop::convert_to_daal_homogen_table(arr_means, 1, column_count); const auto daal_variances = interop::convert_to_daal_homogen_table(arr_vars, 1, column_count); + const auto daal_singular_values = + interop::convert_to_daal_homogen_table(arr_singular_values, 1, component_count); + const auto daal_explained_variances_ratio = + interop::convert_to_daal_homogen_table(arr_explained_variances_ratio, 1, component_count); daal_cov::Batch covariance_alg; covariance_alg.input.set(daal_cov::data, daal_data); @@ -70,40 +80,57 @@ static result_t call_daal_kernel(const context_cpu& ctx, /// to be changed to passing the values from the performance model std::int64_t blockSize = 140; if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) { - const std::int64_t row_count = data.get_row_count(); if (5000 < row_count && row_count <= 50000) { blockSize = 1024; } } + interop::status_to_exception( daal_hyperparameter.set(daal_cov::internal::denseUpdateStepBlockSize, blockSize)); covariance_alg.setHyperparameter(&daal_hyperparameter); - constexpr bool is_correlation = false; - constexpr std::uint64_t results_to_compute = - std::uint64_t(daal_pca::mean | daal_pca::variance | daal_pca::eigenvalue); + daal::algorithms::pca::BaseBatchParameter daal_pca_parameter; + + daal_pca_parameter.isDeterministic = desc.get_deterministic(); + + daal_pca_parameter.resultsToCompute = static_cast( + std::uint64_t(daal_pca::mean | daal_pca::variance | daal_pca::eigenvalue)); + + daal_pca_parameter.isCorrelation = false; + + if (desc.get_normalization_mode() == normalization::mean_center) { + daal_pca_parameter.doScale = false; + } interop::status_to_exception(interop::call_daal_kernel( ctx, - is_correlation, - desc.get_deterministic(), *daal_data, &covariance_alg, - static_cast(results_to_compute), *daal_eigenvectors, *daal_eigenvalues, *daal_means, - *daal_variances)); + *daal_variances, + daal_singular_values.get(), + daal_explained_variances_ratio.get(), + &daal_pca_parameter)); 
if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto mdl = model_t{}.set_eigenvectors( - homogen_table::wrap(arr_eigvec, component_count, column_count)); - result.set_model(mdl); + result.set_eigenvectors(homogen_table::wrap(arr_eigvec, component_count, column_count)); } if (desc.get_result_options().test(result_options::eigenvalues)) { result.set_eigenvalues(homogen_table::wrap(arr_eigval, 1, component_count)); } + + if (desc.get_result_options().test(result_options::singular_values)) { + result.set_singular_values(homogen_table::wrap(arr_singular_values, 1, component_count)); + } + + if (desc.get_result_options().test(result_options::explained_variances_ratio)) { + result.set_explained_variances_ratio( + homogen_table::wrap(arr_explained_variances_ratio, 1, component_count)); + } + if (desc.get_result_options().test(result_options::vars)) { result.set_variances(homogen_table::wrap(arr_vars, 1, column_count)); } diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_precomputed.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_precomputed.cpp index 2349b4ab8b7..8749534c952 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_precomputed.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_precomputed.cpp @@ -47,12 +47,15 @@ static result_t call_daal_kernel(const context_cpu& ctx, ONEDAL_ASSERT(column_count > 0); const std::int64_t component_count = get_component_count(desc, data); ONEDAL_ASSERT(component_count > 0); + auto result = train_result{}.set_result_options(desc.get_result_options()); dal::detail::check_mul_overflow(column_count, component_count); + auto arr_eigvec = array::empty(column_count * component_count); auto arr_eigval = array::empty(1 * component_count); auto arr_means = array::empty(1 * column_count); auto arr_vars = array::empty(1 * column_count); + const auto daal_data = interop::convert_to_daal_table(data); const auto daal_eigenvectors = interop::convert_to_daal_homogen_table(arr_eigvec, 
component_count, column_count); @@ -64,28 +67,32 @@ static result_t call_daal_kernel(const context_cpu& ctx, daal_cov::Batch covariance_alg; covariance_alg.input.set(daal_cov::data, daal_data); - constexpr bool is_correlation = true; - constexpr std::uint64_t results_to_compute = std::uint64_t(daal_pca::eigenvalue); - - interop::status_to_exception(interop::call_daal_kernel( - ctx, - is_correlation, - desc.get_deterministic(), - *daal_data, - &covariance_alg, - static_cast(results_to_compute), - *daal_eigenvectors, - *daal_eigenvalues, - *daal_means, - *daal_variances)); + daal::algorithms::pca::BaseBatchParameter daal_pca_parameter; + + daal_pca_parameter.isDeterministic = desc.get_deterministic(); + + daal_pca_parameter.resultsToCompute = + static_cast(std::uint64_t(daal_pca::eigenvalue)); + + daal_pca_parameter.isCorrelation = true; + + interop::status_to_exception( + interop::call_daal_kernel(ctx, + *daal_data, + &covariance_alg, + *daal_eigenvectors, + *daal_eigenvalues, + *daal_means, + *daal_variances, + nullptr, + nullptr, + &daal_pca_parameter)); if (desc.get_result_options().test(result_options::vars)) { result.set_variances(homogen_table::wrap(arr_vars, 1, column_count)); } if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto mdl = model_t{}.set_eigenvectors( - homogen_table::wrap(arr_eigvec, component_count, column_count)); - result.set_model(mdl); + result.set_eigenvectors(homogen_table::wrap(arr_eigvec, component_count, column_count)); } if (desc.get_result_options().test(result_options::eigenvalues)) { diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_svd.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_svd.cpp index fbdbe3619cd..a7f25a978f9 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_svd.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/train_kernel_svd.cpp @@ -51,57 +51,89 @@ template static result_t call_daal_kernel(const context_cpu& ctx, const descriptor_t& desc, const table& data) { + 
const std::int64_t row_count = data.get_row_count(); + ONEDAL_ASSERT(row_count > 0); const std::int64_t column_count = data.get_column_count(); ONEDAL_ASSERT(column_count > 0); const std::int64_t component_count = get_component_count(desc, data); ONEDAL_ASSERT(component_count > 0); + auto result = train_result{}.set_result_options(desc.get_result_options()); + dal::detail::check_mul_overflow(column_count, component_count); + auto arr_eigvec = array::empty(column_count * component_count); - auto arr_eigval = array::empty(1 * component_count); + auto arr_singular_values = array::empty(1 * component_count); auto arr_means = array::empty(1 * column_count); auto arr_vars = array::empty(1 * column_count); - + auto arr_explained_variances_ratio = array::empty(1 * component_count); + const auto daal_explained_variances_ratio = + interop::convert_to_daal_homogen_table(arr_explained_variances_ratio, 1, column_count); const auto daal_data = interop::convert_to_daal_table(data); + const auto daal_eigenvectors = interop::convert_to_daal_homogen_table(arr_eigvec, component_count, column_count); - const auto daal_eigenvalues = - interop::convert_to_daal_homogen_table(arr_eigval, 1, component_count); + const auto daal_singular_values = + interop::convert_to_daal_homogen_table(arr_singular_values, 1, component_count); const auto daal_means = interop::convert_to_daal_homogen_table(arr_means, 1, column_count); const auto daal_variances = interop::convert_to_daal_homogen_table(arr_vars, 1, column_count); - + auto arr_eigval = array::empty(1 * component_count); + const auto daal_eigenvalues = + interop::convert_to_daal_homogen_table(arr_eigval, 1, component_count); daal_pca::internal::InputDataType dtype = daal_pca::internal::nonNormalizedDataset; + if (desc.get_data_normalization() == normalization::zscore) { + dtype = daal_pca::internal::normalizedDataset; + } + auto norm_alg = get_normalization_algorithm(); norm_alg->input.set(daal_zscore::data, daal_data); 
norm_alg->parameter().resultsToCompute |= daal_zscore::mean; norm_alg->parameter().resultsToCompute |= daal_zscore::variance; - daal_pca::BatchParameter parameter; - parameter.isDeterministic = desc.get_deterministic(); - parameter.normalization = norm_alg; - parameter.resultsToCompute = + daal_pca::BatchParameter daal_pca_parameter; + + norm_alg->parameter().doScale = true; + daal_pca_parameter.doScale = true; + + if (desc.get_normalization_mode() == normalization::mean_center) { + norm_alg->parameter().doScale = false; + daal_pca_parameter.doScale = false; + } + + daal_pca_parameter.isDeterministic = desc.get_deterministic(); + daal_pca_parameter.normalization = norm_alg; + daal_pca_parameter.resultsToCompute = std::uint64_t(daal_pca::mean | daal_pca::variance | daal_pca::eigenvalue); - interop::status_to_exception( - interop::call_daal_kernel(ctx, - dtype, - *daal_data.get(), - &parameter, - *daal_eigenvalues.get(), - *daal_eigenvectors.get(), - *daal_means.get(), - *daal_variances.get())); + interop::status_to_exception(interop::call_daal_kernel( + ctx, + dtype, + *daal_data, + *daal_eigenvectors, + *daal_singular_values, + *daal_means, + *daal_variances, + daal_eigenvalues.get(), + daal_explained_variances_ratio.get(), + &daal_pca_parameter)); if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto mdl = model_t{}.set_eigenvectors( - homogen_table::wrap(arr_eigvec, component_count, column_count)); - result.set_model(mdl); + result.set_eigenvectors(homogen_table::wrap(arr_eigvec, component_count, column_count)); + } + + if (desc.get_result_options().test(result_options::singular_values)) { + result.set_singular_values(homogen_table::wrap(arr_singular_values, 1, component_count)); } if (desc.get_result_options().test(result_options::eigenvalues)) { result.set_eigenvalues(homogen_table::wrap(arr_eigval, 1, component_count)); } + + if (desc.get_result_options().test(result_options::explained_variances_ratio)) { + 
result.set_explained_variances_ratio( + homogen_table::wrap(arr_explained_variances_ratio, 1, component_count)); + } if (desc.get_result_options().test(result_options::vars)) { result.set_variances(homogen_table::wrap(arr_vars, 1, column_count)); } diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_dpc.cpp index 67619264cab..cc570ab9e71 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/finalize_train_kernel_cov_dpc.cpp @@ -36,6 +36,95 @@ using input_t = partial_train_result; using result_t = train_result; using descriptor_t = detail::descriptor_base; +template +auto compute_sums(sycl::queue& q, + const pr::ndview& data, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_sums, q); + ONEDAL_ASSERT(data.has_data()); + ONEDAL_ASSERT(0 < data.get_dimension(1)); + + const std::int64_t column_count = data.get_dimension(1); + auto sums = pr::ndarray::empty(q, { column_count }, alloc::device); + auto reduce_event = + pr::reduce_by_columns(q, data, sums, pr::sum{}, pr::identity{}, deps); + return std::make_tuple(sums, reduce_event); +} + +template +auto compute_means(sycl::queue& q, + std::int64_t row_count, + const pr::ndview& sums, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_means, q); + ONEDAL_ASSERT(sums.has_data()); + ONEDAL_ASSERT(sums.get_dimension(0) > 0); + + const std::int64_t column_count = sums.get_dimension(0); + auto means = pr::ndarray::empty(q, { column_count }, alloc::device); + auto means_event = pr::means(q, row_count, sums, means, deps); + return std::make_tuple(means, means_event); +} + +template +auto compute_variances(sycl::queue& q, + const pr::ndview& cov, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_vars, q); + ONEDAL_ASSERT(cov.has_data()); + ONEDAL_ASSERT(cov.get_dimension(0) > 0); + 
ONEDAL_ASSERT(cov.get_dimension(0) == cov.get_dimension(1), "Covariance matrix must be square"); + + auto column_count = cov.get_dimension(0); + auto vars = pr::ndarray::empty(q, { column_count }, alloc::device); + auto vars_event = pr::variances(q, cov, vars, deps); + return std::make_tuple(vars, vars_event); +} + +template +auto compute_covariance(sycl::queue& q, + std::int64_t row_count, + const pr::ndview& xtx, + const pr::ndarray& sums, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_covariance, q); + ONEDAL_ASSERT(sums.has_data()); + ONEDAL_ASSERT(xtx.has_data()); + ONEDAL_ASSERT(xtx.get_dimension(1) > 0); + + const std::int64_t column_count = xtx.get_dimension(1); + + auto cov = pr::ndarray::empty(q, { column_count, column_count }, alloc::device); + + auto copy_event = copy(q, cov, xtx, { deps }); + + constexpr bool bias = false; // Currently we use only unbiased covariance for PCA computation. + auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event }); + return std::make_tuple(cov, cov_event); +} + +template +auto compute_correlation_from_covariance(sycl::queue& q, + std::int64_t row_count, + const pr::ndview& cov, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_correlation, q); + ONEDAL_ASSERT(cov.has_data()); + ONEDAL_ASSERT(cov.get_dimension(0) > 0); + ONEDAL_ASSERT(cov.get_dimension(0) == cov.get_dimension(1), "Covariance matrix must be square"); + + const std::int64_t column_count = cov.get_dimension(1); + + auto tmp = pr::ndarray::empty(q, { column_count }, alloc::device); + + auto corr = pr::ndarray::empty(q, { column_count, column_count }, alloc::device); + + const bool bias = false; // Currently we use only unbiased covariance for PCA computation. 
+ auto corr_event = pr::correlation_from_covariance(q, row_count, cov, corr, tmp, bias, deps); + + return std::make_tuple(corr, corr_event); +} + template auto compute_eigenvectors_on_host(sycl::queue& q, pr::ndarray&& corr, @@ -50,7 +139,6 @@ auto compute_eigenvectors_on_host(sycl::queue& q, auto eigvecs = pr::ndarray::empty({ component_count, column_count }); auto eigvals = pr::ndarray::empty(component_count); - auto host_corr = corr.to_host(q, deps); pr::sym_eigvals_descending(host_corr, component_count, eigvecs, eigvals); @@ -58,30 +146,46 @@ auto compute_eigenvectors_on_host(sycl::queue& q, } template -auto compute_correlation(sycl::queue& q, - std::int64_t row_count, - const pr::ndview& xtx, - const pr::ndarray& sums, - const bk::event_vector& deps = {}) { - ONEDAL_PROFILER_TASK(compute_correlation, q); - ONEDAL_ASSERT(sums.has_data()); - ONEDAL_ASSERT(xtx.has_data()); - ONEDAL_ASSERT(xtx.get_dimension(1) > 0); - - const std::int64_t column_count = xtx.get_dimension(1); - - auto tmp = pr::ndarray::empty(q, { column_count }, alloc::device); +auto compute_singular_values_on_host(sycl::queue& q, + pr::ndarray eigenvalues, + std::int64_t row_count, + const dal::backend::event_vector& deps = {}) { + const std::int64_t component_count = eigenvalues.get_dimension(0); - auto corr = pr::ndarray::empty(q, { column_count, column_count }, alloc::device); + auto singular_values = pr::ndarray::empty(component_count); - auto copy_event = copy(q, corr, xtx, { deps }); + auto eigvals_ptr = eigenvalues.get_data(); + auto singular_values_ptr = singular_values.get_mutable_data(); + const Float factor = row_count - 1; + for (std::int64_t i = 0; i < component_count; ++i) { + singular_values_ptr[i] = std::sqrt(factor * eigvals_ptr[i]); + } + return singular_values; +} - auto corr_event = pr::correlation(q, row_count, sums, corr, tmp, { copy_event }); +template +auto compute_explained_variances_on_host(sycl::queue& q, + pr::ndarray eigenvalues, + pr::ndarray vars, + const 
dal::backend::event_vector& deps = {}) { + const std::int64_t component_count = eigenvalues.get_dimension(0); + const std::int64_t column_count = vars.get_dimension(0); + auto explained_variances_ratio = pr::ndarray::empty(component_count); - auto smart_event = bk::smart_event{ corr_event }.attach(tmp); - return std::make_tuple(corr, smart_event); + auto eigvals_ptr = eigenvalues.get_data(); + auto vars_ptr = vars.get_data(); + auto explained_variances_ratio_ptr = explained_variances_ratio.get_mutable_data(); + Float sum = 0; + for (std::int64_t i = 0; i < column_count; ++i) { + sum += vars_ptr[i]; + } + ONEDAL_ASSERT(0 < sum); + const Float inverse_sum = 1.0 / sum; + for (std::int64_t i = 0; i < component_count; ++i) { + explained_variances_ratio_ptr[i] = eigvals_ptr[i] * inverse_sum; + } + return explained_variances_ratio; } - template static train_result train(const context_gpu& ctx, const descriptor_t& desc, @@ -102,27 +206,68 @@ static train_result train(const context_gpu& ctx, const auto sums = pr::table2ndarray_1d(q, input.get_partial_sum(), sycl::usm::alloc::device); + if (desc.get_result_options().test(result_options::means)) { + auto [means, means_event] = compute_means(q, rows_count_global, sums, {}); + means_event.wait_and_throw(); + result.set_means(homogen_table::wrap(means.flatten(q), 1, column_count)); + } + const auto xtx = pr::table2ndarray(q, input.get_partial_crossproduct(), sycl::usm::alloc::device); + auto [cov, cov_event] = compute_covariance(q, rows_count_global, xtx, sums, {}); + + auto [vars, vars_event] = compute_variances(q, cov, { cov_event }); + vars_event.wait_and_throw(); + if (desc.get_result_options().test(result_options::vars)) { + result.set_variances(homogen_table::wrap(vars.flatten(q), 1, column_count)); + } + auto data_to_compute = cov; + + sycl::event corr_event; + if (desc.get_normalization_mode() == normalization::zscore) { + pr::ndarray corr{}; + std::tie(corr, corr_event) = + compute_correlation_from_covariance(q, 
rows_count_global, cov, { cov_event }); + corr_event.wait_and_throw(); + data_to_compute = corr; + } + + auto [eigvecs, eigvals] = compute_eigenvectors_on_host(q, + std::move(data_to_compute), + component_count, + { corr_event, vars_event, cov_event }); + if (desc.get_result_options().test(result_options::eigenvalues)) { + result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); + } + + if (desc.get_result_options().test(result_options::singular_values)) { + auto singular_values = + compute_singular_values_on_host(q, + eigvals, + rows_count_global, + { corr_event, vars_event, cov_event }); + result.set_singular_values( + homogen_table::wrap(singular_values.flatten(), 1, component_count)); + } + + if (desc.get_result_options().test(result_options::explained_variances_ratio)) { + auto vars_host = vars.to_host(q); + auto explained_variances_ratio = + compute_explained_variances_on_host(q, + eigvals, + vars_host, + { corr_event, vars_event, cov_event }); + result.set_explained_variances_ratio( + homogen_table::wrap(explained_variances_ratio.flatten(), 1, component_count)); + } + + if (desc.get_deterministic()) { + sign_flip(eigvecs); + } - auto [corr, corr_event] = compute_correlation(q, rows_count_global, xtx, sums); - - if (desc.get_result_options().test(result_options::eigenvectors | - result_options::eigenvalues)) { - auto [eigvecs, eigvals] = - compute_eigenvectors_on_host(q, std::move(corr), component_count, { corr_event }); - if (desc.get_result_options().test(result_options::eigenvalues)) { - result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); - } - - if (desc.get_deterministic()) { - sign_flip(eigvecs); - } - if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto model = model_t{}.set_eigenvectors( - homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); - result.set_model(model); - } + if (desc.get_result_options().test(result_options::eigenvectors)) { + 
result.set_eigenvectors( + homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); } return result; diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp index 2047db7779e..5e5ff8f3564 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp @@ -26,39 +26,158 @@ namespace oneapi::dal::pca::backend { namespace pr = oneapi::dal::backend::primitives; - +namespace bk = oneapi::dal::backend; using dal::backend::context_gpu; +using alloc = sycl::usm::alloc; + using model_t = model; using input_t = infer_input; using result_t = infer_result; using descriptor_t = detail::descriptor_base; +template +auto get_centered(sycl::queue& q, + pr::ndview& data, + const pr::ndview& means, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_centered_data, q); + const std::int64_t row_count = data.get_dimension(0); + const std::int64_t column_count = data.get_dimension(1); + + auto centered_data_ptr = data.get_mutable_data(); + auto means_ptr = means.get_data(); + + auto centered_event = q.submit([&](sycl::handler& h) { + const auto range = bk::make_range_2d(row_count, column_count); + h.depends_on(deps); + h.parallel_for(range, [=](sycl::id<2> id) { + const std::size_t i = id[0]; + const std::size_t j = id[1]; + centered_data_ptr[i * column_count + j] = + centered_data_ptr[i * column_count + j] - means_ptr[j]; + }); + }); + return centered_event; +} + +template +auto get_scaled(sycl::queue& q, + pr::ndview& data, + const pr::ndview& variances, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_scaled_data, q); + const std::int64_t row_count = data.get_dimension(0); + const std::int64_t column_count = data.get_dimension(1); + + auto scaled_data_ptr = data.get_mutable_data(); + auto variances_ptr = variances.get_data(); + + auto scaled_event = q.submit([&](sycl::handler& h) { + const 
auto range = bk::make_range_2d(row_count, column_count); + h.depends_on(deps); + h.parallel_for(range, [=](sycl::id<2> id) { + const std::size_t i = id[0]; + const std::size_t j = id[1]; + const Float sqrt_var = sycl::sqrt(variances_ptr[j]); + const Float inv_var = + sqrt_var < std::numeric_limits::epsilon() ? 0 : 1 / sqrt_var; + scaled_data_ptr[i * column_count + j] = scaled_data_ptr[i * column_count + j] * inv_var; + }); + }); + return scaled_event; +} + +template +auto get_whitened(sycl::queue& q, + pr::ndview& data, + const pr::ndview& eigenvalues, + const bk::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_whitened_data, q); + const std::int64_t row_count = data.get_dimension(0); + const std::int64_t column_count = data.get_dimension(1); + + auto whitened_data_ptr = data.get_mutable_data(); + auto eigenvalues_ptr = eigenvalues.get_data(); + + auto whitened_event = q.submit([&](sycl::handler& h) { + const auto range = bk::make_range_2d(row_count, column_count); + h.depends_on(deps); + h.parallel_for(range, [=](sycl::id<2> id) { + const std::size_t i = id[0]; + const std::size_t j = id[1]; + const Float sqrt_eigenvalue = sycl::sqrt(eigenvalues_ptr[j]); + Float inv_eigenvalue = + sqrt_eigenvalue < std::numeric_limits::epsilon() ? 
0 : 1 / sqrt_eigenvalue; + whitened_data_ptr[i * column_count + j] = + whitened_data_ptr[i * column_count + j] * inv_eigenvalue; + }); + }); + return whitened_event; +} + template static result_t infer(const context_gpu& ctx, const descriptor_t& desc, const input_t& input) { auto& queue = ctx.get_queue(); + const auto data = input.get_data(); - auto model = input.get_model(); - auto eigenvectors = model.get_eigenvectors(); + const auto model = input.get_model(); + const auto eigenvectors = model.get_eigenvectors(); + const auto eigenvalues = model.get_eigenvalues(); + const std::int64_t row_count = data.get_row_count(); const std::int64_t component_count = get_component_count(desc, data); + const std::int64_t column_count = data.get_column_count(); + dal::detail::check_mul_overflow(row_count, component_count); - const auto data_nd = pr::table2ndarray(queue, data, sycl::usm::alloc::device); - const auto eigenvectors_nd = - pr::table2ndarray(queue, eigenvectors, sycl::usm::alloc::device); + const auto data_nd = pr::table2ndarray(queue, data, alloc::device); + auto data_to_xtx = + pr::ndarray::empty(queue, { row_count, column_count }, alloc::device); + + auto copy_event = copy(queue, data_to_xtx, data_nd, {}); + copy_event.wait_and_throw(); - auto res_nd = pr::ndarray::empty(queue, - { row_count, component_count }, - sycl::usm::alloc::device); + sycl::event mean_centered_event; + if (desc.get_normalization_mode() != normalization::none && model.get_means().has_data()) { + const auto means = model.get_means(); + const auto means_nd = pr::table2ndarray_1d(queue, means, alloc::device); + mean_centered_event = get_centered(queue, data_to_xtx, means_nd, { copy_event }); + } + + sycl::event scaled_event; + if (desc.get_normalization_mode() == normalization::zscore && + model.get_variances().has_data()) { + const auto variances = model.get_variances(); + const auto variances_nd = pr::table2ndarray_1d(queue, variances, alloc::device); + scaled_event = get_scaled(queue, 
data_to_xtx, variances_nd, { mean_centered_event }); + } + + const auto eigenvectors_nd = pr::table2ndarray(queue, eigenvectors, alloc::device); + + auto res_nd = + pr::ndarray::empty(queue, { row_count, component_count }, alloc::device); sycl::event gemm_event; { ONEDAL_PROFILER_TASK(gemm, queue); - gemm_event = pr::gemm(queue, data_nd, eigenvectors_nd.t(), res_nd, Float(1.0), Float(0.0)); + gemm_event = pr::gemm(queue, + data_to_xtx, + eigenvectors_nd.t(), + res_nd, + Float(1.0), + Float(0.0), + { scaled_event }); + gemm_event.wait_and_throw(); + } + + sycl::event whiten_event; + if (desc.whiten() && model.get_eigenvalues().has_data()) { + const auto eigenvalues = model.get_eigenvalues(); + const auto eigenvalues_nd = pr::table2ndarray_1d(queue, eigenvalues, alloc::device); + whiten_event = get_whitened(queue, res_nd, eigenvalues_nd, { gemm_event }); } - const auto res_array = res_nd.flatten(queue, { gemm_event }); - auto res_table = homogen_table::wrap(res_array, row_count, component_count); - return result_t{}.set_transformed_data(res_table); + return result_t{}.set_transformed_data( + homogen_table::wrap(res_nd.flatten(queue, { whiten_event }), row_count, component_count)); } template diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp index 13be579cf87..82baf0e4ae0 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp @@ -48,7 +48,7 @@ auto compute_sums(sycl::queue& q, const bk::event_vector& deps = {}) { ONEDAL_PROFILER_TASK(compute_sums, q); ONEDAL_ASSERT(data.has_data()); - ONEDAL_ASSERT(data.get_dimension(1) > 0); + ONEDAL_ASSERT(0 < data.get_dimension(1)); const std::int64_t column_count = data.get_dimension(1); auto sums = pr::ndarray::empty(q, { column_count }, alloc::device); @@ -104,7 +104,7 @@ auto compute_covariance(sycl::queue& q, auto copy_event = copy(q, 
cov, xtx, { deps }); - const bool bias = false; // Currently we use only unbiased covariance for PCA computation. + constexpr bool bias = false; // Currently we use only unbiased covariance for PCA computation. auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event }); return std::make_tuple(cov, cov_event); } @@ -145,13 +145,62 @@ auto compute_eigenvectors_on_host(sycl::queue& q, auto eigvecs = pr::ndarray::empty({ component_count, column_count }); auto eigvals = pr::ndarray::empty(component_count); - auto host_corr = corr.to_host(q, deps); pr::sym_eigvals_descending(host_corr, component_count, eigvecs, eigvals); return std::make_tuple(eigvecs, eigvals); } +template +auto compute_singular_values_on_host(sycl::queue& q, + pr::ndarray eigenvalues, + std::int64_t row_count, + const dal::backend::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_singular_values_on_host); + ONEDAL_ASSERT(eigenvalues.has_mutable_data()); + + const std::int64_t component_count = eigenvalues.get_dimension(0); + + auto singular_values = pr::ndarray::empty(component_count); + + auto eigvals_ptr = eigenvalues.get_data(); + auto singular_values_ptr = singular_values.get_mutable_data(); + + const Float factor = row_count - 1; + for (std::int64_t i = 0; i < component_count; ++i) { + singular_values_ptr[i] = std::sqrt(factor * eigvals_ptr[i]); + } + return singular_values; +} + +template +auto compute_explained_variances_on_host(sycl::queue& q, + pr::ndarray eigenvalues, + pr::ndarray vars, + const dal::backend::event_vector& deps = {}) { + ONEDAL_PROFILER_TASK(compute_explained_variances_on_host); + ONEDAL_ASSERT(eigenvalues.has_mutable_data()); + + const std::int64_t component_count = eigenvalues.get_dimension(0); + const std::int64_t column_count = vars.get_dimension(0); + auto explained_variances_ratio = pr::ndarray::empty(component_count); + + auto eigvals_ptr = eigenvalues.get_data(); + auto vars_ptr = vars.get_data(); + auto explained_variances_ratio_ptr = 
explained_variances_ratio.get_mutable_data(); + + Float sum = 0; + for (std::int64_t i = 0; i < column_count; ++i) { + sum += vars_ptr[i]; + } + ONEDAL_ASSERT(sum > 0); + const Float inverse_sum = 1.0 / sum; + for (std::int64_t i = 0; i < component_count; ++i) { + explained_variances_ratio_ptr[i] = eigvals_ptr[i] * inverse_sum; + } + return explained_variances_ratio; +} + template result_t train_kernel_cov_impl::operator()(const descriptor_t& desc, const input_t& input) { ONEDAL_ASSERT(input.get_data().has_data()); @@ -175,7 +224,7 @@ result_t train_kernel_cov_impl::operator()(const descriptor_t& desc, cons sycl::event gemm_event; { ONEDAL_PROFILER_TASK(gemm, q_); - gemm_event = gemm(q_, data_nd.t(), data_nd, xtx, Float(1.0), Float(0.0)); + gemm_event = gemm(q_, data_nd.t(), data_nd, xtx, Float(1.0), Float(0.0), { sums_event }); gemm_event.wait_and_throw(); } @@ -190,34 +239,59 @@ result_t train_kernel_cov_impl::operator()(const descriptor_t& desc, cons if (desc.get_result_options().test(result_options::means)) { auto [means, means_event] = compute_means(q_, rows_count_global, sums, { gemm_event }); - result.set_means(homogen_table::wrap(means.flatten(q_), 1, column_count)); + result.set_means(homogen_table::wrap(means.flatten(q_, { means_event }), 1, column_count)); } auto [cov, cov_event] = compute_covariance(q_, rows_count_global, xtx, sums, { gemm_event }); + + auto [vars, vars_event] = compute_variances(q_, cov, { cov_event }); if (desc.get_result_options().test(result_options::vars)) { - auto [vars, vars_event] = compute_variances(q_, cov, { cov_event }); - vars_event.wait_and_throw(); - result.set_variances(homogen_table::wrap(vars.flatten(q_), 1, column_count)); + result.set_variances( + homogen_table::wrap(vars.flatten(q_, { vars_event }), 1, column_count)); + } + auto data_to_compute = cov; + sycl::event corr_event; + if (desc.get_normalization_mode() == normalization::zscore) { + pr::ndarray corr{}; + std::tie(corr, corr_event) = + 
compute_correlation_from_covariance(q_, rows_count_global, cov, { cov_event }); + corr_event.wait_and_throw(); + data_to_compute = corr; + } + + auto [eigvecs, eigvals] = compute_eigenvectors_on_host(q_, + std::move(data_to_compute), + component_count, + { cov_event, corr_event, vars_event }); + if (desc.get_result_options().test(result_options::eigenvalues)) { + result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); + } + if (desc.get_result_options().test(result_options::singular_values)) { + auto singular_values = + compute_singular_values_on_host(q_, + eigvals, + row_count, + { cov_event, corr_event, vars_event }); + result.set_singular_values( + homogen_table::wrap(singular_values.flatten(), 1, component_count)); + } + if (desc.get_result_options().test(result_options::explained_variances_ratio)) { + auto vars_host = vars.to_host(q_); + auto explained_variances_ratio = + compute_explained_variances_on_host(q_, + eigvals, + vars_host, + { cov_event, corr_event, vars_event }); + result.set_explained_variances_ratio( + homogen_table::wrap(explained_variances_ratio.flatten(), 1, component_count)); + } + + if (desc.get_deterministic()) { + sign_flip(eigvecs); } - if (desc.get_result_options().test(result_options::eigenvectors | - result_options::eigenvalues)) { - auto [corr, corr_event] = - compute_correlation_from_covariance(q_, rows_count_global, cov, { gemm_event }); - - auto [eigvecs, eigvals] = - compute_eigenvectors_on_host(q_, std::move(corr), component_count, { corr_event }); - if (desc.get_result_options().test(result_options::eigenvalues)) { - result.set_eigenvalues(homogen_table::wrap(eigvals.flatten(), 1, component_count)); - } - - if (desc.get_deterministic()) { - sign_flip(eigvecs); - } - if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto model = model_t{}.set_eigenvectors( - homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); - result.set_model(model); - } + if 
(desc.get_result_options().test(result_options::eigenvectors)) { + result.set_eigenvectors( + homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); } return result; diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp index 9c4b4439e9f..77976ba5f91 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_precomputed_impl_dpc.cpp @@ -107,9 +107,8 @@ result_t train_kernel_precomputed_impl::operator()(const descriptor_t& de sign_flip(eigvecs); } if (desc.get_result_options().test(result_options::eigenvectors)) { - const auto model = model_t{}.set_eigenvectors( + result.set_eigenvectors( homogen_table::wrap(eigvecs.flatten(), component_count, column_count)); - result.set_model(model); } } diff --git a/cpp/oneapi/dal/algo/pca/common.cpp b/cpp/oneapi/dal/algo/pca/common.cpp index f07003463ca..654f49698f1 100644 --- a/cpp/oneapi/dal/algo/pca/common.cpp +++ b/cpp/oneapi/dal/algo/pca/common.cpp @@ -37,6 +37,14 @@ result_option_id get_means_id() { return result_option_id{ result_option_id::make_by_index(3) }; } +result_option_id get_singular_values_id() { + return result_option_id{ result_option_id::make_by_index(4) }; +} + +result_option_id get_explained_variances_ratio_id() { + return result_option_id{ result_option_id::make_by_index(5) }; +} + template result_option_id get_default_result_options() { return result_option_id{}; @@ -44,7 +52,8 @@ result_option_id get_default_result_options() { template <> result_option_id get_default_result_options() { - return get_eigenvectors_id() | get_eigenvalues_id() | get_variances_id() | get_means_id(); + return get_eigenvectors_id() | get_eigenvalues_id() | get_variances_id() | get_means_id() | + get_singular_values_id() | get_explained_variances_ratio_id(); } namespace v1 { @@ -54,6 +63,9 @@ class descriptor_impl : public 
base { public: std::int64_t component_count = -1; bool deterministic = false; + bool whiten = false; + normalization normalization_mode = normalization::zscore; + normalization data_normalization = normalization::none; result_option_id result_options = get_default_result_options(); }; @@ -61,13 +73,21 @@ template class model_impl : public ONEDAL_SERIALIZABLE(pca_dim_reduction_model_impl_id) { public: table eigenvectors; - + table pMeans; + table pVariances; + table eigenvalues; void serialize(dal::detail::output_archive& ar) const override { ar(eigenvectors); + ar(pMeans); + ar(pVariances); + ar(eigenvalues); } void deserialize(dal::detail::input_archive& ar) override { ar(eigenvectors); + ar(pMeans); + ar(pVariances); + ar(eigenvalues); } }; @@ -84,6 +104,21 @@ bool descriptor_base::get_deterministic() const { return impl_->deterministic; } +template +bool descriptor_base::whiten() const { + return impl_->whiten; +} + +template +normalization descriptor_base::get_normalization_mode() const { + return impl_->normalization_mode; +} + +template +normalization descriptor_base::get_data_normalization() const { + return impl_->data_normalization; +} + template void descriptor_base::set_component_count_impl(std::int64_t value) { if (value < 0) { @@ -97,6 +132,19 @@ void descriptor_base::set_deterministic_impl(bool value) { impl_->deterministic = value; } +template +void descriptor_base::set_normalization_mode_impl(normalization value) { + impl_->normalization_mode = value; +} +template +void descriptor_base::set_data_normalization_impl(normalization value) { + impl_->data_normalization = value; +} + +template +void descriptor_base::set_whiten_impl(bool value) { + impl_->whiten = value; +} template result_option_id descriptor_base::get_result_options() const { return impl_->result_options; @@ -132,7 +180,35 @@ template void model::set_eigenvectors_impl(const table& value) { impl_->eigenvectors = value; } +template +const table& model::get_means() const { + return 
impl_->pMeans; +} + +template +void model::set_means_impl(const table& value) { + impl_->pMeans = value; +} + +template +const table& model::get_variances() const { + return impl_->pVariances; +} +template +void model::set_variances_impl(const table& value) { + impl_->pVariances = value; +} + +template +const table& model::get_eigenvalues() const { + return impl_->eigenvalues; +} + +template +void model::set_eigenvalues_impl(const table& value) { + impl_->eigenvalues = value; +} template void model::serialize(dal::detail::output_archive& ar) const { dal::detail::serialize_polymorphic_shared(impl_, ar); diff --git a/cpp/oneapi/dal/algo/pca/common.hpp b/cpp/oneapi/dal/algo/pca/common.hpp index 26502b58265..fcee7617fd9 100644 --- a/cpp/oneapi/dal/algo/pca/common.hpp +++ b/cpp/oneapi/dal/algo/pca/common.hpp @@ -61,6 +61,18 @@ using v1::by_default; } // namespace method +namespace v1 { +/// Normalization modes +enum class normalization { + /// No normalization is necessary or data is not normalized + none, + /// Only mean centering is necessary, or data is already mean-centered + mean_center, + /// Normalization is necessary, or data is already normalized + zscore +}; +} // namespace v1 +using v1::normalization; /// Represents result option flag /// Behaves like a regular :expr`enum`.
class result_option_id : public result_option_id_base { @@ -76,6 +88,8 @@ ONEDAL_EXPORT result_option_id get_eigenvectors_id(); ONEDAL_EXPORT result_option_id get_eigenvalues_id(); ONEDAL_EXPORT result_option_id get_variances_id(); ONEDAL_EXPORT result_option_id get_means_id(); +ONEDAL_EXPORT result_option_id get_singular_values_id(); +ONEDAL_EXPORT result_option_id get_explained_variances_ratio_id(); } // namespace detail @@ -91,7 +105,11 @@ const inline result_option_id eigenvalues = detail::get_eigenvalues_id(); const inline result_option_id vars = detail::get_variances_id(); /// Return means const inline result_option_id means = detail::get_means_id(); - +/// Return singular values +const inline result_option_id singular_values = detail::get_singular_values_id(); +/// Return explained variances ratio +const inline result_option_id explained_variances_ratio = + detail::get_explained_variances_ratio_id(); } // namespace result_options namespace detail { @@ -125,15 +143,19 @@ class descriptor_base : public base { using task_t = Task; descriptor_base(); - + bool whiten() const; bool get_deterministic() const; std::int64_t get_component_count() const; - + normalization get_normalization_mode() const; + normalization get_data_normalization() const; result_option_id get_result_options() const; protected: + void set_whiten_impl(bool value); void set_deterministic_impl(bool value); void set_component_count_impl(std::int64_t value); + void set_normalization_mode_impl(normalization value); + void set_data_normalization_impl(normalization value); void set_result_options_impl(const result_option_id& value); private: @@ -207,6 +229,34 @@ class descriptor : public detail::descriptor_base { base_t::set_deterministic_impl(value); return *this; } + bool whiten() const { + return base_t::whiten(); + } + + auto& set_whiten(bool value) { + base_t::set_whiten_impl(value); + return *this; + } + + /// @remark default = normalization::zscore + normalization get_normalization_mode() const { + return
base_t::get_normalization_mode(); + } + + auto& set_normalization_mode(normalization value) { + base_t::set_normalization_mode_impl(value); + return *this; + } + + /// @remark default = normalization::none + normalization get_data_normalization() const { + return base_t::get_data_normalization(); + } + + auto& set_data_normalization(normalization value) { + base_t::set_data_normalization_impl(value); + return *this; + } /// Choose which results should be computed and returned. result_option_id get_result_options() const { @@ -242,9 +292,30 @@ class model : public base { set_eigenvectors_impl(value); return *this; } + const table& get_means() const; + + auto& set_means(const table& value) { + set_means_impl(value); + return *this; + } + const table& get_variances() const; + + auto& set_variances(const table& value) { + set_variances_impl(value); + return *this; + } + const table& get_eigenvalues() const; + + auto& set_eigenvalues(const table& value) { + set_eigenvalues_impl(value); + return *this; + } protected: void set_eigenvectors_impl(const table&); + void set_means_impl(const table&); + void set_variances_impl(const table&); + void set_eigenvalues_impl(const table&); private: void serialize(dal::detail::output_archive& ar) const; diff --git a/cpp/oneapi/dal/algo/pca/test/batch.cpp b/cpp/oneapi/dal/algo/pca/test/batch.cpp index 9c0f6f79bb4..f6e1c4b4e4d 100644 --- a/cpp/oneapi/dal/algo/pca/test/batch.cpp +++ b/cpp/oneapi/dal/algo/pca/test/batch.cpp @@ -22,6 +22,8 @@ namespace te = dal::test::engine; namespace la = te::linalg; namespace pca = oneapi::dal::pca; using pca_types = COMBINE_TYPES((float, double), (pca::method::cov, method::svd)); +using pca_types_cov = COMBINE_TYPES((float, double), (pca::method::cov)); +using pca_types_svd = COMBINE_TYPES((float, double), (pca::method::svd)); using pca_types_precomputed = COMBINE_TYPES((float, double), (method::precomputed)); template @@ -50,7 +52,7 @@ TEMPLATE_LIST_TEST_M(pca_batch_test, "pca common flow", 
"[pca][integration][batc TEMPLATE_LIST_TEST_M(pca_batch_test, "pca on gold data", "[pca][integration][batch][gold]", - pca_types) { + pca_types_svd) { SKIP_IF(this->not_available_on_device()); SKIP_IF(this->not_float64_friendly()); @@ -60,7 +62,7 @@ TEMPLATE_LIST_TEST_M(pca_batch_test, const auto gold_data = this->get_gold_data(); const auto pca_result = te::train(this->get_policy(), pca_desc, gold_data); - const auto eigenvalues = pca_result.get_eigenvalues(); + const auto eigenvalues = pca_result.get_singular_values(); const auto eigenvectors = pca_result.get_eigenvectors(); INFO("check eigenvalues") { @@ -74,6 +76,32 @@ TEMPLATE_LIST_TEST_M(pca_batch_test, } } +TEMPLATE_LIST_TEST_M(pca_batch_test, + "pca on gold data", + "[pca][integration][batch][gold]", + pca_types_cov) { + SKIP_IF(this->not_available_on_device()); + SKIP_IF(this->not_float64_friendly()); + + const std::int64_t component_count = 0; + const bool deterministic = true; + const auto pca_desc = this->get_descriptor(component_count, deterministic); + const auto gold_data = this->get_gold_data(); + + const auto pca_result = te::train(this->get_policy(), pca_desc, gold_data); + const auto eigenvalues = pca_result.get_eigenvalues(); + const auto eigenvectors = pca_result.get_eigenvectors(); + + INFO("check eigenvalues") { + const auto gold_eigenvalues = this->get_gold_eigenvalues(); + this->check_eigenvalues(gold_eigenvalues, eigenvalues); + } + + INFO("check eigenvectors") { + const auto gold_eigenvectors = this->get_gold_eigenvectors(); + this->check_eigenvectors(gold_eigenvectors, eigenvectors); + } +} TEMPLATE_LIST_TEST_M(pca_batch_test, "pca common flow higgs", "[external-dataset][pca][integration][batch]", diff --git a/cpp/oneapi/dal/algo/pca/test/fixture.hpp b/cpp/oneapi/dal/algo/pca/test/fixture.hpp index d8ab3c420f6..242932bc61d 100644 --- a/cpp/oneapi/dal/algo/pca/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/pca/test/fixture.hpp @@ -182,7 +182,7 @@ class pca_test : public te::crtp_algo_fixture 
{ check_nans(result); INFO("check if eigenvectors order is descending") - this->check_eigenvalues_order(eigenvalues); + check_eigenvalues_order(eigenvalues); INFO("check if eigenvectors matrix is orthogonal") check_eigenvectors_orthogonality(eigenvectors); @@ -205,7 +205,7 @@ class pca_test : public te::crtp_algo_fixture { check_nans(result); INFO("check if eigenvectors order is descending") - this->check_eigenvalues_order(eigenvalues); + check_eigenvalues_order(eigenvalues); INFO("check if eigenvectors matrix is orthogonal") check_eigenvectors_orthogonality(eigenvectors); diff --git a/cpp/oneapi/dal/algo/pca/test/serialization.cpp b/cpp/oneapi/dal/algo/pca/test/serialization.cpp index f1946c3fe64..d4798d1acec 100644 --- a/cpp/oneapi/dal/algo/pca/test/serialization.cpp +++ b/cpp/oneapi/dal/algo/pca/test/serialization.cpp @@ -35,7 +35,8 @@ class pca_serialization_test : public te::float_algo_fixture; bool not_available_on_device() { - return this->get_policy().is_gpu() && (!std::is_same_v); + return this->get_policy().is_gpu() && (!std::is_same_v)&&( + !std::is_same_v); } auto get_descriptor() { diff --git a/cpp/oneapi/dal/algo/pca/train_types.cpp b/cpp/oneapi/dal/algo/pca/train_types.cpp index 7fe436d99dc..52bcb8b1e15 100644 --- a/cpp/oneapi/dal/algo/pca/train_types.cpp +++ b/cpp/oneapi/dal/algo/pca/train_types.cpp @@ -36,9 +36,8 @@ template class detail::v1::train_result_impl : public base { public: model trained_model; - table eigenvalues; - table variances; - table means; + table singular_values; + table explained_variances_ratio; result_option_id result_options; }; @@ -104,7 +103,7 @@ const table& train_result::get_eigenvalues() const { if (!get_result_options().test(result_options::eigenvalues)) { throw domain_error(msg::this_result_is_not_enabled_via_result_options()); } - return impl_->eigenvalues; + return impl_->trained_model.get_eigenvalues(); } template @@ -120,7 +119,7 @@ const table& train_result::get_variances() const { if 
(!get_result_options().test(result_options::vars)) { throw domain_error(msg::this_result_is_not_enabled_via_result_options()); } - return impl_->variances; + return impl_->trained_model.get_variances(); } template @@ -128,7 +127,23 @@ const table& train_result::get_means() const { if (!get_result_options().test(result_options::means)) { throw domain_error(msg::this_result_is_not_enabled_via_result_options()); } - return impl_->means; + return impl_->trained_model.get_means(); +} + +template +const table& train_result::get_singular_values() const { + if (!get_result_options().test(result_options::singular_values)) { + throw domain_error(msg::this_result_is_not_enabled_via_result_options()); + } + return impl_->singular_values; +} + +template +const table& train_result::get_explained_variances_ratio() const { + if (!get_result_options().test(result_options::explained_variances_ratio)) { + throw domain_error(msg::this_result_is_not_enabled_via_result_options()); + } + return impl_->explained_variances_ratio; } template @@ -136,12 +151,20 @@ void train_result::set_model_impl(const model& value) { impl_->trained_model = value; } +template +void train_result::set_eigenvectors_impl(const table& value) { + if (!get_result_options().test(result_options::eigenvectors)) { + throw domain_error(msg::this_result_is_not_enabled_via_result_options()); + } + impl_->trained_model.set_eigenvectors(value); +} + template void train_result::set_eigenvalues_impl(const table& value) { if (!get_result_options().test(result_options::eigenvalues)) { throw domain_error(msg::this_result_is_not_enabled_via_result_options()); } - impl_->eigenvalues = value; + impl_->trained_model.set_eigenvalues(value); } template @@ -149,7 +172,7 @@ void train_result::set_variances_impl(const table& value) { if (!get_result_options().test(result_options::vars)) { throw domain_error(msg::this_result_is_not_enabled_via_result_options()); } - impl_->variances = value; + impl_->trained_model.set_variances(value); } 
template @@ -157,7 +180,23 @@ void train_result::set_means_impl(const table& value) { if (!get_result_options().test(result_options::means)) { throw domain_error(msg::this_result_is_not_enabled_via_result_options()); } - impl_->means = value; + impl_->trained_model.set_means(value); +} + +template +void train_result::set_singular_values_impl(const table& value) { + if (!get_result_options().test(result_options::singular_values)) { + throw domain_error(msg::this_result_is_not_enabled_via_result_options()); + } + impl_->singular_values = value; +} + +template +void train_result::set_explained_variances_ratio_impl(const table& value) { + if (!get_result_options().test(result_options::explained_variances_ratio)) { + throw domain_error(msg::this_result_is_not_enabled_via_result_options()); + } + impl_->explained_variances_ratio = value; } template diff --git a/cpp/oneapi/dal/algo/pca/train_types.hpp b/cpp/oneapi/dal/algo/pca/train_types.hpp index 5e537768be2..bae558d1490 100644 --- a/cpp/oneapi/dal/algo/pca/train_types.hpp +++ b/cpp/oneapi/dal/algo/pca/train_types.hpp @@ -89,6 +89,10 @@ class train_result { /// @invariant :expr:`eigenvectors == model.eigenvectors` const table& get_eigenvectors() const; + auto& set_eigenvectors(const table& value) { + set_eigenvectors_impl(value); + return *this; + } /// The trained PCA model /// @remark default = model{} const model& get_model() const; @@ -127,6 +131,24 @@ class train_result { set_means_impl(value); return *this; } + /// A $1 \\times r$ table that contains the singular values for the first :literal:`r` + /// features. + /// @remark default = table{} + const table& get_singular_values() const; + + auto& set_singular_values(const table& value) { + set_singular_values_impl(value); + return *this; + } + /// A $1 \\times r$ table that contains the explained variances ratios for the first :literal:`r` + /// features.
+ /// @remark default = table{} + const table& get_explained_variances_ratio() const; + + auto& set_explained_variances_ratio(const table& value) { + set_explained_variances_ratio_impl(value); + return *this; + } /// Result options that indicates availability of the properties /// @remark default = default_result_options const result_option_id& get_result_options() const; @@ -142,6 +164,8 @@ class train_result { void set_eigenvectors_impl(const table&); void set_variances_impl(const table&); void set_means_impl(const table&); + void set_explained_variances_ratio_impl(const table&); + void set_singular_values_impl(const table&); void set_result_options_impl(const result_option_id&); private: diff --git a/docs/source/api/algorithms/decomposition/pca.rst b/docs/source/api/algorithms/decomposition/pca.rst index b66faecbc01..600f859c74f 100644 --- a/docs/source/api/algorithms/decomposition/pca.rst +++ b/docs/source/api/algorithms/decomposition/pca.rst @@ -38,6 +38,10 @@ All types and functions in this section are declared in the ``oneapi::dal::pca`` namespace and be available via inclusion of the ``oneapi/dal/algo/pca.hpp`` header file. +Enum classes +------------ +.. onedal_enumclass:: oneapi::dal::pca::normalization + Descriptor ---------- .. onedal_class:: oneapi::dal::pca::descriptor diff --git a/docs/source/daal/algorithms/pca/principal-component-analysis.rst b/docs/source/daal/algorithms/pca/principal-component-analysis.rst index 71b4c4d3274..00bd6e8d184 100644 --- a/docs/source/daal/algorithms/pca/principal-component-analysis.rst +++ b/docs/source/daal/algorithms/pca/principal-component-analysis.rst @@ -112,7 +112,7 @@ Examples Batch Processing: - - :ref:`cpp_pca_dense_batch.cpp` + - :ref:`cpp_pca_cor_dense_batch.cpp` .. 
tab:: C++ (CPU) diff --git a/docs/source/includes/decomposition/pca-examples.rst b/docs/source/includes/decomposition/pca-examples.rst index 987626064ef..cf9a41304dd 100644 --- a/docs/source/includes/decomposition/pca-examples.rst +++ b/docs/source/includes/decomposition/pca-examples.rst @@ -26,4 +26,4 @@ Batch Processing: - - :ref:`cpp_pca_dense_batch.cpp` + - :ref:`cpp_pca_cor_dense_batch.cpp` diff --git a/examples/oneapi/cpp/source/pca/pca_dense_batch.cpp b/examples/oneapi/cpp/source/pca/pca_cor_dense_batch.cpp similarity index 87% rename from examples/oneapi/cpp/source/pca/pca_dense_batch.cpp rename to examples/oneapi/cpp/source/pca/pca_cor_dense_batch.cpp index 176a66ba665..9e3af413b37 100644 --- a/examples/oneapi/cpp/source/pca/pca_dense_batch.cpp +++ b/examples/oneapi/cpp/source/pca/pca_cor_dense_batch.cpp @@ -20,11 +20,11 @@ #include "example_util/utils.hpp" namespace dal = oneapi::dal; - +namespace pca = dal::pca; template void run(const dal::table& x_train, const std::string& method_name) { const auto pca_desc = - dal::pca::descriptor().set_component_count(5).set_deterministic(true); + pca::descriptor().set_component_count(5).set_deterministic(true); const auto result_train = dal::train(pca_desc, x_train); @@ -44,8 +44,7 @@ int main(int argc, char const* argv[]) { const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); - run(x_train, "Training method: Covariance"); - run(x_train, "Training method: SVD"); + run(x_train, "Training method: Covariance"); return 0; } diff --git a/examples/oneapi/cpp/source/pca/pca_dense_online.cpp b/examples/oneapi/cpp/source/pca/pca_cor_dense_online.cpp similarity index 87% rename from examples/oneapi/cpp/source/pca/pca_dense_online.cpp rename to examples/oneapi/cpp/source/pca/pca_cor_dense_online.cpp index aa0270b97f5..abcfa4ba4f9 100644 --- a/examples/oneapi/cpp/source/pca/pca_dense_online.cpp +++ b/examples/oneapi/cpp/source/pca/pca_cor_dense_online.cpp @@ -20,14 +20,14 @@ #include 
"example_util/utils.hpp" namespace dal = oneapi::dal; - +namespace pca = dal::pca; template void run(const dal::table& x_train, const std::string& method_name) { const auto pca_desc = - dal::pca::descriptor().set_component_count(5).set_deterministic(true); + pca::descriptor().set_component_count(5).set_deterministic(true); const std::int64_t nBlocks = 10; - dal::pca::partial_train_result<> partial_result; + pca::partial_train_result<> partial_result; std::cout << method_name << "\n" << std::endl; auto input_table = split_table_by_rows(x_train, nBlocks); @@ -48,8 +48,7 @@ int main(int argc, char const* argv[]) { const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); - run(x_train, "Training method: Covariance"); - run(x_train, "Training method: SVD"); + run(x_train, "Training method: Covariance"); return 0; } diff --git a/examples/oneapi/cpp/source/pca/pca_cov_dense_batch.cpp b/examples/oneapi/cpp/source/pca/pca_cov_dense_batch.cpp new file mode 100644 index 00000000000..136906ce0c8 --- /dev/null +++ b/examples/oneapi/cpp/source/pca/pca_cov_dense_batch.cpp @@ -0,0 +1,62 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/pca.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +namespace pca = dal::pca; +template +void run(const dal::table& x_train, const std::string& method_name, bool whiten) { + const auto pca_desc = pca::descriptor() + .set_component_count(5) + .set_deterministic(true) + .set_normalization_mode(pca::normalization::mean_center) + .set_whiten(whiten); + + const auto result_train = dal::train(pca_desc, x_train); + + std::cout << method_name << "\n" << std::endl; + + std::cout << "Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + + std::cout << "Singular Values:\n" << result_train.get_singular_values() << std::endl; + + std::cout << "Variances:\n" << result_train.get_variances() << std::endl; + + std::cout << "Means:\n" << result_train.get_means() << std::endl; + + std::cout << "Explained variances ratio:\n" + << result_train.get_explained_variances_ratio() << std::endl; + const auto result_infer = dal::infer(pca_desc, result_train.get_model(), x_train); + + std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; +} + +int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_non_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + + run(x_train, "Training method: Covariance, Whiten: false", false); + run(x_train, "Training method: Covariance, Whiten: true", true); + + return 0; +} diff --git a/examples/oneapi/cpp/source/pca/pca_cov_dense_online.cpp b/examples/oneapi/cpp/source/pca/pca_cov_dense_online.cpp new file mode 100644 index 00000000000..0ff277f2765 --- /dev/null +++ b/examples/oneapi/cpp/source/pca/pca_cov_dense_online.cpp @@ -0,0 +1,70 @@ 
+/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/pca.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +namespace pca = dal::pca; +template +void run(const dal::table& x_train, const std::string& method_name, bool whiten) { + const auto pca_desc = pca::descriptor() + .set_component_count(5) + .set_deterministic(true) + .set_normalization_mode(pca::normalization::mean_center) + .set_whiten(whiten); + const std::int64_t nBlocks = 2; + + pca::partial_train_result<> partial_result; + std::cout << method_name << "\n" << std::endl; + auto input_table = split_table_by_rows(x_train, nBlocks); + + for (std::int64_t i = 0; i < nBlocks; i++) { + partial_result = dal::partial_train(pca_desc, partial_result, input_table[i]); + } + auto result_train = dal::finalize_train(pca_desc, partial_result); + std::cout << "Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + + std::cout << "Singular Values:\n" << result_train.get_singular_values() << std::endl; + + std::cout << "Variances:\n" << result_train.get_variances() << std::endl; + + std::cout << "Means:\n" << result_train.get_means() << std::endl; + + 
std::cout << "Explained variances ratio:\n" + << result_train.get_explained_variances_ratio() << std::endl; + const auto result_infer = dal::infer(pca_desc, result_train.get_model(), x_train); + + std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; +} + +int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_non_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + + run(x_train, "Training method: Online Covariance, Whiten: false", false); + run(x_train, "Training method: Online Covariance, Whiten: true", true); + //Disabled because sklearn behavior cannot be replicated by the online SVD method + //run(x_train, "Training method: SVD, Whiten: false", false); + //run(x_train, "Training method: SVD, Whiten: true", true); + + return 0; +} diff --git a/examples/oneapi/cpp/source/pca/pca_precomputed_dense_batch.cpp b/examples/oneapi/cpp/source/pca/pca_precomputed_dense_batch.cpp index e5867e10e3b..676809b90d7 100644 --- a/examples/oneapi/cpp/source/pca/pca_precomputed_dense_batch.cpp +++ b/examples/oneapi/cpp/source/pca/pca_precomputed_dense_batch.cpp @@ -20,11 +20,11 @@ #include "example_util/utils.hpp" namespace dal = oneapi::dal; - +namespace pca = dal::pca; template void run(const dal::table& x_train, const std::string& method_name) { const auto pca_desc = - dal::pca::descriptor().set_component_count(5).set_deterministic(true); + pca::descriptor().set_component_count(5).set_deterministic(true); const auto result_train = dal::train(pca_desc, x_train); @@ -46,8 +46,8 @@ int main(int argc, char const* argv[]) { const auto cov_train = dal::read(dal::csv::data_source{ cov_data_file_name }); const auto cor_train = dal::read(dal::csv::data_source{ cor_data_file_name }); - run(cov_train, "PCA precomputed method with covariance matrix"); - run(cor_train, "PCA precomputed method with correlation matrix"); + run(cov_train, "PCA precomputed method with
covariance matrix"); + run(cor_train, "PCA precomputed method with correlation matrix"); return 0; } diff --git a/examples/oneapi/cpp/source/pca/pca_svd_dense_batch.cpp b/examples/oneapi/cpp/source/pca/pca_svd_dense_batch.cpp new file mode 100644 index 00000000000..01bd20be5fd --- /dev/null +++ b/examples/oneapi/cpp/source/pca/pca_svd_dense_batch.cpp @@ -0,0 +1,50 @@ +/******************************************************************************* +* Copyright 2020 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/pca.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +namespace pca = dal::pca; +template +void run(const dal::table& x_train, const std::string& method_name) { + const auto pca_desc = + pca::descriptor().set_component_count(5).set_deterministic(true); + + const auto result_train = dal::train(pca_desc, x_train); + + std::cout << method_name << "\n" << std::endl; + + std::cout << "Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + + const auto result_infer = dal::infer(pca_desc, result_train.get_model(), x_train); + + std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; +} + +int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + + run(x_train, "Training method: SVD"); + + return 0; +} diff --git a/examples/oneapi/cpp/source/pca/pca_svd_dense_online.cpp b/examples/oneapi/cpp/source/pca/pca_svd_dense_online.cpp new file mode 100644 index 00000000000..a5b6b577a0c --- /dev/null +++ b/examples/oneapi/cpp/source/pca/pca_svd_dense_online.cpp @@ -0,0 +1,54 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/pca.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +namespace pca = dal::pca; +template +void run(const dal::table& x_train, const std::string& method_name) { + const auto pca_desc = + pca::descriptor().set_component_count(5).set_deterministic(true); + const std::int64_t nBlocks = 10; + + pca::partial_train_result<> partial_result; + std::cout << method_name << "\n" << std::endl; + auto input_table = split_table_by_rows(x_train, nBlocks); + + for (std::int64_t i = 0; i < nBlocks; i++) { + partial_result = dal::partial_train(pca_desc, partial_result, input_table[i]); + } + auto result = dal::finalize_train(pca_desc, partial_result); + std::cout << "Eigenvectors:\n" << result.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result.get_eigenvalues() << std::endl; + const auto result_infer = dal::infer(pca_desc, result.get_model(), x_train); + + std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; +} + +int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + + run(x_train, "Training method: SVD"); + + return 0; +} diff --git a/examples/oneapi/data/pca_non_normalized.csv b/examples/oneapi/data/pca_non_normalized.csv new file mode 100644 index 00000000000..601617a6114 --- /dev/null +++ b/examples/oneapi/data/pca_non_normalized.csv @@ -0,0 +1,100 @@ +-0.4894395270846046,0.4782227145750423,-0.2056087248017172,-1.1596038877796977,-0.22792559463096573,-0.16758420241712452,-0.7540870471622185,0.08873011122011461,0.4059633211442037,-0.3952124878423063, 
+-0.2936705762757347,0.8775731934558353,-0.5609733008897185,-0.5476732261423879,-0.9496400168063995,0.13233660749322748,-1.2246429500842437,0.8498517703143365,0.37658264524016294,1.3209772906882553, +0.31732920152819283,-0.7399579098731732,-0.8016410524278147,-1.3114836471930373,-0.4991458301653959,-1.1568449614752936,-0.05892578683851021,-1.2388759792403368,-0.27373513546599426,0.3927608051012733, +0.0017490777096821663,0.49306208724410555,-0.6259585928888538,0.6681580132094278,-0.25470034812797365,-0.43197464864945023,-0.37867531736359156,0.2513686517203158,-0.3407544812521041,-0.021282329753063243, +-0.24366860526588413,-1.1872464844306545,1.197814799250492,-0.785818938590462,-0.18697545427215193,1.6867587246077376,-1.3364575405895218,0.7804767264790533,-0.03591295963440413,0.28566526542954485, +-0.702683426913565,-2.2478272826297823,-0.632380053556842,0.2021164528230092,2.0226426201408407,-0.5632489318308577,0.16390698639055556,1.3154246023330654,-0.1968491311406955,-0.855556129257285, +-0.7450374114908462,-0.842117964954797,0.913844045540989,0.5058210481500858,-1.4143744240760172,0.6341485576796191,1.832510600833685,-1.7896586246026123,0.16766122304795777,-1.5480155162068, +0.8258398581278497,-0.5952150867229102,-0.05956853266747463,1.7229333512417482,-0.6731451106129408,-0.9779714986007737,-1.4550244766907297,-0.4797338576935954,-0.21010020800098597,0.3823078879398309, +1.2874595675611946,0.7023555411559113,0.6973219294981006,1.0417061838145714,-0.34470609438152783,0.22328873475182692,-0.5083201089851641,-0.06304971787361029,1.1852370438638522,0.8972207610490645, +-0.22985117325452228,-0.4474711228580779,0.04694945847823597,-1.2806014795215817,-2.0266032717954148,1.1108538035972626,1.1477384754264384,0.018680889329061003,1.0444545272833936,0.25988551087840417, +0.45256261744044596,0.4385862524630318,-0.8213294921512531,0.3921574866543845,1.3834297443559553,1.513637585341824,0.8484134746577701,-0.5935068315826868,-1.712130467237418,-0.8313492837336167, 
+1.122032277962217,0.7768035610668826,0.3433434371621828,-1.7869628679358838,-0.7975182404615141,-1.0700395143163668,-0.7401353722999243,-0.028887702431866183,-0.47578363932300805,-0.8969097570964671, +-0.05803093343836442,-0.8279865262896255,-0.0912727651544626,0.6372469730464693,-0.15806255909448888,-1.2921218894780446,0.10328641778646325,0.6898456982638808,2.289137886644644,1.5970607366160163, +0.2439850941480152,-0.964378102915521,-0.6183011069997075,-1.4920650913682598,-0.6885869204040976,1.421032483819128,-0.2687688226809734,-0.264791263040289,0.36404382430095833,-1.390076701807508, +-0.5553916692558005,0.9240552239174681,0.5066380454731645,-1.0066230535286396,1.68192105204443,1.6767575117045055,0.2428619895593736,0.40854925121469327,-0.2814548881734565,0.798440401183689, +-1.0662295227716057,1.0426572715963751,-0.18845211344807988,-1.1841011719034582,0.8891475386864464,0.14949682453099572,-0.9662875010219775,1.1186193811009548,-0.6961429923624249,-0.6622803581432037, +-0.10056476375217774,0.8004703973637624,0.4244802503764249,0.1784509609626542,1.9274333855146484,-0.35881118081651375,-0.21450815072433194,1.2683031471561022,-0.391728163087714,-1.2062455373499037, +-0.30956197704813565,-0.11742131525657053,0.5019157142136683,0.13834387079597477,0.8890784289883015,-0.6086307455572603,-0.6692251684011726,0.988765204059065,-0.7309829925611591,0.8830860226614388, +0.25201284327187623,-1.6399711955334644,-0.26460634227499663,0.03141002812076171,-0.5473884654378105,1.6172279212920886,-0.9848059682529131,-0.15085619471577122,0.377351182439366,-1.7557400709511797, +0.35843378778577256,1.496092664417945,-0.8341698708377953,0.7764587277323864,-0.5768122865162595,-0.8262690449209876,0.04772170469740231,-0.08438315892412494,-0.4861619459991102,-1.8875001178069104, +-1.0410930565938787,-1.2020451102529903,-0.15110174261301434,0.5346085747438967,-0.08860607771498129,-0.4712980427586677,-0.3687316092810766,0.21252413873942116,-0.29117204987423545,-0.12445663740868486, 
+1.3616484166132734,0.8478283484413234,0.7928827098462297,1.181544836259245,-1.944201948905618,-0.3934059892720601,-0.15021272874088135,0.41823877182746305,1.186700585347594,1.6619316099499364, +1.4464261013387971,-0.5832776232060655,-1.0804631968612686,-0.8745076732850477,-1.1213037552340643,1.3064295454987762,-0.981017940912456,0.004500001850963446,0.5177841186069797,-0.22033504286396133, +-2.641401069894319,0.24841763456339525,0.30903081363187934,-0.7604310228192585,1.063467140720936,-0.1623969235596424,-0.3224412739687023,0.6522632650435297,1.3375978417410594,-0.7019221035834213, +0.3725383122636178,0.6715086987841308,0.8845983871100538,-0.9597487148353244,-0.676591303496502,1.0456879012291618,1.1745988764782913,0.10228225394825734,0.5379250225653073,2.2608391986731533, +-0.3737484732426458,-0.8389339330501493,-1.5139477138724424,1.6879238697823462,-0.5129560719998109,-0.801903447990331,2.170939452094135,1.0618978984013336,-2.0891180611202547,-0.20883925944734025, +0.6212116619332426,-0.3962369286354697,1.3415262515333521,-0.471762536424407,-1.3856034460106053,-0.09085036065111476,-2.1735320870691037,-0.06121381711701479,-2.487306087136384,-1.1197116124901276, +-0.9763572867381737,0.4568396232597235,-0.9233897132129959,-0.1307264234923046,2.409212099458706,2.007406553645663,-0.281862195051328,-0.5868632827726844,0.14367791113645995,0.8996279983080616, +-0.046419859695842496,0.5795842965353081,-1.6320260039025511,-0.36556244956898465,-0.8333270274707582,1.5263042272599954,-2.156829255767851,-0.1130081880414644,-0.4153975952873276,-0.08022209158044147, +2.470072634522648,-1.0132727525686789,-0.9438315981141341,0.4494055713154702,0.38866490616861554,-0.18771186755774513,-0.7502798732510992,-1.0550926884486564,0.6069810440430546,-0.791081778443559, +-0.5608863878174021,-0.44529076106413235,0.02499399833594764,-1.1086939473023842,1.1304659575259257,1.7017766312706495,-0.11586896014122029,0.39824119141914843,0.06500627636153891,2.1874976813287925, 
+0.4638074886757097,-1.1941209438439722,-1.5942086374937223,-0.3101824630163965,-0.5681838782504276,-1.2107134996660363,0.06177449595088415,-1.3955361022179409,-0.05235035556058406,-2.4706189143223347, +-0.597598743632381,0.6663773420159442,-0.2776629873293613,-0.5359424765483027,-0.30027146272981703,-0.1320490027487581,-0.3175209692702496,-0.8718449130030979,-0.7213780669096316,1.2858237544596307, +-0.7439818113550489,-0.20039781575594962,-0.10970185522313734,-0.9159624180187499,0.9624088803676083,0.4381073573733069,1.701201481703785,-1.8176071035752268,0.7030668152708489,0.22286954958129868, +-1.6238947611811239,0.03282080705322925,1.5213737140606225,0.3788767951190237,-0.2607752699454086,0.6216972689838227,-1.0811189378587158,0.11287028138885082,0.8762553045802186,-0.2975353405048947, +-0.8986445158201646,0.10025679344273611,-1.1292680265131274,-2.264692935753024,-0.6293977200527144,0.14366848766482693,-1.4745648885671987,0.040778613850494805,-0.04611815809402704,-0.7206474034889225, +-0.1868280865553794,-1.0698135792528105,1.0355564744730799,-1.52317893847487,2.1129384992656353,1.3170492567877767,-0.8510403865232451,-1.395246706785456,2.466628235071032,1.6957363288936458, +-0.41925182407784467,-0.21269126551492196,1.3691105400622479,0.10249554270090608,0.532845319523707,-0.49029485906525444,1.153608164212892,1.0908893198154797,0.18044031728823567,-0.7830755427728686, +0.05737617418543842,-0.29848102375581365,-2.7134711055038454,-0.055851578996482965,0.47698744914920727,-1.1455953910479435,-0.9717884866923544,1.461657518920883,2.365400227579913,0.03451223911655788, +0.30469929893899145,-0.04170313843465019,-0.6523480554910308,0.3674709636711896,-0.11628696503696649,0.673766833113013,0.2599502853275583,1.8944675985948225,-0.2145405169727119,0.7537904229524567, +-1.3777001999796166,-3.7564333068358264,0.5128501933015785,-0.42002914669733465,0.37476049006374934,0.48138022609673914,-1.023896339234406,0.5355807919990666,-0.8238568327137443,0.9004848375610387, 
+-1.0680274962334426,0.5286349407772359,-0.3084937244453122,-0.9960674788790194,0.52716283376019,0.6209021868626674,-0.17187743037612954,0.19318827853059115,-0.40699191053416134,1.9651126911031436, +0.3958287301004263,0.4833321801208889,1.4070741965274227,0.7021196040523723,0.36847934736878807,0.10583915671225835,0.09117880139185003,-0.6259651058454042,0.2858255625488106,-0.3735214270619142, +-0.5712914988398158,0.36404227494776903,0.7485041696473334,-0.5745467458043815,-0.3455431185958761,0.4781412632213539,-1.3662818714701908,-0.5064669148295144,-1.607094596355901,0.37007128426595637, +-1.108997164544087,0.3520034739912585,1.1554619277519136,1.093512587136951,1.4728805598512327,-0.2460827037194231,-1.0536942519808834,1.4858034371580646,0.4570078313166751,1.42503188705393, +1.5450585084815633,1.281954457700573,1.0940503849030279,1.6655613550961876,0.6930891904014453,-0.5396350209998049,-1.3315137514842592,0.34113707088106526,-0.7746081022569195,0.5271439538211644, +-0.9427454907833432,1.0951465770974544,-1.025273211652733,0.30139734351228575,0.5475767795189297,-1.4216248581655668,2.2411194729256945,0.6437861381898132,-1.0048536639663654,1.4906122453157982, +-1.886886872199838,1.5573655290238138,-1.1448345706106082,1.244284009352976,-0.1606254798584027,-0.371079012987277,-0.2483780297710399,-0.3196428157748758,-1.6359693425646569,-0.2922943067482743, +-0.9366657463146582,1.3972850320977979,1.2167501913180951,-1.0150043453876934,0.39541678086051035,0.9020029157652916,2.6731153905029674,2.511540126977003,1.7129024801218697,-0.8469653381482249, +0.7520518754350506,0.7126507547293423,2.472123224635769,0.8726346699260072,-0.9572304770729695,0.052334487121023066,1.8769054573315311,-0.19195274586763583,0.005232114849078027,-0.9740040604473592, +0.06477296222365062,-1.2261774447174418,-1.4775735781203534,-1.5814158079210827,0.14460686605536932,-0.5536737640275459,2.3782945850844768,0.16260381712280966,1.7524567026073261,1.4597423126993818, 
+1.2902713231471423,-0.26096878308512594,0.2218829091332136,-0.43084542283054394,1.8845971667306294,-1.024121870557962,1.3178336649624067,-0.5114536687155472,-0.7240779790973849,0.873514296063373, +0.5657994304045642,1.140562573751388,-1.94649374296141,-0.4297424283157214,-0.027314272082546973,0.8729856907220525,-0.08417012106270763,0.5731029339692499,0.49303642946967396,-0.5314239973270717, +0.5416120003362928,-0.3708854800050764,-0.9339193457266222,0.13125551913690028,2.794121133208902,-0.542331608193647,-1.3462866805622555,-0.5901293849390037,-1.4056443566216117,-0.9625538010290875, +-0.19423231266076896,-0.6046756510985729,0.9445181151033242,-1.264266109944261,0.6665487618607072,-0.1707693449447727,0.5064353420594645,0.5804136809437611,-1.2781964172503113,-0.09121600910074978, +0.5259851602598057,-0.9999647940455413,0.045501792647765466,1.6108024073058094,-0.7099993728357546,-2.0521583673699166,-0.3194812125127862,-0.35804927086256666,0.2560654153555399,0.6923840283788547, +0.9080648455023592,1.7439798202026864,-0.03596222146814517,0.39031531912462275,1.8524967423514822,0.1567340122227937,0.251357698698973,0.3629573944681061,-0.5143070033368221,-1.7545020162798128, +0.6980052348778156,-0.03354878714619265,0.8241776536734485,0.5169686907821317,0.03563668749598692,0.7005155961977627,0.5247740800340378,0.545940627890215,0.6281537192752242,0.8773729822072436, +1.1017193016517095,0.264586466125056,-0.6626470585006473,1.0681412020445873,-0.4801234222922361,-0.8422747185091037,-2.728574476529456,-0.6335545879338574,0.460685025002803,-1.042892177641355, +1.70666724667526,-0.046613776041511915,1.0474470927830521,1.321881966208811,-1.5936388870488145,0.7654587451518622,-1.0593333555082292,0.4802061136002552,0.7045631456972317,0.5033459957456066, +0.23027213502046526,-0.4885638204540596,1.107147674800936,-0.10566661799103907,1.720366865887645,-0.37886369581665114,-0.8454067035074944,-1.1930204354059115,0.5398081011617857,-1.5886955233028333, 
+-1.28468316797887,0.10631225424915554,-0.21743507805100407,0.9367045975903274,-0.7609861402573258,-3.092323444072416,1.231209748166853,-0.23187394256400698,-1.2469560268203923,1.208076388946217, +-0.8397568134193272,-1.5375529441456377,0.029799077510264397,-0.3736789039597027,-2.066641253786553,3.240887005516722,-0.05769306567610732,1.9086832344961902,0.542929735780981,0.600112322144006, +1.3255238919317112,0.8416556189317737,0.29760056005244595,-1.847664999407991,-0.6219029946150246,0.7244190554539209,0.3660236404172676,-0.12614547797267497,-0.6323009504236679,-0.8188220564770147, +0.9003995471963193,-0.48516398355978446,0.10241464662345479,-1.4128655982245777,1.7768233602651904,-0.5118850243706106,-1.5149852797721692,-0.8905442359927294,0.8784201027934798,0.1143290582404718, +0.5000688124994342,0.7551437869948294,-0.019542838561191753,0.04524905910571688,-1.1699661993732589,-0.5939555134309966,-0.17834310250840543,-1.3234348308829367,0.09267010823932921,-2.3547340368838303, +0.3268554327513394,-0.7335726654729563,0.21759928934661762,0.13214675960607083,-0.547936292967854,0.9959834168766571,-0.33470864667944666,-0.045645617309816235,1.2425088946003373,-1.4333690677958115, +-1.0711028594294019,1.192576419864347,-2.8754912578998977,-0.39821023347937784,-0.2491335698162935,-1.1705324894832216,0.41867328590025965,0.36026192293513215,-0.3627740814923084,0.2609025880296909, +0.8602418942767805,-0.39484342345351275,1.1564286015947274,-0.15319527678262124,0.7339230044385474,-0.6866384758190027,0.382332849483597,-0.6188216171333276,0.8485292055500263,0.5329946181284229, +2.2291125445019477,0.6383726818895612,0.41487468211795236,-0.8039549499822289,0.7241014941906438,-1.0029929408891316,0.4281472570443341,-0.6086602205861127,-0.26466088290247025,0.4193850744832341, +-0.14122887954075952,-0.23087308419582975,-0.6785620188263598,-0.5570001328802707,-0.881708596661445,1.0592627794219402,-0.6985109326310371,-0.5453232200563775,-1.1640808992338176,0.28936449767624084, 
+0.5121716488860183,0.0731039375078201,2.1099072390357647,0.21087608843941613,1.196236906994629,0.6955277402333694,0.07772140296361134,0.8636056014969951,-1.0604625788162605,-0.32774618095802316, +0.2899067916913363,-0.5478092166104231,0.4683528131601164,-0.3504491754229584,1.110858740158801,-0.1278113090766107,0.5535321089775727,-0.1832476069693799,-0.7767937147129045,1.172467705567801, +0.6787387322734854,-0.7983659754115815,1.0930424695443006,-0.3236631720667679,-2.7232322430813607,0.9822645814517907,-0.3388122511239234,-0.0745468798952108,-1.9398935763958451,1.495395733873871, +-0.3049284003150525,0.5762155389625143,-0.8014545567380862,0.43813435644118953,-0.649515405349544,0.4655365901416243,-0.9636018719008397,-1.121418956721191,1.1729797027809177,-0.8350500518873758, +-0.8982303932845838,-0.8086680446045895,-0.9461449538887682,-0.2598836002408984,-0.5785988177076079,0.20326327133599148,-1.2308987319585354,-1.4419810146356775,0.9474864151774555,3.48631560989601, +1.4711124036117162,1.5418282734505364,0.13319576195890426,-1.8187631402477524,0.4772932269476494,0.7334570427857772,-1.1121755399492772,-0.004713264795192562,0.5136900934765356,-0.06671663367422388, +-1.1087529037241166,0.9196832854230983,-0.8018186825531745,0.0834398072105409,-0.06753206082654808,2.124881528429754,0.2773601988907859,-0.7284874492913913,0.9948256085644661,-0.6791058920601839, +1.3349135498080262,-0.7524912571187148,0.7380844238794382,-1.1656943817612926,0.3206225827904893,-0.1831267861215156,0.05061840900884146,-0.3181440643219913,1.3063134827481973,-0.6813931502530758, +-0.48640824899968627,-0.07109896606960103,0.5261489681219074,-0.7640369561553254,0.031699078327729405,1.2529886172038756,-0.3952599316466706,0.986610467508573,-1.99761732971906,0.3261413341257214, +-2.0778234343245856,-1.4931113255032638,-0.0797915449011487,-0.6034387473666666,0.5976353460364947,1.2866210094110508,-1.544616131816012,0.8065303934129711,-0.8574966999730479,-0.0022235294694127436, 
+0.39994301299990564,-0.9520795501642406,0.1941469619795966,0.0172975823538266,-0.13631472888815918,-0.5003197803800974,1.2668454196912144,0.8039122974821515,0.864064866197114,1.4308742912014245, +0.92134097498333,-0.14704549317914145,0.7579358835960421,0.024918865319870132,0.055693025557895605,-1.5990535342544396,-1.0969649507363628,-0.7402526866494064,0.42571523519785665,-0.9798388174079765, +0.8305951286833008,-0.9019498850415598,-0.7965065040667071,-0.3907873847642248,-0.8844335198271561,1.454389637079338,0.6616627015591853,0.09695272131114362,0.6743875027958123,0.5933917814342952, +-0.9717744862631208,-0.9431698593655028,-0.29846553438764317,1.424151795858875,0.3854003474352652,1.2988467245669775,-1.3898248317542632,-0.1638076428484137,0.16410929143031822,0.3890887067065585, +-0.6939342447328704,2.5173125828542253,-1.8404672318790047,1.932407277327195,1.1703078950686847,-0.04714463999679679,1.551775033692893,0.4015046347422687,0.776940328391358,-0.964819829988641, +0.13192562547413492,0.7243727483529265,0.9352768421524931,0.8086653298274159,1.0020307331290483,0.9776937074569293,-1.9895444911018931,-1.3980324189612396,0.18637144051855228,0.07708731094329056, +-2.553513811777063,0.7597925836057883,-0.8205425029562394,0.9283505069827288,-1.2546437028418538,-0.9707197333164149,-2.1295095152075283,-1.1906615023194131,-0.35740999665135215,-1.9529831745970587, +0.37288750914678687,0.6495118654579308,1.6475506806707045,-0.9538922317104757,0.40949454447392414,0.45613303557228385,0.26856396953977574,0.2704513801582775,0.7331051752087633,-0.8918644896850195, +0.6198749810665084,0.9227888428118424,-1.2295989381350863,-2.191573248454373,-0.2468217908320692,0.5483283228571106,-0.42945745510780853,-0.024390490087061133,-1.6846814160280879,0.1568824987875178, +0.9009848768990908,3.1773363670757084,-0.3422260524807743,0.12665036626795098,-0.9758481100082796,-0.09519181920919106,-1.0363737179534958,0.5822354074948151,-0.858648184611898,-0.7096049638959103, 
+-1.831115481538127,0.641166662426857,0.22256753397537843,1.1064072995427758,0.21677324659244204,0.6143506126760513,1.583359133322576,-1.712007107078276,0.1952490718472408,1.0728858532196495, +1.9137903506828962,0.8483492011522299,-0.7050083425872399,0.7627090684562343,0.5839501172788512,-0.6346880581793283,0.3273538737067905,-1.8165730926663832,0.7483037158868688,-0.7199376478819941, +0.5825126796069984,-0.09028123346815849,-1.7043899292280125,-0.10655857024602128,1.048514068105894,0.5512846639677564,-1.4285455205621194,-0.8014781131837048,-1.55005982953192,0.45755182629285673, +-1.9954386592689473,1.096999118492096,1.3537249009276293,-0.05378587505397002,-1.1341101323284708,-1.3478825288411616,0.6929632298768442,0.20116792476432763,0.5439695944688538,-0.5908726753824236, +2.0550948018448105,0.8037495819665421,0.12256372638762443,-1.3255485961979336,0.418974643956128,0.6833560528158458,-1.0574720763879784,0.1354678131040079,0.954404685943061,-0.3481458597067693, +0.2888501658461412,-0.5185669511336548,-0.6528894762199016,-1.130809851108533,0.7058400176970674,-0.3852561058427417,0.9302772050674063,1.775228414811492,-0.2776773729936865,1.8900446722048272, +-0.13887112104161264,0.8852550980728048,0.5272807352340843,-0.3101254095506767,-0.6363826115593768,0.5448109463533937,0.14729214478335007,-0.5402455955915714,1.6084325663488106,-1.4438000534818287, +0.8255617958302222,0.9782787269950584,0.5404603418878953,0.9392940549181147,0.5441690578775725,0.8499567822755905,0.5148202882318623,1.233489399900732,0.8256027399290528,-0.35469261065743257, +0.8904244611281978,-0.1562930492174166,1.0274740513013503,0.019292771207817767,0.25383561614853967,0.2468442868952202,1.3074748275020605,-0.22183469522767973,0.09234881136683232,0.8328997310521157, diff --git a/examples/oneapi/dpc/source/pca/pca_cor_dense_batch.cpp b/examples/oneapi/dpc/source/pca/pca_cor_dense_batch.cpp index 31f7bdd7421..8278bbb8a12 100644 --- a/examples/oneapi/dpc/source/pca/pca_cor_dense_batch.cpp +++ 
b/examples/oneapi/dpc/source/pca/pca_cor_dense_batch.cpp @@ -28,32 +28,46 @@ #include "example_util/utils.hpp" namespace dal = oneapi::dal; - -void run(sycl::queue& q) { - const auto train_data_file_name = get_data_path("pca_normalized.csv"); - - const auto x_train = dal::read(q, dal::csv::data_source{ train_data_file_name }); - - const auto pca_desc = dal::pca::descriptor<>().set_component_count(5).set_deterministic(true); +namespace pca = dal::pca; +template +void run(sycl::queue& q, const dal::table& x_train, const std::string& method_name, bool whiten) { + const auto pca_desc = + pca::descriptor<>().set_component_count(5).set_deterministic(true).set_whiten(whiten); const auto result_train = dal::train(q, pca_desc, x_train); + std::cout << method_name << "\n" << std::endl; + std::cout << "Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + std::cout << "Singular Values:\n" << result_train.get_singular_values() << std::endl; + + std::cout << "Variances:\n" << result_train.get_variances() << std::endl; + + std::cout << "Means:\n" << result_train.get_means() << std::endl; + + std::cout << "Explained variances ratio:\n" + << result_train.get_explained_variances_ratio() << std::endl; + const auto result_infer = dal::infer(q, pca_desc, result_train.get_model(), x_train); std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; } int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + for (auto d : list_devices()) { std::cout << "Running on " << d.get_platform().get_info() << ", " << d.get_info() << "\n" << std::endl; auto q = sycl::queue{ d }; - run(q); + run(q, x_train, "Training method: Correlation Whiten:false", false); + run(q, x_train, "Training method: Correlation Whiten:true", true); } return 0; } 
diff --git a/examples/oneapi/dpc/source/pca/pca_cor_dense_online.cpp b/examples/oneapi/dpc/source/pca/pca_cor_dense_online.cpp index 798cb6f8f54..c439e66c661 100644 --- a/examples/oneapi/dpc/source/pca/pca_cor_dense_online.cpp +++ b/examples/oneapi/dpc/source/pca/pca_cor_dense_online.cpp @@ -28,33 +28,49 @@ #include "example_util/utils.hpp" namespace dal = oneapi::dal; - -void run(sycl::queue& q) { - const auto train_data_file_name = get_data_path("pca_normalized.csv"); +namespace pca = dal::pca; +template +void run(sycl::queue& q, const dal::table& x_train, const std::string& method_name) { const std::int64_t nBlocks = 10; - const auto x_train = dal::read(q, dal::csv::data_source{ train_data_file_name }); - dal::pca::partial_train_result<> partial_result; - const auto pca_desc = dal::pca::descriptor<>().set_component_count(5).set_deterministic(true); + pca::partial_train_result<> partial_result; + const auto pca_desc = pca::descriptor<>().set_component_count(5).set_deterministic(true); auto input_table = split_table_by_rows(x_train, nBlocks); for (std::int64_t i = 0; i < nBlocks; i++) { - partial_result = dal::partial_train(pca_desc, partial_result, input_table[i]); + partial_result = dal::partial_train(q, pca_desc, partial_result, input_table[i]); } - auto result = dal::finalize_train(pca_desc, partial_result); + auto result_train = dal::finalize_train(q, pca_desc, partial_result); + + std::cout << "Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + + std::cout << "Singular Values:\n" << result_train.get_singular_values() << std::endl; + + std::cout << "Variances:\n" << result_train.get_variances() << std::endl; - const auto result_infer = dal::infer(q, pca_desc, result.get_model(), x_train); + std::cout << "Means:\n" << result_train.get_means() << std::endl; + + std::cout << "Explained variances ratio:\n" + << result_train.get_explained_variances_ratio() << 
std::endl; + + const auto result_infer = dal::infer(q, pca_desc, result_train.get_model(), x_train); std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; } int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + for (auto d : list_devices()) { std::cout << "Running on " << d.get_platform().get_info() << ", " << d.get_info() << "\n" << std::endl; auto q = sycl::queue{ d }; - run(q); + run(q, x_train, "Training method: Online Correlation"); } return 0; } diff --git a/examples/oneapi/dpc/source/pca/pca_cov_dense_batch.cpp b/examples/oneapi/dpc/source/pca/pca_cov_dense_batch.cpp new file mode 100644 index 00000000000..7e6eb45aaec --- /dev/null +++ b/examples/oneapi/dpc/source/pca/pca_cov_dense_batch.cpp @@ -0,0 +1,74 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include +#include +#include + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/pca.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +namespace pca = dal::pca; +template +void run(sycl::queue& q, const dal::table& x_train, const std::string& method_name, bool whiten) { + const auto pca_desc = pca::descriptor<>() + .set_component_count(5) + .set_deterministic(true) + .set_normalization_mode(pca::normalization::mean_center) + .set_whiten(whiten); + + const auto result_train = dal::train(q, pca_desc, x_train); + std::cout << method_name << "\n" << std::endl; + std::cout << "Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + + std::cout << "Singular Values:\n" << result_train.get_singular_values() << std::endl; + + std::cout << "Variances:\n" << result_train.get_variances() << std::endl; + + std::cout << "Means:\n" << result_train.get_means() << std::endl; + + std::cout << "Explained variances ratio:\n" + << result_train.get_explained_variances_ratio() << std::endl; + + const auto result_infer = dal::infer(q, pca_desc, result_train.get_model(), x_train); + + std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; +} + +int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_non_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + + for (auto d : list_devices()) { + std::cout << "Running on " << d.get_platform().get_info() + << ", " << d.get_info() << "\n" + << std::endl; + auto q = sycl::queue{ d }; + run(q, x_train, "Training method: Covariance Whiten:false", false); + run(q, x_train, "Training method: Covariance Whiten:true", true); + } + return 0; +} diff --git 
a/examples/oneapi/dpc/source/pca/pca_cov_dense_online.cpp b/examples/oneapi/dpc/source/pca/pca_cov_dense_online.cpp new file mode 100644 index 00000000000..4e08ec91f14 --- /dev/null +++ b/examples/oneapi/dpc/source/pca/pca_cov_dense_online.cpp @@ -0,0 +1,81 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include +#include +#include + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/pca.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +namespace pca = dal::pca; +template +void run(sycl::queue& q, const dal::table& x_train, const std::string& method_name, bool whiten) { + const std::int64_t nBlocks = 10; + + pca::partial_train_result<> partial_result; + const auto pca_desc = pca::descriptor<>() + .set_component_count(5) + .set_deterministic(true) + .set_normalization_mode(pca::normalization::mean_center) + .set_whiten(whiten); + auto input_table = split_table_by_rows(x_train, nBlocks); + + for (std::int64_t i = 0; i < nBlocks; i++) { + partial_result = dal::partial_train(q, pca_desc, partial_result, input_table[i]); + } + auto result_train = dal::finalize_train(q, pca_desc, partial_result); + std::cout << method_name << "\n" << std::endl; + std::cout << 
"Eigenvectors:\n" << result_train.get_eigenvectors() << std::endl; + + std::cout << "Eigenvalues:\n" << result_train.get_eigenvalues() << std::endl; + + std::cout << "Singular Values:\n" << result_train.get_singular_values() << std::endl; + + std::cout << "Variances:\n" << result_train.get_variances() << std::endl; + + std::cout << "Means:\n" << result_train.get_means() << std::endl; + + std::cout << "Explained variances ratio:\n" + << result_train.get_explained_variances_ratio() << std::endl; + + const auto result_infer = dal::infer(q, pca_desc, result_train.get_model(), x_train); + + std::cout << "Transformed data:\n" << result_infer.get_transformed_data() << std::endl; +} + +int main(int argc, char const* argv[]) { + const auto train_data_file_name = get_data_path("pca_non_normalized.csv"); + + const auto x_train = dal::read(dal::csv::data_source{ train_data_file_name }); + + for (auto d : list_devices()) { + std::cout << "Running on " << d.get_platform().get_info() + << ", " << d.get_info() << "\n" + << std::endl; + auto q = sycl::queue{ d }; + run(q, x_train, "Training method: Online Covariance Whiten:false", false); + run(q, x_train, "Training method: Online Covariance Whiten:true", true); + } + return 0; +} diff --git a/examples/oneapi/dpc/source/pca/pca_precomputed_cor_dense_batch.cpp b/examples/oneapi/dpc/source/pca/pca_precomputed_cor_dense_batch.cpp index 9acbc60f966..50414074e8c 100644 --- a/examples/oneapi/dpc/source/pca/pca_precomputed_cor_dense_batch.cpp +++ b/examples/oneapi/dpc/source/pca/pca_precomputed_cor_dense_batch.cpp @@ -28,15 +28,15 @@ #include "example_util/utils.hpp" namespace dal = oneapi::dal; - +namespace pca = dal::pca; void run(sycl::queue& q) { const auto train_data_file_name = get_data_path("precomputed_correlation.csv"); const auto x_train = dal::read(q, dal::csv::data_source{ train_data_file_name }); using float_t = float; - using method_t = dal::pca::method::precomputed; - using task_t = dal::pca::task::dim_reduction; - using 
descriptor_t = dal::pca::descriptor; + using method_t = pca::method::precomputed; + using task_t = pca::task::dim_reduction; + using descriptor_t = pca::descriptor; const auto pca_desc = descriptor_t().set_component_count(5).set_deterministic(true); const auto result_train = dal::train(q, pca_desc, x_train); diff --git a/examples/oneapi/dpc/source/pca/pca_precomputed_cov_dense_batch.cpp b/examples/oneapi/dpc/source/pca/pca_precomputed_cov_dense_batch.cpp index 95b22152571..acb4ab9c2a0 100644 --- a/examples/oneapi/dpc/source/pca/pca_precomputed_cov_dense_batch.cpp +++ b/examples/oneapi/dpc/source/pca/pca_precomputed_cov_dense_batch.cpp @@ -28,15 +28,15 @@ #include "example_util/utils.hpp" namespace dal = oneapi::dal; - +namespace pca = dal::pca; void run(sycl::queue& q) { const auto train_data_file_name = get_data_path("precomputed_covariance.csv"); const auto x_train = dal::read(q, dal::csv::data_source{ train_data_file_name }); using float_t = float; - using method_t = dal::pca::method::precomputed; - using task_t = dal::pca::task::dim_reduction; - using descriptor_t = dal::pca::descriptor; + using method_t = pca::method::precomputed; + using task_t = pca::task::dim_reduction; + using descriptor_t = pca::descriptor; const auto pca_desc = descriptor_t().set_component_count(5).set_deterministic(true); const auto result_train = dal::train(q, pca_desc, x_train);