Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 82 additions & 33 deletions src/include/clBLAS.h → src/include/ablas.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* ************************************************************************ */

/*! \file
* \brief clBLAS.h defines 'C' compatible callable functions and types that
* \brief ablas.h defines 'C' compatible callable functions and types that
* call into the library
* \details The minimum compiler versions the library should support
* ( These compilers have solid C++11 support):
Expand All @@ -25,30 +25,30 @@
*/

#pragma once
#ifndef _CL_BLAS_H_
#define _CL_BLAS_H_
#ifndef _ABLAS_H_
#define _ABLAS_H_

#include <stdbool.h>

/*!
* CMake-generated file to define export related preprocessor macros, including
* CLBLAS_EXPORT and CLBLAS_DEPRECATED
* ABLAS_EXPORT and ABLAS_DEPRECATED
*/
#include "clblas_export.h"
#include "ablas_export.h"

#ifdef __cplusplus
extern "C" {
#endif

#include "clBLAS-types.h"
#include "ablas_types.h"

/*! Define CLBLAS_USE_OPENCL to build library for OpenCL
/*! Define ABLAS_USE_OPENCL to build library for OpenCL
*/
#if defined( CLBLAS_USE_OPENCL )
#include "clBLAS-opencl.h"
#if defined( ABLAS_USE_OPENCL )
#include "ablas_opencl.h"
#else
// Boltzman headers to be included here
#include "clBLAS-hsa.h"
#include "ablas_hsa.h"
#endif

/*!
Expand All @@ -65,17 +65,17 @@ extern "C" {
/**@{*/

/*!
* \brief Enable/Disable asynchronous behavior for clBLAS
* \brief Enable/Disable asynchronous behavior for ablas
*
* \param[in] control A valid clsparseControl created with clblasCreateControl
* \param[in] control A valid ablas_control created with ablasCreateControl
* \param[in] async True to enable immediate return, false to block execution until event completion
*
* \ingroup STATE-SINGLE
*
* \returns \b clblasSuccess
* \returns \b ablasSuccess
*/
CLBLAS_EXPORT clblasStatus
clblasEnableAsync( clblasControl control, bool async );
// NOTE(review): `control` is taken by value here, while ablas_gemm takes `ablas_control *`.
// The definition of ablas_control is not visible in this header; confirm which form is intended.
ABLAS_EXPORT ablas_status
ablas_enable_async( ablas_control control, bool async );
/**@}*/

/*!
Expand All @@ -92,31 +92,80 @@ CLBLAS_EXPORT clblasStatus
/**@{*/


/*! \brief Refactored clBLAS API
* \details These pointers are not denoting arrays. The batch processing is specified inside of these
* structs with batch_size, \f$ C \leftarrow \alpha \ast A \ast B + \beta \ast C \f$
/*! \brief Refactored ablas API
* \details Generic matrix-matrix multiplication. The pointers below do not denote arrays; batch processing is
* described inside each struct by its num_matrices field
* \f$ c \leftarrow \alpha o (a \ast b) + \beta o c \f$
*
* The operator 'o' represents the entrywise (Hadamard) product. Supported operand combinations:
* scalar o scalar
* scalar o vector
* scalar o matrix
* vector o vector
* vector o matrix
* matrix o matrix
*
* The general equation can be simplified by the terms being either ZERO or IDENTITY.
*
* GEMM (L3)
* alpha - scalar, vector or matrix
* a - matrix
* b - matrix
* beta - scalar, vector or matrix
* c - matrix
*
* GEMV (L2)
* alpha - scalar or vector
* a - matrix
* b - vector
* beta - scalar or vector
* c - vector
*
* AXPY (L1)
* alpha - scalar or vector
* a - vector
* b - IDENTITY
* beta - IDENTITY
* c - vector
*
* SDOT (L1)
* alpha - IDENTITY
* a - vector
* b - vector
* beta - ZERO
* c - scalar
*
* SCAL (L1)
* alpha - ZERO
* a - ZERO
* b - ZERO
* beta - scalar, vector or matrix
* c - scalar, vector or matrix
*
*
* \param[in] alpha Scalar, vector or matrix multiplied entrywise into the product a * b
* \param[in] a Source matrix
* \param[in] b Source matrix
* \param[in] beta Scalar, vector or matrix multiplied entrywise into c on read
* \param[in,out] c Destination matrix
* \param[in] control clBLAS state object
* \param[in,out] control ablas state object
*/
CLBLAS_EXPORT clblasStatus
clblasGemm( const clblasScalar* alpha,
const clblasMatrix* a,
const clblasMatrix* b,
const clblasScalar* beta,
clblasMatrix* c,
clblasControl control );
// Every operand, including alpha and beta, is passed as an ablas_matrix: ablas_types.h
// describes scalars as an ablas_matrix with num_rows = 1 and num_cols = 1, and vectors
// as an ablas_matrix with num_rows = 1 or num_cols = 1.
ABLAS_EXPORT ablas_status
ablas_gemm(
const ablas_matrix *alpha,
const ablas_matrix *a,
const ablas_matrix *b,
const ablas_matrix *beta,
ablas_matrix *c,
ablas_control *control );
/**@}*/

// Example of older clBLAS API from v2.x.x
// CLBLAS_DEPRECATED clblasStatus
// clblasSgemm(
// clblasOrder order,
// clblasTranspose transA,
// clblasTranspose transB,
// Example of older ablas API from v2.x.x
// ABLAS_DEPRECATED ablasStatus
// ablasSgemm(
// ablasOrder order,
// ablasTranspose transA,
// ablasTranspose transB,
// size_t M,
// size_t N,
// size_t K,
Expand All @@ -141,4 +190,4 @@ CLBLAS_EXPORT clblasStatus
} // extern C
#endif

#endif // _CL_BLAS_H_
#endif // _ABLAS_H_
148 changes: 148 additions & 0 deletions src/include/ablas_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/* ************************************************************************
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************ */

/*! \file
* \brief ablas_types.h defines public types to be consumed by the library
* The types are agnostic to the underlying runtime used by the library
*/

#pragma once
#ifndef _ABLAS_TYPES_H_
#define _ABLAS_TYPES_H_

#include <stddef.h> /* size_t, used by ablas_matrix */

/*! \brief An enumeration to describe the precision of data pointed to by a
 * particular instance of a struct
 * \remarks This implies that aBLAS can support mixed precision operations
 * \note No trailing comma after the last enumerator, matching the other
 * enumerations in this header and keeping the declaration acceptable to
 * pre-C99 / pre-C++11 compilers.
 */
typedef enum ablas_precision_ {
    ablas_single_real,    /**< Single precision, real. */
    ablas_double_real,    /**< Double precision, real. */
    ablas_single_complex, /**< Single precision, complex. */
    ablas_double_complex  /**< Double precision, complex. */
} ablas_precision;

/*! \brief Selects which triangle of a matrix is referenced.
 * \details Consumed by the Hermitian, symmetric and triangular matrix routines.
 */
typedef enum ablas_uplo_ {
    /*! Upper triangle. */
    ablas_upper = 0,
    /*! Lower triangle. */
    ablas_lower = 1
} ablas_uplo;

/*! \brief Tells the triangular matrix routines whether the matrix is
 * unit triangular.
 */
typedef enum ablas_diag_ {
    /*! Unit triangular. */
    ablas_unit = 0,
    /*! Non-unit triangular. */
    ablas_non_unit = 1
} ablas_diag;

/*! \brief Position of matrix A relative to matrix B during multiplication. */
typedef enum ablas_side_ {
    /*! Multiply general matrix by symmetric, Hermitian or triangular
     *  matrix on the left. */
    ablas_left = 0,
    /*! Multiply general matrix by symmetric, Hermitian or triangular
     *  matrix on the right. */
    ablas_right = 1
} ablas_side;


/*! \brief Structure to encapsulate dense matrix/vector/scalar data for the aBLAS API.
 * \details Able to store multiple matrices (or vectors, scalars)
 * to facilitate high-performance batched operations;
 * gracefully becomes a 'normal' matrix when num_matrices == 1.
 * \verbatim
 clBLAS V2: Given a column major matrix with M, N, lda, nontranspose
 This matrix is represented in aBLAS as:
 num_rows = M
 num_cols = N
 row_stride = 1
 col_stride = lda
 num_matrices = 1
 \endverbatim
 * aBLAS API represents scalars as ablas_matrix with num_rows = 1 and num_cols = 1.
 * aBLAS API represents vectors as ablas_matrix with num_rows = 1 or num_cols = 1.
 *
 * \note It is the user's responsibility to allocate/deallocate buffers
 * \note Traditional matrix fields not explicitly represented within this structure
 * \li \b transpose layout
 * \li \b row/column major layout
 * \attention There has been significant debate about changing the matrix meta data below from host scalar values
 * into batched scalar values by changing their types to ablasScalar. The advantage is that we
 * could then process batched matrices of arbitrary row, column and stride values. The problem is
 * that both host and device need access to this data, which would introduce mapping calls. The host needs
 * the data to figure out how to form launch parameters, and the device needs access to be able to
 * handle matrix tail cases properly. This may have reasonable performance on APUs, but the performance
 * impact on discrete devices could be significant. For now, we keep num_rows, num_cols and the strides
 * as size_t values on the host.
 */
typedef struct ablas_matrix_ {

    /*! \brief Buffer that holds the matrix data.
     * \details Polymorphic pointer for the library. If aBLAS is compiled with BUILD_CLVERSION < 200,
     * the value will be treated as a buffer created with clCreateBuffer(). If
     * BUILD_CLVERSION >= 200 then this will be treated as a pointer allocated with clSVMAlloc().
     * For batched matrices, this buffer contains the packed values of all the matrices.
     */
    void *data;

    /*! \brief Precision of the data.
     */
    ablas_precision precision;

    /*! \brief This offset is added to the cl_mem location on device to define the beginning of the data
     * in the cl_mem buffers
     * \details Usually used to define the start of a smaller submatrix in a larger matrix allocation block.
     * This same offset is applied to every matrix in a batch.
     * \note NOTE(review): the unit of the offset (elements vs. bytes) is not stated here; confirm.
     */
    size_t offset;

    /*! \brief Number of rows in each matrix.
     * \details For batched matrices, this is a constant property of each 'matrix', where each matrix has
     * the same number of rows
     */
    size_t num_rows;

    /*! \brief Number of columns in each matrix.
     * \details For batched matrices, this is a constant property of each 'matrix', where each matrix has
     * the same number of columns
     */
    size_t num_cols;

    /*! \brief Number of matrices stored in the buffer; a single matrix would have num_matrices == 1.
     * \pre num_matrices > 0
     */
    size_t num_matrices;

    /*! \brief Stride to consecutive rows in each matrix.
     * \details ptr += row_stride would point to same column, same matrix, next row.
     * For column-major matrix, row_stride = 1.
     */
    size_t row_stride;

    /*! \brief Stride to consecutive columns in each matrix.
     * \details ptr += col_stride would point to same row, same matrix, next column.
     * For row-major matrix, col_stride = 1.
     */
    size_t col_stride;

    /*! \brief Stride to consecutive matrices.
     * \details ptr += matrix_stride would point to same row, same column, next matrix
     * \pre row_major: matrix_stride >= num_rows * row_stride
     * \pre column_major: matrix_stride >= num_cols * col_stride
     */
    size_t matrix_stride;

} ablas_matrix;

#endif
Loading