Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Address CR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
larroy authored and Pedro Larroy committed Jan 15, 2019
1 parent bc21978 commit 7d77150
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 104 deletions.
104 changes: 51 additions & 53 deletions include/mxnet/mxfeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,65 +119,63 @@





namespace mxnet {
namespace features {
// See the build configuration (e.g. CMakeLists.txt) for the corresponding compile flags

/// Compile time features.
///
/// Each enumerator is a bit index identifying one optional feature; values are
/// assigned sequentially starting at CUDA = 0. MAX_FEATURES is not a feature:
/// it is the number of enumerators preceding it and is meant to size
/// containers indexed by these values. Append new features immediately before
/// MAX_FEATURES so that the existing numeric values stay stable.
enum : uint32_t {
  // NVIDIA, CUDA
  CUDA = 0,
  CUDNN,
  NCCL,
  CUDA_RTC,
  TENSORRT,

  // CPU Features / optimizations
  CPU_SSE,
  CPU_SSE2,
  CPU_SSE3,
  CPU_SSE4_1,
  CPU_SSE4_2,
  CPU_SSE4A,  // AMD extensions to SSE4
  CPU_AVX,
  CPU_AVX2,

  // Multiprocessing / CPU / System
  OPENMP,
  SSE,  // NOTE(review): separate index from CPU_SSE above; confirm both are needed
  F16C,
  JEMALLOC,

  // Math libraries & BLAS
  // Flavour of BLAS
  BLAS_OPEN,
  BLAS_ATLAS,
  // Intel(R) Math Kernel Library
  BLAS_MKL,
  BLAS_APPLE,
  // Other math libraries:
  // Linear Algebra PACKage
  LAPACK,
  // Intel(R) Math Kernel Library for Deep Neural Networks
  MKLDNN,

  // Image processing
  OPENCV,

  // Misc
  CAFFE,
  PROFILER,
  DIST_KVSTORE,
  CXX14,
  // Signal handler to print stack traces on exceptions
  SIGNAL_HANDLER,
  DEBUG,

  // size indicator
  MAX_FEATURES
};


Expand Down
2 changes: 1 addition & 1 deletion python/mxnet/mxfeatures.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Licensed to the Apache Software Foundation (ASF) under one
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
Expand Down
97 changes: 47 additions & 50 deletions src/mxfeatures.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/*
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand All @@ -25,90 +25,87 @@

#include "mxnet/mxfeatures.h"
#include <bitset>
#include "dmlc/logging.h"


namespace mxnet {
namespace features {


class FeatureSet {
public:
FeatureSet():
feature_bits() {
// GPU
feature_bits.set(CUDA, MXNET_USE_CUDA);
feature_bits.set(CUDNN, MXNET_USE_CUDNN);
feature_bits.set(NCCL, MXNET_USE_NCCL);
feature_bits.set(CUDA_RTC, MXNET_ENABLE_CUDA_RTC);
feature_bits.set(TENSORRT, MXNET_USE_TENSORRT);

// Check flags for example with gcc -msse3 -mavx2 -dM -E - < /dev/null | egrep "SSE|AVX"
FeatureSet() :
feature_bits() {
// GPU
feature_bits.set(CUDA, MXNET_USE_CUDA);
feature_bits.set(CUDNN, MXNET_USE_CUDNN);
feature_bits.set(NCCL, MXNET_USE_NCCL);
feature_bits.set(CUDA_RTC, MXNET_ENABLE_CUDA_RTC);
feature_bits.set(TENSORRT, MXNET_USE_TENSORRT);

// Check flags for example with gcc -msse3 -mavx2 -dM -E - < /dev/null | egrep "SSE|AVX"
#if __SSE__
feature_bits.set(CPU_SSE);
feature_bits.set(CPU_SSE);
#endif
#if __SSE2__
feature_bits.set(CPU_SSE2);
feature_bits.set(CPU_SSE2);
#endif
#if __SSE3__
feature_bits.set(CPU_SSE3);
feature_bits.set(CPU_SSE3);
#endif
#if __SSE4_1__
feature_bits.set(CPU_SSE4_1);
feature_bits.set(CPU_SSE4_1);
#endif
#if __SSE4_2__
feature_bits.set(CPU_SSE4_2);
feature_bits.set(CPU_SSE4_2);
#endif
#if __SSE4A__
feature_bits.set(CPU_SSE4A);
feature_bits.set(CPU_SSE4A);
#endif
#if __AVX__
feature_bits.set(CPU_AVX);
feature_bits.set(CPU_AVX);
#endif
#if __AVX2__
feature_bits.set(CPU_AVX2);
feature_bits.set(CPU_AVX2);
#endif

// CPU
feature_bits.set(OPENMP, MXNET_USE_OPENMP);
feature_bits.set(F16C, MXNET_USE_F16C);

// Math
feature_bits.set(BLAS_OPEN, MXNET_USE_BLAS_OPEN);
feature_bits.set(BLAS_ATLAS, MXNET_USE_BLAS_ATLAS);
feature_bits.set(BLAS_MKL, MXNET_USE_BLAS_MKL);
feature_bits.set(BLAS_APPLE, MXNET_USE_BLAS_APPLE);
feature_bits.set(LAPACK, MXNET_USE_LAPACK);
feature_bits.set(MKLDNN, MXNET_USE_MKLDNN);

// Image
feature_bits.set(OPENCV, MXNET_USE_OPENCV);

// Misc
feature_bits.set(CAFFE, MXNET_USE_CAFFE);
feature_bits.set(DIST_KVSTORE, MXNET_USE_DIST_KVSTORE);
feature_bits.set(SIGNAL_HANDLER, MXNET_USE_SIGNAL_HANDLER);
// CPU
feature_bits.set(OPENMP, MXNET_USE_OPENMP);
feature_bits.set(F16C, MXNET_USE_F16C);

// Math
feature_bits.set(BLAS_OPEN, MXNET_USE_BLAS_OPEN);
feature_bits.set(BLAS_ATLAS, MXNET_USE_BLAS_ATLAS);
feature_bits.set(BLAS_MKL, MXNET_USE_BLAS_MKL);
feature_bits.set(BLAS_APPLE, MXNET_USE_BLAS_APPLE);
feature_bits.set(LAPACK, MXNET_USE_LAPACK);
feature_bits.set(MKLDNN, MXNET_USE_MKLDNN);

// Image
feature_bits.set(OPENCV, MXNET_USE_OPENCV);

// Misc
feature_bits.set(CAFFE, MXNET_USE_CAFFE);
feature_bits.set(DIST_KVSTORE, MXNET_USE_DIST_KVSTORE);
feature_bits.set(SIGNAL_HANDLER, MXNET_USE_SIGNAL_HANDLER);
#ifndef NDEBUG
feature_bits.set(DEBUG);
feature_bits.set(DEBUG);
#endif

#if USE_JEMALLOC == 1
feature_bits.set(JEMALLOC);
feature_bits.set(JEMALLOC);
#endif
}
bool is_enabled(const unsigned feat) const {
CHECK_LT(feat, MAX_FEATURES);
return feature_bits.test(feat);
}
}
bool is_enabled(const unsigned feat) const {
CHECK_LT(feat, MAX_FEATURES);
return feature_bits.test(feat);
}

private:
std::bitset<MAX_FEATURES> feature_bits;
std::bitset<MAX_FEATURES> feature_bits;
};

// File-local (internal linkage) feature set; its constructor records the
// enabled features once, when this object is initialized.
static FeatureSet featureSet;

/// Reports whether the compile-time feature with index `feat` is enabled in
/// this build. Forwards to the file-local featureSet instance, which requires
/// `feat` to be less than MAX_FEATURES.
bool is_enabled(const unsigned feat) {
  return featureSet.is_enabled(feat);
}

} // namespace features
Expand Down

0 comments on commit 7d77150

Please sign in to comment.