Skip to content

Commit

Permalink
refactor: option to turn on sse2 optimization (#4666)
Browse files Browse the repository at this point in the history
* refactor: option to turn on sse2 optimization

* remove sse2 option

* add opt for std sqrt

* add opt to ci

* add to vw_core

* more ci defs

* add to more CIs

* ;

* asan preset

* add to valgrind + setup.py

* macOS

* fix wasm

* remove std from wheels

* comment

* macos version

* revert

---------

Co-authored-by: Alexey Taymanov <[email protected]>
  • Loading branch information
bassmang and ataymano authored Jun 20, 2024
1 parent f204897 commit e6ead08
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Install dependencies
run: brew install cmake boost flatbuffers ninja
- name: Configure
run: cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DWARNINGS=Off -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DVW_INSTALL=Off
run: cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DWARNINGS=Off -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DVW_INSTALL=Off -DSTD_INV_SQRT=ON
- name: Build
run: cmake --build build --target all
- name: Unit tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/valgrind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
submodules: recursive
- name: Build C++ VW binary
run: |
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DSTD_INV_SQRT=ON
cmake --build build
- name: Upload vw binary
uses: actions/upload-artifact@v2
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/vendor_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
-DWARNINGS=On
-DWARNING_AS_ERROR=On
-DVW_CXX_STANDARD=17
-DSTD_INV_SQRT=ON
- name: Build
run: cmake --build build
- name: Unit tests
Expand Down Expand Up @@ -85,6 +86,7 @@ jobs:
-DVW_ZLIB_SYS_DEP=Off
-DVW_BOOST_MATH_SYS_DEP=Off
-DVW_INSTALL=Off
-DSTD_INV_SQRT=ON
- name: Build
run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --config ${{ matrix.build_type }}
- name: Test run_tests.py
Expand Down Expand Up @@ -118,6 +120,7 @@ jobs:
-DVW_ZLIB_SYS_DEP=Off
-DVW_BOOST_MATH_SYS_DEP=Off
-DVW_INSTALL=Off
-DSTD_INV_SQRT=ON
- name: Build
run: cmake --build build
- name: Unit tests
Expand Down
2 changes: 1 addition & 1 deletion .scripts/linux/build-static-java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ cd build
# /usr/local/bin/gcc + g++ is 9.2.0 version
cmake -E env LDFLAGS="-Wl,--exclude-libs,ALL -static-libgcc -static-libstdc++" cmake .. -DCMAKE_BUILD_TYPE=Release -DWARNINGS=Off -DBUILD_JAVA=On -DBUILD_DOCS=Off -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On\
-DBUILD_PYTHON=Off -DSTATIC_LINK_VW_JAVA=On -DCMAKE_C_COMPILER=/usr/local/bin/gcc -DCMAKE_CXX_COMPILER=/usr/local/bin/g++ \
-DBUILD_TESTING=Off -DVW_ZLIB_SYS_DEP=Off -DBUILD_SHARED_LIBS=Off -DVW_BUILD_LAS_WITH_SIMD=Off
-DBUILD_TESTING=Off -DVW_ZLIB_SYS_DEP=Off -DBUILD_SHARED_LIBS=Off -DVW_BUILD_LAS_WITH_SIMD=Off -DSTD_INV_SQRT=ON
NUM_PROCESSORS=$(nproc)
make vw_jni -j ${NUM_PROCESSORS}
2 changes: 1 addition & 1 deletion .scripts/linux/build-with-coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR=$SCRIPT_DIR/../../
cd $REPO_DIR

cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DVW_GCOV=ON -DWARNINGS=OFF -DBUILD_JAVA=Off -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DVW_GCOV=ON -DWARNINGS=OFF -DBUILD_JAVA=Off -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DSTD_INV_SQRT=ON
cmake --build build
2 changes: 1 addition & 1 deletion .scripts/linux/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ cd $REPO_DIR
# If parameter 1 is not supplied, it defaults to Release
BUILD_CONFIGURATION=${1:-Release}

cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${BUILD_CONFIGURATION} -DWARNINGS=Off -DWARNING_AS_ERROR=On -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_JAVA=On -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${BUILD_CONFIGURATION} -DWARNINGS=Off -DWARNING_AS_ERROR=On -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_JAVA=On -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DSTD_INV_SQRT=ON
cmake --build build --target all
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ option(VW_BUILD_VW_C_WRAPPER "Enable building the c_wrapper project" ON)
option(vw_BUILD_NET_CORE "Build .NET Core targets" OFF)
option(vw_BUILD_NET_FRAMEWORK "Build .NET Framework targets" OFF)
option(VW_BUILD_WASM "Add WASM target" OFF)
option(STD_INV_SQRT "Use standard library inverse square root" OFF)

if(VW_INSTALL AND NOT VW_ZLIB_SYS_DEP)
message(WARNING "Installing with a vendored version of zlib is not recommended. Use VW_ZLIB_SYS_DEP to use a system dependency or specify VW_INSTALL=OFF to silence this warning.")
Expand Down
8 changes: 8 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@
"VW_FEAT_CB_GRAPH_FEEDBACK": {
"type": "BOOL",
"value": "On"
},
"STD_INV_SQRT": {
"type": "BOOL",
"value": "On"
}
}
},
Expand Down Expand Up @@ -208,6 +212,10 @@
"VCPKG_TARGET_TRIPLET": {
"type": "STRING",
"value": "wasm32-emscripten"
},
"STD_INV_SQRT": {
"type": "BOOL",
"value": "On"
}
}
}
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def build_cmake(self, ext):
"-DBUILD_TESTING=Off",
"-DWARNINGS=Off",
"-DVW_FEAT_CB_GRAPH_FEEDBACK=On",
"-DSTD_INV_SQRT=On",
]

# This doesn't work as expected for Python3.6 and 3.7 on Windows.
Expand Down
4 changes: 4 additions & 0 deletions vowpalwabbit/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,10 @@ if (MSVC_IDE)
target_sources(vw_core PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/vw_types.natvis> )
endif()

# When the STD_INV_SQRT option is enabled, define the STD_INV_SQRT preprocessor
# symbol for vw_core. PUBLIC also propagates the definition to targets that
# link against vw_core.
if(STD_INV_SQRT)
target_compile_definitions(vw_core PUBLIC STD_INV_SQRT)
endif()

# Clang-cl on Windows has issues with our usage of SIMD types. Turn it off explicitly for Windows + clang-cl to mitigate.
# See issue #
if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
Expand Down
49 changes: 48 additions & 1 deletion vowpalwabbit/core/src/reductions/gd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,53 @@ void sync_weights(VW::workspace& all)
all.sd->contraction = 1.;
}

VW_WARNING_STATE_PUSH
VW_WARNING_DISABLE_UNUSED_FUNCTION
/// Approximates 1 / sqrt(x) with the Carmack/Quake/SGI bit-level trick followed by
/// a single Newton-Raphson refinement step.
///
/// @param x value whose inverse square root is wanted
/// @return an approximation of 1 / sqrt(x)
inline float quake_inv_sqrt(float x)
{
  static_assert(sizeof(int) == sizeof(float), "Floats and ints are converted between, they must be the same size.");

  const float half_x = 0.5f * x;

  // Reinterpret the float's bit pattern as an integer.
  // NOTE(review): punning through reinterpret_cast is sensitive to strict-aliasing
  // rules; a std::memcpy based copy would be the portable alternative.
  int bits = reinterpret_cast<int&>(x);

  // Magic-constant subtraction yields the initial guess for Newton's method.
  bits = 0x5f3759d5 - (bits >> 1);

  // View the adjusted bits as a float again.
  const float guess = reinterpret_cast<float&>(bits);

  // One round of Newton's method sharpens the guess.
  return guess * (1.5f - half_x * guess * guess);
}
VW_WARNING_STATE_POP

/// Computes (an approximation of) 1 / sqrt(x), choosing the implementation at
/// compile time. Exactly one of the following paths is compiled:
///   - STD_INV_SQRT defined: exact-as-possible 1.f / std::sqrt(x).
///   - inline SIMD allowed (VW_NO_INLINE_SIMD not defined) and ARM NEON available:
///     hardware estimate refined by two Newton-Raphson iterations.
///   - inline SIMD allowed and SSE2 available: hardware reciprocal square root
///     estimate (_mm_rsqrt_ss), used as-is without refinement.
///   - otherwise: quake_inv_sqrt fallback.
///
/// @param x value whose inverse square root is wanted
/// @return 1 / sqrt(x), approximate on every path except the standard library one
static inline float inv_sqrt(float x)
{
  // The branches are mutually exclusive so no dead code is left behind an early
  // return (the previous layout compiled the SIMD/fallback body after the
  // STD_INV_SQRT return, which is unreachable code).
#if defined(STD_INV_SQRT)
  // Standard library used in CI because SSE2 path has floating point differences in github machines
  return 1.f / std::sqrt(x);
#elif !defined(VW_NO_INLINE_SIMD) && defined(__ARM_NEON__)
  // Propagate into vector
  float32x2_t v1 = vdup_n_f32(x);
  // Estimate
  float32x2_t e1 = vrsqrte_f32(v1);
  // N-R iteration 1
  float32x2_t e2 = vmul_f32(e1, vrsqrts_f32(v1, vmul_f32(e1, e1)));
  // N-R iteration 2
  float32x2_t e3 = vmul_f32(e2, vrsqrts_f32(v1, vmul_f32(e2, e2)));
  // Extract result
  return vget_lane_f32(e3, 0);
#elif !defined(VW_NO_INLINE_SIMD) && defined(__SSE2__)
  // Load the scalar, take the hardware estimate, and store it back.
  __m128 eta = _mm_load_ss(&x);
  eta = _mm_rsqrt_ss(eta);
  _mm_store_ss(&x, eta);
  return x;
#else
  // No usable SIMD path: fall back to the portable bit-trick approximation.
  return quake_inv_sqrt(x);
#endif
}

VW_WARNING_STATE_PUSH
VW_WARNING_DISABLE_COND_CONST_EXPR
template <bool sqrt_rate, bool feature_mask_off, size_t adaptive, size_t normalized, size_t spare>
Expand Down Expand Up @@ -580,7 +627,7 @@ inline float compute_rate_decay(power_data& s, float& fw)
float rate_decay = 1.f;
if (adaptive)
{
if (sqrt_rate) { rate_decay = 1.0f / std::sqrt(w[adaptive]); }
if (sqrt_rate) { rate_decay = inv_sqrt(w[adaptive]); }
else { rate_decay = powf(w[adaptive], s.minus_power_t); }
}
if VW_STD17_CONSTEXPR (normalized != 0)
Expand Down

0 comments on commit e6ead08

Please sign in to comment.