Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4c1d308
Initial CMake support
niclimcy Mar 18, 2023
600793e
Utility function for determining CUDA devices
rickardp Apr 2, 2023
b6ee896
Add missing dependency
rickardp Apr 2, 2023
9031c9a
Add device enumeration and device parameterization fixtures to tests
rickardp Apr 2, 2023
509b854
Skip CUDA setup tests if no CUDA support
rickardp Apr 2, 2023
7c2875e
No specific default target. To avoid "make" failing when nvcc not ins…
rickardp Apr 2, 2023
70ac792
Made test portable
rickardp Apr 3, 2023
4ba7471
Made C++ code compile on ARM64
rickardp Apr 3, 2023
411e03a
Renamed pythoninterface.c to .cpp to silence compiler warning about t…
rickardp Apr 4, 2023
542f9e2
Add skip annotation to make test suite not fail on non-CUDA
rickardp Apr 4, 2023
4fd8696
Github action
rickardp Apr 4, 2023
ec8313c
Fixing build
rickardp Apr 9, 2023
34d4a16
Testing to enable Windows to see what happens
rickardp Apr 10, 2023
76ee595
Attempt at three stage pipeline to avoid burning worker minutes
rickardp Apr 10, 2023
418cc79
Merge remote-tracking branch 'cmake/main' into apple-silicon
rickardp Apr 15, 2023
6f88f41
C++-canonical and type-safe threading
rickardp Apr 15, 2023
240a3de
Merge branch 'main' into apple-silicon
rickardp Apr 15, 2023
e9fa158
Updated .gitignore
rickardp Apr 15, 2023
8fc1d8c
Pthread is no more, so we can remove this
rickardp Apr 15, 2023
b654de9
Reworked pipelines to better support cross platform builds
rickardp Apr 15, 2023
eb1fcbe
Fixing Cuda pipelines
rickardp Apr 16, 2023
ef62500
Backported some logic from PR #229
rickardp Apr 16, 2023
1ad81df
Added cusparse_dev module
rickardp Apr 16, 2023
914a518
Put MSVC options where they belong
rickardp Apr 16, 2023
e1713be
Fixed last copy step
rickardp Apr 16, 2023
e5ad20c
Fixed windows build
rickardp Apr 16, 2023
be5f9b6
Update python-package.yml
rickardp Apr 17, 2023
b91ef2a
Update python-package.yml
rickardp Apr 17, 2023
cb8d1f6
Add all extension types
rickardp Apr 18, 2023
67d72cb
test MPS header setup
rickardp Apr 22, 2023
5254caf
Before merge
rickardp Jun 7, 2023
2b77064
Merge branch 'main' into apple-silicon-merge2
rickardp Jun 7, 2023
2801948
Merge remote-tracking branch 'upstream/main' into apple-silicon-merge2
rickardp Dec 30, 2023
84a6b78
Post merge fix errors
rickardp Jan 2, 2024
d03d296
Merge remote-tracking branch 'origin/main' into apple-silicon-merge2
rickardp Jan 2, 2024
c861bd2
Removed deprecated Python versions
rickardp Jan 2, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 201 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
name: Python package

# Pipeline overview:
#   1. build-shared-libs       - CPU (and macOS arm64 MPS) native libraries
#   2. build-shared-libs-cuda  - CUDA native libraries (Linux via Docker, Windows on the runner)
#   3. build-wheels            - one wheel per OS / arch / Python version, using the libraries above
#   4. publish                 - sdist + wheels pushed to PyPI for tagged refs
on:
  push:
    branches: [ "*" ]
  pull_request:
    # NOTE(review): merge commits in this branch refer to 'main'; confirm that
    # 'master' is really the branch pull requests target.
    branches: [ master ]
  release:
    types: [ published ]

jobs:

  ##
  # This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
  ##
  build-shared-libs:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        arch: [x86_64, aarch64]
        exclude:
          - os: windows-latest # This probably requires arm64 Windows agents
            arch: aarch64
    runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      # Check out code
      - uses: actions/checkout@v3
      # Install CMake on the runner (this job builds directly on the host)
      - name: Setup cmake
        # TODO confirm pinned version: the tag was obfuscated in the scraped source
        uses: jwlawson/actions-setup-cmake@v1.13
        with:
          cmake-version: '3.26.x'
      - name: Add msbuild to PATH
        # TODO confirm pinned version: the tag was obfuscated in the scraped source
        uses: microsoft/setup-msbuild@v1.1
        if: ${{ startsWith(matrix.os, 'windows') }}
      # Compile C++ code
      - name: Build C++
        shell: bash
        run: |
          set -ex
          build_os=${{ matrix.os }}
          build_arch=${{ matrix.arch }}
          ( git clone https://github.com/NVlabs/cub ./dependencies/cub; cd dependencies/cub; git checkout 1.11.0 )
          if [ ${build_os:0:6} == ubuntu -a ${build_arch} == aarch64 ]; then
            # Allow cross-compile on aarch64
            sudo apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
          fi
          if [ ${build_os:0:5} == macos -a ${build_arch} == aarch64 ]; then
            cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DENABLE_CUDA=OFF -DENABLE_MPS=ON .
          else
            cmake -DENABLE_CUDA=OFF .
          fi
          if [ ${build_os:0:7} == windows ]; then
            pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
          else
            make
          fi
          mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
          # nullglob: extensions that do not exist on this platform expand to nothing
          ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
      - name: Upload build artifact
        uses: actions/upload-artifact@v3
        with:
          name: shared_library
          path: output/*
          retention-days: 7
  ##
  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
  ##
  build-shared-libs-cuda:
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        arch: [x86_64, aarch64]
        cuda_version: ['12.1.0']
        exclude:
          - os: windows-latest # This probably requires arm64 Windows agents
            arch: aarch64
    runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      # Check out code
      - uses: actions/checkout@v3
      # Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation)
      - name: Set up Docker multiarch
        if: startsWith(matrix.os, 'ubuntu')
        uses: docker/setup-qemu-action@v2
      # On Linux we use CMake within Docker, so the host only needs CMake on other systems.
      # The matrix value is 'ubuntu-latest', so the prefix to test for is 'ubuntu'.
      - name: Setup cmake
        if: ${{ !startsWith(matrix.os, 'ubuntu') }}
        # TODO confirm pinned version: the tag was obfuscated in the scraped source
        uses: jwlawson/actions-setup-cmake@v1.13
        with:
          cmake-version: '3.26.x'
      # Windows: We install Cuda on the agent (slow)
      # TODO confirm pinned version: the tag was obfuscated in the scraped source
      - uses: Jimver/cuda-toolkit@v0.2.10
        if: startsWith(matrix.os, 'windows')
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda_version }}
          method: 'local'
          #sub-packages: '["nvcc","cudart","nvrtc_dev","cublas_dev","cusparse_dev","visual_studio_integration"]'
      - name: Add msbuild to PATH
        # TODO confirm pinned version: the tag was obfuscated in the scraped source
        uses: microsoft/setup-msbuild@v1.1
        if: ${{ startsWith(matrix.os, 'windows') }}
      # Compile C++ code
      - name: Build C++
        shell: bash
        run: |
          set -ex
          build_os=${{ matrix.os }}
          build_arch=${{ matrix.arch }}
          ( git clone https://github.com/NVlabs/cub ./dependencies/cub; cd dependencies/cub; git checkout 1.11.0 )
          if [ ${build_os:0:6} == ubuntu ]; then
            image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04
            echo "Using image $image"
            docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \
              "apt-get update \
              && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
              && cmake -DENABLE_CUDA=ON . \
              && make"
          else
            cmake -DENABLE_CUDA=ON .
            pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release"
          fi
          mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
          # nullglob: extensions that do not exist on this platform expand to nothing
          ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
      - name: Upload build artifact
        uses: actions/upload-artifact@v3
        with:
          # Same artifact name as the non-CUDA job: both jobs contribute to one 'shared_library' artifact
          name: shared_library
          path: output/*
          retention-days: 7
  ##
  # Packages the previously built shared libraries into one wheel per OS / arch / Python version.
  ##
  build-wheels:
    needs:
      - build-shared-libs
      - build-shared-libs-cuda
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        arch: [x86_64, aarch64]
        exclude:
          - os: windows-latest # This probably requires arm64 Windows agents
            arch: aarch64
    runs-on: ${{ matrix.os }}
    steps:
      # Check out code
      - uses: actions/checkout@v3
      # Download shared libraries
      - name: Download build artifact
        uses: actions/download-artifact@v3
        with:
          name: shared_library
          path: output/
      - name: Copy correct platform shared library
        shell: bash
        run: |
          cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
      # Select the Python interpreter for this matrix entry
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      #
      - name: Install Python dependencies
        shell: bash
        run: |
          pip install -r requirements.txt
      # TODO: How to run CUDA tests on GitHub actions?
      #- name: Run unit tests
      #  if: ${{ matrix.arch == 'x86_64' }} # Tests are too slow to run in emulation. Wait for real aarch64 agents
      #  run: |
      #    PYTHONPATH=. pytest --log-cli-level=DEBUG tests
      - name: Build wheel
        shell: bash
        run: |
          python setup.py bdist_wheel
      - name: Upload build artifact
        uses: actions/upload-artifact@v3
        with:
          name: bdist_wheel
          path: dist/bitsandbytes-*.whl
          retention-days: 7
  ##
  # Builds the sdist, collects all wheels and uploads them to PyPI (tagged refs only).
  ##
  publish:
    needs: build-wheels
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Build dist
        run: |
          python setup.py sdist
      - name: Download build artifact
        uses: actions/download-artifact@v3
        with:
          name: bdist_wheel
          path: dist/
      # List what is about to be published, for the job log
      - run: |
          ls -lR dist/
      - name: Publish to PyPi
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.pypi }}
22 changes: 20 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,26 @@
__pycache__/
*.py[cod]
*$py.class

# C extensions
# Compiled native libraries and object files, for every supported platform
*.so
*.dll
*.dylib
*.o
*.obj
# Metal intermediate (.air) and library (.metallib) files produced by the MPS build
*.air
*.metallib

# CMake generated files
# CMake is invoked in-source ("cmake ."), so its generated files and the
# generated build system land inside the repository tree.
CMakeCache.txt
CMakeScripts/
cmake_install.cmake
Makefile
CMakeFiles/
# Generated IDE projects (Visual Studio / Xcode)
*.sln
*.vcxproj*
*.xcodeproj/
# Presumably per-target and per-configuration output directories of the
# Visual Studio generator - TODO confirm
bitsandbytes.dir/
Debug/
Release/

# Distribution / packaging
.Python
Expand Down Expand Up @@ -133,4 +150,5 @@ dmypy.json

# Third-party checkouts (the CI workflow clones NVlabs/cub into ./dependencies/cub)
dependencies
cuda_build
# Staging directory the CI workflow copies built shared libraries into
output/
# Local editor settings
.vscode/*
121 changes: 121 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
cmake_minimum_required(VERSION 3.22.1)

# Backend switches. With both OFF a CPU-only library is built.
option(ENABLE_CUDA "Build for CUDA (Nvidia)" OFF)
option(ENABLE_MPS "Build for Metal Performance Shaders (Apple)" OFF)

if(ENABLE_CUDA)
    if(APPLE)
        message(FATAL_ERROR "CUDA is not supported on macOS" )
    endif()
    # NO_CUBLASLT selects the older architecture list and the "_nocublaslt" library suffix.
    option(NO_CUBLASLT "Don't use cuBLASLt" OFF)
    # Pick default target architectures before project() enables the CUDA
    # language. An explicit -DCMAKE_CUDA_ARCHITECTURES=... from the user wins;
    # a plain set() here would otherwise shadow the cache entry.
    if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        if(NO_CUBLASLT)
            set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72)
        else()
            set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90)
        endif()
    endif()
endif()

# Select exactly one backend. Each branch declares the project with the
# languages it needs and defines:
#   LIBSUFFIX   - suffix appended to the library file name
#   GPU_SOURCES - backend-specific sources added to the library target
if(ENABLE_CUDA)
    message("Building CUDA support for ${CMAKE_CUDA_ARCHITECTURES}")
    # Find CUDA tools if we are compiling with CUDA
    find_package(CUDAToolkit REQUIRED)
    # Encode the toolkit version in the library name, e.g. cuda121 / cuda121_nocublaslt
    if(NO_CUBLASLT)
        set(LIBSUFFIX "cuda${CUDAToolkit_VERSION_MAJOR}${CUDAToolkit_VERSION_MINOR}_nocublaslt")
    else()
        set(LIBSUFFIX "cuda${CUDAToolkit_VERSION_MAJOR}${CUDAToolkit_VERSION_MINOR}")
    endif()

    project(bitsandbytes LANGUAGES CXX CUDA)
    add_compile_definitions(BUILD_CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    set(GPU_SOURCES csrc/ops.cu csrc/kernels.cu)
elseif(ENABLE_MPS)
    if(NOT APPLE)
        message(FATAL_ERROR "MPS is only supported on macOS" )
    endif()
    message("Building MPS support")
    set(LIBSUFFIX "mps")
    project(bitsandbytes LANGUAGES CXX OBJCXX)
    add_compile_definitions(BUILD_MPS)

    # Metal kernels are compiled in two steps: .metal -> .air -> .metallib.
    # Inputs are anchored to the source tree and outputs to the build tree so
    # the rule also works for out-of-source builds. For an in-source build
    # these are the same paths as the previous relative ones.
    set(METAL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/csrc/mps_kernels.metal")
    set(METAL_AIR "${CMAKE_CURRENT_BINARY_DIR}/build/bitsandbytes.air")
    set(METAL_LIBRARY "${CMAKE_CURRENT_BINARY_DIR}/bitsandbytes/bitsandbytes.metallib")
    # Both output directories must exist before the compile commands run
    file(MAKE_DIRECTORY
        "${CMAKE_CURRENT_BINARY_DIR}/build"
        "${CMAKE_CURRENT_BINARY_DIR}/bitsandbytes")
    add_custom_command(OUTPUT "${METAL_LIBRARY}"
        COMMAND xcrun metal -c -o "${METAL_AIR}" "${METAL_SOURCES}"
        COMMAND xcrun metallib "${METAL_AIR}" -o "${METAL_LIBRARY}"
        DEPENDS "${METAL_SOURCES}"
        COMMENT "Compiling Metal kernels"
        VERBATIM)
    # Named target so the library can depend on the generated metallib
    add_custom_target(metallib DEPENDS "${METAL_LIBRARY}")
    set(GPU_SOURCES csrc/mps_ops.mm)
else()
    message("Building with CPU only")
    set(LIBSUFFIX "cpu")

    project(bitsandbytes LANGUAGES CXX)
    set(GPU_SOURCES)
endif()

if(APPLE)
    # NOTE(review): CMake documents that CMAKE_OSX_DEPLOYMENT_TARGET should be
    # set before the first project() call; here it is set afterwards - confirm
    # the deployment target is actually honoured.
    set(CMAKE_OSX_DEPLOYMENT_TARGET 13.1)
endif()
# Require C++14 for all C++ targets. CMAKE_CXX_STANDARD_REQUIRED is the
# variable CMake reads, and it takes a boolean; without it CMake may silently
# fall back to an older standard.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Settings that apply to every Windows build, whatever the compiler
if(WIN32)
    # Mute warnings (nvcc diagnostic number 177)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -diag-suppress=177")

    # Export all symbols
    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# Flags written in cl.exe syntax: only valid when the compiler is MSVC
# (or MSVC-compatible), so they must not be applied to other Windows toolchains.
if(MSVC)
    # Enable fast math on VC++
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast")

    # Weird MSVC hacks
    # NOTE(review): these are *executable* linker flags, while this file only
    # defines a shared library - confirm whether CMAKE_SHARED_LINKER_FLAGS was intended.
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:msvcprtd /NODEFAULTLIB:MSVCRTD /NODEFAULTLIB:LIBCMT")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
endif()

# The one shared library of this project: the backend-specific sources chosen
# earlier (GPU_SOURCES, possibly empty) plus the common C++ sources.
add_library(bitsandbytes SHARED)
target_sources(bitsandbytes
    PRIVATE
        ${GPU_SOURCES}
        csrc/common.cpp
        csrc/cpu_ops.cpp
        csrc/pythonInterface.cpp
)

# Header search path: CUDA toolkit headers (the variable is empty when the CUDA
# language is not enabled), then the project's own csrc/ and include/ directories.
target_include_directories(bitsandbytes
    PUBLIC
        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
        ${CMAKE_CURRENT_SOURCE_DIR}/csrc
        ${CMAKE_CURRENT_SOURCE_DIR}/include
)

# Backend-specific configuration of the library target
if(ENABLE_CUDA)
    # Use the include directories reported by find_package(CUDAToolkit).
    # CUDA_TOOLKIT_ROOT_DIR belongs to the legacy FindCUDA module and is not
    # set here, so "${CUDA_TOOLKIT_ROOT_DIR}/include" expanded to "/include".
    target_include_directories(bitsandbytes PUBLIC ${CUDAToolkit_INCLUDE_DIRS})

    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

    set_target_properties(
        bitsandbytes
        PROPERTIES
            CUDA_SEPARABLE_COMPILATION ON)

    # Explicit PUBLIC keeps the transitive behaviour of the keyword-less signature
    target_link_libraries(bitsandbytes PUBLIC CUDA::cudart CUDA::cublas CUDA::cublasLt CUDA::cusparse)
endif()
if(ENABLE_MPS)
    # Build the Metal library (custom target 'metallib') before the shared library
    add_dependencies(bitsandbytes metallib)
    target_link_libraries(bitsandbytes PUBLIC objc "-framework Foundation" "-framework Metal" "-framework MetalPerformanceShaders" "-framework MetalPerformanceShadersGraph")
endif()

# Name the artifact after the selected backend (bitsandbytes_<LIBSUFFIX>) and
# place it in the "bitsandbytes" directory.
set_target_properties(bitsandbytes
    PROPERTIES
        OUTPUT_NAME "bitsandbytes_${LIBSUFFIX}"
        LIBRARY_OUTPUT_DIRECTORY bitsandbytes
)

# With MSVC, also set the per-configuration library and runtime output
# directories for Release and Debug to the same place.
if(MSVC)
    set_target_properties(bitsandbytes
        PROPERTIES
            LIBRARY_OUTPUT_DIRECTORY_RELEASE bitsandbytes
            LIBRARY_OUTPUT_DIRECTORY_DEBUG bitsandbytes
            RUNTIME_OUTPUT_DIRECTORY_RELEASE bitsandbytes
            RUNTIME_OUTPUT_DIRECTORY_DEBUG bitsandbytes
    )
endif()
Loading