From a90d41f71710d28e09fe2ed9179d2f52c43cbb95 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Tue, 29 Jul 2025 17:11:02 -0700 Subject: [PATCH 01/10] Bump version and simplify patches --- .azure-pipelines/azure-pipelines-linux.yml | 30 +- .azure-pipelines/azure-pipelines-osx.yml | 2 +- .azure-pipelines/azure-pipelines-win.yml | 13 +- ..._version13fortran_compiler_version13.yaml} | 2 - ..._version14fortran_compiler_version14.yaml} | 8 +- ...r_version11fortran_compiler_version11.yaml | 46 -- ..._version13fortran_compiler_version13.yaml} | 2 - ..._version14fortran_compiler_version14.yaml} | 8 +- ..._version12fortran_compiler_version12.yaml} | 2 - ..._version14fortran_compiler_version14.yaml} | 8 +- .ci_support/osx_64_.yaml | 6 +- .ci_support/osx_arm64_.yaml | 6 +- ... => win_64_cuda_compiler_version12.6.yaml} | 5 +- ... => win_64_cuda_compiler_versionNone.yaml} | 7 +- ...compilernvcccuda_compiler_version11.8.yaml | 35 -- .gitattributes | 1 + .scripts/build_steps.sh | 2 +- .scripts/run_docker_build.sh | 2 +- .scripts/run_osx_build.sh | 2 +- .scripts/run_win_build.bat | 2 +- README.md | 46 +- build-locally.py | 8 +- recipe/meta.yaml | 30 +- .../patches/0001-Support-openfst-1.7.6.patch | 128 ---- .../patches/0002-Support-openfst-1.8.0.patch | 50 -- .../patches/0003-Support-openfst-1.8.1.patch | 107 ---- .../patches/0004-Support-openfst-1.8.2.patch | 271 --------- .../0005-Shared-libraries-on-windows.patch | 551 ++++-------------- 28 files changed, 195 insertions(+), 1185 deletions(-) rename .ci_support/{linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml => linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml} (94%) rename .ci_support/{linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml => linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml} (91%) delete mode 100644 .ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11.yaml rename .ci_support/{linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml => linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml} (94%) rename .ci_support/{linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml => linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml} (91%) rename .ci_support/{linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml => linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml} (94%) rename .ci_support/{linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml => linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml} (91%) rename .ci_support/{win_64_cuda_compilercuda-nvcccuda_compiler_version12.6.yaml => win_64_cuda_compiler_version12.6.yaml} (85%) rename .ci_support/{win_64_cuda_compilerNonecuda_compiler_versionNone.yaml => win_64_cuda_compiler_versionNone.yaml} (84%) delete mode 100644 .ci_support/win_64_cuda_compilernvcccuda_compiler_version11.8.yaml delete mode 100644 recipe/patches/0001-Support-openfst-1.7.6.patch delete mode 100644 recipe/patches/0002-Support-openfst-1.8.0.patch delete mode 100644 recipe/patches/0003-Support-openfst-1.8.1.patch delete mode 100644 recipe/patches/0004-Support-openfst-1.8.2.patch diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml index 6ea082b..b52fcea 100755 --- a/.azure-pipelines/azure-pipelines-linux.yml +++ b/.azure-pipelines/azure-pipelines-linux.yml @@ -8,32 +8,28 @@ jobs: vmImage: ubuntu-latest strategy: matrix: - linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13: - CONFIG: linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13 + linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13: + CONFIG: linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13 UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13: - CONFIG: linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13 + linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14: + CONFIG: linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14 UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11: - CONFIG: linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11 - UPLOAD_PACKAGES: 'True' - DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64-cuda11.8:ubi8 - linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13: - CONFIG: linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13 + linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13: + CONFIG: linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13 UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13: - CONFIG: linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13 + linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14: + CONFIG: linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14 UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13: - CONFIG: linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13 + linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12: + CONFIG: linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12 UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 - linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12: - CONFIG: linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12 + linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14: + CONFIG: linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14 UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-x86_64:alma9 timeoutInMinutes: 360 @@ -86,4 +82,4 @@ jobs: env: BINSTAR_TOKEN: $(BINSTAR_TOKEN) FEEDSTOCK_TOKEN: $(FEEDSTOCK_TOKEN) - STAGING_BINSTAR_TOKEN: $(STAGING_BINSTAR_TOKEN) \ No newline at end of file + STAGING_BINSTAR_TOKEN: $(STAGING_BINSTAR_TOKEN) diff --git a/.azure-pipelines/azure-pipelines-osx.yml b/.azure-pipelines/azure-pipelines-osx.yml index ac0bf38..cde7700 100755 --- a/.azure-pipelines/azure-pipelines-osx.yml +++ b/.azure-pipelines/azure-pipelines-osx.yml @@ -37,4 +37,4 @@ jobs: env: BINSTAR_TOKEN: $(BINSTAR_TOKEN) FEEDSTOCK_TOKEN: $(FEEDSTOCK_TOKEN) - STAGING_BINSTAR_TOKEN: $(STAGING_BINSTAR_TOKEN) \ No newline at end of file + STAGING_BINSTAR_TOKEN: $(STAGING_BINSTAR_TOKEN) diff --git a/.azure-pipelines/azure-pipelines-win.yml b/.azure-pipelines/azure-pipelines-win.yml index d42d3d6..6476a21 100755 --- a/.azure-pipelines/azure-pipelines-win.yml +++ b/.azure-pipelines/azure-pipelines-win.yml @@ -8,14 +8,11 @@ jobs: vmImage: windows-2022 strategy: matrix: - win_64_cuda_compilerNonecuda_compiler_versionNone: - CONFIG: win_64_cuda_compilerNonecuda_compiler_versionNone + win_64_cuda_compiler_version12.6: + CONFIG: win_64_cuda_compiler_version12.6 UPLOAD_PACKAGES: 'True' - win_64_cuda_compilercuda-nvcccuda_compiler_version12.6: - CONFIG: win_64_cuda_compilercuda-nvcccuda_compiler_version12.6 - UPLOAD_PACKAGES: 'True' - win_64_cuda_compilernvcccuda_compiler_version11.8: - CONFIG: win_64_cuda_compilernvcccuda_compiler_version11.8 + win_64_cuda_compiler_versionNone: + CONFIG: win_64_cuda_compiler_versionNone UPLOAD_PACKAGES: 'True' timeoutInMinutes: 360 variables: @@ -41,4 +38,4 @@ jobs: UPLOAD_TEMP: $(UPLOAD_TEMP) BINSTAR_TOKEN: $(BINSTAR_TOKEN) FEEDSTOCK_TOKEN: $(FEEDSTOCK_TOKEN) - STAGING_BINSTAR_TOKEN: $(STAGING_BINSTAR_TOKEN) \ No newline at end of file + STAGING_BINSTAR_TOKEN: $(STAGING_BINSTAR_TOKEN) diff --git a/.ci_support/linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml b/.ci_support/linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml similarity index 94% rename from .ci_support/linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml rename to .ci_support/linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml index 5d57254..89ef0ce 100644 --- a/.ci_support/linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml +++ b/.ci_support/linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml @@ -39,8 +39,6 @@ target_platform: zip_keys: - - cxx_compiler_version - fortran_compiler_version - - cuda_compiler - cuda_compiler_version - - docker_image zlib: - '1' diff --git a/.ci_support/linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml b/.ci_support/linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml similarity index 91% rename from .ci_support/linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml rename to .ci_support/linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml index df04c5a..03355d4 100644 --- a/.ci_support/linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml +++ b/.ci_support/linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml @@ -9,19 +9,19 @@ channel_sources: channel_targets: - conda-forge main cuda_compiler: -- None +- cuda-nvcc cuda_compiler_version: - None cxx_compiler: - gxx cxx_compiler_version: -- '13' +- '14' docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 fortran_compiler: - gfortran fortran_compiler_version: -- '13' +- '14' icu: - '75' libblas: @@ -39,8 +39,6 @@ target_platform: zip_keys: - - cxx_compiler_version - fortran_compiler_version - - cuda_compiler - cuda_compiler_version - - docker_image zlib: - '1' diff --git a/.ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11.yaml b/.ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11.yaml deleted file mode 100644 index 3b26e09..0000000 --- a/.ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11.yaml +++ /dev/null @@ -1,46 +0,0 @@ -c_stdlib: -- sysroot -c_stdlib_version: -- '2.17' -cdt_name: -- conda -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cuda_compiler: -- nvcc -cuda_compiler_version: -- '11.8' -cxx_compiler: -- gxx -cxx_compiler_version: -- '11' -docker_image: -- quay.io/condaforge/linux-anvil-x86_64-cuda11.8:ubi8 -fortran_compiler: -- gfortran -fortran_compiler_version: -- '11' -icu: -- '75' -libblas: -- 3.9.* *netlib -libcblas: -- 3.9.* *netlib -liblapack: -- 3.9.* *netlib -liblapacke: -- 3.9.* *netlib -magma: -- '2.9' -target_platform: -- linux-64 -zip_keys: -- - cxx_compiler_version - - fortran_compiler_version - - cuda_compiler - - cuda_compiler_version - - docker_image -zlib: -- '1' diff --git a/.ci_support/linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml similarity index 94% rename from .ci_support/linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml rename to .ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml index a7c75bd..8703a6c 100644 --- a/.ci_support/linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml +++ b/.ci_support/linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13.yaml @@ -39,8 +39,6 @@ target_platform: zip_keys: - - cxx_compiler_version - fortran_compiler_version - - cuda_compiler - cuda_compiler_version - - docker_image zlib: - '1' diff --git a/.ci_support/linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml similarity index 91% rename from .ci_support/linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml rename to .ci_support/linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml index e036d79..aa08f6b 100644 --- a/.ci_support/linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml +++ b/.ci_support/linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml @@ -9,19 +9,19 @@ channel_sources: channel_targets: - conda-forge main cuda_compiler: -- None +- cuda-nvcc cuda_compiler_version: - None cxx_compiler: - gxx cxx_compiler_version: -- '13' +- '14' docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 fortran_compiler: - gfortran fortran_compiler_version: -- '13' +- '14' icu: - '75' libblas: @@ -39,8 +39,6 @@ target_platform: zip_keys: - - cxx_compiler_version - fortran_compiler_version - - cuda_compiler - cuda_compiler_version - - docker_image zlib: - '1' diff --git a/.ci_support/linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml b/.ci_support/linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml similarity index 94% rename from .ci_support/linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml rename to .ci_support/linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml index 2509628..6b68ea5 100644 --- a/.ci_support/linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml +++ b/.ci_support/linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12.yaml @@ -39,8 +39,6 @@ target_platform: zip_keys: - - cxx_compiler_version - fortran_compiler_version - - cuda_compiler - cuda_compiler_version - - docker_image zlib: - '1' diff --git a/.ci_support/linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml b/.ci_support/linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml similarity index 91% rename from .ci_support/linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml rename to .ci_support/linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml index 6602190..3006538 100644 --- a/.ci_support/linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13.yaml +++ b/.ci_support/linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14.yaml @@ -9,19 +9,19 @@ channel_sources: channel_targets: - conda-forge main cuda_compiler: -- None +- cuda-nvcc cuda_compiler_version: - None cxx_compiler: - gxx cxx_compiler_version: -- '13' +- '14' docker_image: - quay.io/condaforge/linux-anvil-x86_64:alma9 fortran_compiler: - gfortran fortran_compiler_version: -- '13' +- '14' icu: - '75' libblas: @@ -39,8 +39,6 @@ target_platform: zip_keys: - - cxx_compiler_version - fortran_compiler_version - - cuda_compiler - cuda_compiler_version - - docker_image zlib: - '1' diff --git a/.ci_support/osx_64_.yaml b/.ci_support/osx_64_.yaml index dbc6677..c6d645a 100644 --- a/.ci_support/osx_64_.yaml +++ b/.ci_support/osx_64_.yaml @@ -11,17 +11,17 @@ channel_sources: channel_targets: - conda-forge main cuda_compiler: -- None +- cuda-nvcc cuda_compiler_version: - None cxx_compiler: - clangxx cxx_compiler_version: -- '18' +- '19' fortran_compiler: - gfortran fortran_compiler_version: -- '13' +- '14' icu: - '75' libblas: diff --git a/.ci_support/osx_arm64_.yaml b/.ci_support/osx_arm64_.yaml index 4f841e8..39f3d3d 100644 --- a/.ci_support/osx_arm64_.yaml +++ b/.ci_support/osx_arm64_.yaml @@ -11,17 +11,17 @@ channel_sources: channel_targets: - conda-forge main cuda_compiler: -- None +- cuda-nvcc cuda_compiler_version: - None cxx_compiler: - clangxx cxx_compiler_version: -- '18' +- '19' fortran_compiler: - gfortran fortran_compiler_version: -- '13' +- '14' icu: - '75' libblas: diff --git a/.ci_support/win_64_cuda_compilercuda-nvcccuda_compiler_version12.6.yaml b/.ci_support/win_64_cuda_compiler_version12.6.yaml similarity index 85% rename from .ci_support/win_64_cuda_compilercuda-nvcccuda_compiler_version12.6.yaml rename to .ci_support/win_64_cuda_compiler_version12.6.yaml index 85946c8..63916ed 100644 --- a/.ci_support/win_64_cuda_compilercuda-nvcccuda_compiler_version12.6.yaml +++ b/.ci_support/win_64_cuda_compiler_version12.6.yaml @@ -9,7 +9,7 @@ cuda_compiler: cuda_compiler_version: - '12.6' cxx_compiler: -- vs2019 +- vs2022 fortran_compiler: - flang fortran_compiler_version: @@ -28,8 +28,5 @@ magma: - '2.9' target_platform: - win-64 -zip_keys: -- - cuda_compiler - - cuda_compiler_version zlib: - '1' diff --git a/.ci_support/win_64_cuda_compilerNonecuda_compiler_versionNone.yaml b/.ci_support/win_64_cuda_compiler_versionNone.yaml similarity index 84% rename from .ci_support/win_64_cuda_compilerNonecuda_compiler_versionNone.yaml rename to .ci_support/win_64_cuda_compiler_versionNone.yaml index 14dd2b7..ce06064 100644 --- a/.ci_support/win_64_cuda_compilerNonecuda_compiler_versionNone.yaml +++ b/.ci_support/win_64_cuda_compiler_versionNone.yaml @@ -5,11 +5,11 @@ channel_sources: channel_targets: - conda-forge main cuda_compiler: -- None +- cuda-nvcc cuda_compiler_version: - None cxx_compiler: -- vs2019 +- vs2022 fortran_compiler: - flang fortran_compiler_version: @@ -28,8 +28,5 @@ magma: - '2.9' target_platform: - win-64 -zip_keys: -- - cuda_compiler - - cuda_compiler_version zlib: - '1' diff --git a/.ci_support/win_64_cuda_compilernvcccuda_compiler_version11.8.yaml b/.ci_support/win_64_cuda_compilernvcccuda_compiler_version11.8.yaml deleted file mode 100644 index cdd9cb1..0000000 --- a/.ci_support/win_64_cuda_compilernvcccuda_compiler_version11.8.yaml +++ /dev/null @@ -1,35 +0,0 @@ -c_stdlib: -- vs -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cuda_compiler: -- nvcc -cuda_compiler_version: -- '11.8' -cxx_compiler: -- vs2019 -fortran_compiler: -- flang -fortran_compiler_version: -- '19' -icu: -- '75' -libblas: -- 3.9.* *netlib -libcblas: -- 3.9.* *netlib -liblapack: -- 3.9.* *netlib -liblapacke: -- 3.9.* *netlib -magma: -- '2.9' -target_platform: -- win-64 -zip_keys: -- - cuda_compiler - - cuda_compiler_version -zlib: -- '1' diff --git a/.gitattributes b/.gitattributes index 18f114a..85ccb8f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -24,4 +24,5 @@ bld.bat text eol=crlf /README.md linguist-generated=true azure-pipelines.yml linguist-generated=true build-locally.py linguist-generated=true +pixi.toml linguist-generated=true shippable.yml linguist-generated=true diff --git a/.scripts/build_steps.sh b/.scripts/build_steps.sh index 827a29f..780a358 100755 --- a/.scripts/build_steps.sh +++ b/.scripts/build_steps.sh @@ -97,4 +97,4 @@ fi ( startgroup "Final checks" ) 2> /dev/null -touch "${FEEDSTOCK_ROOT}/build_artifacts/conda-forge-build-done-${CONFIG}" \ No newline at end of file +touch "${FEEDSTOCK_ROOT}/build_artifacts/conda-forge-build-done-${CONFIG}" diff --git a/.scripts/run_docker_build.sh b/.scripts/run_docker_build.sh index 6ee04aa..b63b5a0 100755 --- a/.scripts/run_docker_build.sh +++ b/.scripts/run_docker_build.sh @@ -111,4 +111,4 @@ docker run ${DOCKER_RUN_ARGS} \ test -f "$DONE_CANARY" # This closes the last group opened in `build_steps.sh` -( endgroup "Final checks" ) 2> /dev/null \ No newline at end of file +( endgroup "Final checks" ) 2> /dev/null diff --git a/.scripts/run_osx_build.sh b/.scripts/run_osx_build.sh index 0e3c394..361edeb 100755 --- a/.scripts/run_osx_build.sh +++ b/.scripts/run_osx_build.sh @@ -117,4 +117,4 @@ else fi ( endgroup "Uploading packages" ) 2> /dev/null -fi \ No newline at end of file +fi diff --git a/.scripts/run_win_build.bat b/.scripts/run_win_build.bat index 297498b..27c552b 100755 --- a/.scripts/run_win_build.bat +++ b/.scripts/run_win_build.bat @@ -148,4 +148,4 @@ if /i "%CI%" == "azure" ( echo ##[endgroup] exit /b ) -exit /b \ No newline at end of file +exit /b diff --git a/README.md b/README.md index 0fda935..c6141c3 100644 --- a/README.md +++ b/README.md @@ -34,52 +34,45 @@ Current build status - + - + - + - + - + - + - - - @@ -97,24 +90,17 @@ Current build status - - - - + - + diff --git a/build-locally.py b/build-locally.py index 02c2374..9dfe440 100755 --- a/build-locally.py +++ b/build-locally.py @@ -106,9 +106,7 @@ def main(args=None): action="store_true", help="Setup debug environment using `conda debug`", ) - p.add_argument( - "--output-id", help="If running debug, specify the output to setup." - ) + p.add_argument("--output-id", help="If running debug, specify the output to setup.") ns = p.parse_args(args=args) verify_config(ns) @@ -124,9 +122,7 @@ def main(args=None): elif ns.config.startswith("win"): run_win_build(ns) finally: - recipe_license_file = os.path.join( - "recipe", "recipe-scripts-license.txt" - ) + recipe_license_file = os.path.join("recipe", "recipe-scripts-license.txt") if os.path.exists(recipe_license_file): os.remove(recipe_license_file) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index ccc1925..830d0b2 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,8 +1,9 @@ -{% set version = "5.5.1112" %} +{% set version = "5.5.1162" %} # minor/major is https://github.com/kaldi-asr/kaldi/blob/master/src/.version; # patch version is the number of commits since then, see # https://github.com/kaldi-asr/kaldi/blob/master/cmake/VersionHelper.cmake -{% set commit = "8c451e28582f5d91f84ea3d64bb76c794c3b1683" %} +{% set commit = "f4007661023b98b8081fd875029f0dee62242fd1" %} +{% set cuda_compiler_version = "None" if cuda_compiler_version is undefined %} {% set kaldi_proc_type = "cuda" if cuda_compiler_version != "None" else "cpu" %} {% set kaldi_libraries = [ @@ -40,21 +41,20 @@ package: source: url: https://github.com/kaldi-asr/kaldi/archive/{{ commit }}.tar.gz - sha256: ca169b55f21c7a8420b023ad94b17bfcc9bb0c025bacf2ab3330171ab33d7e08 + sha256: 139de58f1abbf727fee65e709a8fcc6d8714d5e5596a7eb15491faef1ac73304 patches: # Patches based on https://github.com/kaldi-asr/kaldi/pull/4829 - - patches/0001-Support-openfst-1.7.6.patch - - patches/0002-Support-openfst-1.8.0.patch - - patches/0003-Support-openfst-1.8.1.patch - - patches/0004-Support-openfst-1.8.2.patch + #- patches/0001-Support-openfst-1.7.6.patch + #- patches/0002-Support-openfst-1.8.0.patch + #- patches/0003-Support-openfst-1.8.1.patch + #- patches/0004-Support-openfst-1.8.2.patch - patches/0005-Shared-libraries-on-windows.patch - - patches/0006-Cuda-12-support.patch # [(cuda_compiler_version or "").startswith("12")] - # backport https://github.com/kaldi-asr/kaldi/pull/4917 - - patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch + #- patches/0006-Cuda-12-support.patch # [(cuda_compiler_version or "").startswith("12")] + ## backport https://github.com/kaldi-asr/kaldi/pull/4917 + #- patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch build: - number: 9 - skip: true # [cuda_compiler_version == "11.8" and aarch64] - skip: true # [cuda_compiler_version == "11.8" and ppc64le] + number: 0 + skip: true # [cuda_compiler_version == "11.8" and (aarch64 or ppc64le)] requirements: build: @@ -106,7 +106,7 @@ outputs: - nomkl - pthreads-win32 # [win] - zlib - - openfst ==1.8.3 + - openfst ==1.8.4 - dlfcn-win32 # [win] run: - libblas @@ -114,7 +114,7 @@ outputs: - liblapack - liblapacke - magma # [cuda_compiler_version != "None"] - - openfst ==1.8.3 + - openfst ==1.8.4 - ucrt # [win] - __cuda >={{ cuda_compiler_version }} # [cuda_compiler_version != "None"] test: diff --git a/recipe/patches/0001-Support-openfst-1.7.6.patch b/recipe/patches/0001-Support-openfst-1.7.6.patch deleted file mode 100644 index e1b8b8a..0000000 --- a/recipe/patches/0001-Support-openfst-1.7.6.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 3ec514f01c054a537789e596b96014cfa0f53cb8 Mon Sep 17 00:00:00 2001 -From: Yuriy Chernyshov -Date: Thu, 23 Feb 2023 21:23:57 +0300 -Subject: [PATCH 1/7] Support openfst-1.7.6 - ---- - src/chain/chain-supervision.cc | 10 ++++------ - src/fstext/fstext-utils-inl.h | 12 ++++++------ - src/fstext/kaldi-fst-io-inl.h | 2 +- - src/fstext/pre-determinize-inl.h | 4 ++-- - src/kws/kws-functions.cc | 2 +- - src/lat/kaldi-lattice.cc | 4 ++-- - 6 files changed, 16 insertions(+), 18 deletions(-) - -diff --git a/src/chain/chain-supervision.cc b/src/chain/chain-supervision.cc -index f8a2c1d11..b29000a44 100644 ---- a/src/chain/chain-supervision.cc -+++ b/src/chain/chain-supervision.cc -@@ -571,9 +571,8 @@ void Supervision::Write(std::ostream &os, bool binary) const { - // Write using StdAcceptorCompactFst, making use of the fact that it's an - // acceptor. - fst::FstWriteOptions write_options(""); -- fst::StdCompactAcceptorFst::WriteFst( -- fst, fst::AcceptorCompactor(), os, -- write_options); -+ fst::StdCompactAcceptorFst cfst(fst); -+ cfst.Write(os, write_options); - } - } else { - KALDI_ASSERT(e2e_fsts.size() == num_sequences); -@@ -586,9 +585,8 @@ void Supervision::Write(std::ostream &os, bool binary) const { - // Write using StdAcceptorCompactFst, making use of the fact that it's an - // acceptor. - fst::FstWriteOptions write_options(""); -- fst::StdCompactAcceptorFst::WriteFst( -- e2e_fsts[i], fst::AcceptorCompactor(), os, -- write_options); -+ fst::StdCompactAcceptorFst cfst(e2e_fsts[i]); -+ cfst.Write(os, write_options); - } - } - WriteToken(os, binary, ""); -diff --git a/src/fstext/fstext-utils-inl.h b/src/fstext/fstext-utils-inl.h -index 853697387..d877c03e1 100644 ---- a/src/fstext/fstext-utils-inl.h -+++ b/src/fstext/fstext-utils-inl.h -@@ -374,12 +374,12 @@ void GetSymbols(const SymbolTable &symtab, - std::vector *syms_out) { - KALDI_ASSERT(syms_out != NULL); - syms_out->clear(); -- for (SymbolTableIterator iter(symtab); -- !iter.Done(); -- iter.Next()) { -- if (include_eps || iter.Value() != 0) { -- syms_out->push_back(iter.Value()); -- KALDI_ASSERT(syms_out->back() == iter.Value()); // an integer-range thing. -+ for (SymbolTable::iterator iter = symtab.begin(); -+ iter != symtab.end(); -+ ++iter) { -+ if (include_eps || iter->Label() != 0) { -+ syms_out->push_back(iter->Label()); -+ KALDI_ASSERT(syms_out->back() == iter->Label()); // an integer-range thing. - } - } - } -diff --git a/src/fstext/kaldi-fst-io-inl.h b/src/fstext/kaldi-fst-io-inl.h -index b6bae4b9d..f7bb3a7c2 100644 ---- a/src/fstext/kaldi-fst-io-inl.h -+++ b/src/fstext/kaldi-fst-io-inl.h -@@ -44,7 +44,7 @@ void WriteFstKaldi(std::ostream &os, bool binary, - bool acceptor = false, write_one = false; - FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), - NULL, acceptor, write_one, "\t"); -- printer.Print(&os, ""); -+ printer.Print(os, ""); - if (os.fail()) - KALDI_ERR << "Stream failure detected writing FST to stream"; - // Write another newline as a terminating character. The read routine will -diff --git a/src/fstext/pre-determinize-inl.h b/src/fstext/pre-determinize-inl.h -index b67b0ba6f..b531bdef2 100644 ---- a/src/fstext/pre-determinize-inl.h -+++ b/src/fstext/pre-determinize-inl.h -@@ -235,8 +235,8 @@ inline bool HasBannedPrefixPlusDigits(SymbolTable *symTable, std::string prefix, - assert(symTable != NULL); - const char *prefix_ptr = prefix.c_str(); - size_t prefix_len = strlen(prefix_ptr); // allowed to be zero but not encouraged. -- for (SymbolTableIterator siter(*symTable); !siter.Done(); siter.Next()) { -- const std::string &sym = siter.Symbol(); -+ for (SymbolTable::iterator siter = symTable->begin(); siter != symTable->end(); ++siter) { -+ const std::string &sym = siter->Symbol(); - if (!strncmp(prefix_ptr, sym.c_str(), prefix_len)) { // has prefix. - if (isdigit(sym[prefix_len])) { // we don't allow prefix followed by a digit, as a symbol. - // Has at least one digit. -diff --git a/src/kws/kws-functions.cc b/src/kws/kws-functions.cc -index d1d71ce7a..3e27226f1 100644 ---- a/src/kws/kws-functions.cc -+++ b/src/kws/kws-functions.cc -@@ -75,7 +75,7 @@ bool ClusterLattice(CompactLattice *clat, - unordered_map >::iterator iter; - for (iter = head.begin(); iter != head.end(); ++iter) { - // For this ilabel, sort all the arcs on time, from first to last. -- sort(iter->second.begin(), iter->second.end(), CompareInterval); -+ std::sort(iter->second.begin(), iter->second.end(), CompareInterval); - std::vector tmp; - tmp.push_back(iter->second[0]); - for (int32 i = 1; i < iter->second.size(); i++) { -diff --git a/src/lat/kaldi-lattice.cc b/src/lat/kaldi-lattice.cc -index 744cc5384..648e67115 100644 ---- a/src/lat/kaldi-lattice.cc -+++ b/src/lat/kaldi-lattice.cc -@@ -78,7 +78,7 @@ bool WriteCompactLattice(std::ostream &os, bool binary, - fst::FstPrinter printer(t, t.InputSymbols(), - t.OutputSymbols(), - NULL, acceptor, write_one, "\t"); -- printer.Print(&os, ""); -+ printer.Print(os, ""); - if (os.fail()) - KALDI_WARN << "Stream failure detected."; - // Write another newline as a terminating character. The read routine will -@@ -403,7 +403,7 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) { - fst::FstPrinter printer(t, t.InputSymbols(), - t.OutputSymbols(), - NULL, acceptor, write_one, "\t"); -- printer.Print(&os, ""); -+ printer.Print(os, ""); - if (os.fail()) - KALDI_WARN << "Stream failure detected."; - // Write another newline as a terminating character. The read routine will diff --git a/recipe/patches/0002-Support-openfst-1.8.0.patch b/recipe/patches/0002-Support-openfst-1.8.0.patch deleted file mode 100644 index cad9893..0000000 --- a/recipe/patches/0002-Support-openfst-1.8.0.patch +++ /dev/null @@ -1,50 +0,0 @@ -From af56111501313ea9e05131d1432b610009d1ab3b Mon Sep 17 00:00:00 2001 -From: Yuriy Chernyshov -Date: Thu, 23 Feb 2023 21:24:12 +0300 -Subject: [PATCH 2/7] Support openfst-1.8.0 - ---- - src/fstext/fstext-utils-inl.h | 2 +- - src/fstext/fstext-utils.h | 2 +- - src/fstext/lattice-utils-inl.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/fstext/fstext-utils-inl.h b/src/fstext/fstext-utils-inl.h -index d877c03e1..44e641a3f 100644 ---- a/src/fstext/fstext-utils-inl.h -+++ b/src/fstext/fstext-utils-inl.h -@@ -163,7 +163,7 @@ void RemoveSomeInputSymbols(const std::vector &to_remove, - MutableFst *fst) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - RemoveSomeInputSymbolsMapper mapper(to_remove); -- Map(fst, mapper); -+ ArcMap(fst, mapper); - } - - template -diff --git a/src/fstext/fstext-utils.h b/src/fstext/fstext-utils.h -index 5789dbe7c..db14ddd35 100644 ---- a/src/fstext/fstext-utils.h -+++ b/src/fstext/fstext-utils.h -@@ -113,7 +113,7 @@ void PushInLog(VectorFst *fst, uint32 ptype, float delta = kDelta) { - template - void MinimizeEncoded(VectorFst *fst, float delta = kDelta) { - -- Map(fst, QuantizeMapper(delta)); -+ ArcMap(fst, QuantizeMapper(delta)); - EncodeMapper encoder(kEncodeLabels | kEncodeWeights, ENCODE); - Encode(fst, &encoder); - internal::AcceptorMinimize(fst); -diff --git a/src/fstext/lattice-utils-inl.h b/src/fstext/lattice-utils-inl.h -index c97a538dd..5d52ed3aa 100644 ---- a/src/fstext/lattice-utils-inl.h -+++ b/src/fstext/lattice-utils-inl.h -@@ -268,7 +268,7 @@ void ConvertFstToLattice( - MutableFst > > *ofst) { - int32 num_states_cache = 50000; - fst::CacheOptions cache_opts(true, num_states_cache); -- fst::MapFstOptions mapfst_opts(cache_opts); -+ fst::ArcMapFstOptions mapfst_opts(cache_opts); - StdToLatticeMapper mapper; - MapFst >, - StdToLatticeMapper > map_fst(ifst, mapper, mapfst_opts); diff --git a/recipe/patches/0003-Support-openfst-1.8.1.patch b/recipe/patches/0003-Support-openfst-1.8.1.patch deleted file mode 100644 index 145688e..0000000 --- a/recipe/patches/0003-Support-openfst-1.8.1.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 836446b2ac22333cfa81b5344737adc50f11218b Mon Sep 17 00:00:00 2001 -From: Yuriy Chernyshov -Date: Thu, 23 Feb 2023 21:24:18 +0300 -Subject: [PATCH 3/7] Support openfst-1.8.1 - ---- - src/fstext/kaldi-fst-io-inl.h | 2 +- - src/fstext/lattice-weight.h | 16 ++++++++-------- - src/lat/kaldi-lattice.cc | 2 +- - src/lat/lattice-functions-transition-model.cc | 4 ++-- - 4 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/src/fstext/kaldi-fst-io-inl.h b/src/fstext/kaldi-fst-io-inl.h -index f7bb3a7c2..01047919c 100644 ---- a/src/fstext/kaldi-fst-io-inl.h -+++ b/src/fstext/kaldi-fst-io-inl.h -@@ -99,7 +99,7 @@ void ReadFstKaldi(std::istream &is, bool binary, - fst->DeleteStates(); - string line; - size_t nline = 0; -- string separator = FLAGS_fst_field_separator + "\r\n"; -+ string separator = FST_FLAGS_fst_field_separator + "\r\n"; - while (std::getline(is, line)) { - nline++; - vector col; -diff --git a/src/fstext/lattice-weight.h b/src/fstext/lattice-weight.h -index 6e7737a19..f03ed7025 100644 ---- a/src/fstext/lattice-weight.h -+++ b/src/fstext/lattice-weight.h -@@ -396,8 +396,8 @@ inline bool ApproxEqual(const LatticeWeightTpl &w1, - template - inline std::ostream &operator <<(std::ostream &strm, const LatticeWeightTpl &w) { - LatticeWeightTpl::WriteFloatType(strm, w.Value1()); -- CHECK(FLAGS_fst_weight_separator.size() == 1); -- strm << FLAGS_fst_weight_separator[0]; // comma by default; -+ CHECK(FST_FLAGS_fst_weight_separator.size() == 1); -+ strm << FST_FLAGS_fst_weight_separator[0]; // comma by default; - // may or may not be settable from Kaldi programs. - LatticeWeightTpl::WriteFloatType(strm, w.Value2()); - return strm; -@@ -405,9 +405,9 @@ inline std::ostream &operator <<(std::ostream &strm, const LatticeWeightTpl - inline std::istream &operator >>(std::istream &strm, LatticeWeightTpl &w1) { -- CHECK(FLAGS_fst_weight_separator.size() == 1); -+ CHECK(FST_FLAGS_fst_weight_separator.size() == 1); - // separator defaults to ',' -- return w1.ReadNoParen(strm, FLAGS_fst_weight_separator[0]); -+ return w1.ReadNoParen(strm, FST_FLAGS_fst_weight_separator[0]); - } - - -@@ -726,8 +726,8 @@ inline CompactLatticeWeightTpl Divide(const CompactLatticeW - template - inline std::ostream &operator <<(std::ostream &strm, const CompactLatticeWeightTpl &w) { - strm << w.Weight(); -- CHECK(FLAGS_fst_weight_separator.size() == 1); -- strm << FLAGS_fst_weight_separator[0]; // comma by default. -+ CHECK(FST_FLAGS_fst_weight_separator.size() == 1); -+ strm << FST_FLAGS_fst_weight_separator[0]; // comma by default. - for(size_t i = 0; i < w.String().size(); i++) { - strm << w.String()[i]; - if (i+1 < w.String().size()) -@@ -743,8 +743,8 @@ inline std::istream &operator >>(std::istream &strm, CompactLatticeWeightTpl col; -diff --git a/src/lat/lattice-functions-transition-model.cc b/src/lat/lattice-functions-transition-model.cc -index 6172610dc..a8cd7b7e2 100644 ---- a/src/lat/lattice-functions-transition-model.cc -+++ b/src/lat/lattice-functions-transition-model.cc -@@ -248,13 +248,13 @@ bool TestWordAlignedLattice(const WordAlignLatticeLexiconInfo &lexicon_info, - int32 num_paths = 5, seed = Rand(), max_path_length = -1; - BaseFloat delta = 0.2; // some lattices have large costs -> use large delta. - -- FLAGS_v = GetVerboseLevel(); // set the OpenFst verbose level to the Kaldi -+ FST_FLAGS_v = GetVerboseLevel(); // set the OpenFst verbose level to the Kaldi - // verbose level. - if (!RandEquivalent(clat, aligned_clat, num_paths, delta, seed, max_path_length)) { - KALDI_WARN << "Equivalence test failed during lattice alignment."; - return false; - } -- FLAGS_v = 0; -+ FST_FLAGS_v = 0; - - return (num_err == 0); - } diff --git a/recipe/patches/0004-Support-openfst-1.8.2.patch b/recipe/patches/0004-Support-openfst-1.8.2.patch deleted file mode 100644 index ee7ddd8..0000000 --- a/recipe/patches/0004-Support-openfst-1.8.2.patch +++ /dev/null @@ -1,271 +0,0 @@ -From bf3a72723b2626dc54d7e3a8b98085db4b912f13 Mon Sep 17 00:00:00 2001 -From: Yuriy Chernyshov -Date: Thu, 23 Feb 2023 21:24:25 +0300 -Subject: [PATCH 4/7] Support openfst-1.8.2 - ---- - src/base/kaldi-types.h | 43 +++++++-------------------- - src/fstext/lattice-utils-inl.h | 2 +- - src/kws/kws-functions.cc | 4 +-- - src/kws/kws-functions2.cc | 2 +- - src/lat/arctic-weight.h | 2 +- - src/lat/determinize-lattice-pruned.cc | 6 ++-- - src/lat/minimize-lattice.cc | 2 +- - src/lat/push-lattice.cc | 4 +-- - src/lat/sausages.cc | 2 +- - src/nnet3/nnet-batch-compute.cc | 2 +- - src/online/online-tcp-source.cc | 2 +- - src/rnnlm/rnnlm-test-utils.cc | 2 +- - src/tree/tree-renderer.cc | 4 +-- - 13 files changed, 28 insertions(+), 49 deletions(-) - -diff --git a/src/base/kaldi-types.h b/src/base/kaldi-types.h -index 7ebf4f853..68d5578a5 100644 ---- a/src/base/kaldi-types.h -+++ b/src/base/kaldi-types.h -@@ -39,37 +39,16 @@ typedef float BaseFloat; - // we find in the future lacks stdint.h - #include - --// for discussion on what to do if you need compile kaldi --// without OpenFST, see the bottom of this this file --#include -- --namespace kaldi { -- using ::int16; -- using ::int32; -- using ::int64; -- using ::uint16; -- using ::uint32; -- using ::uint64; -- typedef float float32; -- typedef double double64; --} // end namespace kaldi -- --// In a theoretical case you decide compile Kaldi without the OpenFST --// comment the previous namespace statement and uncomment the following --/* --namespace kaldi { -- typedef int8_t int8; -- typedef int16_t int16; -- typedef int32_t int32; -- typedef int64_t int64; -- -- typedef uint8_t uint8; -- typedef uint16_t uint16; -- typedef uint32_t uint32; -- typedef uint64_t uint64; -- typedef float float32; -- typedef double double64; --} // end namespace kaldi --*/ -+typedef int8_t int8; -+typedef int16_t int16; -+typedef int32_t int32; -+typedef int64_t int64; -+ -+typedef uint8_t uint8; -+typedef uint16_t uint16; -+typedef uint32_t uint32; -+typedef uint64_t uint64; -+typedef float float32; -+typedef double double64; - - #endif // KALDI_BASE_KALDI_TYPES_H_ -diff --git a/src/fstext/lattice-utils-inl.h b/src/fstext/lattice-utils-inl.h -index 5d52ed3aa..03ac9947c 100644 ---- a/src/fstext/lattice-utils-inl.h -+++ b/src/fstext/lattice-utils-inl.h -@@ -270,7 +270,7 @@ void ConvertFstToLattice( - fst::CacheOptions cache_opts(true, num_states_cache); - fst::ArcMapFstOptions mapfst_opts(cache_opts); - StdToLatticeMapper mapper; -- MapFst >, -+ ArcMapFst >, - StdToLatticeMapper > map_fst(ifst, mapper, mapfst_opts); - *ofst = map_fst; - } -diff --git a/src/kws/kws-functions.cc b/src/kws/kws-functions.cc -index 3e27226f1..e6819562f 100644 ---- a/src/kws/kws-functions.cc -+++ b/src/kws/kws-functions.cc -@@ -175,7 +175,7 @@ bool CreateFactorTransducer(const CompactLattice &clat, - - // Now we map the CompactLattice to VectorFst. We drop the - // alignment information and only keep the negated log-probs -- Map(clat, factor_transducer, CompactLatticeToKwsProductFstMapper()); -+ ArcMap(clat, factor_transducer, CompactLatticeToKwsProductFstMapper()); - - // Now do the weight pushing manually on the CompactLattice format. Note that - // the alphas and betas in Kaldi are stored as the log-probs, not the negated -@@ -366,7 +366,7 @@ void MaybeDoSanityCheck(const KwsProductFst &product_transducer) { - if (GetVerboseLevel() < 2) return; - KwsLexicographicFst index_transducer; - -- Map(product_transducer, -+ ArcMap(product_transducer, - &index_transducer, - KwsProductFstToKwsLexicographicFstMapper()); - -diff --git a/src/kws/kws-functions2.cc b/src/kws/kws-functions2.cc -index 71f5583af..9e610d205 100644 ---- a/src/kws/kws-functions2.cc -+++ b/src/kws/kws-functions2.cc -@@ -92,7 +92,7 @@ void DoFactorMerging(KwsProductFst *factor_transducer, - - Decode(&dest_transducer, encoder); - -- Map(dest_transducer, index_transducer, KwsProductFstToKwsLexicographicFstMapper()); -+ ArcMap(dest_transducer, index_transducer, KwsProductFstToKwsLexicographicFstMapper()); - } - - void DoFactorDisambiguation(KwsLexicographicFst *index_transducer) { -diff --git a/src/lat/arctic-weight.h b/src/lat/arctic-weight.h -index 5c0c6d3c4..39775ac89 100644 ---- a/src/lat/arctic-weight.h -+++ b/src/lat/arctic-weight.h -@@ -50,7 +50,7 @@ class ArcticWeightTpl : public FloatWeightTpl { - - static const std::string &Type() { - static const std::string type = std::string("arctic") + -- FloatWeightTpl::GetPrecisionString(); -+ std::string(FloatWeightTpl::GetPrecisionString()); - return type; - } - -diff --git a/src/lat/determinize-lattice-pruned.cc b/src/lat/determinize-lattice-pruned.cc -index dbdd9af46..ff3d65d57 100644 ---- a/src/lat/determinize-lattice-pruned.cc -+++ b/src/lat/determinize-lattice-pruned.cc -@@ -1499,7 +1499,7 @@ bool DeterminizeLatticePhonePrunedWrapper( - } - ILabelCompare ilabel_comp; - ArcSort(ifst, ilabel_comp); -- ans = DeterminizeLatticePhonePruned( -+ ans = DeterminizeLatticePhonePruned( - trans_model, ifst, beam, ofst, opts); - Connect(ofst); - return ans; -@@ -1523,7 +1523,7 @@ bool DeterminizeLatticePruned( - DeterminizeLatticePrunedOptions opts); - - template --bool DeterminizeLatticePhonePruned( -+bool DeterminizeLatticePhonePruned( - const kaldi::TransitionInformation &trans_model, - const ExpandedFst &ifst, - double prune, -@@ -1531,7 +1531,7 @@ bool DeterminizeLatticePhonePruned( - DeterminizeLatticePhonePrunedOptions opts); - - template --bool DeterminizeLatticePhonePruned( -+bool DeterminizeLatticePhonePruned( - const kaldi::TransitionInformation &trans_model, - MutableFst *ifst, - double prune, -diff --git a/src/lat/minimize-lattice.cc b/src/lat/minimize-lattice.cc -index ada90efad..416f1e62e 100644 ---- a/src/lat/minimize-lattice.cc -+++ b/src/lat/minimize-lattice.cc -@@ -279,7 +279,7 @@ bool MinimizeCompactLattice( - - // Instantiate for CompactLattice type. - template --bool MinimizeCompactLattice( -+bool MinimizeCompactLattice( - MutableFst *clat, float delta); - - -diff --git a/src/lat/push-lattice.cc b/src/lat/push-lattice.cc -index f4eb322d0..38a990d74 100644 ---- a/src/lat/push-lattice.cc -+++ b/src/lat/push-lattice.cc -@@ -280,11 +280,11 @@ bool PushCompactLatticeWeights( - - // Instantiate for CompactLattice. - template --bool PushCompactLatticeStrings( -+bool PushCompactLatticeStrings( - MutableFst *clat); - - template --bool PushCompactLatticeWeights( -+bool PushCompactLatticeWeights( - MutableFst *clat); - - } // namespace fst -diff --git a/src/lat/sausages.cc b/src/lat/sausages.cc -index b851bc360..03b384f93 100644 ---- a/src/lat/sausages.cc -+++ b/src/lat/sausages.cc -@@ -325,7 +325,7 @@ void MinimumBayesRisk::PrepareLatticeAndInitStats(CompactLattice *clat) { - // paper (i.e. just one final state). - - // Topologically sort the lattice, if not already sorted. -- kaldi::uint64 props = clat->Properties(fst::kFstProperties, false); -+ uint64 props = clat->Properties(fst::kFstProperties, false); - if (!(props & fst::kTopSorted)) { - if (fst::TopSort(clat) == false) - KALDI_ERR << "Cycles detected in lattice."; -diff --git a/src/nnet3/nnet-batch-compute.cc b/src/nnet3/nnet-batch-compute.cc -index 0e07834ed..fd84c4e56 100644 ---- a/src/nnet3/nnet-batch-compute.cc -+++ b/src/nnet3/nnet-batch-compute.cc -@@ -1503,7 +1503,7 @@ NnetBatchDecoder::~NnetBatchDecoder() { - } - // Print diagnostics. - -- kaldi::int64 input_frame_count = -+ int64 input_frame_count = - frame_count_ * computer_->GetOptions().frame_subsampling_factor; - int32 num_threads = static_cast(decode_threads_.size()); - -diff --git a/src/online/online-tcp-source.cc b/src/online/online-tcp-source.cc -index 6d63493b4..8421073d5 100644 ---- a/src/online/online-tcp-source.cc -+++ b/src/online/online-tcp-source.cc -@@ -24,7 +24,7 @@ - - namespace kaldi { - --typedef kaldi::int32 int32; -+typedef int32 int32; - - OnlineTcpVectorSource::OnlineTcpVectorSource(int32 socket) - : socket_desc(socket), -diff --git a/src/rnnlm/rnnlm-test-utils.cc b/src/rnnlm/rnnlm-test-utils.cc -index 32e8b5a42..f415f257a 100644 ---- a/src/rnnlm/rnnlm-test-utils.cc -+++ b/src/rnnlm/rnnlm-test-utils.cc -@@ -78,7 +78,7 @@ void ConvertToInteger( - for (int i = 0; i < string_sentences.size(); i++) { - (*int_sentences)[i].resize(string_sentences[i].size()); - for (int j = 0; j < string_sentences[i].size(); j++) { -- kaldi::int64 key = symbol_table.Find(string_sentences[i][j]); -+ int64 key = symbol_table.Find(string_sentences[i][j]); - KALDI_ASSERT(key != -1); // fst::kNoSymbol - (*int_sentences)[i][j] = static_cast(key); - } -diff --git a/src/tree/tree-renderer.cc b/src/tree/tree-renderer.cc -index bbaa5cda1..8e3b463fe 100644 ---- a/src/tree/tree-renderer.cc -+++ b/src/tree/tree-renderer.cc -@@ -67,7 +67,7 @@ TreeRenderer::MakeEdgeLabel(const EventKeyType &key, - oss << ", "; - if (key != kPdfClass) { - std::string phone = -- phone_syms_.Find(static_cast(*child)); -+ phone_syms_.Find(static_cast(*child)); - if (phone.empty()) - KALDI_ERR << "No phone found for Phone ID " << *child; - oss << phone; -@@ -137,7 +137,7 @@ void TreeRenderer::RenderTable(const EventType *query, int32 id) { - ExpectToken(is_, binary_, "NULL"); // consume the invalid/NULL entry - continue; - } -- std::string phone = phone_syms_.Find(static_cast(t)); -+ std::string phone = phone_syms_.Find(static_cast(t)); - if (phone.empty()) - KALDI_ERR << "Phone ID found in a TableEventMap, but not in the " - << "phone symbol table! ID: " << t; diff --git a/recipe/patches/0005-Shared-libraries-on-windows.patch b/recipe/patches/0005-Shared-libraries-on-windows.patch index 7ce7c09..b7de222 100644 --- a/recipe/patches/0005-Shared-libraries-on-windows.patch +++ b/recipe/patches/0005-Shared-libraries-on-windows.patch @@ -1,104 +1,5 @@ -From a5eded921a6eef9a8e7f9ebcb39000bf747aa32b Mon Sep 17 00:00:00 2001 -From: Michael McAuliffe -Date: Wed, 10 May 2023 13:34:32 -0700 -Subject: [PATCH 5/7] Shared libraries on windows - ---- - CMakeLists.txt | 25 +++++++------ - cmake/gen_cmake_skeleton.py | 28 +++++++++++++++ - src/base/kaldi-dll-exports.h | 29 +++++++++++++++ - src/base/kaldi-error.h | 3 +- - src/base/kaldi-types.h | 36 +++++++++++++------ - src/bin/phones-to-prons.cc | 8 ++--- - src/cudafeat/feature-spectral-cuda.cu | 20 +++++------ - src/cudamatrix/cu-allocator.h | 5 +-- - src/cudamatrix/cu-device.cc | 5 ++- - src/cudamatrix/cu-device.h | 12 +++---- - src/fstbin/make-grammar-fst.cc | 2 +- - src/gmmbin/gmm-decode-faster-regtree-fmllr.cc | 2 +- - src/gmmbin/gmm-decode-faster-regtree-mllr.cc | 2 +- - src/gmmbin/gmm-init-biphone.cc | 6 ++-- - src/kwsbin/kws-search.cc | 8 ++--- - src/latbin/lattice-arc-post.cc | 2 +- - src/latbin/lattice-compose.cc | 8 ++--- - src/latbin/lattice-lmrescore.cc | 4 +-- - src/latbin/lattice-oracle.cc | 2 +- - .../online-audio-server-decode-faster.cc | 2 +- - src/util/kaldi-thread.h | 3 +- - 21 files changed, 146 insertions(+), 66 deletions(-) - create mode 100644 src/base/kaldi-dll-exports.h - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 886af19c6..24cb574a6 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -22,7 +22,6 @@ endif() - - include(third_party/get_third_party) - --include(cmake/third_party/openfst.cmake) - - find_package(PythonInterp) - if(NOT PYTHON_EXECUTABLE) -@@ -43,14 +42,20 @@ execute_process(COMMAND ${PYTHON_EXECUTABLE} - ) - unset(IS_LIB_SHARE) - --set(CMAKE_CXX_STANDARD 14) -+set(CMAKE_CXX_STANDARD 17) - set(CMAKE_CXX_EXTENSIONS OFF) - set(CMAKE_INSTALL_MESSAGE LAZY) # hide "-- Up-to-date: ..." - if(BUILD_SHARED_LIBS) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) -- if(WIN32) -+ if(MSVC) -+ find_package(dlfcn-win32 REQUIRED) -+ set(CMAKE_DL_LIBS dlfcn-win32::dl) - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -- message(FATAL_ERROR "DLL is not supported currently") -+ add_definitions(-DWIN32_LEAN_AND_MEAN) -+ add_definitions(-DNOMINMAX) -+ add_definitions(-D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS) -+ add_definitions(-D_USE_MATH_DEFINES) -+ #message(FATAL_ERROR "DLL is not supported currently") - elseif(APPLE) - set(CMAKE_INSTALL_RPATH "@loader_path") - else() -@@ -181,6 +186,8 @@ if(CUDA_FOUND) - list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC") - set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}") - endif() -+ set(CMAKE_CUDA_STANDARD 17) -+ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) - set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS}) - - add_definitions(-DHAVE_CUDA=1) -@@ -251,9 +258,7 @@ add_subdirectory(src/nnet3) - add_subdirectory(src/rnnlm) - add_subdirectory(src/chain) - add_subdirectory(src/ivector) --if(NOT MSVC) -- add_subdirectory(src/online) --endif() -+add_subdirectory(src/online) - add_subdirectory(src/online2) - add_subdirectory(src/kws) - -@@ -283,10 +288,8 @@ add_subdirectory(src/nnet3bin) - add_subdirectory(src/rnnlmbin) - add_subdirectory(src/chainbin) - add_subdirectory(src/ivectorbin) --if(NOT MSVC) -- add_subdirectory(src/onlinebin) -- add_subdirectory(src/online2bin) --endif() -+add_subdirectory(src/onlinebin) -+add_subdirectory(src/online2bin) - add_subdirectory(src/kwsbin) - - # add all cuda executables diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py -index c8fee4c41..154369dd5 100644 +index c8fee4c..bf95eab 100644 --- a/cmake/gen_cmake_skeleton.py +++ b/cmake/gen_cmake_skeleton.py @@ -154,6 +154,7 @@ def disable_for_win32(t): @@ -109,18 +10,17 @@ index c8fee4c41..154369dd5 100644 "online-server-gmm-decode-faster", "online-audio-server-decode-faster" ] -@@ -239,6 +240,10 @@ class CMakeListsLibrary(object): +@@ -179,6 +180,9 @@ class CMakeListsHeaderLibrary(object): + def gen_code(self): ret = [] - + if sys.platform == 'win32' and self.dir_name.startswith("online"): + self.header_list = [x for x in self.header_list if x != 'online-tcp-source.h'] + self.source_list = [x for x in self.source_list if x != 'online-tcp-source.cc'] -+ if len(self.header_list) > 0: ret.append("set(PUBLIC_HEADERS") for f in self.header_list: -@@ -252,6 +257,8 @@ class CMakeListsLibrary(object): +@@ -252,6 +256,8 @@ class CMakeListsLibrary(object): ret.append(" cuda_compile(CUDA_OBJS SHARED") for f in self.cuda_source_list: ret.append(" " + f) @@ -129,7 +29,7 @@ index c8fee4c41..154369dd5 100644 ret.append(" )") ret.append("endif()\n") -@@ -266,6 +273,27 @@ class CMakeListsLibrary(object): +@@ -266,6 +272,27 @@ class CMakeListsLibrary(object): ret.append(" $") ret.append(" $") ret.append(")\n") @@ -157,13 +57,98 @@ index c8fee4c41..154369dd5 100644 if len(self.depends) > 0: ret.append("target_link_libraries(" + self.target_name + " PUBLIC") +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 2f26596..d824a40 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -33,6 +33,9 @@ if(BuildForFedora) + + #pkg_check_modules(FST REQUIRED fst) + ++elseif(CONDA_ROOT) ++ include(third_party/get_third_party) ++ + else() + include(third_party/get_third_party) + +@@ -60,6 +63,9 @@ execute_process(COMMAND ${PYTHON_EXECUTABLE} + unset(IS_LIB_SHARE) + + if(BuildForFedora) ++elseif(CONDA_ROOT) ++ set(CMAKE_CXX_STANDARD 17) ++ set(CMAKE_CXX_EXTENSIONS OFF) + else() + set(CMAKE_CXX_STANDARD 14) + set(CMAKE_CXX_EXTENSIONS OFF) +@@ -67,9 +73,14 @@ endif() + set(CMAKE_INSTALL_MESSAGE LAZY) # hide "-- Up-to-date: ..." + if(BUILD_SHARED_LIBS) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +- if(WIN32) +- set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +- message(FATAL_ERROR "DLL is not supported currently") ++ if(MSVC) ++ find_package(dlfcn-win32 REQUIRED) ++ set(CMAKE_DL_LIBS dlfcn-win32::dl) ++ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) ++ add_definitions(-DWIN32_LEAN_AND_MEAN) ++ add_definitions(-DNOMINMAX) ++ add_definitions(-D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS) ++ add_definitions(-D_USE_MATH_DEFINES) + elseif(APPLE) + set(CMAKE_INSTALL_RPATH "@loader_path") + else() +@@ -200,6 +211,8 @@ if(CUDA_FOUND) + list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC") + set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}") + endif() ++ set(CMAKE_CUDA_STANDARD 17) ++ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) + set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS}) + + add_definitions(-DHAVE_CUDA=1) +@@ -252,7 +265,9 @@ if(BuildForFedora) + # TODO: Detect the right version and put it here. + add_definitions(-DOPENFST_VER=18300) + # link_directories(/usr/lib64) +-# include_directories(/usr/include/fst) ++# include_directories(/usr/include/fst) ++elseif(CONDA_ROOT) ++ add_definitions(-DOPENFST_VER=18300) + endif() + + link_libraries(fst) +@@ -281,9 +296,7 @@ add_subdirectory(src/nnet3) + add_subdirectory(src/rnnlm) + add_subdirectory(src/chain) + add_subdirectory(src/ivector) +-if(NOT MSVC) +- add_subdirectory(src/online) +-endif() ++add_subdirectory(src/online) + add_subdirectory(src/online2) + add_subdirectory(src/kws) + +@@ -313,10 +326,8 @@ add_subdirectory(src/nnet3bin) + add_subdirectory(src/rnnlmbin) + add_subdirectory(src/chainbin) + add_subdirectory(src/ivectorbin) +-if(NOT MSVC) +- add_subdirectory(src/onlinebin) +- add_subdirectory(src/online2bin) +-endif() ++add_subdirectory(src/onlinebin) ++add_subdirectory(src/online2bin) + add_subdirectory(src/kwsbin) + + # add all cuda executables diff --git a/src/base/kaldi-dll-exports.h b/src/base/kaldi-dll-exports.h new file mode 100644 -index 000000000..ee1d84cd6 +index 0000000..3a7f156 --- /dev/null +++ b/src/base/kaldi-dll-exports.h -@@ -0,0 +1,29 @@ -+ +@@ -0,0 +1,28 @@ +#ifndef kaldi_dll_export_H +#define kaldi_dll_export_H + @@ -192,8 +177,9 @@ index 000000000..ee1d84cd6 +#endif // defined(KALDI_CUMATRIX_DLL_EXPORTS) + +#endif /* kaldi_dll_export_H */ +\ No newline at end of file diff --git a/src/base/kaldi-error.h b/src/base/kaldi-error.h -index a9904a752..bc950f3da 100644 +index 572cbb4..3e431b6 100644 --- a/src/base/kaldi-error.h +++ b/src/base/kaldi-error.h @@ -33,6 +33,7 @@ @@ -213,142 +199,8 @@ index a9904a752..bc950f3da 100644 /// Get verbosity level, usually set via command line '--verbose=' switch. inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; } -diff --git a/src/base/kaldi-types.h b/src/base/kaldi-types.h -index 68d5578a5..3a1b8b920 100644 ---- a/src/base/kaldi-types.h -+++ b/src/base/kaldi-types.h -@@ -39,16 +39,30 @@ typedef float BaseFloat; - // we find in the future lacks stdint.h - #include - --typedef int8_t int8; --typedef int16_t int16; --typedef int32_t int32; --typedef int64_t int64; -- --typedef uint8_t uint8; --typedef uint16_t uint16; --typedef uint32_t uint32; --typedef uint64_t uint64; --typedef float float32; --typedef double double64; -+namespace fst { -+ typedef int8_t int8; -+ typedef int16_t int16; -+ typedef int32_t int32; -+ typedef int64_t int64; -+ typedef uint8_t uint8; -+ typedef uint16_t uint16; -+ typedef uint32_t uint32; -+ typedef uint64_t uint64; -+} // end namespace fst -+ -+ -+namespace kaldi { -+ typedef int8_t int8; -+ typedef int16_t int16; -+ typedef int32_t int32; -+ typedef int64_t int64; -+ typedef uint8_t uint8; -+ typedef uint16_t uint16; -+ typedef uint32_t uint32; -+ typedef uint64_t uint64; -+ typedef float float32; -+ typedef double double64; -+} // end namespace kaldi -+ - - #endif // KALDI_BASE_KALDI_TYPES_H_ -diff --git a/src/bin/phones-to-prons.cc b/src/bin/phones-to-prons.cc -index 0d7ab12c2..d6a4422a3 100644 ---- a/src/bin/phones-to-prons.cc -+++ b/src/bin/phones-to-prons.cc -@@ -28,9 +28,9 @@ - - // Create FST that accepts the phone sequence, with any number - // of word-start and word-end symbol in between each phone. --void CreatePhonesAltFst(const std::vector &phones, -- int32 word_start_sym, -- int32 word_end_sym, -+void CreatePhonesAltFst(const std::vector &phones, -+ fst::int32 word_start_sym, -+ fst::int32 word_end_sym, - fst::VectorFst *ofst) { - using fst::StdArc; - typedef fst::StdArc::StateId StateId; -@@ -172,7 +172,7 @@ int main(int argc, char *argv[]) { - if (g_kaldi_verbose_level >= 2) { - KALDI_LOG << "phn2word FST is below:"; - fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true, "\t"); -- fstprinter.Print(&std::cerr, "standard error"); -+ fstprinter.Print(std::cerr, "standard error"); - KALDI_LOG << "phone sequence is: "; - for (size_t i = 0; i < phones.size(); i++) - std::cerr << phones[i] << ' '; -diff --git a/src/cudafeat/feature-spectral-cuda.cu b/src/cudafeat/feature-spectral-cuda.cu -index d8fc215b8..035d6bb9d 100644 ---- a/src/cudafeat/feature-spectral-cuda.cu -+++ b/src/cudafeat/feature-spectral-cuda.cu -@@ -109,7 +109,7 @@ __global__ void power_spectrum_kernel(int row_length, float *A_in, int32_t ldi, - // Expects to be called with 32x8 sized thread block. - // LDB: Adding use_log flag - __global__ void mel_banks_compute_kernel(int32_t num_frames, float energy_floor, -- int32 *offsets, int32 *sizes, -+ kaldi::int32 *offsets, kaldi::int32 *sizes, - float **vecs, const float *feats, - int32_t ldf, float *mels, int32_t ldm, - bool use_log) { -@@ -269,31 +269,31 @@ __global__ void process_window_kernel( - } - } - --__device__ inline int32 FirstSampleOfFrame(int32 frame, int32 frame_shift, -- int32 window_size, bool snip_edges) { -+__device__ inline kaldi::int32 FirstSampleOfFrame(kaldi::int32 frame, kaldi::int32 frame_shift, -+ kaldi::int32 window_size, bool snip_edges) { - if (snip_edges) { - return frame * frame_shift; - } else { -- int32 midpoint_of_frame = frame_shift * frame + frame_shift / 2, -+ kaldi::int32 midpoint_of_frame = frame_shift * frame + frame_shift / 2, - beginning_of_frame = midpoint_of_frame - window_size / 2; - return beginning_of_frame; - } - } - - __global__ void extract_window_kernel( -- int32 frame_shift, int32 frame_length, int32 frame_length_padded, -- int32 window_size, bool snip_edges, int32_t sample_offset, -- const BaseFloat * __restrict__ wave, int32 wave_dim, -+ kaldi::int32 frame_shift, kaldi::int32 frame_length, kaldi::int32 frame_length_padded, -+ kaldi::int32 window_size, bool snip_edges, int32_t sample_offset, -+ const BaseFloat * __restrict__ wave, kaldi::int32 wave_dim, - BaseFloat *__restrict__ windows, int32_t wlda) { - int frame = blockIdx.x; - int tidx = threadIdx.x; - -- int32 start_sample = -+ kaldi::int32 start_sample = - FirstSampleOfFrame(frame, frame_shift, window_size, snip_edges); - - // wave_start and wave_end are start and end indexes into 'wave', for the - // piece of wave that we're trying to extract. -- int32 wave_start = int32(start_sample - sample_offset), -+ kaldi::int32 wave_start = kaldi::int32(start_sample - sample_offset), - wave_end = wave_start + frame_length; - - BaseFloat *window = windows + frame * wlda; -@@ -307,7 +307,7 @@ __global__ void extract_window_kernel( - // be reached for about two frames per utterance, so we don't concern - // ourselves excessively with efficiency. - for (int s = tidx; s < frame_length; s += blockDim.x) { -- int32 s_in_wave = s + wave_start; -+ kaldi::int32 s_in_wave = s + wave_start; - while (s_in_wave < 0 || s_in_wave >= wave_dim) { - // reflect around the beginning or end of the wave. - // e.g. -1 -> 0, -2 -> 1. diff --git a/src/cudamatrix/cu-allocator.h b/src/cudamatrix/cu-allocator.h -index 3edd9f1ca..464f3fd4e 100644 +index 3edd9f1..464f3fd 100644 --- a/src/cudamatrix/cu-allocator.h +++ b/src/cudamatrix/cu-allocator.h @@ -46,6 +46,7 @@ @@ -378,23 +230,26 @@ index 3edd9f1ca..464f3fd4e 100644 } // namespace kaldi diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc -index fd2c0c64f..5c47aef15 100644 +index fd2c0c6..c31b031 100644 --- a/src/cudamatrix/cu-device.cc +++ b/src/cudamatrix/cu-device.cc -@@ -631,7 +631,10 @@ CuDevice::~CuDevice() { +@@ -631,7 +631,14 @@ CuDevice::~CuDevice() { // Each thread has its own copy of the CuDevice object. // Note: this was declared "static". --thread_local CuDevice CuDevice::this_thread_device_; ++#ifdef _WIN32 +CuDevice& CuDevice::this_thread_device() { + static thread_local CuDevice this_thread_device_; + return this_thread_device_; +} ++#else + thread_local CuDevice CuDevice::this_thread_device_; ++#endif CuDevice::CuDeviceOptions CuDevice::device_options_; diff --git a/src/cudamatrix/cu-device.h b/src/cudamatrix/cu-device.h -index fe8ac7955..7f54e0864 100644 +index fe8ac79..ef827fe 100644 --- a/src/cudamatrix/cu-device.h +++ b/src/cudamatrix/cu-device.h @@ -61,6 +61,7 @@ typedef int cusolverStatus_t; @@ -405,16 +260,20 @@ index fe8ac7955..7f54e0864 100644 namespace kaldi { -@@ -100,7 +101,7 @@ class CuDevice { +@@ -100,7 +101,12 @@ class CuDevice { // At the beginning of the program, if you want to use a GPU, you // should call CuDevice::Instantiate().SelectGpuId(..). static CuDevice& Instantiate() { -- CuDevice &ans = this_thread_device_; ++ #ifdef _WIN32 + CuDevice &ans = this_thread_device(); ++ #else + CuDevice &ans = this_thread_device_; ++ #endif ++ if (!ans.initialized_) ans.Initialize(); return ans; -@@ -296,7 +297,7 @@ class CuDevice { +@@ -296,7 +302,7 @@ class CuDevice { } }; @@ -423,205 +282,35 @@ index fe8ac7955..7f54e0864 100644 // Default constructor used to initialize this_thread_device_ CuDevice(); -@@ -339,20 +340,19 @@ class CuDevice { +@@ -339,7 +345,11 @@ class CuDevice { // Each thread has its own CuDevice object, which contains the cublas and // cusparse handles. These are unique to the thread (which is what is // recommended by NVidia). -- static thread_local CuDevice this_thread_device_; -- ++ #ifdef _WIN32 + static CuDevice& this_thread_device(); ++ #else + static thread_local CuDevice this_thread_device_; ++ #endif + // The GPU device-id that we are using. This will be initialized to -1, and will // be set when the user calls - // CuDevice::Instantiate::SelectGpuId(...) +@@ -347,12 +357,12 @@ class CuDevice { // from the main thread. Background threads will, when spawned and when // CuDevice::Instantiate() is called from them the first time, will // call cudaSetDevice(device_id)) - static int32 device_id_; -+ static KALDI_CUMATRIX_DLL int32 device_id_; ++ static int32 KALDI_CUMATRIX_DLL device_id_; // This will automatically be set to true if the application has multiple // threads that access the GPU device. It is used to know whether to // use locks when accessing the allocator and the profiling-related code. - static bool multi_threaded_; -+ static KALDI_CUMATRIX_DLL bool multi_threaded_; ++ static bool KALDI_CUMATRIX_DLL multi_threaded_; // The variable profile_map_ will only be used if the verbose level is >= 1; // it will accumulate some function-level timing information that is printed -diff --git a/src/fstbin/make-grammar-fst.cc b/src/fstbin/make-grammar-fst.cc -index 564f25531..a6168bcd1 100644 ---- a/src/fstbin/make-grammar-fst.cc -+++ b/src/fstbin/make-grammar-fst.cc -@@ -27,7 +27,7 @@ - - template - void MakeGrammarFst(kaldi::ParseOptions po, -- int32 nonterm_phones_offset, -+ kaldi::int32 nonterm_phones_offset, - bool write_as_grammar){ - using namespace kaldi; - using namespace fst; -diff --git a/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc b/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc -index ca39cbe8c..dab4a85ab 100644 ---- a/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc -+++ b/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc -@@ -71,7 +71,7 @@ bool DecodeUtterance(kaldi::FasterDecoder *decoder, - kaldi::DecodableInterface *decodable, - DecodeInfo *info, - const string &uttid, -- int32 num_frames, -+ kaldi::int32 num_frames, - BaseFloat *total_like) { - decoder->Decode(decodable); - KALDI_LOG << "Length of file is " << num_frames; -diff --git a/src/gmmbin/gmm-decode-faster-regtree-mllr.cc b/src/gmmbin/gmm-decode-faster-regtree-mllr.cc -index 9a5d9486b..bf89bfb62 100644 ---- a/src/gmmbin/gmm-decode-faster-regtree-mllr.cc -+++ b/src/gmmbin/gmm-decode-faster-regtree-mllr.cc -@@ -70,7 +70,7 @@ bool DecodeUtterance(kaldi::FasterDecoder *decoder, - kaldi::DecodableInterface *decodable, - DecodeInfo *info, - const string &uttid, -- int32 num_frames, -+ kaldi::int32 num_frames, - BaseFloat *total_like) { - decoder->Decode(decodable); - KALDI_LOG << "Length of file is " << num_frames;; -diff --git a/src/gmmbin/gmm-init-biphone.cc b/src/gmmbin/gmm-init-biphone.cc -index 18f7b68af..31940b8be 100644 ---- a/src/gmmbin/gmm-init-biphone.cc -+++ b/src/gmmbin/gmm-init-biphone.cc -@@ -222,9 +222,9 @@ BiphoneContextDependencyFull(std::vector > phone_sets, - It's more efficient to load the biphone counts into a map because - most entries are zero, but since there are not many biphones, a 2-dim vector - is OK. */ --static void ReadPhoneCounts(std::string &filename, int32 num_phones, -- std::vector *mono_counts, -- std::vector > *bi_counts) { -+static void ReadPhoneCounts(std::string &filename, kaldi::int32 num_phones, -+ std::vector *mono_counts, -+ std::vector > *bi_counts) { - // The actual phones start from id = 1 (so the last phone has id = num_phones). - mono_counts->resize(num_phones + 1, 0); - bi_counts->resize(num_phones + 1, std::vector(num_phones + 1, 0)); -diff --git a/src/kwsbin/kws-search.cc b/src/kwsbin/kws-search.cc -index 8e2b2a84d..7dcc3f81b 100644 ---- a/src/kwsbin/kws-search.cc -+++ b/src/kwsbin/kws-search.cc -@@ -120,7 +120,7 @@ typedef kaldi::TableWriter< kaldi::BasicVectorHolder > - VectorOfDoublesWriter; - void OutputDetailedStatistics(const std::string &kwid, - const kaldi::KwsLexicographicFst &keyword, -- const unordered_map &label_decoder, -+ const unordered_map &label_decoder, - VectorOfDoublesWriter *output ) { - std::vector paths; - -@@ -134,9 +134,9 @@ void OutputDetailedStatistics(const std::string &kwid, - for (int i = 0; i < paths.size(); ++i) { - std::vector out; - double score; -- int32 tbeg, tend, uid; -+ kaldi::int32 tbeg, tend, uid; - -- uint64 osymbol = label_decoder.find(paths[i].last)->second; -+ kaldi::uint64 osymbol = label_decoder.find(paths[i].last)->second; - uid = kaldi::DecodeLabelUid(osymbol); - tbeg = paths[i].weight.Value2().Value1().Value(); - tend = paths[i].weight.Value2().Value2().Value(); -@@ -305,7 +305,7 @@ int main(int argc, char *argv[]) { - - KwsLexicographicFst keyword_fst; - KwsLexicographicFst result_fst; -- Map(keyword, &keyword_fst, VectorFstToKwsLexicographicFstMapper()); -+ ArcMap(keyword, &keyword_fst, VectorFstToKwsLexicographicFstMapper()); - Compose(keyword_fst, index, &result_fst); - - if (stats_wspecifier != "") { -diff --git a/src/latbin/lattice-arc-post.cc b/src/latbin/lattice-arc-post.cc -index 0aca9cdc4..c5b673dd2 100644 ---- a/src/latbin/lattice-arc-post.cc -+++ b/src/latbin/lattice-arc-post.cc -@@ -178,7 +178,7 @@ int main(int argc, char *argv[]) { - // Read as regular lattice - kaldi::SequentialCompactLatticeReader clat_reader(lats_rspecifier); - -- int64 tot_post = 0; -+ kaldi::int64 tot_post = 0; - int32 num_lat_done = 0, num_lat_err = 0; - - for (; !clat_reader.Done(); clat_reader.Next()) { -diff --git a/src/latbin/lattice-compose.cc b/src/latbin/lattice-compose.cc -index d191b076a..4074b0ef9 100644 ---- a/src/latbin/lattice-compose.cc -+++ b/src/latbin/lattice-compose.cc -@@ -126,9 +126,9 @@ int main(int argc, char *argv[]) { - // with all the cost on the first member of the pair (since we're - // assuming it's a graph weight). - fst::CacheOptions cache_opts(true, num_states_cache); -- fst::MapFstOptions mapfst_opts(cache_opts); -+ fst::ArcMapFstOptions mapfst_opts(cache_opts); - fst::StdToLatticeMapper mapper; -- fst::MapFst > -+ fst::ArcMapFst > - mapped_fst2(*fst2, mapper, mapfst_opts); - - for (; !lattice_reader1.Done(); lattice_reader1.Next()) { -@@ -265,9 +265,9 @@ int main(int argc, char *argv[]) { - // with all the cost on the first member of the pair (since we're - // assuming it's a graph weight). - fst::CacheOptions cache_opts(true, num_states_cache); -- fst::MapFstOptions mapfst_opts(cache_opts); -+ fst::ArcMapFstOptions mapfst_opts(cache_opts); - fst::StdToLatticeMapper mapper; -- fst::MapFst > -+ fst::ArcMapFst > - mapped_fst2(fst2, mapper, mapfst_opts); - - // sort lat1 on olabel. -diff --git a/src/latbin/lattice-lmrescore.cc b/src/latbin/lattice-lmrescore.cc -index 2e5406f75..1a538e538 100644 ---- a/src/latbin/lattice-lmrescore.cc -+++ b/src/latbin/lattice-lmrescore.cc -@@ -75,9 +75,9 @@ int main(int argc, char *argv[]) { - // with all the cost on the first member of the pair (since it's a graph - // weight). - fst::CacheOptions cache_opts(true, num_states_cache); -- fst::MapFstOptions mapfst_opts(cache_opts); -+ fst::ArcMapFstOptions mapfst_opts(cache_opts); - fst::StdToLatticeMapper mapper; -- fst::MapFst > -+ fst::ArcMapFst > - lm_fst(*std_lm_fst, mapper, mapfst_opts); - delete std_lm_fst; - -diff --git a/src/latbin/lattice-oracle.cc b/src/latbin/lattice-oracle.cc -index 5f2513131..d08ff93f9 100644 ---- a/src/latbin/lattice-oracle.cc -+++ b/src/latbin/lattice-oracle.cc -@@ -66,7 +66,7 @@ void ConvertLatticeToUnweightedAcceptor(const kaldi::Lattice &ilat, - // first convert from lattice to normal FST - fst::ConvertLattice(ilat, ofst); - // remove weights, project to output, sort according to input arg -- fst::Map(ofst, fst::RmWeightMapper()); -+ fst::ArcMap(ofst, fst::RmWeightMapper()); - fst::Project(ofst, fst::PROJECT_OUTPUT); // The words are on the output side - fst::Relabel(ofst, wildcards, wildcards); - fst::RmEpsilon(ofst); // Don't tolerate epsilons as they make it hard to -diff --git a/src/onlinebin/online-audio-server-decode-faster.cc b/src/onlinebin/online-audio-server-decode-faster.cc -index 43a2fa7e6..91b13cf22 100644 ---- a/src/onlinebin/online-audio-server-decode-faster.cc -+++ b/src/onlinebin/online-audio-server-decode-faster.cc -@@ -65,7 +65,7 @@ bool WriteLine(int32 socket, std::string line); - const float kFramesPerSecond = 100.0f; - } // namespace kaldi - --int32 main(int argc, char *argv[]) { -+int main(int argc, char *argv[]) { - using namespace kaldi; - using namespace fst; - diff --git a/src/util/kaldi-thread.h b/src/util/kaldi-thread.h -index 29cfaee8a..8d6003022 100644 +index 29cfaee..8d60030 100644 --- a/src/util/kaldi-thread.h +++ b/src/util/kaldi-thread.h @@ -26,6 +26,7 @@ From 398a39b8132bc20bbb722e2b2411cb770447b057 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Wed, 30 Jul 2025 10:11:32 -0700 Subject: [PATCH 02/10] Working locally on windows --- recipe/meta.yaml | 15 +- ...=> 0001-Shared-libraries-on-windows.patch} | 145 ++++++++++++++++-- 2 files changed, 132 insertions(+), 28 deletions(-) rename recipe/patches/{0005-Shared-libraries-on-windows.patch => 0001-Shared-libraries-on-windows.patch} (71%) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 830d0b2..19e155c 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -10,7 +10,7 @@ "base", "matrix", "cudamatrix", "util", "feat", "tree", "gmm", "transform", "fstext", "hmm", "lm", "decoder", "lat", "nnet", "nnet2", "nnet3", "rnnlm", "chain", "ivector", "online", "online2", "kws" -] + (cuda_compiler_version is defined and cuda_compiler_version != "None") * [ +] + (cuda_compiler_version != "None") * [ "cudafeat", "cudadecoder" ]%} # This list should be substantially longer, but upstream just globs @@ -43,18 +43,9 @@ source: url: https://github.com/kaldi-asr/kaldi/archive/{{ commit }}.tar.gz sha256: 139de58f1abbf727fee65e709a8fcc6d8714d5e5596a7eb15491faef1ac73304 patches: - # Patches based on https://github.com/kaldi-asr/kaldi/pull/4829 - #- patches/0001-Support-openfst-1.7.6.patch - #- patches/0002-Support-openfst-1.8.0.patch - #- patches/0003-Support-openfst-1.8.1.patch - #- patches/0004-Support-openfst-1.8.2.patch - - patches/0005-Shared-libraries-on-windows.patch - #- patches/0006-Cuda-12-support.patch # [(cuda_compiler_version or "").startswith("12")] - ## backport https://github.com/kaldi-asr/kaldi/pull/4917 - #- patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch + - patches/0001-Shared-libraries-on-windows.patch build: number: 0 - skip: true # [cuda_compiler_version == "11.8" and (aarch64 or ppc64le)] requirements: build: @@ -82,7 +73,7 @@ outputs: - libtool # [not win] - automake # [not win] host: - {% if cuda_major >= 12 %} + {% if cuda_compiler_version != "None" %} - cuda-version {{ cuda_compiler_version }} - cuda-cudart-dev - cuda-driver-dev # [linux] diff --git a/recipe/patches/0005-Shared-libraries-on-windows.patch b/recipe/patches/0001-Shared-libraries-on-windows.patch similarity index 71% rename from recipe/patches/0005-Shared-libraries-on-windows.patch rename to recipe/patches/0001-Shared-libraries-on-windows.patch index b7de222..fc85a9f 100644 --- a/recipe/patches/0005-Shared-libraries-on-windows.patch +++ b/recipe/patches/0001-Shared-libraries-on-windows.patch @@ -1,5 +1,5 @@ diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py -index c8fee4c..bf95eab 100644 +index c8fee4c..7b7b050 100644 --- a/cmake/gen_cmake_skeleton.py +++ b/cmake/gen_cmake_skeleton.py @@ -154,6 +154,7 @@ def disable_for_win32(t): @@ -20,16 +20,20 @@ index c8fee4c..bf95eab 100644 if len(self.header_list) > 0: ret.append("set(PUBLIC_HEADERS") for f in self.header_list: -@@ -252,6 +256,8 @@ class CMakeListsLibrary(object): - ret.append(" cuda_compile(CUDA_OBJS SHARED") +@@ -247,9 +251,9 @@ class CMakeListsLibrary(object): + + if len(self.cuda_source_list) > 0: + self.source_list.append("${CUDA_OBJS}") +- ret.append("if(CUDA_FOUND)") +- ret.append(" cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") +- ret.append(" cuda_compile(CUDA_OBJS SHARED") ++ ret.append("if(CUDAToolkit_FOUND)") ++ ret.append(" include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") ++ ret.append(" set(CUDA_OBJS") for f in self.cuda_source_list: ret.append(" " + f) -+ if self.dir_name.startswith("cudafeat") and sys.platform == 'win32': -+ ret.append(" OPTIONS -DKALDI_CUMATRIX_DLL_IMPORTS") ret.append(" )") - ret.append("endif()\n") - -@@ -266,6 +272,27 @@ class CMakeListsLibrary(object): +@@ -266,6 +270,31 @@ class CMakeListsLibrary(object): ret.append(" $") ret.append(" $") ret.append(")\n") @@ -47,6 +51,10 @@ index c8fee4c..bf95eab 100644 + ret.append("target_compile_definitions(" + self.target_name + " INTERFACE KALDI_CUMATRIX_DLL_IMPORTS)") + + ret.append("endif(MSVC)\n") ++ elif self.dir_name.startswith("cudafeat"): ++ ret.append("if(MSVC)") ++ ret.append("target_compile_definitions(" + self.target_name + " INTERFACE KALDI_CUMATRIX_DLL_IMPORTS)") ++ ret.append("endif(MSVC)\n") + elif self.dir_name.startswith("util"): + ret.append("if(MSVC)") + ret.append("set_target_properties(" + self.target_name + " PROPERTIES DEFINE_SYMBOL \"KALDI_UTIL_DLL_EXPORTS\")") @@ -58,10 +66,23 @@ index c8fee4c..bf95eab 100644 if len(self.depends) > 0: ret.append("target_link_libraries(" + self.target_name + " PUBLIC") diff --git a/CMakeLists.txt b/CMakeLists.txt -index 2f26596..d824a40 100644 +index 2f26596..2ec796e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -33,6 +33,9 @@ if(BuildForFedora) +@@ -16,6 +16,12 @@ if(CONDA_ROOT) + include_directories("${CONDA_ROOT}/include") + if (NOT CUDA_TOOLKIT_ROOT_DIR) + set(CUDA_TOOLKIT_ROOT_DIR "${CONDA_ROOT}") ++ else() ++ set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${CUDA_TOOLKIT_ROOT_DIR}/include") ++ set(CMAKE_LIBRARY_PATH "${CMAKE_LIBRARY_PATH};${CUDA_TOOLKIT_ROOT_DIR}/lib;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs") ++ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") ++ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs") ++ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") + endif() + + endif() +@@ -33,6 +39,9 @@ if(BuildForFedora) #pkg_check_modules(FST REQUIRED fst) @@ -71,7 +92,27 @@ index 2f26596..d824a40 100644 else() include(third_party/get_third_party) -@@ -60,6 +63,9 @@ execute_process(COMMAND ${PYTHON_EXECUTABLE} +@@ -40,8 +49,8 @@ else() + endif() + + +-find_package(PythonInterp) +-if(NOT PYTHON_EXECUTABLE) ++find_package(Python3 REQUIRED) ++if(NOT Python3_EXECUTABLE) + message(FATAL_ERROR "Needs python to auto-generate most CMake files, but not found.") + endif() + +@@ -51,7 +60,7 @@ set(IS_LIB_SHARE "") + if(BUILD_SHARED_LIBS) + set(IS_LIB_SHARE "--shared") + endif() +-execute_process(COMMAND ${PYTHON_EXECUTABLE} ++execute_process(COMMAND ${Python3_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/gen_cmake_skeleton.py" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "--quiet" +@@ -60,6 +69,9 @@ execute_process(COMMAND ${PYTHON_EXECUTABLE} unset(IS_LIB_SHARE) if(BuildForFedora) @@ -81,7 +122,7 @@ index 2f26596..d824a40 100644 else() set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_EXTENSIONS OFF) -@@ -67,9 +73,14 @@ endif() +@@ -67,9 +79,14 @@ endif() set(CMAKE_INSTALL_MESSAGE LAZY) # hide "-- Up-to-date: ..." if(BUILD_SHARED_LIBS) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -99,7 +140,56 @@ index 2f26596..d824a40 100644 elseif(APPLE) set(CMAKE_INSTALL_RPATH "@loader_path") else() -@@ -200,6 +211,8 @@ if(CUDA_FOUND) +@@ -160,10 +177,10 @@ if(MSVC) + # Added in source, but we actually should do it in build script, whatever... + # add_definitions(-DWIN32_LEAN_AND_MEAN=1) + +- add_compile_options(/permissive- /FS /wd4819 /EHsc /bigobj) ++ # add_compile_options(/permissive- /FS /wd4819 /EHsc /bigobj) + + # some warnings related with fst +- add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305) ++ # add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305) + + set(CompilerFlags + CMAKE_CXX_FLAGS +@@ -186,6 +203,35 @@ if(MSVC) + endif() + + find_package(CUDAToolkit) ++if(CONDA_ROOT) ++ if(CUDAToolkit_FOUND) ++ enable_language(CUDA) ++ ++ set(CMAKE_CUDA_STANDARD 17) ++ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) ++ ++ add_definitions(-DHAVE_CUDA=1) ++ add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM=1) ++ link_libraries( ++ CUDA::cudart ++ CUDA::cublas ++ CUDA::cufft ++ CUDA::curand ++ CUDA::cusolver ++ CUDA::cusparse ++ CUDA::cuda_driver) ++ ++ if(MSVC) ++ # CMake's detection for NvToolExt fails with CUDA 12; it's header-only now, see ++ # https://github.com/conda-forge/cuda-nvtx-feedstock/issues/4 ++ include_directories(${NvToolExt_INCLUDE_DIR}) ++ else() ++ find_package(NvToolExt REQUIRED) ++ include_directories(${NvToolExt_INCLUDE_DIR}) ++ link_libraries(${NvToolExt_LIBRARIES}) ++ endif() ++ endif() ++else() + find_package(CUDA) + if(CUDA_FOUND) + set(CUDA_PROPAGATE_HOST_FLAGS ON) +@@ -200,6 +246,8 @@ if(CUDA_FOUND) list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC") set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}") endif() @@ -108,7 +198,16 @@ index 2f26596..d824a40 100644 set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS}) add_definitions(-DHAVE_CUDA=1) -@@ -252,7 +265,9 @@ if(BuildForFedora) +@@ -221,7 +269,7 @@ if(CUDA_FOUND) + find_package(CUB REQUIRED) + include_directories(${CUB_INCLUDE_DIR}) + endif() +- ++endif() + add_definitions(-DKALDI_NO_PORTAUDIO=1) + + if(KALDI_VERSION) +@@ -252,7 +300,9 @@ if(BuildForFedora) # TODO: Detect the right version and put it here. add_definitions(-DOPENFST_VER=18300) # link_directories(/usr/lib64) @@ -119,7 +218,7 @@ index 2f26596..d824a40 100644 endif() link_libraries(fst) -@@ -281,9 +296,7 @@ add_subdirectory(src/nnet3) +@@ -281,9 +331,7 @@ add_subdirectory(src/nnet3) add_subdirectory(src/rnnlm) add_subdirectory(src/chain) add_subdirectory(src/ivector) @@ -130,7 +229,16 @@ index 2f26596..d824a40 100644 add_subdirectory(src/online2) add_subdirectory(src/kws) -@@ -313,10 +326,8 @@ add_subdirectory(src/nnet3bin) +@@ -295,7 +343,7 @@ if(TENSORFLOW_DIR) + endif() + + # add all cuda libraries +-if(CUDA_FOUND) ++if(CUDAToolkit_FOUND) + add_subdirectory(src/cudafeat) + add_subdirectory(src/cudadecoder) + endif() +@@ -313,14 +361,12 @@ add_subdirectory(src/nnet3bin) add_subdirectory(src/rnnlmbin) add_subdirectory(src/chainbin) add_subdirectory(src/ivectorbin) @@ -143,6 +251,11 @@ index 2f26596..d824a40 100644 add_subdirectory(src/kwsbin) # add all cuda executables +-if(CUDA_FOUND) ++if(CUDAToolkit_FOUND) + add_subdirectory(src/cudafeatbin) + add_subdirectory(src/cudadecoderbin) + endif() diff --git a/src/base/kaldi-dll-exports.h b/src/base/kaldi-dll-exports.h new file mode 100644 index 0000000..3a7f156 From 5b17b45733e25d1e2e55441b6c386bc0f01195df Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Wed, 30 Jul 2025 10:47:23 -0700 Subject: [PATCH 03/10] Update patch for regex issue --- recipe/patches/0001-Shared-libraries-on-windows.patch | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/recipe/patches/0001-Shared-libraries-on-windows.patch b/recipe/patches/0001-Shared-libraries-on-windows.patch index fc85a9f..d36afc9 100644 --- a/recipe/patches/0001-Shared-libraries-on-windows.patch +++ b/recipe/patches/0001-Shared-libraries-on-windows.patch @@ -1,5 +1,5 @@ diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py -index c8fee4c..7b7b050 100644 +index c8fee4c..2eac686 100644 --- a/cmake/gen_cmake_skeleton.py +++ b/cmake/gen_cmake_skeleton.py @@ -154,6 +154,7 @@ def disable_for_win32(t): @@ -20,6 +20,15 @@ index c8fee4c..7b7b050 100644 if len(self.header_list) > 0: ret.append("set(PUBLIC_HEADERS") for f in self.header_list: +@@ -232,7 +236,7 @@ class CMakeListsLibrary(object): + print_wrapper("WARNING: non-standard", filename) + return + libs = makefile.split("ADDLIBS")[-1].split("\n\n")[0] +- libs = re.findall("[^\s\\\\=]+", libs) ++ libs = re.findall(r"[^\s\\=]+", libs) + for lib in libs: + self.depends.append(os.path.splitext(os.path.basename(lib))[0]) + @@ -247,9 +251,9 @@ class CMakeListsLibrary(object): if len(self.cuda_source_list) > 0: From 02e0c11c2110542dd1825f4d12518be561441870 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Wed, 30 Jul 2025 12:47:03 -0700 Subject: [PATCH 04/10] Update meta yaml for cuda issues --- recipe/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 19e155c..b1d20a1 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -73,7 +73,7 @@ outputs: - libtool # [not win] - automake # [not win] host: - {% if cuda_compiler_version != "None" %} + {% if cuda_major >= 12 %} - cuda-version {{ cuda_compiler_version }} - cuda-cudart-dev - cuda-driver-dev # [linux] @@ -107,7 +107,6 @@ outputs: - magma # [cuda_compiler_version != "None"] - openfst ==1.8.4 - ucrt # [win] - - __cuda >={{ cuda_compiler_version }} # [cuda_compiler_version != "None"] test: # will also run run_test.py (checking the binaries) commands: From e5a017eb3052513436523173e4a729658aa7b99e Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Wed, 30 Jul 2025 13:54:39 -0700 Subject: [PATCH 05/10] Change build script to follow torchaudio --- recipe/build_kaldi.sh | 15 +++++++-------- recipe/meta.yaml | 6 +++--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/recipe/build_kaldi.sh b/recipe/build_kaldi.sh index 3d1634f..38ce880 100644 --- a/recipe/build_kaldi.sh +++ b/recipe/build_kaldi.sh @@ -84,20 +84,19 @@ then # Point CMake to CUDA Toolkit. Use `targets` directories with CUDA 12+ if [[ "${cuda_compiler_version}" == 11* ]] then - CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" + export CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" elif [[ "${target_platform}" == "linux-64" ]] then - CUDA_TOOLKIT_ROOT_DIR="${PREFIX}/targets/x86_64-linux" - elif [[ "${target_platform}" == "linux-aarch64" ]] - then - CUDA_TOOLKIT_ROOT_DIR="${PREFIX}/targets/sbsa-linux" - elif [[ "${target_platform}" == "linux-ppc64le" ]] - then - CUDA_TOOLKIT_ROOT_DIR="${PREFIX}/targets/ppc64le-linux" + export CUDA_TOOLKIT_ROOT_DIR="${PREFIX}" + export CUDA_HOME="${PREFIX}" else echo "Unknown CUDA version ${cuda_compiler_version} for target platform ${target_platform}" exit 1 fi + + if [[ "${target_platform}" != "${build_platform}" ]]; then + export CUDA_TOOLKIT_ROOT=${PREFIX} + fi CMAKE_ARGS="${CMAKE_ARGS} -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}" if [[ "${cuda_compiler_version}" == 12* ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DNvToolExt_SEARCH_DIRS=${CUDA_TOOLKIT_ROOT_DIR}/include/nvtx3" diff --git a/recipe/meta.yaml b/recipe/meta.yaml index b1d20a1..6db9b05 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "5.5.1162" %} +{% set version = "5.5.1172" %} # minor/major is https://github.com/kaldi-asr/kaldi/blob/master/src/.version; # patch version is the number of commits since then, see # https://github.com/kaldi-asr/kaldi/blob/master/cmake/VersionHelper.cmake @@ -10,7 +10,7 @@ "base", "matrix", "cudamatrix", "util", "feat", "tree", "gmm", "transform", "fstext", "hmm", "lm", "decoder", "lat", "nnet", "nnet2", "nnet3", "rnnlm", "chain", "ivector", "online", "online2", "kws" -] + (cuda_compiler_version != "None") * [ +] + (cuda_compiler_version is defined and cuda_compiler_version != "None") * [ "cudafeat", "cudadecoder" ]%} # This list should be substantially longer, but upstream just globs @@ -25,7 +25,7 @@ "nnet2/am-nnet.h", "nnet3/attention.h", "rnnlm/sampler.h", "chain/chain-datastruct.h", "ivector/plda.h", "online/online-decodable.h", "online2/online-endpoint.h", "kws/kaldi-kws.h", "itf/options-itf.h" -] + (cuda_compiler_version != "None") * [ +] + (cuda_compiler_version is defined and cuda_compiler_version != "None") * [ "cudafeat/lane-desc.h", "cudadecoder/cuda-fst.h" ] %} From 2fa04969e5e9ef9008fda6b7eddb664b86f98067 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Wed, 30 Jul 2025 16:34:49 -0700 Subject: [PATCH 06/10] Add debugging for missing libnvToolsExt.so --- recipe/build_kaldi.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/recipe/build_kaldi.sh b/recipe/build_kaldi.sh index 38ce880..4a0f464 100644 --- a/recipe/build_kaldi.sh +++ b/recipe/build_kaldi.sh @@ -97,6 +97,7 @@ then if [[ "${target_platform}" != "${build_platform}" ]]; then export CUDA_TOOLKIT_ROOT=${PREFIX} fi + find "${CUDA_TOOLKIT_ROOT_DIR}" -name "libnvToolsExt.so" CMAKE_ARGS="${CMAKE_ARGS} -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}" if [[ "${cuda_compiler_version}" == 12* ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DNvToolExt_SEARCH_DIRS=${CUDA_TOOLKIT_ROOT_DIR}/include/nvtx3" From 12de43eb23a2fd87633759fdcb0e457142522cd0 Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Fri, 1 Aug 2025 15:47:16 -0700 Subject: [PATCH 07/10] Revert changes to build.sh --- recipe/build_kaldi.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/recipe/build_kaldi.sh b/recipe/build_kaldi.sh index 4a0f464..3a1ddd2 100644 --- a/recipe/build_kaldi.sh +++ b/recipe/build_kaldi.sh @@ -84,11 +84,16 @@ then # Point CMake to CUDA Toolkit. Use `targets` directories with CUDA 12+ if [[ "${cuda_compiler_version}" == 11* ]] then - export CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" + CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" elif [[ "${target_platform}" == "linux-64" ]] then - export CUDA_TOOLKIT_ROOT_DIR="${PREFIX}" - export CUDA_HOME="${PREFIX}" + CUDA_TOOLKIT_ROOT_DIR="${PREFIX}/targets/x86_64-linux" + elif [[ "${target_platform}" == "linux-aarch64" ]] + then + CUDA_TOOLKIT_ROOT_DIR="${PREFIX}/targets/sbsa-linux" + elif [[ "${target_platform}" == "linux-ppc64le" ]] + then + CUDA_TOOLKIT_ROOT_DIR="${PREFIX}/targets/ppc64le-linux" else echo "Unknown CUDA version ${cuda_compiler_version} for target platform ${target_platform}" exit 1 @@ -97,7 +102,8 @@ then if [[ "${target_platform}" != "${build_platform}" ]]; then export CUDA_TOOLKIT_ROOT=${PREFIX} fi - find "${CUDA_TOOLKIT_ROOT_DIR}" -name "libnvToolsExt.so" + find "${CUDA_TOOLKIT_ROOT_DIR}" -print | grep -i "libnvToolsExt.so" + find "${PREFIX}" -print | grep -i "libnvToolsExt.so" CMAKE_ARGS="${CMAKE_ARGS} -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}" if [[ "${cuda_compiler_version}" == 12* ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DNvToolExt_SEARCH_DIRS=${CUDA_TOOLKIT_ROOT_DIR}/include/nvtx3" From 39a0e21e8dafa2fc82e94ac629465f84c9d10031 Mon Sep 17 00:00:00 2001 From: h-vetinari Date: Sat, 2 Aug 2025 16:41:45 +1100 Subject: [PATCH 08/10] fix guard for CUDA deps --- recipe/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 6db9b05..5132098 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -73,7 +73,7 @@ outputs: - libtool # [not win] - automake # [not win] host: - {% if cuda_major >= 12 %} + {% if cuda_compiler_version != "None" %} - cuda-version {{ cuda_compiler_version }} - cuda-cudart-dev - cuda-driver-dev # [linux] From 41568d388905803157fe0c2c5d2c7ad9823746b4 Mon Sep 17 00:00:00 2001 From: h-vetinari Date: Sat, 2 Aug 2025 17:17:46 +1100 Subject: [PATCH 09/10] fix condition that only exists to shut up linter --- recipe/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 5132098..8f48d5e 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -3,7 +3,7 @@ # patch version is the number of commits since then, see # https://github.com/kaldi-asr/kaldi/blob/master/cmake/VersionHelper.cmake {% set commit = "f4007661023b98b8081fd875029f0dee62242fd1" %} -{% set cuda_compiler_version = "None" if cuda_compiler_version is undefined %} +{% set cuda_compiler_version = "None" if cuda_compiler_version is undefined else cuda_compiler_version %} {% set kaldi_proc_type = "cuda" if cuda_compiler_version != "None" else "cpu" %} {% set kaldi_libraries = [ From 6932d027947a673bb17f77e16ae26f2b6fdaae0a Mon Sep 17 00:00:00 2001 From: Michael McAuliffe Date: Sat, 2 Aug 2025 11:45:28 -0700 Subject: [PATCH 10/10] Clean up debugging and unused patches --- recipe/build_kaldi.sh | 3 +- recipe/meta.yaml | 6 - recipe/patches/0006-Cuda-12-support.patch | 175 ------------------ ...MAX-in-some-CUDA-installation-scenar.patch | 24 --- 4 files changed, 1 insertion(+), 207 deletions(-) delete mode 100644 recipe/patches/0006-Cuda-12-support.patch delete mode 100644 recipe/patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch diff --git a/recipe/build_kaldi.sh b/recipe/build_kaldi.sh index 3a1ddd2..a250384 100644 --- a/recipe/build_kaldi.sh +++ b/recipe/build_kaldi.sh @@ -102,8 +102,7 @@ then if [[ "${target_platform}" != "${build_platform}" ]]; then export CUDA_TOOLKIT_ROOT=${PREFIX} fi - find "${CUDA_TOOLKIT_ROOT_DIR}" -print | grep -i "libnvToolsExt.so" - find "${PREFIX}" -print | grep -i "libnvToolsExt.so" + CMAKE_ARGS="${CMAKE_ARGS} -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}" if [[ "${cuda_compiler_version}" == 12* ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DNvToolExt_SEARCH_DIRS=${CUDA_TOOLKIT_ROOT_DIR}/include/nvtx3" diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 8f48d5e..d990e05 100755 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -29,12 +29,6 @@ "cudafeat/lane-desc.h", "cudadecoder/cuda-fst.h" ] %} -{% if cuda_compiler_version != "None" %} -{% set cuda_major = environ.get("cuda_compiler_version", "11.8").split(".")[0] | int %} -{% else %} -{% set cuda_major = 0 %} -{% endif %} - package: name: kaldi-split version: {{ version }} diff --git a/recipe/patches/0006-Cuda-12-support.patch b/recipe/patches/0006-Cuda-12-support.patch deleted file mode 100644 index ccccc41..0000000 --- a/recipe/patches/0006-Cuda-12-support.patch +++ /dev/null @@ -1,175 +0,0 @@ -From eb712c5f5e351b97406f4903fbb9d94021d05a0b Mon Sep 17 00:00:00 2001 -From: Michael McAuliffe -Date: Thu, 1 Feb 2024 10:41:44 -0800 -Subject: [PATCH 6/7] Cuda 12 support - ---- - CMakeLists.txt | 67 ++++++++++++++++++------------------- - cmake/gen_cmake_skeleton.py | 12 ++++--- - 2 files changed, 40 insertions(+), 39 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 24cb574a6..a5c6ff81e 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -1,4 +1,4 @@ --cmake_minimum_required(VERSION 3.18) -+cmake_minimum_required(VERSION 3.26) - project(kaldi) - - if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) -@@ -16,6 +16,12 @@ if(CONDA_ROOT) - include_directories("${CONDA_ROOT}/include") - if (NOT CUDA_TOOLKIT_ROOT_DIR) - set(CUDA_TOOLKIT_ROOT_DIR "${CONDA_ROOT}") -+ else() -+ set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${CUDA_TOOLKIT_ROOT_DIR}/include") -+ set(CMAKE_LIBRARY_PATH "${CMAKE_LIBRARY_PATH};${CUDA_TOOLKIT_ROOT_DIR}/lib;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs") -+ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") -+ link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs") -+ include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") - endif() - - endif() -@@ -23,8 +29,8 @@ endif() - include(third_party/get_third_party) - - --find_package(PythonInterp) --if(NOT PYTHON_EXECUTABLE) -+find_package(Python3 REQUIRED) -+if(NOT Python3_EXECUTABLE) - message(FATAL_ERROR "Needs python to auto-generate most CMake files, but not found.") - endif() - -@@ -34,7 +40,7 @@ set(IS_LIB_SHARE "") - if(BUILD_SHARED_LIBS) - set(IS_LIB_SHARE "--shared") - endif() --execute_process(COMMAND ${PYTHON_EXECUTABLE} -+execute_process(COMMAND ${Python3_EXECUTABLE} - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/gen_cmake_skeleton.py" - "${CMAKE_CURRENT_SOURCE_DIR}/src" - "--quiet" -@@ -146,10 +152,10 @@ if(MSVC) - # Added in source, but we actually should do it in build script, whatever... - # add_definitions(-DWIN32_LEAN_AND_MEAN=1) - -- add_compile_options(/permissive- /FS /wd4819 /EHsc /bigobj) -+ # add_compile_options(/permissive- /FS /wd4819 /EHsc /bigobj) - - # some warnings related with fst -- add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305) -+ # add_compile_options(/wd4018 /wd4244 /wd4267 /wd4291 /wd4305) - - set(CompilerFlags - CMAKE_CXX_FLAGS -@@ -172,39 +178,32 @@ if(MSVC) - endif() - - find_package(CUDAToolkit) --find_package(CUDA) --if(CUDA_FOUND) -- set(CUDA_PROPAGATE_HOST_FLAGS ON) -- if(MSVC) -- set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread") # Fixes incompatibility with cxx14 and cxx17 for Kaldi vs cuda in VS2019 -- list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /permissive-,/FS,/wd4819,/EHsc,/bigobj") -- list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler /wd4018,/wd4244,/wd4267,/wd4291,/wd4305") -- list(APPEND CUDA_NVCC_FLAGS_RELEASE -Xcompiler /MD) # Kaldi will always be dynamically linked to Cuda -- list(APPEND CUDA_NVCC_FLAGS_DEBUG -Xcompiler /MDd) -- else() -- # list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -std=c++${CMAKE_CXX_STANDARD}") -- list(APPEND KALDI_CUDA_NVCC_FLAGS "-Xcompiler -fPIC") -- set(KALDI_CUDA_NVCC_FLAGS "--default-stream=per-thread;-std=c++${CMAKE_CXX_STANDARD}") -- endif() -+if(CUDAToolkit_FOUND) -+ enable_language(CUDA) -+ - set(CMAKE_CUDA_STANDARD 17) - set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) -- set(CUDA_NVCC_FLAGS ${KALDI_CUDA_NVCC_FLAGS} ${CUDA_NVCC_FLAGS}) - - add_definitions(-DHAVE_CUDA=1) - add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM=1) - link_libraries( -- ${CUDA_LIBRARIES} -- ${CUDA_CUDA_LIBRARY} -- ${CUDA_CUBLAS_LIBRARIES} -- ${CUDA_CUFFT_LIBRARIES} -- ${CUDA_curand_LIBRARY} -- ${CUDA_cusolver_LIBRARY} -- ${CUDA_cusparse_LIBRARY}) -- -- find_package(NvToolExt REQUIRED) -- include_directories(${NvToolExt_INCLUDE_DIR}) -- link_libraries(${NvToolExt_LIBRARIES}) -+ CUDA::cudart -+ CUDA::cublas -+ CUDA::cufft -+ CUDA::curand -+ CUDA::cusolver -+ CUDA::cusparse -+ CUDA::cuda_driver) - -+ if(MSVC) -+ # CMake's detection for NvToolExt fails with CUDA 12; it's header-only now, see -+ # https://github.com/conda-forge/cuda-nvtx-feedstock/issues/4 -+ include_directories(${NvToolExt_INCLUDE_DIR}) -+ else() -+ find_package(NvToolExt REQUIRED) -+ include_directories(${NvToolExt_INCLUDE_DIR}) -+ link_libraries(${NvToolExt_LIBRARIES}) -+ endif() - - find_package(CUB REQUIRED) - include_directories(${CUB_INCLUDE_DIR}) -@@ -270,7 +269,7 @@ if(TENSORFLOW_DIR) - endif() - - # add all cuda libraries --if(CUDA_FOUND) -+if(CUDAToolkit_FOUND) - add_subdirectory(src/cudafeat) - add_subdirectory(src/cudadecoder) - endif() -@@ -293,7 +292,7 @@ add_subdirectory(src/online2bin) - add_subdirectory(src/kwsbin) - - # add all cuda executables --if(CUDA_FOUND) -+if(CUDAToolkit_FOUND) - add_subdirectory(src/cudafeatbin) - add_subdirectory(src/cudadecoderbin) - endif() -diff --git a/cmake/gen_cmake_skeleton.py b/cmake/gen_cmake_skeleton.py -index 154369dd5..163a4dd97 100644 ---- a/cmake/gen_cmake_skeleton.py -+++ b/cmake/gen_cmake_skeleton.py -@@ -252,13 +252,11 @@ class CMakeListsLibrary(object): - - if len(self.cuda_source_list) > 0: - self.source_list.append("${CUDA_OBJS}") -- ret.append("if(CUDA_FOUND)") -- ret.append(" cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") -- ret.append(" cuda_compile(CUDA_OBJS SHARED") -+ ret.append("if(CUDAToolkit_FOUND)") -+ ret.append(" include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)") -+ ret.append(" set(CUDA_OBJS") - for f in self.cuda_source_list: - ret.append(" " + f) -- if self.dir_name.startswith("cudafeat") and sys.platform == 'win32': -- ret.append(" OPTIONS -DKALDI_CUMATRIX_DLL_IMPORTS") - ret.append(" )") - ret.append("endif()\n") - -@@ -286,6 +284,10 @@ class CMakeListsLibrary(object): - - ret.append("target_compile_definitions(" + self.target_name + " INTERFACE KALDI_CUMATRIX_DLL_IMPORTS)") - -+ ret.append("endif(MSVC)\n") -+ elif self.dir_name.startswith("cudafeat"): -+ ret.append("if(MSVC)") -+ ret.append("target_compile_definitions(" + self.target_name + " INTERFACE KALDI_CUMATRIX_DLL_IMPORTS)") - ret.append("endif(MSVC)\n") - elif self.dir_name.startswith("util"): - ret.append("if(MSVC)") diff --git a/recipe/patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch b/recipe/patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch deleted file mode 100644 index ae14858..0000000 --- a/recipe/patches/0007-Fix-missing-FLT_MAX-in-some-CUDA-installation-scenar.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 3ec56682ec70a4fc54309ae3a97ae6af77dd911b Mon Sep 17 00:00:00 2001 -From: danijel3 -Date: Sun, 2 Jun 2024 23:11:33 +0200 -Subject: [PATCH 7/7] Fix missing FLT_MAX in some CUDA installation scenarios. - ---- - src/cudadecoder/cuda-decoder-kernels.cu | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/cudadecoder/cuda-decoder-kernels.cu b/src/cudadecoder/cuda-decoder-kernels.cu -index 8503182c1..e20a7dea1 100644 ---- a/src/cudadecoder/cuda-decoder-kernels.cu -+++ b/src/cudadecoder/cuda-decoder-kernels.cu -@@ -26,6 +26,10 @@ - #include "cuda-decoder-kernels.h" - #include "cuda-decoder-kernels-utils.h" - -+#ifndef FLT_MAX -+#define FLT_MAX 340282346638528859811704183484516925440.0f -+#endif -+ - namespace kaldi { - namespace cuda_decoder { -
VariantStatus
linux_64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13linux_64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13 - variant + variant
linux_64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13linux_64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14 - variant + variant
linux_64_cuda_compilernvcccuda_compiler_version11.8cxx_compiler_version11fortran_compiler_version11linux_aarch64_cuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13 - variant + variant
linux_aarch64_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13linux_aarch64_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14 - variant + variant
linux_aarch64_cuda_compilercuda-nvcccuda_compiler_version12.6cxx_compiler_version13fortran_compiler_version13linux_ppc64le_cuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12 - variant + variant
linux_ppc64le_cuda_compilerNonecuda_compiler_versionNonecxx_compiler_version13fortran_compiler_version13linux_ppc64le_cuda_compiler_versionNonecxx_compiler_version14fortran_compiler_version14 - variant - -
linux_ppc64le_cuda_compilercuda-nvcccuda_compiler_version12.4cxx_compiler_version12fortran_compiler_version12 - - variant + variant
win_64_cuda_compilerNonecuda_compiler_versionNone - - variant - -
win_64_cuda_compilercuda-nvcccuda_compiler_version12.6win_64_cuda_compiler_version12.6 - variant + variant
win_64_cuda_compilernvcccuda_compiler_version11.8win_64_cuda_compiler_versionNone - variant + variant