From b8ff1711278bd1444a230cb1ce44bf8585cb688e Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Mon, 9 Dec 2024 03:50:11 +0000 Subject: [PATCH 1/6] FileSystem: Improve directory deletion test slightly --- tests/ctest/common/filesystem_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ctest/common/filesystem_tests.cpp b/tests/ctest/common/filesystem_tests.cpp index 89562819bf815..eaafc74a53a38 100644 --- a/tests/ctest/common/filesystem_tests.cpp +++ b/tests/ctest/common/filesystem_tests.cpp @@ -45,10 +45,10 @@ TEST(FileSystem, RecursiveDeleteDirectoryDontFollowSymbolicLinks) ASSERT_EQ(symlink(target_dir.c_str(), symlink_path.c_str()), 0); // Delete the directory containing the symlink. - ASSERT_TRUE(dir_to_delete.starts_with("/tmp/")); + ASSERT_TRUE(dir_to_delete.starts_with("/tmp/pcsx2_filesystem_test_")); ASSERT_TRUE(FileSystem::RecursiveDeleteDirectory(dir_to_delete.c_str())); - // Make sure the target file didn't get deleted. + // Make sure the file in the target directory didn't get deleted. ASSERT_TRUE(FileSystem::FileExists(file_path.c_str())); // Clean up. 
From 1ed3001358e98ce03fbbfaf149fa45fdab405e5f Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Mon, 9 Dec 2024 04:09:34 +0000 Subject: [PATCH 2/6] FileSystem: Add CreateSymLink function --- common/FileSystem.cpp | 25 +++++++++++++++++++++++++ common/FileSystem.h | 4 ++++ tests/ctest/common/filesystem_tests.cpp | 4 +--- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/common/FileSystem.cpp b/common/FileSystem.cpp index e162a555999c1..46940388b4044 100644 --- a/common/FileSystem.cpp +++ b/common/FileSystem.cpp @@ -1961,6 +1961,26 @@ bool FileSystem::SetPathCompression(const char* path, bool enable) return result; } +bool FileSystem::CreateSymLink(const char* link, const char* target) +{ + // convert to wide string + const std::wstring wlink = GetWin32Path(link); + if (wlink.empty()) + return false; + + const std::wstring wtarget = GetWin32Path(target); + if (wtarget.empty()) + return false; + + // check if it's a directory + DWORD flags = 0; + if (DirectoryExists(target)) + flags |= SYMBOLIC_LINK_FLAG_DIRECTORY; + + // create the symbolic link + return CreateSymbolicLinkW(wlink.c_str(), wtarget.c_str(), flags) != 0; +} + bool FileSystem::IsSymbolicLink(const char* path) { // convert to wide string @@ -2541,6 +2561,11 @@ bool FileSystem::SetPathCompression(const char* path, bool enable) return false; } +bool FileSystem::CreateSymLink(const char* link, const char* target) +{ + return symlink(target, link) == 0; +} + bool FileSystem::IsSymbolicLink(const char* path) { struct stat sysStatData; diff --git a/common/FileSystem.h b/common/FileSystem.h index 6b7fe1806c9b0..f731542b5a12c 100644 --- a/common/FileSystem.h +++ b/common/FileSystem.h @@ -178,6 +178,10 @@ namespace FileSystem /// Does nothing and returns false on non-Windows platforms. bool SetPathCompression(const char* path, bool enable); + // Creates a symbolic link. 
Note that on Windows this requires elevated + // privileges so this is mostly useful for testing purposes. + bool CreateSymLink(const char* link, const char* target); + /// Checks if a file or directory is a symbolic link. bool IsSymbolicLink(const char* path); diff --git a/tests/ctest/common/filesystem_tests.cpp b/tests/ctest/common/filesystem_tests.cpp index eaafc74a53a38..d0fd9b89304ad 100644 --- a/tests/ctest/common/filesystem_tests.cpp +++ b/tests/ctest/common/filesystem_tests.cpp @@ -7,8 +7,6 @@ #ifdef __linux__ -#include - static std::optional create_test_directory() { for (u16 i = 0; i < UINT16_MAX; i++) @@ -42,7 +40,7 @@ TEST(FileSystem, RecursiveDeleteDirectoryDontFollowSymbolicLinks) std::string dir_to_delete = Path::Combine(*test_dir, "dir_to_delete"); ASSERT_TRUE(FileSystem::CreateDirectoryPath(dir_to_delete.c_str(), false)); std::string symlink_path = Path::Combine(dir_to_delete, "link"); - ASSERT_EQ(symlink(target_dir.c_str(), symlink_path.c_str()), 0); + ASSERT_TRUE(FileSystem::CreateSymLink(symlink_path.c_str(), target_dir.c_str())); // Delete the directory containing the symlink. 
ASSERT_TRUE(dir_to_delete.starts_with("/tmp/pcsx2_filesystem_test_")); From 68e6ede47e69e446af74f62610cb7157479bf128 Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Mon, 9 Dec 2024 05:18:50 +0000 Subject: [PATCH 3/6] Path: Add tests for Path::RealPath --- tests/ctest/common/path_tests.cpp | 130 ++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 8 deletions(-) diff --git a/tests/ctest/common/path_tests.cpp b/tests/ctest/common/path_tests.cpp index 4c20678a68fbe..e967a61ab0ebf 100644 --- a/tests/ctest/common/path_tests.cpp +++ b/tests/ctest/common/path_tests.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ +#include "common/FileSystem.h" #include "common/Pcsx2Defs.h" #include "common/Path.h" #include @@ -238,16 +239,129 @@ TEST(Path, CreateFileURL) #endif } -#if 0 +#if __linux__ -// Relies on presence of files. -TEST(Path, RealPath) +static std::optional create_test_directory() { -#ifdef _WIN32 - ASSERT_EQ(Path::RealPath("C:\\Users\\Me\\Desktop\\foo\\baz"), "C:\\Users\\Me\\Desktop\\foo\\bar\\baz"); -#else - ASSERT_EQ(Path::RealPath("/lib/foo/bar"), "/usr/lib/foo/bar"); -#endif + for (u16 i = 0; i < UINT16_MAX; i++) + { + std::string path = std::string("/tmp/pcsx2_path_test_") + std::to_string(i); + if (!FileSystem::DirectoryExists(path.c_str())) + { + if (!FileSystem::CreateDirectoryPath(path.c_str(), false)) + break; + + return path; + } + } + + return std::nullopt; +} + +TEST(Path, RealPathAbsoluteSymbolicLink) +{ + std::optional test_dir = create_test_directory(); + ASSERT_TRUE(test_dir.has_value()); + + // Create a file to point at. + std::string file_path = Path::Combine(*test_dir, "file"); + ASSERT_TRUE(FileSystem::WriteStringToFile(file_path.c_str(), "Hello, world!")); + + // Create a symbolic link that points to said file. 
+ std::string link_path = Path::Combine(*test_dir, "link"); + ASSERT_TRUE(FileSystem::CreateSymLink(link_path.c_str(), file_path.c_str())); + + // Make sure the symbolic link is resolved correctly. + ASSERT_EQ(Path::RealPath(link_path), file_path); + + // Clean up. + ASSERT_TRUE(FileSystem::DeleteSymbolicLink(link_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteFilePath(file_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteDirectory(test_dir->c_str())); +} + +TEST(Path, RealPathRelativeSymbolicLink) +{ + std::optional test_dir = create_test_directory(); + ASSERT_TRUE(test_dir.has_value()); + + // Create a file to point at. + std::string file_path = Path::Combine(*test_dir, "file"); + ASSERT_TRUE(FileSystem::WriteStringToFile(file_path.c_str(), "Hello, world!")); + + // Create a symbolic link that points to said file. + std::string link_path = Path::Combine(*test_dir, "link"); + ASSERT_TRUE(FileSystem::CreateSymLink(link_path.c_str(), "file")); + + // Make sure the symbolic link is resolved correctly. + ASSERT_EQ(Path::RealPath(link_path), file_path); + + // Clean up. + ASSERT_TRUE(FileSystem::DeleteSymbolicLink(link_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteFilePath(file_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteDirectory(test_dir->c_str())); +} + +TEST(Path, RealPathDotDotSymbolicLink) +{ + std::optional test_dir = create_test_directory(); + ASSERT_TRUE(test_dir.has_value()); + + // Create a file to point at. + std::string file_path = Path::Combine(*test_dir, "file"); + ASSERT_TRUE(FileSystem::WriteStringToFile(file_path.c_str(), "Hello, world!")); + + // Create a directory to put the link in. + std::string link_dir = Path::Combine(*test_dir, "dir"); + ASSERT_TRUE(FileSystem::CreateDirectoryPath(link_dir.c_str(), false)); + + // Create a symbolic link that points to said file. 
+ std::string link_path = Path::Combine(link_dir, "link"); + ASSERT_TRUE(FileSystem::CreateSymLink(link_path.c_str(), "../file")); + + // Make sure the symbolic link is resolved correctly. + ASSERT_EQ(Path::RealPath(link_path), file_path); + + // Clean up. + ASSERT_TRUE(FileSystem::DeleteSymbolicLink(link_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteDirectory(link_dir.c_str())); + ASSERT_TRUE(FileSystem::DeleteFilePath(file_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteDirectory(test_dir->c_str())); +} + +TEST(Path, RealPathCircularSymbolicLink) +{ + std::optional test_dir = create_test_directory(); + ASSERT_TRUE(test_dir.has_value()); + + // Create a circular symbolic link. + std::string link_path = Path::Combine(*test_dir, "link"); + ASSERT_TRUE(FileSystem::CreateSymLink(link_path.c_str(), ".")); + + // Make sure the link gets resolved correctly. + ASSERT_EQ(Path::RealPath(link_path), *test_dir); + ASSERT_EQ(Path::RealPath(Path::Combine(link_path, "link")), *test_dir); + + // Clean up. + ASSERT_TRUE(FileSystem::DeleteSymbolicLink(link_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteDirectory(test_dir->c_str())); +} + +TEST(Path, RealPathLoopingSymbolicLink) +{ + std::optional test_dir = create_test_directory(); + ASSERT_TRUE(test_dir.has_value()); + + // Create a symbolic link that points to itself. + std::string link_path = Path::Combine(*test_dir, "link"); + ASSERT_TRUE(FileSystem::CreateSymLink(link_path.c_str(), "link")); + + // Make sure this doesn't cause problems. + ASSERT_EQ(Path::RealPath(link_path), link_path); + + // Clean up. + ASSERT_TRUE(FileSystem::DeleteSymbolicLink(link_path.c_str())); + ASSERT_TRUE(FileSystem::DeleteDirectory(test_dir->c_str())); } #endif From 0c21023bb2b2522540242fe68d4d5e43d535882b Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Mon, 9 Dec 2024 08:11:30 +0000 Subject: [PATCH 4/6] Path: Prevent Path::RealPath from returning '.' and '..' 
components --- common/FileSystem.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/FileSystem.cpp b/common/FileSystem.cpp index 46940388b4044..a73dab6700232 100644 --- a/common/FileSystem.cpp +++ b/common/FileSystem.cpp @@ -453,6 +453,11 @@ std::string Path::RealPath(const std::string_view path) } } } + + // If any relative symlinks were resolved, there may be '.' and '..' + // components in the resultant path, which must be removed. + realpath = Path::Canonicalize(realpath); + #endif return realpath; From 19882dc160ff3b0905a90bddbb84a46f2083bf18 Mon Sep 17 00:00:00 2001 From: JordanTheToaster Date: Sun, 8 Dec 2024 23:13:40 +0000 Subject: [PATCH 5/6] 3rdparty: Sync d3d12memalloc to commit da380f69bd4547cd776c525ae225bb9d13df94e2 --- 3rdparty/d3d12memalloc/LICENSE.txt | 2 +- 3rdparty/d3d12memalloc/README.md | 20 +- .../d3d12memalloc/include/D3D12MemAlloc.h | 81 +- 3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp | 863 +++--------------- 4 files changed, 183 insertions(+), 783 deletions(-) diff --git a/3rdparty/d3d12memalloc/LICENSE.txt b/3rdparty/d3d12memalloc/LICENSE.txt index bc2ab4dc05299..0761191395f90 100644 --- a/3rdparty/d3d12memalloc/LICENSE.txt +++ b/3rdparty/d3d12memalloc/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/3rdparty/d3d12memalloc/README.md b/3rdparty/d3d12memalloc/README.md index 81c6be18aa4ec..98d61a2ab8d7a 100644 --- a/3rdparty/d3d12memalloc/README.md +++ b/3rdparty/d3d12memalloc/README.md @@ -2,7 +2,7 @@ Easy to integrate memory allocation library for Direct3D 12. 
-**Documentation:** Browse online: [D3D12 Memory Allocator](https://gpuopen-librariesandsdks.github.io/D3D12MemoryAllocator/html/) (generated from Doxygen-style comments in [src/D3D12MemAlloc.h](src/D3D12MemAlloc.h)) +**Documentation:** Browse online: [D3D12 Memory Allocator](https://gpuopen-librariesandsdks.github.io/D3D12MemoryAllocator/html/) (generated from Doxygen-style comments in [include/D3D12MemAlloc.h](include/D3D12MemAlloc.h)) **License:** MIT. See [LICENSE.txt](LICENSE.txt) @@ -36,6 +36,7 @@ Additional features: - Customization and integration with custom engines: Predefine appropriate macros to provide your own implementation of external facilities used by the library, like assert, mutex, and atomic. - Support for resource aliasing (overlap). - Custom memory pools: Create a pool with desired parameters (e.g. fixed or limited maximum size, custom `D3D12_HEAP_PROPERTIES` and `D3D12_HEAP_FLAGS`) and allocate memory out of it. +- Support for GPU Upload Heaps from preview Agility SDK (needs compilation with `D3D12MA_OPTIONS16_SUPPORTED` macro). - Linear allocator: Create a pool with linear algorithm and use it for much faster allocations and deallocations in free-at-once, stack, double stack, or ring buffer fashion. - Defragmentation: Let the library move data around to free some memory blocks and make your allocations better compacted. - Statistics: Obtain brief or detailed statistics about the amount of memory used, unused, number of allocated heaps, number of allocations etc. - globally and per memory heap type. Current memory usage and budget as reported by the system can also be queried. @@ -101,15 +102,18 @@ This software package uses third party software: For more information see [NOTICES.txt](NOTICES.txt). +# See also + +- **[Vcpkg](https://github.com/Microsoft/vcpkg)** dependency manager from Microsoft offers a port of this library that is easy to install. +- **[d3d12ma.c](https://github.com/milliewalky/d3d12ma.c)** - C bindings for this library. 
Author: Mateusz Maciejewski (Matt Walky). License: MIT. +- **[TerraFX.Interop.D3D12MemoryAllocator](https://github.com/terrafx/terrafx.interop.d3d12memoryallocator)** - interop bindings for this library for C#, as used by [TerraFX](https://github.com/terrafx/terrafx). License: MIT. +- **[Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/)** - equivalent library for Vulkan. License: MIT. + # Software using this library +- **[Qt Project](https://github.com/qt)** +- **[Ghost of Tsushima: Director's Cut PC](https://www.youtube.com/watch?v=cPKBDbCYctc&t=698s)** - Information avaliable in 11:38 of credits - **[The Forge](https://github.com/ConfettiFX/The-Forge)** - cross-platform rendering framework. Apache License 2.0. -- **[Wicked Engine](https://github.com/turanszkij/WickedEngine)** - 3D engine with modern graphics +- **[Wicked Engine](https://github.com/turanszkij/WickedEngine)** - 3D engine with modern graphics [Some other projects on GitHub](https://github.com/search?q=D3D12MemAlloc.h&type=Code) and some game development studios that use DX12 in their games. - -# See also - -- **[Vcpkg](https://github.com/Microsoft/vcpkg)** dependency manager from Microsoft offers a port of this library that is easy to install. -- **[Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/)** - equivalent library for Vulkan. License: MIT. -- **[TerraFX.Interop.D3D12MemoryAllocator](https://github.com/terrafx/terrafx.interop.d3d12memoryallocator)** - interop bindings for this library for C#, as used by [TerraFX](https://github.com/terrafx/terrafx). License: MIT. 
diff --git a/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h b/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h index 4e87bf0277474..84306054ce813 100644 --- a/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h +++ b/3rdparty/d3d12memalloc/include/D3D12MemAlloc.h @@ -24,9 +24,9 @@ /** \mainpage D3D12 Memory Allocator -Version 2.1.0-development (2023-07-05) +Version 2.1.0-development (2024-07-05) -Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. \n +Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. \n License: MIT Documentation of all members: D3D12MemAlloc.h @@ -160,9 +160,9 @@ class D3D12MA_API IUnknownImpl : public IUnknown { public: virtual ~IUnknownImpl() = default; - virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject); - virtual ULONG STDMETHODCALLTYPE AddRef(); - virtual ULONG STDMETHODCALLTYPE Release(); + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) override; + ULONG STDMETHODCALLTYPE AddRef() override; + ULONG STDMETHODCALLTYPE Release() override; protected: virtual void ReleaseThis() { delete this; } private: @@ -265,18 +265,18 @@ enum ALLOCATION_FLAGS */ ALLOCATION_FLAG_CAN_ALIAS = 0x10, - /** Allocation strategy that chooses smallest possible free range for the allocation + /** %Allocation strategy that chooses smallest possible free range for the allocation to minimize memory usage and fragmentation, possibly at the expense of allocation time. */ ALLOCATION_FLAG_STRATEGY_MIN_MEMORY = 0x00010000, - /** Allocation strategy that chooses first suitable free range for the allocation - + /** %Allocation strategy that chooses first suitable free range for the allocation - not necessarily in terms of the smallest offset but the one that is easiest and fastest to find to minimize allocation time, possibly at the expense of allocation quality. 
*/ ALLOCATION_FLAG_STRATEGY_MIN_TIME = 0x00020000, - /** Allocation strategy that chooses always the lowest offset in available space. + /** %Allocation strategy that chooses always the lowest offset in available space. This is not the most efficient strategy but achieves highly packed data. Used internally by defragmentation, not recomended in typical usage. */ @@ -402,8 +402,9 @@ struct TotalStatistics - 1 = `D3D12_HEAP_TYPE_UPLOAD` - 2 = `D3D12_HEAP_TYPE_READBACK` - 3 = `D3D12_HEAP_TYPE_CUSTOM` + - 4 = `D3D12_HEAP_TYPE_GPU_UPLOAD` */ - DetailedStatistics HeapType[4]; + DetailedStatistics HeapType[5]; /** \brief One element for each memory segment group located at the following indices: - 0 = `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` @@ -413,9 +414,9 @@ struct TotalStatistics - When `IsUMA() == FALSE` (discrete graphics card): - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents GPU memory - (resources allocated in `D3D12_HEAP_TYPE_DEFAULT` or `D3D12_MEMORY_POOL_L1`). + (resources allocated in `D3D12_HEAP_TYPE_DEFAULT`, `D3D12_HEAP_TYPE_GPU_UPLOAD` or `D3D12_MEMORY_POOL_L1`). - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) represents system memory - (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`). + (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`). - When `IsUMA() == TRUE` (integrated graphics chip): - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` = (index 0) represents memory shared for all the resources. - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` = (index 1) is unused and always 0. @@ -542,26 +543,6 @@ class D3D12MA_API Allocation : public IUnknownImpl */ LPCWSTR GetName() const { return m_Name; } - /** \brief Returns `TRUE` if the memory of the allocation was filled with zeros when the allocation was created. 
- - Returns `TRUE` only if the allocator is sure that the entire memory where the - allocation was created was filled with zeros at the moment the allocation was made. - - Returns `FALSE` if the memory could potentially contain garbage data. - If it's a render-target or depth-stencil texture, it then needs proper - initialization with `ClearRenderTargetView`, `ClearDepthStencilView`, `DiscardResource`, - or a copy operation, as described on page - "ID3D12Device::CreatePlacedResource method - Notes on the required resource initialization" in Microsoft documentation. - Please note that rendering a fullscreen triangle or quad to the texture as - a render target is not a proper way of initialization! - - See also articles: - - - "Coming to DirectX 12: More control over memory allocation" on DirectX Developer Blog - - ["Initializing DX12 Textures After Allocation and Aliasing"](https://asawicki.info/news_1724_initializing_dx12_textures_after_allocation_and_aliasing). - */ - BOOL WasZeroInitialized() const { return m_PackedData.WasZeroInitialized(); } - protected: void ReleaseThis() override; @@ -620,29 +601,26 @@ class D3D12MA_API Allocation : public IUnknownImpl { public: PackedData() : - m_Type(0), m_ResourceDimension(0), m_ResourceFlags(0), m_TextureLayout(0), m_WasZeroInitialized(0) { } + m_Type(0), m_ResourceDimension(0), m_ResourceFlags(0), m_TextureLayout(0) { } Type GetType() const { return (Type)m_Type; } D3D12_RESOURCE_DIMENSION GetResourceDimension() const { return (D3D12_RESOURCE_DIMENSION)m_ResourceDimension; } D3D12_RESOURCE_FLAGS GetResourceFlags() const { return (D3D12_RESOURCE_FLAGS)m_ResourceFlags; } D3D12_TEXTURE_LAYOUT GetTextureLayout() const { return (D3D12_TEXTURE_LAYOUT)m_TextureLayout; } - BOOL WasZeroInitialized() const { return (BOOL)m_WasZeroInitialized; } void SetType(Type type); void SetResourceDimension(D3D12_RESOURCE_DIMENSION resourceDimension); void SetResourceFlags(D3D12_RESOURCE_FLAGS resourceFlags); void 
SetTextureLayout(D3D12_TEXTURE_LAYOUT textureLayout); - void SetWasZeroInitialized(BOOL wasZeroInitialized) { m_WasZeroInitialized = wasZeroInitialized ? 1 : 0; } private: UINT m_Type : 2; // enum Type UINT m_ResourceDimension : 3; // enum D3D12_RESOURCE_DIMENSION UINT m_ResourceFlags : 24; // flags D3D12_RESOURCE_FLAGS UINT m_TextureLayout : 9; // enum D3D12_TEXTURE_LAYOUT - UINT m_WasZeroInitialized : 1; // BOOL } m_PackedData; - Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment, BOOL wasZeroInitialized); + Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment); // Nothing here, everything already done in Release. virtual ~Allocation() = default; @@ -1065,6 +1043,16 @@ enum ALLOCATOR_FLAGS to create its heaps on smaller alignment not suitable for MSAA textures. */ ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x8, + /** \brief Disable optimization that prefers creating small buffers as committed to avoid 64 KB alignment. + + By default, the library prefers creating small buffers <= 32 KB as committed, + because drivers tend to pack them better, while placed buffers require 64 KB alignment. + This, however, may decrease performance, as creating committed resources involves allocation of implicit heaps, + which may take longer than creating placed resources in existing heaps. + Passing this flag will disable this committed preference globally for the allocator. + It can also be disabled for a single allocation by using #ALLOCATION_FLAG_STRATEGY_MIN_TIME. + */ + ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED = 0x10, }; /// \brief Parameters of created Allocator object. To be used with CreateAllocator(). @@ -1130,6 +1118,15 @@ class D3D12MA_API Allocator : public IUnknownImpl - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)" */ BOOL IsCacheCoherentUMA() const; + /** \brief Returns true if GPU Upload Heaps are supported on the current system. + + When true, you can use `D3D12_HEAP_TYPE_GPU_UPLOAD`. 
+ + This flag is fetched from `D3D12_FEATURE_D3D12_OPTIONS16::GPUUploadHeapSupported`. + + `#define D3D12MA_OPTIONS16_SUPPORTED 1` is needed for the compilation of this library. Otherwise the flag is always false. + */ + BOOL IsGPUUploadHeapSupported() const; /** \brief Returns total amount of memory of specific segment group, in bytes. \param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`. @@ -1447,11 +1444,11 @@ enum VIRTUAL_ALLOCATION_FLAGS */ VIRTUAL_ALLOCATION_FLAG_UPPER_ADDRESS = ALLOCATION_FLAG_UPPER_ADDRESS, - /// Allocation strategy that tries to minimize memory usage. + /// %Allocation strategy that tries to minimize memory usage. VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_MEMORY = ALLOCATION_FLAG_STRATEGY_MIN_MEMORY, - /// Allocation strategy that tries to minimize allocation time. + /// %Allocation strategy that tries to minimize allocation time. VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_TIME = ALLOCATION_FLAG_STRATEGY_MIN_TIME, - /** \brief Allocation strategy that chooses always the lowest offset in available space. + /** %Allocation strategy that chooses always the lowest offset in available space. This is not the most efficient strategy but achieves highly packed data. */ VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_OFFSET = ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, @@ -1640,6 +1637,9 @@ ID3D12Device* device = (...) D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; allocatorDesc.pDevice = device; allocatorDesc.pAdapter = adapter; +// These flags are optional but recommended. +allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED | + D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED; D3D12MA::Allocator* allocator; HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator); @@ -1864,6 +1864,9 @@ to obtain object D3D12MA::Pool. Example: \code POOL_DESC poolDesc = {}; poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_DEFAULT; +// These flags are optional but recommended. 
+poolDesc.Flags = D3D12MA::POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED; +poolDesc.HeapFlags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; Pool* pool; HRESULT hr = allocator->CreatePool(&poolDesc, &pool); diff --git a/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp b/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp index 21c178269f219..f13f5684961bc 100644 --- a/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp +++ b/3rdparty/d3d12memalloc/src/D3D12MemAlloc.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -134,9 +134,9 @@ especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs. namespace D3D12MA { -static constexpr UINT HEAP_TYPE_COUNT = 4; -static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 3; // Only DEFAULT, UPLOAD, READBACK. -static constexpr UINT DEFAULT_POOL_MAX_COUNT = 9; +static constexpr UINT HEAP_TYPE_COUNT = 5; +static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 4; // Only DEFAULT, UPLOAD, READBACK, GPU_UPLOAD. +static constexpr UINT DEFAULT_POOL_MAX_COUNT = STANDARD_HEAP_TYPE_COUNT * 3; static const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3; // Minimum size of a free suballocation to register it in the free suballocation collection. 
static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16; @@ -147,12 +147,14 @@ static const WCHAR* const HeapTypeNames[] = L"UPLOAD", L"READBACK", L"CUSTOM", + L"GPU_UPLOAD", }; static const WCHAR* const StandardHeapTypeNames[] = { L"DEFAULT", L"UPLOAD", L"READBACK", + L"GPU_UPLOAD", }; static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = @@ -160,6 +162,8 @@ static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS = static const D3D12_RESIDENCY_PRIORITY D3D12_RESIDENCY_PRIORITY_NONE = D3D12_RESIDENCY_PRIORITY(0); +static const D3D12_HEAP_TYPE D3D12_HEAP_TYPE_GPU_UPLOAD_COPY = (D3D12_HEAP_TYPE)5; + #ifndef _D3D12MA_ENUM_DECLARATIONS // Local copy of this enum, as it is provided only by , so it may not be available. @@ -465,6 +469,7 @@ static UINT StandardHeapTypeToIndex(D3D12_HEAP_TYPE type) case D3D12_HEAP_TYPE_DEFAULT: return 0; case D3D12_HEAP_TYPE_UPLOAD: return 1; case D3D12_HEAP_TYPE_READBACK: return 2; + case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: return 3; default: D3D12MA_ASSERT(0); return UINT_MAX; } } @@ -476,6 +481,7 @@ static D3D12_HEAP_TYPE IndexToStandardHeapType(UINT heapTypeIndex) case 0: return D3D12_HEAP_TYPE_DEFAULT; case 1: return D3D12_HEAP_TYPE_UPLOAD; case 2: return D3D12_HEAP_TYPE_READBACK; + case 3: return D3D12_HEAP_TYPE_GPU_UPLOAD_COPY; default: D3D12MA_ASSERT(0); return D3D12_HEAP_TYPE_CUSTOM; } } @@ -525,7 +531,8 @@ static bool IsHeapTypeStandard(D3D12_HEAP_TYPE type) { return type == D3D12_HEAP_TYPE_DEFAULT || type == D3D12_HEAP_TYPE_UPLOAD || - type == D3D12_HEAP_TYPE_READBACK; + type == D3D12_HEAP_TYPE_READBACK || + type == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY; } static D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type) @@ -2783,8 +2790,8 @@ class AllocationObjectAllocator { D3D12MA_CLASS_NO_COPY(AllocationObjectAllocator); public: - AllocationObjectAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks) - : m_Allocator(allocationCallbacks, 1024) {} + AllocationObjectAllocator(const 
ALLOCATION_CALLBACKS& allocationCallbacks, bool useMutex) + : m_Allocator(allocationCallbacks, 1024), m_UseMutex(useMutex) {} template Allocation* Allocate(Types... args); @@ -2792,6 +2799,7 @@ class AllocationObjectAllocator private: D3D12MA_MUTEX m_Mutex; + bool m_UseMutex; PoolAllocator m_Allocator; }; @@ -2799,13 +2807,13 @@ class AllocationObjectAllocator template Allocation* AllocationObjectAllocator::Allocate(Types... args) { - MutexLock mutexLock(m_Mutex); + MutexLock mutexLock(m_Mutex, m_UseMutex); return m_Allocator.Alloc(std::forward(args)...); } void AllocationObjectAllocator::Free(Allocation* alloc) { - MutexLock mutexLock(m_Mutex); + MutexLock mutexLock(m_Mutex, m_UseMutex); m_Allocator.Free(alloc); } #endif // _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR_FUNCTIONS @@ -2867,75 +2875,9 @@ struct AllocationRequest UINT64 sumFreeSize; // Sum size of free items that overlap with proposed allocation. UINT64 sumItemSize; // Sum size of items to make lost that overlap with proposed allocation. SuballocationList::iterator item; - BOOL zeroInitialized = FALSE; // TODO Implement proper handling in TLSF and Linear, using ZeroInitializedRange class. }; #endif // _D3D12MA_ALLOCATION_REQUEST -#ifndef _D3D12MA_ZERO_INITIALIZED_RANGE -/* -Keeps track of the range of bytes that are surely initialized with zeros. -Everything outside of it is considered uninitialized memory that may contain -garbage data. - -The range is left-inclusive. 
-*/ -class ZeroInitializedRange -{ -public: - void Reset(UINT64 size); - BOOL IsRangeZeroInitialized(UINT64 beg, UINT64 end) const; - void MarkRangeAsUsed(UINT64 usedBeg, UINT64 usedEnd); - -private: - UINT64 m_ZeroBeg = 0, m_ZeroEnd = 0; -}; - -#ifndef _D3D12MA_ZERO_INITIALIZED_RANGE_FUNCTIONS -void ZeroInitializedRange::Reset(UINT64 size) -{ - D3D12MA_ASSERT(size > 0); - m_ZeroBeg = 0; - m_ZeroEnd = size; -} - -BOOL ZeroInitializedRange::IsRangeZeroInitialized(UINT64 beg, UINT64 end) const -{ - D3D12MA_ASSERT(beg < end); - return m_ZeroBeg <= beg && end <= m_ZeroEnd; -} - -void ZeroInitializedRange::MarkRangeAsUsed(UINT64 usedBeg, UINT64 usedEnd) -{ - D3D12MA_ASSERT(usedBeg < usedEnd); - // No new bytes marked. - if (usedEnd <= m_ZeroBeg || m_ZeroEnd <= usedBeg) - { - return; - } - // All bytes marked. - if (usedBeg <= m_ZeroBeg && m_ZeroEnd <= usedEnd) - { - m_ZeroBeg = m_ZeroEnd = 0; - } - // Some bytes marked. - else - { - const UINT64 remainingZeroBefore = usedBeg > m_ZeroBeg ? usedBeg - m_ZeroBeg : 0; - const UINT64 remainingZeroAfter = usedEnd < m_ZeroEnd ? 
m_ZeroEnd - usedEnd : 0; - D3D12MA_ASSERT(remainingZeroBefore > 0 || remainingZeroAfter > 0); - if (remainingZeroBefore > remainingZeroAfter) - { - m_ZeroEnd = usedBeg; - } - else - { - m_ZeroBeg = usedEnd; - } - } -} -#endif // _D3D12MA_ZERO_INITIALIZED_RANGE_FUNCTIONS -#endif // _D3D12MA_ZERO_INITIALIZED_RANGE - #ifndef _D3D12MA_BLOCK_METADATA /* Data structure used for bookkeeping of allocations and unused ranges of memory @@ -3114,610 +3056,6 @@ void BlockMetadata::PrintDetailedMap_End(JsonWriter& json) const #endif // _D3D12MA_BLOCK_METADATA_FUNCTIONS #endif // _D3D12MA_BLOCK_METADATA -#if 0 -#ifndef _D3D12MA_BLOCK_METADATA_GENERIC -class BlockMetadata_Generic : public BlockMetadata -{ -public: - BlockMetadata_Generic(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); - virtual ~BlockMetadata_Generic() = default; - - size_t GetAllocationCount() const override { return m_Suballocations.size() - m_FreeCount; } - UINT64 GetSumFreeSize() const override { return m_SumFreeSize; } - UINT64 GetAllocationOffset(AllocHandle allocHandle) const override { return (UINT64)allocHandle - 1; } - - void Init(UINT64 size) override; - bool Validate() const override; - bool IsEmpty() const override; - void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const override; - - bool CreateAllocationRequest( - UINT64 allocSize, - UINT64 allocAlignment, - bool upperAddress, - AllocationRequest* pAllocationRequest) override; - - void Alloc( - const AllocationRequest& request, - UINT64 allocSize, - void* privateData) override; - - void Free(AllocHandle allocHandle) override; - void Clear() override; - - void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) override; - - void AddStatistics(Statistics& inoutStats) const override; - void AddDetailedStatistics(DetailedStatistics& inoutStats) const override; - void WriteAllocationInfoToJson(JsonWriter& json) const override; - -private: - UINT m_FreeCount; - UINT64 m_SumFreeSize; - 
SuballocationList m_Suballocations; - // Suballocations that are free and have size greater than certain threshold. - // Sorted by size, ascending. - Vector m_FreeSuballocationsBySize; - ZeroInitializedRange m_ZeroInitializedRange; - - SuballocationList::const_iterator FindAtOffset(UINT64 offset) const; - bool ValidateFreeSuballocationList() const; - - // Checks if requested suballocation with given parameters can be placed in given pFreeSuballocItem. - // If yes, fills pOffset and returns true. If no, returns false. - bool CheckAllocation( - UINT64 allocSize, - UINT64 allocAlignment, - SuballocationList::const_iterator suballocItem, - AllocHandle* pAllocHandle, - UINT64* pSumFreeSize, - UINT64* pSumItemSize, - BOOL *pZeroInitialized) const; - // Given free suballocation, it merges it with following one, which must also be free. - void MergeFreeWithNext(SuballocationList::iterator item); - // Releases given suballocation, making it free. - // Merges it with adjacent free suballocations if applicable. - // Returns iterator to new free suballocation at this place. - SuballocationList::iterator FreeSuballocation(SuballocationList::iterator suballocItem); - // Given free suballocation, it inserts it into sorted list of - // m_FreeSuballocationsBySize if it's suitable. - void RegisterFreeSuballocation(SuballocationList::iterator item); - // Given free suballocation, it removes it from sorted list of - // m_FreeSuballocationsBySize if it's suitable. 
- void UnregisterFreeSuballocation(SuballocationList::iterator item); - - D3D12MA_CLASS_NO_COPY(BlockMetadata_Generic) -}; - -#ifndef _D3D12MA_BLOCK_METADATA_GENERIC_FUNCTIONS -BlockMetadata_Generic::BlockMetadata_Generic(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual) - : BlockMetadata(allocationCallbacks, isVirtual), - m_FreeCount(0), - m_SumFreeSize(0), - m_Suballocations(*allocationCallbacks), - m_FreeSuballocationsBySize(*allocationCallbacks) -{ - D3D12MA_ASSERT(allocationCallbacks); -} - -void BlockMetadata_Generic::Init(UINT64 size) -{ - BlockMetadata::Init(size); - m_ZeroInitializedRange.Reset(size); - - m_FreeCount = 1; - m_SumFreeSize = size; - - Suballocation suballoc = {}; - suballoc.offset = 0; - suballoc.size = size; - suballoc.type = SUBALLOCATION_TYPE_FREE; - suballoc.privateData = NULL; - - D3D12MA_ASSERT(size > MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER); - m_Suballocations.push_back(suballoc); - SuballocationList::iterator suballocItem = m_Suballocations.end(); - --suballocItem; - m_FreeSuballocationsBySize.push_back(suballocItem); -} - -bool BlockMetadata_Generic::Validate() const -{ - D3D12MA_VALIDATE(!m_Suballocations.empty()); - - // Expected offset of new suballocation as calculated from previous ones. - UINT64 calculatedOffset = 0; - // Expected number of free suballocations as calculated from traversing their list. - UINT calculatedFreeCount = 0; - // Expected sum size of free suballocations as calculated from traversing their list. - UINT64 calculatedSumFreeSize = 0; - // Expected number of free suballocations that should be registered in - // m_FreeSuballocationsBySize calculated from traversing their list. - size_t freeSuballocationsToRegister = 0; - // True if previous visited suballocation was free. - bool prevFree = false; - - for (const auto& subAlloc : m_Suballocations) - { - // Actual offset of this suballocation doesn't match expected one. 
- D3D12MA_VALIDATE(subAlloc.offset == calculatedOffset); - - const bool currFree = (subAlloc.type == SUBALLOCATION_TYPE_FREE); - // Two adjacent free suballocations are invalid. They should be merged. - D3D12MA_VALIDATE(!prevFree || !currFree); - - const Allocation* const alloc = (Allocation*)subAlloc.privateData; - if (!IsVirtual()) - { - D3D12MA_VALIDATE(currFree == (alloc == NULL)); - } - - if (currFree) - { - calculatedSumFreeSize += subAlloc.size; - ++calculatedFreeCount; - if (subAlloc.size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - ++freeSuballocationsToRegister; - } - - // Margin required between allocations - every free space must be at least that large. - D3D12MA_VALIDATE(subAlloc.size >= GetDebugMargin()); - } - else - { - if (!IsVirtual()) - { - D3D12MA_VALIDATE(alloc->GetOffset() == subAlloc.offset); - D3D12MA_VALIDATE(alloc->GetSize() == subAlloc.size); - } - - // Margin required between allocations - previous allocation must be free. - D3D12MA_VALIDATE(GetDebugMargin() == 0 || prevFree); - } - - calculatedOffset += subAlloc.size; - prevFree = currFree; - } - - // Number of free suballocations registered in m_FreeSuballocationsBySize doesn't - // match expected one. - D3D12MA_VALIDATE(m_FreeSuballocationsBySize.size() == freeSuballocationsToRegister); - - UINT64 lastSize = 0; - for (size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) - { - SuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i]; - - // Only free suballocations can be registered in m_FreeSuballocationsBySize. - D3D12MA_VALIDATE(suballocItem->type == SUBALLOCATION_TYPE_FREE); - // They must be sorted by size ascending. - D3D12MA_VALIDATE(suballocItem->size >= lastSize); - - lastSize = suballocItem->size; - } - - // Check if totals match calculacted values. 
- D3D12MA_VALIDATE(ValidateFreeSuballocationList()); - D3D12MA_VALIDATE(calculatedOffset == GetSize()); - D3D12MA_VALIDATE(calculatedSumFreeSize == m_SumFreeSize); - D3D12MA_VALIDATE(calculatedFreeCount == m_FreeCount); - - return true; -} - -bool BlockMetadata_Generic::IsEmpty() const -{ - return (m_Suballocations.size() == 1) && (m_FreeCount == 1); -} - -void BlockMetadata_Generic::GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const -{ - Suballocation& suballoc = *FindAtOffset((UINT64)allocHandle - 1).dropConst(); - outInfo.Offset = suballoc.offset; - outInfo.Size = suballoc.size; - outInfo.pPrivateData = suballoc.privateData; -} - -bool BlockMetadata_Generic::CreateAllocationRequest( - UINT64 allocSize, - UINT64 allocAlignment, - bool upperAddress, - AllocationRequest* pAllocationRequest) -{ - D3D12MA_ASSERT(allocSize > 0); - D3D12MA_ASSERT(!upperAddress && "ALLOCATION_FLAG_UPPER_ADDRESS can be used only with linear algorithm."); - D3D12MA_ASSERT(pAllocationRequest != NULL); - D3D12MA_HEAVY_ASSERT(Validate()); - - // There is not enough total free space in this block to fullfill the request: Early return. - if (m_SumFreeSize < allocSize + GetDebugMargin()) - { - return false; - } - - // New algorithm, efficiently searching freeSuballocationsBySize. - const size_t freeSuballocCount = m_FreeSuballocationsBySize.size(); - if (freeSuballocCount > 0) - { - // Find first free suballocation with size not less than allocSize + GetDebugMargin(). 
- SuballocationList::iterator* const it = BinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + freeSuballocCount, - allocSize + GetDebugMargin(), - SuballocationItemSizeLess()); - size_t index = it - m_FreeSuballocationsBySize.data(); - for (; index < freeSuballocCount; ++index) - { - if (CheckAllocation( - allocSize, - allocAlignment, - m_FreeSuballocationsBySize[index], - &pAllocationRequest->allocHandle, - &pAllocationRequest->sumFreeSize, - &pAllocationRequest->sumItemSize, - &pAllocationRequest->zeroInitialized)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - - return false; -} - -void BlockMetadata_Generic::Alloc( - const AllocationRequest& request, - UINT64 allocSize, - void* privateData) -{ - D3D12MA_ASSERT(request.item != m_Suballocations.end()); - Suballocation& suballoc = *request.item; - // Given suballocation is a free block. - D3D12MA_ASSERT(suballoc.type == SUBALLOCATION_TYPE_FREE); - // Given offset is inside this suballocation. - UINT64 offset = (UINT64)request.allocHandle - 1; - D3D12MA_ASSERT(offset >= suballoc.offset); - const UINT64 paddingBegin = offset - suballoc.offset; - D3D12MA_ASSERT(suballoc.size >= paddingBegin + allocSize); - const UINT64 paddingEnd = suballoc.size - paddingBegin - allocSize; - - // Unregister this free suballocation from m_FreeSuballocationsBySize and update - // it to become used. - UnregisterFreeSuballocation(request.item); - - suballoc.offset = offset; - suballoc.size = allocSize; - suballoc.type = SUBALLOCATION_TYPE_ALLOCATION; - suballoc.privateData = privateData; - - // If there are any free bytes remaining at the end, insert new free suballocation after current one. 
- if (paddingEnd) - { - Suballocation paddingSuballoc = {}; - paddingSuballoc.offset = offset + allocSize; - paddingSuballoc.size = paddingEnd; - paddingSuballoc.type = SUBALLOCATION_TYPE_FREE; - SuballocationList::iterator next = request.item; - ++next; - const SuballocationList::iterator paddingEndItem = - m_Suballocations.insert(next, paddingSuballoc); - RegisterFreeSuballocation(paddingEndItem); - } - - // If there are any free bytes remaining at the beginning, insert new free suballocation before current one. - if (paddingBegin) - { - Suballocation paddingSuballoc = {}; - paddingSuballoc.offset = offset - paddingBegin; - paddingSuballoc.size = paddingBegin; - paddingSuballoc.type = SUBALLOCATION_TYPE_FREE; - const SuballocationList::iterator paddingBeginItem = - m_Suballocations.insert(request.item, paddingSuballoc); - RegisterFreeSuballocation(paddingBeginItem); - } - - // Update totals. - m_FreeCount = m_FreeCount - 1; - if (paddingBegin > 0) - { - ++m_FreeCount; - } - if (paddingEnd > 0) - { - ++m_FreeCount; - } - m_SumFreeSize -= allocSize; - - m_ZeroInitializedRange.MarkRangeAsUsed(offset, offset + allocSize); -} - -void BlockMetadata_Generic::Free(AllocHandle allocHandle) -{ - FreeSuballocation(FindAtOffset((UINT64)allocHandle - 1).dropConst()); -} - -void BlockMetadata_Generic::Clear() -{ - m_FreeCount = 1; - m_SumFreeSize = GetSize(); - - m_Suballocations.clear(); - Suballocation suballoc = {}; - suballoc.offset = 0; - suballoc.size = GetSize(); - suballoc.type = SUBALLOCATION_TYPE_FREE; - m_Suballocations.push_back(suballoc); - - m_FreeSuballocationsBySize.clear(); - m_FreeSuballocationsBySize.push_back(m_Suballocations.begin()); -} - -SuballocationList::const_iterator BlockMetadata_Generic::FindAtOffset(UINT64 offset) const -{ - const UINT64 last = m_Suballocations.crbegin()->offset; - if (last == offset) - return m_Suballocations.crbegin(); - const UINT64 first = m_Suballocations.cbegin()->offset; - if (first == offset) - return 
m_Suballocations.cbegin(); - - const size_t suballocCount = m_Suballocations.size(); - const UINT64 step = (last - first + m_Suballocations.cbegin()->size) / suballocCount; - auto findSuballocation = [&](auto begin, auto end) -> SuballocationList::const_iterator - { - for (auto suballocItem = begin; - suballocItem != end; - ++suballocItem) - { - const Suballocation& suballoc = *suballocItem; - if (suballoc.offset == offset) - return suballocItem; - } - D3D12MA_ASSERT(false && "Not found!"); - return m_Suballocations.end(); - }; - // If requested offset is closer to the end of range, search from the end - if ((offset - first) > suballocCount * step / 2) - { - return findSuballocation(m_Suballocations.crbegin(), m_Suballocations.crend()); - } - return findSuballocation(m_Suballocations.cbegin(), m_Suballocations.cend()); -} - -bool BlockMetadata_Generic::ValidateFreeSuballocationList() const -{ - UINT64 lastSize = 0; - for (size_t i = 0, count = m_FreeSuballocationsBySize.size(); i < count; ++i) - { - const SuballocationList::iterator it = m_FreeSuballocationsBySize[i]; - - D3D12MA_VALIDATE(it->type == SUBALLOCATION_TYPE_FREE); - D3D12MA_VALIDATE(it->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER); - D3D12MA_VALIDATE(it->size >= lastSize); - lastSize = it->size; - } - return true; -} - -bool BlockMetadata_Generic::CheckAllocation( - UINT64 allocSize, - UINT64 allocAlignment, - SuballocationList::const_iterator suballocItem, - AllocHandle* pAllocHandle, - UINT64* pSumFreeSize, - UINT64* pSumItemSize, - BOOL* pZeroInitialized) const -{ - D3D12MA_ASSERT(allocSize > 0); - D3D12MA_ASSERT(suballocItem != m_Suballocations.cend()); - D3D12MA_ASSERT(pAllocHandle != NULL && pZeroInitialized != NULL); - - *pSumFreeSize = 0; - *pSumItemSize = 0; - *pZeroInitialized = FALSE; - - const Suballocation& suballoc = *suballocItem; - D3D12MA_ASSERT(suballoc.type == SUBALLOCATION_TYPE_FREE); - - *pSumFreeSize = suballoc.size; - - // Size of this suballocation is too small for this 
request: Early return. - if (suballoc.size < allocSize) - { - return false; - } - - // Start from offset equal to beginning of this suballocation and debug margin of previous allocation if present. - UINT64 offset = suballoc.offset + (suballocItem == m_Suballocations.cbegin() ? 0 : GetDebugMargin()); - - // Apply alignment. - offset = AlignUp(offset, allocAlignment); - - // Calculate padding at the beginning based on current offset. - const UINT64 paddingBegin = offset - suballoc.offset; - - // Fail if requested size plus margin after is bigger than size of this suballocation. - if (paddingBegin + allocSize + GetDebugMargin() > suballoc.size) - { - return false; - } - - // All tests passed: Success. Offset is already filled. - *pZeroInitialized = m_ZeroInitializedRange.IsRangeZeroInitialized(offset, offset + allocSize); - *pAllocHandle = (AllocHandle)(offset + 1); - return true; -} - -void BlockMetadata_Generic::MergeFreeWithNext(SuballocationList::iterator item) -{ - D3D12MA_ASSERT(item != m_Suballocations.end()); - D3D12MA_ASSERT(item->type == SUBALLOCATION_TYPE_FREE); - - SuballocationList::iterator nextItem = item; - ++nextItem; - D3D12MA_ASSERT(nextItem != m_Suballocations.end()); - D3D12MA_ASSERT(nextItem->type == SUBALLOCATION_TYPE_FREE); - - item->size += nextItem->size; - --m_FreeCount; - m_Suballocations.erase(nextItem); -} - -SuballocationList::iterator BlockMetadata_Generic::FreeSuballocation(SuballocationList::iterator suballocItem) -{ - // Change this suballocation to be marked as free. - Suballocation& suballoc = *suballocItem; - suballoc.type = SUBALLOCATION_TYPE_FREE; - suballoc.privateData = NULL; - - // Update totals. - ++m_FreeCount; - m_SumFreeSize += suballoc.size; - - // Merge with previous and/or next suballocation if it's also free. 
- bool mergeWithNext = false; - bool mergeWithPrev = false; - - SuballocationList::iterator nextItem = suballocItem; - ++nextItem; - if ((nextItem != m_Suballocations.end()) && (nextItem->type == SUBALLOCATION_TYPE_FREE)) - { - mergeWithNext = true; - } - - SuballocationList::iterator prevItem = suballocItem; - if (suballocItem != m_Suballocations.begin()) - { - --prevItem; - if (prevItem->type == SUBALLOCATION_TYPE_FREE) - { - mergeWithPrev = true; - } - } - - if (mergeWithNext) - { - UnregisterFreeSuballocation(nextItem); - MergeFreeWithNext(suballocItem); - } - - if (mergeWithPrev) - { - UnregisterFreeSuballocation(prevItem); - MergeFreeWithNext(prevItem); - RegisterFreeSuballocation(prevItem); - return prevItem; - } - else - { - RegisterFreeSuballocation(suballocItem); - return suballocItem; - } -} - -void BlockMetadata_Generic::RegisterFreeSuballocation(SuballocationList::iterator item) -{ - D3D12MA_ASSERT(item->type == SUBALLOCATION_TYPE_FREE); - D3D12MA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. - D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if (item->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - if (m_FreeSuballocationsBySize.empty()) - { - m_FreeSuballocationsBySize.push_back(item); - } - else - { - m_FreeSuballocationsBySize.InsertSorted(item, SuballocationItemSizeLess()); - } - } - - //D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - -void BlockMetadata_Generic::UnregisterFreeSuballocation(SuballocationList::iterator item) -{ - D3D12MA_ASSERT(item->type == SUBALLOCATION_TYPE_FREE); - D3D12MA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. 
- D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if (item->size >= MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER) - { - SuballocationList::iterator* const it = BinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + m_FreeSuballocationsBySize.size(), - item, - SuballocationItemSizeLess()); - for (size_t index = it - m_FreeSuballocationsBySize.data(); - index < m_FreeSuballocationsBySize.size(); - ++index) - { - if (m_FreeSuballocationsBySize[index] == item) - { - m_FreeSuballocationsBySize.remove(index); - return; - } - D3D12MA_ASSERT((m_FreeSuballocationsBySize[index]->size == item->size) && "Not found."); - } - D3D12MA_ASSERT(0 && "Not found."); - } - - //D3D12MA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - -void BlockMetadata_Generic::SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) -{ - Suballocation& suballoc = *FindAtOffset((UINT64)allocHandle - 1).dropConst(); - suballoc.privateData = privateData; -} - -void BlockMetadata_Generic::AddStatistics(Statistics& inoutStats) const -{ - inoutStats.BlockCount++; - inoutStats.AllocationCount += (UINT)m_Suballocations.size() - m_FreeCount; - inoutStats.BlockBytes += GetSize(); - inoutStats.AllocationBytes += GetSize() - m_SumFreeSize; -} - -void BlockMetadata_Generic::AddDetailedStatistics(DetailedStatistics& inoutStats) const -{ - inoutStats.Stats.BlockCount++; - inoutStats.Stats.BlockBytes += GetSize(); - - for (const auto& suballoc : m_Suballocations) - { - if (suballoc.type == SUBALLOCATION_TYPE_FREE) - AddDetailedStatisticsUnusedRange(inoutStats, suballoc.size); - else - AddDetailedStatisticsAllocation(inoutStats, suballoc.size); - } -} - -void BlockMetadata_Generic::WriteAllocationInfoToJson(JsonWriter& json) const -{ - PrintDetailedMap_Begin(json, GetSumFreeSize(), GetAllocationCount(), m_FreeCount); - for (const auto& suballoc : m_Suballocations) - { - if (suballoc.type == SUBALLOCATION_TYPE_FREE) - PrintDetailedMap_UnusedRange(json, 
suballoc.offset, suballoc.size); - else - PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); - } - PrintDetailedMap_End(json); -} -#endif // _D3D12MA_BLOCK_METADATA_GENERIC_FUNCTIONS -#endif // _D3D12MA_BLOCK_METADATA_GENERIC -#endif // #if 0 - #ifndef _D3D12MA_BLOCK_METADATA_LINEAR class BlockMetadata_Linear : public BlockMetadata { @@ -4027,6 +3365,10 @@ bool BlockMetadata_Linear::CreateAllocationRequest( D3D12MA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); D3D12MA_ASSERT(pAllocationRequest != NULL); D3D12MA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + pAllocationRequest->size = allocSize; return upperAddress ? CreateAllocationRequest_UpperAddress( @@ -5688,7 +5030,8 @@ void BlockMetadata_TLSF::WriteAllocationInfoToJson(JsonWriter& json) const } D3D12MA_ASSERT(i == 0); - PrintDetailedMap_Begin(json, GetSumFreeSize(), GetAllocationCount(), m_BlocksFreeCount + static_cast(m_NullBlock->size)); + PrintDetailedMap_Begin(json, GetSumFreeSize(), GetAllocationCount(), m_BlocksFreeCount + + (m_NullBlock->size > 0 ? 
1 : 0)); for (; i < blockCount; ++i) { Block* block = blockList[i]; @@ -6567,6 +5910,7 @@ class AllocatorPimpl BOOL IsUMA() const { return m_D3D12Architecture.UMA; } BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; } bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; } + bool IsGPUUploadHeapSupported() const { return m_GPUUploadHeapSupported != FALSE; } bool UseMutex() const { return m_UseMutex; } AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; } UINT GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } @@ -6575,6 +5919,7 @@ class AllocatorPimpl 0: D3D12_HEAP_TYPE_DEFAULT 1: D3D12_HEAP_TYPE_UPLOAD 2: D3D12_HEAP_TYPE_READBACK + 3: D3D12_HEAP_TYPE_GPU_UPLOAD else: 0: D3D12_HEAP_TYPE_DEFAULT + buffer 1: D3D12_HEAP_TYPE_DEFAULT + texture @@ -6585,8 +5930,11 @@ class AllocatorPimpl 6: D3D12_HEAP_TYPE_READBACK + buffer 7: D3D12_HEAP_TYPE_READBACK + texture 8: D3D12_HEAP_TYPE_READBACK + texture RT or DS + 9: D3D12_HEAP_TYPE_GPU_UPLOAD + buffer + 10: D3D12_HEAP_TYPE_GPU_UPLOAD + texture + 11: D3D12_HEAP_TYPE_GPU_UPLOAD + texture RT or DS */ - UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 3 : 9; } + UINT GetDefaultPoolCount() const { return SupportsResourceHeapTier2() ? 
4 : 12; } BlockVector** GetDefaultPools() { return m_BlockVectors; } HRESULT Init(const ALLOCATOR_DESC& desc); @@ -6649,6 +5997,7 @@ class AllocatorPimpl const bool m_UseMutex; const bool m_AlwaysCommitted; const bool m_MsaaAlwaysCommitted; + const bool m_PreferSmallBuffersCommitted; bool m_DefaultPoolsNotZeroed = false; ID3D12Device* m_Device; // AddRef #ifdef __ID3D12Device1_INTERFACE_DEFINED__ @@ -6672,6 +6021,7 @@ class AllocatorPimpl D3D12MA_ATOMIC_UINT32 m_CurrentFrameIndex; DXGI_ADAPTER_DESC m_AdapterDesc; D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options; + BOOL m_GPUUploadHeapSupported = FALSE; D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture; AllocationObjectAllocator m_AllocationObjectAllocator; @@ -6686,7 +6036,8 @@ class AllocatorPimpl dedicated allocation (committed resource rather than placed resource). */ template - static bool PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc); + bool PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc, + ALLOCATION_FLAGS strategy); // Allocates and registers new committed resource with implicit heap, as dedicated allocation. // Creates and returns Allocation object and optionally D3D12 resource. @@ -6738,13 +6089,14 @@ AllocatorPimpl::AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, : m_UseMutex((desc.Flags & ALLOCATOR_FLAG_SINGLETHREADED) == 0), m_AlwaysCommitted((desc.Flags & ALLOCATOR_FLAG_ALWAYS_COMMITTED) != 0), m_MsaaAlwaysCommitted((desc.Flags & ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED) != 0), + m_PreferSmallBuffersCommitted((desc.Flags & ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED) == 0), m_Device(desc.pDevice), m_Adapter(desc.pAdapter), m_PreferredBlockSize(desc.PreferredBlockSize != 0 ? desc.PreferredBlockSize : D3D12MA_DEFAULT_BLOCK_SIZE), m_AllocationCallbacks(allocationCallbacks), m_CurrentFrameIndex(0), // Below this line don't use allocationCallbacks but m_AllocationCallbacks!!! 
-    m_AllocationObjectAllocator(m_AllocationCallbacks)
+    m_AllocationObjectAllocator(m_AllocationCallbacks, m_UseMutex)
 {
     // desc.pAllocationCallbacks intentionally ignored here, preprocessed by CreateAllocator.
     ZeroMemory(&m_D3D12Options, sizeof(m_D3D12Options));
@@ -6811,6 +6163,20 @@ HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc)
     m_D3D12Options.ResourceHeapTier = (D3D12MA_FORCE_RESOURCE_HEAP_TIER);
 #endif
 
+// You must define this macro like `#define D3D12MA_OPTIONS16_SUPPORTED 1` to enable GPU Upload Heaps!
+// Unfortunately there is no way to programmatically check if the included <d3d12.h> defines D3D12_FEATURE_DATA_D3D12_OPTIONS16 or not.
+// Main interfaces have respective macros like __ID3D12Device4_INTERFACE_DEFINED__, but structures like this do not.
+#if D3D12MA_OPTIONS16_SUPPORTED
+    {
+        D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16 = {};
+        hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16));
+        if (SUCCEEDED(hr))
+        {
+            m_GPUUploadHeapSupported = options16.GPUUploadHeapSupported;
+        }
+    }
+#endif
+
     hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture));
     if (FAILED(hr))
     {
@@ -6910,7 +6276,7 @@ UINT AllocatorPimpl::StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapTy
     D3D12MA_ASSERT(IsHeapTypeStandard(heapType));
     if (IsUMA())
         return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY;
-    return heapType == D3D12_HEAP_TYPE_DEFAULT ?
+    return (heapType == D3D12_HEAP_TYPE_DEFAULT || heapType == D3D12_HEAP_TYPE_GPU_UPLOAD_COPY) ?
         DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY;
 }
@@ -7276,15 +6642,16 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat
         ClearDetailedStatistics(outCustomHeaps[1]);
     }
 
-    // Process default pools. 3 standard heap types only. Add them to outStats.HeapType[i].
+    // Process default pools. 4 standard heap types only. Add them to outStats.HeapType[i].
if (SupportsResourceHeapTier2()) { - // DEFAULT, UPLOAD, READBACK. + // DEFAULT, UPLOAD, READBACK, GPU_UPLOAD. for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) { BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex]; D3D12MA_ASSERT(pBlockVector); - pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]); } } else @@ -7296,7 +6663,9 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat { BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType]; D3D12MA_ASSERT(pBlockVector); - pBlockVector->AddDetailedStatistics(outStats.HeapType[heapTypeIndex]); + + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]); } } } @@ -7311,6 +6680,9 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat AddDetailedStatistics( outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_READBACK)], outStats.HeapType[2]); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_GPU_UPLOAD_COPY)], + outStats.HeapType[4]); // Process custom pools. DetailedStatistics tmpStats; @@ -7335,13 +6707,14 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat } } - // Process committed allocations. 3 standard heap types only. + // Process committed allocations. standard heap types only. for (UINT heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) { ClearDetailedStatistics(tmpStats); m_CommittedAllocations[heapTypeIndex].AddDetailedStatistics(tmpStats); + const size_t outputIndex = heapTypeIndex < 3 ? 
heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 AddDetailedStatistics( - outStats.HeapType[heapTypeIndex], tmpStats); + outStats.HeapType[outputIndex], tmpStats); AddDetailedStatistics( outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(IndexToStandardHeapType(heapTypeIndex))], tmpStats); } @@ -7363,19 +6736,24 @@ void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStat D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == outStats.HeapType[0].Stats.BlockCount + outStats.HeapType[1].Stats.BlockCount + - outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount); + outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount + + outStats.HeapType[4].Stats.BlockCount); D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == outStats.HeapType[0].Stats.AllocationCount + outStats.HeapType[1].Stats.AllocationCount + - outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount); + outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount + + outStats.HeapType[4].Stats.AllocationCount); D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == outStats.HeapType[0].Stats.BlockBytes + outStats.HeapType[1].Stats.BlockBytes + - outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes); + outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes + + outStats.HeapType[4].Stats.BlockBytes); D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == outStats.HeapType[0].Stats.AllocationBytes + outStats.HeapType[1].Stats.AllocationBytes + - outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes); + outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes + + outStats.HeapType[4].Stats.AllocationBytes); D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == outStats.HeapType[0].UnusedRangeCount + outStats.HeapType[1].UnusedRangeCount + - outStats.HeapType[2].UnusedRangeCount + 
outStats.HeapType[3].UnusedRangeCount); + outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount + + outStats.HeapType[4].UnusedRangeCount); } void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget) @@ -7423,6 +6801,7 @@ void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE hea switch (heapType) { case D3D12_HEAP_TYPE_DEFAULT: + case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: GetBudget(&outBudget, NULL); break; case D3D12_HEAP_TYPE_UPLOAD: @@ -7479,6 +6858,9 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) json.WriteBool(m_D3D12Architecture.UMA); json.WriteString(L"CacheCoherentUMA"); json.WriteBool(m_D3D12Architecture.CacheCoherentUMA); + + json.WriteString(L"GPUUploadHeapSupported"); + json.WriteBool(m_GPUUploadHeapSupported != FALSE); } json.EndObject(); } @@ -7511,6 +6893,17 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); } json.EndObject(); + + if(IsGPUUploadHeapSupported()) + { + json.WriteString(L"GPU_UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[4]); + } + json.EndObject(); + } } json.WriteString(L"UPLOAD"); json.BeginObject(); @@ -7561,6 +6954,17 @@ void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap) } json.EndObject(); + if(IsGPUUploadHeapSupported()) + { + json.WriteString(L"GPU_UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[4]); + } + json.EndObject(); + } + json.WriteString(L"CUSTOM"); json.BeginObject(); { @@ -7766,8 +7170,19 @@ void AllocatorPimpl::FreeStatsString(WCHAR* pStatsString) } template -bool AllocatorPimpl::PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc) +bool AllocatorPimpl::PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc, + ALLOCATION_FLAGS 
strategy) { + // Prefer creating small buffers <= 32 KB as committed, because drivers pack them better, + // while placed buffers require 64 KB alignment. + if(resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + resourceDesc.Width <= D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT / 2 && + strategy != ALLOCATION_FLAG_STRATEGY_MIN_TIME && // Creating as committed would be slower. + m_PreferSmallBuffersCommitted) + { + return true; + } + // Intentional. It may change in the future. return false; } @@ -7900,16 +7315,8 @@ HRESULT AllocatorPimpl::AllocateCommittedResource( } if (SUCCEEDED(hr)) { - BOOL wasZeroInitialized = TRUE; -#if D3D12MA_CREATE_NOT_ZEROED_AVAILABLE - if((committedAllocParams.m_HeapFlags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) != 0) - { - wasZeroInitialized = FALSE; - } -#endif - Allocation* alloc = m_AllocationObjectAllocator.Allocate( - this, resourceSize, createParams.GetBaseResourceDesc()->Alignment, wasZeroInitialized); + this, resourceSize, createParams.GetBaseResourceDesc()->Alignment); alloc->InitCommitted(committedAllocParams.m_List); alloc->SetResourcePointer(res, createParams.GetBaseResourceDesc()); alloc->SetPrivateData(pPrivateData); @@ -7968,16 +7375,7 @@ HRESULT AllocatorPimpl::AllocateHeap( if (SUCCEEDED(hr)) { SetResidencyPriority(heap, committedAllocParams.m_ResidencyPriority); - - BOOL wasZeroInitialized = TRUE; -#if D3D12MA_CREATE_NOT_ZEROED_AVAILABLE - if((heapDesc.Flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) != 0) - { - wasZeroInitialized = FALSE; - } -#endif - - (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes, allocInfo.Alignment, wasZeroInitialized); + (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes, allocInfo.Alignment); (*ppAllocation)->InitHeap(committedAllocParams.m_List, heap); (*ppAllocation)->SetPrivateData(pPrivateData); committedAllocParams.m_List->Register(*ppAllocation); @@ -7998,6 +7396,9 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const 
ALLOCATION_DESC& allocDesc, U outCommittedAllocationParams = CommittedAllocationParameters(); outPreferCommitted = false; + D3D12MA_ASSERT((allocDesc.HeapType != D3D12_HEAP_TYPE_GPU_UPLOAD_COPY || IsGPUUploadHeapSupported()) && + "Trying to allocate from D3D12_HEAP_TYPE_GPU_UPLOAD while GPUUploadHeapSupported == FALSE or D3D12MA_OPTIONS16_SUPPORTED macro was not defined when compiling D3D12MA library."); + bool msaaAlwaysCommitted; if (allocDesc.CustomPool != NULL) { @@ -8066,7 +7467,7 @@ HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, U { if (resDesc->SampleDesc.Count > 1 && msaaAlwaysCommitted) outBlockVector = NULL; - if (!outPreferCommitted && PrefersCommittedAllocation(*resDesc)) + if (!outPreferCommitted && PrefersCommittedAllocation(*resDesc, allocDesc.Flags & ALLOCATION_FLAG_STRATEGY_MASK)) outPreferCommitted = true; } @@ -8097,6 +7498,7 @@ UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, Reso case D3D12_HEAP_TYPE_DEFAULT: poolIndex = 0; break; case D3D12_HEAP_TYPE_UPLOAD: poolIndex = 1; break; case D3D12_HEAP_TYPE_READBACK: poolIndex = 2; break; + case D3D12_HEAP_TYPE_GPU_UPLOAD_COPY: poolIndex = 3; break; default: D3D12MA_ASSERT(0); } @@ -8152,6 +7554,9 @@ void AllocatorPimpl::CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_H case 2: outHeapType = D3D12_HEAP_TYPE_READBACK; break; + case 3: + outHeapType = D3D12_HEAP_TYPE_GPU_UPLOAD_COPY; + break; default: D3D12MA_ASSERT(0); } @@ -8995,7 +8400,7 @@ HRESULT BlockVector::CommitAllocationRequest( if (pBlock->m_pMetadata->IsEmpty()) m_HasEmptyBlock = false; - *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size, alignment, allocRequest.zeroInitialized); + *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size, alignment); pBlock->m_pMetadata->Alloc(allocRequest, size, *pAllocation); (*pAllocation)->InitPlaced(allocRequest.allocHandle, pBlock); @@ -9908,11 +9313,6 @@ void 
Allocation::SetName(LPCWSTR Name) void Allocation::ReleaseThis() { - if (this == NULL) - { - return; - } - SAFE_RELEASE(m_Resource); switch (m_PackedData.GetType()) @@ -9933,7 +9333,7 @@ void Allocation::ReleaseThis() m_Allocator->GetAllocationObjectAllocator().Free(this); } -Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment, BOOL wasZeroInitialized) +Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment) : m_Allocator{ allocator }, m_Size{ size }, m_Alignment{ alignment }, @@ -9947,7 +9347,6 @@ Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment, m_PackedData.SetResourceDimension(D3D12_RESOURCE_DIMENSION_UNKNOWN); m_PackedData.SetResourceFlags(D3D12_RESOURCE_FLAG_NONE); m_PackedData.SetTextureLayout(D3D12_TEXTURE_LAYOUT_UNKNOWN); - m_PackedData.SetWasZeroInitialized(wasZeroInitialized); } void Allocation::InitCommitted(CommittedAllocationList* list) @@ -9981,7 +9380,6 @@ void Allocation::SwapBlockAllocation(Allocation* allocation) D3D12MA_ASSERT(allocation->m_PackedData.GetType() == TYPE_PLACED); D3D12MA_SWAP(m_Resource, allocation->m_Resource); - m_PackedData.SetWasZeroInitialized(allocation->m_PackedData.WasZeroInitialized()); m_Placed.block->m_pMetadata->SetAllocationPrivateData(m_Placed.allocHandle, allocation); D3D12MA_SWAP(m_Placed, allocation->m_Placed); m_Placed.block->m_pMetadata->SetAllocationPrivateData(m_Placed.allocHandle, this); @@ -10059,11 +9457,6 @@ void DefragmentationContext::GetStats(DEFRAGMENTATION_STATS* pStats) void DefragmentationContext::ReleaseThis() { - if (this == NULL) - { - return; - } - D3D12MA_DELETE(m_Pimpl->GetAllocs(), this); } @@ -10124,11 +9517,6 @@ HRESULT Pool::BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, Defragment void Pool::ReleaseThis() { - if (this == NULL) - { - return; - } - D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), this); } @@ -10159,6 +9547,11 @@ BOOL Allocator::IsCacheCoherentUMA() const return 
m_Pimpl->IsCacheCoherentUMA(); } +BOOL Allocator::IsGPUUploadHeapSupported() const +{ + return m_Pimpl->IsGPUUploadHeapSupported(); +} + UINT64 Allocator::GetMemoryCapacity(UINT memorySegmentGroup) const { return m_Pimpl->GetMemoryCapacity(memorySegmentGroup); From 4b8890c43873fc4894355cd92018b1429e156c45 Mon Sep 17 00:00:00 2001 From: JordanTheToaster Date: Sun, 8 Dec 2024 23:16:54 +0000 Subject: [PATCH 6/6] 3rdparty: Sync vkmemoryallocator to commit 5a53a198945ba8260fbc58fadb788745ce6aa263 --- 3rdparty/vulkan/include/vk_mem_alloc.h | 523 ++++++++++++++++-- 3rdparty/vulkan/include/vulkan/vk_mem_alloc.h | 523 ++++++++++++++++-- 2 files changed, 958 insertions(+), 88 deletions(-) diff --git a/3rdparty/vulkan/include/vk_mem_alloc.h b/3rdparty/vulkan/include/vk_mem_alloc.h index 2307325d4e26d..39f6ef345d3b1 100644 --- a/3rdparty/vulkan/include/vk_mem_alloc.h +++ b/3rdparty/vulkan/include/vk_mem_alloc.h @@ -95,6 +95,7 @@ See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-me - \subpage enabling_buffer_device_address - \subpage vk_ext_memory_priority - \subpage vk_amd_device_coherent_memory + - \subpage vk_khr_external_memory_win32 - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) @@ -127,7 +128,9 @@ See documentation chapter: \ref statistics. extern "C" { #endif +#if !defined(VULKAN_H_) #include +#endif #if !defined(VMA_VULKAN_VERSION) #if defined(VK_VERSION_1_3) @@ -240,6 +243,15 @@ extern "C" { #endif #endif +// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY_WIN32) + #if VK_KHR_external_memory_win32 + #define VMA_EXTERNAL_MEMORY_WIN32 1 + #else + #define VMA_EXTERNAL_MEMORY_WIN32 0 + #endif +#endif + // Define these macros to decorate all public functions with additional code, // before and after returned type, appropriately. 
This may be useful for // exporting the functions when compiling VMA as a separate library. Example: @@ -459,6 +471,15 @@ typedef enum VmaAllocatorCreateFlagBits */ VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + /** + Enables usage of VK_KHR_external_memory_win32 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + For more information, see \ref vk_khr_external_memory_win32. + */ + VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200, + VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; /// See #VmaAllocatorCreateFlagBits. @@ -1033,6 +1054,11 @@ typedef struct VmaVulkanFunctions /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements; #endif +#if VMA_EXTERNAL_MEMORY_WIN32 + PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR; +#else + void* VMA_NULLABLE vkGetMemoryWin32HandleKHR; +#endif } VmaVulkanFunctions; /// Description of a Allocator to be created. @@ -1810,6 +1836,9 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( \param allocator Allocator object. \param pool Pool object. \param[out] pPoolStats Statistics of specified pool. + +Note that when using the pool from multiple threads, returned information may immediately +become outdated. */ VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator VMA_NOT_NULL allocator, @@ -2050,6 +2079,40 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( VmaAllocation VMA_NOT_NULL allocation, VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +#if VMA_EXTERNAL_MEMORY_WIN32 +/** +\brief Given an allocation, returns Win32 handle that may be imported by other processes or APIs. 
+ +\param hTargetProcess Must be a valid handle to target process or null. If it's null, the function returns + handle for the current process. +\param[out] pHandle Output parameter that returns the handle. + +The function fills `pHandle` with handle that can be used in target process. +The handle is fetched using function `vkGetMemoryWin32HandleKHR`. +When no longer needed, you must close it using: + +\code +CloseHandle(handle); +\endcode + +You can close it any time, before or after destroying the allocation object. +It is reference-counted internally by Windows. + +Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to. +If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation +(VmaAllocationInfo::offset). + +If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check +that VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS` +or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions. + +For more information, see chapter \ref vk_khr_external_memory_win32. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle); +#endif // VMA_EXTERNAL_MEMORY_WIN32 + /** \brief Maps memory represented by given allocation and returns pointer to it. Maps memory represented by given allocation to make it accessible to CPU code. @@ -3097,7 +3160,7 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) std::shared_mutex m_Mutex; }; #define VMA_RW_MUTEX VmaRWMutex - #elif defined(_WIN32) && defined(WINVER) && WINVER >= 0x0600 + #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600 // Use SRWLOCK from WinAPI. // Minimum supported client = Windows Vista, server = Windows Server 2008. 
class VmaRWMutex @@ -3838,12 +3901,6 @@ struct VmaBufferImageUsage const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); -static void swap(VmaBufferImageUsage& lhs, VmaBufferImageUsage& rhs) noexcept -{ - using std::swap; - swap(lhs.Value, rhs.Value); -} - VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) { @@ -6073,6 +6130,84 @@ class VmaMappingHysteresis #endif // _VMA_MAPPING_HYSTERESIS +#if VMA_EXTERNAL_MEMORY_WIN32 +class VmaWin32Handle +{ +public: + VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { } + explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { } + ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } } + VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle) + +public: + // Strengthened + VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept + { + *pHandle = VMA_NULL; + // Try to get handle first. + if (m_hHandle != VMA_NULL) + { + *pHandle = Duplicate(hTargetProcess); + return VK_SUCCESS; + } + + VkResult res = VK_SUCCESS; + // If failed, try to create it. 
+ { + VmaMutexLockWrite lock(m_Mutex, useMutex); + if (m_hHandle == VMA_NULL) + { + res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle); + } + } + + *pHandle = Duplicate(hTargetProcess); + return res; + } + + operator bool() const noexcept { return m_hHandle != VMA_NULL; } +private: + // Not atomic + static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept + { + VkResult res = VK_ERROR_FEATURE_NOT_PRESENT; + if (pvkGetMemoryWin32HandleKHR != VMA_NULL) + { + VkMemoryGetWin32HandleInfoKHR handleInfo{ }; + handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handleInfo.memory = memory; + handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; + res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle); + } + return res; + } + HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept + { + if (!m_hHandle) + return m_hHandle; + + HANDLE hCurrentProcess = ::GetCurrentProcess(); + HANDLE hDupHandle = VMA_NULL; + if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? 
hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + VMA_ASSERT(0 && "Failed to duplicate handle."); + } + return hDupHandle; + } +private: + HANDLE m_hHandle; + VMA_RW_MUTEX m_Mutex; // Protects access m_Handle +}; +#else +class VmaWin32Handle +{ + // ABI compatibility + void* placeholder = VMA_NULL; + VMA_RW_MUTEX placeholder2; +}; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + + #ifndef _VMA_DEVICE_MEMORY_BLOCK /* Represents a single block of device memory (`VkDeviceMemory`) with all the @@ -6139,7 +6274,13 @@ class VmaDeviceMemoryBlock VkDeviceSize allocationLocalOffset, VkImage hImage, const void* pNext); - +#if VMA_EXTERNAL_MEMORY_WIN32 + VkResult CreateWin32Handle( + const VmaAllocator hAllocator, + PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, + HANDLE hTargetProcess, + HANDLE* pHandle)noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 private: VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. uint32_t m_MemoryTypeIndex; @@ -6155,10 +6296,18 @@ class VmaDeviceMemoryBlock VmaMappingHysteresis m_MappingHysteresis; uint32_t m_MapCount; void* m_pMappedData; + + VmaWin32Handle m_Handle; }; #endif // _VMA_DEVICE_MEMORY_BLOCK #ifndef _VMA_ALLOCATION_T +struct VmaAllocationExtraData +{ + void* m_pMappedData = VMA_NULL; // Not null means memory is mapped. + VmaWin32Handle m_Handle; +}; + struct VmaAllocation_T { friend struct VmaDedicatedAllocationListItemTraits; @@ -6191,12 +6340,14 @@ struct VmaAllocation_T bool mapped); // pMappedData not null means allocation is created with MAPPED flag. 
void InitDedicatedAllocation( + VmaAllocator allocator, VmaPool hParentPool, uint32_t memoryTypeIndex, VkDeviceMemory hMemory, VmaSuballocationType suballocationType, void* pMappedData, VkDeviceSize size); + void Destroy(VmaAllocator allocator); ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } VkDeviceSize GetAlignment() const { return m_Alignment; } @@ -6240,6 +6391,10 @@ struct VmaAllocation_T void PrintParameters(class VmaJsonWriter& json) const; #endif +#if VMA_EXTERNAL_MEMORY_WIN32 + VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + private: // Allocation out of VmaDeviceMemoryBlock. struct BlockAllocation @@ -6252,7 +6407,7 @@ struct VmaAllocation_T { VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. VkDeviceMemory m_hMemory; - void* m_pMappedData; // Not null means memory is mapped. + VmaAllocationExtraData* m_ExtraData; VmaAllocation_T* m_Prev; VmaAllocation_T* m_Next; }; @@ -6277,6 +6432,8 @@ struct VmaAllocation_T #if VMA_STATS_STRING_ENABLED VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. 
#endif + + void EnsureExtraData(VmaAllocator hAllocator); }; #endif // _VMA_ALLOCATION_T @@ -10075,6 +10232,7 @@ struct VmaAllocator_T bool m_UseExtMemoryPriority; bool m_UseKhrMaintenance4; bool m_UseKhrMaintenance5; + bool m_UseKhrExternalMemoryWin32; const VkDevice m_hDevice; const VkInstance m_hInstance; const bool m_AllocationCallbacksSpecified; @@ -10438,7 +10596,7 @@ VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) m_Id(0), m_hMemory(VK_NULL_HANDLE), m_MapCount(0), - m_pMappedData(VMA_NULL) {} + m_pMappedData(VMA_NULL){} VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() { @@ -10681,6 +10839,14 @@ VkResult VmaDeviceMemoryBlock::BindImageMemory( VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); } + +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + VMA_ASSERT(pHandle); + return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 #endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS #ifndef _VMA_ALLOCATION_T_FUNCTIONS @@ -10733,6 +10899,7 @@ void VmaAllocation_T::InitBlockAllocation( } void VmaAllocation_T::InitDedicatedAllocation( + VmaAllocator allocator, VmaPool hParentPool, uint32_t memoryTypeIndex, VkDeviceMemory hMemory, @@ -10747,16 +10914,29 @@ void VmaAllocation_T::InitDedicatedAllocation( m_Size = size; m_MemoryTypeIndex = memoryTypeIndex; m_SuballocationType = (uint8_t)suballocationType; - if(pMappedData != VMA_NULL) - { - VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; - } + m_DedicatedAllocation.m_ExtraData = VMA_NULL; m_DedicatedAllocation.m_hParentPool = hParentPool; m_DedicatedAllocation.m_hMemory = hMemory; - m_DedicatedAllocation.m_pMappedData = pMappedData; m_DedicatedAllocation.m_Prev = VMA_NULL; m_DedicatedAllocation.m_Next = VMA_NULL; + + if (pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + EnsureExtraData(allocator); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData; + } +} + +void VmaAllocation_T::Destroy(VmaAllocator allocator) +{ + FreeName(allocator); + + if (GetType() == ALLOCATION_TYPE_DEDICATED) + { + vma_delete(allocator, m_DedicatedAllocation.m_ExtraData); + } } void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) @@ -10861,8 +11041,9 @@ void* VmaAllocation_T::GetMappedData() const } break; case ALLOCATION_TYPE_DEDICATED: - VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0 || IsPersistentMap())); - return m_DedicatedAllocation.m_pMappedData; + VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) == + (m_MapCount != 0 || IsPersistentMap())); + return m_DedicatedAllocation.m_ExtraData != VMA_NULL ? m_DedicatedAllocation.m_ExtraData->m_pMappedData : VMA_NULL; default: VMA_ASSERT(0); return VMA_NULL; @@ -10903,12 +11084,14 @@ VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppDa VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + EnsureExtraData(hAllocator); + if (m_MapCount != 0 || IsPersistentMap()) { if (m_MapCount < 0xFF) { - VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); - *ppData = m_DedicatedAllocation.m_pMappedData; + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL); + *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData; ++m_MapCount; return VK_SUCCESS; } @@ -10929,7 +11112,7 @@ VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppDa ppData); if (result == VK_SUCCESS) { - m_DedicatedAllocation.m_pMappedData = *ppData; + m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData; m_MapCount = 1; } return result; @@ -10945,7 +11128,8 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) --m_MapCount; if (m_MapCount == 0 && !IsPersistentMap()) { - m_DedicatedAllocation.m_pMappedData = VMA_NULL; + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory); @@ -10981,8 +11165,33 @@ void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const json.WriteString(m_pName); } } +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR; + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle); + case ALLOCATION_TYPE_DEDICATED: + EnsureExtraData(hAllocator); + return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, 
pHandle); + default: + VMA_ASSERT(0); + return VK_ERROR_FEATURE_NOT_PRESENT; + } +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 #endif // VMA_STATS_STRING_ENABLED +void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator) +{ + if (m_DedicatedAllocation.m_ExtraData == VMA_NULL) + { + m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)(); + } +} + void VmaAllocation_T::FreeName(VmaAllocator hAllocator) { if(m_pName) @@ -11399,6 +11608,10 @@ void VmaBlockVector::Free(const VmaAllocation hAllocation) } IncrementallySortBlocks(); + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + hAllocation->Destroy(m_hAllocator); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } // Destruction of a free block. Deferred until this point, outside of mutex @@ -11409,9 +11622,6 @@ void VmaBlockVector::Free(const VmaAllocation hAllocation) pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } - - m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); - m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const @@ -12711,6 +12921,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), + m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0), m_hDevice(pCreateInfo->device), m_hInstance(pCreateInfo->instance), m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), @@ -12802,6 +13013,19 @@ VmaAllocator_T::VmaAllocator_T(const 
VmaAllocatorCreateInfo* pCreateInfo) : VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); } #endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif + +#if !(VMA_EXTERNAL_MEMORY_WIN32) + if(m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); @@ -13026,7 +13250,9 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); #endif - +#if VMA_EXTERNAL_MEMORY_WIN32 + VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR); +#endif #undef VMA_COPY_IF_NOT_NULL } @@ -13128,7 +13354,12 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); } #endif - +#if VMA_EXTERNAL_MEMORY_WIN32 + if (m_UseKhrExternalMemoryWin32) + { + VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR"); + } +#endif #undef VMA_FETCH_DEVICE_FUNC #undef VMA_FETCH_INSTANCE_FUNC } @@ -13177,6 +13408,12 @@ void VmaAllocator_T::ValidateVulkanFunctions() VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); } #endif +#if VMA_EXTERNAL_MEMORY_WIN32 + if 
(m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL); + } +#endif // Not validating these due to suspected driver bugs with these function // pointers being null despite correct extension or Vulkan version is enabled. @@ -13527,7 +13764,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); - (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); + (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size); if (isUserDataString) (*pAllocation)->SetName(this, (const char*)pUserData); else @@ -13863,8 +14100,6 @@ void VmaAllocator_T::FreeMemory( FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); } - allocation->FreeName(this); - switch(allocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: @@ -14335,7 +14570,6 @@ VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) } return res; } - VMA_FALLTHROUGH; // Fallthrough case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: return hAllocation->DedicatedAllocMap(this, ppData); default: @@ -14549,6 +14783,7 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + allocation->Destroy(this); m_AllocationObjectAllocator.Free(allocation); VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); @@ -16169,7 +16404,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( pImageCreateInfo, allocator->GetAllocationCallbacks(), pImage); - if(res >= 0) + if(res == VK_SUCCESS) { VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? 
VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : @@ -16194,14 +16429,14 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( 1, // allocationCount pAllocation); - if(res >= 0) + if(res == VK_SUCCESS) { // 3. Bind image with memory. if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) { res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); } - if(res >= 0) + if(res == VK_SUCCESS) { // All steps succeeded. #if VMA_STATS_STRING_ENABLED @@ -16434,6 +16669,15 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock V VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString); } } +#if VMA_EXTERNAL_MEMORY_WIN32 +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle) +{ + VMA_ASSERT(allocator && allocation && pHandle); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_PUBLIC_INTERFACE #endif // VMA_IMPLEMENTATION @@ -16567,6 +16811,7 @@ VK_EXT_memory_budget | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT VK_KHR_buffer_device_address | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT VK_EXT_memory_priority | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +VK_KHR_external_memory_win32 | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT Example with fetching pointers to Vulkan functions dynamically: @@ -17053,7 +17298,7 @@ implementation whether the allocation succeeds or fails. You can change this beh by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is not made if it would exceed the budget or if the budget is already exceeded. VMA then tries to make the allocation from the next eligible Vulkan memory type. 
-The all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag when creating resources that are not essential for the application (e.g. the texture of a specific object) and not to pass it when creating critically important resources @@ -18193,7 +18438,8 @@ allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +// Check result... VkMemoryPropertyFlags memPropFlags; vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); @@ -18204,10 +18450,24 @@ if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) // [Executed in runtime]: memcpy(allocInfo.pMappedData, myData, myDataSize); + result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = buf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); } else { - // Allocation ended up in a non-mappable memory - need to transfer. + // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required. 
VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; stagingBufCreateInfo.size = 65536; stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; @@ -18220,18 +18480,46 @@ else VkBuffer stagingBuf; VmaAllocation stagingAlloc; VmaAllocationInfo stagingAllocInfo; - vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, - &stagingBuf, &stagingAlloc, stagingAllocInfo); + result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, &stagingAllocInfo); + // Check result... // [Executed in runtime]: memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); - vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); - //vkCmdPipelineBarrier: VK_ACCESS_HOST_WRITE_BIT --> VK_ACCESS_TRANSFER_READ_BIT + result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = stagingBuf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + VkBufferCopy bufCopy = { 0, // srcOffset 0, // dstOffset, - myDataSize); // size + myDataSize, // size + }; + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); + + VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer + bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.buffer = buf; + bufMemBarrier2.offset = 0; + bufMemBarrier2.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); } \endcode @@ -18264,14 +18552,22 @@ Please check "CONFIGURATION SECTION" in the code to find macros that you can def before each include of this file or change directly in this file to provide your own implementation of basic facilities like assert, `min()` and `max()` functions, mutex, atomic etc. -The library uses its own implementation of containers by default, but you can switch to using -STL containers instead. For example, define `VMA_ASSERT(expr)` before including the library to provide custom implementation of the assertion, compatible with your project. By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration and empty otherwise. +Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations, +including their names and other parameters. Example: + +\code +#define VMA_LEAK_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) +\endcode + \section config_Vulkan_functions Pointers to Vulkan functions There are multiple ways to import pointers to Vulkan functions in the library. @@ -18526,6 +18822,145 @@ Example use of this extension can be found in the code of the sample and test su accompanying this library. +\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32 + +On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE` +of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances, +in multiple processes, and/or in multiple APIs. +VMA offers support for it. 
+
+\section vk_khr_external_memory_win32_initialization Initialization
+
+1) Make sure the extension is defined in the code by including following header before including VMA:
+
+\code
+#include <vulkan/vulkan_win32.h>
+\endcode
+
+2) Check if "VK_KHR_external_memory_win32" is available among device extensions.
+Enable it when creating the `VkDevice` object.
+
+3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+when calling vmaCreateAllocator().
+
+4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro
+or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly.
+For more information, see \ref quick_start_initialization_importing_vulkan_functions.
+
+\section vk_khr_external_memory_win32_preparations Preparations
+
+You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`.
+
+To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain,
+and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain.
+To make use of them, you need to use \ref custom_memory_pools. Example:
+
+\code
+// Define an example buffer and allocation parameters.
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 0x10000; // Doesn't matter here.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+exampleBufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo exampleAllocCreateInfo = {};
+exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+// Find memory type index to use for the custom pool.
+uint32_t memTypeIndex; +VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator, + &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex); +// Check res... + +// Create a custom pool. +constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = { + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT +}; +VmaPoolCreateInfo poolCreateInfo = {}; +poolCreateInfo.memoryTypeIndex = memTypeIndex; +poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo; + +VmaPool pool; +res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool); +// Check res... + +// YOUR OTHER CODE COMES HERE.... + +// At the end, don't forget to destroy it! +vmaDestroyPool(g_Allocator, pool); +\endcode + +Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged +for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block. +No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`. + +\section vk_khr_external_memory_win32_memory_allocation Memory allocation + +Finally, you can create a buffer with an allocation out of the custom pool. +The buffer should use same flags as the sample buffer used to find the memory type. +It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain. + +\code +VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = { + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT +}; +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = // Your desired buffer size. 
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+bufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool; // It is enough to set this one member.
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyBuffer(g_Allocator, buf, alloc);
+\endcode
+
+If you need each allocation to have its own device memory block and start at offset 0, you can still do so
+by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It also works with custom pools.
+
+\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle
+
+After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to.
+VMA function vmaGetMemoryWin32Handle() is a replacement of the Vulkan function `vkGetMemoryWin32HandleKHR`.
+
+\code
+HANDLE handle;
+res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, you must close the handle.
+CloseHandle(handle);
+\endcode
+
+Documentation of the VK_KHR_external_memory_win32 extension states that:
+
+> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType.
+
+This is ensured automatically inside VMA.
+The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed.
+Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close.
+
+For further information, please check documentation of the vmaGetMemoryWin32Handle() function.
+ + \page enabling_buffer_device_address Enabling buffer device address Device extension VK_KHR_buffer_device_address diff --git a/3rdparty/vulkan/include/vulkan/vk_mem_alloc.h b/3rdparty/vulkan/include/vulkan/vk_mem_alloc.h index 2307325d4e26d..39f6ef345d3b1 100644 --- a/3rdparty/vulkan/include/vulkan/vk_mem_alloc.h +++ b/3rdparty/vulkan/include/vulkan/vk_mem_alloc.h @@ -95,6 +95,7 @@ See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-me - \subpage enabling_buffer_device_address - \subpage vk_ext_memory_priority - \subpage vk_amd_device_coherent_memory + - \subpage vk_khr_external_memory_win32 - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) @@ -127,7 +128,9 @@ See documentation chapter: \ref statistics. extern "C" { #endif +#if !defined(VULKAN_H_) #include +#endif #if !defined(VMA_VULKAN_VERSION) #if defined(VK_VERSION_1_3) @@ -240,6 +243,15 @@ extern "C" { #endif #endif +// Defined to 1 when VK_KHR_external_memory_win32 device extension is defined in Vulkan headers. +#if !defined(VMA_EXTERNAL_MEMORY_WIN32) + #if VK_KHR_external_memory_win32 + #define VMA_EXTERNAL_MEMORY_WIN32 1 + #else + #define VMA_EXTERNAL_MEMORY_WIN32 0 + #endif +#endif + // Define these macros to decorate all public functions with additional code, // before and after returned type, appropriately. This may be useful for // exporting the functions when compiling VMA as a separate library. Example: @@ -459,6 +471,15 @@ typedef enum VmaAllocatorCreateFlagBits */ VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + /** + Enables usage of VK_KHR_external_memory_win32 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + For more information, see \ref vk_khr_external_memory_win32. 
+ */ + VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT = 0x00000200, + VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; /// See #VmaAllocatorCreateFlagBits. @@ -1033,6 +1054,11 @@ typedef struct VmaVulkanFunctions /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements; #endif +#if VMA_EXTERNAL_MEMORY_WIN32 + PFN_vkGetMemoryWin32HandleKHR VMA_NULLABLE vkGetMemoryWin32HandleKHR; +#else + void* VMA_NULLABLE vkGetMemoryWin32HandleKHR; +#endif } VmaVulkanFunctions; /// Description of a Allocator to be created. @@ -1810,6 +1836,9 @@ VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( \param allocator Allocator object. \param pool Pool object. \param[out] pPoolStats Statistics of specified pool. + +Note that when using the pool from multiple threads, returned information may immediately +become outdated. */ VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( VmaAllocator VMA_NOT_NULL allocator, @@ -2050,6 +2079,40 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( VmaAllocation VMA_NOT_NULL allocation, VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +#if VMA_EXTERNAL_MEMORY_WIN32 +/** +\brief Given an allocation, returns Win32 handle that may be imported by other processes or APIs. + +\param hTargetProcess Must be a valid handle to target process or null. If it's null, the function returns + handle for the current process. +\param[out] pHandle Output parameter that returns the handle. + +The function fills `pHandle` with handle that can be used in target process. +The handle is fetched using function `vkGetMemoryWin32HandleKHR`. +When no longer needed, you must close it using: + +\code +CloseHandle(handle); +\endcode + +You can close it any time, before or after destroying the allocation object. 
+It is reference-counted internally by Windows. + +Note the handle is returned for the entire `VkDeviceMemory` block that the allocation belongs to. +If the allocation is sub-allocated from a larger block, you may need to consider the offset of the allocation +(VmaAllocationInfo::offset). + +If the function fails with `VK_ERROR_FEATURE_NOT_PRESENT` error code, please double-check +that VmaVulkanFunctions::vkGetMemoryWin32HandleKHR function pointer is set, e.g. either by using `VMA_DYNAMIC_VULKAN_FUNCTIONS` +or by manually passing it through VmaAllocatorCreateInfo::pVulkanFunctions. + +For more information, see chapter \ref vk_khr_external_memory_win32. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle); +#endif // VMA_EXTERNAL_MEMORY_WIN32 + /** \brief Maps memory represented by given allocation and returns pointer to it. Maps memory represented by given allocation to make it accessible to CPU code. @@ -3097,7 +3160,7 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) std::shared_mutex m_Mutex; }; #define VMA_RW_MUTEX VmaRWMutex - #elif defined(_WIN32) && defined(WINVER) && WINVER >= 0x0600 + #elif defined(_WIN32) && defined(WINVER) && defined(SRWLOCK_INIT) && WINVER >= 0x0600 // Use SRWLOCK from WinAPI. // Minimum supported client = Windows Vista, server = Windows Server 2008. 
class VmaRWMutex @@ -3838,12 +3901,6 @@ struct VmaBufferImageUsage const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); -static void swap(VmaBufferImageUsage& lhs, VmaBufferImageUsage& rhs) noexcept -{ - using std::swap; - swap(lhs.Value, rhs.Value); -} - VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) { @@ -6073,6 +6130,84 @@ class VmaMappingHysteresis #endif // _VMA_MAPPING_HYSTERESIS +#if VMA_EXTERNAL_MEMORY_WIN32 +class VmaWin32Handle +{ +public: + VmaWin32Handle() noexcept : m_hHandle(VMA_NULL) { } + explicit VmaWin32Handle(HANDLE hHandle) noexcept : m_hHandle(hHandle) { } + ~VmaWin32Handle() noexcept { if (m_hHandle != VMA_NULL) { ::CloseHandle(m_hHandle); } } + VMA_CLASS_NO_COPY_NO_MOVE(VmaWin32Handle) + +public: + // Strengthened + VkResult GetHandle(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, bool useMutex, HANDLE* pHandle) noexcept + { + *pHandle = VMA_NULL; + // Try to get handle first. + if (m_hHandle != VMA_NULL) + { + *pHandle = Duplicate(hTargetProcess); + return VK_SUCCESS; + } + + VkResult res = VK_SUCCESS; + // If failed, try to create it. 
+ { + VmaMutexLockWrite lock(m_Mutex, useMutex); + if (m_hHandle == VMA_NULL) + { + res = Create(device, memory, pvkGetMemoryWin32HandleKHR, &m_hHandle); + } + } + + *pHandle = Duplicate(hTargetProcess); + return res; + } + + operator bool() const noexcept { return m_hHandle != VMA_NULL; } +private: + // Not atomic + static VkResult Create(VkDevice device, VkDeviceMemory memory, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE* pHandle) noexcept + { + VkResult res = VK_ERROR_FEATURE_NOT_PRESENT; + if (pvkGetMemoryWin32HandleKHR != VMA_NULL) + { + VkMemoryGetWin32HandleInfoKHR handleInfo{ }; + handleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handleInfo.memory = memory; + handleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; + res = pvkGetMemoryWin32HandleKHR(device, &handleInfo, pHandle); + } + return res; + } + HANDLE Duplicate(HANDLE hTargetProcess = VMA_NULL) const noexcept + { + if (!m_hHandle) + return m_hHandle; + + HANDLE hCurrentProcess = ::GetCurrentProcess(); + HANDLE hDupHandle = VMA_NULL; + if (!::DuplicateHandle(hCurrentProcess, m_hHandle, hTargetProcess ? 
hTargetProcess : hCurrentProcess, &hDupHandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + { + VMA_ASSERT(0 && "Failed to duplicate handle."); + } + return hDupHandle; + } +private: + HANDLE m_hHandle; + VMA_RW_MUTEX m_Mutex; // Protects access m_Handle +}; +#else +class VmaWin32Handle +{ + // ABI compatibility + void* placeholder = VMA_NULL; + VMA_RW_MUTEX placeholder2; +}; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + + #ifndef _VMA_DEVICE_MEMORY_BLOCK /* Represents a single block of device memory (`VkDeviceMemory`) with all the @@ -6139,7 +6274,13 @@ class VmaDeviceMemoryBlock VkDeviceSize allocationLocalOffset, VkImage hImage, const void* pNext); - +#if VMA_EXTERNAL_MEMORY_WIN32 + VkResult CreateWin32Handle( + const VmaAllocator hAllocator, + PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, + HANDLE hTargetProcess, + HANDLE* pHandle)noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 private: VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. uint32_t m_MemoryTypeIndex; @@ -6155,10 +6296,18 @@ class VmaDeviceMemoryBlock VmaMappingHysteresis m_MappingHysteresis; uint32_t m_MapCount; void* m_pMappedData; + + VmaWin32Handle m_Handle; }; #endif // _VMA_DEVICE_MEMORY_BLOCK #ifndef _VMA_ALLOCATION_T +struct VmaAllocationExtraData +{ + void* m_pMappedData = VMA_NULL; // Not null means memory is mapped. + VmaWin32Handle m_Handle; +}; + struct VmaAllocation_T { friend struct VmaDedicatedAllocationListItemTraits; @@ -6191,12 +6340,14 @@ struct VmaAllocation_T bool mapped); // pMappedData not null means allocation is created with MAPPED flag. 
void InitDedicatedAllocation( + VmaAllocator allocator, VmaPool hParentPool, uint32_t memoryTypeIndex, VkDeviceMemory hMemory, VmaSuballocationType suballocationType, void* pMappedData, VkDeviceSize size); + void Destroy(VmaAllocator allocator); ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } VkDeviceSize GetAlignment() const { return m_Alignment; } @@ -6240,6 +6391,10 @@ struct VmaAllocation_T void PrintParameters(class VmaJsonWriter& json) const; #endif +#if VMA_EXTERNAL_MEMORY_WIN32 + VkResult GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* hHandle) noexcept; +#endif // VMA_EXTERNAL_MEMORY_WIN32 + private: // Allocation out of VmaDeviceMemoryBlock. struct BlockAllocation @@ -6252,7 +6407,7 @@ struct VmaAllocation_T { VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. VkDeviceMemory m_hMemory; - void* m_pMappedData; // Not null means memory is mapped. + VmaAllocationExtraData* m_ExtraData; VmaAllocation_T* m_Prev; VmaAllocation_T* m_Next; }; @@ -6277,6 +6432,8 @@ struct VmaAllocation_T #if VMA_STATS_STRING_ENABLED VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. 
#endif + + void EnsureExtraData(VmaAllocator hAllocator); }; #endif // _VMA_ALLOCATION_T @@ -10075,6 +10232,7 @@ struct VmaAllocator_T bool m_UseExtMemoryPriority; bool m_UseKhrMaintenance4; bool m_UseKhrMaintenance5; + bool m_UseKhrExternalMemoryWin32; const VkDevice m_hDevice; const VkInstance m_hInstance; const bool m_AllocationCallbacksSpecified; @@ -10438,7 +10596,7 @@ VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) m_Id(0), m_hMemory(VK_NULL_HANDLE), m_MapCount(0), - m_pMappedData(VMA_NULL) {} + m_pMappedData(VMA_NULL){} VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() { @@ -10681,6 +10839,14 @@ VkResult VmaDeviceMemoryBlock::BindImageMemory( VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); } + +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaDeviceMemoryBlock::CreateWin32Handle(const VmaAllocator hAllocator, PFN_vkGetMemoryWin32HandleKHR pvkGetMemoryWin32HandleKHR, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + VMA_ASSERT(pHandle); + return m_Handle.GetHandle(hAllocator->m_hDevice, m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 #endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS #ifndef _VMA_ALLOCATION_T_FUNCTIONS @@ -10733,6 +10899,7 @@ void VmaAllocation_T::InitBlockAllocation( } void VmaAllocation_T::InitDedicatedAllocation( + VmaAllocator allocator, VmaPool hParentPool, uint32_t memoryTypeIndex, VkDeviceMemory hMemory, @@ -10747,16 +10914,29 @@ void VmaAllocation_T::InitDedicatedAllocation( m_Size = size; m_MemoryTypeIndex = memoryTypeIndex; m_SuballocationType = (uint8_t)suballocationType; - if(pMappedData != VMA_NULL) - { - VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; - } + m_DedicatedAllocation.m_ExtraData = VMA_NULL; m_DedicatedAllocation.m_hParentPool = hParentPool; m_DedicatedAllocation.m_hMemory = hMemory; - m_DedicatedAllocation.m_pMappedData = pMappedData; m_DedicatedAllocation.m_Prev = VMA_NULL; m_DedicatedAllocation.m_Next = VMA_NULL; + + if (pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + EnsureExtraData(allocator); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = pMappedData; + } +} + +void VmaAllocation_T::Destroy(VmaAllocator allocator) +{ + FreeName(allocator); + + if (GetType() == ALLOCATION_TYPE_DEDICATED) + { + vma_delete(allocator, m_DedicatedAllocation.m_ExtraData); + } } void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) @@ -10861,8 +11041,9 @@ void* VmaAllocation_T::GetMappedData() const } break; case ALLOCATION_TYPE_DEDICATED: - VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0 || IsPersistentMap())); - return m_DedicatedAllocation.m_pMappedData; + VMA_ASSERT((m_DedicatedAllocation.m_ExtraData != VMA_NULL && m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL) == + (m_MapCount != 0 || IsPersistentMap())); + return m_DedicatedAllocation.m_ExtraData != VMA_NULL ? m_DedicatedAllocation.m_ExtraData->m_pMappedData : VMA_NULL; default: VMA_ASSERT(0); return VMA_NULL; @@ -10903,12 +11084,14 @@ VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppDa VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + EnsureExtraData(hAllocator); + if (m_MapCount != 0 || IsPersistentMap()) { if (m_MapCount < 0xFF) { - VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); - *ppData = m_DedicatedAllocation.m_pMappedData; + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData->m_pMappedData != VMA_NULL); + *ppData = m_DedicatedAllocation.m_ExtraData->m_pMappedData; ++m_MapCount; return VK_SUCCESS; } @@ -10929,7 +11112,7 @@ VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppDa ppData); if (result == VK_SUCCESS) { - m_DedicatedAllocation.m_pMappedData = *ppData; + m_DedicatedAllocation.m_ExtraData->m_pMappedData = *ppData; m_MapCount = 1; } return result; @@ -10945,7 +11128,8 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) --m_MapCount; if (m_MapCount == 0 && !IsPersistentMap()) { - m_DedicatedAllocation.m_pMappedData = VMA_NULL; + VMA_ASSERT(m_DedicatedAllocation.m_ExtraData != VMA_NULL); + m_DedicatedAllocation.m_ExtraData->m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory); @@ -10981,8 +11165,33 @@ void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const json.WriteString(m_pName); } } +#if VMA_EXTERNAL_MEMORY_WIN32 +VkResult VmaAllocation_T::GetWin32Handle(VmaAllocator hAllocator, HANDLE hTargetProcess, HANDLE* pHandle) noexcept +{ + auto pvkGetMemoryWin32HandleKHR = hAllocator->GetVulkanFunctions().vkGetMemoryWin32HandleKHR; + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->CreateWin32Handle(hAllocator, pvkGetMemoryWin32HandleKHR, hTargetProcess, pHandle); + case ALLOCATION_TYPE_DEDICATED: + EnsureExtraData(hAllocator); + return m_DedicatedAllocation.m_ExtraData->m_Handle.GetHandle(hAllocator->m_hDevice, m_DedicatedAllocation.m_hMemory, pvkGetMemoryWin32HandleKHR, hTargetProcess, hAllocator->m_UseMutex, 
pHandle); + default: + VMA_ASSERT(0); + return VK_ERROR_FEATURE_NOT_PRESENT; + } +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 #endif // VMA_STATS_STRING_ENABLED +void VmaAllocation_T::EnsureExtraData(VmaAllocator hAllocator) +{ + if (m_DedicatedAllocation.m_ExtraData == VMA_NULL) + { + m_DedicatedAllocation.m_ExtraData = vma_new(hAllocator, VmaAllocationExtraData)(); + } +} + void VmaAllocation_T::FreeName(VmaAllocator hAllocator) { if(m_pName) @@ -11399,6 +11608,10 @@ void VmaBlockVector::Free(const VmaAllocation hAllocation) } IncrementallySortBlocks(); + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + hAllocation->Destroy(m_hAllocator); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } // Destruction of a free block. Deferred until this point, outside of mutex @@ -11409,9 +11622,6 @@ void VmaBlockVector::Free(const VmaAllocation hAllocation) pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } - - m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); - m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); } VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const @@ -12711,6 +12921,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), + m_UseKhrExternalMemoryWin32((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT) != 0), m_hDevice(pCreateInfo->device), m_hInstance(pCreateInfo->instance), m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), @@ -12802,6 +13013,19 @@ VmaAllocator_T::VmaAllocator_T(const 
VmaAllocatorCreateInfo* pCreateInfo) : VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); } #endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif + +#if !(VMA_EXTERNAL_MEMORY_WIN32) + if(m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); @@ -13026,7 +13250,9 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); #endif - +#if VMA_EXTERNAL_MEMORY_WIN32 + VMA_COPY_IF_NOT_NULL(vkGetMemoryWin32HandleKHR); +#endif #undef VMA_COPY_IF_NOT_NULL } @@ -13128,7 +13354,12 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); } #endif - +#if VMA_EXTERNAL_MEMORY_WIN32 + if (m_UseKhrExternalMemoryWin32) + { + VMA_FETCH_DEVICE_FUNC(vkGetMemoryWin32HandleKHR, PFN_vkGetMemoryWin32HandleKHR, "vkGetMemoryWin32HandleKHR"); + } +#endif #undef VMA_FETCH_DEVICE_FUNC #undef VMA_FETCH_INSTANCE_FUNC } @@ -13177,6 +13408,12 @@ void VmaAllocator_T::ValidateVulkanFunctions() VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); } #endif +#if VMA_EXTERNAL_MEMORY_WIN32 + if 
(m_UseKhrExternalMemoryWin32) + { + VMA_ASSERT(m_VulkanFunctions.vkGetMemoryWin32HandleKHR != VMA_NULL); + } +#endif // Not validating these due to suspected driver bugs with these function // pointers being null despite correct extension or Vulkan version is enabled. @@ -13527,7 +13764,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); - (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); + (*pAllocation)->InitDedicatedAllocation(this, pool, memTypeIndex, hMemory, suballocType, pMappedData, size); if (isUserDataString) (*pAllocation)->SetName(this, (const char*)pUserData); else @@ -13863,8 +14100,6 @@ void VmaAllocator_T::FreeMemory( FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); } - allocation->FreeName(this); - switch(allocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: @@ -14335,7 +14570,6 @@ VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) } return res; } - VMA_FALLTHROUGH; // Fallthrough case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: return hAllocation->DedicatedAllocMap(this, ppData); default: @@ -14549,6 +14783,7 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + allocation->Destroy(this); m_AllocationObjectAllocator.Free(allocation); VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); @@ -16169,7 +16404,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( pImageCreateInfo, allocator->GetAllocationCallbacks(), pImage); - if(res >= 0) + if(res == VK_SUCCESS) { VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? 
VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : @@ -16194,14 +16429,14 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( 1, // allocationCount pAllocation); - if(res >= 0) + if(res == VK_SUCCESS) { // 3. Bind image with memory. if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) { res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); } - if(res >= 0) + if(res == VK_SUCCESS) { // All steps succeeded. #if VMA_STATS_STRING_ENABLED @@ -16434,6 +16669,15 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock V VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString); } } +#if VMA_EXTERNAL_MEMORY_WIN32 +VMA_CALL_PRE VkResult VMA_CALL_POST vmaGetMemoryWin32Handle(VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, HANDLE hTargetProcess, HANDLE* VMA_NOT_NULL pHandle) +{ + VMA_ASSERT(allocator && allocation && pHandle); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return allocation->GetWin32Handle(allocator, hTargetProcess, pHandle); +} +#endif // VMA_EXTERNAL_MEMORY_WIN32 #endif // VMA_STATS_STRING_ENABLED #endif // _VMA_PUBLIC_INTERFACE #endif // VMA_IMPLEMENTATION @@ -16567,6 +16811,7 @@ VK_EXT_memory_budget | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT VK_KHR_buffer_device_address | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT VK_EXT_memory_priority | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT +VK_KHR_external_memory_win32 | #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT Example with fetching pointers to Vulkan functions dynamically: @@ -17053,7 +17298,7 @@ implementation whether the allocation succeeds or fails. You can change this beh by using #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is not made if it would exceed the budget or if the budget is already exceeded. VMA then tries to make the allocation from the next eligible Vulkan memory type. 
-The all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +If all of them fail, the call then fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`. Example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag when creating resources that are not essential for the application (e.g. the texture of a specific object) and not to pass it when creating critically important resources @@ -18193,7 +18438,8 @@ allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +// Check result... VkMemoryPropertyFlags memPropFlags; vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); @@ -18204,10 +18450,24 @@ if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) // [Executed in runtime]: memcpy(allocInfo.pMappedData, myData, myDataSize); + result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = buf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); } else { - // Allocation ended up in a non-mappable memory - need to transfer. + // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required. 
VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; stagingBufCreateInfo.size = 65536; stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; @@ -18220,18 +18480,46 @@ else VkBuffer stagingBuf; VmaAllocation stagingAlloc; VmaAllocationInfo stagingAllocInfo; - vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, - &stagingBuf, &stagingAlloc, stagingAllocInfo); + result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, &stagingAllocInfo); + // Check result... // [Executed in runtime]: memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); - vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); - //vkCmdPipelineBarrier: VK_ACCESS_HOST_WRITE_BIT --> VK_ACCESS_TRANSFER_READ_BIT + result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = stagingBuf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + VkBufferCopy bufCopy = { 0, // srcOffset 0, // dstOffset, - myDataSize); // size + myDataSize, // size + }; + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); + + VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer + bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.buffer = buf; + bufMemBarrier2.offset = 0; + bufMemBarrier2.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); } \endcode @@ -18264,14 +18552,22 @@ Please check "CONFIGURATION SECTION" in the code to find macros that you can def before each include of this file or change directly in this file to provide your own implementation of basic facilities like assert, `min()` and `max()` functions, mutex, atomic etc. -The library uses its own implementation of containers by default, but you can switch to using -STL containers instead. For example, define `VMA_ASSERT(expr)` before including the library to provide custom implementation of the assertion, compatible with your project. By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration and empty otherwise. +Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations, +including their names and other parameters. Example: + +\code +#define VMA_LEAK_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) +\endcode + \section config_Vulkan_functions Pointers to Vulkan functions There are multiple ways to import pointers to Vulkan functions in the library. @@ -18526,6 +18822,145 @@ Example use of this extension can be found in the code of the sample and test su accompanying this library. +\page vk_khr_external_memory_win32 VK_KHR_external_memory_win32 + +On Windows, the VK_KHR_external_memory_win32 device extension allows exporting a Win32 `HANDLE` +of a `VkDeviceMemory` block, to be able to reference the memory on other Vulkan logical devices or instances, +in multiple processes, and/or in multiple APIs. +VMA offers support for it. 
+
+\section vk_khr_external_memory_win32_initialization Initialization
+
+1) Make sure the extension is defined in the code by including the following header before including VMA:
+
+\code
+#include <vulkan/vulkan_win32.h>
+\endcode
+
+2) Check if "VK_KHR_external_memory_win32" is available among device extensions.
+Enable it when creating the `VkDevice` object.
+
+3) Enable the usage of this extension in VMA by setting flag #VMA_ALLOCATOR_CREATE_KHR_EXTERNAL_MEMORY_WIN32_BIT
+when calling vmaCreateAllocator().
+
+4) Make sure that VMA has access to the `vkGetMemoryWin32HandleKHR` function by either enabling `VMA_DYNAMIC_VULKAN_FUNCTIONS` macro
+or setting VmaVulkanFunctions::vkGetMemoryWin32HandleKHR explicitly.
+For more information, see \ref quick_start_initialization_importing_vulkan_functions.
+
+\section vk_khr_external_memory_win32_preparations Preparations
+
+You can find example usage among tests, in file "Tests.cpp", function `TestWin32Handles()`.
+
+To use the extension, buffers need to be created with `VkExternalMemoryBufferCreateInfoKHR` attached to their `pNext` chain,
+and memory allocations need to be made with `VkExportMemoryAllocateInfoKHR` attached to their `pNext` chain.
+To make use of them, you need to use \ref custom_memory_pools. Example:
+
+\code
+// Define an example buffer and allocation parameters.
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 0x10000; // Doesn't matter here.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+exampleBufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo exampleAllocCreateInfo = {};
+exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+// Find memory type index to use for the custom pool.
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_Allocator,
+    &exampleBufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a custom pool.
+constexpr static VkExportMemoryAllocateInfoKHR exportMemAllocInfo = {
+    VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.pMemoryAllocateNext = (void*)&exportMemAllocInfo;
+
+VmaPool pool;
+res = vmaCreatePool(g_Allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyPool(g_Allocator, pool);
+\endcode
+
+Note that the structure passed as VmaPoolCreateInfo::pMemoryAllocateNext must remain alive and unchanged
+for the whole lifetime of the custom pool, because it will be used when the pool allocates a new device memory block.
+No copy is made internally. This is why variable `exportMemAllocInfo` is defined as `static`.
+
+\section vk_khr_external_memory_win32_memory_allocation Memory allocation
+
+Finally, you can create a buffer with an allocation out of the custom pool.
+The buffer should use the same flags as the sample buffer used to find the memory type.
+It should also specify `VkExternalMemoryBufferCreateInfoKHR` in its `pNext` chain.
+
+\code
+VkExternalMemoryBufferCreateInfoKHR externalMemBufCreateInfo = {
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
+    nullptr,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+};
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = // Your desired buffer size.
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+bufCreateInfo.pNext = &externalMemBufCreateInfo;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool; // It is enough to set this one member.
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(g_Allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, don't forget to destroy it!
+vmaDestroyBuffer(g_Allocator, buf, alloc);
+\endcode
+
+If you need each allocation to have its own device memory block and start at offset 0, you can still do so
+by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag. It works also with custom pools.
+
+\section vk_khr_external_memory_win32_exporting_win32_handle Exporting Win32 handle
+
+After the allocation is created, you can acquire a Win32 `HANDLE` to the `VkDeviceMemory` block it belongs to.
+VMA function vmaGetMemoryWin32Handle() is a replacement for the Vulkan function `vkGetMemoryWin32HandleKHR`.
+
+\code
+HANDLE handle;
+res = vmaGetMemoryWin32Handle(g_Allocator, alloc, nullptr, &handle);
+// Check res...
+
+// YOUR OTHER CODE COMES HERE....
+
+// At the end, you must close the handle.
+CloseHandle(handle);
+\endcode
+
+Documentation of the VK_KHR_external_memory_win32 extension states that:
+
+> If handleType is defined as an NT handle, vkGetMemoryWin32HandleKHR must be called no more than once for each valid unique combination of memory and handleType.
+
+This is ensured automatically inside VMA.
+The library fetches the handle on first use, remembers it internally, and closes it when the memory block or dedicated allocation is destroyed.
+Every time you call vmaGetMemoryWin32Handle(), VMA calls `DuplicateHandle` and returns a new handle that you need to close.
+
+For further information, please check documentation of the vmaGetMemoryWin32Handle() function.
+ + \page enabling_buffer_device_address Enabling buffer device address Device extension VK_KHR_buffer_device_address