diff --git a/ci/licenses_golden/licenses_flutter b/ci/licenses_golden/licenses_flutter index 0e582e6c90aa0..f94f7d3046bf5 100644 --- a/ci/licenses_golden/licenses_flutter +++ b/ci/licenses_golden/licenses_flutter @@ -1601,6 +1601,7 @@ ORIGIN: ../../../flutter/impeller/renderer/snapshot.h + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/stroke.comp + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/surface.cc + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/surface.h + ../../../flutter/LICENSE +ORIGIN: ../../../flutter/impeller/renderer/threadgroup_sizing_test.comp + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/vertex_buffer_builder.cc + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/vertex_buffer_builder.h + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/vertex_descriptor.cc + ../../../flutter/LICENSE @@ -4272,6 +4273,7 @@ FILE: ../../../flutter/impeller/renderer/snapshot.h FILE: ../../../flutter/impeller/renderer/stroke.comp FILE: ../../../flutter/impeller/renderer/surface.cc FILE: ../../../flutter/impeller/renderer/surface.h +FILE: ../../../flutter/impeller/renderer/threadgroup_sizing_test.comp FILE: ../../../flutter/impeller/renderer/vertex_buffer_builder.cc FILE: ../../../flutter/impeller/renderer/vertex_buffer_builder.h FILE: ../../../flutter/impeller/renderer/vertex_descriptor.cc diff --git a/impeller/renderer/BUILD.gn b/impeller/renderer/BUILD.gn index ec516cd56829c..b43c71a3e34b0 100644 --- a/impeller/renderer/BUILD.gn +++ b/impeller/renderer/BUILD.gn @@ -23,6 +23,7 @@ if (impeller_enable_compute) { "stroke.comp", "path_polyline.comp", "prefix_sum_test.comp", + "threadgroup_sizing_test.comp", ] } diff --git a/impeller/renderer/backend/metal/compute_pass_mtl.mm b/impeller/renderer/backend/metal/compute_pass_mtl.mm index 864b7d7d0de0e..5cdf4a5c3d9fd 100644 --- a/impeller/renderer/backend/metal/compute_pass_mtl.mm +++ b/impeller/renderer/backend/metal/compute_pass_mtl.mm @@ -258,8 +258,10 @@ static bool Bind(ComputePassBindingsCache& pass, // Special case for linear processing. if (height == 1) { - int64_t threadGroups = - std::max(width / maxTotalThreadsPerThreadgroup, 1LL); + int64_t threadGroups = std::max( + static_cast( + std::ceil(width * 1.0 / maxTotalThreadsPerThreadgroup * 1.0)), + 1LL); [encoder dispatchThreadgroups:MTLSizeMake(threadGroups, 1, 1) threadsPerThreadgroup:MTLSizeMake(maxTotalThreadsPerThreadgroup, 1, 1)]; diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.cc b/impeller/renderer/backend/vulkan/compute_pass_vk.cc index bb1651c83d332..cb1074f32f9d5 100644 --- a/impeller/renderer/backend/vulkan/compute_pass_vk.cc +++ b/impeller/renderer/backend/vulkan/compute_pass_vk.cc @@ -252,14 +252,22 @@ bool ComputePassVK::OnEncodeCommands(const Context& context, int64_t width = grid_size.width; int64_t height = grid_size.height; - while (width > max_wg_size[0]) { - width = std::max(static_cast(1), width / 2); + // Special case for linear processing. + if (height == 1) { + int64_t minimum = 1; + int64_t threadGroups = std::max( + static_cast(std::ceil(width * 1.0 / max_wg_size[0] * 1.0)), + minimum); + cmd_buffer.dispatch(threadGroups, 1, 1); + } else { + while (width > max_wg_size[0]) { + width = std::max(static_cast(1), width / 2); + } + while (height > max_wg_size[1]) { + height = std::max(static_cast(1), height / 2); + } + cmd_buffer.dispatch(width, height, 1); } - while (height > max_wg_size[1]) { - height = std::max(static_cast(1), height / 2); - } - - cmd_buffer.dispatch(width, height, 1); } } diff --git a/impeller/renderer/backend/vulkan/context_vk.h b/impeller/renderer/backend/vulkan/context_vk.h index 53f51757903d3..4b691c8366ec4 100644 --- a/impeller/renderer/backend/vulkan/context_vk.h +++ b/impeller/renderer/backend/vulkan/context_vk.h @@ -136,6 +136,11 @@ class ContextVK final : public Context, struct DeviceHolderImpl : public DeviceHolder { // |DeviceHolder| const vk::Device& GetDevice() const override { return device.get(); } + // |DeviceHolder| + const vk::PhysicalDevice& GetPhysicalDevice() const override { + return physical_device; + } + vk::UniqueInstance instance; vk::PhysicalDevice physical_device; vk::UniqueDevice device; diff --git a/impeller/renderer/backend/vulkan/device_holder.h b/impeller/renderer/backend/vulkan/device_holder.h index cb9fdee248584..9086666ba13ed 100644 --- a/impeller/renderer/backend/vulkan/device_holder.h +++ b/impeller/renderer/backend/vulkan/device_holder.h @@ -12,6 +12,7 @@ class DeviceHolder { public: virtual ~DeviceHolder() = default; virtual const vk::Device& GetDevice() const = 0; + virtual const vk::PhysicalDevice& GetPhysicalDevice() const = 0; }; } // namespace impeller diff --git a/impeller/renderer/backend/vulkan/pipeline_library_vk.cc b/impeller/renderer/backend/vulkan/pipeline_library_vk.cc index c2e26c74bf254..201f88e32a3ab 100644 --- a/impeller/renderer/backend/vulkan/pipeline_library_vk.cc +++ b/impeller/renderer/backend/vulkan/pipeline_library_vk.cc @@ -357,17 +357,35 @@ std::unique_ptr PipelineLibraryVK::CreateComputePipeline( return nullptr; } + std::shared_ptr strong_device = device_holder_.lock(); + if (!strong_device) { + return nullptr; + } + auto device_properties = strong_device->GetPhysicalDevice().getProperties(); + auto max_wg_size = device_properties.limits.maxComputeWorkGroupSize; + + // Give all compute shaders a specialization constant entry for the + // workgroup/threadgroup size. + vk::SpecializationMapEntry specialization_map_entry[1]; + + uint32_t workgroup_size_x = max_wg_size[0]; + specialization_map_entry[0].constantID = 0; + specialization_map_entry[0].offset = 0; + specialization_map_entry[0].size = sizeof(uint32_t); + + vk::SpecializationInfo specialization_info; + specialization_info.mapEntryCount = 1; + specialization_info.pMapEntries = &specialization_map_entry[0]; + specialization_info.dataSize = sizeof(uint32_t); + specialization_info.pData = &workgroup_size_x; + vk::PipelineShaderStageCreateInfo info; info.setStage(vk::ShaderStageFlagBits::eCompute); info.setPName("main"); info.setModule(ShaderFunctionVK::Cast(entrypoint.get())->GetModule()); + info.setPSpecializationInfo(&specialization_info); pipeline_info.setStage(info); - std::shared_ptr strong_device = device_holder_.lock(); - if (!strong_device) { - return nullptr; - } - //---------------------------------------------------------------------------- /// Pipeline Layout a.k.a the descriptor sets and uniforms. /// diff --git a/impeller/renderer/compute_pipeline_builder.h b/impeller/renderer/compute_pipeline_builder.h index f4fe1b8b9763f..7a7d34cbcedcf 100644 --- a/impeller/renderer/compute_pipeline_builder.h +++ b/impeller/renderer/compute_pipeline_builder.h @@ -45,9 +45,8 @@ struct ComputePipelineBuilder { ComputePipelineDescriptor desc; if (InitializePipelineDescriptorDefaults(context, desc)) { return {std::move(desc)}; - } else { - return std::nullopt; } + return std::nullopt; } [[nodiscard]] static bool InitializePipelineDescriptorDefaults( diff --git a/impeller/renderer/compute_unittests.cc b/impeller/renderer/compute_unittests.cc index 2ebb92c96563b..67517a4ebaafa 100644 --- a/impeller/renderer/compute_unittests.cc +++ b/impeller/renderer/compute_unittests.cc @@ -19,6 +19,7 @@ #include "impeller/renderer/compute_pipeline_builder.h" #include "impeller/renderer/pipeline_library.h" #include "impeller/renderer/prefix_sum_test.comp.h" +#include "impeller/renderer/threadgroup_sizing_test.comp.h" namespace impeller { namespace testing { @@ -176,6 +177,59 @@ TEST_P(ComputeTest, CanComputePrefixSum) { latch.Wait(); } +TEST_P(ComputeTest, 1DThreadgroupSizingIsCorrect) { + using CS = ThreadgroupSizingTestComputeShader; + auto context = GetContext(); + ASSERT_TRUE(context); + ASSERT_TRUE(context->GetCapabilities()->SupportsCompute()); + + using SamplePipelineBuilder = ComputePipelineBuilder; + auto pipeline_desc = + SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context); + ASSERT_TRUE(pipeline_desc.has_value()); + auto compute_pipeline = + context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get(); + ASSERT_TRUE(compute_pipeline); + + auto cmd_buffer = context->CreateCommandBuffer(); + auto pass = cmd_buffer->CreateComputePass(); + ASSERT_TRUE(pass && pass->IsValid()); + + static constexpr size_t kCount = 2048; + + pass->SetGridSize(ISize(kCount, 1)); + pass->SetThreadGroupSize(ISize(kCount, 1)); + + ComputeCommand cmd; + cmd.label = "Compute"; + cmd.pipeline = compute_pipeline; + + auto output_buffer = CreateHostVisibleDeviceBuffer>( + context, "Output Buffer"); + + CS::BindOutputData(cmd, output_buffer->AsBufferView()); + + ASSERT_TRUE(pass->AddCommand(std::move(cmd))); + ASSERT_TRUE(pass->EncodeCommands()); + + fml::AutoResetWaitableEvent latch; + ASSERT_TRUE(cmd_buffer->SubmitCommands( + [&latch, output_buffer](CommandBuffer::Status status) { + EXPECT_EQ(status, CommandBuffer::Status::kCompleted); + + auto view = output_buffer->AsBufferView(); + EXPECT_EQ(view.range.length, sizeof(CS::OutputData)); + + CS::OutputData* output = + reinterpret_cast*>(view.contents); + EXPECT_TRUE(output); + EXPECT_EQ(output->data[kCount - 1], kCount - 1); + latch.Signal(); + })); + + latch.Wait(); +} + TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) { using CS = PrefixSumTestComputeShader; diff --git a/impeller/renderer/prefix_sum_test.comp b/impeller/renderer/prefix_sum_test.comp index 0f8bff231ffa5..7cc940fb1251e 100644 --- a/impeller/renderer/prefix_sum_test.comp +++ b/impeller/renderer/prefix_sum_test.comp @@ -2,9 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// TODO(dnfield): This should not need to be so small, -// https://github.com/flutter/flutter/issues/119357 -layout(local_size_x = 256, local_size_y = 1) in; +layout(local_size_x_id = 0) in; layout(std430) buffer; #include diff --git a/impeller/renderer/threadgroup_sizing_test.comp b/impeller/renderer/threadgroup_sizing_test.comp new file mode 100644 index 0000000000000..3d2e02af507ca --- /dev/null +++ b/impeller/renderer/threadgroup_sizing_test.comp @@ -0,0 +1,18 @@ +// Copyright 2013 The Flutter Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Size is passed in via specialization constant. +layout(local_size_x_id = 0) in; + +layout(std430) buffer; + +layout(binding = 1) writeonly buffer OutputData { + uint data[]; +} +output_data; + +void main() { + uint ident = gl_GlobalInvocationID.x; + output_data.data[ident] = ident; +} diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 68afc86f992ed..e99ada0edc0a8 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -13569,9 +13569,9 @@ "load_store" ], "longest_path_cycles": [ - 2.65625, + 2.450000047683716, 0.0, - 2.65625, + 2.450000047683716, 1.0, 72.0, 0.0 @@ -13589,9 +13589,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.9375, + 0.762499988079071, 0.0, - 0.9375, + 0.762499988079071, 0.0, 0.0, 0.0 @@ -13600,9 +13600,9 @@ "load_store" ], "total_cycles": [ - 2.65625, + 2.46875, 0.0, - 2.65625, + 2.46875, 1.0, 72.0, 0.0 @@ -13612,7 +13612,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 17 + "work_registers_used": 18 } } } @@ -13680,6 +13680,68 @@ } } }, + "flutter/impeller/renderer/threadgroup_sizing_test.comp.vkspv": { + "Mali-G78": { + "core": "Mali-G78", + "filename": "flutter/impeller/renderer/threadgroup_sizing_test.comp.vkspv", + "has_uniform_computation": true, + "type": "Compute", + "variants": { + "Main": { + "fp16_arithmetic": null, + "has_stack_spilling": false, + "performance": { + "longest_path_bound_pipelines": [ + "load_store" + ], + "longest_path_cycles": [ + 0.03125, + 0.0, + 0.03125, + 0.0, + 1.0, + 0.0 + ], + "pipelines": [ + "arith_total", + "arith_fma", + "arith_cvt", + "arith_sfu", + "load_store", + "texture" + ], + "shortest_path_bound_pipelines": [ + "load_store" + ], + "shortest_path_cycles": [ + 0.03125, + 0.0, + 0.03125, + 0.0, + 1.0, + 0.0 + ], + "total_bound_pipelines": [ + "load_store" + ], + "total_cycles": [ + 0.03125, + 0.0, + 0.03125, + 0.0, + 1.0, + 0.0 + ] + }, + "shared_storage_used": 0, + "stack_spill_bytes": 0, + "thread_occupancy": 100, + "uniform_registers_used": 2, + "work_registers_used": 4 + } + } + } + }, "flutter/impeller/scene/shaders/gles/skinned.vert.gles": { "Mali-G78": { "core": "Mali-G78",