diff --git a/include/luisa/luisa-compute.h b/include/luisa/luisa-compute.h index 72ca8712a..879be3258 100644 --- a/include/luisa/luisa-compute.h +++ b/include/luisa/luisa-compute.h @@ -236,6 +236,7 @@ #include #include #include +#include #include #include #include diff --git a/include/luisa/xir/builder.h b/include/luisa/xir/builder.h index 3db3d918e..bb806f246 100644 --- a/include/luisa/xir/builder.h +++ b/include/luisa/xir/builder.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/include/luisa/xir/instructions/intrinsic.h b/include/luisa/xir/instructions/intrinsic.h index 9c204d10a..b998fbe3b 100644 --- a/include/luisa/xir/instructions/intrinsic.h +++ b/include/luisa/xir/instructions/intrinsic.h @@ -41,17 +41,6 @@ enum struct IntrinsicOp { BINARY_EQUAL, BINARY_NOT_EQUAL, - // thread coordination - THREAD_ID, - BLOCK_ID, - WARP_LANE_ID, - DISPATCH_ID, - KERNEL_ID, - OBJECT_ID, - BLOCK_SIZE, - WARP_SIZE, - DISPATCH_SIZE, - // block synchronization SYNCHRONIZE_BLOCK,// () diff --git a/include/luisa/xir/special_register.h b/include/luisa/xir/special_register.h new file mode 100644 index 000000000..8496a5ba9 --- /dev/null +++ b/include/luisa/xir/special_register.h @@ -0,0 +1,87 @@ +#pragma once + +#include + +namespace luisa::compute::xir { + +enum struct DerivedSpecialRegisterTag { + THREAD_ID, + BLOCK_ID, + WARP_LANE_ID, + DISPATCH_ID, + KERNEL_ID, + OBJECT_ID, + BLOCK_SIZE, + WARP_SIZE, + DISPATCH_SIZE, +}; + +[[nodiscard]] constexpr luisa::string_view to_string(DerivedSpecialRegisterTag tag) noexcept { + using namespace std::string_view_literals; + switch (tag) { + case DerivedSpecialRegisterTag::THREAD_ID: return "thread_id"sv; + case DerivedSpecialRegisterTag::BLOCK_ID: return "block_id"sv; + case DerivedSpecialRegisterTag::WARP_LANE_ID: return "warp_lane_id"sv; + case DerivedSpecialRegisterTag::DISPATCH_ID: return "dispatch_id"sv; + case DerivedSpecialRegisterTag::KERNEL_ID: return "kernel_id"sv; + case 
DerivedSpecialRegisterTag::OBJECT_ID: return "object_id"sv; + case DerivedSpecialRegisterTag::BLOCK_SIZE: return "block_size"sv; + case DerivedSpecialRegisterTag::WARP_SIZE: return "warp_size"sv; + case DerivedSpecialRegisterTag::DISPATCH_SIZE: return "dispatch_size"sv; + } + return "unknown"sv; +} + +class LC_XIR_API SpecialRegister : public DerivedValue { +public: + explicit SpecialRegister(const Type *type) noexcept : DerivedValue{type} {} + [[nodiscard]] virtual DerivedSpecialRegisterTag derived_special_register_tag() const noexcept = 0; + [[nodiscard]] static SpecialRegister *create(DerivedSpecialRegisterTag tag) noexcept; +}; + +namespace detail { + +[[nodiscard]] LC_XIR_API const Type *special_register_type_uint() noexcept; +[[nodiscard]] LC_XIR_API const Type *special_register_type_uint3() noexcept; + +template +[[nodiscard]] auto get_special_register_type() noexcept { + if constexpr (std::is_same_v) { + return special_register_type_uint(); + } else if constexpr (std::is_same_v) { + return special_register_type_uint3(); + } else { + static_assert(always_false_v, "Unsupported special register type."); + } +} + +}// namespace detail + +template +class DerivedSpecialRegister : public SpecialRegister { +public: + DerivedSpecialRegister() noexcept : SpecialRegister{detail::get_special_register_type()} {} + [[nodiscard]] static constexpr auto + static_derived_special_register_tag() noexcept { return tag; } + [[nodiscard]] DerivedSpecialRegisterTag + derived_special_register_tag() const noexcept final { + return static_derived_special_register_tag(); + } + [[nodiscard]] static auto create() noexcept { + return static_cast(SpecialRegister::create(tag)); + } +}; + +// special registers +// note that we add the `SPR` prefix to avoid potential name conflicts with macros +using SPR_ThreadID = DerivedSpecialRegister; +using SPR_BlockID = DerivedSpecialRegister; +using SPR_WarpLaneID = DerivedSpecialRegister; +using SPR_DispatchID = DerivedSpecialRegister; +using 
SPR_KernelID = DerivedSpecialRegister; +using SPR_ObjectID = DerivedSpecialRegister; +using SPR_BlockSize = DerivedSpecialRegister; +using SPR_WarpSize = DerivedSpecialRegister; +using SPR_DispatchSize = DerivedSpecialRegister; + +}// namespace luisa::compute::xir diff --git a/include/luisa/xir/value.h b/include/luisa/xir/value.h index 557d199b6..fae0ec66c 100644 --- a/include/luisa/xir/value.h +++ b/include/luisa/xir/value.h @@ -15,6 +15,7 @@ enum struct DerivedValueTag { INSTRUCTION, CONSTANT, ARGUMENT, + SPECIAL_REGISTER, }; class LC_XIR_API Value : public PooledObject, diff --git a/src/backends/fallback/fallback_codegen.cpp b/src/backends/fallback/fallback_codegen.cpp index ec5b6e713..e197a2075 100644 --- a/src/backends/fallback/fallback_codegen.cpp +++ b/src/backends/fallback/fallback_codegen.cpp @@ -324,6 +324,22 @@ class FallbackCodegen { } } + [[nodiscard]] llvm::Value *_translate_special_register(CurrentFunction &current, IRBuilder &b, + const xir::SpecialRegister *sreg) noexcept { + switch (sreg->derived_special_register_tag()) { + case xir::DerivedSpecialRegisterTag::THREAD_ID: return current.builtin_variables[CurrentFunction::builtin_variable_index_thread_id]; + case xir::DerivedSpecialRegisterTag::BLOCK_ID: return current.builtin_variables[CurrentFunction::builtin_variable_index_block_id]; + case xir::DerivedSpecialRegisterTag::WARP_LANE_ID: return llvm::ConstantInt::get(b.getInt32Ty(), 0);// CPU only has one lane + case xir::DerivedSpecialRegisterTag::DISPATCH_ID: return current.builtin_variables[CurrentFunction::builtin_variable_index_dispatch_id]; + case xir::DerivedSpecialRegisterTag::KERNEL_ID: LUISA_NOT_IMPLEMENTED(); + case xir::DerivedSpecialRegisterTag::OBJECT_ID: LUISA_NOT_IMPLEMENTED(); + case xir::DerivedSpecialRegisterTag::BLOCK_SIZE: return current.builtin_variables[CurrentFunction::builtin_variable_index_block_size]; + case xir::DerivedSpecialRegisterTag::WARP_SIZE: return llvm::ConstantInt::get(b.getInt32Ty(), 1);// CPU only has one lane + 
case xir::DerivedSpecialRegisterTag::DISPATCH_SIZE: return current.builtin_variables[CurrentFunction::builtin_variable_index_dispatch_size]; + } + LUISA_ERROR_WITH_LOCATION("Invalid special register."); + } + [[nodiscard]] llvm::Value *_lookup_value(CurrentFunction &current, IRBuilder &b, const xir::Value *v, bool load_global = true) noexcept { LUISA_ASSERT(v != nullptr, "Value is null."); switch (v->derived_value_tag()) { @@ -348,6 +364,10 @@ class FallbackCodegen { LUISA_ASSERT(iter != current.value_map.end(), "Value not found."); return iter->second; } + case xir::DerivedValueTag::SPECIAL_REGISTER: { + auto sreg = static_cast(v); + return _translate_special_register(current, b, sreg); + } } LUISA_ERROR_WITH_LOCATION("Invalid value."); } @@ -1959,15 +1979,6 @@ class FallbackCodegen { case xir::IntrinsicOp::BINARY_GREATER_EQUAL: return _translate_binary_greater_equal(current, b, inst->operand(0u), inst->operand(1u)); case xir::IntrinsicOp::BINARY_EQUAL: return _translate_binary_equal(current, b, inst->operand(0u), inst->operand(1u)); case xir::IntrinsicOp::BINARY_NOT_EQUAL: return _translate_binary_not_equal(current, b, inst->operand(0u), inst->operand(1u)); - case xir::IntrinsicOp::THREAD_ID: return current.builtin_variables[CurrentFunction::builtin_variable_index_thread_id]; - case xir::IntrinsicOp::BLOCK_ID: return current.builtin_variables[CurrentFunction::builtin_variable_index_block_id]; - case xir::IntrinsicOp::WARP_LANE_ID: return llvm::ConstantInt::get(b.getInt32Ty(), 0);// CPU only has one lane - case xir::IntrinsicOp::DISPATCH_ID: return current.builtin_variables[CurrentFunction::builtin_variable_index_dispatch_id]; - case xir::IntrinsicOp::KERNEL_ID: LUISA_NOT_IMPLEMENTED(); - case xir::IntrinsicOp::OBJECT_ID: LUISA_NOT_IMPLEMENTED(); - case xir::IntrinsicOp::BLOCK_SIZE: return current.builtin_variables[CurrentFunction::builtin_variable_index_block_size]; - case xir::IntrinsicOp::WARP_SIZE: return llvm::ConstantInt::get(b.getInt32Ty(), 1);// CPU only has one 
lane - case xir::IntrinsicOp::DISPATCH_SIZE: return current.builtin_variables[CurrentFunction::builtin_variable_index_dispatch_size]; case xir::IntrinsicOp::SYNCHRONIZE_BLOCK: LUISA_NOT_IMPLEMENTED(); case xir::IntrinsicOp::ALL: { auto llvm_operand = _lookup_value(current, b, inst->operand(0u)); diff --git a/src/tests/test_xir_builder.cpp b/src/tests/test_xir_builder.cpp index 926fec1b9..09399ec78 100644 --- a/src/tests/test_xir_builder.cpp +++ b/src/tests/test_xir_builder.cpp @@ -31,7 +31,7 @@ int main() { b.set_insertion_point(f->create_body_block()); auto add = b.call(Type::of(), xir::IntrinsicOp::BINARY_MUL, {x, y}); auto mul = b.call(Type::of(), xir::IntrinsicOp::BINARY_ADD, {add, y}); - auto coord = b.call(Type::of(), xir::IntrinsicOp::DISPATCH_ID, {}); + auto coord = xir::SPR_DispatchID::create(); auto coord_x = b.call(Type::of(), xir::IntrinsicOp::EXTRACT, {coord, u32_zero}); auto outline = b.outline(); auto outline_body = outline->create_target_block(); diff --git a/src/xir/CMakeLists.txt b/src/xir/CMakeLists.txt index 1fd6c7ed1..8a3fb691f 100644 --- a/src/xir/CMakeLists.txt +++ b/src/xir/CMakeLists.txt @@ -9,6 +9,7 @@ set(LUISA_COMPUTE_XIR_SOURCES metadata.cpp module.cpp pool.cpp + special_register.cpp use.cpp user.cpp value.cpp diff --git a/src/xir/instructions/intrinsic_name_map.inl.h b/src/xir/instructions/intrinsic_name_map.inl.h index 2f47e7ac6..ade9b758f 100644 --- a/src/xir/instructions/intrinsic_name_map.inl.h +++ b/src/xir/instructions/intrinsic_name_map.inl.h @@ -1,480 +1,462 @@ -#pragma once - -luisa::string to_string(IntrinsicOp op) noexcept { - switch (op) { - case IntrinsicOp::NOP: return "nop"; - case IntrinsicOp::UNARY_PLUS: return "unary_plus"; - case IntrinsicOp::UNARY_MINUS: return "unary_minus"; - case IntrinsicOp::UNARY_LOGIC_NOT: return "unary_logic_not"; - case IntrinsicOp::UNARY_BIT_NOT: return "unary_bit_not"; - case IntrinsicOp::BINARY_ADD: return "binary_add"; - case IntrinsicOp::BINARY_SUB: return "binary_sub"; - case 
IntrinsicOp::BINARY_MUL: return "binary_mul"; - case IntrinsicOp::BINARY_DIV: return "binary_div"; - case IntrinsicOp::BINARY_MOD: return "binary_mod"; - case IntrinsicOp::BINARY_LOGIC_AND: return "binary_logic_and"; - case IntrinsicOp::BINARY_LOGIC_OR: return "binary_logic_or"; - case IntrinsicOp::BINARY_BIT_AND: return "binary_bit_and"; - case IntrinsicOp::BINARY_BIT_OR: return "binary_bit_or"; - case IntrinsicOp::BINARY_BIT_XOR: return "binary_bit_xor"; - case IntrinsicOp::BINARY_SHIFT_LEFT: return "binary_shift_left"; - case IntrinsicOp::BINARY_SHIFT_RIGHT: return "binary_shift_right"; - case IntrinsicOp::BINARY_ROTATE_LEFT: return "binary_rotate_left"; - case IntrinsicOp::BINARY_ROTATE_RIGHT: return "binary_rotate_right"; - case IntrinsicOp::BINARY_LESS: return "binary_less"; - case IntrinsicOp::BINARY_GREATER: return "binary_greater"; - case IntrinsicOp::BINARY_LESS_EQUAL: return "binary_less_equal"; - case IntrinsicOp::BINARY_GREATER_EQUAL: return "binary_greater_equal"; - case IntrinsicOp::BINARY_EQUAL: return "binary_equal"; - case IntrinsicOp::BINARY_NOT_EQUAL: return "binary_not_equal"; - case IntrinsicOp::THREAD_ID: return "thread_id"; - case IntrinsicOp::BLOCK_ID: return "block_id"; - case IntrinsicOp::WARP_LANE_ID: return "warp_lane_id"; - case IntrinsicOp::DISPATCH_ID: return "dispatch_id"; - case IntrinsicOp::KERNEL_ID: return "kernel_id"; - case IntrinsicOp::OBJECT_ID: return "object_id"; - case IntrinsicOp::BLOCK_SIZE: return "block_size"; - case IntrinsicOp::WARP_SIZE: return "warp_size"; - case IntrinsicOp::DISPATCH_SIZE: return "dispatch_size"; - case IntrinsicOp::SYNCHRONIZE_BLOCK: return "synchronize_block"; - case IntrinsicOp::ALL: return "all"; - case IntrinsicOp::ANY: return "any"; - case IntrinsicOp::SELECT: return "select"; - case IntrinsicOp::CLAMP: return "clamp"; - case IntrinsicOp::SATURATE: return "saturate"; - case IntrinsicOp::LERP: return "lerp"; - case IntrinsicOp::SMOOTHSTEP: return "smoothstep"; - case IntrinsicOp::STEP: 
return "step"; - case IntrinsicOp::ABS: return "abs"; - case IntrinsicOp::MIN: return "min"; - case IntrinsicOp::MAX: return "max"; - case IntrinsicOp::CLZ: return "clz"; - case IntrinsicOp::CTZ: return "ctz"; - case IntrinsicOp::POPCOUNT: return "popcount"; - case IntrinsicOp::REVERSE: return "reverse"; - case IntrinsicOp::ISINF: return "isinf"; - case IntrinsicOp::ISNAN: return "isnan"; - case IntrinsicOp::ACOS: return "acos"; - case IntrinsicOp::ACOSH: return "acosh"; - case IntrinsicOp::ASIN: return "asin"; - case IntrinsicOp::ASINH: return "asinh"; - case IntrinsicOp::ATAN: return "atan"; - case IntrinsicOp::ATAN2: return "atan2"; - case IntrinsicOp::ATANH: return "atanh"; - case IntrinsicOp::COS: return "cos"; - case IntrinsicOp::COSH: return "cosh"; - case IntrinsicOp::SIN: return "sin"; - case IntrinsicOp::SINH: return "sinh"; - case IntrinsicOp::TAN: return "tan"; - case IntrinsicOp::TANH: return "tanh"; - case IntrinsicOp::EXP: return "exp"; - case IntrinsicOp::EXP2: return "exp2"; - case IntrinsicOp::EXP10: return "exp10"; - case IntrinsicOp::LOG: return "log"; - case IntrinsicOp::LOG2: return "log2"; - case IntrinsicOp::LOG10: return "log10"; - case IntrinsicOp::POW: return "pow"; - case IntrinsicOp::POW_INT: return "pow_int"; - case IntrinsicOp::SQRT: return "sqrt"; - case IntrinsicOp::RSQRT: return "rsqrt"; - case IntrinsicOp::CEIL: return "ceil"; - case IntrinsicOp::FLOOR: return "floor"; - case IntrinsicOp::FRACT: return "fract"; - case IntrinsicOp::TRUNC: return "trunc"; - case IntrinsicOp::ROUND: return "round"; - case IntrinsicOp::RINT: return "rint"; - case IntrinsicOp::FMA: return "fma"; - case IntrinsicOp::COPYSIGN: return "copysign"; - case IntrinsicOp::CROSS: return "cross"; - case IntrinsicOp::DOT: return "dot"; - case IntrinsicOp::LENGTH: return "length"; - case IntrinsicOp::LENGTH_SQUARED: return "length_squared"; - case IntrinsicOp::NORMALIZE: return "normalize"; - case IntrinsicOp::FACEFORWARD: return "faceforward"; - case 
IntrinsicOp::REFLECT: return "reflect"; - case IntrinsicOp::REDUCE_SUM: return "reduce_sum"; - case IntrinsicOp::REDUCE_PRODUCT: return "reduce_product"; - case IntrinsicOp::REDUCE_MIN: return "reduce_min"; - case IntrinsicOp::REDUCE_MAX: return "reduce_max"; - case IntrinsicOp::OUTER_PRODUCT: return "outer_product"; - case IntrinsicOp::MATRIX_COMP_NEG: return "matrix_comp_neg"; - case IntrinsicOp::MATRIX_COMP_ADD: return "matrix_comp_add"; - case IntrinsicOp::MATRIX_COMP_SUB: return "matrix_comp_sub"; - case IntrinsicOp::MATRIX_COMP_MUL: return "matrix_comp_mul"; - case IntrinsicOp::MATRIX_COMP_DIV: return "matrix_comp_div"; - case IntrinsicOp::MATRIX_LINALG_MUL: return "matrix_linalg_mul"; - case IntrinsicOp::MATRIX_DETERMINANT: return "matrix_determinant"; - case IntrinsicOp::MATRIX_TRANSPOSE: return "matrix_transpose"; - case IntrinsicOp::MATRIX_INVERSE: return "matrix_inverse"; - case IntrinsicOp::ATOMIC_EXCHANGE: return "atomic_exchange"; - case IntrinsicOp::ATOMIC_COMPARE_EXCHANGE: return "atomic_compare_exchange"; - case IntrinsicOp::ATOMIC_FETCH_ADD: return "atomic_fetch_add"; - case IntrinsicOp::ATOMIC_FETCH_SUB: return "atomic_fetch_sub"; - case IntrinsicOp::ATOMIC_FETCH_AND: return "atomic_fetch_and"; - case IntrinsicOp::ATOMIC_FETCH_OR: return "atomic_fetch_or"; - case IntrinsicOp::ATOMIC_FETCH_XOR: return "atomic_fetch_xor"; - case IntrinsicOp::ATOMIC_FETCH_MIN: return "atomic_fetch_min"; - case IntrinsicOp::ATOMIC_FETCH_MAX: return "atomic_fetch_max"; - case IntrinsicOp::BUFFER_READ: return "buffer_read"; - case IntrinsicOp::BUFFER_WRITE: return "buffer_write"; - case IntrinsicOp::BUFFER_SIZE: return "buffer_size"; - case IntrinsicOp::BYTE_BUFFER_READ: return "byte_buffer_read"; - case IntrinsicOp::BYTE_BUFFER_WRITE: return "byte_buffer_write"; - case IntrinsicOp::BYTE_BUFFER_SIZE: return "byte_buffer_size"; - case IntrinsicOp::TEXTURE2D_READ: return "texture2d_read"; - case IntrinsicOp::TEXTURE2D_WRITE: return "texture2d_write"; - case 
IntrinsicOp::TEXTURE2D_SIZE: return "texture2d_size"; - case IntrinsicOp::TEXTURE2D_SAMPLE: return "texture2d_sample"; - case IntrinsicOp::TEXTURE2D_SAMPLE_LEVEL: return "texture2d_sample_level"; - case IntrinsicOp::TEXTURE2D_SAMPLE_GRAD: return "texture2d_sample_grad"; - case IntrinsicOp::TEXTURE2D_SAMPLE_GRAD_LEVEL: return "texture2d_sample_grad_level"; - case IntrinsicOp::TEXTURE3D_READ: return "texture3d_read"; - case IntrinsicOp::TEXTURE3D_WRITE: return "texture3d_write"; - case IntrinsicOp::TEXTURE3D_SIZE: return "texture3d_size"; - case IntrinsicOp::TEXTURE3D_SAMPLE: return "texture3d_sample"; - case IntrinsicOp::TEXTURE3D_SAMPLE_LEVEL: return "texture3d_sample_level"; - case IntrinsicOp::TEXTURE3D_SAMPLE_GRAD: return "texture3d_sample_grad"; - case IntrinsicOp::TEXTURE3D_SAMPLE_GRAD_LEVEL: return "texture3d_sample_grad_level"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE: return "bindless_texture2d_sample"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL: return "bindless_texture2d_sample_level"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD: return "bindless_texture2d_sample_grad"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL: return "bindless_texture2d_sample_grad_level"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE: return "bindless_texture3d_sample"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL: return "bindless_texture3d_sample_level"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD: return "bindless_texture3d_sample_grad"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL: return "bindless_texture3d_sample_grad_level"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_SAMPLER: return "bindless_texture2d_sample_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL_SAMPLER: return "bindless_texture2d_sample_level_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_SAMPLER: return "bindless_texture2d_sample_grad_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL_SAMPLER: return 
"bindless_texture2d_sample_grad_level_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_SAMPLER: return "bindless_texture3d_sample_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL_SAMPLER: return "bindless_texture3d_sample_level_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_SAMPLER: return "bindless_texture3d_sample_grad_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL_SAMPLER: return "bindless_texture3d_sample_grad_level_sampler"; - case IntrinsicOp::BINDLESS_TEXTURE2D_READ: return "bindless_texture2d_read"; - case IntrinsicOp::BINDLESS_TEXTURE3D_READ: return "bindless_texture3d_read"; - case IntrinsicOp::BINDLESS_TEXTURE2D_READ_LEVEL: return "bindless_texture2d_read_level"; - case IntrinsicOp::BINDLESS_TEXTURE3D_READ_LEVEL: return "bindless_texture3d_read_level"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SIZE: return "bindless_texture2d_size"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SIZE: return "bindless_texture3d_size"; - case IntrinsicOp::BINDLESS_TEXTURE2D_SIZE_LEVEL: return "bindless_texture2d_size_level"; - case IntrinsicOp::BINDLESS_TEXTURE3D_SIZE_LEVEL: return "bindless_texture3d_size_level"; - case IntrinsicOp::BINDLESS_BUFFER_READ: return "bindless_buffer_read"; - case IntrinsicOp::BINDLESS_BUFFER_WRITE: return "bindless_buffer_write"; - case IntrinsicOp::BINDLESS_BUFFER_SIZE: return "bindless_buffer_size"; - case IntrinsicOp::BINDLESS_BYTE_BUFFER_READ: return "bindless_byte_buffer_read"; - case IntrinsicOp::BINDLESS_BYTE_BUFFER_WRITE: return "bindless_byte_buffer_write"; - case IntrinsicOp::BINDLESS_BYTE_BUFFER_SIZE: return "bindless_byte_buffer_size"; - case IntrinsicOp::BUFFER_DEVICE_ADDRESS: return "buffer_device_address"; - case IntrinsicOp::BINDLESS_BUFFER_DEVICE_ADDRESS: return "bindless_buffer_device_address"; - case IntrinsicOp::DEVICE_ADDRESS_READ: return "device_address_read"; - case IntrinsicOp::DEVICE_ADDRESS_WRITE: return "device_address_write"; - case IntrinsicOp::AGGREGATE: return 
"aggregate"; - case IntrinsicOp::SHUFFLE: return "shuffle"; - case IntrinsicOp::INSERT: return "insert"; - case IntrinsicOp::EXTRACT: return "extract"; - case IntrinsicOp::AUTODIFF_REQUIRES_GRADIENT: return "autodiff_requires_gradient"; - case IntrinsicOp::AUTODIFF_GRADIENT: return "autodiff_gradient"; - case IntrinsicOp::AUTODIFF_GRADIENT_MARKER: return "autodiff_gradient_marker"; - case IntrinsicOp::AUTODIFF_ACCUMULATE_GRADIENT: return "autodiff_accumulate_gradient"; - case IntrinsicOp::AUTODIFF_BACKWARD: return "autodiff_backward"; - case IntrinsicOp::AUTODIFF_DETACH: return "autodiff_detach"; - case IntrinsicOp::RAY_TRACING_INSTANCE_TRANSFORM: return "ray_tracing_instance_transform"; - case IntrinsicOp::RAY_TRACING_INSTANCE_USER_ID: return "ray_tracing_instance_user_id"; - case IntrinsicOp::RAY_TRACING_INSTANCE_VISIBILITY_MASK: return "ray_tracing_instance_visibility_mask"; - case IntrinsicOp::RAY_TRACING_SET_INSTANCE_TRANSFORM: return "ray_tracing_set_instance_transform"; - case IntrinsicOp::RAY_TRACING_SET_INSTANCE_VISIBILITY_MASK: return "ray_tracing_set_instance_visibility_mask"; - case IntrinsicOp::RAY_TRACING_SET_INSTANCE_OPACITY: return "ray_tracing_set_instance_opacity"; - case IntrinsicOp::RAY_TRACING_SET_INSTANCE_USER_ID: return "ray_tracing_set_instance_user_id"; - case IntrinsicOp::RAY_TRACING_TRACE_CLOSEST: return "ray_tracing_trace_closest"; - case IntrinsicOp::RAY_TRACING_TRACE_ANY: return "ray_tracing_trace_any"; - case IntrinsicOp::RAY_TRACING_QUERY_ALL: return "ray_tracing_query_all"; - case IntrinsicOp::RAY_TRACING_QUERY_ANY: return "ray_tracing_query_any"; - case IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_MATRIX: return "ray_tracing_instance_motion_matrix"; - case IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_SRT: return "ray_tracing_instance_motion_srt"; - case IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_MATRIX: return "ray_tracing_set_instance_motion_matrix"; - case IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_SRT: return 
"ray_tracing_set_instance_motion_srt"; - case IntrinsicOp::RAY_TRACING_TRACE_CLOSEST_MOTION_BLUR: return "ray_tracing_trace_closest_motion_blur"; - case IntrinsicOp::RAY_TRACING_TRACE_ANY_MOTION_BLUR: return "ray_tracing_trace_any_motion_blur"; - case IntrinsicOp::RAY_TRACING_QUERY_ALL_MOTION_BLUR: return "ray_tracing_query_all_motion_blur"; - case IntrinsicOp::RAY_TRACING_QUERY_ANY_MOTION_BLUR: return "ray_tracing_query_any_motion_blur"; - case IntrinsicOp::RAY_QUERY_WORLD_SPACE_RAY: return "ray_query_world_space_ray"; - case IntrinsicOp::RAY_QUERY_PROCEDURAL_CANDIDATE_HIT: return "ray_query_procedural_candidate_hit"; - case IntrinsicOp::RAY_QUERY_TRIANGLE_CANDIDATE_HIT: return "ray_query_triangle_candidate_hit"; - case IntrinsicOp::RAY_QUERY_COMMITTED_HIT: return "ray_query_committed_hit"; - case IntrinsicOp::RAY_QUERY_COMMIT_TRIANGLE: return "ray_query_commit_triangle"; - case IntrinsicOp::RAY_QUERY_COMMIT_PROCEDURAL: return "ray_query_commit_procedural"; - case IntrinsicOp::RAY_QUERY_TERMINATE: return "ray_query_terminate"; - case IntrinsicOp::RAY_QUERY_PROCEED: return "ray_query_proceed"; - case IntrinsicOp::RAY_QUERY_IS_TRIANGLE_CANDIDATE: return "ray_query_is_triangle_candidate"; - case IntrinsicOp::RAY_QUERY_IS_PROCEDURAL_CANDIDATE: return "ray_query_is_procedural_candidate"; - case IntrinsicOp::RASTER_DISCARD: return "raster_discard"; - case IntrinsicOp::RASTER_DDX: return "raster_ddx"; - case IntrinsicOp::RASTER_DDY: return "raster_ddy"; - case IntrinsicOp::WARP_IS_FIRST_ACTIVE_LANE: return "warp_is_first_active_lane"; - case IntrinsicOp::WARP_FIRST_ACTIVE_LANE: return "warp_first_active_lane"; - case IntrinsicOp::WARP_ACTIVE_ALL_EQUAL: return "warp_active_all_equal"; - case IntrinsicOp::WARP_ACTIVE_BIT_AND: return "warp_active_bit_and"; - case IntrinsicOp::WARP_ACTIVE_BIT_OR: return "warp_active_bit_or"; - case IntrinsicOp::WARP_ACTIVE_BIT_XOR: return "warp_active_bit_xor"; - case IntrinsicOp::WARP_ACTIVE_COUNT_BITS: return "warp_active_count_bits"; - 
case IntrinsicOp::WARP_ACTIVE_MAX: return "warp_active_max"; - case IntrinsicOp::WARP_ACTIVE_MIN: return "warp_active_min"; - case IntrinsicOp::WARP_ACTIVE_PRODUCT: return "warp_active_product"; - case IntrinsicOp::WARP_ACTIVE_SUM: return "warp_active_sum"; - case IntrinsicOp::WARP_ACTIVE_ALL: return "warp_active_all"; - case IntrinsicOp::WARP_ACTIVE_ANY: return "warp_active_any"; - case IntrinsicOp::WARP_ACTIVE_BIT_MASK: return "warp_active_bit_mask"; - case IntrinsicOp::WARP_PREFIX_COUNT_BITS: return "warp_prefix_count_bits"; - case IntrinsicOp::WARP_PREFIX_SUM: return "warp_prefix_sum"; - case IntrinsicOp::WARP_PREFIX_PRODUCT: return "warp_prefix_product"; - case IntrinsicOp::WARP_READ_LANE: return "warp_read_lane"; - case IntrinsicOp::WARP_READ_FIRST_ACTIVE_LANE: return "warp_read_first_active_lane"; - case IntrinsicOp::INDIRECT_DISPATCH_SET_KERNEL: return "indirect_dispatch_set_kernel"; - case IntrinsicOp::INDIRECT_DISPATCH_SET_COUNT: return "indirect_dispatch_set_count"; - case IntrinsicOp::SHADER_EXECUTION_REORDER: return "shader_execution_reorder"; - case IntrinsicOp::CLOCK: return "clock"; - } - LUISA_ERROR_WITH_LOCATION("Unknown intrinsic operation: {}.", - static_cast(op)); -} - -IntrinsicOp intrinsic_op_from_string(luisa::string_view name) noexcept { - static const luisa::unordered_map m{ - {"nop", IntrinsicOp::NOP}, - {"unary_plus", IntrinsicOp::UNARY_PLUS}, - {"unary_minus", IntrinsicOp::UNARY_MINUS}, - {"unary_logic_not", IntrinsicOp::UNARY_LOGIC_NOT}, - {"unary_bit_not", IntrinsicOp::UNARY_BIT_NOT}, - {"binary_add", IntrinsicOp::BINARY_ADD}, - {"binary_sub", IntrinsicOp::BINARY_SUB}, - {"binary_mul", IntrinsicOp::BINARY_MUL}, - {"binary_div", IntrinsicOp::BINARY_DIV}, - {"binary_mod", IntrinsicOp::BINARY_MOD}, - {"binary_logic_and", IntrinsicOp::BINARY_LOGIC_AND}, - {"binary_logic_or", IntrinsicOp::BINARY_LOGIC_OR}, - {"binary_bit_and", IntrinsicOp::BINARY_BIT_AND}, - {"binary_bit_or", IntrinsicOp::BINARY_BIT_OR}, - {"binary_bit_xor", 
IntrinsicOp::BINARY_BIT_XOR}, - {"binary_shift_left", IntrinsicOp::BINARY_SHIFT_LEFT}, - {"binary_shift_right", IntrinsicOp::BINARY_SHIFT_RIGHT}, - {"binary_rotate_left", IntrinsicOp::BINARY_ROTATE_LEFT}, - {"binary_rotate_right", IntrinsicOp::BINARY_ROTATE_RIGHT}, - {"binary_less", IntrinsicOp::BINARY_LESS}, - {"binary_greater", IntrinsicOp::BINARY_GREATER}, - {"binary_less_equal", IntrinsicOp::BINARY_LESS_EQUAL}, - {"binary_greater_equal", IntrinsicOp::BINARY_GREATER_EQUAL}, - {"binary_equal", IntrinsicOp::BINARY_EQUAL}, - {"binary_not_equal", IntrinsicOp::BINARY_NOT_EQUAL}, - {"thread_id", IntrinsicOp::THREAD_ID}, - {"block_id", IntrinsicOp::BLOCK_ID}, - {"warp_lane_id", IntrinsicOp::WARP_LANE_ID}, - {"dispatch_id", IntrinsicOp::DISPATCH_ID}, - {"kernel_id", IntrinsicOp::KERNEL_ID}, - {"object_id", IntrinsicOp::OBJECT_ID}, - {"block_size", IntrinsicOp::BLOCK_SIZE}, - {"warp_size", IntrinsicOp::WARP_SIZE}, - {"dispatch_size", IntrinsicOp::DISPATCH_SIZE}, - {"synchronize_block", IntrinsicOp::SYNCHRONIZE_BLOCK}, - {"all", IntrinsicOp::ALL}, - {"any", IntrinsicOp::ANY}, - {"select", IntrinsicOp::SELECT}, - {"clamp", IntrinsicOp::CLAMP}, - {"saturate", IntrinsicOp::SATURATE}, - {"lerp", IntrinsicOp::LERP}, - {"smoothstep", IntrinsicOp::SMOOTHSTEP}, - {"step", IntrinsicOp::STEP}, - {"abs", IntrinsicOp::ABS}, - {"min", IntrinsicOp::MIN}, - {"max", IntrinsicOp::MAX}, - {"clz", IntrinsicOp::CLZ}, - {"ctz", IntrinsicOp::CTZ}, - {"popcount", IntrinsicOp::POPCOUNT}, - {"reverse", IntrinsicOp::REVERSE}, - {"isinf", IntrinsicOp::ISINF}, - {"isnan", IntrinsicOp::ISNAN}, - {"acos", IntrinsicOp::ACOS}, - {"acosh", IntrinsicOp::ACOSH}, - {"asin", IntrinsicOp::ASIN}, - {"asinh", IntrinsicOp::ASINH}, - {"atan", IntrinsicOp::ATAN}, - {"atan2", IntrinsicOp::ATAN2}, - {"atanh", IntrinsicOp::ATANH}, - {"cos", IntrinsicOp::COS}, - {"cosh", IntrinsicOp::COSH}, - {"sin", IntrinsicOp::SIN}, - {"sinh", IntrinsicOp::SINH}, - {"tan", IntrinsicOp::TAN}, - {"tanh", IntrinsicOp::TANH}, - {"exp", 
IntrinsicOp::EXP}, - {"exp2", IntrinsicOp::EXP2}, - {"exp10", IntrinsicOp::EXP10}, - {"log", IntrinsicOp::LOG}, - {"log2", IntrinsicOp::LOG2}, - {"log10", IntrinsicOp::LOG10}, - {"pow", IntrinsicOp::POW}, - {"pow_int", IntrinsicOp::POW_INT}, - {"sqrt", IntrinsicOp::SQRT}, - {"rsqrt", IntrinsicOp::RSQRT}, - {"ceil", IntrinsicOp::CEIL}, - {"floor", IntrinsicOp::FLOOR}, - {"fract", IntrinsicOp::FRACT}, - {"trunc", IntrinsicOp::TRUNC}, - {"round", IntrinsicOp::ROUND}, - {"rint", IntrinsicOp::RINT}, - {"fma", IntrinsicOp::FMA}, - {"copysign", IntrinsicOp::COPYSIGN}, - {"cross", IntrinsicOp::CROSS}, - {"dot", IntrinsicOp::DOT}, - {"length", IntrinsicOp::LENGTH}, - {"length_squared", IntrinsicOp::LENGTH_SQUARED}, - {"normalize", IntrinsicOp::NORMALIZE}, - {"faceforward", IntrinsicOp::FACEFORWARD}, - {"reflect", IntrinsicOp::REFLECT}, - {"reduce_sum", IntrinsicOp::REDUCE_SUM}, - {"reduce_product", IntrinsicOp::REDUCE_PRODUCT}, - {"reduce_min", IntrinsicOp::REDUCE_MIN}, - {"reduce_max", IntrinsicOp::REDUCE_MAX}, - {"outer_product", IntrinsicOp::OUTER_PRODUCT}, - {"matrix_comp_neg", IntrinsicOp::MATRIX_COMP_NEG}, - {"matrix_comp_add", IntrinsicOp::MATRIX_COMP_ADD}, - {"matrix_comp_sub", IntrinsicOp::MATRIX_COMP_SUB}, - {"matrix_comp_mul", IntrinsicOp::MATRIX_COMP_MUL}, - {"matrix_comp_div", IntrinsicOp::MATRIX_COMP_DIV}, - {"matrix_linalg_mul", IntrinsicOp::MATRIX_LINALG_MUL}, - {"matrix_determinant", IntrinsicOp::MATRIX_DETERMINANT}, - {"matrix_transpose", IntrinsicOp::MATRIX_TRANSPOSE}, - {"matrix_inverse", IntrinsicOp::MATRIX_INVERSE}, - {"atomic_exchange", IntrinsicOp::ATOMIC_EXCHANGE}, - {"atomic_compare_exchange", IntrinsicOp::ATOMIC_COMPARE_EXCHANGE}, - {"atomic_fetch_add", IntrinsicOp::ATOMIC_FETCH_ADD}, - {"atomic_fetch_sub", IntrinsicOp::ATOMIC_FETCH_SUB}, - {"atomic_fetch_and", IntrinsicOp::ATOMIC_FETCH_AND}, - {"atomic_fetch_or", IntrinsicOp::ATOMIC_FETCH_OR}, - {"atomic_fetch_xor", IntrinsicOp::ATOMIC_FETCH_XOR}, - {"atomic_fetch_min", 
IntrinsicOp::ATOMIC_FETCH_MIN}, - {"atomic_fetch_max", IntrinsicOp::ATOMIC_FETCH_MAX}, - {"buffer_read", IntrinsicOp::BUFFER_READ}, - {"buffer_write", IntrinsicOp::BUFFER_WRITE}, - {"buffer_size", IntrinsicOp::BUFFER_SIZE}, - {"byte_buffer_read", IntrinsicOp::BYTE_BUFFER_READ}, - {"byte_buffer_write", IntrinsicOp::BYTE_BUFFER_WRITE}, - {"byte_buffer_size", IntrinsicOp::BYTE_BUFFER_SIZE}, - {"texture2d_read", IntrinsicOp::TEXTURE2D_READ}, - {"texture2d_write", IntrinsicOp::TEXTURE2D_WRITE}, - {"texture2d_size", IntrinsicOp::TEXTURE2D_SIZE}, - {"texture2d_sample", IntrinsicOp::TEXTURE2D_SAMPLE}, - {"texture2d_sample_level", IntrinsicOp::TEXTURE2D_SAMPLE_LEVEL}, - {"texture2d_sample_grad", IntrinsicOp::TEXTURE2D_SAMPLE_GRAD}, - {"texture2d_sample_grad_level", IntrinsicOp::TEXTURE2D_SAMPLE_GRAD_LEVEL}, - {"texture3d_read", IntrinsicOp::TEXTURE3D_READ}, - {"texture3d_write", IntrinsicOp::TEXTURE3D_WRITE}, - {"texture3d_size", IntrinsicOp::TEXTURE3D_SIZE}, - {"texture3d_sample", IntrinsicOp::TEXTURE3D_SAMPLE}, - {"texture3d_sample_level", IntrinsicOp::TEXTURE3D_SAMPLE_LEVEL}, - {"texture3d_sample_grad", IntrinsicOp::TEXTURE3D_SAMPLE_GRAD}, - {"texture3d_sample_grad_level", IntrinsicOp::TEXTURE3D_SAMPLE_GRAD_LEVEL}, - {"bindless_texture2d_sample", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE}, - {"bindless_texture2d_sample_level", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL}, - {"bindless_texture2d_sample_grad", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD}, - {"bindless_texture2d_sample_grad_level", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL}, - {"bindless_texture3d_sample", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE}, - {"bindless_texture3d_sample_level", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL}, - {"bindless_texture3d_sample_grad", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD}, - {"bindless_texture3d_sample_grad_level", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL}, - {"bindless_texture2d_sample_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_SAMPLER}, - 
{"bindless_texture2d_sample_level_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL_SAMPLER}, - {"bindless_texture2d_sample_grad_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_SAMPLER}, - {"bindless_texture2d_sample_grad_level_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL_SAMPLER}, - {"bindless_texture3d_sample_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_SAMPLER}, - {"bindless_texture3d_sample_level_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL_SAMPLER}, - {"bindless_texture3d_sample_grad_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_SAMPLER}, - {"bindless_texture3d_sample_grad_level_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL_SAMPLER}, - {"bindless_texture2d_read", IntrinsicOp::BINDLESS_TEXTURE2D_READ}, - {"bindless_texture3d_read", IntrinsicOp::BINDLESS_TEXTURE3D_READ}, - {"bindless_texture2d_read_level", IntrinsicOp::BINDLESS_TEXTURE2D_READ_LEVEL}, - {"bindless_texture3d_read_level", IntrinsicOp::BINDLESS_TEXTURE3D_READ_LEVEL}, - {"bindless_texture2d_size", IntrinsicOp::BINDLESS_TEXTURE2D_SIZE}, - {"bindless_texture3d_size", IntrinsicOp::BINDLESS_TEXTURE3D_SIZE}, - {"bindless_texture2d_size_level", IntrinsicOp::BINDLESS_TEXTURE2D_SIZE_LEVEL}, - {"bindless_texture3d_size_level", IntrinsicOp::BINDLESS_TEXTURE3D_SIZE_LEVEL}, - {"bindless_buffer_read", IntrinsicOp::BINDLESS_BUFFER_READ}, - {"bindless_buffer_write", IntrinsicOp::BINDLESS_BUFFER_WRITE}, - {"bindless_buffer_size", IntrinsicOp::BINDLESS_BUFFER_SIZE}, - {"bindless_byte_buffer_read", IntrinsicOp::BINDLESS_BYTE_BUFFER_READ}, - {"bindless_byte_buffer_write", IntrinsicOp::BINDLESS_BYTE_BUFFER_WRITE}, - {"bindless_byte_buffer_size", IntrinsicOp::BINDLESS_BYTE_BUFFER_SIZE}, - {"buffer_device_address", IntrinsicOp::BUFFER_DEVICE_ADDRESS}, - {"bindless_buffer_device_address", IntrinsicOp::BINDLESS_BUFFER_DEVICE_ADDRESS}, - {"device_address_read", IntrinsicOp::DEVICE_ADDRESS_READ}, - {"device_address_write", IntrinsicOp::DEVICE_ADDRESS_WRITE}, - 
{"aggregate", IntrinsicOp::AGGREGATE}, - {"shuffle", IntrinsicOp::SHUFFLE}, - {"insert", IntrinsicOp::INSERT}, - {"extract", IntrinsicOp::EXTRACT}, - {"autodiff_requires_gradient", IntrinsicOp::AUTODIFF_REQUIRES_GRADIENT}, - {"autodiff_gradient", IntrinsicOp::AUTODIFF_GRADIENT}, - {"autodiff_gradient_marker", IntrinsicOp::AUTODIFF_GRADIENT_MARKER}, - {"autodiff_accumulate_gradient", IntrinsicOp::AUTODIFF_ACCUMULATE_GRADIENT}, - {"autodiff_backward", IntrinsicOp::AUTODIFF_BACKWARD}, - {"autodiff_detach", IntrinsicOp::AUTODIFF_DETACH}, - {"ray_tracing_instance_transform", IntrinsicOp::RAY_TRACING_INSTANCE_TRANSFORM}, - {"ray_tracing_instance_user_id", IntrinsicOp::RAY_TRACING_INSTANCE_USER_ID}, - {"ray_tracing_instance_visibility_mask", IntrinsicOp::RAY_TRACING_INSTANCE_VISIBILITY_MASK}, - {"ray_tracing_set_instance_transform", IntrinsicOp::RAY_TRACING_SET_INSTANCE_TRANSFORM}, - {"ray_tracing_set_instance_visibility_mask", IntrinsicOp::RAY_TRACING_SET_INSTANCE_VISIBILITY_MASK}, - {"ray_tracing_set_instance_opacity", IntrinsicOp::RAY_TRACING_SET_INSTANCE_OPACITY}, - {"ray_tracing_set_instance_user_id", IntrinsicOp::RAY_TRACING_SET_INSTANCE_USER_ID}, - {"ray_tracing_trace_closest", IntrinsicOp::RAY_TRACING_TRACE_CLOSEST}, - {"ray_tracing_trace_any", IntrinsicOp::RAY_TRACING_TRACE_ANY}, - {"ray_tracing_query_all", IntrinsicOp::RAY_TRACING_QUERY_ALL}, - {"ray_tracing_query_any", IntrinsicOp::RAY_TRACING_QUERY_ANY}, - {"ray_tracing_instance_motion_matrix", IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_MATRIX}, - {"ray_tracing_instance_motion_srt", IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_SRT}, - {"ray_tracing_set_instance_motion_matrix", IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_MATRIX}, - {"ray_tracing_set_instance_motion_srt", IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_SRT}, - {"ray_tracing_trace_closest_motion_blur", IntrinsicOp::RAY_TRACING_TRACE_CLOSEST_MOTION_BLUR}, - {"ray_tracing_trace_any_motion_blur", IntrinsicOp::RAY_TRACING_TRACE_ANY_MOTION_BLUR}, - 
{"ray_tracing_query_all_motion_blur", IntrinsicOp::RAY_TRACING_QUERY_ALL_MOTION_BLUR}, - {"ray_tracing_query_any_motion_blur", IntrinsicOp::RAY_TRACING_QUERY_ANY_MOTION_BLUR}, - {"ray_query_world_space_ray", IntrinsicOp::RAY_QUERY_WORLD_SPACE_RAY}, - {"ray_query_procedural_candidate_hit", IntrinsicOp::RAY_QUERY_PROCEDURAL_CANDIDATE_HIT}, - {"ray_query_triangle_candidate_hit", IntrinsicOp::RAY_QUERY_TRIANGLE_CANDIDATE_HIT}, - {"ray_query_committed_hit", IntrinsicOp::RAY_QUERY_COMMITTED_HIT}, - {"ray_query_commit_triangle", IntrinsicOp::RAY_QUERY_COMMIT_TRIANGLE}, - {"ray_query_commit_procedural", IntrinsicOp::RAY_QUERY_COMMIT_PROCEDURAL}, - {"ray_query_terminate", IntrinsicOp::RAY_QUERY_TERMINATE}, - {"ray_query_proceed", IntrinsicOp::RAY_QUERY_PROCEED}, - {"ray_query_is_triangle_candidate", IntrinsicOp::RAY_QUERY_IS_TRIANGLE_CANDIDATE}, - {"ray_query_is_procedural_candidate", IntrinsicOp::RAY_QUERY_IS_PROCEDURAL_CANDIDATE}, - {"raster_discard", IntrinsicOp::RASTER_DISCARD}, - {"raster_ddx", IntrinsicOp::RASTER_DDX}, - {"raster_ddy", IntrinsicOp::RASTER_DDY}, - {"warp_is_first_active_lane", IntrinsicOp::WARP_IS_FIRST_ACTIVE_LANE}, - {"warp_first_active_lane", IntrinsicOp::WARP_FIRST_ACTIVE_LANE}, - {"warp_active_all_equal", IntrinsicOp::WARP_ACTIVE_ALL_EQUAL}, - {"warp_active_bit_and", IntrinsicOp::WARP_ACTIVE_BIT_AND}, - {"warp_active_bit_or", IntrinsicOp::WARP_ACTIVE_BIT_OR}, - {"warp_active_bit_xor", IntrinsicOp::WARP_ACTIVE_BIT_XOR}, - {"warp_active_count_bits", IntrinsicOp::WARP_ACTIVE_COUNT_BITS}, - {"warp_active_max", IntrinsicOp::WARP_ACTIVE_MAX}, - {"warp_active_min", IntrinsicOp::WARP_ACTIVE_MIN}, - {"warp_active_product", IntrinsicOp::WARP_ACTIVE_PRODUCT}, - {"warp_active_sum", IntrinsicOp::WARP_ACTIVE_SUM}, - {"warp_active_all", IntrinsicOp::WARP_ACTIVE_ALL}, - {"warp_active_any", IntrinsicOp::WARP_ACTIVE_ANY}, - {"warp_active_bit_mask", IntrinsicOp::WARP_ACTIVE_BIT_MASK}, - {"warp_prefix_count_bits", IntrinsicOp::WARP_PREFIX_COUNT_BITS}, - 
{"warp_prefix_sum", IntrinsicOp::WARP_PREFIX_SUM}, - {"warp_prefix_product", IntrinsicOp::WARP_PREFIX_PRODUCT}, - {"warp_read_lane", IntrinsicOp::WARP_READ_LANE}, - {"warp_read_first_active_lane", IntrinsicOp::WARP_READ_FIRST_ACTIVE_LANE}, - {"indirect_dispatch_set_kernel", IntrinsicOp::INDIRECT_DISPATCH_SET_KERNEL}, - {"indirect_dispatch_set_count", IntrinsicOp::INDIRECT_DISPATCH_SET_COUNT}, - {"shader_execution_reorder", IntrinsicOp::SHADER_EXECUTION_REORDER}, - {"clock", IntrinsicOp::CLOCK}, - }; - auto iter = m.find(name); - LUISA_ASSERT(iter != m.end(), "Unknown intrinsic operation: {}.", name); - return iter->second; -} +#pragma once + +luisa::string to_string(IntrinsicOp op) noexcept { + switch (op) { + case IntrinsicOp::NOP: return "nop"; + case IntrinsicOp::UNARY_PLUS: return "unary_plus"; + case IntrinsicOp::UNARY_MINUS: return "unary_minus"; + case IntrinsicOp::UNARY_LOGIC_NOT: return "unary_logic_not"; + case IntrinsicOp::UNARY_BIT_NOT: return "unary_bit_not"; + case IntrinsicOp::BINARY_ADD: return "binary_add"; + case IntrinsicOp::BINARY_SUB: return "binary_sub"; + case IntrinsicOp::BINARY_MUL: return "binary_mul"; + case IntrinsicOp::BINARY_DIV: return "binary_div"; + case IntrinsicOp::BINARY_MOD: return "binary_mod"; + case IntrinsicOp::BINARY_LOGIC_AND: return "binary_logic_and"; + case IntrinsicOp::BINARY_LOGIC_OR: return "binary_logic_or"; + case IntrinsicOp::BINARY_BIT_AND: return "binary_bit_and"; + case IntrinsicOp::BINARY_BIT_OR: return "binary_bit_or"; + case IntrinsicOp::BINARY_BIT_XOR: return "binary_bit_xor"; + case IntrinsicOp::BINARY_SHIFT_LEFT: return "binary_shift_left"; + case IntrinsicOp::BINARY_SHIFT_RIGHT: return "binary_shift_right"; + case IntrinsicOp::BINARY_ROTATE_LEFT: return "binary_rotate_left"; + case IntrinsicOp::BINARY_ROTATE_RIGHT: return "binary_rotate_right"; + case IntrinsicOp::BINARY_LESS: return "binary_less"; + case IntrinsicOp::BINARY_GREATER: return "binary_greater"; + case IntrinsicOp::BINARY_LESS_EQUAL: return 
"binary_less_equal"; + case IntrinsicOp::BINARY_GREATER_EQUAL: return "binary_greater_equal"; + case IntrinsicOp::BINARY_EQUAL: return "binary_equal"; + case IntrinsicOp::BINARY_NOT_EQUAL: return "binary_not_equal"; + case IntrinsicOp::SYNCHRONIZE_BLOCK: return "synchronize_block"; + case IntrinsicOp::ALL: return "all"; + case IntrinsicOp::ANY: return "any"; + case IntrinsicOp::SELECT: return "select"; + case IntrinsicOp::CLAMP: return "clamp"; + case IntrinsicOp::SATURATE: return "saturate"; + case IntrinsicOp::LERP: return "lerp"; + case IntrinsicOp::SMOOTHSTEP: return "smoothstep"; + case IntrinsicOp::STEP: return "step"; + case IntrinsicOp::ABS: return "abs"; + case IntrinsicOp::MIN: return "min"; + case IntrinsicOp::MAX: return "max"; + case IntrinsicOp::CLZ: return "clz"; + case IntrinsicOp::CTZ: return "ctz"; + case IntrinsicOp::POPCOUNT: return "popcount"; + case IntrinsicOp::REVERSE: return "reverse"; + case IntrinsicOp::ISINF: return "isinf"; + case IntrinsicOp::ISNAN: return "isnan"; + case IntrinsicOp::ACOS: return "acos"; + case IntrinsicOp::ACOSH: return "acosh"; + case IntrinsicOp::ASIN: return "asin"; + case IntrinsicOp::ASINH: return "asinh"; + case IntrinsicOp::ATAN: return "atan"; + case IntrinsicOp::ATAN2: return "atan2"; + case IntrinsicOp::ATANH: return "atanh"; + case IntrinsicOp::COS: return "cos"; + case IntrinsicOp::COSH: return "cosh"; + case IntrinsicOp::SIN: return "sin"; + case IntrinsicOp::SINH: return "sinh"; + case IntrinsicOp::TAN: return "tan"; + case IntrinsicOp::TANH: return "tanh"; + case IntrinsicOp::EXP: return "exp"; + case IntrinsicOp::EXP2: return "exp2"; + case IntrinsicOp::EXP10: return "exp10"; + case IntrinsicOp::LOG: return "log"; + case IntrinsicOp::LOG2: return "log2"; + case IntrinsicOp::LOG10: return "log10"; + case IntrinsicOp::POW: return "pow"; + case IntrinsicOp::POW_INT: return "pow_int"; + case IntrinsicOp::SQRT: return "sqrt"; + case IntrinsicOp::RSQRT: return "rsqrt"; + case IntrinsicOp::CEIL: return 
"ceil"; + case IntrinsicOp::FLOOR: return "floor"; + case IntrinsicOp::FRACT: return "fract"; + case IntrinsicOp::TRUNC: return "trunc"; + case IntrinsicOp::ROUND: return "round"; + case IntrinsicOp::RINT: return "rint"; + case IntrinsicOp::FMA: return "fma"; + case IntrinsicOp::COPYSIGN: return "copysign"; + case IntrinsicOp::CROSS: return "cross"; + case IntrinsicOp::DOT: return "dot"; + case IntrinsicOp::LENGTH: return "length"; + case IntrinsicOp::LENGTH_SQUARED: return "length_squared"; + case IntrinsicOp::NORMALIZE: return "normalize"; + case IntrinsicOp::FACEFORWARD: return "faceforward"; + case IntrinsicOp::REFLECT: return "reflect"; + case IntrinsicOp::REDUCE_SUM: return "reduce_sum"; + case IntrinsicOp::REDUCE_PRODUCT: return "reduce_product"; + case IntrinsicOp::REDUCE_MIN: return "reduce_min"; + case IntrinsicOp::REDUCE_MAX: return "reduce_max"; + case IntrinsicOp::OUTER_PRODUCT: return "outer_product"; + case IntrinsicOp::MATRIX_COMP_NEG: return "matrix_comp_neg"; + case IntrinsicOp::MATRIX_COMP_ADD: return "matrix_comp_add"; + case IntrinsicOp::MATRIX_COMP_SUB: return "matrix_comp_sub"; + case IntrinsicOp::MATRIX_COMP_MUL: return "matrix_comp_mul"; + case IntrinsicOp::MATRIX_COMP_DIV: return "matrix_comp_div"; + case IntrinsicOp::MATRIX_LINALG_MUL: return "matrix_linalg_mul"; + case IntrinsicOp::MATRIX_DETERMINANT: return "matrix_determinant"; + case IntrinsicOp::MATRIX_TRANSPOSE: return "matrix_transpose"; + case IntrinsicOp::MATRIX_INVERSE: return "matrix_inverse"; + case IntrinsicOp::ATOMIC_EXCHANGE: return "atomic_exchange"; + case IntrinsicOp::ATOMIC_COMPARE_EXCHANGE: return "atomic_compare_exchange"; + case IntrinsicOp::ATOMIC_FETCH_ADD: return "atomic_fetch_add"; + case IntrinsicOp::ATOMIC_FETCH_SUB: return "atomic_fetch_sub"; + case IntrinsicOp::ATOMIC_FETCH_AND: return "atomic_fetch_and"; + case IntrinsicOp::ATOMIC_FETCH_OR: return "atomic_fetch_or"; + case IntrinsicOp::ATOMIC_FETCH_XOR: return "atomic_fetch_xor"; + case 
IntrinsicOp::ATOMIC_FETCH_MIN: return "atomic_fetch_min"; + case IntrinsicOp::ATOMIC_FETCH_MAX: return "atomic_fetch_max"; + case IntrinsicOp::BUFFER_READ: return "buffer_read"; + case IntrinsicOp::BUFFER_WRITE: return "buffer_write"; + case IntrinsicOp::BUFFER_SIZE: return "buffer_size"; + case IntrinsicOp::BYTE_BUFFER_READ: return "byte_buffer_read"; + case IntrinsicOp::BYTE_BUFFER_WRITE: return "byte_buffer_write"; + case IntrinsicOp::BYTE_BUFFER_SIZE: return "byte_buffer_size"; + case IntrinsicOp::TEXTURE2D_READ: return "texture2d_read"; + case IntrinsicOp::TEXTURE2D_WRITE: return "texture2d_write"; + case IntrinsicOp::TEXTURE2D_SIZE: return "texture2d_size"; + case IntrinsicOp::TEXTURE2D_SAMPLE: return "texture2d_sample"; + case IntrinsicOp::TEXTURE2D_SAMPLE_LEVEL: return "texture2d_sample_level"; + case IntrinsicOp::TEXTURE2D_SAMPLE_GRAD: return "texture2d_sample_grad"; + case IntrinsicOp::TEXTURE2D_SAMPLE_GRAD_LEVEL: return "texture2d_sample_grad_level"; + case IntrinsicOp::TEXTURE3D_READ: return "texture3d_read"; + case IntrinsicOp::TEXTURE3D_WRITE: return "texture3d_write"; + case IntrinsicOp::TEXTURE3D_SIZE: return "texture3d_size"; + case IntrinsicOp::TEXTURE3D_SAMPLE: return "texture3d_sample"; + case IntrinsicOp::TEXTURE3D_SAMPLE_LEVEL: return "texture3d_sample_level"; + case IntrinsicOp::TEXTURE3D_SAMPLE_GRAD: return "texture3d_sample_grad"; + case IntrinsicOp::TEXTURE3D_SAMPLE_GRAD_LEVEL: return "texture3d_sample_grad_level"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE: return "bindless_texture2d_sample"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL: return "bindless_texture2d_sample_level"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD: return "bindless_texture2d_sample_grad"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL: return "bindless_texture2d_sample_grad_level"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE: return "bindless_texture3d_sample"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL: return 
"bindless_texture3d_sample_level"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD: return "bindless_texture3d_sample_grad"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL: return "bindless_texture3d_sample_grad_level"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_SAMPLER: return "bindless_texture2d_sample_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL_SAMPLER: return "bindless_texture2d_sample_level_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_SAMPLER: return "bindless_texture2d_sample_grad_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL_SAMPLER: return "bindless_texture2d_sample_grad_level_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_SAMPLER: return "bindless_texture3d_sample_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL_SAMPLER: return "bindless_texture3d_sample_level_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_SAMPLER: return "bindless_texture3d_sample_grad_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL_SAMPLER: return "bindless_texture3d_sample_grad_level_sampler"; + case IntrinsicOp::BINDLESS_TEXTURE2D_READ: return "bindless_texture2d_read"; + case IntrinsicOp::BINDLESS_TEXTURE3D_READ: return "bindless_texture3d_read"; + case IntrinsicOp::BINDLESS_TEXTURE2D_READ_LEVEL: return "bindless_texture2d_read_level"; + case IntrinsicOp::BINDLESS_TEXTURE3D_READ_LEVEL: return "bindless_texture3d_read_level"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SIZE: return "bindless_texture2d_size"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SIZE: return "bindless_texture3d_size"; + case IntrinsicOp::BINDLESS_TEXTURE2D_SIZE_LEVEL: return "bindless_texture2d_size_level"; + case IntrinsicOp::BINDLESS_TEXTURE3D_SIZE_LEVEL: return "bindless_texture3d_size_level"; + case IntrinsicOp::BINDLESS_BUFFER_READ: return "bindless_buffer_read"; + case IntrinsicOp::BINDLESS_BUFFER_WRITE: return "bindless_buffer_write"; + case IntrinsicOp::BINDLESS_BUFFER_SIZE: return 
"bindless_buffer_size"; + case IntrinsicOp::BINDLESS_BYTE_BUFFER_READ: return "bindless_byte_buffer_read"; + case IntrinsicOp::BINDLESS_BYTE_BUFFER_WRITE: return "bindless_byte_buffer_write"; + case IntrinsicOp::BINDLESS_BYTE_BUFFER_SIZE: return "bindless_byte_buffer_size"; + case IntrinsicOp::BUFFER_DEVICE_ADDRESS: return "buffer_device_address"; + case IntrinsicOp::BINDLESS_BUFFER_DEVICE_ADDRESS: return "bindless_buffer_device_address"; + case IntrinsicOp::DEVICE_ADDRESS_READ: return "device_address_read"; + case IntrinsicOp::DEVICE_ADDRESS_WRITE: return "device_address_write"; + case IntrinsicOp::AGGREGATE: return "aggregate"; + case IntrinsicOp::SHUFFLE: return "shuffle"; + case IntrinsicOp::INSERT: return "insert"; + case IntrinsicOp::EXTRACT: return "extract"; + case IntrinsicOp::AUTODIFF_REQUIRES_GRADIENT: return "autodiff_requires_gradient"; + case IntrinsicOp::AUTODIFF_GRADIENT: return "autodiff_gradient"; + case IntrinsicOp::AUTODIFF_GRADIENT_MARKER: return "autodiff_gradient_marker"; + case IntrinsicOp::AUTODIFF_ACCUMULATE_GRADIENT: return "autodiff_accumulate_gradient"; + case IntrinsicOp::AUTODIFF_BACKWARD: return "autodiff_backward"; + case IntrinsicOp::AUTODIFF_DETACH: return "autodiff_detach"; + case IntrinsicOp::RAY_TRACING_INSTANCE_TRANSFORM: return "ray_tracing_instance_transform"; + case IntrinsicOp::RAY_TRACING_INSTANCE_USER_ID: return "ray_tracing_instance_user_id"; + case IntrinsicOp::RAY_TRACING_INSTANCE_VISIBILITY_MASK: return "ray_tracing_instance_visibility_mask"; + case IntrinsicOp::RAY_TRACING_SET_INSTANCE_TRANSFORM: return "ray_tracing_set_instance_transform"; + case IntrinsicOp::RAY_TRACING_SET_INSTANCE_VISIBILITY_MASK: return "ray_tracing_set_instance_visibility_mask"; + case IntrinsicOp::RAY_TRACING_SET_INSTANCE_OPACITY: return "ray_tracing_set_instance_opacity"; + case IntrinsicOp::RAY_TRACING_SET_INSTANCE_USER_ID: return "ray_tracing_set_instance_user_id"; + case IntrinsicOp::RAY_TRACING_TRACE_CLOSEST: return 
"ray_tracing_trace_closest"; + case IntrinsicOp::RAY_TRACING_TRACE_ANY: return "ray_tracing_trace_any"; + case IntrinsicOp::RAY_TRACING_QUERY_ALL: return "ray_tracing_query_all"; + case IntrinsicOp::RAY_TRACING_QUERY_ANY: return "ray_tracing_query_any"; + case IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_MATRIX: return "ray_tracing_instance_motion_matrix"; + case IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_SRT: return "ray_tracing_instance_motion_srt"; + case IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_MATRIX: return "ray_tracing_set_instance_motion_matrix"; + case IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_SRT: return "ray_tracing_set_instance_motion_srt"; + case IntrinsicOp::RAY_TRACING_TRACE_CLOSEST_MOTION_BLUR: return "ray_tracing_trace_closest_motion_blur"; + case IntrinsicOp::RAY_TRACING_TRACE_ANY_MOTION_BLUR: return "ray_tracing_trace_any_motion_blur"; + case IntrinsicOp::RAY_TRACING_QUERY_ALL_MOTION_BLUR: return "ray_tracing_query_all_motion_blur"; + case IntrinsicOp::RAY_TRACING_QUERY_ANY_MOTION_BLUR: return "ray_tracing_query_any_motion_blur"; + case IntrinsicOp::RAY_QUERY_WORLD_SPACE_RAY: return "ray_query_world_space_ray"; + case IntrinsicOp::RAY_QUERY_PROCEDURAL_CANDIDATE_HIT: return "ray_query_procedural_candidate_hit"; + case IntrinsicOp::RAY_QUERY_TRIANGLE_CANDIDATE_HIT: return "ray_query_triangle_candidate_hit"; + case IntrinsicOp::RAY_QUERY_COMMITTED_HIT: return "ray_query_committed_hit"; + case IntrinsicOp::RAY_QUERY_COMMIT_TRIANGLE: return "ray_query_commit_triangle"; + case IntrinsicOp::RAY_QUERY_COMMIT_PROCEDURAL: return "ray_query_commit_procedural"; + case IntrinsicOp::RAY_QUERY_TERMINATE: return "ray_query_terminate"; + case IntrinsicOp::RAY_QUERY_PROCEED: return "ray_query_proceed"; + case IntrinsicOp::RAY_QUERY_IS_TRIANGLE_CANDIDATE: return "ray_query_is_triangle_candidate"; + case IntrinsicOp::RAY_QUERY_IS_PROCEDURAL_CANDIDATE: return "ray_query_is_procedural_candidate"; + case IntrinsicOp::RASTER_DISCARD: return "raster_discard"; + case 
IntrinsicOp::RASTER_DDX: return "raster_ddx"; + case IntrinsicOp::RASTER_DDY: return "raster_ddy"; + case IntrinsicOp::WARP_IS_FIRST_ACTIVE_LANE: return "warp_is_first_active_lane"; + case IntrinsicOp::WARP_FIRST_ACTIVE_LANE: return "warp_first_active_lane"; + case IntrinsicOp::WARP_ACTIVE_ALL_EQUAL: return "warp_active_all_equal"; + case IntrinsicOp::WARP_ACTIVE_BIT_AND: return "warp_active_bit_and"; + case IntrinsicOp::WARP_ACTIVE_BIT_OR: return "warp_active_bit_or"; + case IntrinsicOp::WARP_ACTIVE_BIT_XOR: return "warp_active_bit_xor"; + case IntrinsicOp::WARP_ACTIVE_COUNT_BITS: return "warp_active_count_bits"; + case IntrinsicOp::WARP_ACTIVE_MAX: return "warp_active_max"; + case IntrinsicOp::WARP_ACTIVE_MIN: return "warp_active_min"; + case IntrinsicOp::WARP_ACTIVE_PRODUCT: return "warp_active_product"; + case IntrinsicOp::WARP_ACTIVE_SUM: return "warp_active_sum"; + case IntrinsicOp::WARP_ACTIVE_ALL: return "warp_active_all"; + case IntrinsicOp::WARP_ACTIVE_ANY: return "warp_active_any"; + case IntrinsicOp::WARP_ACTIVE_BIT_MASK: return "warp_active_bit_mask"; + case IntrinsicOp::WARP_PREFIX_COUNT_BITS: return "warp_prefix_count_bits"; + case IntrinsicOp::WARP_PREFIX_SUM: return "warp_prefix_sum"; + case IntrinsicOp::WARP_PREFIX_PRODUCT: return "warp_prefix_product"; + case IntrinsicOp::WARP_READ_LANE: return "warp_read_lane"; + case IntrinsicOp::WARP_READ_FIRST_ACTIVE_LANE: return "warp_read_first_active_lane"; + case IntrinsicOp::INDIRECT_DISPATCH_SET_KERNEL: return "indirect_dispatch_set_kernel"; + case IntrinsicOp::INDIRECT_DISPATCH_SET_COUNT: return "indirect_dispatch_set_count"; + case IntrinsicOp::SHADER_EXECUTION_REORDER: return "shader_execution_reorder"; + case IntrinsicOp::CLOCK: return "clock"; + } + LUISA_ERROR_WITH_LOCATION("Unknown intrinsic operation: {}.", + static_cast(op)); +} + +IntrinsicOp intrinsic_op_from_string(luisa::string_view name) noexcept { + static const luisa::unordered_map m{ + {"nop", IntrinsicOp::NOP}, + {"unary_plus", 
IntrinsicOp::UNARY_PLUS}, + {"unary_minus", IntrinsicOp::UNARY_MINUS}, + {"unary_logic_not", IntrinsicOp::UNARY_LOGIC_NOT}, + {"unary_bit_not", IntrinsicOp::UNARY_BIT_NOT}, + {"binary_add", IntrinsicOp::BINARY_ADD}, + {"binary_sub", IntrinsicOp::BINARY_SUB}, + {"binary_mul", IntrinsicOp::BINARY_MUL}, + {"binary_div", IntrinsicOp::BINARY_DIV}, + {"binary_mod", IntrinsicOp::BINARY_MOD}, + {"binary_logic_and", IntrinsicOp::BINARY_LOGIC_AND}, + {"binary_logic_or", IntrinsicOp::BINARY_LOGIC_OR}, + {"binary_bit_and", IntrinsicOp::BINARY_BIT_AND}, + {"binary_bit_or", IntrinsicOp::BINARY_BIT_OR}, + {"binary_bit_xor", IntrinsicOp::BINARY_BIT_XOR}, + {"binary_shift_left", IntrinsicOp::BINARY_SHIFT_LEFT}, + {"binary_shift_right", IntrinsicOp::BINARY_SHIFT_RIGHT}, + {"binary_rotate_left", IntrinsicOp::BINARY_ROTATE_LEFT}, + {"binary_rotate_right", IntrinsicOp::BINARY_ROTATE_RIGHT}, + {"binary_less", IntrinsicOp::BINARY_LESS}, + {"binary_greater", IntrinsicOp::BINARY_GREATER}, + {"binary_less_equal", IntrinsicOp::BINARY_LESS_EQUAL}, + {"binary_greater_equal", IntrinsicOp::BINARY_GREATER_EQUAL}, + {"binary_equal", IntrinsicOp::BINARY_EQUAL}, + {"binary_not_equal", IntrinsicOp::BINARY_NOT_EQUAL}, + {"synchronize_block", IntrinsicOp::SYNCHRONIZE_BLOCK}, + {"all", IntrinsicOp::ALL}, + {"any", IntrinsicOp::ANY}, + {"select", IntrinsicOp::SELECT}, + {"clamp", IntrinsicOp::CLAMP}, + {"saturate", IntrinsicOp::SATURATE}, + {"lerp", IntrinsicOp::LERP}, + {"smoothstep", IntrinsicOp::SMOOTHSTEP}, + {"step", IntrinsicOp::STEP}, + {"abs", IntrinsicOp::ABS}, + {"min", IntrinsicOp::MIN}, + {"max", IntrinsicOp::MAX}, + {"clz", IntrinsicOp::CLZ}, + {"ctz", IntrinsicOp::CTZ}, + {"popcount", IntrinsicOp::POPCOUNT}, + {"reverse", IntrinsicOp::REVERSE}, + {"isinf", IntrinsicOp::ISINF}, + {"isnan", IntrinsicOp::ISNAN}, + {"acos", IntrinsicOp::ACOS}, + {"acosh", IntrinsicOp::ACOSH}, + {"asin", IntrinsicOp::ASIN}, + {"asinh", IntrinsicOp::ASINH}, + {"atan", IntrinsicOp::ATAN}, + {"atan2", 
IntrinsicOp::ATAN2}, + {"atanh", IntrinsicOp::ATANH}, + {"cos", IntrinsicOp::COS}, + {"cosh", IntrinsicOp::COSH}, + {"sin", IntrinsicOp::SIN}, + {"sinh", IntrinsicOp::SINH}, + {"tan", IntrinsicOp::TAN}, + {"tanh", IntrinsicOp::TANH}, + {"exp", IntrinsicOp::EXP}, + {"exp2", IntrinsicOp::EXP2}, + {"exp10", IntrinsicOp::EXP10}, + {"log", IntrinsicOp::LOG}, + {"log2", IntrinsicOp::LOG2}, + {"log10", IntrinsicOp::LOG10}, + {"pow", IntrinsicOp::POW}, + {"pow_int", IntrinsicOp::POW_INT}, + {"sqrt", IntrinsicOp::SQRT}, + {"rsqrt", IntrinsicOp::RSQRT}, + {"ceil", IntrinsicOp::CEIL}, + {"floor", IntrinsicOp::FLOOR}, + {"fract", IntrinsicOp::FRACT}, + {"trunc", IntrinsicOp::TRUNC}, + {"round", IntrinsicOp::ROUND}, + {"rint", IntrinsicOp::RINT}, + {"fma", IntrinsicOp::FMA}, + {"copysign", IntrinsicOp::COPYSIGN}, + {"cross", IntrinsicOp::CROSS}, + {"dot", IntrinsicOp::DOT}, + {"length", IntrinsicOp::LENGTH}, + {"length_squared", IntrinsicOp::LENGTH_SQUARED}, + {"normalize", IntrinsicOp::NORMALIZE}, + {"faceforward", IntrinsicOp::FACEFORWARD}, + {"reflect", IntrinsicOp::REFLECT}, + {"reduce_sum", IntrinsicOp::REDUCE_SUM}, + {"reduce_product", IntrinsicOp::REDUCE_PRODUCT}, + {"reduce_min", IntrinsicOp::REDUCE_MIN}, + {"reduce_max", IntrinsicOp::REDUCE_MAX}, + {"outer_product", IntrinsicOp::OUTER_PRODUCT}, + {"matrix_comp_neg", IntrinsicOp::MATRIX_COMP_NEG}, + {"matrix_comp_add", IntrinsicOp::MATRIX_COMP_ADD}, + {"matrix_comp_sub", IntrinsicOp::MATRIX_COMP_SUB}, + {"matrix_comp_mul", IntrinsicOp::MATRIX_COMP_MUL}, + {"matrix_comp_div", IntrinsicOp::MATRIX_COMP_DIV}, + {"matrix_linalg_mul", IntrinsicOp::MATRIX_LINALG_MUL}, + {"matrix_determinant", IntrinsicOp::MATRIX_DETERMINANT}, + {"matrix_transpose", IntrinsicOp::MATRIX_TRANSPOSE}, + {"matrix_inverse", IntrinsicOp::MATRIX_INVERSE}, + {"atomic_exchange", IntrinsicOp::ATOMIC_EXCHANGE}, + {"atomic_compare_exchange", IntrinsicOp::ATOMIC_COMPARE_EXCHANGE}, + {"atomic_fetch_add", IntrinsicOp::ATOMIC_FETCH_ADD}, + {"atomic_fetch_sub", 
IntrinsicOp::ATOMIC_FETCH_SUB}, + {"atomic_fetch_and", IntrinsicOp::ATOMIC_FETCH_AND}, + {"atomic_fetch_or", IntrinsicOp::ATOMIC_FETCH_OR}, + {"atomic_fetch_xor", IntrinsicOp::ATOMIC_FETCH_XOR}, + {"atomic_fetch_min", IntrinsicOp::ATOMIC_FETCH_MIN}, + {"atomic_fetch_max", IntrinsicOp::ATOMIC_FETCH_MAX}, + {"buffer_read", IntrinsicOp::BUFFER_READ}, + {"buffer_write", IntrinsicOp::BUFFER_WRITE}, + {"buffer_size", IntrinsicOp::BUFFER_SIZE}, + {"byte_buffer_read", IntrinsicOp::BYTE_BUFFER_READ}, + {"byte_buffer_write", IntrinsicOp::BYTE_BUFFER_WRITE}, + {"byte_buffer_size", IntrinsicOp::BYTE_BUFFER_SIZE}, + {"texture2d_read", IntrinsicOp::TEXTURE2D_READ}, + {"texture2d_write", IntrinsicOp::TEXTURE2D_WRITE}, + {"texture2d_size", IntrinsicOp::TEXTURE2D_SIZE}, + {"texture2d_sample", IntrinsicOp::TEXTURE2D_SAMPLE}, + {"texture2d_sample_level", IntrinsicOp::TEXTURE2D_SAMPLE_LEVEL}, + {"texture2d_sample_grad", IntrinsicOp::TEXTURE2D_SAMPLE_GRAD}, + {"texture2d_sample_grad_level", IntrinsicOp::TEXTURE2D_SAMPLE_GRAD_LEVEL}, + {"texture3d_read", IntrinsicOp::TEXTURE3D_READ}, + {"texture3d_write", IntrinsicOp::TEXTURE3D_WRITE}, + {"texture3d_size", IntrinsicOp::TEXTURE3D_SIZE}, + {"texture3d_sample", IntrinsicOp::TEXTURE3D_SAMPLE}, + {"texture3d_sample_level", IntrinsicOp::TEXTURE3D_SAMPLE_LEVEL}, + {"texture3d_sample_grad", IntrinsicOp::TEXTURE3D_SAMPLE_GRAD}, + {"texture3d_sample_grad_level", IntrinsicOp::TEXTURE3D_SAMPLE_GRAD_LEVEL}, + {"bindless_texture2d_sample", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE}, + {"bindless_texture2d_sample_level", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL}, + {"bindless_texture2d_sample_grad", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD}, + {"bindless_texture2d_sample_grad_level", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL}, + {"bindless_texture3d_sample", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE}, + {"bindless_texture3d_sample_level", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL}, + {"bindless_texture3d_sample_grad", 
IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD}, + {"bindless_texture3d_sample_grad_level", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL}, + {"bindless_texture2d_sample_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_SAMPLER}, + {"bindless_texture2d_sample_level_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_LEVEL_SAMPLER}, + {"bindless_texture2d_sample_grad_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_SAMPLER}, + {"bindless_texture2d_sample_grad_level_sampler", IntrinsicOp::BINDLESS_TEXTURE2D_SAMPLE_GRAD_LEVEL_SAMPLER}, + {"bindless_texture3d_sample_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_SAMPLER}, + {"bindless_texture3d_sample_level_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_LEVEL_SAMPLER}, + {"bindless_texture3d_sample_grad_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_SAMPLER}, + {"bindless_texture3d_sample_grad_level_sampler", IntrinsicOp::BINDLESS_TEXTURE3D_SAMPLE_GRAD_LEVEL_SAMPLER}, + {"bindless_texture2d_read", IntrinsicOp::BINDLESS_TEXTURE2D_READ}, + {"bindless_texture3d_read", IntrinsicOp::BINDLESS_TEXTURE3D_READ}, + {"bindless_texture2d_read_level", IntrinsicOp::BINDLESS_TEXTURE2D_READ_LEVEL}, + {"bindless_texture3d_read_level", IntrinsicOp::BINDLESS_TEXTURE3D_READ_LEVEL}, + {"bindless_texture2d_size", IntrinsicOp::BINDLESS_TEXTURE2D_SIZE}, + {"bindless_texture3d_size", IntrinsicOp::BINDLESS_TEXTURE3D_SIZE}, + {"bindless_texture2d_size_level", IntrinsicOp::BINDLESS_TEXTURE2D_SIZE_LEVEL}, + {"bindless_texture3d_size_level", IntrinsicOp::BINDLESS_TEXTURE3D_SIZE_LEVEL}, + {"bindless_buffer_read", IntrinsicOp::BINDLESS_BUFFER_READ}, + {"bindless_buffer_write", IntrinsicOp::BINDLESS_BUFFER_WRITE}, + {"bindless_buffer_size", IntrinsicOp::BINDLESS_BUFFER_SIZE}, + {"bindless_byte_buffer_read", IntrinsicOp::BINDLESS_BYTE_BUFFER_READ}, + {"bindless_byte_buffer_write", IntrinsicOp::BINDLESS_BYTE_BUFFER_WRITE}, + {"bindless_byte_buffer_size", IntrinsicOp::BINDLESS_BYTE_BUFFER_SIZE}, + {"buffer_device_address", 
IntrinsicOp::BUFFER_DEVICE_ADDRESS}, + {"bindless_buffer_device_address", IntrinsicOp::BINDLESS_BUFFER_DEVICE_ADDRESS}, + {"device_address_read", IntrinsicOp::DEVICE_ADDRESS_READ}, + {"device_address_write", IntrinsicOp::DEVICE_ADDRESS_WRITE}, + {"aggregate", IntrinsicOp::AGGREGATE}, + {"shuffle", IntrinsicOp::SHUFFLE}, + {"insert", IntrinsicOp::INSERT}, + {"extract", IntrinsicOp::EXTRACT}, + {"autodiff_requires_gradient", IntrinsicOp::AUTODIFF_REQUIRES_GRADIENT}, + {"autodiff_gradient", IntrinsicOp::AUTODIFF_GRADIENT}, + {"autodiff_gradient_marker", IntrinsicOp::AUTODIFF_GRADIENT_MARKER}, + {"autodiff_accumulate_gradient", IntrinsicOp::AUTODIFF_ACCUMULATE_GRADIENT}, + {"autodiff_backward", IntrinsicOp::AUTODIFF_BACKWARD}, + {"autodiff_detach", IntrinsicOp::AUTODIFF_DETACH}, + {"ray_tracing_instance_transform", IntrinsicOp::RAY_TRACING_INSTANCE_TRANSFORM}, + {"ray_tracing_instance_user_id", IntrinsicOp::RAY_TRACING_INSTANCE_USER_ID}, + {"ray_tracing_instance_visibility_mask", IntrinsicOp::RAY_TRACING_INSTANCE_VISIBILITY_MASK}, + {"ray_tracing_set_instance_transform", IntrinsicOp::RAY_TRACING_SET_INSTANCE_TRANSFORM}, + {"ray_tracing_set_instance_visibility_mask", IntrinsicOp::RAY_TRACING_SET_INSTANCE_VISIBILITY_MASK}, + {"ray_tracing_set_instance_opacity", IntrinsicOp::RAY_TRACING_SET_INSTANCE_OPACITY}, + {"ray_tracing_set_instance_user_id", IntrinsicOp::RAY_TRACING_SET_INSTANCE_USER_ID}, + {"ray_tracing_trace_closest", IntrinsicOp::RAY_TRACING_TRACE_CLOSEST}, + {"ray_tracing_trace_any", IntrinsicOp::RAY_TRACING_TRACE_ANY}, + {"ray_tracing_query_all", IntrinsicOp::RAY_TRACING_QUERY_ALL}, + {"ray_tracing_query_any", IntrinsicOp::RAY_TRACING_QUERY_ANY}, + {"ray_tracing_instance_motion_matrix", IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_MATRIX}, + {"ray_tracing_instance_motion_srt", IntrinsicOp::RAY_TRACING_INSTANCE_MOTION_SRT}, + {"ray_tracing_set_instance_motion_matrix", IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_MATRIX}, + {"ray_tracing_set_instance_motion_srt", 
IntrinsicOp::RAY_TRACING_SET_INSTANCE_MOTION_SRT}, + {"ray_tracing_trace_closest_motion_blur", IntrinsicOp::RAY_TRACING_TRACE_CLOSEST_MOTION_BLUR}, + {"ray_tracing_trace_any_motion_blur", IntrinsicOp::RAY_TRACING_TRACE_ANY_MOTION_BLUR}, + {"ray_tracing_query_all_motion_blur", IntrinsicOp::RAY_TRACING_QUERY_ALL_MOTION_BLUR}, + {"ray_tracing_query_any_motion_blur", IntrinsicOp::RAY_TRACING_QUERY_ANY_MOTION_BLUR}, + {"ray_query_world_space_ray", IntrinsicOp::RAY_QUERY_WORLD_SPACE_RAY}, + {"ray_query_procedural_candidate_hit", IntrinsicOp::RAY_QUERY_PROCEDURAL_CANDIDATE_HIT}, + {"ray_query_triangle_candidate_hit", IntrinsicOp::RAY_QUERY_TRIANGLE_CANDIDATE_HIT}, + {"ray_query_committed_hit", IntrinsicOp::RAY_QUERY_COMMITTED_HIT}, + {"ray_query_commit_triangle", IntrinsicOp::RAY_QUERY_COMMIT_TRIANGLE}, + {"ray_query_commit_procedural", IntrinsicOp::RAY_QUERY_COMMIT_PROCEDURAL}, + {"ray_query_terminate", IntrinsicOp::RAY_QUERY_TERMINATE}, + {"ray_query_proceed", IntrinsicOp::RAY_QUERY_PROCEED}, + {"ray_query_is_triangle_candidate", IntrinsicOp::RAY_QUERY_IS_TRIANGLE_CANDIDATE}, + {"ray_query_is_procedural_candidate", IntrinsicOp::RAY_QUERY_IS_PROCEDURAL_CANDIDATE}, + {"raster_discard", IntrinsicOp::RASTER_DISCARD}, + {"raster_ddx", IntrinsicOp::RASTER_DDX}, + {"raster_ddy", IntrinsicOp::RASTER_DDY}, + {"warp_is_first_active_lane", IntrinsicOp::WARP_IS_FIRST_ACTIVE_LANE}, + {"warp_first_active_lane", IntrinsicOp::WARP_FIRST_ACTIVE_LANE}, + {"warp_active_all_equal", IntrinsicOp::WARP_ACTIVE_ALL_EQUAL}, + {"warp_active_bit_and", IntrinsicOp::WARP_ACTIVE_BIT_AND}, + {"warp_active_bit_or", IntrinsicOp::WARP_ACTIVE_BIT_OR}, + {"warp_active_bit_xor", IntrinsicOp::WARP_ACTIVE_BIT_XOR}, + {"warp_active_count_bits", IntrinsicOp::WARP_ACTIVE_COUNT_BITS}, + {"warp_active_max", IntrinsicOp::WARP_ACTIVE_MAX}, + {"warp_active_min", IntrinsicOp::WARP_ACTIVE_MIN}, + {"warp_active_product", IntrinsicOp::WARP_ACTIVE_PRODUCT}, + {"warp_active_sum", IntrinsicOp::WARP_ACTIVE_SUM}, + 
{"warp_active_all", IntrinsicOp::WARP_ACTIVE_ALL}, + {"warp_active_any", IntrinsicOp::WARP_ACTIVE_ANY}, + {"warp_active_bit_mask", IntrinsicOp::WARP_ACTIVE_BIT_MASK}, + {"warp_prefix_count_bits", IntrinsicOp::WARP_PREFIX_COUNT_BITS}, + {"warp_prefix_sum", IntrinsicOp::WARP_PREFIX_SUM}, + {"warp_prefix_product", IntrinsicOp::WARP_PREFIX_PRODUCT}, + {"warp_read_lane", IntrinsicOp::WARP_READ_LANE}, + {"warp_read_first_active_lane", IntrinsicOp::WARP_READ_FIRST_ACTIVE_LANE}, + {"indirect_dispatch_set_kernel", IntrinsicOp::INDIRECT_DISPATCH_SET_KERNEL}, + {"indirect_dispatch_set_count", IntrinsicOp::INDIRECT_DISPATCH_SET_COUNT}, + {"shader_execution_reorder", IntrinsicOp::SHADER_EXECUTION_REORDER}, + {"clock", IntrinsicOp::CLOCK}, + }; + auto iter = m.find(name); + LUISA_ASSERT(iter != m.end(), "Unknown intrinsic operation: {}.", name); + return iter->second; +} diff --git a/src/xir/special_register.cpp b/src/xir/special_register.cpp new file mode 100644 index 000000000..8b7db3285 --- /dev/null +++ b/src/xir/special_register.cpp @@ -0,0 +1,27 @@ +#include +#include +#include + +namespace luisa::compute::xir { + +namespace detail { +const Type *special_register_type_uint() noexcept { return Type::of(); } +const Type *special_register_type_uint3() noexcept { return Type::of(); } +}// namespace detail + +SpecialRegister *SpecialRegister::create(DerivedSpecialRegisterTag tag) noexcept { + switch (tag) { + case DerivedSpecialRegisterTag::THREAD_ID: return Pool::current()->create(); + case DerivedSpecialRegisterTag::BLOCK_ID: return Pool::current()->create(); + case DerivedSpecialRegisterTag::WARP_LANE_ID: return Pool::current()->create(); + case DerivedSpecialRegisterTag::DISPATCH_ID: return Pool::current()->create(); + case DerivedSpecialRegisterTag::KERNEL_ID: return Pool::current()->create(); + case DerivedSpecialRegisterTag::OBJECT_ID: return Pool::current()->create(); + case DerivedSpecialRegisterTag::BLOCK_SIZE: return Pool::current()->create(); + case 
DerivedSpecialRegisterTag::WARP_SIZE: return Pool::current()->create(); + case DerivedSpecialRegisterTag::DISPATCH_SIZE: return Pool::current()->create(); + } + LUISA_ERROR_WITH_LOCATION("Unexpected special register tag."); +} + +}// namespace luisa::compute::xir diff --git a/src/xir/translators/ast2xir.cpp b/src/xir/translators/ast2xir.cpp index 844104003..0117ef5cd 100644 --- a/src/xir/translators/ast2xir.cpp +++ b/src/xir/translators/ast2xir.cpp @@ -242,39 +242,26 @@ class AST2XIRContext { return _translate_typed_literal(key); } - [[nodiscard]] static Value *_translate_builtin_variable(Builder &b, Variable ast_var) noexcept { + [[nodiscard]] static Value *_translate_builtin_variable(Variable ast_var) noexcept { LUISA_ASSERT(ast_var.is_builtin(), "Unresolved variable reference."); - auto op = [tag = ast_var.tag(), t = ast_var.type()] { + auto r = [tag = ast_var.tag()] { switch (tag) { - case Variable::Tag::THREAD_ID: - LUISA_ASSERT(t == Type::of(), "Invalid thread_id type: {}", t->description()); - return IntrinsicOp::THREAD_ID; - case Variable::Tag::BLOCK_ID: - LUISA_ASSERT(t == Type::of(), "Invalid block_id type: {}.", t->description()); - return IntrinsicOp::BLOCK_ID; - case Variable::Tag::DISPATCH_ID: - LUISA_ASSERT(t == Type::of(), "Invalid dispatch_id type: {}", t->description()); - return IntrinsicOp::DISPATCH_ID; - case Variable::Tag::DISPATCH_SIZE: - LUISA_ASSERT(t == Type::of(), "Invalid dispatch_size type: {}", t->description()); - return IntrinsicOp::DISPATCH_SIZE; - case Variable::Tag::KERNEL_ID: - LUISA_ASSERT(t == Type::of(), "Invalid kernel_id type: {}", t->description()); - return IntrinsicOp::KERNEL_ID; - case Variable::Tag::WARP_LANE_COUNT: - LUISA_ASSERT(t == Type::of(), "Invalid warp_size type: {}", t->description()); - return IntrinsicOp::WARP_SIZE; - case Variable::Tag::WARP_LANE_ID: - LUISA_ASSERT(t == Type::of(), "Invalid warp_lane_id type: {}", t->description()); - return IntrinsicOp::WARP_LANE_ID; - case Variable::Tag::OBJECT_ID: - 
LUISA_ASSERT(t == Type::of(), "Invalid object_id type: {}", t->description()); - return IntrinsicOp::OBJECT_ID; + case Variable::Tag::THREAD_ID: return SpecialRegister::create(DerivedSpecialRegisterTag::THREAD_ID); + case Variable::Tag::BLOCK_ID: return SpecialRegister::create(DerivedSpecialRegisterTag::BLOCK_ID); + case Variable::Tag::DISPATCH_ID: return SpecialRegister::create(DerivedSpecialRegisterTag::DISPATCH_ID); + case Variable::Tag::DISPATCH_SIZE: return SpecialRegister::create(DerivedSpecialRegisterTag::DISPATCH_SIZE); + case Variable::Tag::KERNEL_ID: return SpecialRegister::create(DerivedSpecialRegisterTag::KERNEL_ID); + case Variable::Tag::WARP_LANE_COUNT: return SpecialRegister::create(DerivedSpecialRegisterTag::WARP_SIZE); + case Variable::Tag::WARP_LANE_ID: return SpecialRegister::create(DerivedSpecialRegisterTag::WARP_LANE_ID); + case Variable::Tag::OBJECT_ID: return SpecialRegister::create(DerivedSpecialRegisterTag::OBJECT_ID); default: break; } LUISA_ERROR_WITH_LOCATION("Unexpected variable type."); }(); - return b.call(ast_var.type(), op, {}); + LUISA_ASSERT(r->type() == ast_var.type(), "Special register {} type mismatch: {} vs {}.", + xir::to_string(r->derived_special_register_tag()), + r->type()->description(), ast_var.type()->description()); + return r; } [[nodiscard]] Value *_translate_ref_expr(Builder &b, const RefExpr *expr, bool load_lval) noexcept { @@ -284,7 +271,7 @@ class AST2XIRContext { auto var = iter->second; return load_lval && var->is_lvalue() ? 
b.load(expr->type(), var) : var; } - return _translate_builtin_variable(b, ast_var); + return _translate_builtin_variable(ast_var); } [[nodiscard]] Value *_translate_constant_expr(const ConstantExpr *expr) noexcept { @@ -1049,7 +1036,7 @@ class AST2XIRContext { "Local variable already exists."); auto v = _current.variables.emplace(ast_local, b.alloca_local(ast_local.type())).first->second; if (ast_local.is_builtin()) { - auto builtin_init = _translate_builtin_variable(b, ast_local); + auto builtin_init = _translate_builtin_variable(ast_local); LUISA_ASSERT(v->type() == builtin_init->type(), "Variable type mismatch."); b.store(v, builtin_init); } diff --git a/src/xir/translators/xir2text.cpp b/src/xir/translators/xir2text.cpp index a0ae04263..ff1250fe3 100644 --- a/src/xir/translators/xir2text.cpp +++ b/src/xir/translators/xir2text.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,11 @@ class XIR2TextTranslator final { [[nodiscard]] auto _value_ident(const Value *value) noexcept { auto uid = _value_uid(value); + if (value->derived_value_tag() == DerivedValueTag::SPECIAL_REGISTER) { + auto r = static_cast(value); + auto name = xir::to_string(r->derived_special_register_tag()); + return luisa::format("%{}.{}", uid, name); + } return luisa::format("%{}", uid); }