Skip to content
105 changes: 103 additions & 2 deletions onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@
namespace onnxruntime {
namespace coreml {

namespace {

// Returns true when `name` is one of the FusedConv activation names this file
// can lower. The list must stay in sync with the activation -> MIL op dispatch
// in the FusedConv branch of the ML Program conv emission code.
// Matching is exact and case-sensitive; an empty name is not supported.
bool IsSupportedFusedConvActivation(const std::string& name) {
  static constexpr const char* kSupportedActivations[] = {
      "Relu", "Sigmoid", "Tanh", "LeakyRelu", "Clip", "HardSigmoid"};

  for (const char* supported : kSupportedActivations) {
    if (name == supported) {
      return true;
    }
  }
  return false;
}

} // namespace

class ConvOpBuilder : public BaseOpBuilder {
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;

Expand Down Expand Up @@ -92,9 +101,83 @@

AddPadTypeAndPads(*conv_op, model_builder, op_type, helper, num_spatial_dims);

const bool is_fused_conv = node.OpType() == "FusedConv";
if (!is_fused_conv) {
  // Plain Conv: the conv op writes the node's output directly.
  AddOperationOutput(*conv_op, *node.OutputDefs()[0]);
  model_builder.AddOperation(std::move(conv_op));
} else {
  // com.microsoft:FusedConv = Conv + activation. The conv result is written to
  // an intermediate value and a separate activation MIL op consumes it and
  // produces the node's real output.
  const std::string activation = helper.Get("activation", std::string(""));
  const auto activation_params = helper.Get("activation_params", std::vector<float>{});

  // Resolve the MIL op name before anything is appended to the model, so an
  // unsupported activation fails without leaving a conv op behind.
  std::string_view mil_op;
  if (activation == "Relu") {
    mil_op = "relu";
  } else if (activation == "Sigmoid") {
    mil_op = "sigmoid";
  } else if (activation == "Tanh") {
    mil_op = "tanh";
  } else if (activation == "LeakyRelu") {
    mil_op = "leaky_relu";
  } else if (activation == "Clip") {
    mil_op = "clip";
  } else if (activation == "HardSigmoid") {
    mil_op = "sigmoid_hard";
  } else {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                           "FusedConv has unsupported activation: ", activation);
  }

  // The intermediate conv output has the same element type and shape as the
  // node output, because the activation is elementwise.
  const auto output_elem_type = static_cast<int32_t>(
      node.OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type());
  std::vector<int64_t> output_shape;
  ORT_RETURN_IF_NOT(GetShape(*node.OutputDefs()[0], output_shape, logger),
                    "Failed to get FusedConv output shape");

  // Held by value: the name is needed after conv_op has been moved away.
  const std::string conv_out_name = model_builder.GetUniqueName(node, "fused_conv_conv_out");
  AddIntermediateOperationOutput(*conv_op, conv_out_name, output_elem_type, output_shape);
  model_builder.AddOperation(std::move(conv_op));

  auto act_op = model_builder.CreateOperation(node, mil_op, "activation");
  AddOperationInput(*act_op, "x", conv_out_name);

  // Adds a scalar constant input named `port_name` to the activation op.
  // The constant is emitted as float when the output is float, otherwise as
  // MLFloat16. NOTE(review): this assumes float16 is the only non-float type
  // that reaches this point - confirm against the support check.
  auto add_scalar = [&](std::string_view port_name, float value) {
    if (output_elem_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
      AddOperationInput(*act_op, std::string(port_name),
                        model_builder.AddScalarConstant(act_op->type(), std::string(port_name), value));
    } else {
      AddOperationInput(*act_op, std::string(port_name),
                        model_builder.AddScalarConstant(act_op->type(), std::string(port_name), MLFloat16(value)));
    }
  };

  // Activation-specific params. ConvActivationFusion packs them into
  // `activation_params` in this order (see conv_activation_fusion.cc):
  //   LeakyRelu:   [alpha]
  //   Clip:        [min, max]
  //   HardSigmoid: [alpha, beta]
  // Missing entries fall back to the defaults used below.
  if (activation == "LeakyRelu") {
    const float alpha = activation_params.empty() ? 0.01f : activation_params[0];
    add_scalar("alpha", alpha);
  } else if (activation == "Clip") {
    const float min_v = activation_params.size() > 0 ? activation_params[0]
                                                     : std::numeric_limits<float>::lowest();
    const float max_v = activation_params.size() > 1 ? activation_params[1]
                                                     : std::numeric_limits<float>::max();
    // The MIL clip op names its lower/upper bounds `alpha` and `beta`.
    add_scalar("alpha", min_v);
    add_scalar("beta", max_v);
  } else if (activation == "HardSigmoid") {
    const float alpha = activation_params.size() > 0 ? activation_params[0] : 0.2f;
    const float beta = activation_params.size() > 1 ? activation_params[1] : 0.5f;
    add_scalar("alpha", alpha);
    add_scalar("beta", beta);
  }

  // The activation op, not the conv op, produces the node's output.
  AddOperationOutput(*act_op, *node.OutputDefs()[0]);
  model_builder.AddOperation(std::move(act_op));
}
} else {
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

Expand Down Expand Up @@ -232,6 +315,24 @@
const logging::Logger& logger) const {
const auto& name = node.Name();
const auto& input_defs = node.InputDefs();
const bool is_fused_conv = (node.OpType() == "FusedConv");

// A FusedConv node is a Conv with an activation folded in. It is only lowered
// on the MLProgram path; in NeuralNetwork mode the node is rejected so that it
// runs elsewhere instead of being emitted as a Conv without its activation.
if (is_fused_conv && !input_params.create_mlprogram) {
  LOGS(logger, VERBOSE) << "FusedConv is only supported in MLProgram format";
  return false;
}

// Reject FusedConv nodes whose `activation` attribute is missing (read as an
// empty string) or names an activation this builder cannot emit.
if (is_fused_conv) {
  NodeAttrHelper attr_reader(node);
  const std::string fused_activation = attr_reader.Get("activation", std::string(""));
  const bool activation_ok = IsSupportedFusedConvActivation(fused_activation);
  if (!activation_ok) {
    LOGS(logger, VERBOSE) << "FusedConv activation [" << fused_activation
                          << "] is not supported by the CoreML EP";
    return false;
  }
}

const auto& weight_name = input_defs[1]->Name();
const auto* weight = input_params.graph_viewer.GetConstantInitializer(weight_name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
CreateActivationOpBuilder("Elu", op_registrations);
CreateActivationOpBuilder("HardSigmoid", op_registrations);

// Microsoft-domain ops produced by ORT's own optimizer passes.
CreateQuickGeluOpBuilder("QuickGelu", op_registrations);
// FusedConv (produced by ConvActivationFusion) reuses the existing
// ConvOpBuilder, which branches on the node's op type internally.
CreateConvOpBuilder("FusedConv", op_registrations);

// Unary ops
CreateUnaryOpBuilder("Erf", op_registrations);
Expand Down
132 changes: 132 additions & 0 deletions onnxruntime/test/providers/coreml/coreml_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1164,6 +1164,138 @@ TEST(CoreMLExecutionProviderTest, QuickGeluTestFp16) {
#endif
}

namespace {
// Creates an in-memory ModelProto holding exactly one com.microsoft:FusedConv
// node, used by the FusedConv tests in this file.
//   X: float graph input,  shape {1, 2, 4, 4}
//   W: float initializer,  shape {3, 2, 2, 2}, filled with a fixed ramp
//   Y: float graph output, shape {1, 3, 3, 3}
// No bias input is added. `pads` is set to all zeros; no other Conv attribute
// is set. `activation` is stored in the node's `activation` attribute, and
// `activation_params` is attached only when the vector is non-empty.
ONNX_NAMESPACE::ModelProto MakeFusedConvModel(const std::string& activation,
                                              const std::vector<float>& activation_params) {
  ONNX_NAMESPACE::ModelProto model;
  model.set_ir_version(ONNX_NAMESPACE::IR_VERSION);

  // Opset imports: default ONNX domain at version 13, com.microsoft at version 1.
  const auto import_opset = [&model](const char* domain, int64_t version) {
    auto* opset = model.add_opset_import();
    opset->set_domain(domain);
    opset->set_version(version);
  };
  import_opset("", 13);
  import_opset("com.microsoft", 1);

  auto* graph = model.mutable_graph();
  graph->set_name("fused_conv_test");

  // Fills in a graph input/output entry as a float tensor with fixed dims.
  const auto describe_float_tensor = [](auto* value_info, const char* value_name,
                                        const std::vector<int64_t>& dims) {
    value_info->set_name(value_name);
    auto* tensor_type = value_info->mutable_type()->mutable_tensor_type();
    tensor_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
    for (const int64_t dim : dims) {
      tensor_type->mutable_shape()->add_dim()->set_dim_value(dim);
    }
  };
  describe_float_tensor(graph->add_input(), "X", {1, 2, 4, 4});
  describe_float_tensor(graph->add_output(), "Y", {1, 3, 3, 3});

  // Constant weight: 24 floats following the ramp i * 0.05 - 0.4.
  constexpr int kWeightElements = 3 * 2 * 2 * 2;
  auto* weight = graph->add_initializer();
  weight->set_name("W");
  weight->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  weight->add_dims(3);
  weight->add_dims(2);
  weight->add_dims(2);
  weight->add_dims(2);
  for (int index = 0; index < kWeightElements; ++index) {
    weight->add_float_data(static_cast<float>(index) * 0.05f - 0.4f);
  }

  auto* fused_conv = graph->add_node();
  fused_conv->set_op_type("FusedConv");
  fused_conv->set_domain("com.microsoft");
  fused_conv->add_input("X");
  fused_conv->add_input("W");
  fused_conv->add_output("Y");

  // Pads are spelled out explicitly because the CoreML conv builder's
  // VALID-pad branch omits the 'pad' input that the MIL op requires.
  constexpr int kPadEntries = 4;
  auto* pads = fused_conv->add_attribute();
  pads->set_name("pads");
  pads->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INTS);
  for (int entry = 0; entry < kPadEntries; ++entry) {
    pads->add_ints(0);
  }

  auto* activation_attr = fused_conv->add_attribute();
  activation_attr->set_name("activation");
  activation_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_STRING);
  activation_attr->set_s(activation);

  // Param-less activations get no `activation_params` attribute at all.
  if (activation_params.empty()) {
    return model;
  }

  auto* params_attr = fused_conv->add_attribute();
  params_attr->set_name("activation_params");
  params_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_FLOATS);
  for (const float param : activation_params) {
    params_attr->add_floats(param);
  }
  return model;
}

// Builds the single-node FusedConv model for `activation` / `activation_params`,
// serializes it, and checks it against the CoreML EP in MLProgram format.
//
// On Apple platforms the model is run with a fixed input ramp for X and the
// outputs are verified through RunAndVerifyOutputsWithEP, requiring every node
// to be assigned to the EP; `log_id` is passed through as the run's log id.
// On other platforms only model loading and node assignment are checked, so
// `log_id` is unused there (hence [[maybe_unused]]).
void RunFusedConvTest(const std::string& activation,
                      const std::vector<float>& activation_params,
                      [[maybe_unused]] std::string_view log_id) {
  auto model_proto = MakeFusedConvModel(activation, activation_params);
  std::string model_data;
  ASSERT_TRUE(model_proto.SerializeToString(&model_data));
  gsl::span<const std::byte> model_span{reinterpret_cast<const std::byte*>(model_data.data()), model_data.size()};

#if defined(__APPLE__)
  // Input ramp i * 0.1 - 1.5 over the 32 elements of X.
  std::vector<float> x_data(1 * 2 * 4 * 4);
  for (size_t i = 0; i < x_data.size(); ++i) x_data[i] = static_cast<float>(i) * 0.1f - 1.5f;
  OrtValue ml_value_x;
  AllocatorPtr allocator = CPUAllocator::DefaultInstance();
  CreateMLValue<float>(allocator, {1, 2, 4, 4}, x_data, &ml_value_x);

  NameMLValMap feeds;
  feeds.insert(std::make_pair("X", ml_value_x));

  RunAndVerifyOutputsWithEP(model_span, std::string(log_id),
                            MakeCoreMLExecutionProvider("MLProgram"),
                            feeds,
                            EPVerificationParams{ExpectedEPNodeAssignment::All});
#else
  TestModelLoad(model_span, MakeCoreMLExecutionProvider("MLProgram"), ExpectedEPNodeAssignment::All);
#endif
}
} // namespace

TEST(CoreMLExecutionProviderTest, FusedConvTestRelu) {
  // Relu takes no parameters, so the node is built without an
  // `activation_params` attribute. Covers the basic Conv -> activation wiring.
  const std::vector<float> no_activation_params;
  RunFusedConvTest("Relu", no_activation_params, "FusedConvTestRelu_MLProgram");
}
Comment thread
yuslepukhin marked this conversation as resolved.

TEST(CoreMLExecutionProviderTest, FusedConvTestHardSigmoid) {
  // HardSigmoid carries two parameters (alpha, beta). Non-default values are
  // used so that a mistake in forwarding `activation_params` changes the
  // result. Depends on the HardSigmoid CoreML builder landed in #28182.
  const std::vector<float> alpha_beta{0.15f, 0.55f};
  RunFusedConvTest("HardSigmoid", alpha_beta, "FusedConvTestHardSigmoid_MLProgram");
}

TEST(CoreMLExecutionProviderTest, FusedConvTestClip) {
  // Clip carries two parameters (min, max), which map to alpha=min and
  // beta=max on CoreML's clip op. Covers the remaining parametric activation.
  const std::vector<float> min_max{-0.5f, 0.5f};
  RunFusedConvTest("Clip", min_max, "FusedConvTestClip_MLProgram");
}

TEST(CoreMLExecutionProviderTest, FusedConvTestLeakyRelu) {
  // LeakyRelu carries a single parameter (alpha). Heavily used by YOLOv3: a
  // CPU-optimized YOLOv3 graph contains 72 Conv -> LeakyRelu fusions, all of
  // which would otherwise fall back to CPU and fragment the CoreML partition.
  const std::vector<float> alpha_only{0.1f};
  RunFusedConvTest("LeakyRelu", alpha_only, "FusedConvTestLeakyRelu_MLProgram");
}

TEST(CoreMLExecutionProviderTest, FusedConvTestSigmoid) {
  // Sigmoid takes no parameters. It differs from the Relu test only in the
  // emitted MIL op (`sigmoid` vs `relu`), so it guards the op-name dispatch.
  const std::vector<float> no_activation_params;
  RunFusedConvTest("Sigmoid", no_activation_params, "FusedConvTestSigmoid_MLProgram");
}

TEST(CoreMLExecutionProviderTest, FusedConvTestTanh) {
  // Tanh takes no parameters. Same rationale as the Sigmoid test, for the
  // remaining elementwise activation.
  const std::vector<float> no_activation_params;
  RunFusedConvTest("Tanh", no_activation_params, "FusedConvTestTanh_MLProgram");
}
#endif // !(ORT_MINIMAL_BUILD)
} // namespace test
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
|ai.onnx:Transpose||
|ai.onnx:Unsqueeze||
|com.microsoft:QuickGelu|Produced by ORT's `QuickGeluFusion` optimizer pass. Decomposed into `mul` / `sigmoid` / `mul`.|
|com.microsoft:FusedConv|Produced by ORT's `ConvActivationFusion` pass. Decomposed into `conv` + the fused activation (`Relu`, `Sigmoid`, `Tanh`, `LeakyRelu`, `Clip`, `HardSigmoid`).|
Loading