diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
index 70002b6295f5a..2817f34bc64f2 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -13,14 +13,6 @@ using namespace CoreML::Specification;
 namespace onnxruntime {
 namespace coreml {
 
-// Once all ops are supportted FP16, we can remove it. Before that, we keep a set of ops to
-// filter suppported ones.
-static std::set<std::string> Float16Ops = {
-    "Add", "ArgMax", "AveragePool", "BatchNormalization", "Cast", "Clip", "Concat", "Conv", "ConvTranspose",
-    "DepthToSpace", "Div", "Gelu", "Gemm", "GlobalAveragePool", "GlobalMaxPool", "GridSample", "GroupNormalization",
-    "InstanceNormalization", "LayerNormalization", "LeakyRelu", "MatMul", "MaxPool", "Mul", "PRelu", "Pow",
-    "Reciprocal", "Relu", "Reshape", "Resize", "Sigmoid", "Slice", "Split", "Sqrt", "Sub", "Tanh", "Transpose"};
-
 namespace {
 // TODO, move this to shared_library
 bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node,
@@ -64,20 +56,27 @@ bool BaseOpBuilder::IsOpSupported(const Node& node, const OpBuilderInputParams&
   }
 
   if (!HasSupportedOpSet(node, logger)) {
+    LOGS(logger, VERBOSE) << "Operator [" << node.OpType() << "] does not support this opset";
     return false;
   }
 
   if (!HasSupportedInputs(node, input_params, logger)) {
+    LOGS(logger, VERBOSE) << "Operator [" << node.OpType() << "] has unsupported inputs";
     return false;
   }
 
   // We do not support external initializers for now
   const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();
   if (HasExternalInitializer(initializers, node, logger)) {
+    LOGS(logger, VERBOSE) << "Operator [" << node.OpType() << "] has external initializers";
     return false;
   }
 
-  return IsOpSupportedImpl(node, input_params, logger);
+  if (!IsOpSupportedImpl(node, input_params, logger)) {
+    LOGS(logger, VERBOSE) << "Operator [" << node.OpType() << "] is not supported by the impl";
+    return false;
+  }
+  return true;
 }
 
 bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputParams& input_params,
@@ -114,13 +113,10 @@ bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
     return true;
   }
 
-// only support MLProgram for FP16
-#if defined(COREML_ENABLE_MLPROGRAM)
-  if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 &&
-      Float16Ops.count(node.OpType())) {
+  // only MLProgram supports FP16
+  if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
     return true;
   }
-#endif
 
   LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
   return false;
"core/providers/shared/utils/utils.h" @@ -55,6 +56,64 @@ bool CheckIfBothInputShapesMatch(const Node& node, const logging::Logger& logger } } // namespace +#if defined(COREML_ENABLE_MLPROGRAM) +static std::vector InferOutputShape(const std::vector& a, const std::vector& b) { + std::vector output_shape; + int64_t i_a = 0, j_b = 0; + if (a.size() >= b.size()) { + output_shape = a; + j_b -= a.size() - b.size(); + } else { + output_shape = b; + i_a -= b.size() - a.size(); + } + + for (size_t i = 0; i < output_shape.size(); i++, i_a++, j_b++) { + const int64_t a_dim = (i_a >= 0) ? a[i_a] : 1; + const int64_t b_dim = (j_b >= 0) ? b[j_b] : 1; + if (a_dim == -1 || b_dim == -1) { + output_shape[i] = -1; + } else { + output_shape[i] = std::max(a_dim, b_dim); + } + } + return output_shape; +} + +// Add variadic inputs to the model builder +// in onnx spec, some node allows variadic inputs, such as max(x, y, z, ...) +// while in coreml, maximum op only allows two inputs maximum(x, y) +// the conversion is doing the following: +// max(x, y, z, ...) -> max(max(x, y), z, ...) +static void AddVariadicInputs(std::unique_ptr* op, + ModelBuilder& model_builder, + const Node& node, + const logging::Logger& logger) { + using namespace CoreML::Specification::MILSpec; + const auto& input_defs(node.InputDefs()); + std::string_view layer_input_name_x = model_builder.GetUniqueName(node, "variadic"); + auto input_dtype = input_defs[0]->TypeAsProto()->tensor_type().elem_type(); + const int32_t elem_type = static_cast(input_dtype); + std::vector x0_shape, x1_shape; + GetShape(*input_defs[0], x0_shape, logger); + GetShape(*input_defs[1], x1_shape, logger); + x0_shape = InferOutputShape(x0_shape, x1_shape); + std::unique_ptr op_prev = std::move(*op); + for (size_t i = 2; i < input_defs.size(); i++) { + AddIntermediateOperationOutput(*op_prev, layer_input_name_x, elem_type, x0_shape); + std::unique_ptr op_cur = model_builder.CreateOperation(node, op_prev->type()); + AddOperationInput(*op_cur, "x", layer_input_name_x); + AddOperationInput(*op_cur, "y", input_defs[i]->Name()); + model_builder.AddOperation(std::move(op_prev)); + op_prev = std::move(op_cur); + layer_input_name_x = model_builder.GetUniqueName(node, "variadic"); + GetShape(*input_defs[i], x1_shape, logger); + x0_shape = InferOutputShape(x0_shape, x1_shape); + } + *op = std::move(op_prev); +} +#endif + Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { const auto& op_type(node.OpType()); @@ -70,6 +129,8 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const coreml_op_type = "add"; } else if (op_type == "Mul") { coreml_op_type = "mul"; + } else if (op_type == "Max") { + coreml_op_type = "maximum"; } else if (op_type == "Sub") { coreml_op_type = "sub"; } else if (op_type == "Div") { @@ -86,8 +147,11 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const std::unique_ptr op = model_builder.CreateOperation(node, coreml_op_type); AddOperationInput(*op, "x", input_defs[0]->Name()); AddOperationInput(*op, "y", input_defs[1]->Name()); + if (input_defs.size() > 2) { + // "max" node may have variadic inputs + AddVariadicInputs(&op, model_builder, node, logger); + } AddOperationOutput(*op, *node.OutputDefs()[0]); - model_builder.AddOperation(std::move(op)); } else #endif // defined (COREML_ENABLE_MLPROGRAM) @@ -157,6 +221,10 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn return false; } + if 
diff --git a/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
index bc9e2f10296ed..f7046c213a8cb 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc
@@ -98,26 +98,24 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
       const bool min_max_attribs = node.SinceVersion() < 11;
       std::string_view min_name;
       if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-        min_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "min", min)
-                                   : node.InputDefs()[1]->Name();
+        min_name = (min_max_attribs || !has_min) ? model_builder.AddScalarConstant(clip_op.type(), "min", min)
+                                                 : node.InputDefs()[1]->Name();
       } else {
-        min_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "min", MLFloat16(min))
-                                   : node.InputDefs()[1]->Name();
+        min_name = (min_max_attribs || !has_min) ? model_builder.AddScalarConstant(clip_op.type(), "min", MLFloat16(min))
+                                                 : node.InputDefs()[1]->Name();
       }
 
       AddOperationInput(clip_op, "alpha", min_name);
 
-      if (has_max) {
-        std::string_view max_name;
-        if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
-          max_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "max", max)
-                                     : node.InputDefs()[2]->Name();
-        } else {
-          max_name = min_max_attribs ? model_builder.AddScalarConstant(clip_op.type(), "max", MLFloat16(max))
-                                     : node.InputDefs()[2]->Name();
-        }
-        AddOperationInput(clip_op, "beta", max_name);
+      std::string_view max_name;
+      if (input_dtype == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+        max_name = (min_max_attribs || !has_max) ? model_builder.AddScalarConstant(clip_op.type(), "max", max)
+                                                 : node.InputDefs()[2]->Name();
+      } else {
+        max_name = (min_max_attribs || !has_max) ? model_builder.AddScalarConstant(clip_op.type(), "max", MLFloat16(max))
+                                                 : node.InputDefs()[2]->Name();
       }
+      AddOperationInput(clip_op, "beta", max_name);
     }
   }
 
@@ -200,7 +198,9 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
 bool ClipOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                       const logging::Logger& logger) const {
   float min, max;
-  return GetClipMinMax(input_params.graph_viewer, node, min, max, logger);
+  bool ret = GetClipMinMax(input_params.graph_viewer, node, min, max, logger);
+  // reject the degenerate case where min == max; the CoreML clip mapping does not handle it
+  return ret && (min != max);
 }
 
 void CreateClipOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
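The new `min != max` check in `ClipOpBuilder::IsOpSupportedImpl` excludes the degenerate Clip where the bounds coincide: every element collapses to the same constant, a case this mapping is not relied on to handle, so the node is left to another execution provider. Scalar semantics for reference (illustrative only):

```cpp
#include <algorithm>

// ONNX Clip on a scalar: clamp x into [mn, mx].
float ClipScalar(float x, float mn, float mx) {
  return std::min(std::max(x, mn), mx);
}
// With mn == mx == 3.f, ClipScalar(v, 3.f, 3.f) == 3.f for every v.
```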
diff --git a/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc
index 5651b9cc5793e..f161b309a2425 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc
@@ -5,10 +5,15 @@
 #include "core/providers/common.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
 #include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/shared/utils/utils.h"
 
+#ifdef __APPLE__
+#include <TargetConditionals.h>
+#endif
+
 namespace onnxruntime {
 namespace coreml {
 
@@ -20,6 +25,7 @@ class ReductionOpBuilder : public BaseOpBuilder {
 
   bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                          const logging::Logger& logger) const override;
+  bool SupportsMLProgram() const override { return true; }
 };
 
 namespace {
@@ -48,13 +54,12 @@ Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co
                                                  const logging::Logger& /* logger */) const {
   const auto& op_type(node.OpType());
   const auto& input_defs(node.InputDefs());
-  const auto& initializers(model_builder.GetInitializerTensors());
 
   std::vector<int64_t> axes;
 
   NodeAttrHelper helper(node);
   if (input_defs.size() > 1 && input_defs[1]->Exists()) {
-    auto& axes_tensor = *initializers.at(input_defs[1]->Name());
+    auto& axes_tensor = *model_builder.GetConstantInitializer(input_defs[1]->Name());
     Initializer axes_initializer(axes_tensor);
     int64_t* data = axes_initializer.data<int64_t>();
     int64_t size = axes_initializer.size();
@@ -66,28 +71,77 @@ Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co
   const bool keepdims = helper.Get("keepdims", 1) != 0;
   const bool noop_with_empty_axes = helper.Get("noop_with_empty_axes", 0) != 0;
 
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+
+    std::string_view coreml_op_type;
+    if (noop_with_empty_axes && axes.size() == 0) {
+      coreml_op_type = "identity";
+    } else if (op_type == "ReduceSum") {
+      coreml_op_type = "reduce_sum";
+    } else if (op_type == "ReduceMean") {
+      coreml_op_type = "reduce_mean";
+    } else if (op_type == "ReduceMax") {
+      coreml_op_type = "reduce_max";
+    } else if (op_type == "ReduceMin") {
+      coreml_op_type = "reduce_min";
+    } else if (op_type == "ReduceProd") {
+      coreml_op_type = "reduce_prod";
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "ReductionOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
+    }
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+    if (coreml_op_type != "identity") {
+      if (axes.size() > 0) {
+        AddOperationInput(*op, "axes", model_builder.AddConstant(op->type(), "axes", axes));
+      }
+      AddOperationInput(*op, "keep_dims", model_builder.AddScalarConstant(op->type(), "keep_dims", keepdims));
+    }
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
+
+    model_builder.AddOperation(std::move(op));
+  } else
+#endif  // (COREML_ENABLE_MLPROGRAM)
+  {
+    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+
+    if (op_type == "ReduceSum") {
+      AddReductionParams(layer->mutable_reducesum(), axes, keepdims, noop_with_empty_axes);
+    } else if (op_type == "ReduceMean") {
+      AddReductionParams(layer->mutable_reducemean(), axes, keepdims, noop_with_empty_axes);
+    } else {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "ReductionOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
+    }
 
-  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+    *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
 
-  if (op_type == "ReduceSum") {
-    AddReductionParams(layer->mutable_reducesum(), axes, keepdims, noop_with_empty_axes);
-  } else if (op_type == "ReduceMean") {
-    AddReductionParams(layer->mutable_reducemean(), axes, keepdims, noop_with_empty_axes);
-  } else {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                           "ReductionOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
+    model_builder.AddLayer(std::move(layer));
   }
-
-  *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
-
-  model_builder.AddLayer(std::move(layer));
   return Status::OK();
 }
 
 bool ReductionOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                            const logging::Logger& logger) const {
   const auto& input_defs = node.InputDefs();
 
+  if (!input_params.create_mlprogram &&
+      (node.OpType() == "ReduceMax" || node.OpType() == "ReduceMin" || node.OpType() == "ReduceProd")) {
+    return false;
+  }
+
+#if defined(TARGET_OS_IOS) && defined(TARGET_CPU_X86_64)
+  // to pass https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1563483&view=logs&j=f7cc61a9-cc70-56e7-b06c-4668ca17e426
+  // ReductionOpTest.ReduceSum_half_bert
+  int32_t input_type;
+  GetType(*input_defs[0], input_type, logger);
+  if (node.OpType() == "ReduceSum" && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
+    return false;
+  }
+#endif
 
   NodeAttrHelper helper(node);
 
@@ -99,18 +153,16 @@ bool ReductionOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInpu
   if (input_defs.size() > 1 && input_defs[1]->Exists()) {
     // 'axes' is optional input in new opsets
     const auto& axes_name = input_defs[1]->Name();
-    const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();
-    if (!Contains(initializers, axes_name)) {
+    const auto* axes = input_params.graph_viewer.GetConstantInitializer(axes_name);
+    if (!axes) {
       LOGS(logger, VERBOSE) << "Axes of reduction must be a constant initializer";
       return false;
     }
 
-    empty_axes = initializers.at(axes_name)->int64_data_size() == 0;
+    empty_axes = axes->int64_data_size() == 0;
   }
-
-  if (empty_axes && noop_with_empty_axes) {
-    // TODO: When we add ML Program support we should enable this as it makes the node an Identity op
-    LOGS(logger, VERBOSE) << "CoreML doesn't support noop on empty axes for reduction layers" << std::endl;
+  if (empty_axes && noop_with_empty_axes && !input_params.create_mlprogram) {
+    LOGS(logger, VERBOSE) << "NeuralNetwork doesn't support noop on empty axes for reduction layers";
     return false;
   }
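The MLProgram path hands the actual computation to CoreML's `reduce_*` ops; empty `axes` with `noop_with_empty_axes` set makes the node an identity, which is why the builder emits `identity` in that case. The shape contract is the usual ONNX one; a sketch of the reduced output shape (illustrative only, assuming axes are already non-negative):

```cpp
#include <cstdint>
#include <set>
#include <vector>

// Output shape of a reduction over `axes`.
std::vector<int64_t> ReducedShape(const std::vector<int64_t>& shape,
                                  const std::set<int64_t>& axes, bool keepdims) {
  std::vector<int64_t> out;
  for (int64_t i = 0; i < static_cast<int64_t>(shape.size()); ++i) {
    if (axes.count(i)) {
      if (keepdims) out.push_back(1);  // reduced dimension kept as size 1
    } else {
      out.push_back(shape[i]);
    }
  }
  return out;
}
// ReducedShape({2, 3, 4}, {1, 2}, true) -> {2, 1, 1};  with keepdims=false -> {2}
```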
diff --git a/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc
index a86e3d9538d87..243f949bdd48e 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc
@@ -2,7 +2,9 @@
 // Licensed under the MIT License.
 
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
+#include "core/providers/coreml/shape_utils.h"
 #include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/shared/utils/utils.h"  // for NodeAttrHelper
 
@@ -14,28 +16,132 @@ class ShapeOpBuilder : public BaseOpBuilder {
 
   bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                          const logging::Logger& logger) const override;
+  bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
+                              const logging::Logger& logger) const override;
+  bool SupportsMLProgram() const override { return true; }
 };
 
 Status ShapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                              const Node& node,
                                              const logging::Logger& /*logger*/) const {
-  auto layer = model_builder.CreateNNLayer(node);
-  layer->mutable_getshape();
-  *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
-  model_builder.AddLayer(std::move(layer));
+  const auto& input_defs = node.InputDefs();
+
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+    NodeAttrHelper node_attr_helper{node};
+    int64_t size = -1;
+    int64_t num_dims = 0;
+    int64_t start = node_attr_helper.Get("start", 0);
+    // If the input shape is not available, size is -1 and start is 0
+    if (input_defs[0]->Shape()) {
+      num_dims = input_defs[0]->Shape()->dim_size();
+      start = HandleNegativeAxis(start, num_dims);
+      if (node_attr_helper.HasAttr("end")) {
+        int64_t end = HandleNegativeAxis(node_attr_helper.Get("end", -1), num_dims);
+        size = end - start;
+      }
+    }
+
+    int32_t output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32;
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "shape");
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+    if (size != -1 || start != 0) {
+      std::string_view layer_input_name_x = model_builder.GetUniqueName(node, "slice_by_size");
+      std::vector<int64_t> x0_shape{num_dims};
+      AddIntermediateOperationOutput(*op, layer_input_name_x, output_datatype, x0_shape);
+      model_builder.AddOperation(std::move(op));
+
+      auto slice_op = model_builder.CreateOperation(node, "slice_by_size");
+      AddOperationInput(*slice_op, "x", layer_input_name_x);
+      std::vector<int64_t> starts = {start};
+      std::vector<int64_t> sizes = {size};
+      AddOperationInput(*slice_op, "begin", model_builder.AddConstant(slice_op->type(), "begin", starts));
+      AddOperationInput(*slice_op, "size", model_builder.AddConstant(slice_op->type(), "size", sizes));
+      AddOperationOutput(*slice_op, *node.OutputDefs()[0], output_datatype);
+      model_builder.AddOperation(std::move(slice_op));
+    } else {
+      AddOperationOutput(*op, *node.OutputDefs()[0], output_datatype);
+      model_builder.AddOperation(std::move(op));
+    }
+  } else  // NOLINT
+#endif
+  {
+    auto layer = model_builder.CreateNNLayer(node);
+    layer->mutable_getshape();
+    *layer->mutable_input()->Add() = input_defs[0]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+    model_builder.AddLayer(std::move(layer));
+  }
   return Status::OK();
 }
 
-bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/,
+bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                                        const logging::Logger& logger) const {
+  const auto* tensor_shape = node.InputDefs()[0]->Shape();
+
   NodeAttrHelper node_attr_helper{node};
-  if (node_attr_helper.Get("start", 0) != 0) {
-    LOGS(logger, VERBOSE) << "Shape does not support 'start' attribute with value other than 0";
+  if (!input_params.create_mlprogram) {
+    if (node_attr_helper.HasAttr("end")) {
+      LOGS(logger, VERBOSE) << "Shape does not support 'end' attribute";
+      return false;
+    }
+
+    if (node_attr_helper.Get("start", 0) != 0) {
+      LOGS(logger, VERBOSE) << "Shape does not support 'start' attribute with value other than 0";
+      return false;
+    }
+  } else {
+    int64_t end = node_attr_helper.HasAttr("end")
+                      ? node_attr_helper.Get("end", -1)
+                      : std::numeric_limits<int64_t>::max();
+    int64_t start = node_attr_helper.Get("start", 0);
+    // no need to slice if start is 0 and end is max; only validate the slice otherwise,
+    // so tensor_shape is never dereferenced when it is unavailable
+    if (!(end == std::numeric_limits<int64_t>::max() && start == 0)) {
+      if (tensor_shape == nullptr) {
+        LOGS(logger, VERBOSE) << "Shape does not support slicing when tensor_shape is not available";
+        return false;
+      }
+      int64_t dim_size = tensor_shape->dim_size();
+      int64_t size = node_attr_helper.HasAttr("end")
+                         ? HandleNegativeAxis(node_attr_helper.Get("end", -1), dim_size)
+                         : dim_size;
+      start = HandleNegativeAxis(start, dim_size);
+      size = size - start;
+      if (size == 0) {
+        LOGS(logger, VERBOSE) << "Shape does not support slicing when size is 0";
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool ShapeOpBuilder::HasSupportedInputsImpl(const Node& node,
+                                            [[maybe_unused]] const OpBuilderInputParams& input_params,
+                                            const logging::Logger& logger) const {
+  // We only check the type of input 0
+  const auto& input = *node.InputDefs()[0];
+
+  int32_t input_type;
+  if (!GetType(input, input_type, logger)) {
     return false;
   }
 
-  if (node_attr_helper.HasAttr("end")) {
-    LOGS(logger, VERBOSE) << "Shape does not support 'end' attribute";
+  if (input_params.create_mlprogram) {
+    if ((input_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 ||
+         input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
+         input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16)) {
+      return true;
+    } else {
+      LOGS(logger, VERBOSE) << "[" << node.OpType()
+                            << "] Input type: [" << input_type
+                            << "] is not supported.";
+      return false;
+    }
+  } else if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+    LOGS(logger, VERBOSE) << "[" << node.OpType()
+                          << "] Input type: [" << input_type
+                          << "] is not supported.";
     return false;
   }
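The `start`/`end` handling above follows ONNX Shape-15 slicing: negative values are normalized against the input rank and the output is `full_shape[start:end]`, which the builder realizes as a `shape` op followed by `slice_by_size`. A sketch of the semantics (illustrative only):

```cpp
#include <cstdint>
#include <vector>

// Shape(x, start=s, end=e) == full_shape[s:e] after normalizing negative indices.
std::vector<int64_t> ShapeSlice(const std::vector<int64_t>& full_shape, int64_t start, int64_t end) {
  const int64_t rank = static_cast<int64_t>(full_shape.size());
  if (start < 0) start += rank;
  if (end < 0) end += rank;
  return {full_shape.begin() + start, full_shape.begin() + end};
}
// ShapeSlice({8, 3, 224, 224}, 1, -1) -> {3, 224}
```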
#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/providers/coreml/builders/model_builder.h" +#include "core/providers/coreml/shape_utils.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/shared/utils/utils.h" // for NodeAttrHelper @@ -14,28 +16,132 @@ class ShapeOpBuilder : public BaseOpBuilder { bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const override; + bool SupportsMLProgram() const override { return true; } }; Status ShapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /*logger*/) const { - auto layer = model_builder.CreateNNLayer(node); - layer->mutable_getshape(); - *layer->mutable_input()->Add() = node.InputDefs()[0]->Name(); - *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); - model_builder.AddLayer(std::move(layer)); + const auto& input_defs = node.InputDefs(); + +#if defined(COREML_ENABLE_MLPROGRAM) + if (model_builder.CreateMLProgram()) { + using namespace CoreML::Specification::MILSpec; + NodeAttrHelper node_attr_helper{node}; + int64_t size = -1; + int64_t num_dims = 0; + int64_t start = node_attr_helper.Get("start", 0); + // If the input shape is not available, size is -1 and start is 0 + if (input_defs[0]->Shape()) { + num_dims = input_defs[0]->Shape()->dim_size(); + start = HandleNegativeAxis(start, num_dims); + if (node_attr_helper.HasAttr("end")) { + int64_t end = HandleNegativeAxis(node_attr_helper.Get("end", -1), num_dims); + size = end - start; + } + } + + int32_t output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32; + std::unique_ptr op = model_builder.CreateOperation(node, "shape"); + AddOperationInput(*op, "x", input_defs[0]->Name()); + if (size != -1 || start != 0) { + std::string_view layer_input_name_x = model_builder.GetUniqueName(node, "slice_by_size"); + std::vector x0_shape{num_dims}; + AddIntermediateOperationOutput(*op, layer_input_name_x, output_datatype, x0_shape); + model_builder.AddOperation(std::move(op)); + + auto slice_op = model_builder.CreateOperation(node, "slice_by_size"); + AddOperationInput(*slice_op, "x", layer_input_name_x); + std::vector starts = {start}; + std::vector sizes = {size}; + AddOperationInput(*slice_op, "begin", model_builder.AddConstant(slice_op->type(), "begin", starts)); + AddOperationInput(*slice_op, "size", model_builder.AddConstant(slice_op->type(), "size", sizes)); + AddOperationOutput(*slice_op, *node.OutputDefs()[0], output_datatype); + model_builder.AddOperation(std::move(slice_op)); + } else { + AddOperationOutput(*op, *node.OutputDefs()[0], output_datatype); + model_builder.AddOperation(std::move(op)); + } + } else // NOLINT +#endif + { + auto layer = model_builder.CreateNNLayer(node); + layer->mutable_getshape(); + *layer->mutable_input()->Add() = input_defs[0]->Name(); + *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); + model_builder.AddLayer(std::move(layer)); + } return Status::OK(); } -bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, +bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { + const auto* tensor_shape = node.InputDefs()[0]->Shape(); 
diff --git a/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc
index dbd0f48576f8b..6372f3136123b 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc
@@ -51,8 +51,8 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   auto calculate_remainder_and_chunk_size = [&](int32_t num_outputs) {
     // note: checked in IsOpSupportedImpl that ensures the dim value at splitting axis exists
     auto split_dim_size = data_shape[HandleNegativeAxis(axis, data_shape.size())];
-    uint64_t chunk_size = (split_dim_size + num_outputs - 1) / num_outputs;
-    uint64_t remainder = split_dim_size % chunk_size;
+    int64_t chunk_size = (split_dim_size + num_outputs - 1) / num_outputs;
+    int64_t remainder = split_dim_size % chunk_size;
     return std::make_tuple(remainder, chunk_size);
   };
 
@@ -106,20 +106,20 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
       // if "split" is explicitly provided as an input
      //  const auto& split_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
       Initializer unpacked_tensor(*model_builder.GetConstantInitializer(input_defs[1]->Name()));
-      auto split_span = unpacked_tensor.DataAsSpan<uint64_t>();
+      auto split_span = unpacked_tensor.DataAsSpan<int64_t>();
       for (const auto& split_size : split_span) {
         coreml_splitnd->add_splitsizes(split_size);
       }
     } else if (node.SinceVersion() < 18) {
-      uint64_t num_outputs = narrow<uint64_t>(node.OutputDefs().size());
+      int64_t num_outputs = narrow<int64_t>(node.OutputDefs().size());
       coreml_splitnd->set_numsplits(num_outputs);
     } else {
       // note: for opset 18+ 'num_outputs' is a required attribute
-      uint64_t num_outputs = narrow<uint64_t>(helper.GetInt64("num_outputs").value());
+      int64_t num_outputs = narrow<int64_t>(helper.GetInt64("num_outputs").value());
       auto [remainder, chunk_size] = calculate_remainder_and_chunk_size(static_cast<int32_t>(num_outputs));
       if (remainder) {
         // uneven
-        auto split_sizes = InlinedVector<uint64_t>(num_outputs, chunk_size);
+        auto split_sizes = InlinedVector<int64_t>(num_outputs, chunk_size);
         split_sizes.back() = remainder;
         for (size_t i = 0; i < split_sizes.size(); i++) {
           coreml_splitnd->add_splitsizes(split_sizes[i]);
@@ -162,7 +162,7 @@ bool SplitOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPar
     }
 
     const auto split_shape = *input_defs[1]->Shape();
-    if (split_shape.dim_size() < 2) {
+    if (split_shape.dim(0).dim_value() < 2) {
       LOGS(logger, VERBOSE) << "CoreML Split must produce at least 2 outputs.";
       return false;
     }
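The `calculate_remainder_and_chunk_size` lambda sizes opset-18 `num_outputs` splits by ceil-division, with the final chunk absorbing the remainder. Standalone sketch (illustrative only):

```cpp
#include <cstdint>
#include <vector>

std::vector<int64_t> SplitSizes(int64_t dim, int64_t num_outputs) {
  const int64_t chunk = (dim + num_outputs - 1) / num_outputs;  // ceil(dim / num_outputs)
  const int64_t remainder = dim % chunk;
  std::vector<int64_t> sizes(num_outputs, chunk);
  if (remainder) sizes.back() = remainder;  // uneven split: last chunk is smaller
  return sizes;
}
// SplitSizes(7, 3) -> {3, 3, 1};  SplitSizes(6, 3) -> {2, 2, 2}
```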
diff --git a/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc
index e9cc1c2dbf638..c8df7c1a43f65 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc
@@ -5,10 +5,13 @@
 #include "core/framework/tensorprotoutils.h"
 #include "core/providers/common.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
+#include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
 #include "core/providers/coreml/builders/op_builder_factory.h"
+#include "core/providers/coreml/shape_utils.h"
 #include "core/providers/shared/utils/utils.h"
 #include "core/optimizer/initializer.h"
+#include "core/providers/cpu/tensor/unsqueeze.h"
 
 namespace onnxruntime {
 namespace coreml {
@@ -21,16 +24,16 @@ class SqueezeOpBuilder : public BaseOpBuilder {
 
   bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
                          const logging::Logger& logger) const override;
+  bool SupportsMLProgram() const override { return true; }
 };
 
 namespace {
-Status GetAxes(ModelBuilder& model_builder, const Node& node, std::vector<int64_t>& axes) {
+void GetAxes(ModelBuilder& model_builder, const Node& node, TensorShapeVector& axes) {
   // Squeeze opset 13 use input as axes
   if (node.SinceVersion() > 12) {
     // If axes is not provided, return an empty axes as default to squeeze all
     if (node.InputDefs().size() > 1) {
-      const auto& initializers(model_builder.GetInitializerTensors());
-      const auto& axes_tensor = *initializers.at(node.InputDefs()[1]->Name());
+      const auto& axes_tensor = *model_builder.GetConstantInitializer(node.InputDefs()[1]->Name());
       Initializer unpacked_tensor(axes_tensor);
       auto raw_axes = unpacked_tensor.DataAsSpan<int64_t>();
       const auto size = SafeInt<size_t>(axes_tensor.dims()[0]);
@@ -39,10 +42,9 @@ Status GetAxes(ModelBuilder& model_builder, const Node& node, std::vector<int64_t>& axes)
   } else {
     NodeAttrHelper helper(node);
-    axes = helper.Get("axes", std::vector<int64_t>());
+    auto axes_attr = helper.Get("axes", std::vector<int64_t>());
+    axes.assign(axes_attr.begin(), axes_attr.end());
   }
-
-  return Status::OK();
 }
 }  // namespace
 
@@ -54,38 +56,83 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const
 
 Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                                const Node& node,
-                                               const logging::Logger& /* logger */) const {
+                                               [[maybe_unused]] const logging::Logger& logger) const {
   std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
-
+  const auto& input_defs(node.InputDefs());
   auto* coreml_squeeze = layer->mutable_squeeze();
-  std::vector<int64_t> axes;
-  ORT_RETURN_IF_ERROR(GetAxes(model_builder, node, axes));
-  if (axes.empty()) {
-    coreml_squeeze->set_squeezeall(true);
-  } else {
-    *coreml_squeeze->mutable_axes() = {axes.cbegin(), axes.cend()};
-    coreml_squeeze->set_squeezeall(false);
-  }
+  TensorShapeVector axes;
+  GetAxes(model_builder, node, axes);
+  std::vector<int64_t> input_shape;
+  GetShape(*input_defs[0], input_shape, logger);
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;
+
+    std::string_view coreml_op_type = node.OpType() == "Squeeze" ? "squeeze" : "reshape";
+    std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
+    AddOperationInput(*op, "x", input_defs[0]->Name());
+
+    if (coreml_op_type == "squeeze") {
+      if (!axes.empty()) {
+        // coreml squeeze op does support negative axes
+        AddOperationInput(*op, "axes", model_builder.AddConstant(op->type(), "axes", AsSpan(axes)));
+      }
+    } else {
+      TensorShapeVector output_shape = UnsqueezeBase::ComputeOutputShape(TensorShape(input_shape), axes);
+      AddOperationInput(*op, "shape", model_builder.AddConstant(op->type(), "shape", AsSpan(output_shape)));
+    }
+    AddOperationOutput(*op, *node.OutputDefs()[0]);
+    model_builder.AddOperation(std::move(op));
+  } else  // NOLINT
+#endif
+  {
+    if (axes.empty()) {
+      coreml_squeeze->set_squeezeall(true);
+    } else {
+      *coreml_squeeze->mutable_axes() = {axes.cbegin(), axes.cend()};
+      coreml_squeeze->set_squeezeall(false);
+    }
 
-  *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
-  *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
+    *layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
+    *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
 
-  model_builder.AddLayer(std::move(layer));
+    model_builder.AddLayer(std::move(layer));
+  }
   return Status::OK();
 }
 
 bool SqueezeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
-                                         const logging::Logger& /*logger*/) const {
+                                         const logging::Logger& logger) const {
   // Squeeze opset 13 uses input 1 as axes, if we have input 1 then it needs to be an initializer
-  const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();
-  if (node.SinceVersion() > 12 && node.InputDefs().size() > 1) {
-    const auto& axes_name = node.InputDefs()[1]->Name();
-    if (!Contains(initializers, axes_name)) {
-      LOGS_DEFAULT(VERBOSE) << "Input axes of Squeeze must be known";
+  const auto& input_defs = node.InputDefs();
+  if (node.SinceVersion() > 12 && input_defs.size() > 1) {
+    const auto& axes_name = input_defs[1]->Name();
+    if (!input_params.graph_viewer.GetConstantInitializer(axes_name)) {
+      LOGS(logger, VERBOSE) << "Input axes must be known";
       return false;
     }
   }
 
+  if (node.OpType() == "Unsqueeze") {
+    if (!input_params.create_mlprogram) {
+      return false;
+    }
+
+    int64_t num_of_new_dims = 0;
+    if (node.SinceVersion() > 12) {
+      num_of_new_dims = node.InputDefs()[1]->Shape()->dim(0).dim_value();
+    } else {
+      NodeAttrHelper helper(node);
+      auto axes = helper.Get("axes", std::vector<int64_t>());
+      num_of_new_dims = static_cast<int64_t>(axes.size());
+    }
+
+    std::vector<int64_t> input_shape;
+    if (!GetShape(*input_defs[0], input_shape, logger) || input_shape.size() + num_of_new_dims > 5) {
+      LOGS(logger, VERBOSE) << "Unsqueeze to output shape with > 5 dimensions is not supported";
+      return false;
+    }
+  }
   return true;
 }
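When the op is `Unsqueeze`, the builder lowers it to a `reshape` whose target is the input shape with size-1 dimensions inserted at the (normalized) axes, i.e. what `UnsqueezeBase::ComputeOutputShape` returns. A sketch of that shape computation (illustrative only):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<int64_t> UnsqueezedShape(std::vector<int64_t> shape, std::vector<int64_t> axes) {
  const int64_t out_rank = static_cast<int64_t>(shape.size() + axes.size());
  for (auto& a : axes) {
    if (a < 0) a += out_rank;  // normalize negative axes against the output rank
  }
  std::sort(axes.begin(), axes.end());
  for (int64_t a : axes) {
    shape.insert(shape.begin() + a, 1);
  }
  return shape;
}
// UnsqueezedShape({3, 4}, {0, 3}) -> {1, 3, 4, 1}
```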
diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
index a6580920343c4..bc3cad004aec1 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc
@@ -16,6 +16,8 @@ class UnaryOpBuilder : public BaseOpBuilder {
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                const logging::Logger& logger) const override;
   bool SupportsMLProgram() const override { return true; }
+  bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
+                         const logging::Logger& logger) const override;
 };
 
 Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
@@ -32,6 +34,10 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     coreml_op_type = "sqrt";
   } else if (op_type == "Reciprocal") {
     coreml_op_type = "inverse";
+  } else if (op_type == "Erf") {
+    coreml_op_type = "erf";
+  } else if (op_type == "Round") {
+    coreml_op_type = "round";
   } else {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "UnaryOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
@@ -74,6 +80,14 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   return Status::OK();
 }
 
+bool UnaryOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
+                                       const logging::Logger& /*logger*/) const {
+  if (!input_params.create_mlprogram && (node.OpType() == "Erf" || node.OpType() == "Round")) {
+    return false;
+  }
+  return true;
+}
+
 void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.builders.push_back(std::make_unique<UnaryOpBuilder>());
   op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get());
diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc
index 4fd0c0577a9b8..6e7df20a06097 100644
--- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc
+++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.cc
@@ -24,13 +24,16 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
   CreateActivationOpBuilder("Gelu", op_registrations);
 
   // Unary ops
+  CreateUnaryOpBuilder("Erf", op_registrations);
   CreateUnaryOpBuilder("Reciprocal", op_registrations);
+  CreateUnaryOpBuilder("Round", op_registrations);
   CreateUnaryOpBuilder("Sqrt", op_registrations);
 
   // Binary elementwise ops
   CreateBinaryOpBuilder("Add", op_registrations);
   CreateBinaryOpBuilder("Div", op_registrations);
   CreateBinaryOpBuilder("Mul", op_registrations);
+  CreateBinaryOpBuilder("Max", op_registrations);
   CreateBinaryOpBuilder("Pow", op_registrations);
   CreateBinaryOpBuilder("Sub", op_registrations);
 
@@ -42,6 +45,9 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
 
   // Reduction ops
   CreateReductionOpBuilder("ReduceMean", op_registrations);
+  CreateReductionOpBuilder("ReduceMin", op_registrations);
+  CreateReductionOpBuilder("ReduceMax", op_registrations);
+  CreateReductionOpBuilder("ReduceProd", op_registrations);
   CreateReductionOpBuilder("ReduceSum", op_registrations);
 
   // Normalization ops
@@ -72,6 +78,7 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
   CreateSoftmaxOpBuilder("Softmax", op_registrations);
   CreateSqueezeOpBuilder("Squeeze", op_registrations);
   CreateTransposeOpBuilder("Transpose", op_registrations);
+  CreateSqueezeOpBuilder("Unsqueeze", op_registrations);
 
   return op_registrations;
 }
diff --git a/onnxruntime/core/providers/cpu/tensor/unsqueeze.h b/onnxruntime/core/providers/cpu/tensor/unsqueeze.h
index 4b31e3a82f2d0..6960f8838ffde 100644
--- a/onnxruntime/core/providers/cpu/tensor/unsqueeze.h
+++ b/onnxruntime/core/providers/cpu/tensor/unsqueeze.h
@@ -20,15 +20,6 @@ class UnsqueezeBase {
   };
 
   Status PrepareCompute(OpKernelContext* context, Prepare& p) const;
-
- protected:
-  UnsqueezeBase(const OpKernelInfo& info) {
-    size_t num_inputs = info.GetInputCount();
-    if (num_inputs == 1) {  // axes must be a valid attribute
-      ORT_ENFORCE(info.GetAttrs("axes", axes_).IsOK(), "Missing/Invalid 'axes' attribute value");
-    }
-  }
-
   static TensorShapeVector ComputeOutputShape(
       const TensorShape& input_shape,
       const TensorShapeVector& axes) {
@@ -59,6 +50,14 @@ class UnsqueezeBase {
     return output_shape;
   }
 
+ protected:
+  UnsqueezeBase(const OpKernelInfo& info) {
+    size_t num_inputs = info.GetInputCount();
+    if (num_inputs == 1) {  // axes must be a valid attribute
+      ORT_ENFORCE(info.GetAttrs("axes", axes_).IsOK(), "Missing/Invalid 'axes' attribute value");
+    }
+  }
+
   TensorShapeVector axes_;
 };
diff --git a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
index d32e286ad933e..a74517840097c 100644
--- a/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
@@ -2271,6 +2271,21 @@ TEST(MathOpTest, Max_12_MLFloat16_Scalar1) {
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: Input batch size is inconsistent
 }
 
+TEST(MathOpTest, Max_12_MLFloat16_Scalar2) {
+  OpTester test("Max", 12);
+  test.AddInput<MLFloat16>("data_0", {1},
+                           MakeMLFloat16({-1.f}));
+  test.AddInput<MLFloat16>("data_1", {},
+                           MakeMLFloat16({2.f}));
+  test.AddInput<MLFloat16>("data_2", {1, 3},
+                           MakeMLFloat16({-2.f, -3.f, -4.f}));
+  test.AddInput<MLFloat16>("data_3", {1, 1, 3},
+                           MakeMLFloat16({-2.f, -3.f, -4.f}));
+  test.AddOutput<MLFloat16>("max", {1, 1, 3},
+                            MakeMLFloat16({2.f, 2.f, 2.f}));
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});  // TensorRT: Input batch size is inconsistent
+}
+
 TEST(MathOpTest, Max_13_Float16_MatrixVector) {
   TestFloat16MinMax("Max",
                     {4, 3},
diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
index bb6d732fccb8f..e8f74177f6f6c 100644
--- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
@@ -1374,7 +1374,7 @@ TEST(ReductionOpTest, ReduceMax_double) {
   test.Run();
 }
 
-#if defined(USE_CUDA) || defined(USE_ROCM)
+#if defined(USE_CUDA) || defined(USE_ROCM) || defined(COREML_ENABLE_MLPROGRAM)
 TEST(ReductionOpTest, ReduceMax_half) {
   OpTester test("ReduceMax");
   test.AddAttribute("axes", std::vector<int64_t>{1, 2});
@@ -2157,7 +2157,7 @@ TEST(ReductionOpTest, ReduceMin_double) {
   test.Run();
 }
 
-#if defined(USE_CUDA) || defined(USE_ROCM)
+#if defined(USE_CUDA) || defined(USE_ROCM) || defined(COREML_ENABLE_MLPROGRAM)
 TEST(ReductionOpTest, ReduceMin_half) {
   OpTester test("ReduceMin");
   test.AddAttribute("axes", std::vector<int64_t>{0, 2});
@@ -2355,7 +2355,7 @@ TEST(ReductionOpTest, ReduceSum_int32) {
   test.Run();
 }
 
-#if defined(USE_CUDA) || defined(USE_ROCM)
+#if defined(USE_CUDA) || defined(USE_ROCM) || defined(COREML_ENABLE_MLPROGRAM)
 TEST(ReductionOpTest, ReduceSumHalfHalf) {
   OpTester test("ReduceSum");
   test.AddAttribute("keepdims", (int64_t)0);
@@ -5610,7 +5610,7 @@ TEST(ReductionOpTest, ReduceSum_RK_parallel) {
   test.AddOutput<float>("reduced", {32}, expected);
 
   // CoreML does not provide 1e-5 precision here (it's off by 1e-4)
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCoreMLExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess);
 }
 
 TEST(ReductionOpTest, ReduceSum_RK_keepdims) {
diff --git a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
index d2aa5dd428fec..d1910c89f76b7 100644
--- a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
@@ -11,7 +11,7 @@ namespace test {
 
 // Disable TensorRT on the tests because of SegFault errors in the parser
 
-TEST(TensorOpTest, Unsqueeze_1) {
+TEST(UnsqueezeOpTest, Unsqueeze_1) {
   OpTester test("Unsqueeze");
 
   test.AddAttribute("axes", std::vector<int64_t>{1});
@@ -20,7 +20,7 @@ TEST(TensorOpTest, Unsqueeze_1) {
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
-TEST(TensorOpTest, Unsqueeze_1_int32) {
+TEST(UnsqueezeOpTest, Unsqueeze_1_int32) {
   OpTester test("Unsqueeze");
 
   test.AddAttribute("axes", std::vector<int64_t>{1});
@@ -29,7 +29,7 @@ TEST(TensorOpTest, Unsqueeze_1_int32) {
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
-TEST(TensorOpTest, Unsqueeze_2) {
+TEST(UnsqueezeOpTest, Unsqueeze_2) {
   OpTester test("Unsqueeze");
 
   test.AddAttribute("axes", std::vector<int64_t>{0, 4});
@@ -38,7 +38,7 @@ TEST(TensorOpTest, Unsqueeze_2) {
   test.Run();
 }
 
-TEST(TensorOpTest, Unsqueeze_3) {
+TEST(UnsqueezeOpTest, Unsqueeze_3) {
   OpTester test("Unsqueeze");
 
   test.AddAttribute("axes", std::vector<int64_t>{2, 1, 0});
@@ -47,7 +47,7 @@ TEST(TensorOpTest, Unsqueeze_3) {
   test.Run();
 }
 
-TEST(TensorOpTest, Unsqueeze_scalar) {
+TEST(UnsqueezeOpTest, Unsqueeze_scalar) {
   {
     OpTester test("Unsqueeze");
 
@@ -85,7 +85,7 @@ TEST(TensorOpTest, Unsqueeze_scalar) {
   run_test(true);
 }
 
-TEST(TensorOpTest, Unsqueeze_scalar_2) {
+TEST(UnsqueezeOpTest, Unsqueeze_scalar_2) {
   {
     OpTester test("Unsqueeze");
 
@@ -105,7 +105,7 @@ TEST(TensorOpTest, Unsqueeze_scalar_2) {
   run_test(true);
 }
 
-TEST(TensorOpTest, Unsqueeze_Duplicate) {
+TEST(UnsqueezeOpTest, Unsqueeze_Duplicate) {
   {
     OpTester test("Unsqueeze", 12);  // opset 1-12 has axes attribute
 
@@ -128,7 +128,7 @@ TEST(TensorOpTest, Unsqueeze_Duplicate) {
   }
 }
 
-TEST(TensorOpTest, Unsqueeze_OutOfRange) {
+TEST(UnsqueezeOpTest, Unsqueeze_OutOfRange) {
   {
     OpTester test("Unsqueeze", 12);  // opset 1-12 has axes attribute
     test.AddAttribute("axes", std::vector<int64_t>{4});
@@ -149,7 +149,7 @@ TEST(TensorOpTest, Unsqueeze_OutOfRange) {
   }
 }
 
-TEST(TensorOpTest, UnsqueezeNegAxis_3) {
+TEST(UnsqueezeOpTest, UnsqueezeNegAxis_3) {
   {
     OpTester test("Unsqueeze", 12);  // opset 1-12 has axes attribute
     test.AddAttribute("axes", std::vector<int64_t>{-4, 1, -6});
@@ -171,7 +171,7 @@ TEST(TensorOpTest, UnsqueezeNegAxis_3) {
   run_test(true);
 }
 
-TEST(TensorOpTest, Unsqueeze_1_int32_axes_input) {
+TEST(UnsqueezeOpTest, Unsqueeze_1_int32_axes_input) {
   auto run_test = [](bool axes_is_initializer) {
     OpTester test("Unsqueeze", 13);
 
@@ -185,7 +185,7 @@ TEST(TensorOpTest, Unsqueeze_1_int32_axes_input) {
   run_test(true);
 }
 
-TEST(TensorOpTest, Unsqueeze_3_axes_input) {
+TEST(UnsqueezeOpTest, Unsqueeze_3_axes_input) {
   auto run_test = [](bool axes_is_initializer) {
     OpTester test("Unsqueeze", 13);
 
@@ -200,7 +200,7 @@ TEST(TensorOpTest, Unsqueeze_3_axes_input) {
 }
 
 #if defined(USE_DNNL)
-TEST(TensorOpTest, Unsqueeze_3_axes_input_bfloat16) {
+TEST(UnsqueezeOpTest, Unsqueeze_3_axes_input_bfloat16) {
 #ifdef USE_DNNL
   if (!DnnlHasBF16Support()) {
     LOGS_DEFAULT(WARNING) << "Hardware does NOT support BF16";
@@ -218,7 +218,7 @@ TEST(TensorOpTest, Unsqueeze_3_axes_input_bfloat16) {
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
 }
 
-TEST(TensorOpTest, UnsqueezeNegAxis_3_bfloat16) {
+TEST(UnsqueezeOpTest, UnsqueezeNegAxis_3_bfloat16) {
 #ifdef USE_DNNL
   if (!DnnlHasBF16Support()) {
     LOGS_DEFAULT(WARNING) << "Hardware does NOT support BF16";
diff --git a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
index b269026ea02ac..4991b4329646f 100644
--- a/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
+++ b/tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
@@ -13,6 +13,7 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
 |ai.onnx:ConvTranspose|Weight and bias must be constant.<br/>padding_type of SAME_UPPER/SAME_LOWER is not supported.<br/>kernel_shape must have default values.<br/>output_shape is not supported.<br/>output_padding must have default values.|
 |ai.onnx:DepthToSpace|If 'mode' is 'CRD' the input must have a fixed shape.|
 |ai.onnx:Div||
+|ai.onnx:Erf||
 |ai.onnx:Gemm|Input B must be constant.|
 |ai.onnx:Gelu||
 |ai.onnx:GlobalAveragePool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
@@ -24,17 +25,26 @@ Keep in sync with doco generated from /docs/execution-providers/CoreML-Execution
 |ai.onnx:LeakyRelu||
 |ai.onnx:MatMul|Only support for transA == 0, alpha == 1.0 and beta == 1.0 is currently implemented.|
 |ai.onnx:MaxPool|Only 2D Pool is supported currently. 3D and 5D support can be added if needed.|
+|ai.onnx:Max||
 |ai.onnx:Mul||
 |ai.onnx:Pow|Only supports cases when both inputs are fp32.|
 |ai.onnx:PRelu||
 |ai.onnx:Reciprocal|this ask for a `epislon` (default 1e-4) where onnx don't provide|
+|ai.onnx:ReduceSum||
+|ai.onnx:ReduceMean||
+|ai.onnx:ReduceMax||
 |ai.onnx:Relu||
 |ai.onnx:Reshape||
 |ai.onnx:Resize|See [resize_op_builder.cc](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc) implementation. There are too many permutations to describe the valid combinations.|
+|ai.onnx:Round||
+|ai.onnx:Shape||
 |ai.onnx:Slice|starts/ends/axes/steps must be constant initializers.|
 |ai.onnx:Split|If provided, `splits` must be constant.|
 |ai.onnx:Sub||
 |ai.onnx:Sigmoid||
+|ai.onnx:Softmax||
 |ai.onnx:Sqrt||
+|ai.onnx:Squeeze||
 |ai.onnx:Tanh||
 |ai.onnx:Transpose||
+|ai.onnx:Unsqueeze||