diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs b/src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs index 9734baaa90..39bc1fc05d 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs @@ -53,7 +53,7 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog /// The path of the file containing the ONNX model. /// ONNX shapes to be used over those loaded from . /// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter - /// is particullarly useful for working with variable dimension inputs and outputs. + /// is particularly useful for working with variable dimension inputs and outputs. /// /// Optional GPU device ID to run execution on, to run on CPU. /// If GPU error, raise exception or fallback to CPU. @@ -110,7 +110,7 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog /// The path of the file containing the ONNX model. /// ONNX shapes to be used over those loaded from . /// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter - /// is particullarly useful for working with variable dimension inputs and outputs. + /// is particularly useful for working with variable dimension inputs and outputs. /// /// Optional GPU device ID to run execution on, to run on CPU. /// If GPU error, raise exception or fallback to CPU. @@ -162,7 +162,7 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog /// The path of the file containing the ONNX model. /// ONNX shapes to be used over those loaded from . /// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter - /// is particullarly useful for working with variable dimension inputs and outputs. + /// is particularly useful for working with variable dimension inputs and outputs. 
/// /// Optional GPU device ID to run execution on, to run on CPU. /// If GPU error, raise exception or fallback to CPU. @@ -176,6 +176,33 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary: shapeDictionary); + /// <summary> + /// Create a <see cref="OnnxScoringEstimator"/>, which applies a pre-trained Onnx model to the <paramref name="inputColumnNames"/> columns. + /// Please refer to <see cref="OnnxScoringEstimator"/> to learn more about the necessary dependencies, + /// and how to run it on a GPU. + /// </summary> + /// <param name="catalog">The transform's catalog.</param> + /// <param name="outputColumnNames">The output columns resulting from the transformation.</param> + /// <param name="inputColumnNames">The input columns.</param> + /// <param name="modelFile">The path of the file containing the ONNX model.</param> + /// <param name="shapeDictionary">ONNX shapes to be used over those loaded from <paramref name="modelFile"/>. + /// For keys use names as stated in the ONNX model, e.g. "input". Stating the shapes with this parameter + /// is particularly useful for working with variable dimension inputs and outputs. + /// </param> + /// <param name="gpuDeviceId">Optional GPU device ID to run execution on, <see langword="null"/> to run on CPU.</param> + /// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param> + /// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param> + public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog, + string[] outputColumnNames, + string[] inputColumnNames, + string modelFile, + IDictionary shapeDictionary, + int? gpuDeviceId = null, + bool fallbackToCpu = false, + int recursionLimit = 100) + => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, + modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary: shapeDictionary, recursionLimit: recursionLimit); + /// /// Create , which applies one of the pre-trained DNN models in /// to featurize an image. 
diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 6a4d98750c..41f573ed3f 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -87,6 +87,9 @@ internal sealed class Options : TransformInputBase [Argument(ArgumentType.Multiple, HelpText = "Shapes used to overwrite shapes loaded from ONNX file.", SortOrder = 5)] public CustomShapeInfo[] CustomShapeInfos; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Protobuf CodedInputStream recursion limit.", SortOrder = 6)] + public int RecursionLimit = 100; } /// @@ -126,8 +129,9 @@ private static VersionInfo GetVersionInfo() modelSignature: "ONNXSCOR", // version 10001 is single input & output. // version 10002 = multiple inputs & outputs - verWrittenCur: 0x00010002, - verReadableCur: 0x00010002, + // version 10003 = custom protobuf recursion limit + verWrittenCur: 0x00010003, + verReadableCur: 0x00010003, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, loaderAssemblyName: typeof(OnnxTransformer).Assembly.FullName); @@ -184,7 +188,26 @@ private static OnnxTransformer Create(IHostEnvironment env, ModelLoadContext ctx } } - var options = new Options() { InputColumns = inputs, OutputColumns = outputs, CustomShapeInfos = loadedCustomShapeInfos }; + int recursionLimit; + + // The recursion limit is only present in streams written as version 0x00010003 or later. + if (ctx.Header.ModelVerWritten >= 0x00010003) + { + recursionLimit = ctx.Reader.ReadInt32(); + } + else + { + // Streams written by an earlier version carry no recursion limit; use the default of 100. + recursionLimit = 100; + } + + var options = new Options() + { + InputColumns = inputs, + OutputColumns = outputs, + CustomShapeInfos = loadedCustomShapeInfos, + RecursionLimit = recursionLimit + }; return new OnnxTransformer(env, options, modelBytes); } @@ -221,13 +244,13 @@ private OnnxTransformer(IHostEnvironment env, Options options, byte[] modelBytes Host.CheckNonWhiteSpace(options.ModelFile, nameof(options.ModelFile)); 
Host.CheckIO(File.Exists(options.ModelFile), "Model file {0} does not exists.", options.ModelFile); // Because we cannot delete the user file, ownModelFile should be false. - Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary); + Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary, options.RecursionLimit); } else { // Entering this region means that the byte[] is passed as the model. To feed that byte[] to ONNXRuntime, we need // to create a temporal file to store it and then call ONNXRuntime's API to load that file. - Model = OnnxModel.CreateFromBytes(modelBytes, env, options.GpuDeviceId, options.FallbackToCpu, shapeDictionary: shapeDictionary); + Model = OnnxModel.CreateFromBytes(modelBytes, env, options.GpuDeviceId, options.FallbackToCpu, shapeDictionary: shapeDictionary, options.RecursionLimit); } } catch (OnnxRuntimeException e) @@ -258,8 +281,9 @@ private OnnxTransformer(IHostEnvironment env, Options options, byte[] modelBytes /// Optional GPU device ID to run execution on. Null for CPU. /// If GPU error, raise exception or fallback to CPU. /// + /// Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100. internal OnnxTransformer(IHostEnvironment env, string modelFile, int? gpuDeviceId = null, - bool fallbackToCpu = false, IDictionary shapeDictionary = null) + bool fallbackToCpu = false, IDictionary shapeDictionary = null, int recursionLimit = 100) : this(env, new Options() { ModelFile = modelFile, @@ -267,7 +291,8 @@ internal OnnxTransformer(IHostEnvironment env, string modelFile, int? 
gpuDeviceI OutputColumns = new string[] { }, GpuDeviceId = gpuDeviceId, FallbackToCpu = fallbackToCpu, - CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray() + CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray(), + RecursionLimit = recursionLimit }) { } @@ -283,8 +308,9 @@ internal OnnxTransformer(IHostEnvironment env, string modelFile, int? gpuDeviceI /// Optional GPU device ID to run execution on. Null for CPU. /// If GPU error, raise exception or fallback to CPU. /// + /// Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100. internal OnnxTransformer(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false, - IDictionary shapeDictionary = null) + IDictionary shapeDictionary = null, int recursionLimit = 100) : this(env, new Options() { ModelFile = modelFile, @@ -292,7 +318,8 @@ internal OnnxTransformer(IHostEnvironment env, string[] outputColumnNames, strin OutputColumns = outputColumnNames, GpuDeviceId = gpuDeviceId, FallbackToCpu = fallbackToCpu, - CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray() + CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray(), + RecursionLimit = recursionLimit }) { } @@ -325,6 +352,8 @@ private protected override void SaveModel(ModelSaveContext ctx) ctx.SaveNonEmptyString(info.Name); ctx.Writer.WriteIntArray(info.Shape); } + + ctx.Writer.Write(_options.RecursionLimit); } private protected override IRowMapper MakeRowMapper(DataViewSchema inputSchema) => new Mapper(this, inputSchema); @@ -807,10 +836,11 @@ public sealed class OnnxScoringEstimator : TrivialEstimator /// Optional GPU device ID to run execution on. Null for CPU. /// If GPU error, raise exception or fallback to CPU. 
/// + /// Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100. [BestFriend] internal OnnxScoringEstimator(IHostEnvironment env, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false, - IDictionary shapeDictionary = null) - : this(env, new OnnxTransformer(env, new string[] { }, new string[] { }, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary)) + IDictionary shapeDictionary = null, int recursionLimit = 100) + : this(env, new OnnxTransformer(env, new string[] { }, new string[] { }, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit)) { } @@ -825,9 +855,10 @@ internal OnnxScoringEstimator(IHostEnvironment env, string modelFile, int? gpuDe /// Optional GPU device ID to run execution on. Null for CPU. /// If GPU error, raise exception or fallback to CPU. /// + /// Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100. internal OnnxScoringEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile, - int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary shapeDictionary = null) - : this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary)) + int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary shapeDictionary = null, int recursionLimit = 100) + : this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit)) { } diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs b/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs index 615b95fa04..6f1af4835e 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs @@ -164,8 +164,9 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da /// If true, the will be deleted when is /// no longer needed. 
/// + /// Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100. public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false, - bool ownModelFile=false, IDictionary shapeDictionary = null) + bool ownModelFile=false, IDictionary shapeDictionary = null, int recursionLimit = 100) { // If we don't own the model file, _disposed should be false to prevent deleting user's file. _disposed = false; @@ -204,7 +205,7 @@ public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = // The CodedInputStream auto closes the stream, and we need to make sure that our main stream stays open, so creating a new one here. using (var modelStream = new FileStream(modelFile, FileMode.Open, FileAccess.Read, FileShare.Delete | FileShare.Read)) - using (var codedStream = Google.Protobuf.CodedInputStream.CreateWithLimits(modelStream, Int32.MaxValue, 100)) + using (var codedStream = Google.Protobuf.CodedInputStream.CreateWithLimits(modelStream, Int32.MaxValue, recursionLimit)) model = OnnxCSharpToProtoWrapper.ModelProto.Parser.ParseFrom(codedStream); // Parse actual input and output types stored in the loaded ONNX model to get their DataViewType's. @@ -321,7 +322,7 @@ private static bool CheckOnnxShapeCompatibility(IEnumerable left, IEnumerab /// /// Create an OnnxModel from a byte[]. Usually, a ONNX model is consumed by as a file. - /// With and , + /// With and , /// it's possible to use in-memory model (type: byte[]) to create . /// /// Bytes of the serialized model @@ -335,7 +336,7 @@ public static OnnxModel CreateFromBytes(byte[] modelBytes, IHostEnvironment env) /// Create an OnnxModel from a byte[]. Set execution to GPU if required. /// Usually, a ONNX model is consumed by as a file. /// With and - /// , + /// , /// it's possible to use in-memory model (type: byte[]) to create . /// /// Bytes of the serialized model. 
@@ -345,14 +346,15 @@ public static OnnxModel CreateFromBytes(byte[] modelBytes, IHostEnvironment env) /// User-provided shapes. If the key "myTensorName" is associated /// with the value [1, 3, 5], the shape of "myTensorName" will be set to [1, 3, 5]. /// The shape loaded from would be overwritten. + /// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param> /// An public static OnnxModel CreateFromBytes(byte[] modelBytes, IHostEnvironment env, int? gpuDeviceId = null, bool fallbackToCpu = false, - IDictionary shapeDictionary = null) + IDictionary shapeDictionary = null, int recursionLimit = 100) { var tempModelFile = Path.Combine(((IHostEnvironmentInternal)env).TempFilePath, Path.GetRandomFileName()); File.WriteAllBytes(tempModelFile, modelBytes); return new OnnxModel(tempModelFile, gpuDeviceId, fallbackToCpu, - ownModelFile: true, shapeDictionary: shapeDictionary); + ownModelFile: true, shapeDictionary: shapeDictionary, recursionLimit: recursionLimit); } /// diff --git a/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs b/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs index 202b3171c1..48561d435d 100644 --- a/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs +++ b/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs @@ -1002,5 +1002,32 @@ public void TestOnnxTransformSaveAndLoadWithCustomShapes() (model as IDisposable)?.Dispose(); (loadedModel as IDisposable)?.Dispose(); } + + /// + /// A test to check if recursion limit works. 
+ /// + [OnnxFact] + public void TestOnnxTransformSaveAndLoadWithRecursionLimit() + { + var modelFile = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet", "00000001", "model.onnx"); + + const int imageHeight = 224; + const int imageWidth = 224; + var dataFile = GetDataPath("images/images.tsv"); + var imageFolder = Path.GetDirectoryName(dataFile); + + var data = ML.Data.LoadFromTextFile(dataFile, new[] { + new TextLoader.Column("imagePath", DataKind.String, 0), + new TextLoader.Column("name", DataKind.String, 1) + }); + + var pipe = ML.Transforms.LoadImages("data_0", imageFolder, "imagePath") + .Append(ML.Transforms.ResizeImages("data_0", imageHeight, imageWidth)) + .Append(ML.Transforms.ExtractPixels("data_0", interleavePixelColors: true)) + .Append(ML.Transforms.ApplyOnnxModel(new []{ "softmaxout_1" }, new []{ "data_0" }, modelFile, + gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu, shapeDictionary: null, recursionLimit: 50)); + + TestEstimatorCore(pipe, data); + } } }