diff --git a/src/Microsoft.ML.Core/Data/IProgressChannel.cs b/src/Microsoft.ML.Core/Data/IProgressChannel.cs index 26dd7e1831..c379d19775 100644 --- a/src/Microsoft.ML.Core/Data/IProgressChannel.cs +++ b/src/Microsoft.ML.Core/Data/IProgressChannel.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Collections.Generic; namespace Microsoft.ML { @@ -77,13 +78,13 @@ public sealed class ProgressHeader /// For example, neural network might have {'epoch', 'example'} and FastTree might have {'tree', 'split', 'feature'}. /// Will never be null, but can be empty. /// - public readonly string[] UnitNames; + public readonly IReadOnlyList UnitNames; /// /// These are the names of the reported metrics. For example, this could be the 'loss', 'weight updates/sec' etc. /// Will never be null, but can be empty. /// - public readonly string[] MetricNames; + public readonly IReadOnlyList MetricNames; /// /// Initialize the header. This will take ownership of the arrays. diff --git a/src/Microsoft.ML.Core/Data/ProgressReporter.cs b/src/Microsoft.ML.Core/Data/ProgressReporter.cs index eaa8886a1f..f890b7e97f 100644 --- a/src/Microsoft.ML.Core/Data/ProgressReporter.cs +++ b/src/Microsoft.ML.Core/Data/ProgressReporter.cs @@ -25,8 +25,6 @@ public sealed class ProgressChannel : IProgressChannel { private readonly IExceptionContext _ectx; - private readonly string _name; - /// /// The pair of (header, fill action) is updated atomically. /// @@ -41,7 +39,7 @@ public sealed class ProgressChannel : IProgressChannel private volatile int _maxSubId; private bool _isDisposed; - public string Name { get { return _name; } } + public string Name { get; } /// /// Initialize a for the process identified by . @@ -56,7 +54,7 @@ public ProgressChannel(IExceptionContext ectx, ProgressTracker tracker, string c _ectx.CheckValue(tracker, nameof(tracker)); _ectx.CheckNonEmpty(computationName, nameof(computationName)); - _name = computationName; + Name = computationName; _tracker = tracker; _subChannels = new ConcurrentDictionary(); _maxSubId = 0; @@ -132,7 +130,7 @@ public ProgressEntry GetProgress() var entry = new ProgressEntry(false, cache.Item1); if (fillAction == null) - Contracts.Assert(entry.Header.MetricNames.Length == 0 && entry.Header.UnitNames.Length == 0); + Contracts.Assert(entry.Header.MetricNames.Count == 0 && entry.Header.UnitNames.Count == 0); else fillAction(entry); @@ -232,7 +230,7 @@ public ProgressEntry GetProgress() var entry = new ProgressEntry(false, cache.Item1); if (fillAction == null) - Contracts.Assert(entry.Header.MetricNames.Length == 0 && entry.Header.UnitNames.Length == 0); + Contracts.Assert(entry.Header.MetricNames.Count == 0 && entry.Header.UnitNames.Count == 0); else fillAction(entry); return entry; @@ -558,9 +556,9 @@ public ProgressEntry(bool isCheckpoint, ProgressHeader header) Contracts.CheckValue(header, nameof(header)); Header = header; IsCheckpoint = isCheckpoint; - Progress = new Double?[header.UnitNames.Length]; - ProgressLim = new Double?[header.UnitNames.Length]; - Metrics = new Double?[header.MetricNames.Length]; + Progress = new Double?[header.UnitNames.Count]; + ProgressLim = new Double?[header.UnitNames.Count]; + Metrics = new Double?[header.MetricNames.Count]; } } diff --git a/src/Microsoft.ML.Core/Environment/ConsoleEnvironment.cs b/src/Microsoft.ML.Core/Environment/ConsoleEnvironment.cs index 66e74b5f70..673f0159e3 100644 --- a/src/Microsoft.ML.Core/Environment/ConsoleEnvironment.cs +++ b/src/Microsoft.ML.Core/Environment/ConsoleEnvironment.cs @@ -259,7 +259,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven // Progress units. bool first = true; - for (int i = 0; i < ev.ProgressEntry.Header.UnitNames.Length; i++) + for (int i = 0; i < ev.ProgressEntry.Header.UnitNames.Count; i++) { if (ev.ProgressEntry.Progress[i] == null) continue; @@ -272,7 +272,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven } // Metrics. - for (int i = 0; i < ev.ProgressEntry.Header.MetricNames.Length; i++) + for (int i = 0; i < ev.ProgressEntry.Header.MetricNames.Count; i++) { if (ev.ProgressEntry.Metrics[i] == null) continue; diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs index 28324d61eb..1e89622641 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MultiClassClassifierMetrics.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; +using System.Collections.Immutable; using Microsoft.Data.DataView; namespace Microsoft.ML.Data @@ -78,7 +80,7 @@ public sealed class MultiClassClassifierMetrics /// p[i] is the probability returned by the classifier if the instance belongs to the class, /// and 1 minus the probability returned by the classifier if the instance does not belong to the class. /// - public double[] PerClassLogLoss { get; } + public IReadOnlyList PerClassLogLoss { get; } internal MultiClassClassifierMetrics(IExceptionContext ectx, DataViewRow overallResult, int topK) { @@ -92,8 +94,7 @@ internal MultiClassClassifierMetrics(IExceptionContext ectx, DataViewRow overall TopKAccuracy = FetchDouble(MultiClassClassifierEvaluator.TopKAccuracy); var perClassLogLoss = RowCursorUtils.Fetch>(ectx, overallResult, MultiClassClassifierEvaluator.PerClassLogLoss); - PerClassLogLoss = new double[perClassLogLoss.Length]; - perClassLogLoss.CopyTo(PerClassLogLoss); + PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray(); } internal MultiClassClassifierMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction, @@ -105,8 +106,7 @@ internal MultiClassClassifierMetrics(double accuracyMicro, double accuracyMacro, LogLossReduction = logLossReduction; TopK = topK; TopKAccuracy = topKAccuracy; - PerClassLogLoss = new double[perClassLogLoss.Length]; - perClassLogLoss.CopyTo(PerClassLogLoss, 0); + PerClassLogLoss = perClassLogLoss.ToImmutableArray(); } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/RankingMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/RankingMetrics.cs index b47a166201..6f4a18a427 100644 --- a/src/Microsoft.ML.Data/Evaluators/Metrics/RankingMetrics.cs +++ b/src/Microsoft.ML.Data/Evaluators/Metrics/RankingMetrics.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; +using System.Collections.Immutable; using Microsoft.Data.DataView; namespace Microsoft.ML.Data @@ -15,7 +17,7 @@ public sealed class RankingMetrics /// Array of normalized discounted cumulative gains where i-th element represent NDCG@i. /// /// - public double[] NormalizedDiscountedCumulativeGains { get; } + public IReadOnlyList NormalizedDiscountedCumulativeGains { get; } /// /// Array of discounted cumulative gains where i-th element represent DCG@i. @@ -25,7 +27,7 @@ public sealed class RankingMetrics /// /// /// Discounted Cumulative gain. - public double[] DiscountedCumulativeGains { get; } + public IReadOnlyList DiscountedCumulativeGains { get; } private static T Fetch(IExceptionContext ectx, DataViewRow row, string name) { @@ -40,16 +42,14 @@ internal RankingMetrics(IExceptionContext ectx, DataViewRow overallResult) { VBuffer Fetch(string name) => Fetch>(ectx, overallResult, name); - DiscountedCumulativeGains = Fetch(RankingEvaluator.Dcg).GetValues().ToArray(); - NormalizedDiscountedCumulativeGains = Fetch(RankingEvaluator.Ndcg).GetValues().ToArray(); + DiscountedCumulativeGains = Fetch(RankingEvaluator.Dcg).DenseValues().ToImmutableArray(); + NormalizedDiscountedCumulativeGains = Fetch(RankingEvaluator.Ndcg).DenseValues().ToImmutableArray(); } internal RankingMetrics(double[] dcg, double[] ndcg) { - DiscountedCumulativeGains = new double[dcg.Length]; - dcg.CopyTo(DiscountedCumulativeGains, 0); - NormalizedDiscountedCumulativeGains = new double[ndcg.Length]; - ndcg.CopyTo(NormalizedDiscountedCumulativeGains, 0); + DiscountedCumulativeGains = dcg.ToImmutableArray(); + NormalizedDiscountedCumulativeGains = ndcg.ToImmutableArray(); } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs index 58bb5ef8dc..a563b2e6fb 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; using System.Linq; using Microsoft.Data.DataView; using Microsoft.ML.Data; @@ -38,7 +39,8 @@ public abstract class ColumnOptionsBase public readonly string InputColumnName; public readonly SortOrder Sort; public readonly int MaxNumKeys; - public readonly string[] Term; + public IReadOnlyList Term => TermArray; + internal readonly string[] TermArray; public readonly bool TextKeyValues; [BestFriend] @@ -53,7 +55,7 @@ private protected ColumnOptionsBase(string outputColumnName, string inputColumnN InputColumnName = inputColumnName ?? outputColumnName; Sort = sort; MaxNumKeys = maxNumKeys; - Term = term; + TermArray = term; TextKeyValues = textKeyValues; } } diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs index 5e0013b4cf..44d7a6781b 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs @@ -532,7 +532,7 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info { // First check whether we have a terms argument, and handle it appropriately. var terms = columns[iinfo].Terms.AsMemory(); - var termsArray = columns[iinfo].Term; + var termsArray = columns[iinfo].TermArray; terms = ReadOnlyMemoryUtils.TrimSpaces(terms); if (!terms.IsEmpty || (termsArray != null && termsArray.Length > 0)) diff --git a/src/Microsoft.ML.Transforms/MetricStatistics.cs b/src/Microsoft.ML.Transforms/MetricStatistics.cs index bf91e3299f..acded1a640 100644 --- a/src/Microsoft.ML.Transforms/MetricStatistics.cs +++ b/src/Microsoft.ML.Transforms/MetricStatistics.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; using Microsoft.ML.Internal.Utilities; namespace Microsoft.ML.Data @@ -51,11 +52,11 @@ internal void Add(double metric) internal static class MetricsStatisticsUtils { - public static void AddArray(double[] src, MetricStatistics[] dest) + public static void AddToEach(IReadOnlyList src, IReadOnlyList dest) { - Contracts.Assert(src.Length == dest.Length, "Array sizes do not match."); + Contracts.Assert(src.Count == dest.Count); - for (int i = 0; i < dest.Length; i++) + for (int i = 0; i < dest.Count; i++) dest[i].Add(src[i]); } @@ -242,7 +243,7 @@ public sealed class MultiClassClassifierMetricsStatistics : IMetricsStatistics /// Summary statistics for . /// - public MetricStatistics[] PerClassLogLoss { get; private set; } + public IReadOnlyList PerClassLogLoss { get; private set; } internal MultiClassClassifierMetricsStatistics() { @@ -266,8 +267,8 @@ void IMetricsStatistics.Add(MultiClassClassifierMet TopKAccuracy.Add(metrics.TopKAccuracy); if (PerClassLogLoss == null) - PerClassLogLoss = MetricsStatisticsUtils.InitializeArray(metrics.PerClassLogLoss.Length); - MetricsStatisticsUtils.AddArray(metrics.PerClassLogLoss, PerClassLogLoss); + PerClassLogLoss = MetricsStatisticsUtils.InitializeArray(metrics.PerClassLogLoss.Count); + MetricsStatisticsUtils.AddToEach(metrics.PerClassLogLoss, PerClassLogLoss); } } @@ -280,12 +281,12 @@ public sealed class RankingMetricsStatistics : IMetricsStatistics /// Summary statistics for . /// - public MetricStatistics[] DiscountedCumulativeGains { get; private set; } + public IReadOnlyList DiscountedCumulativeGains { get; private set; } /// /// Summary statistics for . /// - public MetricStatistics[] NormalizedDiscountedCumulativeGains { get; private set; } + public IReadOnlyList NormalizedDiscountedCumulativeGains { get; private set; } internal RankingMetricsStatistics() { @@ -298,13 +299,13 @@ internal RankingMetricsStatistics() void IMetricsStatistics.Add(RankingMetrics metrics) { if (DiscountedCumulativeGains == null) - DiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.DiscountedCumulativeGains.Length); + DiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.DiscountedCumulativeGains.Count); if (NormalizedDiscountedCumulativeGains == null) - NormalizedDiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.NormalizedDiscountedCumulativeGains.Length); + NormalizedDiscountedCumulativeGains = MetricsStatisticsUtils.InitializeArray(metrics.NormalizedDiscountedCumulativeGains.Count); - MetricsStatisticsUtils.AddArray(metrics.DiscountedCumulativeGains, DiscountedCumulativeGains); - MetricsStatisticsUtils.AddArray(metrics.NormalizedDiscountedCumulativeGains, NormalizedDiscountedCumulativeGains); + MetricsStatisticsUtils.AddToEach(metrics.DiscountedCumulativeGains, DiscountedCumulativeGains); + MetricsStatisticsUtils.AddToEach(metrics.NormalizedDiscountedCumulativeGains, NormalizedDiscountedCumulativeGains); } } } diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs index 9218da6a46..0169a11faa 100644 --- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs +++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections.Generic; using System.Collections.Immutable; using Microsoft.Data.DataView; using Microsoft.ML.Data; @@ -236,7 +237,7 @@ private static MultiClassClassifierMetrics MulticlassClassificationDelta( if (a.TopK != b.TopK) Contracts.Assert(a.TopK == b.TopK, "TopK to compare must be the same length."); - var perClassLogLoss = ComputeArrayDeltas(a.PerClassLogLoss, b.PerClassLogLoss); + var perClassLogLoss = ComputeSequenceDeltas(a.PerClassLogLoss, b.PerClassLogLoss); return new MultiClassClassifierMetrics( accuracyMicro: a.MicroAccuracy - b.MicroAccuracy, @@ -315,8 +316,8 @@ public static ImmutableArray private static RankingMetrics RankingDelta( RankingMetrics a, RankingMetrics b) { - var dcg = ComputeArrayDeltas(a.DiscountedCumulativeGains, b.DiscountedCumulativeGains); - var ndcg = ComputeArrayDeltas(a.NormalizedDiscountedCumulativeGains, b.NormalizedDiscountedCumulativeGains); + var dcg = ComputeSequenceDeltas(a.DiscountedCumulativeGains, b.DiscountedCumulativeGains); + var ndcg = ComputeSequenceDeltas(a.NormalizedDiscountedCumulativeGains, b.NormalizedDiscountedCumulativeGains); return new RankingMetrics(dcg: dcg, ndcg: ndcg); } @@ -325,12 +326,12 @@ private static RankingMetrics RankingDelta( #region Helpers - private static double[] ComputeArrayDeltas(double[] a, double[] b) + private static double[] ComputeSequenceDeltas(IReadOnlyList a, IReadOnlyList b) { - Contracts.Assert(a.Length == b.Length, "Arrays to compare must be of the same length."); + Contracts.Assert(a.Count == b.Count); - var delta = new double[a.Length]; - for (int i = 0; i < a.Length; i++) + var delta = new double[a.Count]; + for (int i = 0; i < a.Count; i++) delta[i] = a[i] - b[i]; return delta; } diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs index 112b865389..34af9d3ed5 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs @@ -213,13 +213,13 @@ internal NgramHashingTransformer(IHostEnvironment env, IDataView input, params N { columnWithInvertHash.Add(i); invertHashMaxCounts[i] = invertHashMaxCount; - for (int j = 0; j < _columns[i].InputColumnNames.Length; j++) + for (int j = 0; j < _columns[i].InputColumnNamesArray.Length; j++) { - if (!input.Schema.TryGetColumnIndex(_columns[i].InputColumnNames[j], out int srcCol)) - throw Host.ExceptSchemaMismatch(nameof(input), "input", _columns[i].InputColumnNames[j]); + if (!input.Schema.TryGetColumnIndex(_columns[i].InputColumnNamesArray[j], out int srcCol)) + throw Host.ExceptSchemaMismatch(nameof(input), "input", _columns[i].InputColumnNamesArray[j]); var columnType = input.Schema[srcCol].Type; if (!NgramHashingEstimator.IsColumnTypeValid(input.Schema[srcCol].Type)) - throw Host.ExceptSchemaMismatch(nameof(input), "input", _columns[i].InputColumnNames[j], NgramHashingEstimator.ExpectedColumnType, columnType.ToString()); + throw Host.ExceptSchemaMismatch(nameof(input), "input", _columns[i].InputColumnNamesArray[j], NgramHashingEstimator.ExpectedColumnType, columnType.ToString()); sourceColumnsForInvertHash.Add(input.Schema[srcCol]); } } @@ -393,11 +393,11 @@ public Mapper(NgramHashingTransformer parent, DataViewSchema inputSchema, Finder _srcTypes = new DataViewType[_parent._columns.Length][]; for (int i = 0; i < _parent._columns.Length; i++) { - _srcIndices[i] = new int[_parent._columns[i].InputColumnNames.Length]; - _srcTypes[i] = new DataViewType[_parent._columns[i].InputColumnNames.Length]; - for (int j = 0; j < _parent._columns[i].InputColumnNames.Length; j++) + _srcIndices[i] = new int[_parent._columns[i].InputColumnNamesArray.Length]; + _srcTypes[i] = new DataViewType[_parent._columns[i].InputColumnNamesArray.Length]; + for (int j = 0; j < _parent._columns[i].InputColumnNamesArray.Length; j++) { - var srcName = _parent._columns[i].InputColumnNames[j]; + var srcName = _parent._columns[i].InputColumnNamesArray[j]; if (!inputSchema.TryGetColumnIndex(srcName, out int srcCol)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", srcName); var columnType = inputSchema[srcCol].Type; @@ -671,10 +671,10 @@ public InvertHashHelper(NgramHashingTransformer parent, DataViewSchema inputSche _srcIndices = new int[_parent._columns.Length][]; for (int i = 0; i < _parent._columns.Length; i++) { - _srcIndices[i] = new int[_parent._columns[i].InputColumnNames.Length]; - for (int j = 0; j < _parent._columns[i].InputColumnNames.Length; j++) + _srcIndices[i] = new int[_parent._columns[i].InputColumnNamesArray.Length]; + for (int j = 0; j < _parent._columns[i].InputColumnNamesArray.Length; j++) { - var srcName = _parent._columns[i].InputColumnNames[j]; + var srcName = _parent._columns[i].InputColumnNamesArray[j]; if (!inputSchema.TryGetColumnIndex(srcName, out int srcCol)) throw _parent.Host.ExceptSchemaMismatch(nameof(inputSchema), "input", srcName); _srcIndices[i][j] = srcCol; @@ -794,7 +794,7 @@ public NgramIdFinder Decorate(int iinfo, NgramIdFinder finder) { srcNames = new string[srcIndices.Length]; for (int i = 0; i < srcIndices.Length; ++i) - srcNames[i] = _parent._columns[iinfo].InputColumnNames[i]; + srcNames[i] = _parent._columns[iinfo].InputColumnNamesArray[i]; } Contracts.Assert(Utils.Size(srcNames) == srcIndices.Length); string[] friendlyNames = _friendlyNames?[iinfo]; @@ -878,7 +878,8 @@ public sealed class ColumnOptions /// Name of the column resulting from the transformation of . public readonly string Name; /// Names of the columns to transform. - public readonly string[] InputColumnNames; + public IReadOnlyList InputColumnNames => InputColumnNamesArray; + internal readonly string[] InputColumnNamesArray; /// Maximum ngram length. public readonly int NgramLength; /// Maximum number of tokens to skip when constructing an ngram. @@ -951,7 +952,7 @@ public ColumnOptions(string name, } FriendlyNames = null; Name = name; - InputColumnNames = inputColumnNames; + InputColumnNamesArray = inputColumnNames; NgramLength = ngramLength; SkipLength = skipLength; AllLengths = allLengths; @@ -978,9 +979,9 @@ internal ColumnOptions(ModelLoadContext ctx) // byte: Ordered // byte: AllLengths var inputsLength = ctx.Reader.ReadInt32(); - InputColumnNames = new string[inputsLength]; - for (int i = 0; i < InputColumnNames.Length; i++) - InputColumnNames[i] = ctx.LoadNonEmptyString(); + InputColumnNamesArray = new string[inputsLength]; + for (int i = 0; i < InputColumnNamesArray.Length; i++) + InputColumnNamesArray[i] = ctx.LoadNonEmptyString(); Name = ctx.LoadNonEmptyString(); NgramLength = ctx.Reader.ReadInt32(); Contracts.CheckDecode(0 < NgramLength && NgramLength <= NgramBufferBuilder.MaxSkipNgramLength); @@ -1001,7 +1002,7 @@ internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNa Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames)); Contracts.CheckParam(!inputColumnNames.Any(r => string.IsNullOrWhiteSpace(r)), nameof(inputColumnNames), "Contained some null or empty items"); - InputColumnNames = inputColumnNames; + InputColumnNamesArray = inputColumnNames; Name = name; // *** Binary format *** // string Output; @@ -1040,10 +1041,10 @@ internal void Save(ModelSaveContext ctx) // byte: Rehash // byte: Ordered // byte: AllLengths - Contracts.Assert(InputColumnNames.Length > 0); - ctx.Writer.Write(InputColumnNames.Length); - for (int i = 0; i < InputColumnNames.Length; i++) - ctx.SaveNonEmptyString(InputColumnNames[i]); + Contracts.Assert(InputColumnNamesArray.Length > 0); + ctx.Writer.Write(InputColumnNamesArray.Length); + for (int i = 0; i < InputColumnNamesArray.Length; i++) + ctx.SaveNonEmptyString(InputColumnNamesArray[i]); ctx.SaveNonEmptyString(Name); Contracts.Assert(0 < NgramLength && NgramLength <= NgramBufferBuilder.MaxSkipNgramLength); @@ -1228,7 +1229,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) var result = inputSchema.ToDictionary(x => x.Name); foreach (var colInfo in _columns) { - foreach (var input in colInfo.InputColumnNames) + foreach (var input in colInfo.InputColumnNamesArray) { if (!inputSchema.TryFindColumn(input, out var col)) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input); diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs index 87a6cb866d..ad1ee8c787 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs @@ -161,7 +161,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // charArray: Separators SaveColumns(ctx); foreach (var column in _columns) - ctx.Writer.WriteCharArray(column.Separators); + ctx.Writer.WriteCharArray(column.SeparatorsArray); } // Factory method for SignatureLoadModel. @@ -254,7 +254,7 @@ private ValueGetter>> MakeGetterOne(DataViewRow inp var getSrc = input.GetGetter>(ColMapNewToOld[iinfo]); var src = default(ReadOnlyMemory); var terms = new List>(); - var separators = _parent._columns[iinfo].Separators; + var separators = _parent._columns[iinfo].SeparatorsArray; return (ref VBuffer> dst) => @@ -283,7 +283,7 @@ private ValueGetter>> MakeGetterVec(DataViewRow inp var getSrc = input.GetGetter>>(ColMapNewToOld[iinfo]); var src = default(VBuffer>); var terms = new List>(); - var separators = _parent._columns[iinfo].Separators; + var separators = _parent._columns[iinfo].SeparatorsArray; return (ref VBuffer> dst) => @@ -368,22 +368,22 @@ private JToken SaveAsPfaCore(BoundPfaContext ctx, int iinfo, JToken srcToken) Contracts.Assert(CanSavePfa); var exInfo = _parent._columns[iinfo]; - var sep = PfaUtils.String("" + exInfo.Separators[0]); + var sep = PfaUtils.String("" + exInfo.SeparatorsArray[0]); if (_isSourceVector[iinfo]) { // If it's a vector, we'll concatenate them together. srcToken = PfaUtils.Call("s.join", srcToken, sep); } - if (exInfo.Separators.Length > 1) + if (exInfo.SeparatorsArray.Length > 1) { // Due to the intrinsics in PFA, it is much easier if we can do // one split, rather than multiple splits. So, if there are multiple // separators, we first replace them with the first separator, then // split once on that one. This could also have been done with a.flatMap. - for (int i = 1; i < exInfo.Separators.Length; ++i) + for (int i = 1; i < exInfo.SeparatorsArray.Length; ++i) { - var postSep = PfaUtils.String("" + exInfo.Separators[i]); + var postSep = PfaUtils.String("" + exInfo.SeparatorsArray[i]); srcToken = PfaUtils.Call("s.replaceall", srcToken, postSep, sep); } } @@ -444,7 +444,8 @@ public sealed class ColumnOptions { public readonly string Name; public readonly string InputColumnName; - public readonly char[] Separators; + public IReadOnlyList Separators => SeparatorsArray; + internal readonly char[] SeparatorsArray; /// /// Describes how the transformer handles one column pair. @@ -456,7 +457,7 @@ public ColumnOptions(string name, string inputColumnName = null, char[] separato { Name = name; InputColumnName = inputColumnName ?? name; - Separators = separators ?? new[] { ' ' }; + SeparatorsArray = separators ?? new[] { ' ' }; } } @@ -480,5 +481,4 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) return new SchemaShape(result.Values); } } - } \ No newline at end of file diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs index bef95a0dc1..26d2741040 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs @@ -860,7 +860,7 @@ public void FastTreeRanking() var metrics = catalog.Evaluate(data, r => r.label, r => r.groupId, r => r.score); Assert.NotNull(metrics); - Assert.True(metrics.NormalizedDiscountedCumulativeGains.Length == metrics.DiscountedCumulativeGains.Length && metrics.DiscountedCumulativeGains.Length == 3); + Assert.True(metrics.NormalizedDiscountedCumulativeGains.Count == metrics.DiscountedCumulativeGains.Count && metrics.DiscountedCumulativeGains.Count == 3); Assert.InRange(metrics.DiscountedCumulativeGains[0], 1.4, 1.6); Assert.InRange(metrics.DiscountedCumulativeGains[1], 1.4, 1.8); @@ -901,7 +901,7 @@ public void LightGBMRanking() var metrics = catalog.Evaluate(data, r => r.label, r => r.groupId, r => r.score); Assert.NotNull(metrics); - Assert.True(metrics.NormalizedDiscountedCumulativeGains.Length == metrics.DiscountedCumulativeGains.Length && metrics.DiscountedCumulativeGains.Length == 3); + Assert.True(metrics.NormalizedDiscountedCumulativeGains.Count == metrics.DiscountedCumulativeGains.Count && metrics.DiscountedCumulativeGains.Count == 3); Assert.InRange(metrics.DiscountedCumulativeGains[0], 1.4, 1.6); Assert.InRange(metrics.DiscountedCumulativeGains[1], 1.4, 1.8); diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs index 541b632e6d..92ff2fbddf 100644 --- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs +++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs @@ -253,7 +253,7 @@ public void TestPfiMulticlassClassificationOnDenseFeatures() // Because they are _negative_, the difference will be positive for worse classifiers. Assert.Equal(1, MaxDeltaIndex(pfi, m => m.LogLoss.Mean)); Assert.Equal(3, MinDeltaIndex(pfi, m => m.LogLoss.Mean)); - for (int i = 0; i < pfi[0].PerClassLogLoss.Length; i++) + for (int i = 0; i < pfi[0].PerClassLogLoss.Count; i++) { Assert.True(MaxDeltaIndex(pfi, m => m.PerClassLogLoss[i].Mean) == 1); Assert.True(MinDeltaIndex(pfi, m => m.PerClassLogLoss[i].Mean) == 3); @@ -293,7 +293,7 @@ public void TestPfiMulticlassClassificationOnSparseFeatures() // Because they are negative metrics, the _difference_ will be positive for worse classifiers. Assert.Equal(5, MaxDeltaIndex(pfi, m => m.LogLoss.Mean)); Assert.Equal(2, MinDeltaIndex(pfi, m => m.LogLoss.Mean)); - for (int i = 0; i < pfi[0].PerClassLogLoss.Length; i++) + for (int i = 0; i < pfi[0].PerClassLogLoss.Count; i++) { Assert.Equal(5, MaxDeltaIndex(pfi, m => m.PerClassLogLoss[i].Mean)); Assert.Equal(2, MinDeltaIndex(pfi, m => m.PerClassLogLoss[i].Mean)); @@ -321,12 +321,12 @@ public void TestPfiRankingOnDenseFeatures() // X4Rand: 3 // For the following metrics higher is better, so minimum delta means more important feature, and vice versa - for (int i = 0; i < pfi[0].DiscountedCumulativeGains.Length; i++) + for (int i = 0; i < pfi[0].DiscountedCumulativeGains.Count; i++) { Assert.Equal(0, MaxDeltaIndex(pfi, m => m.DiscountedCumulativeGains[i].Mean)); Assert.Equal(1, MinDeltaIndex(pfi, m => m.DiscountedCumulativeGains[i].Mean)); } - for (int i = 0; i < pfi[0].NormalizedDiscountedCumulativeGains.Length; i++) + for (int i = 0; i < pfi[0].NormalizedDiscountedCumulativeGains.Count; i++) { Assert.Equal(0, MaxDeltaIndex(pfi, m => m.NormalizedDiscountedCumulativeGains[i].Mean)); Assert.Equal(1, MinDeltaIndex(pfi, m => m.NormalizedDiscountedCumulativeGains[i].Mean)); @@ -354,12 +354,12 @@ public void TestPfiRankingOnSparseFeatures() // X3Important: 5 // Most important // For the following metrics higher is better, so minimum delta means more important feature, and vice versa - for (int i = 0; i < pfi[0].DiscountedCumulativeGains.Length; i++) + for (int i = 0; i < pfi[0].DiscountedCumulativeGains.Count; i++) { Assert.Equal(2, MaxDeltaIndex(pfi, m => m.DiscountedCumulativeGains[i].Mean)); Assert.Equal(5, MinDeltaIndex(pfi, m => m.DiscountedCumulativeGains[i].Mean)); } - for (int i = 0; i < pfi[0].NormalizedDiscountedCumulativeGains.Length; i++) + for (int i = 0; i < pfi[0].NormalizedDiscountedCumulativeGains.Count; i++) { Assert.Equal(2, MaxDeltaIndex(pfi, m => m.NormalizedDiscountedCumulativeGains[i].Mean)); Assert.Equal(5, MinDeltaIndex(pfi, m => m.NormalizedDiscountedCumulativeGains[i].Mean)); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index 500454b478..06f1a166ef 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -90,7 +90,7 @@ public void TrainAndPredictIrisModelTest() Assert.Equal(.06, metrics.LogLoss, 2); Assert.Equal(1, metrics.TopKAccuracy); - Assert.Equal(3, metrics.PerClassLogLoss.Length); + Assert.Equal(3, metrics.PerClassLogLoss.Count); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index e5e2b59d25..1534c2b77e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -95,7 +95,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() Assert.InRange(metrics.LogLossReduction, 94, 96); Assert.Equal(1, metrics.TopKAccuracy); - Assert.Equal(3, metrics.PerClassLogLoss.Length); + Assert.Equal(3, metrics.PerClassLogLoss.Count); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index b8fcf6ba75..04c3770233 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -95,7 +95,7 @@ private void CompareMetrics(MultiClassClassifierMetrics metrics) Assert.InRange(metrics.LogLoss, .05, .06); Assert.InRange(metrics.LogLossReduction, 94, 96); - Assert.Equal(3, metrics.PerClassLogLoss.Length); + Assert.Equal(3, metrics.PerClassLogLoss.Count); Assert.Equal(0, metrics.PerClassLogLoss[0], 1); Assert.Equal(.1, metrics.PerClassLogLoss[1], 1); Assert.Equal(.1, metrics.PerClassLogLoss[2], 1);