dotnet · kere-nel · Oct 15, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 14, 2020
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -334,7 +334,7 @@ public static CustomStopWordsRemovingEstimator RemoveStopWords(this TransformsCa
             => new CustomStopWordsRemovingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, stopwords);
 
         /// <summary>
-        /// Create a <see cref="WordHashBagEstimator"/>, which maps the column specified in <paramref name="inputColumnName"/>
+        /// Create a <see cref="WordBagEstimator"/>, which maps the column specified in <paramref name="inputColumnName"/>
         /// to a vector of n-gram counts in a new column named <paramref name="outputColumnName"/>.
         /// </summary>
         /// <remarks>
@@ -363,7 +363,7 @@ public static WordBagEstimator ProduceWordBags(this TransformsCatalog.TextTransf
                 outputColumnName, inputColumnName, ngramLength, skipLength, useAllLengths, maximumNgramsCount, weighting);
 
         /// <summary>
-        /// Create a <see cref="WordHashBagEstimator"/>, which maps the multiple columns specified in <paramref name="inputColumnNames"/>
+        /// Create a <see cref="WordBagEstimator"/>, which maps the multiple columns specified in <paramref name="inputColumnNames"/>
         /// to a vector of n-gram counts in a new column named <paramref name="outputColumnName"/>.
         /// </summary>
         /// <remarks>

diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizing.cs
@@ -415,10 +415,10 @@ public void SaveAsOnnx(OnnxContext ctx)
                     string[] separators = column.SeparatorsArray.Select(c => c.ToString()).ToArray();
                     tokenizerNode.AddAttribute("separators", separators);
 
-                    opType = "Squeeze";
-                    var squeezeOutput = ctx.AddIntermediateVariable(_type, column.Name);
-                    var squeezeNode = ctx.CreateNode(opType, intermediateVar, squeezeOutput, ctx.GetNodeName(opType), "");
-                    squeezeNode.AddAttribute("axes", new long[] { 1 });
+                    opType = "Reshape";
+                    var shape = ctx.AddInitializer(new long[] { 1, -1 }, new long[] { 2 }, "Shape");
+                    var reshapeOutput = ctx.AddIntermediateVariable(new VectorDataViewType(TextDataViewType.Instance, 1), column.Name);
+                    var reshapeNode = ctx.CreateNode(opType, new[] { intermediateVar, shape }, new[] { reshapeOutput }, ctx.GetNodeName(opType), "");
                 }
             }
         }

diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs
@@ -1310,11 +1310,11 @@ public void NgramOnnxConversionTest(
             IEstimator<ITransformer>[] pipelines =
             {
                 mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text", new[] { ' ' })
-                                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
-                                .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
-                                            ngramLength: ngramLength,
-                                            useAllLengths: useAllLength,
-                                            weighting: weighting)),
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
+                .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
+                            ngramLength: ngramLength,
+                            useAllLengths: useAllLength,
+                            weighting: weighting)),
 
                 mlContext.Transforms.Text.TokenizeIntoCharactersAsKeys("Tokens", "Text")
                 .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens",
@@ -1323,9 +1323,12 @@ public void NgramOnnxConversionTest(
                             weighting: weighting)),
 
                 mlContext.Transforms.Text.ProduceWordBags("Tokens", "Text",
-                                        ngramLength: ngramLength,
-                                        useAllLengths: useAllLength,
-                                        weighting: weighting)
+                            ngramLength: ngramLength,
+                            useAllLengths: useAllLength,
+                            weighting: weighting),
+
+                mlContext.Transforms.Text.TokenizeIntoWords("Tokens0", "Text")
+                .Append(mlContext.Transforms.Text.ProduceWordBags("Tokens", "Tokens0"))
             };
 
             for (int i = 0; i < pipelines.Length; i++)
@@ -1346,7 +1349,7 @@ public void NgramOnnxConversionTest(
                     var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(onnxFilePath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
                     var onnxTransformer = onnxEstimator.Fit(dataView);
                     var onnxResult = onnxTransformer.Transform(dataView);
-                    var columnName = i == pipelines.Length - 1 ? "Tokens" : "NGrams";
+                    var columnName = i >= pipelines.Length - 2 ? "Tokens" : "NGrams";
                     CompareResults(columnName, columnName, transformedData, onnxResult, 3);
 
                     VBuffer<ReadOnlyMemory<char>> mlNetSlots = default;