Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8d07401
Attempt on Issue 4169
mstfbl Sep 18, 2019
d80e23d
Further work on Issue_4169
mstfbl Sep 20, 2019
7b239b0
Temporary change for inquiry
mstfbl Sep 24, 2019
33e71ae
Pushing changes for inquiry
mstfbl Sep 26, 2019
2b1ab69
Implemented PredictedLabel as Categorical value (String/int). Now wor…
mstfbl Sep 26, 2019
7254a7e
Merge branch 'Issue_4169' of https://github.com/mstfbl/machinelearnin…
mstfbl Sep 26, 2019
1a43cc7
Added tests for Prediction Engine
mstfbl Sep 26, 2019
d39f98b
Removed the forwarding of DataViewSchema to the TrySinglePrediction f…
mstfbl Sep 26, 2019
8cf2b7c
Minor performance upgrade to avoid the array bounds check
mstfbl Sep 27, 2019
c4182fb
Update ImageClassificationTransform.cs
mstfbl Sep 30, 2019
d490383
Updated tests
mstfbl Oct 2, 2019
01c77c9
Merge branch 'Issue_4169' of https://github.com/mstfbl/machinelearnin…
mstfbl Oct 2, 2019
380d8bf
Revert "Merge branch 'Issue_4169' of https://github.com/mstfbl/machin…
mstfbl Oct 2, 2019
2af03ca
Revert "Updated tests"
mstfbl Oct 2, 2019
2c05ba8
Updated test files and corrected variable spellings
mstfbl Oct 2, 2019
6356665
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
mstfbl Oct 2, 2019
ed928c6
Update ImageClassificationTransform.cs
mstfbl Oct 2, 2019
f7f8253
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
mstfbl Oct 2, 2019
bd42f0a
Revert "Merge branch 'master' of https://github.com/dotnet/machinelea…
mstfbl Oct 2, 2019
f851791
Merge branch 'master' into Issue_4169
mstfbl Oct 2, 2019
643fb58
Deleted unused _outputTypes
mstfbl Oct 2, 2019
b6cfeda
Update ImageClassificationTransform.cs
mstfbl Oct 2, 2019
cca4fb8
Added test case to check the matching of predicted labels
mstfbl Oct 2, 2019
1c4c5dc
Update ImageClassificationTransform.cs
mstfbl Oct 2, 2019
2301a96
Update TensorflowTests.cs
mstfbl Oct 2, 2019
26e2ae1
Update TensorflowTests.cs
mstfbl Oct 2, 2019
94c0423
Update TensorflowTests.cs
mstfbl Oct 3, 2019
109879b
Fixed test case and off-by-one predictedLabel error
mstfbl Oct 3, 2019
6ff4c64
Merge remote-tracking branch 'upstream/master' into Issue_4169
mstfbl Oct 3, 2019
15c4654
Removed comments
mstfbl Oct 3, 2019
0573ef3
Replacing Path.Join with Path.Combine due to build error with Path.Join
mstfbl Oct 3, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ public static void Example()
batchSize: 10,
learningRate: 0.01f,
metricsCallback: (metrics) => Console.WriteLine(metrics),
validationSet: testDataset);

validationSet: testDataset)
.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

Console.WriteLine("*** Training the image classification model with " +
"DNN Transfer Learning on top of the selected pre-trained " +
Expand All @@ -96,12 +96,8 @@ public static void Example()

EvaluateModel(mlContext, testDataset, loadedModel);

VBuffer<ReadOnlyMemory<char>> keys = default;
loadedModel.GetOutputSchema(schema)["Label"].GetKeyValues(ref keys);

watch = System.Diagnostics.Stopwatch.StartNew();
TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel,
keys.DenseValues().ToArray());
TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel);

watch.Stop();
elapsedMs = watch.ElapsedMilliseconds;
Expand All @@ -119,12 +115,11 @@ public static void Example()
}

private static void TrySinglePrediction(string imagesForPredictions,
MLContext mlContext, ITransformer trainedModel,
ReadOnlyMemory<char>[] originalLabels)
MLContext mlContext, ITransformer trainedModel)
{
// Create prediction function to try one prediction
var predictionEngine = mlContext.Model
.CreatePredictionEngine<ImageData, ImagePrediction>(trainedModel);
.CreatePredictionEngine<ImageData, ImagePrediction>(trainedModel, true);
Comment thread
mstfbl marked this conversation as resolved.
Outdated

IEnumerable<ImageData> testImages = LoadImagesFromDirectory(
imagesForPredictions, false);
Expand All @@ -135,12 +130,11 @@ private static void TrySinglePrediction(string imagesForPredictions,
};

var prediction = predictionEngine.Predict(imageToPredict);
var index = prediction.PredictedLabel;

Console.WriteLine($"ImageFile : " +
$"[{Path.GetFileName(imageToPredict.ImagePath)}], " +
$"Scores : [{string.Join(",", prediction.Score)}], " +
$"Predicted Label : {originalLabels[index]}");
$"Predicted Label : {prediction.PredictedLabel}");
}


Expand Down Expand Up @@ -300,7 +294,7 @@ public class ImagePrediction
public float[] Score;

[ColumnName("PredictedLabel")]
public UInt32 PredictedLabel;
public string PredictedLabel;
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/DataLoadSave/EstimatorChain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ public EstimatorChain()
public TransformerChain<TLastTransformer> Fit(IDataView input)
{
// Before fitting, run schema propagation.
GetOutputSchema(SchemaShape.Create(input.Schema));
SchemaShape schemaShape = SchemaShape.Create(input.Schema);
GetOutputSchema(schemaShape);
Comment thread
mstfbl marked this conversation as resolved.
Outdated

IDataView current = input;
var xfs = new ITransformer[_estimators.Length];
Expand Down
20 changes: 12 additions & 8 deletions src/Microsoft.ML.Dnn/ImageClassificationTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@ protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
var info = new DataViewSchema.DetachedColumn[_parent._outputs.Length];
info[0] = new DataViewSchema.DetachedColumn(_parent._outputs[0], new VectorDataViewType(NumberDataViewType.Single, _parent._classCount), null);
info[1] = new DataViewSchema.DetachedColumn(_parent._outputs[1], NumberDataViewType.UInt32, null);
info[1] = new DataViewSchema.DetachedColumn(_parent._outputs[1], new KeyDataViewType(typeof(uint), _parent._classCount), ((DataViewSchema.Column)InputSchema.GetColumnOrNull(_parent._labelColumnName)).Annotations);

@yaeldekel yaeldekel Sep 20, 2019

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_labelColumnName [](start = 197, length = 16)

We cannot rely on the _labelColumnName column in the transformer, since it may not be present in InputSchema.
The way this should work is by getting the key values at the time we have the training data (which is required to contain the label column), which is in the constructor of the transformer. The key values should be serialized as part of saving the transformer, so that they are available even when the model is loaded from disk.
You should also be able to remove the _labelColumnName field from the transformer; it is not needed. #Resolved

@yaeldekel yaeldekel Sep 20, 2019

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_outputs [](start = 68, length = 8)

Please replace this field with two fields containing the score column name and the predicted label column name, and do the same for the argument in the Options class.
It is implicitly assumed throughout this code that there are exactly two output columns, the first one being the score column and the second one being the predicted label column - for example, this method (GetOutputColumnsCore), GetOutputSchema in the estimator, GetOutputColumnsCore in the mapper. #Resolved

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed that there is the same problem with the input column names - on the one hand, the argument in the Options class is an array, and on the other hand, in other places it is assumed that it contains exactly one element, which is the features column.


In reply to: 326619105 [](ancestors = 326619105)

return info;
}
}
Expand Down Expand Up @@ -1166,7 +1166,7 @@ internal ImageClassificationEstimator(IHostEnvironment env, Options options, Dnn
_options = options;
_dnnModel = dnnModel;
_tfInputTypes = new[] { TF_DataType.TF_STRING };
_outputTypes = new[] { new VectorDataViewType(NumberDataViewType.Single), NumberDataViewType.UInt32.GetItemType() };
_outputTypes = new DataViewType[] { new VectorDataViewType(NumberDataViewType.Single), new KeyDataViewType(typeof(uint), 5) };
Comment thread
mstfbl marked this conversation as resolved.
Outdated

@yaeldekel yaeldekel Sep 19, 2019

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_outputTypes [](start = 12, length = 12)

_outputTypes doesn't need to be an array of DataViewType. All the estimator needs to know is whether the length of the score vector is variable or not (seems to me that it is typically not variable, or at least this information can be inferred from the DnnModel).
If you get rid of this field, you will not have to guess the size of the key (which is also not needed by the estimator). #Resolved

@mstfbl mstfbl Sep 19, 2019

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good catch, thank you! Quick question: the Image Classification example ResnetV2101TransferLearningTrainTestSplit.cs runs perfectly well when I don't further define _outputTypes, i.e., delete line 69. Does this also mean that the estimator isn't using this field? #Resolved

@mstfbl mstfbl Sep 19, 2019

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I've found that the DataViewType field is needed for the pipeline in ResnetV2101TransferLearningTrainTestSplit.cs to fit properly. #Resolved

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate? The only place _outputTypes is used is in GetOutputSchema (and it is actually being used incorrectly there - see my comments there). What is not working properly in the example you mentioned?


In reply to: 326329163 [](ancestors = 326329163)

@mstfbl mstfbl Sep 20, 2019

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was giving a different error when I removed _outputTypes, but I now see that this was not the bottleneck problem I was having while implementing this KeyType solution. #Resolved

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The field _outputTypes can be deleted. The only place where it is used is in line 1281 where you have _outputTypes[0].GetItemType(). Instead of that you can write NumberDataViewType.Single, and then the field is not needed.


In reply to: 326229209 [](ancestors = 326229209)

@mstfbl mstfbl Oct 2, 2019

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed this, thank you for the catch! #Resolved

}

private static Options CreateArguments(DnnModel tensorFlowModel, string[] outputColumnNames, string[] inputColumnName, bool addBatchDimensionInput)
Expand Down Expand Up @@ -1196,12 +1196,16 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
if (col.ItemType != expectedType)
throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString());
}
for (var i = 0; i < _options.OutputColumns.Length; i++)
{
resultDic[_options.OutputColumns[i]] = new SchemaShape.Column(_options.OutputColumns[i],
_outputTypes[i].IsKnownSizeVector() ? SchemaShape.Column.VectorKind.Vector
: SchemaShape.Column.VectorKind.VariableVector, _outputTypes[i].GetItemType(), false);
}

resultDic[_options.OutputColumns[0]] = new SchemaShape.Column(_options.OutputColumns[0],
SchemaShape.Column.VectorKind.Vector, _outputTypes[0].GetItemType(), false);

var metadata = new List<SchemaShape.Column>();
metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, TextDataViewType.Instance, false));

resultDic[_options.OutputColumns[1]] = new SchemaShape.Column(_options.OutputColumns[1],
SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.UInt32, true, new SchemaShape(metadata.ToArray()));

return new SchemaShape(resultDic.Values);
}

Expand Down