diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
index e564b68cb8..239e7d93ac 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
@@ -4,7 +4,7 @@
namespace Microsoft.ML.Samples.Dynamic
{
- public static class LdaTransform
+ public static class LatentDirichletAllocationTransform
{
public static void Example()
{
@@ -30,7 +30,7 @@ public static void Example()
// A pipeline for featurizing the "Review" column
var pipeline = ml.Transforms.Text.ProduceWordBags(review).
- Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numTopic:3));
+ Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numberOfTopics: 3));
// The transformed data
var transformer = pipeline.Fit(trainData);
diff --git a/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs
index d094d444ba..1d1df76477 100644
--- a/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs
+++ b/src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs
@@ -12,65 +12,65 @@ namespace Microsoft.ML.StaticPipe
///
/// Information on the result of fitting a LDA transform.
///
- public sealed class LdaFitResult
+ public sealed class LatentDirichletAllocationFitResult
{
///
/// For user defined delegates that accept instances of the containing type.
///
///
- public delegate void OnFit(LdaFitResult result);
+ public delegate void OnFit(LatentDirichletAllocationFitResult result);
public LatentDirichletAllocationTransformer.LdaSummary LdaTopicSummary;
- public LdaFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary)
+ public LatentDirichletAllocationFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary)
{
LdaTopicSummary = ldaTopicSummary;
}
}
- public static class LdaStaticExtensions
+ public static class LatentDirichletAllocationStaticExtensions
{
private struct Config
{
- public readonly int NumTopic;
+ public readonly int NumberOfTopics;
public readonly Single AlphaSum;
public readonly Single Beta;
- public readonly int MHStep;
- public readonly int NumIter;
+ public readonly int SamplingStepCount;
+ public readonly int MaximumNumberOfIterations;
public readonly int LikelihoodInterval;
- public readonly int NumThread;
- public readonly int NumMaxDocToken;
- public readonly int NumSummaryTermPerTopic;
- public readonly int NumBurninIter;
+ public readonly int NumberOfThreads;
+ public readonly int MaximumTokenCountPerDocument;
+ public readonly int NumberOfSummaryTermsPerTopic;
+ public readonly int NumberOfBurninIterations;
public readonly bool ResetRandomGenerator;
public readonly Action OnFit;
- public Config(int numTopic, Single alphaSum, Single beta, int mhStep, int numIter, int likelihoodInterval,
- int numThread, int numMaxDocToken, int numSummaryTermPerTopic, int numBurninIter, bool resetRandomGenerator,
+ public Config(int numberOfTopics, Single alphaSum, Single beta, int samplingStepCount, int maximumNumberOfIterations, int likelihoodInterval,
+ int numberOfThreads, int maximumTokenCountPerDocument, int numberOfSummaryTermsPerTopic, int numberOfBurninIterations, bool resetRandomGenerator,
Action onFit)
{
- NumTopic = numTopic;
+ NumberOfTopics = numberOfTopics;
AlphaSum = alphaSum;
Beta = beta;
- MHStep = mhStep;
- NumIter = numIter;
+ SamplingStepCount = samplingStepCount;
+ MaximumNumberOfIterations = maximumNumberOfIterations;
LikelihoodInterval = likelihoodInterval;
- NumThread = numThread;
- NumMaxDocToken = numMaxDocToken;
- NumSummaryTermPerTopic = numSummaryTermPerTopic;
- NumBurninIter = numBurninIter;
+ NumberOfThreads = numberOfThreads;
+ MaximumTokenCountPerDocument = maximumTokenCountPerDocument;
+ NumberOfSummaryTermsPerTopic = numberOfSummaryTermsPerTopic;
+ NumberOfBurninIterations = numberOfBurninIterations;
ResetRandomGenerator = resetRandomGenerator;
OnFit = onFit;
}
}
- private static Action Wrap(LdaFitResult.OnFit onFit)
+ private static Action Wrap(LatentDirichletAllocationFitResult.OnFit onFit)
{
if (onFit == null)
return null;
- return ldaTopicSummary => onFit(new LdaFitResult(ldaTopicSummary));
+ return ldaTopicSummary => onFit(new LatentDirichletAllocationFitResult(ldaTopicSummary));
}
private interface ILdaCol
@@ -108,16 +108,16 @@ public override IEstimator Reconcile(IHostEnvironment env,
infos[i] = new LatentDirichletAllocationEstimator.ColumnOptions(outputNames[toOutput[i]],
inputNames[tcol.Input],
- tcol.Config.NumTopic,
+ tcol.Config.NumberOfTopics,
tcol.Config.AlphaSum,
tcol.Config.Beta,
- tcol.Config.MHStep,
- tcol.Config.NumIter,
+ tcol.Config.SamplingStepCount,
+ tcol.Config.MaximumNumberOfIterations,
tcol.Config.LikelihoodInterval,
- tcol.Config.NumThread,
- tcol.Config.NumMaxDocToken,
- tcol.Config.NumSummaryTermPerTopic,
- tcol.Config.NumBurninIter,
+ tcol.Config.NumberOfThreads,
+ tcol.Config.MaximumTokenCountPerDocument,
+ tcol.Config.NumberOfSummaryTermsPerTopic,
+ tcol.Config.NumberOfBurninIterations,
tcol.Config.ResetRandomGenerator);
if (tcol.Config.OnFit != null)
@@ -137,36 +137,36 @@ public override IEstimator Reconcile(IHostEnvironment env,
///
/// A vector of floats representing the document.
- /// <param name="numTopic">The number of topics.</param>
+ /// <param name="numberOfTopics">The number of topics.</param>
/// Dirichlet prior on document-topic vectors.
/// Dirichlet prior on vocab-topic vectors.
- /// <param name="mhstep">Number of Metropolis Hasting step.</param>
- /// <param name="numIterations">Number of iterations.</param>
+ /// <param name="samplingStepCount">Number of Metropolis-Hastings steps.</param>
+ /// <param name="maximumNumberOfIterations">Number of iterations.</param>
/// Compute log likelihood over local dataset on this iteration interval.
- /// <param name="numThreads">The number of training threads. Default value depends on number of logical processors.</param>
- /// <param name="numMaxDocToken">The threshold of maximum count of tokens per doc.</param>
- /// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
- /// <param name="numBurninIterations">The number of burn-in iterations.</param>
+ /// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
+ /// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
+ /// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
+ /// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
/// Reset the random number generator for each document.
/// Called upon fitting with the learnt enumeration on the dataset.
- public static Vector ToLdaTopicVector(this Vector input,
- int numTopic = LatentDirichletAllocationEstimator.Defaults.NumTopic,
+ public static Vector LatentDirichletAllocation(this Vector input,
+ int numberOfTopics = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics,
Single alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum,
Single beta = LatentDirichletAllocationEstimator.Defaults.Beta,
- int mhstep = LatentDirichletAllocationEstimator.Defaults.Mhstep,
- int numIterations = LatentDirichletAllocationEstimator.Defaults.NumIterations,
+ int samplingStepCount = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
+ int maximumNumberOfIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations,
int likelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
- int numThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads,
- int numMaxDocToken = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken,
- int numSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic,
- int numBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations,
+ int numberOfThreads = LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
+ int maximumTokenCountPerDocument = LatentDirichletAllocationEstimator.Defaults.MaximumTokenCountPerDocument,
+ int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfSummaryTermsPerTopic,
+ int numberOfBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
bool resetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator,
- LdaFitResult.OnFit onFit = null)
+ LatentDirichletAllocationFitResult.OnFit onFit = null)
{
Contracts.CheckValue(input, nameof(input));
return new ImplVector(input,
- new Config(numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads, numMaxDocToken, numSummaryTermPerTopic,
- numBurninIterations, resetRandomGenerator, Wrap(onFit)));
+ new Config(numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads, maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic,
+ numberOfBurninIterations, resetRandomGenerator, Wrap(onFit)));
}
}
}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
index 3c247ab1fd..aec1bfe763 100644
--- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
+++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs
@@ -58,7 +58,7 @@ internal sealed class Options : TransformInputBase
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of topics", SortOrder = 50)]
[TGUI(SuggestedSweeps = "20,40,100,200")]
[TlcModule.SweepableDiscreteParam("NumTopic", new object[] { 20, 40, 100, 200 })]
- public int NumTopic = LatentDirichletAllocationEstimator.Defaults.NumTopic;
+ public int NumTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics;
[Argument(ArgumentType.AtMostOnce, HelpText = "Dirichlet prior on document-topic vectors")]
[TGUI(SuggestedSweeps = "1,10,100,200")]
@@ -73,30 +73,30 @@ internal sealed class Options : TransformInputBase
[Argument(ArgumentType.Multiple, HelpText = "Number of Metropolis Hasting step")]
[TGUI(SuggestedSweeps = "2,4,8,16")]
[TlcModule.SweepableDiscreteParam("Mhstep", new object[] { 2, 4, 8, 16 })]
- public int Mhstep = LatentDirichletAllocationEstimator.Defaults.Mhstep;
+ public int Mhstep = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount;
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter")]
[TGUI(SuggestedSweeps = "100,200,300,400")]
[TlcModule.SweepableDiscreteParam("NumIterations", new object[] { 100, 200, 300, 400 })]
- public int NumIterations = LatentDirichletAllocationEstimator.Defaults.NumIterations;
+ public int NumIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations;
[Argument(ArgumentType.AtMostOnce, HelpText = "Compute log likelihood over local dataset on this iteration interval", ShortName = "llInterval")]
public int LikelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval;
// REVIEW: Should change the default when multi-threading support is optimized.
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of training threads. Default value depends on number of logical processors.", ShortName = "t", SortOrder = 50)]
- public int NumThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads;
+ public int NumThreads = LatentDirichletAllocationEstimator.Defaults.NumberOfThreads;
[Argument(ArgumentType.AtMostOnce, HelpText = "The threshold of maximum count of tokens per doc", ShortName = "maxNumToken", SortOrder = 50)]
- public int NumMaxDocToken = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken;
+ public int NumMaxDocToken = LatentDirichletAllocationEstimator.Defaults.MaximumTokenCountPerDocument;
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of words to summarize the topic", ShortName = "ns")]
- public int NumSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic;
+ public int NumSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfSummaryTermsPerTopic;
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of burn-in iterations", ShortName = "burninIter")]
[TGUI(SuggestedSweeps = "10,20,30,40")]
[TlcModule.SweepableDiscreteParam("NumBurninIterations", new object[] { 10, 20, 30, 40 })]
- public int NumBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations;
+ public int NumBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations;
[Argument(ArgumentType.AtMostOnce, HelpText = "Reset the random number generator for each document", ShortName = "reset")]
public bool ResetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator;
@@ -219,17 +219,17 @@ internal LdaState(IExceptionContext ectx, LatentDirichletAllocationEstimator.Col
_numVocab = numVocab;
_ldaTrainer = new LdaSingleBox(
- InfoEx.NumTopic,
+ InfoEx.NumberOfTopics,
numVocab, /* Need to set number of vocabulary here */
InfoEx.AlphaSum,
InfoEx.Beta,
- InfoEx.NumIter,
+ InfoEx.NumberOfIterations,
InfoEx.LikelihoodInterval,
- InfoEx.NumThread,
- InfoEx.MHStep,
- InfoEx.NumSummaryTermPerTopic,
+ InfoEx.NumberOfThreads,
+ InfoEx.SamplingStepCount,
+ InfoEx.NumberOfSummaryTermsPerTopic,
false,
- InfoEx.NumMaxDocToken);
+ InfoEx.MaximumTokenCountPerDocument);
}
internal LdaState(IExceptionContext ectx, ModelLoadContext ctx)
@@ -257,19 +257,19 @@ internal LdaState(IExceptionContext ectx, ModelLoadContext ctx)
ectx.CheckDecode(aliasMemBlockSize > 0);
_ldaTrainer = new LdaSingleBox(
- InfoEx.NumTopic,
+ InfoEx.NumberOfTopics,
_numVocab, /* Need to set number of vocabulary here */
InfoEx.AlphaSum,
InfoEx.Beta,
- InfoEx.NumIter,
+ InfoEx.NumberOfIterations,
InfoEx.LikelihoodInterval,
- InfoEx.NumThread,
- InfoEx.MHStep,
- InfoEx.NumSummaryTermPerTopic,
+ InfoEx.NumberOfThreads,
+ InfoEx.SamplingStepCount,
+ InfoEx.NumberOfSummaryTermsPerTopic,
false,
- InfoEx.NumMaxDocToken);
+ InfoEx.MaximumTokenCountPerDocument);
- _ldaTrainer.AllocateModelMemory(_numVocab, InfoEx.NumTopic, memBlockSize, aliasMemBlockSize);
+ _ldaTrainer.AllocateModelMemory(_numVocab, InfoEx.NumberOfTopics, memBlockSize, aliasMemBlockSize);
for (int i = 0; i < _numVocab; i++)
{
@@ -400,7 +400,7 @@ public int FeedTrain(IExceptionContext ectx, in VBuffer input)
// Ignore this row.
return 0;
}
- if (docSize >= InfoEx.NumMaxDocToken - termFreq)
+ if (docSize >= InfoEx.MaximumTokenCountPerDocument - termFreq)
break;
// If legal then add the term.
@@ -448,7 +448,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin
}
}
- int len = InfoEx.NumTopic;
+ int len = InfoEx.NumberOfTopics;
var srcValues = src.GetValues();
if (srcValues.Length == 0)
{
@@ -476,7 +476,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin
return;
}
- if (docSize >= InfoEx.NumMaxDocToken - termFreq)
+ if (docSize >= InfoEx.MaximumTokenCountPerDocument - termFreq)
break;
docSize += termFreq;
@@ -557,7 +557,7 @@ protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
for (int i = 0; i < _parent.ColumnPairs.Length; i++)
{
var info = _parent._columns[i];
- result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, new VectorType(NumberDataViewType.Single, info.NumTopic), null);
+ result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, new VectorType(NumberDataViewType.Single, info.NumberOfTopics), null);
}
return result;
}
@@ -576,7 +576,7 @@ private ValueGetter> GetTopic(DataViewRow input, int iinfo)
var getSrc = RowCursorUtils.GetVecGetterAs(NumberDataViewType.Double, input, _srcCols[iinfo]);
var src = default(VBuffer);
var lda = _parent._ldas[iinfo];
- int numBurninIter = lda.InfoEx.NumBurninIter;
+ int numBurninIter = lda.InfoEx.NumberOfBurninIterations;
bool reset = lda.InfoEx.ResetRandomGenerator;
return
(ref VBuffer dst) =>
@@ -831,7 +831,7 @@ private static List>> Train(IHostEnvironment env, I
break;
}
- if (docSize >= columns[i].NumMaxDocToken - termFreq)
+ if (docSize >= columns[i].MaximumTokenCountPerDocument - termFreq)
break; //control the document length
//if legal then add the term
@@ -920,16 +920,16 @@ public sealed class LatentDirichletAllocationEstimator : IEstimatorThe environment.
/// Name of the column resulting from the transformation of .
/// Name of the column to transform. If set to , the value of the will be used as source.
- /// <param name="numTopic">The number of topics.</param>
+ /// <param name="numberOfTopics">The number of topics.</param>
/// Dirichlet prior on document-topic vectors.
/// Dirichlet prior on vocab-topic vectors.
- /// <param name="mhstep">Number of Metropolis Hasting step.</param>
- /// <param name="numIterations">Number of iterations.</param>
+ /// <param name="samplingStepCount">Number of Metropolis-Hastings steps.</param>
+ /// <param name="maximumNumberOfIterations">Number of iterations.</param>
+ /// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
+ /// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
+ /// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
/// Compute log likelihood over local dataset on this iteration interval.
- /// <param name="numThreads">The number of training threads. Default value depends on number of logical processors.</param>
- /// <param name="numMaxDocToken">The threshold of maximum count of tokens per doc.</param>
- /// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
- /// <param name="numBurninIterations">The number of burn-in iterations.</param>
+ /// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
/// Reset the random number generator for each document.
internal LatentDirichletAllocationEstimator(IHostEnvironment env,
string outputColumnName, string inputColumnName = null,
- int numTopic = Defaults.NumTopic,
+ int numberOfTopics = Defaults.NumberOfTopics,
float alphaSum = Defaults.AlphaSum,
float beta = Defaults.Beta,
- int mhstep = Defaults.Mhstep,
- int numIterations = Defaults.NumIterations,
+ int samplingStepCount = Defaults.SamplingStepCount,
+ int maximumNumberOfIterations = Defaults.MaximumNumberOfIterations,
+ int numberOfThreads = Defaults.NumberOfThreads,
+ int maximumTokenCountPerDocument = Defaults.MaximumTokenCountPerDocument,
+ int numberOfSummaryTermsPerTopic = Defaults.NumberOfSummaryTermsPerTopic,
int likelihoodInterval = Defaults.LikelihoodInterval,
- int numThreads = Defaults.NumThreads,
- int numMaxDocToken = Defaults.NumMaxDocToken,
- int numSummaryTermPerTopic = Defaults.NumSummaryTermPerTopic,
- int numBurninIterations = Defaults.NumBurninIterations,
+ int numberOfBurninIterations = Defaults.NumberOfBurninIterations,
bool resetRandomGenerator = Defaults.ResetRandomGenerator)
: this(env, new[] { new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName,
- numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads, numMaxDocToken,
- numSummaryTermPerTopic, numBurninIterations, resetRandomGenerator) })
+ numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads, maximumTokenCountPerDocument,
+ numberOfSummaryTermsPerTopic, numberOfBurninIterations, resetRandomGenerator) })
{ }
///
@@ -995,7 +995,7 @@ public sealed class ColumnOptions
///
/// The number of topics.
///
- public readonly int NumTopic;
+ public readonly int NumberOfTopics;
///
/// Dirichlet prior on document-topic vectors.
///
@@ -1007,11 +1007,11 @@ public sealed class ColumnOptions
///
/// Number of Metropolis Hasting step.
///
- public readonly int MHStep;
+ public readonly int SamplingStepCount;
///
/// Number of iterations.
///
- public readonly int NumIter;
+ public readonly int NumberOfIterations;
///
/// Compute log likelihood over local dataset on this iteration interval.
///
@@ -1019,19 +1019,19 @@ public sealed class ColumnOptions
///
/// The number of training threads.
///
- public readonly int NumThread;
+ public readonly int NumberOfThreads;
///
/// The threshold of maximum count of tokens per doc.
///
- public readonly int NumMaxDocToken;
+ public readonly int MaximumTokenCountPerDocument;
///
/// The number of words to summarize the topic.
///
- public readonly int NumSummaryTermPerTopic;
+ public readonly int NumberOfSummaryTermsPerTopic;
///
/// The number of burn-in iterations.
///
- public readonly int NumBurninIter;
+ public readonly int NumberOfBurninIterations;
///
/// Reset the random number generator for each document.
///
@@ -1042,54 +1042,54 @@ public sealed class ColumnOptions
///
/// The column containing the output scores over a set of topics, represented as a vector of floats.
/// The column representing the document as a vector of floats.A null value for the column means is replaced.
- /// <param name="numTopic">The number of topics.</param>
+ /// <param name="numberOfTopics">The number of topics.</param>
/// Dirichlet prior on document-topic vectors.
/// Dirichlet prior on vocab-topic vectors.
- /// <param name="mhStep">Number of Metropolis Hasting step.</param>
- /// <param name="numIter">Number of iterations.</param>
+ /// <param name="samplingStepCount">Number of Metropolis-Hastings steps.</param>
+ /// <param name="maximumNumberOfIterations">Number of iterations.</param>
/// Compute log likelihood over local dataset on this iteration interval.
- /// <param name="numThread">The number of training threads. Default value depends on number of logical processors.</param>
- /// <param name="numMaxDocToken">The threshold of maximum count of tokens per doc.</param>
- /// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
- /// <param name="numBurninIter">The number of burn-in iterations.</param>
+ /// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
+ /// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
+ /// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
+ /// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
/// Reset the random number generator for each document.
public ColumnOptions(string name,
string inputColumnName = null,
- int numTopic = LatentDirichletAllocationEstimator.Defaults.NumTopic,
+ int numberOfTopics = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics,
float alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum,
float beta = LatentDirichletAllocationEstimator.Defaults.Beta,
- int mhStep = LatentDirichletAllocationEstimator.Defaults.Mhstep,
- int numIter = LatentDirichletAllocationEstimator.Defaults.NumIterations,
+ int samplingStepCount = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
+ int maximumNumberOfIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations,
int likelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
- int numThread = LatentDirichletAllocationEstimator.Defaults.NumThreads,
- int numMaxDocToken = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken,
- int numSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic,
- int numBurninIter = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations,
+ int numberOfThreads = LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
+ int maximumTokenCountPerDocument = LatentDirichletAllocationEstimator.Defaults.MaximumTokenCountPerDocument,
+ int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfSummaryTermsPerTopic,
+ int numberOfBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
bool resetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator)
{
Contracts.CheckValue(name, nameof(name));
Contracts.CheckValueOrNull(inputColumnName);
- Contracts.CheckParam(numTopic > 0, nameof(numTopic), "Must be positive.");
- Contracts.CheckParam(mhStep > 0, nameof(mhStep), "Must be positive.");
- Contracts.CheckParam(numIter > 0, nameof(numIter), "Must be positive.");
+ Contracts.CheckParam(numberOfTopics > 0, nameof(numberOfTopics), "Must be positive.");
+ Contracts.CheckParam(samplingStepCount > 0, nameof(samplingStepCount), "Must be positive.");
+ Contracts.CheckParam(maximumNumberOfIterations > 0, nameof(maximumNumberOfIterations), "Must be positive.");
Contracts.CheckParam(likelihoodInterval > 0, nameof(likelihoodInterval), "Must be positive.");
- Contracts.CheckParam(numThread >= 0, nameof(numThread), "Must be positive or zero.");
- Contracts.CheckParam(numMaxDocToken > 0, nameof(numMaxDocToken), "Must be positive.");
- Contracts.CheckParam(numSummaryTermPerTopic > 0, nameof(numSummaryTermPerTopic), "Must be positive");
- Contracts.CheckParam(numBurninIter >= 0, nameof(numBurninIter), "Must be non-negative.");
+ Contracts.CheckParam(numberOfThreads >= 0, nameof(numberOfThreads), "Must be positive or zero.");
+ Contracts.CheckParam(maximumTokenCountPerDocument > 0, nameof(maximumTokenCountPerDocument), "Must be positive.");
+ Contracts.CheckParam(numberOfSummaryTermsPerTopic > 0, nameof(numberOfSummaryTermsPerTopic), "Must be positive");
+ Contracts.CheckParam(numberOfBurninIterations >= 0, nameof(numberOfBurninIterations), "Must be non-negative.");
Name = name;
InputColumnName = inputColumnName ?? name;
- NumTopic = numTopic;
+ NumberOfTopics = numberOfTopics;
AlphaSum = alphaSum;
Beta = beta;
- MHStep = mhStep;
- NumIter = numIter;
+ SamplingStepCount = samplingStepCount;
+ NumberOfIterations = maximumNumberOfIterations;
LikelihoodInterval = likelihoodInterval;
- NumThread = numThread;
- NumMaxDocToken = numMaxDocToken;
- NumSummaryTermPerTopic = numSummaryTermPerTopic;
- NumBurninIter = numBurninIter;
+ NumberOfThreads = numberOfThreads;
+ MaximumTokenCountPerDocument = maximumTokenCountPerDocument;
+ NumberOfSummaryTermsPerTopic = numberOfSummaryTermsPerTopic;
+ NumberOfBurninIterations = numberOfBurninIterations;
ResetRandomGenerator = resetRandomGenerator;
}
@@ -1128,33 +1128,33 @@ internal ColumnOptions(IExceptionContext ectx, ModelLoadContext ctx)
// int NumBurninIter;
// byte ResetRandomGenerator;
- NumTopic = ctx.Reader.ReadInt32();
- ectx.CheckDecode(NumTopic > 0);
+ NumberOfTopics = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(NumberOfTopics > 0);
AlphaSum = ctx.Reader.ReadSingle();
Beta = ctx.Reader.ReadSingle();
- MHStep = ctx.Reader.ReadInt32();
- ectx.CheckDecode(MHStep > 0);
+ SamplingStepCount = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(SamplingStepCount > 0);
- NumIter = ctx.Reader.ReadInt32();
- ectx.CheckDecode(NumIter > 0);
+ NumberOfIterations = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(NumberOfIterations > 0);
LikelihoodInterval = ctx.Reader.ReadInt32();
ectx.CheckDecode(LikelihoodInterval > 0);
- NumThread = ctx.Reader.ReadInt32();
- ectx.CheckDecode(NumThread >= 0);
+ NumberOfThreads = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(NumberOfThreads >= 0);
- NumMaxDocToken = ctx.Reader.ReadInt32();
- ectx.CheckDecode(NumMaxDocToken > 0);
+ MaximumTokenCountPerDocument = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(MaximumTokenCountPerDocument > 0);
- NumSummaryTermPerTopic = ctx.Reader.ReadInt32();
- ectx.CheckDecode(NumSummaryTermPerTopic > 0);
+ NumberOfSummaryTermsPerTopic = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(NumberOfSummaryTermsPerTopic > 0);
- NumBurninIter = ctx.Reader.ReadInt32();
- ectx.CheckDecode(NumBurninIter >= 0);
+ NumberOfBurninIterations = ctx.Reader.ReadInt32();
+ ectx.CheckDecode(NumberOfBurninIterations >= 0);
ResetRandomGenerator = ctx.Reader.ReadBoolByte();
}
@@ -1176,16 +1176,16 @@ internal void Save(ModelSaveContext ctx)
// int NumBurninIter;
// byte ResetRandomGenerator;
- ctx.Writer.Write(NumTopic);
+ ctx.Writer.Write(NumberOfTopics);
ctx.Writer.Write(AlphaSum);
ctx.Writer.Write(Beta);
- ctx.Writer.Write(MHStep);
- ctx.Writer.Write(NumIter);
+ ctx.Writer.Write(SamplingStepCount);
+ ctx.Writer.Write(NumberOfIterations);
ctx.Writer.Write(LikelihoodInterval);
- ctx.Writer.Write(NumThread);
- ctx.Writer.Write(NumMaxDocToken);
- ctx.Writer.Write(NumSummaryTermPerTopic);
- ctx.Writer.Write(NumBurninIter);
+ ctx.Writer.Write(NumberOfThreads);
+ ctx.Writer.Write(MaximumTokenCountPerDocument);
+ ctx.Writer.Write(NumberOfSummaryTermsPerTopic);
+ ctx.Writer.Write(NumberOfBurninIterations);
ctx.Writer.WriteBoolByte(ResetRandomGenerator);
}
}
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 92647a4ab0..230ed970d4 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -579,17 +579,10 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T
/// The transform's catalog.
/// Name of the column resulting from the transformation of .
/// Name of the column to transform. If set to , the value of the will be used as source.
- /// <param name="numTopic">The number of topics.</param>
- /// <param name="alphaSum">Dirichlet prior on document-topic vectors.</param>
- /// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
- /// <param name="mhstep">Number of Metropolis Hasting step.</param>
- /// <param name="numIterations">Number of iterations.</param>
- /// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
- /// <param name="numThreads">The number of training threads. Default value depends on number of logical processors.</param>
- /// <param name="numMaxDocToken">The threshold of maximum count of tokens per doc.</param>
- /// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
- /// <param name="numBurninIterations">The number of burn-in iterations.</param>
- /// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
+ /// <param name="numberOfTopics">The number of topics.</param>
+ /// <param name="maximumNumberOfIterations">Number of iterations.</param>
+ /// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
+ /// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
///
///
/// new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog),
- outputColumnName, inputColumnName, numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads,
- numMaxDocToken, numSummaryTermPerTopic, numBurninIterations, resetRandomGenerator);
+ outputColumnName, inputColumnName, numberOfTopics,
+ LatentDirichletAllocationEstimator.Defaults.AlphaSum,
+ LatentDirichletAllocationEstimator.Defaults.Beta,
+ LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
+ maximumNumberOfIterations,
+ LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
+ maximumTokenCountPerDocument,
+ numberOfSummaryTermsPerTopic,
+ LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
+ LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
+ LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator);
///
/// Uses LightLDA to transform a document (represented as a vector of floats)
diff --git a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs
index 08f5431a55..933eab22e3 100644
--- a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs
+++ b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs
@@ -183,7 +183,7 @@ public void InspectLdaModelParameters()
// Define the pipeline.
var pipeline = mlContext.Transforms.Text.ProduceWordBags("SentimentBag", "SentimentText")
- .Append(mlContext.Transforms.Text.LatentDirichletAllocation("Features", "SentimentBag", numTopic: numTopics, numIterations: 10));
+ .Append(mlContext.Transforms.Text.LatentDirichletAllocation("Features", "SentimentBag", numberOfTopics: numTopics, maximumNumberOfIterations: 10));
// Fit the pipeline.
var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
index 813ec4c352..9796e8021d 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
@@ -675,7 +675,7 @@ public void LdaTopicModel()
var est = data.MakeNewEstimator()
.Append(r => (
r.label,
- topics: r.text.ToBagofWords().ToLdaTopicVector(numTopic: 3, numSummaryTermPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary)));
+ topics: r.text.ToBagofWords().LatentDirichletAllocation(numberOfTopics: 3, numberOfSummaryTermsPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary)));
var transformer = est.Fit(data);
var tdata = transformer.Transform(data);
diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
index 56ae2fbc0e..dc2e5a1584 100644
--- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
+++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
@@ -11,6 +11,7 @@
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
+using Microsoft.ML.Transforms.Text;
using Xunit;
namespace Microsoft.ML.RunTests
@@ -1318,7 +1319,9 @@ public void TestLDATransform()
builder.AddColumn("F1V", NumberDataViewType.Single, data);
var srcView = builder.GetDataView();
- var est = ML.Transforms.Text.LatentDirichletAllocation("F1V", numTopic: 3, numSummaryTermPerTopic: 3, alphaSum: 3, numThreads: 1, resetRandomGenerator: true);
+ var opt = new LatentDirichletAllocationEstimator.ColumnOptions(name: "F1V", numberOfTopics: 3,
+ numberOfSummaryTermsPerTopic: 3, alphaSum: 3, numberOfThreads: 1, resetRandomGenerator: true);
+ var est = ML.Transforms.Text.LatentDirichletAllocation(opt);
var ldaTransformer = est.Fit(srcView);
var transformedData = ldaTransformer.Transform(srcView);
diff --git a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
index 13c97bbcfb..ecaf5cd36e 100644
--- a/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/TextFeaturizerTests.cs
@@ -288,7 +288,7 @@ public void LdaWorkout()
.Load(sentimentDataPath);
var est = new WordBagEstimator(env, "bag_of_words", "text").
- Append(new LatentDirichletAllocationEstimator(env, "topics", "bag_of_words", 10, numIterations: 10,
+ Append(new LatentDirichletAllocationEstimator(env, "topics", "bag_of_words", 10, maximumNumberOfIterations: 10,
resetRandomGenerator: true));
// The following call fails because of the following issue