Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Featurizers;

namespace Samples.Dynamic
{
public static class DateTimeTransformer
{
// Input row type for the sample. It exposes a single member, "Date", which
// Example() loads with LoadFromEnumerable and then passes to
// DateTimeTransformer by name.
private class DateTimeInput
{
// Timestamp stored as a long. The sample value is annotated in Example() as
// "2025 June 30"; NOTE(review): presumably seconds since the Unix epoch —
// confirm against the DateTimeTransformer documentation.
public long Date;
}

/// <summary>
/// Builds a one-row dataset holding a single timestamp, runs it through
/// DateTimeTransformer using "DTC" as the output column prefix, and prints
/// the original value followed by every column read back into
/// <see cref="TransformedData"/>.
/// </summary>
public static void Example()
{
    // Create a new ML context, for ML.NET operations. It can be used for
    // exception tracking and logging, as well as the source of randomness.
    var mlContext = new MLContext();

    // Create a small dataset as an IEnumerable.
    // Future Date - 2025 June 30
    var samples = new[] { new DateTimeInput() { Date = 1751241600 } };

    // Convert training data to IDataView.
    var dataview = mlContext.Data.LoadFromEnumerable(samples);

    // A pipeline for splitting the time features into individual columns.
    // "Date" is the source column and "DTC" is the prefix given to every
    // column the transformer produces.
    var pipeline = mlContext.Transforms.DateTimeTransformer("Date", "DTC");

    // The transformed data.
    var transformedData = pipeline.Fit(dataview).Transform(dataview);

    // Now let's take a look at what this did. We should have created 21 more
    // columns with all the DateTime information split into its own columns.
    var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(
        transformedData, reuseRowObject: false);

    // And we can write out a few rows. The header is a plain literal: it has
    // no placeholders, so no string interpolation is needed.
    Console.WriteLine("Features column obtained post-transformation.");
    foreach (var featureRow in featuresColumn)
    {
        // One comma-separated line per row, in the same field order as the
        // expected output below.
        Console.WriteLine(string.Join(", ",
            featureRow.Date, featureRow.DTCYear, featureRow.DTCMonth,
            featureRow.DTCDay, featureRow.DTCHour, featureRow.DTCMinute,
            featureRow.DTCSecond, featureRow.DTCAmPm, featureRow.DTCHour12,
            featureRow.DTCDayOfWeek, featureRow.DTCDayOfQuarter, featureRow.DTCDayOfYear,
            featureRow.DTCWeekOfMonth, featureRow.DTCQuarterOfYear, featureRow.DTCHalfOfYear,
            featureRow.DTCWeekIso, featureRow.DTCYearIso, featureRow.DTCMonthLabel,
            featureRow.DTCAmPmLabel, featureRow.DTCDayOfWeekLabel, featureRow.DTCHolidayName,
            featureRow.DTCIsPaidTimeOff));
    }

    // Expected output:
    //  Features column obtained post-transformation.
    //  1751241600, 2025, 6, 30, 0, 0, 0, 0, 0, 1, 91, 180, 4, 2, 1, 27, 2025, June, am, Monday, , 0
}

// Row type that Example() uses with CreateEnumerable to read the transformed
// data back. Apart from the original "Date" column, every property name starts
// with "DTC" because that is the prefix Example() passes to DateTimeTransformer;
// the rest of the name identifies the produced column.
private sealed class TransformedData
{
// Original input column, carried through unchanged in name.
public long Date { get; set; }
// Numeric columns produced by the transformer.
public int DTCYear { get; set; }
public byte DTCMonth { get; set; }
public byte DTCDay { get; set; }
public byte DTCHour { get; set; }
public byte DTCMinute { get; set; }
public byte DTCSecond { get; set; }
public byte DTCAmPm { get; set; }
public byte DTCHour12 { get; set; }
public byte DTCDayOfWeek { get; set; }
public byte DTCDayOfQuarter { get; set; }
public ushort DTCDayOfYear { get; set; }
public ushort DTCWeekOfMonth { get; set; }
public byte DTCQuarterOfYear { get; set; }
public byte DTCHalfOfYear { get; set; }
public byte DTCWeekIso { get; set; }
public int DTCYearIso { get; set; }
// String-valued columns (the sample's expected output shows "June", "am",
// "Monday" and an empty holiday name for the sample date).
public string DTCMonthLabel { get; set; }
public string DTCAmPmLabel { get; set; }
public string DTCDayOfWeekLabel { get; set; }
public string DTCHolidayName { get; set; }
// NOTE(review): printed as 0 in the sample's expected output; presumably a
// 0/1 flag — confirm against the DateTimeTransformer documentation.
public byte DTCIsPaidTimeOff { get; set; }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Featurizers;

namespace Samples.Dynamic
{
public static class DateTimeTransformerDropColumns
{
// Input row type for the sample. It exposes a single member, "Date", which
// Example() loads with LoadFromEnumerable and then passes to
// DateTimeTransformer by name.
private class DateTimeInput
{
// Timestamp stored as a long. The sample value is annotated in Example() as
// "2025 June 30"; NOTE(review): presumably seconds since the Unix epoch —
// confirm against the DateTimeTransformer documentation.
public long Date;
}

/// <summary>
/// Builds a one-row dataset holding a single timestamp, runs it through
/// DateTimeTransformer using "DTC" as the output column prefix while asking
/// for five of the produced columns to be dropped, and prints the original
/// value followed by every remaining column read back into
/// <see cref="TransformedData"/>.
/// </summary>
public static void Example()
{
    // Create a new ML context, for ML.NET operations. It can be used for
    // exception tracking and logging, as well as the source of randomness.
    var mlContext = new MLContext();

    // Create a small dataset as an IEnumerable.
    // Future Date - 2025 June 30
    var samples = new[] { new DateTimeInput() { Date = 1751241600 } };

    // Convert training data to IDataView.
    var dataview = mlContext.Data.LoadFromEnumerable(samples);

    // A pipeline for splitting the time features into individual columns.
    // All the columns listed here will be dropped.
    var pipeline = mlContext.Transforms.DateTimeTransformer("Date", "DTC",
        DateTimeTransformerEstimator.ColumnsProduced.IsPaidTimeOff,
        DateTimeTransformerEstimator.ColumnsProduced.Day,
        DateTimeTransformerEstimator.ColumnsProduced.QuarterOfYear,
        DateTimeTransformerEstimator.ColumnsProduced.AmPm,
        DateTimeTransformerEstimator.ColumnsProduced.HolidayName);

    // The transformed data.
    var transformedData = pipeline.Fit(dataview).Transform(dataview);

    // Now let's take a look at what this did. We should have created 16 more
    // columns with all the DateTime information split into its own columns.
    var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(
        transformedData, reuseRowObject: false);

    // And we can write out a few rows. The header is a plain literal: it has
    // no placeholders, so no string interpolation is needed.
    Console.WriteLine("Features column obtained post-transformation.");
    foreach (var featureRow in featuresColumn)
    {
        // One comma-separated line per row: the original Date plus the 16
        // columns that were not dropped, in the order of the expected output.
        Console.WriteLine(string.Join(", ",
            featureRow.Date, featureRow.DTCYear, featureRow.DTCMonth,
            featureRow.DTCHour, featureRow.DTCMinute, featureRow.DTCSecond,
            featureRow.DTCHour12, featureRow.DTCDayOfWeek, featureRow.DTCDayOfQuarter,
            featureRow.DTCDayOfYear, featureRow.DTCWeekOfMonth, featureRow.DTCHalfOfYear,
            featureRow.DTCWeekIso, featureRow.DTCYearIso, featureRow.DTCMonthLabel,
            featureRow.DTCAmPmLabel, featureRow.DTCDayOfWeekLabel));
    }

    // Expected output (17 values: Date, Year, Month, Hour, Minute, Second,
    // Hour12, DayOfWeek, DayOfQuarter, DayOfYear, WeekOfMonth, HalfOfYear,
    // WeekIso, YearIso, MonthLabel, AmPmLabel, DayOfWeekLabel):
    //  Features column obtained post-transformation.
    //  1751241600, 2025, 6, 0, 0, 0, 0, 1, 91, 180, 4, 1, 27, 2025, June, am, Monday
}

// These columns start with DTC because that is the prefix we picked
private sealed class TransformedData
{
public long Date { get; set; }
public int DTCYear { get; set; }
public byte DTCMonth { get; set; }
public byte DTCHour { get; set; }
public byte DTCMinute { get; set; }
public byte DTCSecond { get; set; }
public byte DTCHour12 { get; set; }
public byte DTCDayOfWeek { get; set; }
public byte DTCDayOfQuarter { get; set; }
public ushort DTCDayOfYear { get; set; }
public ushort DTCWeekOfMonth { get; set; }
public byte DTCHalfOfYear { get; set; }
public byte DTCWeekIso { get; set; }
public int DTCYearIso { get; set; }
public string DTCMonthLabel { get; set; }
public string DTCAmPmLabel { get; set; }
public string DTCDayOfWeekLabel { get; set; }

@justinormont justinormont Dec 4, 2019

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would need to be added to the code being wrapped here, but when featurizing DateTime fields, I prefer polar (sin/cos) transforms (in addition to the other features listed).

Clocks and times/dates are cyclic. The current feature set doesn't include this fact.

The main gain of polar date transforms is that it places times like 11:50PM and 12:10AM right next to each other (both mapping to ~0.999 in the cosine transform of percent of day). This lets a model make a decision boundary at CosTimeOfDay > 0.9 to treat these two times as very similar.

Contrast this with the DTCHour feature, which maps these two times to 23 and 0. The issue is that the range is non-continuous, so it is not possible to group both 0 and 23 with a single split point, even though a human would consider the two times similar.

Info:

Code example I use for DateTime feature engineering:

```csharp
DateTime dt;
var invalidDate = !DateTime.TryParse(input.Date, out dt);
var percentOfDay = (invalidDate ? Single.NaN : (float)(new TimeSpan(dt.Hour, dt.Minute, 0).TotalMinutes) / 1440f);
var percentOfWeek = (invalidDate ? Single.NaN : ((float) dt.DayOfWeek + percentOfDay) / 7f);
var percentOfMonth = (invalidDate ? Single.NaN : (float)(dt.Day + percentOfDay) / (float) DateTime.DaysInMonth(dt.Year, dt.Month));
var percentOfYear = (invalidDate ? Single.NaN : (float)((dt.DayOfYear + percentOfDay) / (new DateTime(dt.Year + 1, 1, 1) - new DateTime(dt.Year, 1, 1)).TotalDays));

// Time features like the current ones
output.DateWeekday = (invalidDate ? Single.NaN : (float) dt.DayOfWeek);
output.DateMonth = (invalidDate ? Single.NaN : (float) dt.Month);
output.DateHour = (invalidDate ? Single.NaN : dt.Hour);
output.DatePartOfDay = (invalidDate ? Single.NaN : (float) Math.Round(dt.AddHours(-3).Hour / 6f));
output.InvalidDate = (invalidDate ? 1.0f : 0.0f);

// Polar date/time transforms for percent of day & week & year
output.CosTimeOfDay = (float) Math.Cos(2 * Math.PI * percentOfDay);
output.SinTimeOfDay = (float) Math.Sin(2 * Math.PI * percentOfDay);
output.CosTimeOfWeek = (float) Math.Cos(2 * Math.PI * percentOfWeek);
output.SinTimeOfWeek = (float) Math.Sin(2 * Math.PI * percentOfWeek);
output.CosTimeOfMonth = (float) Math.Cos(2 * Math.PI * percentOfMonth);
output.SinTimeOfMonth = (float) Math.Sin(2 * Math.PI * percentOfMonth);
output.CosTimeOfYear = (float) Math.Cos(2 * Math.PI * percentOfYear);
output.SinTimeOfYear = (float) Math.Sin(2 * Math.PI * percentOfYear);
```

#Resolved

@justinormont justinormont Dec 4, 2019

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/cc @davidbrownellWork #Resolved

@davidbrownellWork davidbrownellWork Dec 5, 2019

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add this as a potential feature for Mn. #Resolved

@gvashishtha gvashishtha Dec 5, 2019

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean add to the ML.NET plan? Any estimate on how much additional dev time this would take? #Resolved

@justinormont justinormont Dec 5, 2019

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the new features is pretty simple. I expect more of the work is in testing. #Resolved

}
}
}
Loading