Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions src/Microsoft.ML.Data/DataLoadSave/MultiFileSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
Expand All @@ -24,6 +25,12 @@ public sealed class MultiFileSource : IMultiStreamSource
/// In case of usage from Maml, the paths would be wildcard concatenated in the first string of <paramref name="paths"/>.
/// </summary>
/// <param name="paths">The paths of the files to load.</param>
/// <remarks>
/// The provided <paramref name="paths"/> can use wildcards to load multiple source files. For example:
/// paths = "Data/*" includes all files in directory Data.
/// paths = "DataFolder/.../*" includes all files in all subdirectories inside directory DataFolder.
/// paths = "Data1/*", "Data2/*" includes all files in directories Data1 and Data2.
/// </remarks>
public MultiFileSource(params string[] paths)
{
Contracts.CheckValueOrNull(paths);
Expand All @@ -37,16 +44,19 @@ public MultiFileSource(params string[] paths)
return;
}

// in case of usage from Maml, the paths would be wildcard concatenated in the
// first string of paths.
string[] concatenated = paths[0] != null ? StreamUtils.ExpandWildCards(paths[0]) : null;

if (concatenated != null && concatenated.Length > 1)
List<string> concatenated = new List<string>();
if (paths != null)
{
if (paths.Length > 1)
throw Contracts.Except($"Pass a single string to the {nameof(MultiFileSource)} constructor, if you are using wildcards.");
foreach (string path in paths)
foreach (string rPath in StreamUtils.ExpandWildCards(path))
concatenated.Add(rPath);
}
else
concatenated = null;

_paths = concatenated;
if (concatenated != null && concatenated.Count > 0)
{
_paths = concatenated.ToArray();
}
else
_paths = paths;
Expand Down
38 changes: 37 additions & 1 deletion test/Microsoft.ML.Core.Tests/UnitTests/FileSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,43 @@ public void MultiFileSourceUnitTest()
fileSource = new MultiFileSource(Path.Combine(dirName, "..."));
Assert.True(fileSource.Count == 2, $"Error passing concatenated paths to {nameof(MultiFileSource)}");

Assert.Throws<InvalidOperationException>(() => new MultiFileSource($"{file1}+{file2}", "adult.tiny.with-schema.txt"));
/* Create test directories and files in the following specifications:
/MultiFileSourceUnitTest/Data
/MultiFileSourceUnitTest/Data/a.txt
/MultiFileSourceUnitTest/Data/b.txt
/MultiFileSourceUnitTest/DataFolder/
/MultiFileSourceUnitTest/DataFolder/SubFolder1
/MultiFileSourceUnitTest/DataFolder/SubFolder1/a.txt
/MultiFileSourceUnitTest/DataFolder/SubFolder2
/MultiFileSourceUnitTest/DataFolder/SubFolder2/b.txt
*/

var dataDir = Directory.CreateDirectory("MultiFileSourceUnitTest/Data").FullName;

var fileDataA = Path.Combine(dataDir, "a.txt");
var fileDataB = Path.Combine(dataDir, "b.txt");

File.WriteAllText(fileDataA, "Unit Test");
File.WriteAllText(fileDataB, "Unit Test");

var dataFolderDir = Directory.CreateDirectory("MultiFileSourceUnitTest/DataFolder").FullName;
var subFolder1Dir = Directory.CreateDirectory("MultiFileSourceUnitTest/DataFolder/SubFolder1").FullName;
var subFolder2Dir = Directory.CreateDirectory("MultiFileSourceUnitTest/DataFolder/SubFolder2").FullName;

var fileDataSA = Path.Combine(subFolder1Dir, "a.txt");
var fileDataSB = Path.Combine(subFolder2Dir, "b.txt");

File.WriteAllText(fileDataSA, "Unit Test");
File.WriteAllText(fileDataSB, "Unit Test");

fileSource = new MultiFileSource(dataDir+"/*");
Assert.True(fileSource.Count == 2, $"Error passing concatenated paths to {nameof(MultiFileSource)}");

fileSource = new MultiFileSource(dataFolderDir + "/.../*");
Assert.True(fileSource.Count == 2, $"Error passing concatenated paths to {nameof(MultiFileSource)}");

//Delete test folder and files for test clean-up
Directory.Delete(dirName, true);
}
}
}