diff --git a/src/Microsoft.ML.Data/DataLoadSave/MultiFileSource.cs b/src/Microsoft.ML.Data/DataLoadSave/MultiFileSource.cs index 6dd5405486..3399978f9c 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/MultiFileSource.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/MultiFileSource.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Collections.Generic; using System.IO; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; @@ -24,6 +25,12 @@ public sealed class MultiFileSource : IMultiStreamSource /// In case of usage from Maml, the paths would be wildcard concatenated in the first string of . /// /// The paths of the files to load. + /// + /// The provided paths can utilize wildcards to load all source files. For example: + /// paths = "Data/*" includes all files in directory Data + /// paths = "DataFolder/.../*" includes all files in all subdirectories inside directory DataFolder. + /// paths = "Data1/*", "Data2/*" includes all files in directories Data1 and Data2 + /// public MultiFileSource(params string[] paths) { Contracts.CheckValueOrNull(paths); @@ -37,16 +44,19 @@ public MultiFileSource(params string[] paths) return; } - // in case of usage from Maml, the paths would be wildcard concatenated in the - // first string of paths. - string[] concatenated = paths[0] != null ? 
StreamUtils.ExpandWildCards(paths[0]) : null; - - if (concatenated != null && concatenated.Length > 1) + List concatenated = new List(); + if (paths != null) { - if (paths.Length > 1) - throw Contracts.Except($"Pass a single string to the {nameof(MultiFileSource)} constructor, if you are using wildcards."); + foreach (string path in paths) + foreach (string rPath in StreamUtils.ExpandWildCards(path)) + concatenated.Add(rPath); + } + else + concatenated = null; - _paths = concatenated; + if (concatenated != null && concatenated.Count > 0) + { + _paths = concatenated.ToArray(); } else _paths = paths; diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/FileSource.cs b/test/Microsoft.ML.Core.Tests/UnitTests/FileSource.cs index d329d48073..5fb108cb84 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/FileSource.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/FileSource.cs @@ -41,7 +41,43 @@ public void MultiFileSourceUnitTest() fileSource = new MultiFileSource(Path.Combine(dirName, "...")); Assert.True(fileSource.Count == 2, $"Error passing concatenated paths to {nameof(MultiFileSource)}"); - Assert.Throws(() => new MultiFileSource($"{file1}+{file2}", "adult.tiny.with-schema.txt")); + /* Create test directories and files in the following specifications: + /MultiFileSourceUnitTest/Data + /MultiFileSourceUnitTest/Data/a.txt + /MultiFileSourceUnitTest/Data/b.txt + /MultiFileSourceUnitTest/DataFolder/ + /MultiFileSourceUnitTest/DataFolder/SubFolder1 + /MultiFileSourceUnitTest/DataFolder/SubFolder1/a.txt + /MultiFileSourceUnitTest/DataFolder/SubFolder2 + /MultiFileSourceUnitTest/DataFolder/SubFolder2/b.txt + */ + + var dataDir = Directory.CreateDirectory("MultiFileSourceUnitTest/Data").FullName; + + var fileDataA = Path.Combine(dataDir, "a.txt"); + var fileDataB = Path.Combine(dataDir, "b.txt"); + + File.WriteAllText(fileDataA, "Unit Test"); + File.WriteAllText(fileDataB, "Unit Test"); + + var dataFolderDir = 
Directory.CreateDirectory("MultiFileSourceUnitTest/DataFolder").FullName; + var subFolder1Dir = Directory.CreateDirectory("MultiFileSourceUnitTest/DataFolder/SubFolder1").FullName; + var subFolder2Dir = Directory.CreateDirectory("MultiFileSourceUnitTest/DataFolder/SubFolder2").FullName; + + var fileDataSA = Path.Combine(subFolder1Dir, "a.txt"); + var fileDataSB = Path.Combine(subFolder2Dir, "b.txt"); + + File.WriteAllText(fileDataSA, "Unit Test"); + File.WriteAllText(fileDataSB, "Unit Test"); + + fileSource = new MultiFileSource(dataDir+"/*"); + Assert.True(fileSource.Count == 2, $"Error passing concatenated paths to {nameof(MultiFileSource)}"); + + fileSource = new MultiFileSource(dataFolderDir + "/.../*"); + Assert.True(fileSource.Count == 2, $"Error passing concatenated paths to {nameof(MultiFileSource)}"); + + //Delete test folder and files for test clean-up + Directory.Delete(dirName, true); } } }