diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs
index fed1b97c7d..ab72ec3d57 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderCursor.cs
@@ -487,7 +487,7 @@ private void ThreadProc()
                             // REVIEW: Avoid allocating a string for every line. This would probably require
                             // introducing a CharSpan type (similar to ReadOnlyMemory but based on char[] or StringBuilder)
                             // and implementing all the necessary conversion functionality on it. See task 3871.
-                            text = rdr.ReadLine();
+                            text = rdr.ReadEntry();
                             if (text == null)
                                 goto LNext;
                             line++;
@@ -514,7 +514,7 @@ private void ThreadProc()
                                 if (_abort)
                                     return;
 
-                                text = rdr.ReadLine();
+                                text = rdr.ReadEntry();
                                 if (text == null)
                                 {
                                     // We're done with this file. Queue the last partial batch.
diff --git a/src/Microsoft.ML.Data/Utilities/StreamUtils.cs b/src/Microsoft.ML.Data/Utilities/StreamUtils.cs
index f157d09ea8..32ee6c774f 100644
--- a/src/Microsoft.ML.Data/Utilities/StreamUtils.cs
+++ b/src/Microsoft.ML.Data/Utilities/StreamUtils.cs
@@ -174,5 +174,74 @@ private static string[] Expand(string pattern)
             return matchList.ToArray();
         }
 #endif
+
+        /// <summary>
+        /// Reads one logical record from <paramref name="sr"/>. A record is normally a single
+        /// physical line; when a line leaves a double quote open (odd number of '"' characters),
+        /// the following lines are appended until the quotes balance or the input ends.
+        /// </summary>
+        /// <param name="sr">The reader to consume lines from.</param>
+        /// <returns>
+        /// The record text, with the physical lines of a multi-line record joined by '\n';
+        /// an empty string for a blank line; <c>null</c> only at end of input.
+        /// </returns>
+        public static string ReadEntry(this TextReader sr)
+        {
+            string first = sr.ReadLine();
+            if (first == null)
+            {
+                // Only a genuine end of input may yield null: callers treat null as "file finished",
+                // so a blank line must come back as "" exactly like ReadLine returns it.
+                return null;
+            }
+
+            int quoteCount = GetNumberOf(first, "\"");
+            if (quoteCount % 2 == 0)
+            {
+                // Common case: the line is self-contained, so no extra buffering is needed.
+                return first;
+            }
+
+            var entry = new System.Text.StringBuilder(first);
+            while (quoteCount % 2 != 0)
+            {
+                string line = sr.ReadLine();
+                if (line == null)
+                {
+                    // Unterminated quote at end of input: return what was read instead of looping forever.
+                    break;
+                }
+
+                // ReadLine strips the terminator; put one back so the quoted field keeps its line break.
+                entry.Append('\n').Append(line);
+                quoteCount += GetNumberOf(line, "\"");
+            }
+
+            return entry.ToString();
+        }
+
+        /// <summary>
+        /// Counts the non-overlapping ordinal occurrences of <paramref name="strSearchString"/> in <paramref name="s"/>.
+        /// </summary>
+        /// <param name="s">The text to scan; null or empty yields 0.</param>
+        /// <param name="strSearchString">The text to look for; null or empty yields 0.</param>
+        /// <returns>The number of matches found scanning left to right without overlap.</returns>
+        public static int GetNumberOf(string s, string strSearchString)
+        {
+            if (string.IsNullOrEmpty(s) || string.IsNullOrEmpty(strSearchString))
+            {
+                return 0;
+            }
+
+            int count = 0;
+            int index = 0;
+            while ((index = s.IndexOf(strSearchString, index, System.StringComparison.Ordinal)) >= 0)
+            {
+                count++;
+                index += strSearchString.Length;
+            }
+
+            return count;
+        }
     }
 }