From 52f321d5ae13ed188edb3175ef61e0168fc85135 Mon Sep 17 00:00:00 2001 From: J W Date: Sat, 11 Jan 2020 11:29:39 -0500 Subject: [PATCH 1/6] Use a GUID when creating the temp path --- src/Microsoft.ML.Core/Data/Repository.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index 40d7b7cc47..8d432e4793 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -256,7 +256,7 @@ protected void GetPath(out string pathEnt, out string pathTemp, string dir, stri string root = Path.GetFullPath(DirTemp ?? @"x:\dummy"); string entityPath = Path.Combine(root, dir ?? "", name); entityPath = Path.GetFullPath(entityPath); - string tempPath = Path.Combine(root, PathMap.Count.ToString()); + string tempPath = Path.Combine(root, Guid.NewGuid().ToString(), PathMap.Count.ToString()); tempPath = Path.GetFullPath(tempPath); string parent = Path.GetDirectoryName(entityPath); From 280e9adcedddacea6b58616c1376cee27c869041 Mon Sep 17 00:00:00 2001 From: J W Date: Sat, 11 Jan 2020 14:44:28 -0500 Subject: [PATCH 2/6] Update where the GUID is in the temp path --- src/Microsoft.ML.Core/Data/Repository.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index 8d432e4793..c2d83ca36a 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -256,7 +256,7 @@ protected void GetPath(out string pathEnt, out string pathTemp, string dir, stri string root = Path.GetFullPath(DirTemp ?? @"x:\dummy"); string entityPath = Path.Combine(root, dir ?? 
"", name); entityPath = Path.GetFullPath(entityPath); - string tempPath = Path.Combine(root, Guid.NewGuid().ToString(), PathMap.Count.ToString()); + string tempPath = Path.Combine(root, Guid.NewGuid().ToString()); tempPath = Path.GetFullPath(tempPath); string parent = Path.GetDirectoryName(entityPath); @@ -340,7 +340,9 @@ public Entry CreateEntry(string dir, string name) Stream stream; if (pathTemp != null) + { stream = new FileStream(pathTemp, FileMode.CreateNew); + } else stream = new MemoryStream(); From fc636e46955300fd0d228d26ad5e2a24fdd20397 Mon Sep 17 00:00:00 2001 From: J W Date: Sun, 12 Jan 2020 05:45:32 -0500 Subject: [PATCH 3/6] Remove braces --- src/Microsoft.ML.Core/Data/Repository.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index c2d83ca36a..66f4c4c683 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -340,9 +340,7 @@ public Entry CreateEntry(string dir, string name) Stream stream; if (pathTemp != null) - { stream = new FileStream(pathTemp, FileMode.CreateNew); - } else stream = new MemoryStream(); From c29a00b5f6089fb5eba17b26f826b791e63236c4 Mon Sep 17 00:00:00 2001 From: J W Date: Wed, 15 Jan 2020 06:01:02 -0500 Subject: [PATCH 4/6] Use random file name instead of GUID --- src/Microsoft.ML.Core/Data/Repository.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index 66f4c4c683..67c1582464 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -256,7 +256,7 @@ protected void GetPath(out string pathEnt, out string pathTemp, string dir, stri string root = Path.GetFullPath(DirTemp ?? @"x:\dummy"); string entityPath = Path.Combine(root, dir ?? 
"", name); entityPath = Path.GetFullPath(entityPath); - string tempPath = Path.Combine(root, Guid.NewGuid().ToString()); + string tempPath = Path.Combine(root, Path.GetRandomFileName()); tempPath = Path.GetFullPath(tempPath); string parent = Path.GetDirectoryName(entityPath); From 0f06dfb17762141d4b297a65f751c5b88f25f2e3 Mon Sep 17 00:00:00 2001 From: J W Date: Wed, 15 Jan 2020 18:20:58 -0500 Subject: [PATCH 5/6] Switch to concurrent dictionary --- src/Microsoft.ML.Core/Data/Repository.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index 67c1582464..d642f1f8fe 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; using System.IO; @@ -93,7 +94,7 @@ public void Dispose() // more than once. // REVIEW: Should we garbage collect to some degree? Currently we don't delete any // of these temp files until the repository is disposed. - protected readonly Dictionary<string, string> PathMap; + protected readonly ConcurrentDictionary<string, string> PathMap; /// <summary> /// Exception context. 
@@ -107,7 +108,7 @@ internal Repository(bool needDir, IExceptionContext ectx) Contracts.AssertValueOrNull(ectx); _ectx = ectx; - PathMap = new Dictionary<string, string>(); + PathMap = new ConcurrentDictionary<string, string>(); _open = new List<Entry>(); if (needDir) DirTemp = GetShortTempDir(); @@ -333,10 +334,8 @@ public Entry CreateEntry(string dir, string name) string pathEnt; string pathTemp; GetPath(out pathEnt, out pathTemp, dir, name, true); - if (PathMap.ContainsKey(pathEnt)) + if (PathMap.TryAdd(pathEnt, pathTemp)) throw ExceptionContext.ExceptParam(nameof(name), "Duplicate entry: '{0}'", pathEnt); - else - PathMap.Add(pathEnt, pathTemp); Stream stream; if (pathTemp != null) @@ -525,7 +524,7 @@ public Entry OpenEntryOrNull(string dir, string name) // Extract to a temporary file. Directory.CreateDirectory(Path.GetDirectoryName(pathTemp)); entry.ExtractToFile(pathTemp); - PathMap.Add(pathLower, pathTemp); + PathMap.TryAdd(pathLower, pathTemp); stream = new FileStream(pathTemp, FileMode.Open, FileAccess.Read); } else From ca78b2cb5302e4ae2872fe8eae33649c8f3c33ec Mon Sep 17 00:00:00 2001 From: J W Date: Wed, 15 Jan 2020 21:37:10 -0500 Subject: [PATCH 6/6] TryAdd updates --- src/Microsoft.ML.Core/Data/Repository.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/Repository.cs b/src/Microsoft.ML.Core/Data/Repository.cs index d642f1f8fe..71b6c94d77 100644 --- a/src/Microsoft.ML.Core/Data/Repository.cs +++ b/src/Microsoft.ML.Core/Data/Repository.cs @@ -334,7 +334,7 @@ public Entry CreateEntry(string dir, string name) string pathEnt; string pathTemp; GetPath(out pathEnt, out pathTemp, dir, name, true); - if (PathMap.TryAdd(pathEnt, pathTemp)) + if (!PathMap.TryAdd(pathEnt, pathTemp)) throw ExceptionContext.ExceptParam(nameof(name), "Duplicate entry: '{0}'", pathEnt); Stream stream; @@ -524,7 +524,9 @@ public Entry OpenEntryOrNull(string dir, string name) // Extract to a temporary file. 
Directory.CreateDirectory(Path.GetDirectoryName(pathTemp)); entry.ExtractToFile(pathTemp); - PathMap.TryAdd(pathLower, pathTemp); + if (!PathMap.TryAdd(pathLower, pathTemp)) + throw ExceptionContext.ExceptParam(nameof(name), "Duplicate entry: '{0}'", pathLower); + stream = new FileStream(pathTemp, FileMode.Open, FileAccess.Read); } else