Skip to content

Commit 926ebbd

Browse files
committed
remove createNormalizedInternedPathname
1 parent 5472170 commit 926ebbd

File tree

2 files changed

+8
-48
lines changed

2 files changed

+8
-48
lines changed

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExecutorDiskUtils.java

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,11 @@
1818
package org.apache.spark.network.shuffle;
1919

2020
import java.io.File;
21-
import java.util.regex.Matcher;
22-
import java.util.regex.Pattern;
23-
24-
import com.google.common.annotations.VisibleForTesting;
2521

2622
import org.apache.spark.network.util.JavaUtils;
2723

2824
public class ExecutorDiskUtils {
2925

30-
private static final Pattern MULTIPLE_SEPARATORS = Pattern.compile(File.separator + "{2,}");
31-
3226
/**
3327
* Hashes a filename into the corresponding local directory, in a manner consistent with
3428
* Spark's DiskBlockManager.getFile().
@@ -37,30 +31,14 @@ public static File getFile(String[] localDirs, int subDirsPerLocalDir, String fi
3731
int hash = JavaUtils.nonNegativeHash(filename);
3832
String localDir = localDirs[hash % localDirs.length];
3933
int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;
40-
return new File(createNormalizedInternedPathname(
41-
localDir, String.format("%02x", subDirId), filename));
42-
}
43-
44-
/**
45-
* This method is needed to avoid the situation when multiple File instances for the
46-
* same pathname "foo/bar" are created, each with a separate copy of the "foo/bar" String.
47-
* According to measurements, in some scenarios such duplicate strings may waste a lot
48-
* of memory (~ 10% of the heap). To avoid that, we intern the pathname, and before that
49-
* we make sure that it's in a normalized form (contains no "//", "///" etc.) Otherwise,
50-
* the internal code in java.io.File would normalize it later, creating a new "foo/bar"
51-
* String copy. Unfortunately, we cannot just reuse the normalization code that java.io.File
52-
* uses, since it is in the package-private class java.io.FileSystem.
53-
*/
54-
@VisibleForTesting
55-
static String createNormalizedInternedPathname(String dir1, String dir2, String fname) {
56-
String pathname = dir1 + File.separator + dir2 + File.separator + fname;
57-
Matcher m = MULTIPLE_SEPARATORS.matcher(pathname);
58-
pathname = m.replaceAll("/");
59-
// A single trailing slash needs to be taken care of separately
60-
if (pathname.length() > 1 && pathname.endsWith("/")) {
61-
pathname = pathname.substring(0, pathname.length() - 1);
62-
}
63-
return pathname.intern();
34+
final String notNormalizedPath =
35+
localDir + File.separator + String.format("%02x", subDirId) + File.separator + filename;
36+
// Interning the normalized path as according to measurements, in some scenarios such
37+
// duplicate strings may waste a lot of memory (~ 10% of the heap).
38+
// Unfortunately, we cannot just call the normalization code that java.io.File
39+
// uses, since it is in the package-private class java.io.FileSystem.
40+
final String normalizedPath = new File(notNormalizedPath).getPath();
41+
return new File(normalizedPath.intern());
6442
}
6543

6644
}

common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,4 @@ public void jsonSerializationOfExecutorRegistration() throws IOException {
144144
assertEquals(shuffleInfo, mapper.readValue(legacyShuffleJson, ExecutorShuffleInfo.class));
145145
}
146146

147-
@Test
148-
public void testNormalizeAndInternPathname() {
149-
assertPathsMatch("/foo", "bar", "baz", "/foo/bar/baz");
150-
assertPathsMatch("//foo/", "bar/", "//baz", "/foo/bar/baz");
151-
assertPathsMatch("foo", "bar", "baz///", "foo/bar/baz");
152-
assertPathsMatch("/foo/", "/bar//", "/baz", "/foo/bar/baz");
153-
assertPathsMatch("/", "", "", "/");
154-
assertPathsMatch("/", "/", "/", "/");
155-
}
156-
157-
private void assertPathsMatch(String p1, String p2, String p3, String expectedPathname) {
158-
String normPathname =
159-
ExecutorDiskUtils.createNormalizedInternedPathname(p1, p2, p3);
160-
assertEquals(expectedPathname, normPathname);
161-
File file = new File(normPathname);
162-
String returnedPath = file.getPath();
163-
assertTrue(normPathname == returnedPath);
164-
}
165147
}

0 commit comments

Comments
 (0)