From a78c87197b5806b4ee2249e851e46d40c246ffe2 Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Wed, 27 Aug 2025 16:02:37 -0400 Subject: [PATCH 1/6] Fix loading jars with encoded paths Fixes https://github.com/smithy-lang/smithy/issues/2576 and https://github.com/smithy-lang/smithy/issues/2103, which are both that the model assembler can't load jars if the jar's path has percent-encoded characters. This may happen for artifacts resolved by coursier (see linked issues). The model assembler can load Smithy files from a jar - model discovery. This works by looking for a 'META-INF/smithy/manifest' file within the jar, which contains the paths of smithy model files within the jar. You can give the model assembler a class loader to use for model discovery, but you can also add a jar to discover models from using a regular `addImport`. The problem is with the latter case. Because jars are like zip files, you can't read files within the jar using regular file system operations. Instead, you can either use the JarFile api to open the jar, and read specific entries (like a zip file), or you can directly read specific entries via a special URL with the format `jar:file:/path/to/jar.jar!/path/in/jar`. The latter is the way that our model discovery apis work. The problem was that URL doesn't perform any encoding or decoding itself, so callers/consumers have to take care of it, which we were not doing. For example, if the jar file has a path of `/foo/bar%45baz.jar`, we would create a URL like: `jar:file:/foo/bar%45baz.jar!/META-INF/smithy/manifest`, and when the JDK tried to use this URL to read from the file system, it would decode it and attempt to read `/foo/bar@baz.jar`. Since we weren't encoding the URL to begin with, this was only a problem when the path contained `%`. I tried to preserve the existing model discovery api, and just make it work with these paths. The URL class has a section in its javadoc about this footgun, and in more recent jdk versions, URL constructors are deprecated in favor of `URI::toURL` to avoid it. So that's what I did. This technically changes the `SourceLocation::filename` for jars that are `addImport`ed, when the jar path has reserved characters (like spaces). Such jars were importable before because URL doesn't validate encoding, and the JDK decoding the URL would be a no-op since it wasn't encoded to begin with. So we could _just_ encode `%`, meaning only the newly-allowed paths would have any encoding. Disregarding the fact that this side-steps the assumed pre-conditions of URL, it would actually exacerbate an existing inconsistency between `SourceLocation::filename` of models discovered on the classpath, and models discovered from imports. The JDK encodes the URLs of resources on the classpath, so their models' source locations will be encoded. Encoding URLs of imported jars means there's no difference. You can assume that a source location with `jar:file:` is a well-formed URL/URI. --- .../smithy/model/loader/ModelDiscovery.java | 24 +++++++++----- .../model/loader/ModelAssemblerTest.java | 32 +++++++++++++++++++ .../model/loader/ModelDiscoveryTest.java | 10 ++++++ .../assembler-valid-jar/META-INF/MANIFEST.MF | 2 ++ .../META-INF/smithy/manifest | 1 + .../META-INF/smithy/valid.smithy | 5 +++ 6 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/MANIFEST.MF create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/manifest create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/valid.smithy diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java index f6d7016ea02..2e4b6115a52 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java @@ -10,6 +10,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; import java.nio.charset.StandardCharsets; @@ -185,19 +187,25 @@ public static String getSmithyModelPathFromJarUrl(URL modelUrl) { */ public static URL createSmithyJarManifestUrl(String fileOrUrl) { try { - return new URL(getFilenameWithScheme(fileOrUrl) + "!/" + MANIFEST_PATH); - } catch (IOException e) { + // Building a URI from parts (scheme + file path) will make sure the file path + // gets properly URI encoded. URL does not do this, instead expecting callers + // to encode first, and consumers to decode. Encoding it here means that when + // it is decoded by the consumer (e.g. the JDK reading the jar from the file system), + // it will point to the correct location. + String manifestPath = removeScheme(fileOrUrl) + "!/" + MANIFEST_PATH; + return new URI("jar:file", null, manifestPath, null).toURL(); + } catch (IOException | URISyntaxException e) { throw new ModelImportException(e.getMessage(), e); } } - private static String getFilenameWithScheme(String filename) { - if (filename.startsWith("jar:")) { - return filename; - } else if (filename.startsWith("file:")) { - return "jar:" + filename; + private static String removeScheme(String fileOrUrl) { + // Index will also cover jar: part of jar:file: + int pathStart = fileOrUrl.indexOf("file:"); + if (pathStart == -1) { + return fileOrUrl; } else { - return "jar:file:" + filename; + return fileOrUrl.substring(pathStart + "file:".length()); } } diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java index 4a180b4e224..3fbcbc7674f 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java @@ -1459,4 +1459,36 @@ public void handlesSameModelWhenBuiltAndImported() throws Exception { assertTrue(combinedModel.expectShape(ShapeId.from("smithy.example#MachineData$machineId"), MemberShape.class) .hasTrait(RequiredTrait.ID)); } + + @Test + public void loadsJarFromPathWithEncodedChars() throws Exception { + Path source = Paths.get(getClass().getResource("assembler-valid-jar").toURI()); + Path jarPath = outputDirectory.resolve(Paths.get("path%20with%45encoded%25chars", "test.jar")); + + Files.createDirectories(jarPath.getParent()); + Files.copy(JarUtils.createJarFromDir(source), jarPath); + + Model model = Model.assembler() + .addImport(jarPath) + .assemble() + .unwrap(); + + assertTrue(model.getShape(ShapeId.from("smithy.example#ExampleString")).isPresent()); + } + + @Test + public void loadsJarFromUrlWithEncodedChars() throws Exception { + Path source = Paths.get(getClass().getResource("assembler-valid-jar").toURI()); + Path jarPath = outputDirectory.resolve(Paths.get("path%20with%45encoded%25chars", "test.jar")); + + Files.createDirectories(jarPath.getParent()); + Files.copy(JarUtils.createJarFromDir(source), jarPath); + + Model model = Model.assembler() + .addImport(jarPath.toUri().toURL()) + .assemble() + .unwrap(); + + assertTrue(model.getShape(ShapeId.from("smithy.example#ExampleString")).isPresent()); + } } diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java index 098eb780d56..465e51548a8 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java @@ -147,4 +147,14 @@ public void createSmithyManifestUrlFromJarUrl() throws IOException { assertThat(ModelDiscovery.createSmithyJarManifestUrl("jar:file:/foo.jar"), equalTo(new URL("jar:file:/foo.jar!/META-INF/smithy/manifest"))); } + + @Test + public void encodesSmithyManifestUrl() throws IOException { + assertThat(ModelDiscovery.createSmithyJarManifestUrl("/foo bar.jar"), + equalTo(new URL("jar:file:/foo%20bar.jar!/META-INF/smithy/manifest"))); + assertThat(ModelDiscovery.createSmithyJarManifestUrl("file:/foo bar.jar"), + equalTo(new URL("jar:file:/foo%20bar.jar!/META-INF/smithy/manifest"))); + assertThat(ModelDiscovery.createSmithyJarManifestUrl("jar:file:/foo bar.jar"), + equalTo(new URL("jar:file:/foo%20bar.jar!/META-INF/smithy/manifest"))); + } } diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/MANIFEST.MF b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/MANIFEST.MF new file mode 100644 index 00000000000..3ca5721135a --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/MANIFEST.MF @@ -0,0 +1,2 @@ +Manifest-Version: 1.0 +Created-By: 11.0.6 (Amazon.com Inc.) diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/manifest b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/manifest new file mode 100644 index 00000000000..c7f006be08b --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/manifest @@ -0,0 +1 @@ +valid.smithy diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/valid.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/valid.smithy new file mode 100644 index 00000000000..ddcadfbbcbc --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/assembler-valid-jar/META-INF/smithy/valid.smithy @@ -0,0 +1,5 @@ +$version: "2.0" + +namespace smithy.example + +string ExampleString From 9a4f9930d1ba2a8f996b41cd85344b37b81934cd Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Wed, 27 Aug 2025 23:06:02 -0400 Subject: [PATCH 2/6] Account for windows paths I wasn't accounting for the fact that windows paths have a different string representation than unix paths. This commit fixes that by normalizing the path to a URI path (using '/') before building the manifest URI. While debugging this, I found another difference between the source location filename of classpath and import discovered models. On windows, the source location of import discovered models will look like this: ``` jar:file:C:\foo\bar.jar!/META-INF/smithy/foo.smithy ``` which looks pretty cursed to me, but it _still works_. As in, you can do a `new URL(thatThing)`, and successfully read `foo.smithy`. I'm guessing the reason this works is similar to why you can read files when the URL isn't properly encoded, coincidental, or the JDK is doing some extra work to normalize it. Especially considering that you can do the same with the filename of classpath discovered models, which look like a normal URL: ``` jar:file:/C:/foo/bar.jar!/META-INF/smithy/foo.smithy ``` --- .../smithy/model/loader/ModelDiscovery.java | 8 +++++- .../model/loader/ModelAssemblerTest.java | 25 +++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java index 2e4b6115a52..934c2b977ff 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java @@ -15,6 +15,7 @@ import java.net.URL; import java.net.URLConnection; import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Enumeration; import java.util.LinkedHashSet; @@ -187,12 +188,17 @@ public static String getSmithyModelPathFromJarUrl(URL modelUrl) { */ public static URL createSmithyJarManifestUrl(String fileOrUrl) { try { + // This normalizes the path to the jar into the format expected by URI, + // accounting for the different string representation of a path on windows + // (otherwise all '\' path separators will be encoded). + URI jarUri = Paths.get(removeScheme(fileOrUrl)).toUri(); + String manifestPath = jarUri.getPath() + "!/" + MANIFEST_PATH; + // Building a URI from parts (scheme + file path) will make sure the file path // gets properly URI encoded. URL does not do this, instead expecting callers // to encode first, and consumers to decode. Encoding it here means that when // it is decoded by the consumer (e.g. the JDK reading the jar from the file system), // it will point to the correct location. - String manifestPath = removeScheme(fileOrUrl) + "!/" + MANIFEST_PATH; return new URI("jar:file", null, manifestPath, null).toURL(); } catch (IOException | URISyntaxException e) { throw new ModelImportException(e.getMessage(), e); diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java index 3fbcbc7674f..868db53b9e4 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelAssemblerTest.java @@ -1461,7 +1461,7 @@ public void handlesSameModelWhenBuiltAndImported() throws Exception { } @Test - public void loadsJarFromPathWithEncodedChars() throws Exception { + public void importsJarFromPathWithEncodedChars() throws Exception { Path source = Paths.get(getClass().getResource("assembler-valid-jar").toURI()); Path jarPath = outputDirectory.resolve(Paths.get("path%20with%45encoded%25chars", "test.jar")); @@ -1477,7 +1477,7 @@ public void loadsJarFromPathWithEncodedChars() throws Exception { } @Test - public void loadsJarFromUrlWithEncodedChars() throws Exception { + public void importsJarFromUrlWithEncodedChars() throws Exception { Path source = Paths.get(getClass().getResource("assembler-valid-jar").toURI()); Path jarPath = outputDirectory.resolve(Paths.get("path%20with%45encoded%25chars", "test.jar")); @@ -1491,4 +1491,25 @@ public void loadsJarFromUrlWithEncodedChars() throws Exception { assertTrue(model.getShape(ShapeId.from("smithy.example#ExampleString")).isPresent()); } + + @Test + public void importedJarsWithEncodedCharsHaveCorrectFilename() throws Exception { + Path source = Paths.get(getClass().getResource("assembler-valid-jar").toURI()); + Path jarPath = outputDirectory.resolve(Paths.get("path%20with%45encoded%25chars", "test.jar")); + + Files.createDirectories(jarPath.getParent()); + Files.copy(JarUtils.createJarFromDir(source), jarPath); + + Model model = Model.assembler() + .addImport(jarPath) + .assemble() + .unwrap(); + + String filename = model.expectShape(ShapeId.from("smithy.example#ExampleString")) + .getSourceLocation() + .getFilename(); + String fileContents = IoUtils.readUtf8Url(new URL(filename)); + + assertThat(fileContents, containsString("string ExampleString")); + } } From 47583f97259c6944bdf2534e1366a4403208ab85 Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Thu, 28 Aug 2025 14:42:20 -0400 Subject: [PATCH 3/6] Fix sources plugin PluginContext was relying on being able to compare the source location filename with a stringified version of each configured source path, in order to figure out which shapes/metadata are part of sources. This was used by the sources plugin to generate the projected version of the source model, for non-source projections. In my last commit, I discovered that models in imported jars have a source location filename like: ``` jar:file:\path\to\... ``` on windows. So they could be compared to source paths by stripping the 'jar:file:' part. Since my changes changed the filename to be a proper url, this comparison no longer worked. I updated PluginContext to compare filenames like 'jar:file:' to the URI paths of sources, instead of the regular paths. These URI paths are computed eagerly, so we don't have to do it on every comparison. Usually there are a very low number of sources (they're usually a single directory). The fact that the specific format of the filename was being relied on is mildly concerning, but considering the fact that models discovered on the classpath already had a different format, I think this is ok. I would also be surprised if there's a lot of code out there manually importing jars, instead of providing a classloader. --- .../amazon/smithy/build/PluginContext.java | 23 +++++++ .../build/plugins/SourcesPluginTest.java | 69 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java b/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java index 0497197692e..bff5131d095 100644 --- a/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java +++ b/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java @@ -5,6 +5,7 @@ package software.amazon.smithy.build; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -37,6 +38,7 @@ public final class PluginContext implements ToSmithyBuilder { private final ClassLoader pluginClassLoader; private final Set sources; private final String artifactName; + private final List sourceUriPaths; private Model nonTraitsModel; private PluginContext(Builder builder) { @@ -50,6 +52,15 @@ private PluginContext(Builder builder) { settings = builder.settings; pluginClassLoader = builder.pluginClassLoader; sources = builder.sources.copy(); + // Normalize the source paths into URI paths, which are URI encoded and use '/' separators, + // which is the format of paths in 'jar:file:/' filenames, so they can be compared in the + // 'isSource*' methods. + // This is done preemptively so we don't have to do the same work repeatedly, and the number + // of configured sources is typically very low. + sourceUriPaths = new ArrayList<>(sources.size()); + for (Path source : sources) { + sourceUriPaths.add(source.toUri().getRawPath()); + } } /** @@ -223,6 +234,18 @@ private boolean isSource(FromSourceLocation sourceLocation) { String location = sourceLocation.getSourceLocation().getFilename(); int offsetFromStart = findOffsetFromStart(location); + if (offsetFromStart > 0) { + // Offset means the filename is a URI, so compare to the URI paths to account for encoding and windows. + for (String sourceUriPath : sourceUriPaths) { + if (location.regionMatches(offsetFromStart, sourceUriPath, 0, sourceUriPath.length())) { + return true; + } + } + + return false; + } + + // Filename is a regular path, so we can compare to sources directly. for (Path path : sources) { String pathString = path.toString(); int offsetFromStartInSource = findOffsetFromStart(pathString); diff --git a/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java b/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java index 36a0eb8efc4..e808adb68d2 100644 --- a/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java +++ b/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java @@ -10,10 +10,14 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import java.io.IOException; import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import software.amazon.smithy.build.MockManifest; import software.amazon.smithy.build.PluginContext; import software.amazon.smithy.build.SmithyBuild; @@ -236,4 +240,69 @@ public void omitsUnsupportedFilesFromManifest() throws URISyntaxException { assertThat(manifestString, containsString("a.smithy\n")); assertThat(manifestString, not(containsString("foo.md"))); } + + @Test + public void copiesModelFromJarWithEncodedPathWithSourceProjection(@TempDir Path tempDir) + throws IOException, URISyntaxException { + Path source = Paths.get(getClass().getResource("sources/jar-import.jar").toURI()); + Path jarPath = tempDir.resolve(Paths.get("special%45path", "special.jar")); + + Files.createDirectories(jarPath.getParent()); + Files.copy(source, jarPath); + + Model model = Model.assembler() + .addImport(jarPath) + .addImport(getClass().getResource("notsources/d.smithy")) + .assemble() + .unwrap(); + MockManifest manifest = new MockManifest(); + PluginContext context = PluginContext.builder() + .fileManifest(manifest) + .model(model) + .originalModel(model) + .sources(ListUtils.of(jarPath)) + .build(); + new SourcesPlugin().execute(context); + String manifestString = manifest.getFileString("manifest").get(); + + assertThat(manifestString, containsString("special/a.smithy\n")); + assertThat(manifestString, containsString("special/b/b.smithy\n")); + assertThat(manifestString, containsString("special/b/c/c.json\n")); + assertThat(manifestString, not(containsString("jar-import/d.json"))); + assertThat(manifest.getFileString("special/a.smithy").get(), containsString("string A")); + assertThat(manifest.getFileString("special/b/b.smithy").get(), containsString("string B")); + assertThat(manifest.getFileString("special/b/c/c.json").get(), containsString("\"foo.baz#C\"")); + } + + @Test + public void copiesModelFromJarWithEncodedPathWithNonSourceProjection(@TempDir Path tempDir) + throws IOException, URISyntaxException { + Path source = Paths.get(getClass().getResource("sources/jar-import.jar").toURI()); + Path jarPath = tempDir.resolve(Paths.get("special%45path", "special.jar")); + + Files.createDirectories(jarPath.getParent()); + Files.copy(source, jarPath); + + Model model = Model.assembler() + .addImport(jarPath) + .assemble() + .unwrap(); + MockManifest manifest = new MockManifest(); + ProjectionConfig projection = ProjectionConfig.builder().build(); + PluginContext context = PluginContext.builder() + .fileManifest(manifest) + .projection("foo", projection) + .model(model) + .originalModel(model) + .sources(ListUtils.of(jarPath)) + .build(); + new SourcesPlugin().execute(context); + String manifestString = manifest.getFileString("manifest").get(); + + assertThat(manifestString, containsString("model.json")); + assertThat(manifestString, not(containsString("special"))); + assertThat(manifest.getFileString("model.json").get(), containsString("\"foo.baz#A\"")); + assertThat(manifest.getFileString("model.json").get(), containsString("\"foo.baz#B\"")); + assertThat(manifest.getFileString("model.json").get(), containsString("\"foo.baz#C\"")); + } } From a74c8b0bee854233b5d0de9106f09d1352bce66c Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Thu, 4 Sep 2025 12:24:04 -0400 Subject: [PATCH 4/6] Try to fix windows Junit (or something, maybe not junit directly) seems to be failing to delete a temp dir I was creating with the TempDir annotation. Maybe just manually creating/deleting it will work. --- .../build/plugins/SourcesPluginTest.java | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java b/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java index e808adb68d2..2b97d42071d 100644 --- a/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java +++ b/smithy-build/src/test/java/software/amazon/smithy/build/plugins/SourcesPluginTest.java @@ -10,14 +10,17 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Comparator; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; import software.amazon.smithy.build.MockManifest; import software.amazon.smithy.build.PluginContext; import software.amazon.smithy.build.SmithyBuild; @@ -30,6 +33,19 @@ import software.amazon.smithy.utils.ListUtils; public class SourcesPluginTest { + + private Path tempDirectory; + + @BeforeEach + public void before() throws IOException { + tempDirectory = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + public void after() throws IOException { + Files.walk(tempDirectory).sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } + @Test public void copiesFilesForSourceProjection() throws URISyntaxException { Model model = Model.assembler() @@ -242,10 +258,10 @@ public void omitsUnsupportedFilesFromManifest() throws URISyntaxException { } @Test - public void copiesModelFromJarWithEncodedPathWithSourceProjection(@TempDir Path tempDir) + public void copiesModelFromJarWithEncodedPathWithSourceProjection() throws IOException, URISyntaxException { Path source = Paths.get(getClass().getResource("sources/jar-import.jar").toURI()); - Path jarPath = tempDir.resolve(Paths.get("special%45path", "special.jar")); + Path jarPath = tempDirectory.resolve(Paths.get("special%45path", "special.jar")); Files.createDirectories(jarPath.getParent()); Files.copy(source, jarPath); @@ -275,10 +291,10 @@ public void copiesModelFromJarWithEncodedPathWithSourceProjection(@TempDir Path } @Test - public void copiesModelFromJarWithEncodedPathWithNonSourceProjection(@TempDir Path tempDir) + public void copiesModelFromJarWithEncodedPathWithNonSourceProjection() throws IOException, URISyntaxException { Path source = Paths.get(getClass().getResource("sources/jar-import.jar").toURI()); - Path jarPath = tempDir.resolve(Paths.get("special%45path", "special.jar")); + Path jarPath = tempDirectory.resolve(Paths.get("special%45path", "special.jar")); Files.createDirectories(jarPath.getParent()); Files.copy(source, jarPath); From d3334a95c9fa9bb05ad05bd2f18e2640ade97385 Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Thu, 16 Oct 2025 11:39:19 -0400 Subject: [PATCH 5/6] Fix tests and subtle bug Tests were failing windows drive letters (i.e. 'C:\path') weren't being accounted for in some of our ModelDiscovery tests. This was fine when `createSmithyJarManifestUrl` was interpreting its string parameter literally, but converting to a Path will add the drive letter. So I had to update the tests. It's a bit weird to make it work, but I think the tests are important and I couldn't think of a better way. However, this drew my attention to a bug/potential breaking change in my original change to `createSmithyJarManifestUrl`. I was encoding everything, which meant that anyone passing in an already-encoded string would get back a double-encoded URL, which would _not_ resolve to the correct location on the filesystem. I don't think trying to figure out if the string is encoded or not is something we should be doing though, so I opted to just keep the existing behavior when a "jar:file:/foo.jar" or "file:/foo.jar" is passed in, and updated the javadoc. --- .../amazon/smithy/build/PluginContext.java | 16 ++++--- .../smithy/model/loader/ModelDiscovery.java | 43 +++++++++---------- .../model/loader/ModelDiscoveryTest.java | 18 +++++--- 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java b/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java index bff5131d095..3b6ff004d74 100644 --- a/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java +++ b/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java @@ -38,7 +38,7 @@ public final class PluginContext implements ToSmithyBuilder { private final ClassLoader pluginClassLoader; private final Set sources; private final String artifactName; - private final List sourceUriPaths; + private final List sourceUris; private Model nonTraitsModel; private PluginContext(Builder builder) { @@ -52,14 +52,14 @@ private PluginContext(Builder builder) { settings = builder.settings; pluginClassLoader = builder.pluginClassLoader; sources = builder.sources.copy(); - // Normalize the source paths into URI paths, which are URI encoded and use '/' separators, + // Normalize the source paths into URI strings, which are URI encoded and use '/' separators, // which is the format of paths in 'jar:file:/' filenames, so they can be compared in the // 'isSource*' methods. // This is done preemptively so we don't have to do the same work repeatedly, and the number // of configured sources is typically very low. - sourceUriPaths = new ArrayList<>(sources.size()); + sourceUris = new ArrayList<>(sources.size()); for (Path source : sources) { - sourceUriPaths.add(source.toUri().getRawPath()); + sourceUris.add(source.toUri().toString()); } } @@ -236,8 +236,12 @@ private boolean isSource(FromSourceLocation sourceLocation) { if (offsetFromStart > 0) { // Offset means the filename is a URI, so compare to the URI paths to account for encoding and windows. - for (String sourceUriPath : sourceUriPaths) { - if (location.regionMatches(offsetFromStart, sourceUriPath, 0, sourceUriPath.length())) { + for (String sourceUri : sourceUris) { + // Compare starting after the protocol (the source uri will always be "file", because we created it + // from a path). + int sourceCompareStart = "file:".length(); + int regionCompareLength = sourceUri.length() - sourceCompareStart; + if (location.regionMatches(offsetFromStart, sourceUri, sourceCompareStart, regionCompareLength)) { return true; } } diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java index 934c2b977ff..70aea9167eb 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ModelDiscovery.java @@ -11,7 +11,6 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URI; -import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; import java.nio.charset.StandardCharsets; @@ -183,38 +182,36 @@ public static String getSmithyModelPathFromJarUrl(URL modelUrl) { * to a file, (e.g., "/foo/baz.jar"), a file URL (e.g., "file:/baz.jar"), * or a JAR URL (e.g., "jar:file:/baz.jar"). * + *

If {@code fileOrUrl} is a URL, it must be absolute and percent-encoded + * to ensure the manifest is read from the correct location. + * * @param fileOrUrl Filename or URL that points to a JAR. * @return Returns the computed URL. */ public static URL createSmithyJarManifestUrl(String fileOrUrl) { try { - // This normalizes the path to the jar into the format expected by URI, - // accounting for the different string representation of a path on windows - // (otherwise all '\' path separators will be encoded). - URI jarUri = Paths.get(removeScheme(fileOrUrl)).toUri(); - String manifestPath = jarUri.getPath() + "!/" + MANIFEST_PATH; + // URL expects callers to perform percent-encoding before construction, + // and for consumers to perform decoding. When given a URL-like string, + // (e.g. file:/foo.jar), we have to assume it is properly encoded because + // otherwise we risk double-encoding. For file paths, we have to encode + // to make sure that when the consumer of the URL performs decoding (i.e. + // to read from the file system), they will get back the original path. + String jarUrl; + if (fileOrUrl.startsWith("jar:")) { + jarUrl = fileOrUrl; + } else if (fileOrUrl.startsWith("file:")) { + jarUrl = "jar:" + fileOrUrl; + } else { + URI jarUri = Paths.get(fileOrUrl).toUri(); + jarUrl = "jar:" + jarUri; + } - // Building a URI from parts (scheme + file path) will make sure the file path - // gets properly URI encoded. URL does not do this, instead expecting callers - // to encode first, and consumers to decode. Encoding it here means that when - // it is decoded by the consumer (e.g. the JDK reading the jar from the file system), - // it will point to the correct location. - return new URI("jar:file", null, manifestPath, null).toURL(); - } catch (IOException | URISyntaxException e) { + return new URL(jarUrl + "!/" + MANIFEST_PATH); + } catch (IOException e) { throw new ModelImportException(e.getMessage(), e); } } - private static String removeScheme(String fileOrUrl) { - // Index will also cover jar: part of jar:file: - int pathStart = fileOrUrl.indexOf("file:"); - if (pathStart == -1) { - return fileOrUrl; - } else { - return fileOrUrl.substring(pathStart + "file:".length()); - } - } - private static Set parseManifest(URL location) throws IOException { Set models = new LinkedHashSet<>(); URLConnection connection = location.openConnection(); diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java index 465e51548a8..ae754eda530 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/ModelDiscoveryTest.java @@ -13,6 +13,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLClassLoader; +import java.nio.file.Paths; import java.util.List; import java.util.stream.Collectors; import org.junit.jupiter.api.Assertions; @@ -132,8 +133,10 @@ public void requiresModelNameToBeValidWhenParsing() throws IOException { @Test public void createSmithyManifestUrlFromPath() throws IOException { + // Accounts for windows drive letters and path separators + String normalizedJarPath = Paths.get("/foo.jar").toUri().getPath(); assertThat(ModelDiscovery.createSmithyJarManifestUrl("/foo.jar"), - equalTo(new URL("jar:file:/foo.jar!/META-INF/smithy/manifest"))); + equalTo(new URL("jar:file:" + normalizedJarPath + "!/META-INF/smithy/manifest"))); } @Test @@ -149,12 +152,17 @@ public void createSmithyManifestUrlFromJarUrl() throws IOException { } @Test - public void encodesSmithyManifestUrl() throws IOException { + public void encodesSmithyManifestUrlFromPath() throws IOException { + String normalizedAndEncodedJarPath = Paths.get("/foo bar.jar").toUri().getRawPath(); assertThat(ModelDiscovery.createSmithyJarManifestUrl("/foo bar.jar"), + equalTo(new URL("jar:file:" + normalizedAndEncodedJarPath + "!/META-INF/smithy/manifest"))); + } + + @Test + public void preservesEncodingOfSmithyManifestUrlFromUrl() throws IOException { + assertThat(ModelDiscovery.createSmithyJarManifestUrl("file:/foo%20bar.jar"), equalTo(new URL("jar:file:/foo%20bar.jar!/META-INF/smithy/manifest"))); - assertThat(ModelDiscovery.createSmithyJarManifestUrl("file:/foo bar.jar"), - equalTo(new URL("jar:file:/foo%20bar.jar!/META-INF/smithy/manifest"))); - assertThat(ModelDiscovery.createSmithyJarManifestUrl("jar:file:/foo bar.jar"), + assertThat(ModelDiscovery.createSmithyJarManifestUrl("jar:file:/foo%20bar.jar"), equalTo(new URL("jar:file:/foo%20bar.jar!/META-INF/smithy/manifest"))); } } From b9815d529688e4c761a62cdf882c0624800d9e91 Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Fri, 17 Oct 2025 11:15:12 -0400 Subject: [PATCH 6/6] 5 --- .../java/software/amazon/smithy/build/PluginContext.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java b/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java index 3b6ff004d74..cc66bd3bbba 100644 --- a/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java +++ b/smithy-build/src/main/java/software/amazon/smithy/build/PluginContext.java @@ -239,9 +239,8 @@ private boolean isSource(FromSourceLocation sourceLocation) { for (String sourceUri : sourceUris) { // Compare starting after the protocol (the source uri will always be "file", because we created it // from a path). - int sourceCompareStart = "file:".length(); - int regionCompareLength = sourceUri.length() - sourceCompareStart; - if (location.regionMatches(offsetFromStart, sourceUri, sourceCompareStart, regionCompareLength)) { + int regionCompareLength = sourceUri.length() - 5; + if (location.regionMatches(offsetFromStart, sourceUri, 5, regionCompareLength)) { return true; } }