From 0468721e05b746e27dc1ce4684a648b5a5673687 Mon Sep 17 00:00:00 2001 From: Krzysztof Sobolewski Date: Tue, 10 May 2022 14:54:56 +0200 Subject: [PATCH 1/2] Static-import UTF_8 in PluginReader --- .../src/main/java/io/trino/server/PluginReader.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java b/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java index d3a7c8a17624..4c95d32c242a 100644 --- a/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java +++ b/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java @@ -31,7 +31,6 @@ import java.io.FileReader; import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.attribute.BasicFileAttributes; import java.util.List; @@ -51,6 +50,7 @@ import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static io.trino.metadata.InternalFunctionBundle.extractFunctions; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Arrays.asList; import static java.util.Objects.requireNonNull; import static java.util.Objects.requireNonNullElse; @@ -200,7 +200,7 @@ private static String readPluginClassName(File serviceJar) .findFirst() .map(entry -> { try (BufferedInputStream bis = new BufferedInputStream(zipFile.getInputStream(entry))) { - return new String(ByteStreams.toByteArray(bis), StandardCharsets.UTF_8).trim(); + return new String(ByteStreams.toByteArray(bis), UTF_8).trim(); } catch (IOException e) { throw new UncheckedIOException(format("Couldn't read plugin's service descriptor in %s", serviceJar), e); @@ -219,7 +219,7 @@ private static String readPluginClassName(File serviceJar) private static Optional> readImpactedModules(File gibImpactedModules) { try { - return Optional.of(Files.asCharSource(gibImpactedModules, StandardCharsets.UTF_8).readLines()); + return Optional.of(Files.asCharSource(gibImpactedModules, UTF_8).readLines()); } catch (IOException e) { log.warn(e, "Couldn't read file %s", gibImpactedModules); From 9c8345e8a7d035af7ea8f6caba357933ac51b0df Mon Sep 17 00:00:00 2001 From: Krzysztof Sobolewski Date: Tue, 10 May 2022 13:31:36 +0200 Subject: [PATCH 2/2] Enable Error Prone check DefaultCharset This is focused on charsets, so it won't catch locale-related problems like `String#toLowerCase` with default locale. But it will catch `String` to `byte[]` (and vice versa) conversion with default charset, in addition to the IO stream related issues fixed in this commit. The existing cases were fixed to explicitly use `defaultCharset()` where the data being read comes from (or is written to a file on) the local system, otherwise they are fixed to use `UTF_8` - unless there are special considerations to use a different charset. --- .../main/java/io/trino/plugin/atop/AtopProcessFactory.java | 3 ++- .../test/java/io/trino/plugin/atop/TestingAtopFactory.java | 3 ++- .../src/test/java/io/trino/plugin/druid/DruidQueryRunner.java | 3 ++- .../java/io/trino/plugin/localfile/LocalFileRecordCursor.java | 3 ++- .../src/main/java/io/trino/plugin/ml/SvmClassifier.java | 4 +++- .../src/main/java/io/trino/plugin/ml/SvmRegressor.java | 4 +++- .../tpcds/statistics/TableStatisticsDataRepository.java | 3 ++- .../plugin/tpch/statistics/TableStatisticsDataRepository.java | 3 ++- pom.xml | 1 + .../src/main/java/io/trino/verifier/JsonEventClient.java | 3 ++- .../io/trino/benchmark/SimpleLineBenchmarkResultWriter.java | 3 ++- .../src/main/java/io/trino/server/PluginReader.java | 4 ++-- 12 files changed, 25 insertions(+), 12 deletions(-) diff --git a/plugin/trino-atop/src/main/java/io/trino/plugin/atop/AtopProcessFactory.java b/plugin/trino-atop/src/main/java/io/trino/plugin/atop/AtopProcessFactory.java index eaba1fed2af4..a55c04d79d09 100644 --- a/plugin/trino-atop/src/main/java/io/trino/plugin/atop/AtopProcessFactory.java +++ b/plugin/trino-atop/src/main/java/io/trino/plugin/atop/AtopProcessFactory.java @@ -38,6 +38,7 @@ import static io.trino.plugin.atop.AtopErrorCode.ATOP_CANNOT_START_PROCESS_ERROR; import static io.trino.plugin.atop.AtopErrorCode.ATOP_READ_TIMEOUT; import static java.lang.String.format; +import static java.nio.charset.Charset.defaultCharset; import static java.util.Objects.requireNonNull; import static java.util.concurrent.Executors.newFixedThreadPool; import static java.util.concurrent.TimeUnit.MILLISECONDS; @@ -97,7 +98,7 @@ private static final class AtopProcess private AtopProcess(Process process, Duration readTimeout, ExecutorService executor) { this.process = requireNonNull(process, "process is null"); - underlyingReader = new BufferedReader(new InputStreamReader(process.getInputStream())); + underlyingReader = new BufferedReader(new InputStreamReader(process.getInputStream(), defaultCharset())); TimeLimiter limiter = SimpleTimeLimiter.create(executor); this.reader = limiter.newProxy(underlyingReader::readLine, LineReader.class, readTimeout.toMillis(), MILLISECONDS); try { diff --git a/plugin/trino-atop/src/test/java/io/trino/plugin/atop/TestingAtopFactory.java b/plugin/trino-atop/src/test/java/io/trino/plugin/atop/TestingAtopFactory.java index 3b61691bc51c..d804d37ed1eb 100644 --- a/plugin/trino-atop/src/test/java/io/trino/plugin/atop/TestingAtopFactory.java +++ b/plugin/trino-atop/src/test/java/io/trino/plugin/atop/TestingAtopFactory.java @@ -28,6 +28,7 @@ import java.util.NoSuchElementException; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; public class TestingAtopFactory @@ -51,7 +52,7 @@ private static final class TestingAtop private TestingAtop(InputStream dataStream, ZonedDateTime date) { this.date = date; - this.reader = new BufferedReader(new InputStreamReader(dataStream)); + this.reader = new BufferedReader(new InputStreamReader(dataStream, UTF_8)); try { line = reader.readLine(); } diff --git a/plugin/trino-druid/src/test/java/io/trino/plugin/druid/DruidQueryRunner.java b/plugin/trino-druid/src/test/java/io/trino/plugin/druid/DruidQueryRunner.java index e41ce77350f4..e7db7df4a601 100644 --- a/plugin/trino-druid/src/test/java/io/trino/plugin/druid/DruidQueryRunner.java +++ b/plugin/trino-druid/src/test/java/io/trino/plugin/druid/DruidQueryRunner.java @@ -42,6 +42,7 @@ import static io.trino.tpch.TpchTable.PART; import static io.trino.tpch.TpchTable.REGION; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.concurrent.TimeUnit.SECONDS; public class DruidQueryRunner @@ -112,7 +113,7 @@ private static void writeDataAsTsv(MaterializedResult rows, String dataFile) throws IOException { File file = new File(dataFile); - try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(file, UTF_8))) { for (MaterializedRow row : rows.getMaterializedRows()) { bw.write(convertToTSV(row.getFields())); bw.newLine(); diff --git a/plugin/trino-local-file/src/main/java/io/trino/plugin/localfile/LocalFileRecordCursor.java b/plugin/trino-local-file/src/main/java/io/trino/plugin/localfile/LocalFileRecordCursor.java index 768d40d5fe73..49195ce9b73b 100644 --- a/plugin/trino-local-file/src/main/java/io/trino/plugin/localfile/LocalFileRecordCursor.java +++ b/plugin/trino-local-file/src/main/java/io/trino/plugin/localfile/LocalFileRecordCursor.java @@ -59,6 +59,7 @@ import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static java.lang.Math.toIntExact; import static java.lang.String.format; +import static java.nio.charset.Charset.defaultCharset; import static java.time.format.DateTimeFormatter.ISO_OFFSET_DATE_TIME; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.SECONDS; @@ -309,7 +310,7 @@ private BufferedReader createNextReader() FileInputStream fileInputStream = new FileInputStream(file); InputStream in = isGZipped(file) ? new GZIPInputStream(fileInputStream) : fileInputStream; - return new BufferedReader(new InputStreamReader(in)); + return new BufferedReader(new InputStreamReader(in, defaultCharset())); } public static boolean isGZipped(File file) diff --git a/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmClassifier.java b/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmClassifier.java index 5d016f12917f..c46b3eb0395d 100644 --- a/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmClassifier.java +++ b/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmClassifier.java @@ -25,6 +25,7 @@ import java.io.UncheckedIOException; import static io.trino.plugin.ml.type.ClassifierType.BIGINT_CLASSIFIER; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; public class SvmClassifier @@ -50,7 +51,8 @@ public static SvmClassifier deserialize(byte[] modelData) { // TODO do something with the hyperparameters try { - svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData)))); + // UTF-8 should work, though the only thing we can say about the charset is that it's guaranteed to be 8-bits + svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData), UTF_8))); return new SvmClassifier(model); } catch (IOException e) { diff --git a/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmRegressor.java b/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmRegressor.java index 1f32520c9c3b..65f1a06edff5 100644 --- a/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmRegressor.java +++ b/plugin/trino-ml/src/main/java/io/trino/plugin/ml/SvmRegressor.java @@ -25,6 +25,7 @@ import java.io.UncheckedIOException; import static io.trino.plugin.ml.type.RegressorType.REGRESSOR; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; public class SvmRegressor @@ -50,7 +51,8 @@ public static SvmRegressor deserialize(byte[] modelData) { // TODO do something with the hyperparameters try { - svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData)))); + // UTF-8 should work, though the only thing we can say about the charset is that it's guaranteed to be 8-bits + svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData), UTF_8))); return new SvmRegressor(model); } catch (IOException e) { diff --git a/plugin/trino-tpcds/src/main/java/io/trino/plugin/tpcds/statistics/TableStatisticsDataRepository.java b/plugin/trino-tpcds/src/main/java/io/trino/plugin/tpcds/statistics/TableStatisticsDataRepository.java index 507ebcd79c59..67e67128daf9 100644 --- a/plugin/trino-tpcds/src/main/java/io/trino/plugin/tpcds/statistics/TableStatisticsDataRepository.java +++ b/plugin/trino-tpcds/src/main/java/io/trino/plugin/tpcds/statistics/TableStatisticsDataRepository.java @@ -27,6 +27,7 @@ import java.util.Optional; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; public class TableStatisticsDataRepository { @@ -58,7 +59,7 @@ private void writeStatistics(Path path, TableStatisticsData tableStatisticsData) objectMapper .writerWithDefaultPrettyPrinter() .writeValue(file, tableStatisticsData); - try (FileWriter fileWriter = new FileWriter(file, true)) { + try (FileWriter fileWriter = new FileWriter(file, UTF_8, true)) { fileWriter.append('\n'); } } diff --git a/plugin/trino-tpch/src/main/java/io/trino/plugin/tpch/statistics/TableStatisticsDataRepository.java b/plugin/trino-tpch/src/main/java/io/trino/plugin/tpch/statistics/TableStatisticsDataRepository.java index 786a906258d9..4edc5052df09 100644 --- a/plugin/trino-tpch/src/main/java/io/trino/plugin/tpch/statistics/TableStatisticsDataRepository.java +++ b/plugin/trino-tpch/src/main/java/io/trino/plugin/tpch/statistics/TableStatisticsDataRepository.java @@ -28,6 +28,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static io.trino.plugin.tpch.util.Optionals.withBoth; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; public class TableStatisticsDataRepository { @@ -58,7 +59,7 @@ private void writeStatistics(Path path, TableStatisticsData tableStatisticsData) objectMapper .writerWithDefaultPrettyPrinter() .writeValue(file, tableStatisticsData); - try (FileWriter fileWriter = new FileWriter(file, true)) { + try (FileWriter fileWriter = new FileWriter(file, UTF_8, true)) { fileWriter.append('\n'); } } diff --git a/pom.xml b/pom.xml index d5da9488e3f8..e48f26d44ce5 100644 --- a/pom.xml +++ b/pom.xml @@ -1893,6 +1893,7 @@ -Xep:BoxedPrimitiveConstructor:ERROR \ -Xep:ClassCanBeStatic:ERROR \ -Xep:CompareToZero:ERROR \ + -Xep:DefaultCharset:ERROR \ -Xep:EqualsGetClass:OFF \ -Xep:EqualsIncompatibleType:ERROR \ -Xep:FallThrough:ERROR \ diff --git a/service/trino-verifier/src/main/java/io/trino/verifier/JsonEventClient.java b/service/trino-verifier/src/main/java/io/trino/verifier/JsonEventClient.java index f6f932c7b5a5..5b930fe6c093 100644 --- a/service/trino-verifier/src/main/java/io/trino/verifier/JsonEventClient.java +++ b/service/trino-verifier/src/main/java/io/trino/verifier/JsonEventClient.java @@ -27,6 +27,7 @@ import java.io.PrintStream; import java.io.UncheckedIOException; +import static java.nio.charset.Charset.defaultCharset; import static java.util.Objects.requireNonNull; public class JsonEventClient @@ -52,7 +53,7 @@ public void postEvent(T event) ByteArrayOutputStream buffer = new ByteArrayOutputStream(); JsonGenerator generator = factory.createGenerator(buffer, JsonEncoding.UTF8); serializer.serialize(event, generator); - out.println(buffer.toString().trim()); + out.println(buffer.toString(defaultCharset()).trim()); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/testing/trino-benchmark/src/main/java/io/trino/benchmark/SimpleLineBenchmarkResultWriter.java b/testing/trino-benchmark/src/main/java/io/trino/benchmark/SimpleLineBenchmarkResultWriter.java index 6ca9838b404f..9667d288e4d8 100644 --- a/testing/trino-benchmark/src/main/java/io/trino/benchmark/SimpleLineBenchmarkResultWriter.java +++ b/testing/trino-benchmark/src/main/java/io/trino/benchmark/SimpleLineBenchmarkResultWriter.java @@ -22,6 +22,7 @@ import java.io.Writer; import java.util.Map; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; public class SimpleLineBenchmarkResultWriter @@ -31,7 +32,7 @@ public class SimpleLineBenchmarkResultWriter public SimpleLineBenchmarkResultWriter(OutputStream outputStream) { - writer = new OutputStreamWriter(requireNonNull(outputStream, "outputStream is null")); + writer = new OutputStreamWriter(requireNonNull(outputStream, "outputStream is null"), UTF_8); } @Override diff --git a/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java b/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java index 4c95d32c242a..81ca85dbc0d9 100644 --- a/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java +++ b/testing/trino-plugin-reader/src/main/java/io/trino/server/PluginReader.java @@ -143,7 +143,7 @@ private static Map mapModulesToPlugins(File rootPom) private static List readTrinoPlugins(File rootPom) { - try (FileReader fileReader = new FileReader(rootPom)) { + try (FileReader fileReader = new FileReader(rootPom, UTF_8)) { MavenXpp3Reader reader = new MavenXpp3Reader(); Model model = reader.read(fileReader); return model.getModules().stream() @@ -161,7 +161,7 @@ private static List readTrinoPlugins(File rootPom) private static boolean isTrinoPlugin(String module) { String modulePom = module + "/pom.xml"; - try (FileReader fileReader = new FileReader(modulePom)) { + try (FileReader fileReader = new FileReader(modulePom, UTF_8)) { MavenXpp3Reader reader = new MavenXpp3Reader(); Model model = reader.read(fileReader); return model.getPackaging().equals("trino-plugin");