Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import static io.trino.plugin.atop.AtopErrorCode.ATOP_CANNOT_START_PROCESS_ERROR;
import static io.trino.plugin.atop.AtopErrorCode.ATOP_READ_TIMEOUT;
import static java.lang.String.format;
import static java.nio.charset.Charset.defaultCharset;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newFixedThreadPool;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
Expand Down Expand Up @@ -97,7 +98,7 @@ private static final class AtopProcess
private AtopProcess(Process process, Duration readTimeout, ExecutorService executor)
{
this.process = requireNonNull(process, "process is null");
underlyingReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
underlyingReader = new BufferedReader(new InputStreamReader(process.getInputStream(), defaultCharset()));
TimeLimiter limiter = SimpleTimeLimiter.create(executor);
this.reader = limiter.newProxy(underlyingReader::readLine, LineReader.class, readTimeout.toMillis(), MILLISECONDS);
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.NoSuchElementException;

import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

public class TestingAtopFactory
Expand All @@ -51,7 +52,7 @@ private static final class TestingAtop
private TestingAtop(InputStream dataStream, ZonedDateTime date)
{
this.date = date;
this.reader = new BufferedReader(new InputStreamReader(dataStream));
this.reader = new BufferedReader(new InputStreamReader(dataStream, UTF_8));
try {
line = reader.readLine();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import static io.trino.tpch.TpchTable.PART;
import static io.trino.tpch.TpchTable.REGION;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.concurrent.TimeUnit.SECONDS;

public class DruidQueryRunner
Expand Down Expand Up @@ -112,7 +113,7 @@ private static void writeDataAsTsv(MaterializedResult rows, String dataFile)
throws IOException
{
File file = new File(dataFile);
try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
try (BufferedWriter bw = new BufferedWriter(new FileWriter(file, UTF_8))) {
for (MaterializedRow row : rows.getMaterializedRows()) {
bw.write(convertToTSV(row.getFields()));
bw.newLine();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.nio.charset.Charset.defaultCharset;
import static java.time.format.DateTimeFormatter.ISO_OFFSET_DATE_TIME;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.SECONDS;
Expand Down Expand Up @@ -309,7 +310,7 @@ private BufferedReader createNextReader()
FileInputStream fileInputStream = new FileInputStream(file);

InputStream in = isGZipped(file) ? new GZIPInputStream(fileInputStream) : fileInputStream;
return new BufferedReader(new InputStreamReader(in));
return new BufferedReader(new InputStreamReader(in, defaultCharset()));
}

public static boolean isGZipped(File file)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.io.UncheckedIOException;

import static io.trino.plugin.ml.type.ClassifierType.BIGINT_CLASSIFIER;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

public class SvmClassifier
Expand All @@ -50,7 +51,8 @@ public static SvmClassifier deserialize(byte[] modelData)
{
// TODO do something with the hyperparameters
try {
svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData))));
// UTF-8 should work, though the only thing we can say about the charset is that it's guaranteed to be 8-bits
svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData), UTF_8)));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does this read? where does modelData come from?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm. Looks like nobody calls this method?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it's called dynamically in io.trino.plugin.ml.ModelUtils#deserialize(io.airlift.slice.Slice). The byte[] data comes from theSlice. I have no idea what are the contents of the slice - some representation of the ML model, it appears, and it doesn't look like a text 🤔

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upon further investigation: the model data is some binary metadata followed by a textual representation of the model. The text is generated by writing to a temporary file using java.io.DataOutputStream#writeBytes(String), which... treats the string as "a sequence of bytes" (out.write((byte)s.charAt(i)) 😱). So it's actually either US_ASCII or ISO_8859_1.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ouch. use UTF-8 and add a comment that this may or may not be correct choice

return new SvmClassifier(model);
}
catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.io.UncheckedIOException;

import static io.trino.plugin.ml.type.RegressorType.REGRESSOR;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

public class SvmRegressor
Expand All @@ -50,7 +51,8 @@ public static SvmRegressor deserialize(byte[] modelData)
{
// TODO do something with the hyperparameters
try {
svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData))));
// UTF-8 should work, though the only thing we can say about the charset is that it's guaranteed to be 8-bits
svm_model model = svm.svm_load_model(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(modelData), UTF_8)));
return new SvmRegressor(model);
}
catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Optional;

import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;

public class TableStatisticsDataRepository
{
Expand Down Expand Up @@ -58,7 +59,7 @@ private void writeStatistics(Path path, TableStatisticsData tableStatisticsData)
objectMapper
.writerWithDefaultPrettyPrinter()
.writeValue(file, tableStatisticsData);
try (FileWriter fileWriter = new FileWriter(file, true)) {
try (FileWriter fileWriter = new FileWriter(file, UTF_8, true)) {
fileWriter.append('\n');
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.plugin.tpch.util.Optionals.withBoth;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;

public class TableStatisticsDataRepository
{
Expand Down Expand Up @@ -58,7 +59,7 @@ private void writeStatistics(Path path, TableStatisticsData tableStatisticsData)
objectMapper
.writerWithDefaultPrettyPrinter()
.writeValue(file, tableStatisticsData);
try (FileWriter fileWriter = new FileWriter(file, true)) {
try (FileWriter fileWriter = new FileWriter(file, UTF_8, true)) {
fileWriter.append('\n');
}
}
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,7 @@
-Xep:BoxedPrimitiveConstructor:ERROR \
-Xep:ClassCanBeStatic:ERROR \
-Xep:CompareToZero:ERROR \
-Xep:DefaultCharset:ERROR \
-Xep:EqualsGetClass:OFF <!-- we would rather want the opposite check --> \
-Xep:EqualsIncompatibleType:ERROR \
-Xep:FallThrough:ERROR \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.io.PrintStream;
import java.io.UncheckedIOException;

import static java.nio.charset.Charset.defaultCharset;
import static java.util.Objects.requireNonNull;

public class JsonEventClient
Expand All @@ -52,7 +53,7 @@ public <T> void postEvent(T event)
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
JsonGenerator generator = factory.createGenerator(buffer, JsonEncoding.UTF8);
serializer.serialize(event, generator);
out.println(buffer.toString().trim());
out.println(buffer.toString(defaultCharset()).trim());
}
catch (IOException e) {
throw new UncheckedIOException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.io.Writer;
import java.util.Map;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

public class SimpleLineBenchmarkResultWriter
Expand All @@ -31,7 +32,7 @@ public class SimpleLineBenchmarkResultWriter

public SimpleLineBenchmarkResultWriter(OutputStream outputStream)
{
writer = new OutputStreamWriter(requireNonNull(outputStream, "outputStream is null"));
writer = new OutputStreamWriter(requireNonNull(outputStream, "outputStream is null"), UTF_8);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import java.io.FileReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.List;
Expand All @@ -51,6 +50,7 @@
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static io.trino.metadata.InternalFunctionBundle.extractFunctions;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Arrays.asList;
import static java.util.Objects.requireNonNull;
import static java.util.Objects.requireNonNullElse;
Expand Down Expand Up @@ -143,7 +143,7 @@ private static Map<String, String> mapModulesToPlugins(File rootPom)

private static List<String> readTrinoPlugins(File rootPom)
{
try (FileReader fileReader = new FileReader(rootPom)) {
try (FileReader fileReader = new FileReader(rootPom, UTF_8)) {
MavenXpp3Reader reader = new MavenXpp3Reader();
Model model = reader.read(fileReader);
return model.getModules().stream()
Expand All @@ -161,7 +161,7 @@ private static List<String> readTrinoPlugins(File rootPom)
private static boolean isTrinoPlugin(String module)
{
String modulePom = module + "/pom.xml";
try (FileReader fileReader = new FileReader(modulePom)) {
try (FileReader fileReader = new FileReader(modulePom, UTF_8)) {
MavenXpp3Reader reader = new MavenXpp3Reader();
Model model = reader.read(fileReader);
return model.getPackaging().equals("trino-plugin");
Expand Down Expand Up @@ -200,7 +200,7 @@ private static String readPluginClassName(File serviceJar)
.findFirst()
.map(entry -> {
try (BufferedInputStream bis = new BufferedInputStream(zipFile.getInputStream(entry))) {
return new String(ByteStreams.toByteArray(bis), StandardCharsets.UTF_8).trim();
return new String(ByteStreams.toByteArray(bis), UTF_8).trim();
}
catch (IOException e) {
throw new UncheckedIOException(format("Couldn't read plugin's service descriptor in %s", serviceJar), e);
Expand All @@ -219,7 +219,7 @@ private static String readPluginClassName(File serviceJar)
private static Optional<List<String>> readImpactedModules(File gibImpactedModules)
{
try {
return Optional.of(Files.asCharSource(gibImpactedModules, StandardCharsets.UTF_8).readLines());
return Optional.of(Files.asCharSource(gibImpactedModules, UTF_8).readLines());
}
catch (IOException e) {
log.warn(e, "Couldn't read file %s", gibImpactedModules);
Expand Down