Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,8 @@ static void assertOrcFileIsValid(FileSystem fs, Path filePath, OrcFile.ReaderOpt
HadoopUtils.deletePath(fs, filePath, false);
GobblinEventBuilder eventBuilder = new GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
eventBuilder.addMetadata("filePath", filePath.toString());
eventBuilder.addMetadata("exceptionType", e.getClass().getCanonicalName());
eventBuilder.addMetadata("exceptionMessage", e.getMessage());
EventSubmitter.submit(metricContext, eventBuilder);

throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@
import org.apache.hadoop.fs.Path;
import org.apache.orc.FileFormatException;
import org.apache.orc.OrcFile;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

import com.google.common.io.Files;
Expand All @@ -36,31 +39,70 @@
import org.apache.gobblin.metrics.event.GobblinEventBuilder;

import static org.apache.gobblin.writer.GobblinBaseOrcWriter.CORRUPTED_ORC_FILE_DELETION_EVENT;
import static org.mockito.MockitoAnnotations.openMocks;


public class GobblinBaseOrcWriterTest {
Configuration conf;
FileSystem fs;
File tmpDir;
File orcFile;
Path orcFilePath;

@Mock
MetricContext mockContext;

AutoCloseable closeable;

/**
 * Prepares the shared fixture: initializes the {@code @Mock}-annotated fields, creates a
 * Hadoop {@link Configuration} with its local {@link FileSystem}, and reserves a
 * {@code test.orc} location inside a freshly created temporary directory.
 *
 * <p>NOTE(review): {@code @BeforeTest} is not {@code @BeforeMethod}; the test methods appear to
 * share this fixture (including {@code mockContext}) — confirm that this is intended.
 *
 * @throws IOException if the local file system cannot be obtained
 */
@BeforeTest
public void setup() throws IOException {
  // Keep the handle returned by openMocks so the mocks can be released in tearDown().
  this.closeable = openMocks(this);

  Configuration hadoopConf = new Configuration();
  this.conf = hadoopConf;
  this.fs = FileSystem.getLocal(hadoopConf);

  // The ORC file itself is not created here; each test writes its own content to it.
  File workDir = Files.createTempDir();
  File target = new File(workDir, "test.orc");
  this.tmpDir = workDir;
  this.orcFile = target;
  this.orcFilePath = new Path(target.getAbsolutePath());
}

/**
 * Releases the Mockito resources that were opened in {@link #setup()}.
 *
 * @throws Exception if closing the mock handle fails
 */
@AfterTest
public void tearDown() throws Exception {
  AutoCloseable mockHandle = this.closeable;
  mockHandle.close();
}

@Test
public void testOrcValidation()
public void testOrcValidationOnlyHeader()
throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
File tmpDir = Files.createTempDir();
File corruptedOrcFile = new File(tmpDir, "test.orc");
try (FileWriter writer = new FileWriter(corruptedOrcFile)) {
// write a corrupted ORC file that only contains the header but without content
try (FileWriter writer = new FileWriter(orcFile)) {
// writer a corrupted ORC file that only contains thethe header
writer.write(OrcFile.MAGIC);
}

OrcFile.ReaderOptions readerOptions = new OrcFile.ReaderOptions(conf);
Assert.assertThrows(FileFormatException.class, () -> GobblinBaseOrcWriter.assertOrcFileIsValid(
fs, orcFilePath, new OrcFile.ReaderOptions(conf), mockContext));

GobblinEventBuilder eventBuilder = new GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
eventBuilder.addMetadata("filePath", orcFilePath.toString());
eventBuilder.addMetadata("exceptionType", "org.apache.orc.FileFormatException");
eventBuilder.addMetadata("exceptionMessage", String.format("Not a valid ORC file %s (maxFileLength= 9223372036854775807)", orcFilePath));
Mockito.verify(mockContext, Mockito.times(1)).submitEvent(eventBuilder.build());
}

/**
 * Verifies validation of a file that has the ORC magic header followed by bytes that are not
 * valid protobuf content.
 *
 * <p>Expects {@code GobblinBaseOrcWriter.assertOrcFileIsValid} to throw a
 * {@code com.google.protobuf25.InvalidProtocolBufferException} and to submit exactly one
 * {@code CORRUPTED_ORC_FILE_DELETION_EVENT} to the metric context, carrying the file path,
 * the exception type and the exception message as metadata.
 *
 * @throws IOException if the corrupted fixture file cannot be written
 */
@Test
public void testOrcValidationWithContent() throws IOException {
  try (FileWriter writer = new FileWriter(orcFile)) {
    // write a corrupted ORC file that contains the header followed by invalid protobuf content
    writer.write(OrcFile.MAGIC);
    writer.write("\n");
  }

  Assert.assertThrows(com.google.protobuf25.InvalidProtocolBufferException.class,
      () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, orcFilePath, new OrcFile.ReaderOptions(conf), mockContext));

  // Build the event the writer is expected to emit and check it was submitted exactly once.
  GobblinEventBuilder eventBuilder = new GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE);
  eventBuilder.addMetadata("filePath", orcFilePath.toString());
  eventBuilder.addMetadata("exceptionType", "com.google.protobuf25.InvalidProtocolBufferException");
  eventBuilder.addMetadata("exceptionMessage", "Protocol message tag had invalid wire type.");
  Mockito.verify(mockContext, Mockito.times(1))
      .submitEvent(eventBuilder.build());
}
Expand Down