diff --git a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java index bb6c11aaaa4..2ed31b93b57 100644 --- a/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java +++ b/gobblin-modules/gobblin-orc/src/main/java/org/apache/gobblin/writer/GobblinBaseOrcWriter.java @@ -399,6 +399,8 @@ static void assertOrcFileIsValid(FileSystem fs, Path filePath, OrcFile.ReaderOpt HadoopUtils.deletePath(fs, filePath, false); GobblinEventBuilder eventBuilder = new GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE); eventBuilder.addMetadata("filePath", filePath.toString()); + eventBuilder.addMetadata("exceptionType", e.getClass().getCanonicalName()); + eventBuilder.addMetadata("exceptionMessage", e.getMessage()); EventSubmitter.submit(metricContext, eventBuilder); throw e; diff --git a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java index 81387059787..1448848cfce 100644 --- a/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java +++ b/gobblin-modules/gobblin-orc/src/test/java/org/apache/gobblin/writer/GobblinBaseOrcWriterTest.java @@ -26,8 +26,11 @@ import org.apache.hadoop.fs.Path; import org.apache.orc.FileFormatException; import org.apache.orc.OrcFile; +import org.mockito.Mock; import org.mockito.Mockito; import org.testng.Assert; +import org.testng.annotations.AfterTest; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import com.google.common.io.Files; @@ -36,31 +39,70 @@ import org.apache.gobblin.metrics.event.GobblinEventBuilder; import static org.apache.gobblin.writer.GobblinBaseOrcWriter.CORRUPTED_ORC_FILE_DELETION_EVENT; +import static 
org.mockito.MockitoAnnotations.openMocks; public class GobblinBaseOrcWriterTest { + Configuration conf; + FileSystem fs; + File tmpDir; + File orcFile; + Path orcFilePath; + + @Mock + MetricContext mockContext; + + AutoCloseable closeable; + + @BeforeTest + public void setup() throws IOException { + this.closeable = openMocks(this); + this.conf = new Configuration(); + this.fs = FileSystem.getLocal(conf); + this.tmpDir = Files.createTempDir(); + this.orcFile = new File(tmpDir, "test.orc"); + this.orcFilePath = new Path(orcFile.getAbsolutePath()); + } + + @AfterTest + public void tearDown() + throws Exception { + this.closeable.close(); + } @Test - public void testOrcValidation() + public void testOrcValidationOnlyHeader() throws IOException { - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.getLocal(conf); - File tmpDir = Files.createTempDir(); - File corruptedOrcFile = new File(tmpDir, "test.orc"); - try (FileWriter writer = new FileWriter(corruptedOrcFile)) { - // write a corrupted ORC file that only contains the header but without content + try (FileWriter writer = new FileWriter(orcFile)) { + // write a corrupted ORC file that only contains the header writer.write(OrcFile.MAGIC); } - OrcFile.ReaderOptions readerOptions = new OrcFile.ReaderOptions(conf); + Assert.assertThrows(FileFormatException.class, () -> GobblinBaseOrcWriter.assertOrcFileIsValid( + fs, orcFilePath, new OrcFile.ReaderOptions(conf), mockContext)); + + GobblinEventBuilder eventBuilder = new GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE); + eventBuilder.addMetadata("filePath", orcFilePath.toString()); + eventBuilder.addMetadata("exceptionType", "org.apache.orc.FileFormatException"); + eventBuilder.addMetadata("exceptionMessage", String.format("Not a valid ORC file %s (maxFileLength= 9223372036854775807)", orcFilePath)); + Mockito.verify(mockContext, Mockito.times(1)).submitEvent(eventBuilder.build()); + } + + @Test + 
public void testOrcValidationWithContent() throws IOException { + try (FileWriter writer = new FileWriter(orcFile)) { + // write a corrupted ORC file that only contains the header and invalid protobuf content + writer.write(OrcFile.MAGIC); + writer.write("\n"); + } - MetricContext mockContext = Mockito.mock(MetricContext.class); - Path p = new Path(corruptedOrcFile.getAbsolutePath()); - Assert.assertThrows(FileFormatException.class, - () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, p, readerOptions, mockContext)); + Assert.assertThrows(com.google.protobuf25.InvalidProtocolBufferException.class, + () -> GobblinBaseOrcWriter.assertOrcFileIsValid(fs, orcFilePath, new OrcFile.ReaderOptions(conf), mockContext)); GobblinEventBuilder eventBuilder = new GobblinEventBuilder(CORRUPTED_ORC_FILE_DELETION_EVENT, GobblinBaseOrcWriter.ORC_WRITER_NAMESPACE); - eventBuilder.addMetadata("filePath", p.toString()); + eventBuilder.addMetadata("filePath", orcFilePath.toString()); + eventBuilder.addMetadata("exceptionType", "com.google.protobuf25.InvalidProtocolBufferException"); + eventBuilder.addMetadata("exceptionMessage", "Protocol message tag had invalid wire type."); Mockito.verify(mockContext, Mockito.times(1)) .submitEvent(eventBuilder.build()); }