@@ -151,6 +151,28 @@ public void testJsonKafkaSource() {
151
151
assertEquals (Option .empty (), fetch4AsRows .getBatch ());
152
152
}
153
153
154
+ // test whether empty messages can be filtered
155
+ @ Test
156
+ public void testJsonKafkaSourceFilterNullMsg () {
157
+ // topic setup.
158
+ testUtils .createTopic (TEST_TOPIC_NAME , 2 );
159
+ HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator ();
160
+ TypedProperties props = createPropsForJsonSource (null , "earliest" );
161
+
162
+ Source jsonSource = new JsonKafkaSource (props , jsc , sparkSession , schemaProvider , metrics );
163
+ SourceFormatAdapter kafkaSource = new SourceFormatAdapter (jsonSource );
164
+
165
+ // 1. Extract without any checkpoint => get all the data, respecting sourceLimit
166
+ assertEquals (Option .empty (), kafkaSource .fetchNewDataInAvroFormat (Option .empty (), Long .MAX_VALUE ).getBatch ());
167
+ // Send 1000 non-null messages to Kafka
168
+ testUtils .sendMessages (TEST_TOPIC_NAME , Helpers .jsonifyRecords (dataGenerator .generateInserts ("000" , 1000 )));
169
+ // Send 100 null messages to Kafka
170
+ testUtils .sendMessages (TEST_TOPIC_NAME ,new String [100 ]);
171
+ InputBatch <JavaRDD <GenericRecord >> fetch1 = kafkaSource .fetchNewDataInAvroFormat (Option .empty (), Long .MAX_VALUE );
172
+ // Verify that messages with null values are filtered
173
+ assertEquals (1000 , fetch1 .getBatch ().get ().count ());
174
+ }
175
+
154
176
// test case with kafka offset reset strategy
155
177
@ Test
156
178
public void testJsonKafkaSourceResetStrategy () {
0 commit comments