13
13
from kafka .future import Future
14
14
from kafka .metrics .stats import Avg , Count , Max , Rate
15
15
from kafka .protocol .fetch import FetchRequest
16
- from kafka .protocol .message import PartialMessage
17
16
from kafka .protocol .offset import (
18
17
OffsetRequest , OffsetResetStrategy , UNKNOWN_OFFSET
19
18
)
19
+ from kafka .record import MemoryRecords
20
20
from kafka .serializer import Deserializer
21
21
from kafka .structs import TopicPartition , OffsetAndTimestamp
22
22
@@ -295,7 +295,7 @@ def fetched_records(self, max_records=None):
295
295
296
296
Raises:
297
297
OffsetOutOfRangeError: if no subscription offset_reset_strategy
298
- InvalidMessageError : if message crc validation fails (check_crcs
298
+ CorruptRecordException : if message crc validation fails (check_crcs
299
299
must be set to True)
300
300
RecordTooLargeError: if a message is larger than the currently
301
301
configured max_partition_fetch_bytes
@@ -440,57 +440,25 @@ def _message_generator(self):
440
440
441
441
self ._next_partition_records = None
442
442
443
- def _unpack_message_set (self , tp , messages ):
443
+ def _unpack_message_set (self , tp , records ):
444
444
try :
445
- for offset , size , msg in messages :
446
- if self .config ['check_crcs' ] and not msg .validate_crc ():
447
- raise Errors .InvalidMessageError (msg )
448
-
449
- if not msg .is_compressed ():
450
- yield self ._parse_record (tp , offset , msg .timestamp , msg )
451
-
452
- else :
453
- # If relative offset is used, we need to decompress the entire message first
454
- # to compute the absolute offset.
455
- inner_mset = msg .decompress ()
456
-
457
- # There should only ever be a single layer of compression
458
- if inner_mset [0 ][- 1 ].is_compressed ():
459
- log .warning ('MessageSet at %s offset %d appears '
460
- ' double-compressed. This should not'
461
- ' happen -- check your producers!' ,
462
- tp , offset )
463
- if self .config ['skip_double_compressed_messages' ]:
464
- log .warning ('Skipping double-compressed message at'
465
- ' %s %d' , tp , offset )
466
- continue
467
-
468
- if msg .magic > 0 :
469
- last_offset , _ , _ = inner_mset [- 1 ]
470
- absolute_base_offset = offset - last_offset
471
- else :
472
- absolute_base_offset = - 1
473
-
474
- for inner_offset , inner_size , inner_msg in inner_mset :
475
- if msg .magic > 0 :
476
- # When magic value is greater than 0, the timestamp
477
- # of a compressed message depends on the
478
- # timestamp type of the wrapper message:
479
-
480
- if msg .timestamp_type == 0 : # CREATE_TIME (0)
481
- inner_timestamp = inner_msg .timestamp
482
-
483
- elif msg .timestamp_type == 1 : # LOG_APPEND_TIME (1)
484
- inner_timestamp = msg .timestamp
485
-
486
- else :
487
- raise ValueError ('Unknown timestamp type: {0}' .format (msg .timestamp_type ))
488
- else :
489
- inner_timestamp = msg .timestamp
490
-
491
- if absolute_base_offset >= 0 :
492
- inner_offset += absolute_base_offset
493
- yield self ._parse_record (tp , inner_offset , inner_timestamp , inner_msg )
445
+ batch = records .next_batch ()
446
+ while batch is not None :
447
+ for record in batch :
448
+ key_size = len (record .key ) if record .key is not None else - 1
449
+ value_size = len (record .value ) if record .value is not None else - 1
450
+ key = self ._deserialize (
451
+ self .config ['key_deserializer' ],
452
+ tp .topic , record .key )
453
+ value = self ._deserialize (
454
+ self .config ['value_deserializer' ],
455
+ tp .topic , record .value )
456
+ yield ConsumerRecord (
457
+ tp .topic , tp .partition , record .offset , record .timestamp ,
458
+ record .timestamp_type , key , value , record .checksum ,
459
+ key_size , value_size )
460
+
461
+ batch = records .next_batch ()
494
462
495
463
# If unpacking raises StopIteration, it is erroneously
496
464
# caught by the generator. We want all exceptions to be raised
@@ -505,15 +473,6 @@ def _unpack_message_set(self, tp, messages):
505
473
log .exception ('AssertionError raised unpacking messageset: %s' , e )
506
474
raise
507
475
508
- def _parse_record (self , tp , offset , timestamp , msg ):
509
- key = self ._deserialize (self .config ['key_deserializer' ], tp .topic , msg .key )
510
- value = self ._deserialize (self .config ['value_deserializer' ], tp .topic , msg .value )
511
- return ConsumerRecord (tp .topic , tp .partition , offset ,
512
- timestamp , msg .timestamp_type ,
513
- key , value , msg .crc ,
514
- len (msg .key ) if msg .key is not None else - 1 ,
515
- len (msg .value ) if msg .value is not None else - 1 )
516
-
517
476
def __iter__ (self ): # pylint: disable=non-iterator-returned
518
477
return self
519
478
@@ -783,7 +742,7 @@ def _parse_fetched_data(self, completed_fetch):
783
742
784
743
error_code , highwater = completed_fetch .partition_data [:2 ]
785
744
error_type = Errors .for_code (error_code )
786
- messages = completed_fetch . partition_data [- 1 ]
745
+ records = MemoryRecords (completed_fetch .partition_data [- 1 ])
787
746
788
747
try :
789
748
if not self ._subscriptions .is_fetchable (tp ):
@@ -807,21 +766,17 @@ def _parse_fetched_data(self, completed_fetch):
807
766
position )
808
767
return None
809
768
810
- partial = None
811
- if messages and isinstance (messages [- 1 ][- 1 ], PartialMessage ):
812
- partial = messages .pop ()
813
-
814
- if messages :
769
+ if records .has_next ():
815
770
log .debug ("Adding fetched record for partition %s with"
816
771
" offset %d to buffered record list" , tp ,
817
772
position )
818
- unpacked = list (self ._unpack_message_set (tp , messages ))
773
+ unpacked = list (self ._unpack_message_set (tp , records ))
819
774
parsed_records = self .PartitionRecords (fetch_offset , tp , unpacked )
820
- last_offset , _ , _ = messages [- 1 ]
775
+ last_offset = unpacked [- 1 ]. offset
821
776
self ._sensors .records_fetch_lag .record (highwater - last_offset )
822
- num_bytes = sum ( msg [ 1 ] for msg in messages )
823
- records_count = len (messages )
824
- elif partial :
777
+ num_bytes = records . valid_bytes ( )
778
+ records_count = len (unpacked )
779
+ elif records . size_in_bytes () > 0 :
825
780
# we did not read a single message from a non-empty
826
781
# buffer because that message's size is larger than
827
782
# fetch size, in this case record this exception
0 commit comments