 
 package org.apache.spark.streaming.kafka09
 
-import java.{ util => ju }
+import java.{util => ju}
 import java.util.concurrent.ConcurrentLinkedQueue
 import java.util.concurrent.atomic.AtomicReference
 
-import scala.annotation.tailrec
 import scala.collection.JavaConverters._
 import scala.collection.mutable
 
 import org.apache.kafka.clients.consumer._
-import org.apache.kafka.common.{ PartitionInfo, TopicPartition }
+import org.apache.kafka.common.TopicPartition
 
-import org.apache.spark.SparkException
 import org.apache.spark.internal.Logging
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.{StreamingContext, Time}
@@ -74,6 +72,14 @@ private[spark] class DirectKafkaInputDStream[K, V](
     kc
   }
 
+  def consumerForAssign(): KafkaConsumer[Long, String] = this.synchronized {
+    val properties = consumerStrategy.executorKafkaParams
+    properties.put("max.poll.records", "1")
+    properties.put(ConsumerConfig.GROUP_ID_CONFIG,
+      s"${properties.get(ConsumerConfig.GROUP_ID_CONFIG)}_assignGroup")
+    new KafkaConsumer[Long, String](properties)
+  }
+
   override def persist(newLevel: StorageLevel): DStream[ConsumerRecord[K, V]] = {
     logError("Kafka ConsumerRecord is not serializable. " +
       "Use .map to extract fields before calling .persist or .window")
@@ -240,10 +246,29 @@ private[spark] class DirectKafkaInputDStream[K, V](
 
   override def start(): Unit = {
     val c = consumer
+    val consumerAssign = consumerForAssign
+    val pollTimeout = ssc.sparkContext.getConf
+      .getLong("spark.streaming.kafka.consumer.driver.poll.ms", 120000)
     paranoidPoll(c)
     if (currentOffsets.isEmpty) {
       currentOffsets = c.assignment().asScala.map { tp =>
-        tp -> c.position(tp)
+        tp -> {
+          val position = c.position(tp)
+
+          consumerAssign.assign(ju.Arrays.asList(tp))
+          val records = consumerAssign.poll(pollTimeout).iterator()
+          val firstRecordOffset = if (records.hasNext) {
+            records.next().offset()
+          } else {
+            c.endOffsets(ju.Arrays.asList(tp)).get(tp).longValue()
+          }
+
+          if (position < firstRecordOffset) {
+            firstRecordOffset
+          } else {
+            position
+          }
+        }
       }.toMap
     }
 
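For readers of this change, here is a minimal standalone sketch of the offset-resolution logic added to start() above, written against the public Kafka consumer API. It is illustrative only: the name safeStartingOffset and the probe parameter are not part of the commit, and unlike the commit it reads endOffsets from the probe consumer rather than the driver consumer.

import java.{util => ju}
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition

// Sketch only: resolve a starting offset for one partition by comparing the
// consumer's current position with the offset of the first record a probe
// consumer can actually fetch, and taking whichever is larger. If the probe
// fetches nothing within the timeout, the partition's end offset is used.
def safeStartingOffset(
    probe: KafkaConsumer[Long, String],
    tp: TopicPartition,
    position: Long,
    pollTimeoutMs: Long): Long = {
  probe.assign(ju.Arrays.asList(tp))
  val records = probe.poll(pollTimeoutMs).iterator()
  val firstAvailable =
    if (records.hasNext) records.next().offset()
    else probe.endOffsets(ju.Arrays.asList(tp)).get(tp).longValue()
  math.max(position, firstAvailable)
}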