-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-24757 : ReplicationSink should limit row count in batch mutation based on hbase.rpc.rows.warning.threshold #2127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import java.io.IOException; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
@@ -37,7 +38,6 @@ | |
| import org.apache.hadoop.hbase.CellUtil; | ||
| import org.apache.hadoop.hbase.HBaseConfiguration; | ||
| import org.apache.hadoop.hbase.HConstants; | ||
| import org.apache.hadoop.hbase.Stoppable; | ||
| import org.apache.hadoop.hbase.TableName; | ||
| import org.apache.hadoop.hbase.TableNotFoundException; | ||
| import org.apache.hadoop.hbase.client.AsyncClusterConnection; | ||
|
|
@@ -54,6 +54,7 @@ | |
| import org.apache.hadoop.hbase.util.FutureUtils; | ||
| import org.apache.hadoop.hbase.util.Pair; | ||
| import org.apache.hadoop.hbase.wal.WALEdit; | ||
| import org.apache.hbase.thirdparty.com.google.common.collect.Lists; | ||
| import org.apache.yetus.audience.InterfaceAudience; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
@@ -93,15 +94,21 @@ public class ReplicationSink { | |
| private SourceFSConfigurationProvider provider; | ||
| private WALEntrySinkFilter walEntrySinkFilter; | ||
|
|
||
| /** | ||
| * Row size threshold for multi requests above which a warning is logged | ||
| */ | ||
| private final int rowSizeWarnThreshold; | ||
|
|
||
| /** | ||
| * Create a sink for replication | ||
| * @param conf conf object | ||
| * @param stopper boolean to tell this thread to stop | ||
| * @throws IOException thrown when HDFS goes bad or bad file name | ||
| */ | ||
| public ReplicationSink(Configuration conf, Stoppable stopper) | ||
| public ReplicationSink(Configuration conf) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the Stoppable unused? Usually it's a chain for a Service to pull on when it meets a condition it can't deal with... one that is so bad it wants to stop the process.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed on the Stoppable usage in general, but here it is unused anyway :(
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok |
||
| throws IOException { | ||
| this.conf = HBaseConfiguration.create(conf); | ||
| rowSizeWarnThreshold = conf.getInt( | ||
| HConstants.BATCH_ROWS_THRESHOLD_NAME, HConstants.BATCH_ROWS_THRESHOLD_DEFAULT); | ||
| decorateConf(); | ||
| this.metrics = new MetricsSink(); | ||
| this.walEntrySinkFilter = setupWALEntrySinkFilter(); | ||
|
|
@@ -211,11 +218,7 @@ public void replicateEntries(List<WALEntry> entries, final CellScanner cells, | |
| // Map of table name Vs list of pair of family and list of | ||
| // hfile paths from its namespace | ||
| Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap = | ||
| bulkLoadsPerClusters.get(bld.getClusterIdsList()); | ||
| if (bulkLoadHFileMap == null) { | ||
| bulkLoadHFileMap = new HashMap<>(); | ||
| bulkLoadsPerClusters.put(bld.getClusterIdsList(), bulkLoadHFileMap); | ||
| } | ||
| bulkLoadsPerClusters.computeIfAbsent(bld.getClusterIdsList(), k -> new HashMap<>()); | ||
| buildBulkLoadHFileMap(bulkLoadHFileMap, table, bld); | ||
| } | ||
| } else { | ||
|
|
@@ -250,7 +253,7 @@ public void replicateEntries(List<WALEntry> entries, final CellScanner cells, | |
| if (!rowMap.isEmpty()) { | ||
| LOG.debug("Started replicating mutations."); | ||
| for (Entry<TableName, Map<List<UUID>, List<Row>>> entry : rowMap.entrySet()) { | ||
| batch(entry.getKey(), entry.getValue().values()); | ||
| batch(entry.getKey(), entry.getValue().values(), rowSizeWarnThreshold); | ||
| } | ||
| LOG.debug("Finished replicating mutations."); | ||
| } | ||
|
|
@@ -366,16 +369,8 @@ private java.util.UUID toUUID(final HBaseProtos.UUID uuid) { | |
| */ | ||
| private <K1, K2, V> List<V> addToHashMultiMap(Map<K1, Map<K2,List<V>>> map, K1 key1, | ||
| K2 key2, V value) { | ||
| Map<K2,List<V>> innerMap = map.get(key1); | ||
| if (innerMap == null) { | ||
| innerMap = new HashMap<>(); | ||
| map.put(key1, innerMap); | ||
| } | ||
| List<V> values = innerMap.get(key2); | ||
| if (values == null) { | ||
| values = new ArrayList<>(); | ||
| innerMap.put(key2, values); | ||
| } | ||
| Map<K2, List<V>> innerMap = map.computeIfAbsent(key1, k -> new HashMap<>()); | ||
| List<V> values = innerMap.computeIfAbsent(key2, k -> new ArrayList<>()); | ||
| values.add(value); | ||
| return values; | ||
| } | ||
|
|
@@ -403,13 +398,24 @@ public void stopReplicationSinkServices() { | |
| * Do the changes and handle the pool | ||
| * @param tableName table to insert into | ||
| * @param allRows list of actions | ||
| * @param batchRowSizeThreshold rowSize threshold for batch mutation | ||
| */ | ||
| private void batch(TableName tableName, Collection<List<Row>> allRows) throws IOException { | ||
| private void batch(TableName tableName, Collection<List<Row>> allRows, int batchRowSizeThreshold) | ||
| throws IOException { | ||
| if (allRows.isEmpty()) { | ||
| return; | ||
| } | ||
| AsyncTable<?> table = getConnection().getTable(tableName); | ||
| List<Future<?>> futures = allRows.stream().map(table::batchAll).collect(Collectors.toList()); | ||
| List<Future<?>> futures = new ArrayList<>(); | ||
| for (List<Row> rows : allRows) { | ||
| List<List<Row>> batchRows; | ||
| if (rows.size() > batchRowSizeThreshold) { | ||
| batchRows = Lists.partition(rows, batchRowSizeThreshold); | ||
| } else { | ||
| batchRows = Collections.singletonList(rows); | ||
| } | ||
| futures.addAll(batchRows.stream().map(table::batchAll).collect(Collectors.toList())); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do the edits that are beyond the limit get handled in the next batch?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's true, they are handled in the next batch.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. UT |
||
| for (Future<?> future : futures) { | ||
| try { | ||
| FutureUtils.get(future); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why the move? HConstants is often considered an anti-pattern; better to have constants beside where they are used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want `RsRpcServices` as well as `ReplicationSink` both to use these constants. Would you still recommend having duplicate copies?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. Two very disparate contexts. This is probably the only case for HConstants.