-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-28385 make Scan estimates more realistic #5713
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5cbf9ad
f6bce77
f4a04e5
a2e58b4
6eac2c7
0274500
324855c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,10 +27,17 @@ | |
| import org.apache.yetus.audience.InterfaceAudience; | ||
| import org.apache.yetus.audience.InterfaceStability; | ||
|
|
||
| import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; | ||
|
|
||
| @InterfaceAudience.Private | ||
| @InterfaceStability.Evolving | ||
| public class DefaultOperationQuota implements OperationQuota { | ||
|
|
||
| // a single scan estimate can consume no more than this proportion of the limiter's limit | ||
| // this prevents a long-running scan from being estimated at, say, 100MB of IO against | ||
| // a <100MB/IO throttle (because this would never succeed) | ||
| private static final double MAX_SCAN_ESTIMATE_PROPORTIONAL_LIMIT_CONSUMPTION = 0.9; | ||
|
|
||
| protected final List<QuotaLimiter> limiters; | ||
| private final long writeCapacityUnit; | ||
| private final long readCapacityUnit; | ||
|
|
@@ -53,13 +60,17 @@ public class DefaultOperationQuota implements OperationQuota { | |
| protected long readCapacityUnitDiff = 0; | ||
| private boolean useResultSizeBytes; | ||
| private long blockSizeBytes; | ||
| private long maxScanEstimate; | ||
|
|
||
| public DefaultOperationQuota(final Configuration conf, final int blockSizeBytes, | ||
| final QuotaLimiter... limiters) { | ||
| this(conf, Arrays.asList(limiters)); | ||
| this.useResultSizeBytes = | ||
| conf.getBoolean(OperationQuota.USE_RESULT_SIZE_BYTES, USE_RESULT_SIZE_BYTES_DEFAULT); | ||
| this.blockSizeBytes = blockSizeBytes; | ||
| long readSizeLimit = | ||
| Arrays.stream(limiters).mapToLong(QuotaLimiter::getReadLimit).min().orElse(Long.MAX_VALUE); | ||
| maxScanEstimate = Math.round(MAX_SCAN_ESTIMATE_PROPORTIONAL_LIMIT_CONSUMPTION * readSizeLimit); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -80,21 +91,45 @@ public DefaultOperationQuota(final Configuration conf, final List<QuotaLimiter> | |
| } | ||
|
|
||
| @Override | ||
| public void checkQuota(int numWrites, int numReads, int numScans) throws RpcThrottlingException { | ||
| updateEstimateConsumeQuota(numWrites, numReads, numScans); | ||
| public void checkQuota(int numWrites, int numReads) throws RpcThrottlingException { | ||
| updateEstimateConsumeBatchQuota(numWrites, numReads); | ||
|
|
||
| readAvailable = Long.MAX_VALUE; | ||
| for (final QuotaLimiter limiter : limiters) { | ||
| if (limiter.isBypass()) continue; | ||
| if (limiter.isBypass()) { | ||
| continue; | ||
| } | ||
|
|
||
| limiter.checkQuota(numWrites, writeConsumed, numReads + numScans, readConsumed, | ||
| limiter.checkQuota(numWrites, writeConsumed, numReads, readConsumed, | ||
| writeCapacityUnitConsumed, readCapacityUnitConsumed); | ||
| readAvailable = Math.min(readAvailable, limiter.getReadAvailable()); | ||
| } | ||
|
|
||
| for (final QuotaLimiter limiter : limiters) { | ||
| limiter.grabQuota(numWrites, writeConsumed, numReads + numScans, readConsumed, | ||
| writeCapacityUnitConsumed, readCapacityUnitConsumed); | ||
| limiter.grabQuota(numWrites, writeConsumed, numReads, readConsumed, writeCapacityUnitConsumed, | ||
| readCapacityUnitConsumed); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public void checkScanQuota(ClientProtos.ScanRequest scanRequest, long maxScannerResultSize, | ||
| long maxBlockBytesScanned) throws RpcThrottlingException { | ||
| updateEstimateConsumeScanQuota(scanRequest, maxScannerResultSize, maxBlockBytesScanned); | ||
|
|
||
| readAvailable = Long.MAX_VALUE; | ||
| for (final QuotaLimiter limiter : limiters) { | ||
| if (limiter.isBypass()) { | ||
| continue; | ||
| } | ||
|
|
||
| limiter.checkQuota(0, writeConsumed, 1, readConsumed, writeCapacityUnitConsumed, | ||
| readCapacityUnitConsumed); | ||
| readAvailable = Math.min(readAvailable, limiter.getReadAvailable()); | ||
| } | ||
|
|
||
| for (final QuotaLimiter limiter : limiters) { | ||
| limiter.grabQuota(0, writeConsumed, 1, readConsumed, writeCapacityUnitConsumed, | ||
| readCapacityUnitConsumed); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -158,24 +193,60 @@ public void addMutation(final Mutation mutation) { | |
| * Update estimate quota(read/write size/capacityUnits) which will be consumed | ||
| * @param numWrites the number of write requests | ||
| * @param numReads the number of read requests | ||
| * @param numScans the number of scan requests | ||
| */ | ||
| protected void updateEstimateConsumeQuota(int numWrites, int numReads, int numScans) { | ||
| protected void updateEstimateConsumeBatchQuota(int numWrites, int numReads) { | ||
| writeConsumed = estimateConsume(OperationType.MUTATE, numWrites, 100); | ||
|
|
||
| if (useResultSizeBytes) { | ||
| readConsumed = estimateConsume(OperationType.GET, numReads, 100); | ||
| readConsumed += estimateConsume(OperationType.SCAN, numScans, 1000); | ||
| } else { | ||
| // assume 1 block required for reads. this is probably a low estimate, which is okay | ||
| readConsumed = numReads > 0 ? blockSizeBytes : 0; | ||
| readConsumed += numScans > 0 ? blockSizeBytes : 0; | ||
| } | ||
|
|
||
| writeCapacityUnitConsumed = calculateWriteCapacityUnit(writeConsumed); | ||
| readCapacityUnitConsumed = calculateReadCapacityUnit(readConsumed); | ||
| } | ||
|
|
||
| /** | ||
| * Update estimate quota(read/write size/capacityUnits) which will be consumed | ||
| * @param scanRequest the scan to be executed | ||
| * @param maxScannerResultSize the maximum bytes to be returned by the scanner | ||
| * @param maxBlockBytesScanned the maximum bytes scanned in a single RPC call by the scanner | ||
| */ | ||
| protected void updateEstimateConsumeScanQuota(ClientProtos.ScanRequest scanRequest, | ||
| long maxScannerResultSize, long maxBlockBytesScanned) { | ||
| if (useResultSizeBytes) { | ||
| readConsumed = estimateConsume(OperationType.GET, 1, 1000); | ||
| } else { | ||
| /* | ||
| * Estimating scan workload is more complicated, and if we severely underestimate workloads | ||
| * then throttled clients will exhaust retries too quickly, and could saturate the RPC layer. | ||
| * We have access to the ScanRequest's nextCallSeq number, the maxScannerResultSize, and the | ||
| * maxBlockBytesScanned by every relevant Scanner#next call. With these inputs we can make a | ||
| * more informed estimate about the scan's workload. | ||
| */ | ||
| long estimate; | ||
| if (scanRequest.getNextCallSeq() == 0) { | ||
| // start scanners with an optimistic 1 block IO estimate | ||
| // it is better to underestimate a large scan in the beginning | ||
| // than to overestimate, and block, a small scan | ||
| estimate = blockSizeBytes; | ||
| } else { | ||
| // scanner result sizes will be limited by quota availability, regardless of | ||
| // maxScannerResultSize. This means that we cannot safely assume that a long-running | ||
| // scan with a small maxBlockBytesScanned would not prefer to pull down | ||
| // a larger payload. So we should estimate with the assumption that long-running scans | ||
| // are appropriately configured to approach their maxScannerResultSize per RPC call | ||
| estimate = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like we should only double if the scan reached the previous limit.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you mind elaborating a bit? I don't know exactly what you mean
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So every time a next() comes in, we double the previous BBS as the new limit. So let's say someone is doing a long scan with a caching of 1. So each time it's doing only 1 block of IO, but we are still doubling the new estimate. So it becomes harder to get a quota if we are always estimating 2x more than really fetching. The doubling is good for reaching a plateau quickly, but then it should stop. For the 1 block/1 caching case, it might only be 64k vs 128kb estimate. Not a huge deal. But lets say we have a can that's doing 25mb per next(). To each time set the estimate to 50mb, that's quite a difference. Ideally once the scan itself plateaus in BBS, that should be the limit going forward. So let's say for the 25mb example, it does 64, 129, 256, etc up to 25. The next request the estimate is 50, but the resulting bbs is only 25. Ideally after that we just use 25 as the estimate going forward
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Is this your proposal for what we should do, or your read on what the current estimation logic will do? If we're worried about overestimation, then I think this logic is actually worse than what you've described because we don't double the previous BBS — rather we multiply the max BBS by the next call sequence. So for 1 block/1 caching case, we'd quickly estimate at a, likely much higher, So, rather than risk falling into a cycle of at the expense of potentially overestimating some scans. We also mitigate the risk of overestimation with the introduction of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was describing what I thought the current implementation does. It seems like every new scan will increase the estimate u til we hit the server configured max. But I'd like to stop increasing when we stop exceeding the old limit. Imagine the following 4 next calls: Estimate = 64k; Scanned = 64k So the actually scanned amount had leveled out, but the estimate keeps increasing. Ideally it'd be like this instead: Estimate = 64k; Scanned = 64k Or something else that doesn't keep increasing. Of course the impact here is not huge yet, but for larger values it seems to matter a bit more. Unless I'm missing something |
||
| Math.min(maxScannerResultSize, scanRequest.getNextCallSeq() * maxBlockBytesScanned); | ||
|
rmdmattingly marked this conversation as resolved.
Outdated
|
||
| } | ||
| readConsumed = Math.min(maxScanEstimate, estimate); | ||
| } | ||
|
|
||
| readCapacityUnitConsumed = calculateReadCapacityUnit(readConsumed); | ||
| } | ||
|
|
||
| private long estimateConsume(final OperationType type, int numReqs, long avgSize) { | ||
| if (numReqs > 0) { | ||
| return avgSize * numReqs; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.