Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -510,19 +510,44 @@ public Void call() {

private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) {
byte[] startKey = null;
Get get = null;
Scan scan = null;
ResultScanner rs = null;
StopWatch stopWatch = new StopWatch();
startKey = region.getStartKey();
// Can't do a get on empty start row so do a Scan of first element if any instead.
if (startKey.length > 0) {
get = new Get(startKey);
Get get = new Get(startKey);
get.setCacheBlocks(false);
get.setFilter(new FirstKeyOnlyFilter());
get.addFamily(column.getName());
// Converting get object to scan to enable RAW SCAN.
// This will work for all the regions of the HBase tables except first region of the table.
scan = new Scan(get);
scan.setRaw(rawScanEnabled);
} else {
scan = new Scan();
// In case of first region of the HBase Table, we do not have start-key for the region.
// For Region Canary, we only need to scan a single row/cell in the region to make sure that
// region is accessible.
//
// When an HBase table has more than one empty region at the start of the row-key space, Canary
// will create multiple scan objects, scanning the regions in sequence until it finds the first
// available row in the table.
//
// This could result in millions of scans, depending on the size of the table and the number of
// consecutive empty regions. In a test environment, for a table with no data and 1100 empty
// regions, a single canary run was creating between half a million and 1 million scans to
// complete the canary run for the table.
//
// Since the first region of the table doesn't have a start key, we should set its end key as
// the stop row and set inclusive=false to limit the scan to a single region only.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could even add a comment that we think this would be the better approach for all regions (not only the first region of the table), but who knows what performance people may expect from the canary, and whether they are counting on most Get requests (now Scans) not returning any data, and therefore performing extra fast (?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some more comments as well as TODO for future.

//
// TODO: In the future, we can streamline Canary behaviour for all regions by doing a scan
// with startRow inclusive and stopRow exclusive, instead of having different behaviour for the
// first region of the table and the rest of its regions. That would simplify the
// implementation. For now, this change has been kept minimal to avoid any unnecessary
// perf impact.
scan.withStopRow(region.getEndKey(), false);
LOG.debug("rawScan {} for {}", rawScanEnabled, region.getTable());
scan.setRaw(rawScanEnabled);
scan.setCaching(1);
Expand All @@ -536,12 +561,8 @@ private Void readColumnFamily(Table table, ColumnFamilyDescriptor column) {
column.getNameAsString(), Bytes.toStringBinary(startKey));
try {
stopWatch.start();
if (startKey.length > 0) {
table.get(get);
} else {
rs = table.getScanner(scan);
rs.next();
}
rs = table.getScanner(scan);
rs.next();
stopWatch.stop();
this.readWriteLatency.add(stopWatch.getTime());
sink.publishReadTiming(serverName, region, column, stopWatch.getTime());
Expand Down