Merged

Changes from all commits
@@ -87,10 +87,7 @@ public void run(Path[] dirPaths, TableName[] tableNames, Path restoreRootDir,
LOG.debug("Restoring HFiles from directory " + bulkOutputPath);
}

-if (loader.bulkLoad(newTableNames[i], bulkOutputPath).isEmpty()) {
-  throw new IOException("Can not restore from backup directory " + dirs
-    + " (check Hadoop and HBase logs). Bulk loader returns null");
-}
+loader.bulkLoad(newTableNames[i], bulkOutputPath);
Contributor:
Is this a safe change? It solves one case where empty HFiles could fail the restore, but I wonder whether there are other cases where the bulk load might unexpectedly not restore any files.

Contributor Author:
I'm afraid I don't have enough knowledge about the internals to give an informed opinion. The test suite finds no problems with the change, but of course that's no guarantee.

But I think that if there were cases where loading nothing in the bulk load indicated an error, they would have to be detected elsewhere rather than here. Restoring no files is a valid use case, so the only problems I can think of are an issue when those HFiles are written, or something happening to those files before they are restored.

  • The former should already be detected as an issue, I'd hope.
  • The latter could be something that damages or deletes the HFiles. The only (valid) way to detect that would be to track how many (or even which specific) HFiles should be loaded: metadata recorded when creating the backup and verified when restoring (a rough sketch follows below). But the assumption that this could happen would have far more repercussions outside of backup & restore, in my opinion.
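For illustration only, a rough sketch of that kind of metadata check. Everything here is hypothetical: the PR adds no such metadata, and a value like `expectedHFileCount` would have to be recorded in the backup manifest at backup time.

```java
import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

// Hypothetical sketch, not part of this PR: verify at restore time that the
// HFiles staged for bulk load match a count recorded in backup metadata.
final class RestoreVerificationSketch {

  static void verifyHFileCount(FileSystem fs, Path bulkOutputPath,
    long expectedHFileCount) throws IOException {
    long actual = 0;
    // Recursively count the data files staged under the restore output directory.
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(bulkOutputPath, true);
    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      // Skip bookkeeping files such as _SUCCESS markers.
      if (status.isFile() && !status.getPath().getName().startsWith("_")) {
        actual++;
      }
    }
    if (actual != expectedHFileCount) {
      throw new IOException("Backup metadata recorded " + expectedHFileCount
        + " HFiles but found " + actual + " under " + bulkOutputPath);
    }
  }
}
```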

} else {
throw new IOException("Can not restore from backup directory " + dirs
+ " (check Hadoop/MR and HBase logs). Player return code =" + result);
@@ -61,7 +61,7 @@
*/
@InterfaceAudience.Private
public class RestoreTool {
-public static final Logger LOG = LoggerFactory.getLogger(BackupUtils.class);
+public static final Logger LOG = LoggerFactory.getLogger(RestoreTool.class);
private final static long TABLE_AVAILABILITY_WAIT_TIME = 180000;

private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
@@ -437,6 +437,10 @@ byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException
HFile.Reader reader = HFile.createReader(fs, hfile, conf);
final byte[] first, last;
try {
+if (reader.getEntries() == 0) {
+  LOG.debug("Skipping hfile with 0 entries: " + hfile);
+  continue;
+}
first = reader.getFirstRowKey().get();
last = reader.getLastRowKey().get();
LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
@@ -491,8 +495,12 @@ private void checkAndCreateTable(Connection conn, TableName targetTableName,
admin.createTable(htd);
} else {
keys = generateBoundaryKeys(regionDirList);
-// create table using table descriptor and region boundaries
-admin.createTable(htd, keys);
+if (keys.length > 0) {
+  // create table using table descriptor and region boundaries
+  admin.createTable(htd, keys);
+} else {
+  admin.createTable(htd);
+}
}
} catch (NamespaceNotFoundException e) {
LOG.warn("There was no namespace and the same will be created");
@@ -27,6 +27,7 @@
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.util.ToolRunner;
import org.junit.ClassRule;
@@ -71,6 +72,43 @@ public void testFullRestoreSingle() throws Exception {
hba.close();
}

+@Test
+public void testFullRestoreSingleWithRegion() throws Exception {
+  LOG.info("test full restore on a single table empty table that has a region");
+
+  // This test creates its own table so other tests are not affected (we adjust it in this test)
+  TableName tableName = TableName.valueOf("table-full-restore-single-region");
+  TEST_UTIL.createTable(tableName, famName);
+
+  Admin admin = TEST_UTIL.getAdmin();
+
+  // Add & remove data to ensure a region is active, but functionally empty
+  Table table = TEST_UTIL.getConnection().getTable(tableName);
+  loadTable(table);
+  admin.flush(tableName);
+  TEST_UTIL.deleteTableData(tableName);
+  admin.flush(tableName);
+
+  TEST_UTIL.compact(tableName, true);
+
+  List<TableName> tables = Lists.newArrayList(tableName);
+  String backupId = fullTableBackup(tables);
+  assertTrue(checkSucceeded(backupId));
+
+  LOG.info("backup complete");
+
+  TEST_UTIL.deleteTable(tableName);
+
+  TableName[] tableset = new TableName[] { tableName };
+  TableName[] tablemap = new TableName[] { tableName };
+  BackupAdmin client = getBackupAdmin();
+  client.restore(BackupUtils.createRestoreRequest(BACKUP_ROOT_DIR, backupId, false, tableset,
+    tablemap, false));
+  assertTrue(admin.tableExists(tableName));
+  TEST_UTIL.deleteTable(tableName);
+  admin.close();
+}
+
@Test
public void testFullRestoreSingleCommand() throws Exception {
LOG.info("test full restore on a single table empty table: command-line");