Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public interface MetricsMasterSource extends BaseSource {
String CLUSTER_REQUESTS_NAME = "clusterRequests";
String CLUSTER_READ_REQUESTS_NAME = "clusterReadRequests";
String CLUSTER_WRITE_REQUESTS_NAME = "clusterWriteRequests";
String OLD_WAL_DIR_SIZE_NAME = "oldWALsDirSize";
String MASTER_ACTIVE_TIME_DESC = "Master Active Time";
String MASTER_START_TIME_DESC = "Master Start Time";
String MASTER_FINISHED_INITIALIZATION_TIME_DESC =
Expand All @@ -91,6 +92,7 @@ public interface MetricsMasterSource extends BaseSource {
String OFFLINE_REGION_COUNT_DESC = "Number of Offline Regions";

String SERVER_CRASH_METRIC_PREFIX = "serverCrash";
String OLD_WAL_DIR_SIZE_DESC = "size of old WALs directory in bytes";

/**
* Increment the number of requests the cluster has seen.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) {
.tag(Interns.info(SERVER_NAME_NAME, SERVER_NAME_DESC), masterWrapper.getServerName())
.tag(Interns.info(CLUSTER_ID_NAME, CLUSTER_ID_DESC), masterWrapper.getClusterId())
.tag(Interns.info(IS_ACTIVE_MASTER_NAME, IS_ACTIVE_MASTER_DESC),
String.valueOf(masterWrapper.getIsActiveMaster()));
String.valueOf(masterWrapper.getIsActiveMaster()))
.addGauge(Interns.info(OLD_WAL_DIR_SIZE_NAME,
OLD_WAL_DIR_SIZE_DESC), masterWrapper.getOldWALsDirSize());
}

metricsRegistry.snapshot(metricsRecordBuilder, all);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,9 @@ public interface MetricsMasterWrapper {
* @return pair of count for online regions and offline regions
*/
PairOfSameType<Integer> getRegionCounts();

/**
 * Get the size of old WALs directory in bytes.
 * @return the size of the old WALs directory, in bytes
 */
long getOldWALsDirSize();
}
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
private QuotaObserverChore quotaObserverChore;
private SnapshotQuotaObserverChore snapshotQuotaChore;
private ScheduledChore oldWALsDirSizeUpdaterChore;

private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
private ProcedureStore procedureStore;
Expand Down Expand Up @@ -1362,6 +1363,10 @@ private void finishActiveMasterInitialization() throws IOException, InterruptedE

this.rollingUpgradeChore = new RollingUpgradeChore(this);
getChoreService().scheduleChore(rollingUpgradeChore);

this.oldWALsDirSizeUpdaterChore = this.walManager.getOldWALsDirSizeUpdaterChore();
getChoreService().scheduleChore(this.oldWALsDirSizeUpdaterChore);

status.markComplete("Progress after master initialized complete");
}

Expand Down Expand Up @@ -1894,6 +1899,7 @@ protected void stopChores() {
shutdownChore(hbckChore);
shutdownChore(regionsRecoveryChore);
shutdownChore(rollingUpgradeChore);
shutdownChore(oldWALsDirSizeUpdaterChore);
}

/** Returns Get remote side's InetAddress */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
Expand Down Expand Up @@ -89,6 +91,12 @@ public boolean accept(Path p) {
// create the split log lock
private final Lock splitLogLock = new ReentrantLock();

// Old WALs directory size in bytes. The value is written by the periodic updater chore (through
// updateOldWALsDirSize) and read by metrics code (through getOldWALsDirSize), which may run on
// different threads. volatile makes every update visible to readers and guarantees that the
// 64-bit value is read and written atomically.
private volatile long oldWALsDirSize;

// Interval between recalculations of the old WALs directory size: 5 minutes, in milliseconds.
// Declared static because the value is a true constant shared by every instance.
private static final int OLD_WAL_DIR_UPDATE_INTERVAL = 5 * 60 * 1000;

/**
* Superceded by {@link SplitWALManager}; i.e. procedure-based WAL splitting rather than 'classic'
* zk-coordinated WAL splitting.
Expand All @@ -114,6 +122,7 @@ public MasterWalManager(Configuration conf, FileSystem fs, Path rootDir, MasterS
this.services = services;
this.splitLogManager = new SplitLogManager(services, conf);
this.oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
this.oldWALsDirSize = 0;
}

public void stop() {
Expand All @@ -134,6 +143,13 @@ Path getOldLogDir() {
return this.oldLogDir;
}

/**
 * Recomputes the stored size of the old WALs directory by asking the file system for the content
 * summary of {@code oldLogDir} and keeping its length.
 * @throws IOException if the content summary cannot be obtained from the file system
 */
public void updateOldWALsDirSize() throws IOException {
  final long sizeInBytes = fs.getContentSummary(oldLogDir).getLength();
  oldWALsDirSize = sizeInBytes;
}

/**
 * Returns the size of the old WALs directory, in bytes, as last stored by
 * {@link #updateOldWALsDirSize()}. No file system access happens here.
 * @return the stored old WALs directory size in bytes
 */
public long getOldWALsDirSize() {
  return oldWALsDirSize;
}

public FileSystem getFileSystem() {
return this.fs;
}
Expand Down Expand Up @@ -398,4 +414,33 @@ public void archiveMetaLog(final ServerName serverName) {
LOG.warn("Failed archiving meta log for server " + serverName, ie);
}
}

/**
 * Builds a standalone {@link Stoppable} that simply remembers whether {@code stop} has been
 * called. It starts out not stopped and is independent of any other component.
 * @return a fresh {@link Stoppable} backed by a single volatile flag
 */
private static Stoppable createDummyStoppable() {
  // A local named class instead of an anonymous one; the behaviour is the same.
  final class FlagStoppable implements Stoppable {
    private volatile boolean stopped = false;

    @Override
    public void stop(String why) {
      // The reason is not recorded; only the flag flips.
      stopped = true;
    }

    @Override
    public boolean isStopped() {
      return stopped;
    }
  }
  return new FlagStoppable();
}

public ScheduledChore getOldWALsDirSizeUpdaterChore() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have this Chore implemented in its own class? That way we don't need to define new methods on MasterWalManager that only concern this Chore's logic, like createDummyStoppable() and updateOldWALsDirSize() above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, Will do that

return new ScheduledChore("UpdateOldWALsDirSize", createDummyStoppable(), OLD_WAL_DIR_UPDATE_INTERVAL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this interval configurable? This way we could also give the option to disable this chore altogether.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we disable it, won't we encounter issues with the incorrect results of the oldWALs directory size JMX metrics? Could that potentially be a problem? That's why I didn't make it configurable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could return 0 or a negative value in the metric, and document that such values mean the metric calculation is disabled. I think it's worth making it possible for operators to disable it, because this chore would consume a spot in the ChoreExecutor pool, which could impact the execution of other important background work, such as the file cleaners.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, will modify the code for that!

/**
 * Periodic work: refreshes the old WALs directory size on the enclosing manager. An
 * {@link IOException} is logged and otherwise ignored, so the exception does not leave this method.
 */
@Override
protected void chore() {
  try {
    MasterWalManager.this.updateOldWALsDirSize();
  } catch (IOException ioe) {
    final String failure =
      "Got exception while trying to update the old WALs Directory size counter: "
        + ioe.getMessage();
    LOG.error(failure, ioe);
  }
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,12 @@ public PairOfSameType<Integer> getRegionCounts() {
return new PairOfSameType<>(0, 0);
}
}

/**
 * Reports the old WALs directory size obtained from the master's WAL manager.
 * @return 0 while {@code master} is null or not yet initialized; otherwise the value returned by
 *         {@code master.getMasterWalManager().getOldWALsDirSize()}
 */
@Override
public long getOldWALsDirSize() {
  final boolean masterReady = master != null && master.isInitialized();
  return masterReady ? master.getMasterWalManager().getOldWALsDirSize() : 0;
}
}