diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 86c45dae09ba..183ccf4fc25b 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -128,6 +128,8 @@ class BalancerClusterState { private int[] regionServerIndexWithBestRegionCachedRatio; // Maps regionName -> oldServerName -> cache ratio of the region on the old server Map> regionCacheRatioOnOldServerMap; + // cache free space available on each server, aligned to the "servers" array indices; + long[] serverBlockCacheFreeSize; private final Supplier> shuffledServerIndicesSupplier = Suppliers.memoizeWithExpiration(() -> { @@ -148,20 +150,23 @@ public String getRack(ServerName server) { BalancerClusterState(Map> clusterState, Map> loads, RegionHDFSBlockLocationFinder regionFinder, RackManager rackManager) { - this(null, clusterState, loads, regionFinder, rackManager, null); + this(null, clusterState, loads, regionFinder, rackManager, null, null); } protected BalancerClusterState(Map> clusterState, Map> loads, RegionHDFSBlockLocationFinder regionFinder, - RackManager rackManager, Map> oldRegionServerRegionCacheRatio) { - this(null, clusterState, loads, regionFinder, rackManager, oldRegionServerRegionCacheRatio); + RackManager rackManager, Map> oldRegionServerRegionCacheRatio, + Map serverBlockCacheFreeByServer) { + this(null, clusterState, loads, regionFinder, rackManager, oldRegionServerRegionCacheRatio, + serverBlockCacheFreeByServer); } @SuppressWarnings("unchecked") BalancerClusterState(Collection unassignedRegions, Map> clusterState, Map> loads, RegionHDFSBlockLocationFinder regionFinder, RackManager rackManager, - Map> oldRegionServerRegionCacheRatio) { + Map> oldRegionServerRegionCacheRatio, + Map serverBlockCacheFreeByServer) { if (unassignedRegions == null) { unassignedRegions = Collections.emptyList(); } @@ -394,6 +399,15 @@ protected BalancerClusterState(Map> clusterState, populateRegionPerLocationFromServer(regionsPerRack, colocatedReplicaCountsPerRack, serversPerRack); } + + this.serverBlockCacheFreeSize = new long[numServers]; + if (serverBlockCacheFreeByServer != null) { + for (int i = 0; i < numServers; i++) { + ServerName sn = servers[i]; + this.serverBlockCacheFreeSize[i] = + sn == null ? 0L : serverBlockCacheFreeByServer.getOrDefault(sn, 0L); + } + } } private void populateRegionPerLocationFromServer(int[][] regionsPerLocation, @@ -714,6 +728,18 @@ public int[] getOrComputeServerWithBestRegionCachedRatio() { return regionServerIndexWithBestRegionCachedRatio; } + /** + * Finds and return the latest reported cache ratio for the region on the RegionServer it's + * currently online. + */ + float getObservedRegionCacheRatio(int region) { + Deque dq = regionLoads[region]; + if (dq == null || dq.isEmpty()) { + return 0.0f; + } + return dq.getLast().getCurrentRegionCacheRatio(); + } + /** * Maps region index to rack index */ diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java index 7e5bd98a0133..28af67a4954c 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java @@ -185,7 +185,7 @@ private boolean isViolating(RegionPlan regionPlan) { private RegionPlanConditional createConditional(Class clazz, BalancerClusterState cluster) { if (cluster == null) { - cluster = new BalancerClusterState(Collections.emptyMap(), null, null, null, null); + cluster = new BalancerClusterState(Collections.emptyMap(), null, null, null, null, null); } try { Constructor ctor = diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index c3f17d88cde0..be99c9670257 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -238,7 +238,7 @@ private BalancerClusterState createCluster(List servers, } } return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager, - null); + null, null); } private List findIdleServers(List servers) { diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java index b61ca31073e5..d1e42ab77e3b 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.Size; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; @@ -58,6 +59,30 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer { "hbase.master.balancer.stochastic.throttling.cacheRatio"; public static final float CACHE_RATIO_THRESHOLD_DEFAULT = 0.8f; + /** + * Below this cache ratio on the current host, a move may be considered for the free-space + * heuristic. + */ + public static final String LOW_CACHE_RATIO_FOR_RELOCATION_KEY = + "hbase.master.balancer.cacheaware.lowCacheRatioThreshold"; + public static final float LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT = 0.35f; + + /** + * Optimistic region cache ratio assumed for cost purposes when a better host has free cache space + * (actual warmup is not modeled). + */ + public static final String POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY = + "hbase.master.balancer.cacheaware.potentialCacheRatioAfterMove"; + public static final float POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT = 0.95f; + + /** + * Minimum free block cache on a target server, as a multiple of the region's on-disk size in + * bytes, required to count that server as a relocation opportunity. + */ + public static final String MIN_FREE_CACHE_SPACE_FACTOR_KEY = + "hbase.master.balancer.cacheaware.minFreeCacheSpaceFactor"; + public static final float MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT = 1.0f; + public Float ratioThreshold; private Long sleepTime; @@ -109,6 +134,23 @@ public void updateClusterMetrics(ClusterMetrics clusterMetrics) { updateRegionLoad(); } + protected Map getServerBlockCacheFreeBytes() { + if (clusterStatus == null) { + return null; + } + Map map = new HashMap<>(); + clusterStatus.getLiveServerMetrics().forEach((sn, sm) -> map.put(sn, sm.getCacheFreeSize())); + return map; + } + + @Override + protected BalancerClusterState createState(Map> clusterState, + Map> loads, RegionHDFSBlockLocationFinder finder, + RackManager rackManager) { + return new BalancerClusterState(clusterState, loads, finder, rackManager, + regionCacheRatioOnOldServerMap, getServerBlockCacheFreeBytes()); + } + /** * Collect the amount of region cached for all the regions from all the active region servers. */ @@ -149,8 +191,16 @@ private void updateRegionLoad() { if (!ServerName.isSameAddress(currentServer, sn)) { int regionSizeMB = regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond(); + // The coldDataSize accounts for data size classified as "cold" by DataTieringManager, + // which should be kept out of cache. We sum cold region size in the cache ratio, as we + // don't want to move regions with low cache ratio due to data classified as cold. float regionCacheRatioOnOldServer = - regionSizeMB == 0 ? 0.0f : (float) regionSizeInCache / regionSizeMB; + regionSizeMB + == 0 + ? 0.0f + : (float) (regionSizeInCache + + sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0)) + / regionSizeMB; regionCacheRatioOnOldServerMap.put(regionEncodedName, new Pair<>(sn, regionCacheRatioOnOldServer)); } @@ -473,6 +523,9 @@ static class CacheAwareCostFunction extends CostFunction { private static final String CACHE_COST_KEY = "hbase.master.balancer.stochastic.cacheCost"; private double cacheRatio; private double bestCacheRatio; + private final float lowCacheRatioThreshold; + private final float potentialCacheRatioAfterMove; + private final float minFreeCacheSpaceFactor; private static final float DEFAULT_CACHE_COST = 20; @@ -483,25 +536,90 @@ static class CacheAwareCostFunction extends CostFunction { !isPersistentCache ? 0.0f : conf.getFloat(CACHE_COST_KEY, DEFAULT_CACHE_COST)); bestCacheRatio = 0.0; cacheRatio = 0.0; + lowCacheRatioThreshold = + conf.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY, LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT); + potentialCacheRatioAfterMove = Math.min(1.0f, conf + .getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY, POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT)); + minFreeCacheSpaceFactor = + conf.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY, MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT); } @Override void prepare(BalancerClusterState cluster) { super.prepare(cluster); - cacheRatio = 0.0; - bestCacheRatio = 0.0; + recomputeCacheRatio(cluster); + if (LOG.isDebugEnabled()) { + LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio); + } + } + private void recomputeCacheRatio(BalancerClusterState cluster) { + double[] currentWeighted = computeCurrentWeightedContributions(cluster); + double currentSum = 0.0; + double bestCacheSum = 0.0; for (int region = 0; region < cluster.numRegions; region++) { - cacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region, - cluster.regionIndexToServerIndex[region]); - bestCacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region, - getServerWithBestCacheRatioForRegion(region)); + currentSum += currentWeighted[region]; + // here we only get the server index where this region cache ratio is the highest + int serverIndexBestCache = cluster.getOrComputeServerWithBestRegionCachedRatio()[region]; + double currentHighestCache = + cluster.getOrComputeWeightedRegionCacheRatio(region, serverIndexBestCache); + // Get a hypothetical best cache ratio for this region if any server has enough free cache + // to host it. + double potentialHighestCache = + potentialBestWeightedFromFreeCache(cluster, region, currentHighestCache); + double actualHighest = Math.max(currentHighestCache, potentialHighestCache); + bestCacheSum += actualHighest; } + bestCacheRatio = bestCacheSum; + if (bestCacheSum <= 0.0) { + cacheRatio = cluster.numRegions == 0 ? 1.0 : 0.0; + } else { + cacheRatio = Math.min(1.0, currentSum / bestCacheSum); + } + } - cacheRatio = bestCacheRatio == 0 ? 1.0 : cacheRatio / bestCacheRatio; - if (LOG.isDebugEnabled()) { - LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio); + private double[] computeCurrentWeightedContributions(BalancerClusterState cluster) { + int totalRegions = cluster.numRegions; + double[] contrib = new double[totalRegions]; + for (int r = 0; r < totalRegions; r++) { + int s = cluster.regionIndexToServerIndex[r]; + int sizeMb = cluster.getTotalRegionHFileSizeMB(r); + if (sizeMb <= 0) { + contrib[r] = 0.0; + continue; + } + contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s); + } + return contrib; + } + + /* + * If this region is cold in metrics and at least one RS (including its current host) reports + * enough free block cache to hold it, return an optimistic weighted cache score ({@link + * #potentialCacheRatioAfterMove} * region MB) so placement is not considered optimal solely + * from low ratios when capacity exists somewhere in the cluster. + */ + private double potentialBestWeightedFromFreeCache(BalancerClusterState cluster, int region, + double currentHighestCache) { + if (cluster.serverBlockCacheFreeSize == null) { + return 0.0; + } + float observedRatio = cluster.getObservedRegionCacheRatio(region); + if (observedRatio >= lowCacheRatioThreshold) { + return 0.0; + } + int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region); + if (regionSizeMb <= 0) { + return 0.0; + } + long regionSizeBytes = (long) regionSizeMb * 1024L * 1024L; + long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor); + for (int s = 0; s < cluster.numServers; s++) { + if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) { + return Math.max(currentHighestCache, regionSizeMb * potentialCacheRatioAfterMove); + } } + return 0.0; } @Override diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index f2b2240a174e..62b1c2a34542 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -321,8 +321,7 @@ private void updateBalancerTableLoadInfo(TableName tableName, if ((this.localityCost != null) || (this.rackLocalityCost != null)) { finder = this.regionFinder; } - BalancerClusterState cluster = - new BalancerClusterState(loadOfOneTable, loads, finder, rackManager); + BalancerClusterState cluster = createState(loadOfOneTable, loads, finder, rackManager); initCosts(cluster); curOverallCost = computeCost(cluster, Double.MAX_VALUE); @@ -330,6 +329,12 @@ private void updateBalancerTableLoadInfo(TableName tableName, updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); } + protected BalancerClusterState createState(Map> clusterState, + Map> loads, RegionHDFSBlockLocationFinder finder, + RackManager rackManager) { + return new BalancerClusterState(clusterState, loads, finder, rackManager); + } + @Override public void updateBalancerLoadInfo(Map>> loadOfAllTable) { @@ -577,8 +582,7 @@ protected List balanceTable(TableName tableName, // The clusterState that is given to this method contains the state // of all the regions in the table(s) (that's true today) // Keep track of servers to iterate through them. - BalancerClusterState cluster = new BalancerClusterState(loadOfOneTable, loads, finder, - rackManager, regionCacheRatioOnOldServerMap); + BalancerClusterState cluster = createState(loadOfOneTable, loads, finder, rackManager); long startTime = EnvironmentEdgeManager.currentTime(); cluster.setStopRequestedAt(startTime + maxRunningTime); diff --git a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java index 870c97a1f49c..00d39fc4e740 100644 --- a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java +++ b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java @@ -218,7 +218,7 @@ static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState c static BalancerClusterState createMockBalancerClusterState(Map> serverToRegions) { - return new BalancerClusterState(serverToRegions, null, null, null, null); + return new BalancerClusterState(serverToRegions, null, null, null, null, null); } /** diff --git a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java index 1b9aca931b49..cff009f04564 100644 --- a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java +++ b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java @@ -193,7 +193,7 @@ private static class DummyBalancerClusterState extends BalancerClusterState { private final RegionInfo[] testRegions; DummyBalancerClusterState(BalancerClusterState bcs) { - super(bcs.clusterState, null, null, null, null); + super(bcs.clusterState, null, null, null, null, null); this.testRegions = bcs.regions; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java index 2cf55a1abdc0..8070c5802b0a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java @@ -112,4 +112,19 @@ default String getVersion() { * rounded to MB */ Map getRegionCachedInfo(); + + /** + * The available cache space on this region server (bytes), if reported in the server load. + */ + default long getCacheFreeSize() { + return 0L; + } + + /** + * Returns the region cold data information for the regions hosted on this server. Here, cold data + * refers only to region data that is classified as cold by the DataTieringManager according to + * the configured priority logic. These data should be kept out of block cache. + * @return map of region encoded name and its total cold data size, rounded to MB + */ + Map getRegionColdDataSize(); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java index c7aea21e845a..9103261f3be1 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java @@ -64,7 +64,7 @@ public static ServerMetrics toServerMetrics(ServerName serverName, public static ServerMetrics toServerMetrics(ServerName serverName, int versionNumber, String version, ClusterStatusProtos.ServerLoad serverLoadPB) { - return ServerMetricsBuilder.newBuilder(serverName) + ServerMetricsBuilder builder = ServerMetricsBuilder.newBuilder(serverName) .setRequestCountPerSecond(serverLoadPB.getNumberOfRequests()) .setRequestCount(serverLoadPB.getTotalNumberOfRequests()) .setInfoServerPort(serverLoadPB.getInfoServerPort()) @@ -88,7 +88,10 @@ public static ServerMetrics toServerMetrics(ServerName serverName, int versionNu .setRegionCachedInfo(serverLoadPB.getRegionCachedInfoMap()) .setReportTimestamp(serverLoadPB.getReportEndTime()) .setLastReportTimestamp(serverLoadPB.getReportStartTime()).setVersionNumber(versionNumber) - .setVersion(version).build(); + .setVersion(version) + .setCacheFreeSize(serverLoadPB.hasCacheFreeSize() ? serverLoadPB.getCacheFreeSize() : 0L) + .setRegionColdDataInfo(serverLoadPB.getRegionColdData()); + return builder.build(); } public static List toCoprocessor(Collection names) { @@ -118,6 +121,7 @@ public static ClusterStatusProtos.ServerLoad toServerLoad(ServerMetrics metrics) if (metrics.getReplicationLoadSink() != null) { builder.setReplLoadSink(ProtobufUtil.toReplicationLoadSink(metrics.getReplicationLoadSink())); } + builder.setCacheFreeSize(metrics.getCacheFreeSize()); return builder.build(); } @@ -145,6 +149,8 @@ public static ServerMetricsBuilder newBuilder(ServerName sn) { private long lastReportTimestamp = 0; private final List tasks = new ArrayList<>(); private Map regionCachedInfo = new HashMap<>(); + private long cacheFreeSize; + private Map regionColdDataInfo; private ServerMetricsBuilder(ServerName serverName) { this.serverName = serverName; @@ -240,11 +246,21 @@ public ServerMetricsBuilder setRegionCachedInfo(Map value) { return this; } + public ServerMetricsBuilder setCacheFreeSize(long blockCacheFreeSize) { + this.cacheFreeSize = blockCacheFreeSize; + return this; + } + + public ServerMetricsBuilder setRegionColdDataInfo(Map regionColdDataInfo) { + this.regionColdDataInfo = regionColdDataInfo; + return this; + } + public ServerMetrics build() { return new ServerMetricsImpl(serverName, versionNumber, version, requestCountPerSecond, requestCount, readRequestCount, writeRequestCount, usedHeapSize, maxHeapSize, infoServerPort, sources, sink, regionStatus, coprocessorNames, reportTimestamp, lastReportTimestamp, - userMetrics, tasks, regionCachedInfo); + userMetrics, tasks, regionCachedInfo, cacheFreeSize, regionColdDataInfo); } private static class ServerMetricsImpl implements ServerMetrics { @@ -268,6 +284,8 @@ private static class ServerMetricsImpl implements ServerMetrics { private final Map userMetrics; private final List tasks; private final Map regionCachedInfo; + private final long cacheFreeSize; + private final Map regionColdDataInfo; ServerMetricsImpl(ServerName serverName, int versionNumber, String version, long requestCountPerSecond, long requestCount, long readRequestsCount, @@ -275,7 +293,8 @@ private static class ServerMetricsImpl implements ServerMetrics { List sources, ReplicationLoadSink sink, Map regionStatus, Set coprocessorNames, long reportTimestamp, long lastReportTimestamp, Map userMetrics, List tasks, - Map regionCachedInfo) { + Map regionCachedInfo, long cacheFreeSize, + Map regionColdDataInfo) { this.serverName = Preconditions.checkNotNull(serverName); this.versionNumber = versionNumber; this.version = version; @@ -295,6 +314,8 @@ private static class ServerMetricsImpl implements ServerMetrics { this.lastReportTimestamp = lastReportTimestamp; this.tasks = tasks; this.regionCachedInfo = regionCachedInfo; + this.cacheFreeSize = cacheFreeSize; + this.regionColdDataInfo = regionColdDataInfo; } @Override @@ -402,6 +423,16 @@ public Map getRegionCachedInfo() { return Collections.unmodifiableMap(regionCachedInfo); } + @Override + public long getCacheFreeSize() { + return cacheFreeSize; + } + + @Override + public Map getRegionColdDataSize() { + return Collections.unmodifiableMap(regionColdDataInfo); + } + @Override public String toString() { int storeCount = 0; diff --git a/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto b/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto index afff971687d9..2db64346c283 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto @@ -332,6 +332,17 @@ message ServerLoad { * The metrics for region cached on this region server */ map regionCachedInfo = 16; + + /** +* Unallocated block cache capacity on this RegionServer, in bytes. +* Used by the master for cache-aware load balancing (optional). +*/ + optional uint64 cacheFreeSize = 17; + + /** + * The metrics for total region cold data size + */ + map regionColdData = 18; } message LiveServerInfo { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 672607fe5bc4..ab75a9ecbca8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1242,6 +1242,12 @@ private ClusterStatusProtos.ServerLoad buildServerLoad(long reportStartTime, lon }); }); }); + serverLoad.setCacheFreeSize(regionServerWrapper.getBlockCacheFreeSize()); + if (DataTieringManager.getInstance() != null) { + DataTieringManager.getInstance().getRegionColdDataSize() + .forEach((regionName, coldDataSize) -> serverLoad.putRegionColdData(regionName, + roundSize(coldDataSize.getSecond(), unitMB))); + } serverLoad.setReportStartTime(reportStartTime); serverLoad.setReportEndTime(reportEndTime); if (this.infoServer != null) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java index eeba839ac111..2c38f56cfdd8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java @@ -400,6 +400,11 @@ public Map getRegionCachedInfo() { return new HashMap<>(); } + @Override + public Map getRegionColdDataSize() { + return new HashMap<>(); + } + }; return serverMetrics; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java index 0551adf9601c..42a0ad213cf6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java @@ -20,13 +20,25 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Deque; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.RegionMetrics; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.Size; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Pair; @@ -251,12 +263,105 @@ public void testCacheCost() { } } + /** + * When block-cache persistence, cold regions (below + * {@link CacheAwareLoadBalancer#LOW_CACHE_RATIO_FOR_RELOCATION_KEY}) together with RS-reported + * block-cache free bytes inflate plausible best placement so weighted cache cost crosses + * {@code minCostNeedBalance}; {@link StochasticLoadBalancer#needsBalance} returns true even with + * evenly spread region-count skew. + */ + @Test + public void testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace() { + conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence"); + CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf); + int regionSizeMb = 64; + long cacheFreeInBytes = regionSizeMb * 1024L * 1024L; + // simulates a cache ratio lower than + // CacheAwareLoadBalancer.LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT + float simulatedCacheRatio = 0.1f; + Map> clusterServers = + mockClusterServersUnsorted(new int[] { 1, 1 }, 1); + List regions = new ArrayList<>(); + clusterServers.values().forEach(regions::addAll); + List serversList = getServersInInsertionOrder(clusterServers); + Map blockCacheFree = new HashMap<>(); + blockCacheFree.put(serversList.get(0), 0L); + blockCacheFree.put(serversList.get(1), cacheFreeInBytes); + BalancerClusterState cluster = new BalancerClusterState(clusterServers, + buildRegionLoads(regions, simulatedCacheRatio, regionSizeMb), null, null, + Collections.emptyMap(), blockCacheFree); + lb.initCosts(cluster); + assertTrue(lb.needsBalance( + TableName.valueOf("testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace"), + cluster)); + } + + /** + * Checks that needsBalance isn't true when regions report high cache ratios + */ + @Test + public void testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace() { + conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence"); + CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf); + int regionSizeMb = 64; + long cacheFreeInBytes = regionSizeMb * 1024L * 1024L; + Map> clusterServers = + mockClusterServersUnsorted(new int[] { 1, 1 }, 1); + List all = new ArrayList<>(); + clusterServers.values().forEach(all::addAll); + List serversList = getServersInInsertionOrder(clusterServers); + Map blockCacheFree = new HashMap<>(); + blockCacheFree.put(serversList.get(0), cacheFreeInBytes + 1024 * 1024); + blockCacheFree.put(serversList.get(1), cacheFreeInBytes + 1024 * 1024); + BalancerClusterState cluster = + new BalancerClusterState(clusterServers, buildRegionLoads(all, 1.0f, regionSizeMb), null, + null, Collections.emptyMap(), blockCacheFree); + lb.initCosts(cluster); + assertFalse(lb.needsBalance( + TableName.valueOf("testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace"), + cluster)); + } + + private static CacheAwareLoadBalancer newCacheAwareBalancer(Configuration cfg) { + CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer(); + lb.setClusterInfoProvider(new DummyClusterInfoProvider(cfg)); + lb.loadConf(cfg); + return lb; + } + + private static Map> + buildRegionLoads(Collection regions, float cachedRatio, int regionSizeMb) { + RegionMetrics rm = mock(RegionMetrics.class); + when(rm.getReadRequestCount()).thenReturn(0L); + when(rm.getCpRequestCount()).thenReturn(0L); + when(rm.getWriteRequestCount()).thenReturn(0L); + when(rm.getMemStoreSize()).thenReturn(Size.ZERO); + when(rm.getStoreFileSize()).thenReturn(Size.ZERO); + when(rm.getRegionSizeMB()).thenReturn(new Size(regionSizeMb, Size.Unit.MEGABYTE)); + when(rm.getCurrentRegionCachedRatio()).thenReturn(cachedRatio); + + BalancerRegionLoad brl = new BalancerRegionLoad(rm); + Map> loads = new HashMap<>(); + for (RegionInfo ri : regions) { + ArrayDeque dq = new ArrayDeque<>(1); + dq.add(brl); + loads.put(ri.getRegionNameAsString(), dq); + loads.put(ri.getEncodedName(), dq); + } + return loads; + } + + private static List + getServersInInsertionOrder(Map> cluster) { + return new ArrayList<>(cluster.keySet()); + } + private class MockClusterForCacheCost extends BalancerClusterState { private final Map, Float> regionServerCacheRatio = new HashMap<>(); public MockClusterForCacheCost(int[][] regionsArray) { // regions[0] is an array where index = serverIndex and value = number of regions - super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, null); + super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, null, null); Map> oldCacheRatio = new HashMap<>(); for (int i = 1; i < regionsArray.length; i++) { int regionIndex = i - 1;