Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDFS-17646. Add Option to limit Balancer overUtilized nodes num in each iteration. #7120

Open
wants to merge 3 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ public class Balancer {
+ "\n\t[-sortTopNodes]"
+ "\tSort datanodes based on the utilization so "
+ "that highly utilized datanodes get scheduled first."
+ "\n\t[-limitOverUtilizedNum <specified maximum number of overUtilized datanodes>]"
+ "\tLimit the maximum number of overUtilized datanodes."
+ "\n\t[-hotBlockTimeInterval]\tprefer to move cold blocks.";

@VisibleForTesting
Expand All @@ -227,6 +229,7 @@ public class Balancer {
private final long maxSizeToMove;
private final long defaultBlockSize;
private final boolean sortTopNodes;
private final int limitOverUtilizedNum;
private final BalancerMetrics metrics;

// all data node lists
Expand Down Expand Up @@ -352,6 +355,7 @@ static int getFailedTimesSinceLastSuccessfulBalance() {
this.sourceNodes = p.getSourceNodes();
this.runDuringUpgrade = p.getRunDuringUpgrade();
this.sortTopNodes = p.getSortTopNodes();
this.limitOverUtilizedNum = p.getLimitOverUtilizedNum();

this.maxSizeToMove = getLongBytes(conf,
DFSConfigKeys.DFS_BALANCER_MAX_SIZE_TO_MOVE_KEY,
Expand Down Expand Up @@ -456,11 +460,18 @@ private long init(List<DatanodeStorageReport> reports) {
sortOverUtilized(overUtilizedPercentage);
}

// Limit the maximum number of overUtilized datanodes
// If excludedOverUtilizedNum is greater than 0, The overUtilized nodes num is limited
int excludedOverUtilizedNum = Math.max(overUtilized.size() - limitOverUtilizedNum, 0);
if (excludedOverUtilizedNum > 0) {
limitOverUtilizedNum();
}

logUtilizationCollections();
metrics.setNumOfOverUtilizedNodes(overUtilized.size());
metrics.setNumOfUnderUtilizedNodes(underUtilized.size());

Preconditions.checkState(dispatcher.getStorageGroupMap().size()
Preconditions.checkState(dispatcher.getStorageGroupMap().size() - excludedOverUtilizedNum
== overUtilized.size() + underUtilized.size() + aboveAvgUtilized.size()
+ belowAvgUtilized.size(),
"Mismatched number of storage groups");
Expand All @@ -484,6 +495,20 @@ private void sortOverUtilized(Map<Source, Double> overUtilizedPercentage) {
);
}

private void limitOverUtilizedNum() {
Preconditions.checkState(overUtilized instanceof LinkedList,
"Collection overUtilized is not a LinkedList.");
LinkedList<Source> list = (LinkedList<Source>) overUtilized;

LOG.info("Limiting over-utilized nodes num, if using the '-sortTopNodes' param," +
" the overUtilized nodes of top will be retained");

int size = overUtilized.size();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just suggest to add another one separate function rather than located here which is sort something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix

for (int i = 0; i < size - limitOverUtilizedNum; i++) {
list.removeLast();
}
}

private static long computeMaxSize2Move(final long capacity, final long remaining,
final double utilizationDiff, final long max) {
final double diff = Math.abs(utilizationDiff);
Expand Down Expand Up @@ -1071,6 +1096,12 @@ static BalancerParameters parse(String[] args) {
b.setSortTopNodes(true);
LOG.info("Balancer will sort nodes by" +
" capacity usage percentage to prioritize top used nodes");
} else if ("-limitOverUtilizedNum".equalsIgnoreCase(args[i])) {
Preconditions.checkArgument(++i < args.length,
"limitOverUtilizedNum value is missing: args = " + Arrays.toString(args));
int limitNum = Integer.parseInt(args[i]);
LOG.info("Using a limitOverUtilizedNum of {}", limitNum);
b.setLimitOverUtilizedNum(limitNum);
} else {
throw new IllegalArgumentException("args = "
+ Arrays.toString(args));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ final class BalancerParameters {

private final boolean sortTopNodes;

private final int limitOverUtilizedNum;

static final BalancerParameters DEFAULT = new BalancerParameters();

private BalancerParameters() {
Expand All @@ -67,6 +69,7 @@ private BalancerParameters(Builder builder) {
this.runDuringUpgrade = builder.runDuringUpgrade;
this.runAsService = builder.runAsService;
this.sortTopNodes = builder.sortTopNodes;
this.limitOverUtilizedNum = builder.limitOverUtilizedNum;
this.hotBlockTimeInterval = builder.hotBlockTimeInterval;
}

Expand Down Expand Up @@ -110,6 +113,10 @@ boolean getSortTopNodes() {
return this.sortTopNodes;
}

int getLimitOverUtilizedNum() {
return this.limitOverUtilizedNum;
}

long getHotBlockTimeInterval() {
return this.hotBlockTimeInterval;
}
Expand All @@ -120,12 +127,12 @@ public String toString() {
+ " max idle iteration = %s," + " #excluded nodes = %s,"
+ " #included nodes = %s," + " #source nodes = %s,"
+ " #blockpools = %s," + " run during upgrade = %s,"
+ " sort top nodes = %s,"
+ " sort top nodes = %s," + " limit overUtilized nodes num = %s"
+ " hot block time interval = %s]",
Balancer.class.getSimpleName(), getClass().getSimpleName(), policy,
threshold, maxIdleIteration, excludedNodes.size(),
includedNodes.size(), sourceNodes.size(), blockpools.size(),
runDuringUpgrade, sortTopNodes, hotBlockTimeInterval);
runDuringUpgrade, sortTopNodes, limitOverUtilizedNum, hotBlockTimeInterval);
}

static class Builder {
Expand All @@ -141,6 +148,7 @@ static class Builder {
private boolean runDuringUpgrade = false;
private boolean runAsService = false;
private boolean sortTopNodes = false;
private int limitOverUtilizedNum = Integer.MAX_VALUE;
private long hotBlockTimeInterval = 0;

Builder() {
Expand Down Expand Up @@ -201,6 +209,11 @@ Builder setSortTopNodes(boolean shouldSortTopNodes) {
return this;
}

Builder setLimitOverUtilizedNum(int limitOverUtilizedNum) {
this.limitOverUtilizedNum = limitOverUtilizedNum;
return this;
}

BalancerParameters build() {
return new BalancerParameters(this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ Usage:
[-runDuringUpgrade]
[-asService]
[-sortTopNodes]
[-limitOverUtilizedNum <specified maximum number of overUtilized datanodes>]
[-hotBlockTimeInterval <specified time interval>]

| COMMAND\_OPTION | Description |
Expand All @@ -307,6 +308,7 @@ Usage:
| `-runDuringUpgrade` | Whether to run the balancer during an ongoing HDFS upgrade. This is usually not desired since it will not affect used space on over-utilized machines. |
| `-asService` | Run Balancer as a long running service. |
| `-sortTopNodes` | Sort datanodes based on the utilization so that highly utilized datanodes get scheduled first. |
| `-limitOverUtilizedNum` | Limit the maximum number of overUtilized datanodes. |
| `-hotBlockTimeInterval` | Prefer moving cold blocks i.e blocks associated with files accessed or modified before the specified time interval. |
| `-h`\|`--help` | Display the tool usage and help information and exit. |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,98 @@ public void testBalancerWithSortTopNodes() throws Exception {
assertEquals(900, maxUsage);
}

@Test(timeout = 60000)
public void testBalancerWithLimitOverUtilizedNum() throws Exception {
final Configuration conf = new HdfsConfiguration();
// Init the config (block size to 100)
initConf(conf);
conf.setInt(DFS_HEARTBEAT_INTERVAL_KEY, 30000);

final long totalCapacity = 1000L;
final int diffBetweenNodes = 50;

// Set up the nodes with two groups:
// 5 over-utilized nodes with 80%, 85%, 90%, 95%, 100% usage
// 2 under-utilized nodes with 0%, 5% usage
// With sortTopNodes and limitOverUtilizedNum option, 100% used ones will be chosen
final int numOfOverUtilizedDn = 5;
final int numOfUnderUtilizedDn = 2;
final int totalNumOfDn = numOfOverUtilizedDn + numOfUnderUtilizedDn;
final long[] capacityArray = new long[totalNumOfDn];
Arrays.fill(capacityArray, totalCapacity);

try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(totalNumOfDn)
.simulatedCapacities(capacityArray)
.build()) {
cluster.setDataNodesDead();
List<DataNode> dataNodes = cluster.getDataNodes();
// Create top used nodes
for (int i = 0; i < numOfOverUtilizedDn; i++) {
// Bring one node alive
DataNodeTestUtils.triggerHeartbeat(dataNodes.get(i));
DataNodeTestUtils.triggerBlockReport(dataNodes.get(i));
// Create nodes with: 80%, 85%, 90%, 95%, 100%
int nodeCapacity = (int) totalCapacity - diffBetweenNodes * (numOfOverUtilizedDn - i - 1);
TestBalancer.createFile(cluster, new Path("test_big" + i), nodeCapacity, (short) 1, 0);
cluster.setDataNodesDead();
}

// Create under utilized nodes
for (int i = numOfUnderUtilizedDn - 1; i >= 0; i--) {
int index = i + numOfOverUtilizedDn;
// Bring one node alive
DataNodeTestUtils.triggerHeartbeat(dataNodes.get(index));
DataNodeTestUtils.triggerBlockReport(dataNodes.get(index));
// Create nodes with: 5%, 0%
int nodeCapacity = diffBetweenNodes * i;
TestBalancer.createFile(cluster, new Path("test_small" + i), nodeCapacity, (short) 1, 0);
cluster.setDataNodesDead();
}

// Bring all nodes alive
cluster.triggerHeartbeats();
cluster.triggerBlockReports();
cluster.waitFirstBRCompleted(0, 6000);

final BalancerParameters balancerParameters = Balancer.Cli.parse(new String[] {
"-policy", BalancingPolicy.Node.INSTANCE.getName(),
"-threshold", "1",
"-sortTopNodes",
"-limitOverUtilizedNum", "1"
});

client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0)
.getUri(), ClientProtocol.class)
.getProxy();

// Set max-size-to-move to small number
// so only top two nodes will be chosen in one iteration
conf.setLong(DFS_BALANCER_MAX_SIZE_TO_MOVE_KEY, 99L);
final Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
List<NameNodeConnector> connectors =
NameNodeConnector.newNameNodeConnectors(namenodes, Balancer.class.getSimpleName(),
Balancer.BALANCER_ID_PATH, conf, BalancerParameters.DEFAULT.getMaxIdleIteration());
final Balancer balancer = new Balancer(connectors.get(0), balancerParameters, conf);
Balancer.Result balancerResult = balancer.runOneIteration();

cluster.triggerDeletionReports();
cluster.triggerBlockReports();
cluster.triggerHeartbeats();

DatanodeInfo[] datanodeReport =
client.getDatanodeReport(HdfsConstants.DatanodeReportType.ALL);
long maxUsage = 0;
for (int i = 0; i < totalNumOfDn; i++) {
maxUsage = Math.max(maxUsage, datanodeReport[i].getDfsUsed());
}
// The maxUsage value is 950, only 100% of the nodes will be balanced
assertEquals(950, maxUsage);
assertTrue("BalancerResult is not as expected. " + balancerResult,
(balancerResult.getBytesAlreadyMoved() == 100 && balancerResult.getBlocksMoved() == 1));
}
}

@Test(timeout = 100000)
public void testMaxIterationTime() throws Exception {
final Configuration conf = new HdfsConfiguration();
Expand Down
Loading