Skip to content

Commit

Permalink
Remove sporadic min/max usage estimates from stats (#59755)
Browse files Browse the repository at this point in the history
Today `GET _nodes/stats/fs` includes `{least,most}_usage_estimate`
fields for some nodes. These fields have rather strange semantics. They
are only reported on the elected master and on nodes that have been the
elected master since they were last restarted; when a node stops being
the elected master these stats remain in place but we stop updating them
so they may become arbitrarily stale.

This means that these statistics are pretty meaningless and impossible
to use correctly. Even if they were kept up to date they're never
reported for data-only nodes anyway, despite the fact that data nodes
are the ones where we care most about disk usage. The information needed
to compute the path with the least/most available space is already
provided in the rest the stats output, so we can treat the inclusion of
these stats as a bug and fix it by simply removing them in this commit.
Since these stats were always optional and mostly omitted (for opaque
reasons) this is not considered a breaking change.
  • Loading branch information
DaveCTurner authored Jul 20, 2020
1 parent 3615c42 commit 7bb748d
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 107 deletions.
14 changes: 0 additions & 14 deletions docs/reference/cluster/nodes-stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1687,17 +1687,6 @@ less than `free_in_bytes`. This is the actual amount of free disk
space the {es} node can utilise.
=======
`least_usage_estimate`::
(object)
Contains statistics for the file store with the least estimated usage. See
<<cluster-nodes-stats-fs-data,`fs.data`>> for a list of child parameters.
`most_usage_estimate`::
(object)
Contains statistics for the file store with the most estimated usage. See
<<cluster-nodes-stats-fs-data,`fs.data`>> for a list of child parameters.
[[cluster-nodes-stats-fs-data]]
`data`::
(array of objects)
Expand All @@ -1713,9 +1702,6 @@ Path to the file store.
`mount`::
(string)
Mount point of the file store (ex: /dev/sda2).
+
NOTE: This parameter is not provided for the `least_usage_estimate` or
`most_usage_estimate` file stores.

`type`::
(string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import org.elasticsearch.monitor.os.OsService;
import org.elasticsearch.monitor.process.ProcessService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.cluster.ClusterInfoService;

import java.io.IOException;

Expand All @@ -40,13 +39,12 @@ public class MonitorService extends AbstractLifecycleComponent {
private final JvmService jvmService;
private final FsService fsService;

public MonitorService(Settings settings, NodeEnvironment nodeEnvironment, ThreadPool threadPool,
ClusterInfoService clusterInfoService) throws IOException {
public MonitorService(Settings settings, NodeEnvironment nodeEnvironment, ThreadPool threadPool) throws IOException {
this.jvmGcMonitorService = new JvmGcMonitorService(settings, threadPool);
this.osService = new OsService(settings);
this.processService = new ProcessService(settings);
this.jvmService = new JvmService(settings);
this.fsService = new FsService(settings, nodeEnvironment, clusterInfoService);
this.fsService = new FsService(settings, nodeEnvironment);
}

public OsService osService() {
Expand Down
72 changes: 9 additions & 63 deletions server/src/main/java/org/elasticsearch/monitor/fs/FsInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.monitor.fs;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.DiskUsage;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
Expand Down Expand Up @@ -119,19 +120,6 @@ private long addLong(long current, long other) {
return current + other;
}

private double addDouble(double current, double other) {
if (current == -1 && other == -1) {
return 0;
}
if (other == -1) {
return current;
}
if (current == -1) {
return other;
}
return current + other;
}

public void add(Path path) {
total = FsProbe.adjustForHugeFilesystems(addLong(total, path.total));
free = FsProbe.adjustForHugeFilesystems(addLong(free, path.free));
Expand Down Expand Up @@ -428,20 +416,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
private final Path[] paths;
private final IoStats ioStats;
private final Path total;
private final DiskUsage leastDiskEstimate;
private final DiskUsage mostDiskEstimate;

public FsInfo(long timestamp, IoStats ioStats, Path[] paths) {
this(timestamp, ioStats, paths, null, null);
}

public FsInfo(long timestamp, IoStats ioStats, Path[] paths, @Nullable DiskUsage leastUsage, @Nullable DiskUsage mostUsage) {
this.timestamp = timestamp;
this.ioStats = ioStats;
this.paths = paths;
this.total = total();
this.leastDiskEstimate = leastUsage;
this.mostDiskEstimate = mostUsage;
}

/**
Expand All @@ -455,8 +435,10 @@ public FsInfo(StreamInput in) throws IOException {
paths[i] = new Path(in);
}
this.total = total();
this.leastDiskEstimate = in.readOptionalWriteable(DiskUsage::new);
this.mostDiskEstimate = in.readOptionalWriteable(DiskUsage::new);
if (in.getVersion().before(Version.V_8_0_0)) {
in.readOptionalWriteable(DiskUsage::new); // previously leastDiskEstimate
in.readOptionalWriteable(DiskUsage::new); // previously mostDiskEstimate
}
}

@Override
Expand All @@ -467,24 +449,16 @@ public void writeTo(StreamOutput out) throws IOException {
for (Path path : paths) {
path.writeTo(out);
}
out.writeOptionalWriteable(this.leastDiskEstimate);
out.writeOptionalWriteable(this.mostDiskEstimate);
if (out.getVersion().before(Version.V_8_0_0)) {
out.writeOptionalWriteable(null); // previously leastDiskEstimate
out.writeOptionalWriteable(null); // previously mostDiskEstimate
}
}

public Path getTotal() {
return total;
}

@Nullable
public DiskUsage getLeastDiskEstimate() {
return this.leastDiskEstimate;
}

@Nullable
public DiskUsage getMostDiskEstimate() {
return this.mostDiskEstimate;
}

private Path total() {
Path res = new Path();
Set<String> seenDevices = new HashSet<>(paths.length);
Expand Down Expand Up @@ -518,28 +492,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(Fields.TIMESTAMP, timestamp);
builder.field(Fields.TOTAL);
total().toXContent(builder, params);
if (leastDiskEstimate != null) {
builder.startObject(Fields.LEAST_ESTIMATE);
{
builder.field(Fields.PATH, leastDiskEstimate.getPath());
builder.humanReadableField(Fields.TOTAL_IN_BYTES, Fields.TOTAL, new ByteSizeValue(leastDiskEstimate.getTotalBytes()));
builder.humanReadableField(Fields.AVAILABLE_IN_BYTES, Fields.AVAILABLE,
new ByteSizeValue(leastDiskEstimate.getFreeBytes()));
builder.field(Fields.USAGE_PERCENTAGE, leastDiskEstimate.getUsedDiskAsPercentage());
}
builder.endObject();
}

if (mostDiskEstimate != null) {
builder.startObject(Fields.MOST_ESTIMATE);
{
builder.field(Fields.PATH, mostDiskEstimate.getPath());
builder.humanReadableField(Fields.TOTAL_IN_BYTES, Fields.TOTAL, new ByteSizeValue(mostDiskEstimate.getTotalBytes()));
builder.humanReadableField(Fields.AVAILABLE_IN_BYTES, Fields.AVAILABLE, new ByteSizeValue(mostDiskEstimate.getFreeBytes()));
builder.field(Fields.USAGE_PERCENTAGE, mostDiskEstimate.getUsedDiskAsPercentage());
}
builder.endObject();
}
builder.startArray(Fields.DATA);
for (Path path : paths) {
path.toXContent(builder, params);
Expand All @@ -559,13 +512,6 @@ static final class Fields {
static final String TIMESTAMP = "timestamp";
static final String DATA = "data";
static final String TOTAL = "total";
static final String TOTAL_IN_BYTES = "total_in_bytes";
static final String IO_STATS = "io_stats";
static final String LEAST_ESTIMATE = "least_usage_estimate";
static final String MOST_ESTIMATE = "most_usage_estimate";
static final String USAGE_PERCENTAGE = "used_disk_percent";
static final String AVAILABLE = "available";
static final String AVAILABLE_IN_BYTES = "available_in_bytes";
static final String PATH = "path";
}
}
13 changes: 2 additions & 11 deletions server/src/main/java/org/elasticsearch/monitor/fs/FsProbe.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.util.Constants;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.DiskUsage;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.io.PathUtils;
Expand All @@ -51,7 +48,7 @@ public FsProbe(NodeEnvironment nodeEnv) {
this.nodeEnv = nodeEnv;
}

public FsInfo stats(FsInfo previous, @Nullable ClusterInfo clusterInfo) throws IOException {
public FsInfo stats(FsInfo previous) throws IOException {
if (!nodeEnv.hasNodeFile()) {
return new FsInfo(System.currentTimeMillis(), null, new FsInfo.Path[0]);
}
Expand All @@ -70,13 +67,7 @@ public FsInfo stats(FsInfo previous, @Nullable ClusterInfo clusterInfo) throws I
}
ioStats = ioStats(devicesNumbers, previous);
}
DiskUsage leastDiskEstimate = null;
DiskUsage mostDiskEstimate = null;
if (clusterInfo != null) {
leastDiskEstimate = clusterInfo.getNodeLeastAvailableDiskUsages().get(nodeEnv.nodeId());
mostDiskEstimate = clusterInfo.getNodeMostAvailableDiskUsages().get(nodeEnv.nodeId());
}
return new FsInfo(System.currentTimeMillis(), ioStats, paths, leastDiskEstimate, mostDiskEstimate);
return new FsInfo(System.currentTimeMillis(), ioStats, paths);
}

final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers, final FsInfo previous) {
Expand Down
20 changes: 7 additions & 13 deletions server/src/main/java/org/elasticsearch/monitor/fs/FsService.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,12 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.SingleObjectCache;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.cluster.ClusterInfoService;

import java.io.IOException;

Expand All @@ -38,9 +35,7 @@ public class FsService {
private static final Logger logger = LogManager.getLogger(FsService.class);

private final FsProbe probe;
private final TimeValue refreshInterval;
private final SingleObjectCache<FsInfo> cache;
private final ClusterInfoService clusterInfoService;

public static final Setting<TimeValue> REFRESH_INTERVAL_SETTING =
Setting.timeSetting(
Expand All @@ -49,23 +44,22 @@ public class FsService {
TimeValue.timeValueSeconds(1),
Property.NodeScope);

public FsService(final Settings settings, final NodeEnvironment nodeEnvironment, ClusterInfoService clusterInfoService) {
public FsService(final Settings settings, final NodeEnvironment nodeEnvironment) {
this.probe = new FsProbe(nodeEnvironment);
this.clusterInfoService = clusterInfoService;
refreshInterval = REFRESH_INTERVAL_SETTING.get(settings);
final TimeValue refreshInterval = REFRESH_INTERVAL_SETTING.get(settings);
logger.debug("using refresh_interval [{}]", refreshInterval);
cache = new FsInfoCache(refreshInterval, stats(probe, null, logger, null));
cache = new FsInfoCache(refreshInterval, stats(probe, null));
}

public FsInfo stats() {
return cache.getOrRefresh();
}

private static FsInfo stats(FsProbe probe, FsInfo initialValue, Logger logger, @Nullable ClusterInfo clusterInfo) {
private static FsInfo stats(FsProbe probe, FsInfo initialValue) {
try {
return probe.stats(initialValue, clusterInfo);
return probe.stats(initialValue);
} catch (IOException e) {
logger.debug("unexpected exception reading filesystem info", e);
FsService.logger.debug("unexpected exception reading filesystem info", e);
return null;
}
}
Expand All @@ -81,7 +75,7 @@ private class FsInfoCache extends SingleObjectCache<FsInfo> {

@Override
protected FsInfo refresh() {
return stats(probe, initialValue, logger, clusterInfoService.getClusterInfo());
return stats(probe, initialValue);
}

}
Expand Down
2 changes: 1 addition & 1 deletion server/src/main/java/org/elasticsearch/node/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ protected Node(final Environment initialEnvironment,
final UsageService usageService = new UsageService();

ModulesBuilder modules = new ModulesBuilder();
final MonitorService monitorService = new MonitorService(settings, nodeEnvironment, threadPool, clusterInfoService);
final MonitorService monitorService = new MonitorService(settings, nodeEnvironment, threadPool);
final FsHealthService fsHealthService = new FsHealthService(settings, clusterService.getClusterSettings(), threadPool,
nodeEnvironment);
ClusterModule clusterModule = new ClusterModule(settings, clusterService, clusterPlugins, clusterInfoService);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void testFsInfo() throws IOException {
try (NodeEnvironment env = newNodeEnvironment()) {
FsProbe probe = new FsProbe(env);

FsInfo stats = probe.stats(null, null);
FsInfo stats = probe.stats(null);
assertNotNull(stats);
assertThat(stats.getTimestamp(), greaterThan(0L));

Expand Down

0 comments on commit 7bb748d

Please sign in to comment.