Skip to content

Commit

Permalink
service: storage monitor added (paritytech#13082)
Browse files Browse the repository at this point in the history
* service: storage monitor added

Storage monitor added. It uses `notify` create to get notifications
about any changes to monitored path (which is database path).
Notifications are consumed in essential task which terminates when
available storage space drops below given threshold.

Closes: paritytech#12399

* Cargo.lock updated

* misspell

* fs events throttling added

* minor updates

* filter out non mutating events

* misspell

* ".git/.scripts/commands/fmt/fmt.sh"

* Update client/service/src/storage_monitor.rs

Co-authored-by: Anton <[email protected]>

* storage-monitor crate added

* cleanup: configuration + service builder

* storage_monitor in custom service (wip)

* copy-paste bad desc fixed

* notify removed

* storage_monitor added to node

* fix for clippy

* publish = false

* Update bin/node/cli/src/command.rs

Co-authored-by: Dmitry Markin <[email protected]>

* Apply suggestions from code review

Co-authored-by: Bastian Köcher <[email protected]>

* crate name: storage-monitor -> sc-storage-monitor

* error handling improved

* Apply suggestions from code review

Co-authored-by: Bastian Köcher <[email protected]>

* publish=false removed

Co-authored-by: command-bot <>
Co-authored-by: Anton <[email protected]>
Co-authored-by: Dmitry Markin <[email protected]>
Co-authored-by: Bastian Köcher <[email protected]>
  • Loading branch information
4 people authored and ltfschoen committed Feb 22, 2023
1 parent de47a0b commit 0d67075
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 10 deletions.
31 changes: 31 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ members = [
"client/service",
"client/service/test",
"client/state-db",
"client/storage-monitor",
"client/sysinfo",
"client/sync-state-rpc",
"client/telemetry",
Expand Down
2 changes: 2 additions & 0 deletions bin/node/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ sc-executor = { version = "0.10.0-dev", path = "../../../client/executor" }
sc-authority-discovery = { version = "0.10.0-dev", path = "../../../client/authority-discovery" }
sc-sync-state-rpc = { version = "0.10.0-dev", path = "../../../client/sync-state-rpc" }
sc-sysinfo = { version = "6.0.0-dev", path = "../../../client/sysinfo" }
sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" }

# frame dependencies
frame-system = { version = "4.0.0-dev", path = "../../../frame/system" }
Expand Down Expand Up @@ -138,6 +139,7 @@ substrate-frame-cli = { version = "4.0.0-dev", optional = true, path = "../../..
try-runtime-cli = { version = "0.10.0-dev", optional = true, path = "../../../utils/frame/try-runtime/cli" }
sc-cli = { version = "0.10.0-dev", path = "../../../client/cli", optional = true }
pallet-balances = { version = "4.0.0-dev", path = "../../../frame/balances" }
sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" }

[features]
default = ["cli"]
Expand Down
4 changes: 4 additions & 0 deletions bin/node/cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ pub struct Cli {
/// telemetry, if telemetry is enabled.
#[arg(long)]
pub no_hardware_benchmarks: bool,

#[allow(missing_docs)]
#[clap(flatten)]
pub storage_monitor: sc_storage_monitor::StorageMonitorParams,
}

/// Possible subcommands of the main binary.
Expand Down
3 changes: 1 addition & 2 deletions bin/node/cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ pub fn run() -> Result<()> {
None => {
let runner = cli.create_runner(&cli.run)?;
runner.run_node_until_exit(|config| async move {
service::new_full(config, cli.no_hardware_benchmarks)
.map_err(sc_cli::Error::Service)
service::new_full(config, cli).map_err(sc_cli::Error::Service)
})
},
Some(Subcommand::Inspect(cmd)) => {
Expand Down
19 changes: 13 additions & 6 deletions bin/node/cli/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

//! Service implementation. Specialized wrapper over substrate service.

use crate::Cli;
use codec::Encode;
use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE;
use frame_system_rpc_runtime_api::AccountNonceApi;
Expand Down Expand Up @@ -556,12 +557,18 @@ pub fn new_full_base(
}

/// Builds a new service for a full client.
pub fn new_full(
config: Configuration,
disable_hardware_benchmarks: bool,
) -> Result<TaskManager, ServiceError> {
new_full_base(config, disable_hardware_benchmarks, |_, _| ())
.map(|NewFullBase { task_manager, .. }| task_manager)
pub fn new_full(config: Configuration, cli: Cli) -> Result<TaskManager, ServiceError> {
let database_source = config.database.clone();
let task_manager = new_full_base(config, cli.no_hardware_benchmarks, |_, _| ())
.map(|NewFullBase { task_manager, .. }| task_manager)?;

sc_storage_monitor::StorageMonitorService::try_spawn(
cli.storage_monitor,
database_source,
&task_manager.spawn_essential_handle(),
)?;

Ok(task_manager)
}

#[cfg(test)]
Expand Down
4 changes: 2 additions & 2 deletions client/cli/src/params/database_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use crate::arg_enums::Database;
use clap::Args;

/// Parameters for block import.
/// Parameters for database
#[derive(Debug, Clone, PartialEq, Args)]
pub struct DatabaseParams {
/// Select database backend to use.
Expand All @@ -32,7 +32,7 @@ pub struct DatabaseParams {
}

impl DatabaseParams {
/// Limit the memory the database cache can use.
/// Database backend
pub fn database(&self) -> Option<Database> {
self.database
}
Expand Down
1 change: 1 addition & 0 deletions client/service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ sc-offchain = { version = "4.0.0-dev", path = "../offchain" }
prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.10.0-dev" }
sc-tracing = { version = "4.0.0-dev", path = "../tracing" }
sc-sysinfo = { version = "6.0.0-dev", path = "../sysinfo" }
sc-storage-monitor = { version = "0.1.0", path = "../storage-monitor" }
tracing = "0.1.29"
tracing-futures = { version = "0.2.4" }
async-trait = "0.1.57"
Expand Down
3 changes: 3 additions & 0 deletions client/service/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ pub enum Error {
#[error(transparent)]
Telemetry(#[from] sc_telemetry::Error),

#[error(transparent)]
Storage(#[from] sc_storage_monitor::Error),

#[error("Best chain selection strategy (SelectChain) is not provided.")]
SelectChainRequired,

Expand Down
20 changes: 20 additions & 0 deletions client/storage-monitor/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[package]
name = "sc-storage-monitor"
version = "0.1.0"
authors = ["Parity Technologies <[email protected]>"]
edition = "2021"
license = "GPL-3.0-or-later WITH Classpath-exception-2.0"
repository = "https:/paritytech/substrate"
description = "Storage monitor service for substrate"
homepage = "https://substrate.io"

[dependencies]
clap = { version = "4.0.9", features = ["derive", "string"] }
futures = "0.3.21"
log = "0.4.17"
nix = { version = "0.26.1", features = ["fs"] }
sc-client-db = { version = "0.10.0-dev", default-features = false, path = "../db" }
sc-utils = { version = "4.0.0-dev", path = "../utils" }
sp-core = { version = "7.0.0", path = "../../primitives/core" }
tokio = "1.22.0"
thiserror = "1.0.30"
149 changes: 149 additions & 0 deletions client/storage-monitor/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// This file is part of Substrate.

// Copyright (C) 2022 Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use clap::Args;
use nix::{errno::Errno, sys::statvfs::statvfs};
use sc_client_db::DatabaseSource;
use sp_core::traits::SpawnEssentialNamed;
use std::{
path::{Path, PathBuf},
time::Duration,
};

const LOG_TARGET: &str = "storage-monitor";

/// Error type used in this crate.
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("IO Error")]
IOError(#[from] Errno),
#[error("Out of storage space: available {0}MB, required {1}MB")]
StorageOutOfSpace(u64, u64),
}

/// Parameters used to create the storage monitor.
#[derive(Default, Debug, Clone, Args)]
pub struct StorageMonitorParams {
/// Required available space on database storage. If available space for DB storage drops below
/// the given threshold, node will be gracefully terminated. If `0` is given monitoring will be
/// disabled.
#[arg(long = "db-storage-threshold", value_name = "MB", default_value_t = 1000)]
pub threshold: u64,

/// How often available space is polled.
#[arg(long = "db-storage-polling-period", value_name = "SECONDS", default_value_t = 5, value_parser = clap::value_parser!(u32).range(1..))]
pub polling_period: u32,
}

/// Storage monitor service: checks the available space for the filesystem for fiven path.
pub struct StorageMonitorService {
/// watched path
path: PathBuf,
/// number of megabytes that shall be free on the filesystem for watched path
threshold: u64,
/// storage space polling period (seconds)
polling_period: u32,
}

impl StorageMonitorService {
/// Creates new StorageMonitorService for given client config
pub fn try_spawn(
parameters: StorageMonitorParams,
database: DatabaseSource,
spawner: &impl SpawnEssentialNamed,
) -> Result<(), Error> {
Ok(match (parameters.threshold, database.path()) {
(0, _) => {
log::info!(
target: LOG_TARGET,
"StorageMonitorService: threshold `0` given, storage monitoring disabled",
);
},
(_, None) => {
log::warn!(
target: LOG_TARGET,
"StorageMonitorService: no database path to observe",
);
},
(threshold, Some(path)) => {
log::debug!(
target: LOG_TARGET,
"Initializing StorageMonitorService for db path: {:?}",
path,
);

Self::check_free_space(&path, threshold)?;

let storage_monitor_service = StorageMonitorService {
path: path.to_path_buf(),
threshold,
polling_period: parameters.polling_period,
};

spawner.spawn_essential(
"storage-monitor",
None,
Box::pin(storage_monitor_service.run()),
);
},
})
}

/// Main monitoring loop, intended to be spawned as essential task. Quits if free space drop
/// below threshold.
async fn run(self) {
loop {
tokio::time::sleep(Duration::from_secs(self.polling_period.into())).await;
if Self::check_free_space(&self.path, self.threshold).is_err() {
break
};
}
}

/// Returns free space in MB, or error if statvfs failed.
fn free_space(path: &Path) -> Result<u64, Error> {
statvfs(path)
.map(|stats| stats.blocks_available() * stats.block_size() / 1_000_000)
.map_err(Error::from)
}

/// Checks if the amount of free space for given `path` is above given `threshold`.
/// If it dropped below, error is returned.
/// System errors are silently ignored.
fn check_free_space(path: &Path, threshold: u64) -> Result<(), Error> {
match StorageMonitorService::free_space(path) {
Ok(available_space) => {
log::trace!(
target: LOG_TARGET,
"free: {available_space} , threshold: {threshold}.",
);

if available_space < threshold {
log::error!(target: LOG_TARGET, "Available space {available_space}MB for path `{}` dropped below threshold: {threshold}MB , terminating...", path.display());
Err(Error::StorageOutOfSpace(available_space, threshold))
} else {
Ok(())
}
},
Err(e) => {
log::error!(target: LOG_TARGET, "Could not read available space: {:?}.", e);
Err(e)
},
}
}
}

0 comments on commit 0d67075

Please sign in to comment.