|
@@ -1,7 +1,9 @@
|
|
|
|
|
+#[cfg(target_os = "linux")]
|
|
|
|
|
+use std::{io, thread, time::Duration};
|
|
|
use {
|
|
use {
|
|
|
crate::{
|
|
crate::{
|
|
|
admin_rpc_service,
|
|
admin_rpc_service,
|
|
|
- commands::{monitor, wait_for_restart_window, FromClapArgMatches, Result},
|
|
|
|
|
|
|
+ commands::{monitor, wait_for_restart_window, Error, FromClapArgMatches, Result},
|
|
|
},
|
|
},
|
|
|
clap::{value_t_or_exit, App, Arg, ArgMatches, SubCommand},
|
|
clap::{value_t_or_exit, App, Arg, ArgMatches, SubCommand},
|
|
|
solana_clap_utils::input_validators::{is_parsable, is_valid_percentage},
|
|
solana_clap_utils::input_validators::{is_parsable, is_valid_percentage},
|
|
@@ -13,10 +15,18 @@ const COMMAND: &str = "exit";
|
|
|
const DEFAULT_MIN_IDLE_TIME: &str = "10";
|
|
const DEFAULT_MIN_IDLE_TIME: &str = "10";
|
|
|
const DEFAULT_MAX_DELINQUENT_STAKE: &str = "5";
|
|
const DEFAULT_MAX_DELINQUENT_STAKE: &str = "5";
|
|
|
|
|
|
|
|
|
|
+#[derive(Clone, Debug, PartialEq)]
|
|
|
|
|
+pub enum PostExitAction {
|
|
|
|
|
+ // Run the agave-validator monitor command indefinitely
|
|
|
|
|
+ Monitor,
|
|
|
|
|
+ // Block until the exiting validator process has terminated
|
|
|
|
|
+ Wait,
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
#[derive(Debug, PartialEq)]
|
|
#[derive(Debug, PartialEq)]
|
|
|
pub struct ExitArgs {
|
|
pub struct ExitArgs {
|
|
|
pub force: bool,
|
|
pub force: bool,
|
|
|
- pub monitor: bool,
|
|
|
|
|
|
|
+ pub post_exit_action: Option<PostExitAction>,
|
|
|
pub min_idle_time: usize,
|
|
pub min_idle_time: usize,
|
|
|
pub max_delinquent_stake: u8,
|
|
pub max_delinquent_stake: u8,
|
|
|
pub skip_new_snapshot_check: bool,
|
|
pub skip_new_snapshot_check: bool,
|
|
@@ -25,9 +35,17 @@ pub struct ExitArgs {
|
|
|
|
|
|
|
|
impl FromClapArgMatches for ExitArgs {
|
|
impl FromClapArgMatches for ExitArgs {
|
|
|
fn from_clap_arg_match(matches: &ArgMatches) -> Result<Self> {
|
|
fn from_clap_arg_match(matches: &ArgMatches) -> Result<Self> {
|
|
|
|
|
+ let post_exit_action = if matches.is_present("monitor") {
|
|
|
|
|
+ Some(PostExitAction::Monitor)
|
|
|
|
|
+ } else if matches.is_present("wait_for_exit") {
|
|
|
|
|
+ Some(PostExitAction::Wait)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ None
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
Ok(ExitArgs {
|
|
Ok(ExitArgs {
|
|
|
force: matches.is_present("force"),
|
|
force: matches.is_present("force"),
|
|
|
- monitor: matches.is_present("monitor"),
|
|
|
|
|
|
|
+ post_exit_action,
|
|
|
min_idle_time: value_t_or_exit!(matches, "min_idle_time", usize),
|
|
min_idle_time: value_t_or_exit!(matches, "min_idle_time", usize),
|
|
|
max_delinquent_stake: value_t_or_exit!(matches, "max_delinquent_stake", u8),
|
|
max_delinquent_stake: value_t_or_exit!(matches, "max_delinquent_stake", u8),
|
|
|
skip_new_snapshot_check: matches.is_present("skip_new_snapshot_check"),
|
|
skip_new_snapshot_check: matches.is_present("skip_new_snapshot_check"),
|
|
@@ -55,6 +73,12 @@ pub fn command<'a>() -> App<'a, 'a> {
|
|
|
.takes_value(false)
|
|
.takes_value(false)
|
|
|
.help("Monitor the validator after sending the exit request"),
|
|
.help("Monitor the validator after sending the exit request"),
|
|
|
)
|
|
)
|
|
|
|
|
+ .arg(
|
|
|
|
|
+ Arg::with_name("wait_for_exit")
|
|
|
|
|
+ .long("wait-for-exit")
|
|
|
|
|
+ .conflicts_with("monitor")
|
|
|
|
|
+ .help("Wait for the validator to terminate after sending the exit request"),
|
|
|
|
|
+ )
|
|
|
.arg(
|
|
.arg(
|
|
|
Arg::with_name("min_idle_time")
|
|
Arg::with_name("min_idle_time")
|
|
|
.long("min-idle-time")
|
|
.long("min-idle-time")
|
|
@@ -101,17 +125,99 @@ pub fn execute(matches: &ArgMatches, ledger_path: &Path) -> Result<()> {
|
|
|
)?;
|
|
)?;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- let admin_client = admin_rpc_service::connect(ledger_path);
|
|
|
|
|
- admin_rpc_service::runtime().block_on(async move { admin_client.await?.exit().await })?;
|
|
|
|
|
|
|
+ // Grab the pid from the process before initiating exit as the running
|
|
|
|
|
+ // validator will be unable to respond after exit has returned.
|
|
|
|
|
+ //
|
|
|
|
|
+ // Additionally, only check the pid() RPC call result if it will be used.
|
|
|
|
|
+ // In an upgrade scenario, it is possible that a binary that calls pid()
|
|
|
|
|
+ // will be initating exit against a process that doesn't support pid().
|
|
|
|
|
+ // Since PostExitAction::Wait case is opt-in (via --wait-for-exit), the
|
|
|
|
|
+ // result is checked ONLY in that case to provide a friendlier upgrade
|
|
|
|
|
+ // path for users who are NOT using --wait-for-exit
|
|
|
|
|
+ const WAIT_FOR_EXIT_UNSUPPORTED_ERROR: &str =
|
|
|
|
|
+ "remote process exit cannot be waited on. `--wait-for-exit` is not supported by the remote process";
|
|
|
|
|
+ let post_exit_action = exit_args.post_exit_action.clone();
|
|
|
|
|
+ let validator_pid = admin_rpc_service::runtime().block_on(async move {
|
|
|
|
|
+ let admin_client = admin_rpc_service::connect(ledger_path).await?;
|
|
|
|
|
+ let validator_pid = match post_exit_action {
|
|
|
|
|
+ Some(PostExitAction::Wait) => admin_client
|
|
|
|
|
+ .pid()
|
|
|
|
|
+ .await
|
|
|
|
|
+ .map_err(|_err| Error::Dynamic(WAIT_FOR_EXIT_UNSUPPORTED_ERROR.into()))?,
|
|
|
|
|
+ _ => 0,
|
|
|
|
|
+ };
|
|
|
|
|
+ admin_client.exit().await?;
|
|
|
|
|
+
|
|
|
|
|
+ Ok::<u32, Error>(validator_pid)
|
|
|
|
|
+ })?;
|
|
|
|
|
+
|
|
|
println!("Exit request sent");
|
|
println!("Exit request sent");
|
|
|
|
|
|
|
|
- if exit_args.monitor {
|
|
|
|
|
- monitor::execute(matches, ledger_path)?;
|
|
|
|
|
|
|
+ match exit_args.post_exit_action {
|
|
|
|
|
+ None => Ok(()),
|
|
|
|
|
+ Some(PostExitAction::Monitor) => monitor::execute(matches, ledger_path),
|
|
|
|
|
+ Some(PostExitAction::Wait) => poll_until_pid_terminates(validator_pid),
|
|
|
|
|
+ }?;
|
|
|
|
|
+
|
|
|
|
|
+ Ok(())
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+#[cfg(target_os = "linux")]
|
|
|
|
|
+fn poll_until_pid_terminates(pid: u32) -> Result<()> {
|
|
|
|
|
+ let pid = i32::try_from(pid)?;
|
|
|
|
|
+
|
|
|
|
|
+ println!("Waiting for agave-validator process {pid} to terminate");
|
|
|
|
|
+ loop {
|
|
|
|
|
+ // From man kill(2)
|
|
|
|
|
+ //
|
|
|
|
|
+ // If sig is 0, then no signal is sent, but existence and permission
|
|
|
|
|
+ // checks are still performed; this can be used to check for the
|
|
|
|
|
+ // existence of a process ID or process group ID that the caller is
|
|
|
|
|
+ // permitted to signal.
|
|
|
|
|
+ let result = unsafe {
|
|
|
|
|
+ libc::kill(pid, /*sig:*/ 0)
|
|
|
|
|
+ };
|
|
|
|
|
+ if result >= 0 {
|
|
|
|
|
+ // Give the process some time to exit before checking again
|
|
|
|
|
+ thread::sleep(Duration::from_millis(500));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ let errno = io::Error::last_os_error()
|
|
|
|
|
+ .raw_os_error()
|
|
|
|
|
+ .ok_or(Error::Dynamic("unable to read raw os error".into()))?;
|
|
|
|
|
+ match errno {
|
|
|
|
|
+ libc::ESRCH => {
|
|
|
|
|
+ println!("Done, agave-validator process {pid} has terminated");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ libc::EINVAL => {
|
|
|
|
|
+ // An invalid signal was specified, we only pass sig=0 so
|
|
|
|
|
+ // this should not be possible
|
|
|
|
|
+ Err(Error::Dynamic(
|
|
|
|
|
+ format!("unexpected invalid signal error for kill({pid}, 0)").into(),
|
|
|
|
|
+ ))?;
|
|
|
|
|
+ }
|
|
|
|
|
+ libc::EPERM => {
|
|
|
|
|
+ Err(io::Error::from(io::ErrorKind::PermissionDenied))?;
|
|
|
|
|
+ }
|
|
|
|
|
+ unknown => {
|
|
|
|
|
+ Err(Error::Dynamic(
|
|
|
|
|
+ format!("unexpected errno for kill({pid}, 0): {unknown}").into(),
|
|
|
|
|
+ ))?;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+#[cfg(not(target_os = "linux"))]
|
|
|
|
|
+fn poll_until_pid_terminates(_pid: u32) -> Result<()> {
|
|
|
|
|
+ Err(Error::Dynamic(
|
|
|
|
|
+ "Unable to wait for agave-validator process termination on this platform".into(),
|
|
|
|
|
+ ))
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
#[cfg(test)]
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
mod tests {
|
|
|
use {super::*, crate::commands::tests::verify_args_struct_by_command};
|
|
use {super::*, crate::commands::tests::verify_args_struct_by_command};
|
|
@@ -126,7 +232,7 @@ mod tests {
|
|
|
.parse()
|
|
.parse()
|
|
|
.expect("invalid DEFAULT_MAX_DELINQUENT_STAKE"),
|
|
.expect("invalid DEFAULT_MAX_DELINQUENT_STAKE"),
|
|
|
force: false,
|
|
force: false,
|
|
|
- monitor: false,
|
|
|
|
|
|
|
+ post_exit_action: None,
|
|
|
skip_new_snapshot_check: false,
|
|
skip_new_snapshot_check: false,
|
|
|
skip_health_check: false,
|
|
skip_health_check: false,
|
|
|
}
|
|
}
|
|
@@ -151,12 +257,21 @@ mod tests {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
#[test]
|
|
|
- fn verify_args_struct_by_command_exit_with_monitor() {
|
|
|
|
|
|
|
+ fn verify_args_struct_by_command_exit_with_post_exit_action() {
|
|
|
verify_args_struct_by_command(
|
|
verify_args_struct_by_command(
|
|
|
command(),
|
|
command(),
|
|
|
vec![COMMAND, "--monitor"],
|
|
vec![COMMAND, "--monitor"],
|
|
|
ExitArgs {
|
|
ExitArgs {
|
|
|
- monitor: true,
|
|
|
|
|
|
|
+ post_exit_action: Some(PostExitAction::Monitor),
|
|
|
|
|
+ ..ExitArgs::default()
|
|
|
|
|
+ },
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ verify_args_struct_by_command(
|
|
|
|
|
+ command(),
|
|
|
|
|
+ vec![COMMAND, "--wait-for-exit"],
|
|
|
|
|
+ ExitArgs {
|
|
|
|
|
+ post_exit_action: Some(PostExitAction::Wait),
|
|
|
..ExitArgs::default()
|
|
..ExitArgs::default()
|
|
|
},
|
|
},
|
|
|
);
|
|
);
|