
Move nofile ulimit check from Blockstore to Validator (#8405)

While Blockstore (rocksdb) does need open files, AccountsDB needs far
more files, and this limit is more of a system parameter than a
Blockstore subsystem parameter. So, move the check to Validator::new();
this will also allow us to fail faster/earlier in agave-validator

Also add the check to ledger-tool, since it previously had the check
via Blockstore::open().
steviez 1 month ago
parent
commit
d72eb072de
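
Conceptually, the check moves from Blockstore::open() into process startup. A minimal sketch of the new call flow, using names from the diff below (the wrapper function here is hypothetical, not part of the change):

use solana_core::{
    resource_limits::adjust_nofile_limit,
    validator::{ValidatorConfig, ValidatorError},
};

// Hypothetical wrapper illustrating where the check now runs: once, early in
// validator startup, instead of inside every Blockstore::open() call.
fn start_validator(config: &ValidatorConfig) -> Result<(), ValidatorError> {
    // Fails fast with ValidatorError::ResourceLimitError when enforcement is
    // requested and the nofile limit cannot be raised.
    adjust_nofile_limit(config.enforce_ulimit_nofile)?;
    // ... proceed to open the Blockstore, AccountsDB, etc.
    Ok(())
}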

+ 1 - 1
Cargo.lock

@@ -8103,6 +8103,7 @@ dependencies = [
  "futures 0.3.31",
  "histogram",
  "itertools 0.12.1",
+ "libc",
  "log",
  "lru",
  "min-max-heap",
@@ -9058,7 +9059,6 @@ dependencies = [
  "futures 0.3.31",
  "itertools 0.12.1",
  "lazy-lru",
- "libc",
  "log",
  "lru",
  "mockall",

+ 1 - 0
core/Cargo.toml

@@ -67,6 +67,7 @@ derive_more = { workspace = true }
 futures = { workspace = true }
 histogram = { workspace = true }
 itertools = { workspace = true }
+libc = { workspace = true }
 log = { workspace = true }
 lru = { workspace = true }
 min-max-heap = { workspace = true }

+ 1 - 0
core/src/lib.rs

@@ -27,6 +27,7 @@ pub mod next_leader;
 pub mod optimistic_confirmation_verifier;
 pub mod repair;
 pub mod replay_stage;
+pub mod resource_limits;
 mod result;
 pub mod sample_performance_service;
 mod shred_fetch_stage;

+ 71 - 0
core/src/resource_limits.rs

@@ -0,0 +1,71 @@
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum ResourceLimitError {
+    #[error(
+        "unable to increase the nofile limit to {desired} from {current}; setrlimit() error: \
+         {error}"
+    )]
+    Nofile {
+        desired: u64,
+        current: u64,
+        error: libc::c_int,
+    },
+}
+
+#[cfg(not(unix))]
+pub fn adjust_nofile_limit(_enforce_nofile_limit: bool) -> Result<(), ResourceLimitError> {
+    Ok(())
+}
+
+#[cfg(unix)]
+pub fn adjust_nofile_limit(enforce_nofile_limit: bool) -> Result<(), ResourceLimitError> {
+    // AccountsDB and RocksDB both may have many files open so bump the limit
+    // to ensure each database will be able to function properly
+    //
+    // This should be kept in sync with published validator instructions:
+    // https://docs.anza.xyz/operations/guides/validator-start#system-tuning
+    let desired_nofile = 1_000_000;
+
+    fn get_nofile() -> libc::rlimit {
+        let mut nofile = libc::rlimit {
+            rlim_cur: 0,
+            rlim_max: 0,
+        };
+        if unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &mut nofile) } != 0 {
+            warn!("getrlimit(RLIMIT_NOFILE) failed");
+        }
+        nofile
+    }
+
+    let mut nofile = get_nofile();
+    let current = nofile.rlim_cur;
+    if current < desired_nofile {
+        nofile.rlim_cur = desired_nofile;
+        let return_value = unsafe { libc::setrlimit(libc::RLIMIT_NOFILE, &nofile) };
+        if return_value != 0 {
+            let error = ResourceLimitError::Nofile {
+                desired: desired_nofile,
+                current,
+                error: return_value,
+            };
+
+            if cfg!(target_os = "macos") {
+                error!(
+                    "{error}. On macOS you may need to run |sudo launchctl limit maxfiles \
+                     {desired_nofile} {desired_nofile}| first",
+                );
+            } else {
+                error!("{error}");
+            };
+
+            if enforce_nofile_limit {
+                return Err(error);
+            }
+        }
+
+        nofile = get_nofile();
+    }
+    info!("Maximum open file descriptors: {}", nofile.rlim_cur);
+    Ok(())
+}
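
A usage sketch for the new helper, assuming the calling binary mirrors the call sites added elsewhere in this diff (the main function here is illustrative, not part of the change):

use solana_core::resource_limits::adjust_nofile_limit;

fn main() {
    // Non-enforcing call: if setrlimit() fails, an error is logged but
    // Ok(()) is still returned, matching the test / opt-out path.
    adjust_nofile_limit(false).expect("cannot fail when enforcement is off");

    // Enforcing call: the validator path, which bails out of startup if the
    // limit cannot be raised to the desired 1_000_000.
    if let Err(err) = adjust_nofile_limit(true) {
        eprintln!("Error: {err}");
        std::process::exit(1);
    }
}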

+ 9 - 0
core/src/validator.rs

@@ -21,6 +21,7 @@ use {
             repair_handler::RepairHandlerType,
             serve_repair_service::ServeRepairService,
         },
+        resource_limits::{adjust_nofile_limit, ResourceLimitError},
         sample_performance_service::SamplePerformanceService,
         sigverify,
         snapshot_packager_service::SnapshotPackagerService,
@@ -340,6 +341,7 @@ pub struct ValidatorConfig {
     pub no_os_network_stats_reporting: bool,
     pub no_os_cpu_stats_reporting: bool,
     pub no_os_disk_stats_reporting: bool,
+    pub enforce_ulimit_nofile: bool,
     pub poh_pinned_cpu_core: usize,
     pub poh_hashes_per_batch: u64,
     pub process_ledger_before_services: bool,
@@ -419,6 +421,8 @@ impl ValidatorConfig {
             no_os_network_stats_reporting: true,
             no_os_cpu_stats_reporting: true,
             no_os_disk_stats_reporting: true,
+            // No need to enforce nofile limit in tests
+            enforce_ulimit_nofile: false,
             poh_pinned_cpu_core: poh_service::DEFAULT_PINNED_CPU_CORE,
             poh_hashes_per_batch: poh_service::DEFAULT_HASHES_PER_BATCH,
             process_ledger_before_services: false,
@@ -671,6 +675,8 @@ impl Validator {
 
         let start_time = Instant::now();
 
+        adjust_nofile_limit(config.enforce_ulimit_nofile)?;
+
         // Initialize the global rayon pool first to ensure the value in config
         // is honored. Otherwise, some code accessing the global pool could
         // cause it to get initialized with Rayon's default (not ours)
@@ -2658,6 +2664,9 @@ pub enum ValidatorError {
     )]
     PohTooSlow { mine: u64, target: u64 },
 
+    #[error(transparent)]
+    ResourceLimitError(#[from] ResourceLimitError),
+
     #[error("shred version mismatch: actual {actual}, expected {expected}")]
     ShredVersionMismatch { actual: u16, expected: u16 },
 
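The #[error(transparent)] / #[from] pair above is what lets the ? on adjust_nofile_limit convert a ResourceLimitError into a ValidatorError automatically; a self-contained sketch of that pattern with stand-in types (not the real validator types):

use thiserror::Error;

#[derive(Error, Debug)]
enum LimitError {
    #[error("unable to raise the nofile limit")]
    Nofile,
}

#[derive(Error, Debug)]
enum StartupError {
    // `transparent` forwards Display/source to the inner error;
    // `#[from]` generates From<LimitError> for StartupError.
    #[error(transparent)]
    Limit(#[from] LimitError),
}

fn adjust_limit() -> Result<(), LimitError> {
    Err(LimitError::Nofile)
}

fn start() -> Result<(), StartupError> {
    adjust_limit()?; // auto-converted via the generated From impl
    Ok(())
}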

+ 0 - 4
ledger-tool/src/ledger_utils.rs

@@ -436,14 +436,12 @@ pub fn open_blockstore(
         .value_of("wal_recovery_mode")
         .map(BlockstoreRecoveryMode::from);
     let force_update_to_open = matches.is_present("force_update_to_open");
-    let enforce_ulimit_nofile = !matches.is_present("ignore_ulimit_nofile_error");
 
     match Blockstore::open_with_options(
         ledger_path,
         BlockstoreOptions {
             access_type: access_type.clone(),
             recovery_mode: wal_recovery_mode.clone(),
-            enforce_ulimit_nofile,
             ..BlockstoreOptions::default()
         },
     ) {
@@ -517,7 +515,6 @@ fn open_blockstore_with_temporary_primary_access(
             BlockstoreOptions {
                 access_type: AccessType::PrimaryForMaintenance,
                 recovery_mode: wal_recovery_mode.clone(),
-                enforce_ulimit_nofile: true,
                 ..BlockstoreOptions::default()
             },
         )?;
@@ -531,7 +528,6 @@ fn open_blockstore_with_temporary_primary_access(
         BlockstoreOptions {
             access_type: original_access_type,
             recovery_mode: wal_recovery_mode,
-            enforce_ulimit_nofile: true,
             ..BlockstoreOptions::default()
         },
     )

+ 10 - 3
ledger-tool/src/main.rs

@@ -36,6 +36,7 @@ use {
     solana_cluster_type::ClusterType,
     solana_core::{
         banking_simulation::{BankingSimulator, BankingTraceEvents},
+        resource_limits::adjust_nofile_limit,
         system_monitor_service::{SystemMonitorService, SystemMonitorStatsReportConfig},
         validator::{BlockProductionMethod, BlockVerificationMethod, TransactionStructure},
     },
@@ -1000,9 +1001,9 @@ fn main() {
                 .takes_value(false)
                 .global(true)
                 .help(
-                    "Allow opening the blockstore to succeed even if the desired open file \
-                     descriptor limit cannot be configured. Use with caution as some commands may \
-                     run fine with a reduced file descriptor limit while others will not",
+                    "Allow the command to continue even if the desired open file descriptor limit \
+                     cannot be configured. Use with caution as some commands may run fine with a \
+                     reduced file descriptor limit while others may fail in non-obvious ways",
                 ),
         )
         .arg(
@@ -1692,6 +1693,12 @@ fn main() {
     let ledger_path = PathBuf::from(value_t_or_exit!(matches, "ledger_path", String));
     let verbose_level = matches.occurrences_of("verbose");
 
+    let enforce_nofile_limit = !matches.is_present("ignore_ulimit_nofile_error");
+    adjust_nofile_limit(enforce_nofile_limit).unwrap_or_else(|err| {
+        eprintln!("Error: {err:?}");
+        exit(1);
+    });
+
     // Name the rayon global thread pool
     rayon::ThreadPoolBuilder::new()
         .thread_name(|i| format!("solRayonGlob{i:02}"))

+ 0 - 1
ledger/Cargo.toml

@@ -44,7 +44,6 @@ fs_extra = { workspace = true }
 futures = { workspace = true }
 itertools = { workspace = true }
 lazy-lru = { workspace = true }
-libc = { workspace = true }
 log = { workspace = true }
 lru = { workspace = true }
 mockall = { workspace = true }

+ 0 - 55
ledger/src/blockstore.rs

@@ -383,8 +383,6 @@ impl Blockstore {
         fs::create_dir_all(ledger_path)?;
         let blockstore_path = ledger_path.join(BLOCKSTORE_DIRECTORY_ROCKS_LEVEL);
 
-        adjust_ulimit_nofile(options.enforce_ulimit_nofile)?;
-
         // Open the database
         let mut measure = Measure::start("blockstore open");
         info!("Opening blockstore at {blockstore_path:?}");
@@ -4810,7 +4808,6 @@ pub fn create_new_ledger(
     let blockstore = Blockstore::open_with_options(
         ledger_path,
         BlockstoreOptions {
-            enforce_ulimit_nofile: false,
             column_options: column_options.clone(),
             ..BlockstoreOptions::default()
         },
@@ -5213,58 +5210,6 @@ pub fn make_chaining_slot_entries(
     slots_shreds_and_entries
 }
 
-#[cfg(not(unix))]
-fn adjust_ulimit_nofile(_enforce_ulimit_nofile: bool) -> Result<()> {
-    Ok(())
-}
-
-#[cfg(unix)]
-fn adjust_ulimit_nofile(enforce_ulimit_nofile: bool) -> Result<()> {
-    // Rocks DB likes to have many open files.  The default open file descriptor limit is
-    // usually not enough
-    // AppendVecs and disk Account Index are also heavy users of mmapped files.
-    // This should be kept in sync with published validator instructions.
-    // https://docs.anza.xyz/operations/guides/validator-start#system-tuning
-    let desired_nofile = 1_000_000;
-
-    fn get_nofile() -> libc::rlimit {
-        let mut nofile = libc::rlimit {
-            rlim_cur: 0,
-            rlim_max: 0,
-        };
-        if unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &mut nofile) } != 0 {
-            warn!("getrlimit(RLIMIT_NOFILE) failed");
-        }
-        nofile
-    }
-
-    let mut nofile = get_nofile();
-    let current = nofile.rlim_cur;
-    if current < desired_nofile {
-        nofile.rlim_cur = desired_nofile;
-        if unsafe { libc::setrlimit(libc::RLIMIT_NOFILE, &nofile) } != 0 {
-            error!(
-                "Unable to increase the maximum open file descriptor limit to {} from {}",
-                nofile.rlim_cur, current,
-            );
-
-            if cfg!(target_os = "macos") {
-                error!(
-                    "On mac OS you may need to run |sudo launchctl limit maxfiles \
-                     {desired_nofile} {desired_nofile}| first",
-                );
-            }
-            if enforce_ulimit_nofile {
-                return Err(BlockstoreError::UnableToSetOpenFileDescriptorLimit);
-            }
-        }
-
-        nofile = get_nofile();
-    }
-    info!("Maximum open file descriptors: {}", nofile.rlim_cur);
-    Ok(())
-}
-
 #[cfg(test)]
 pub mod tests {
     use {

+ 0 - 2
ledger/src/blockstore/error.rs

@@ -26,8 +26,6 @@ pub enum BlockstoreError {
     SlotCleanedUp,
     #[error("unpack error: {0}")]
     UnpackError(#[from] UnpackError),
-    #[error("unable to set open file descriptor limit")]
-    UnableToSetOpenFileDescriptorLimit,
     #[error("transaction status slot mismatch")]
     TransactionStatusSlotMismatch,
     #[error("empty epoch stakes")]

+ 0 - 4
ledger/src/blockstore_db.rs

@@ -1344,7 +1344,6 @@ pub mod tests {
         {
             let options = BlockstoreOptions {
                 access_type: AccessType::Primary,
-                enforce_ulimit_nofile: false,
                 ..BlockstoreOptions::default()
             };
             let mut rocks = Rocks::open(db_path.to_path_buf(), options).unwrap();
@@ -1361,7 +1360,6 @@ pub mod tests {
         {
             let options = BlockstoreOptions {
                 access_type: AccessType::Secondary,
-                enforce_ulimit_nofile: false,
                 ..BlockstoreOptions::default()
             };
             let _ = Rocks::open(db_path.to_path_buf(), options).unwrap();
@@ -1369,7 +1367,6 @@ pub mod tests {
         {
             let options = BlockstoreOptions {
                 access_type: AccessType::Primary,
-                enforce_ulimit_nofile: false,
                 ..BlockstoreOptions::default()
             };
             let _ = Rocks::open(db_path.to_path_buf(), options).unwrap();
@@ -1392,7 +1389,6 @@ pub mod tests {
 
         let options = BlockstoreOptions {
             access_type: AccessType::Primary,
-            enforce_ulimit_nofile: false,
             ..BlockstoreOptions::default()
         };
 

+ 1 - 9
ledger/src/blockstore_options.rs

@@ -13,9 +13,6 @@ pub struct BlockstoreOptions {
     pub access_type: AccessType,
     // Whether to open a blockstore under a recovery mode. Default: None.
     pub recovery_mode: Option<BlockstoreRecoveryMode>,
-    // When opening the Blockstore, determines whether to error or not if the
-    // desired open file descriptor limit cannot be configured. Default: true.
-    pub enforce_ulimit_nofile: bool,
     pub column_options: LedgerColumnOptions,
     pub num_rocksdb_compaction_threads: NonZeroUsize,
     pub num_rocksdb_flush_threads: NonZeroUsize,
@@ -29,7 +26,6 @@ impl Default for BlockstoreOptions {
         Self {
             access_type: AccessType::Primary,
             recovery_mode: None,
-            enforce_ulimit_nofile: true,
             column_options: LedgerColumnOptions::default(),
             num_rocksdb_compaction_threads: default_num_compaction_threads(),
             num_rocksdb_flush_threads: default_num_flush_threads(),
@@ -39,11 +35,7 @@ impl Default for BlockstoreOptions {
 
 impl BlockstoreOptions {
     pub fn default_for_tests() -> Self {
-        Self {
-            // No need to enforce the limit in tests
-            enforce_ulimit_nofile: false,
-            ..BlockstoreOptions::default()
-        }
+        BlockstoreOptions::default()
     }
 }
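With the field removed, callers simply drop it from their struct-update expressions; a sketch of opening a blockstore after this change (it mirrors the call sites updated elsewhere in this diff; the error boxing is illustrative):

use {
    solana_ledger::{
        blockstore::Blockstore,
        blockstore_options::{AccessType, BlockstoreOptions},
    },
    std::path::Path,
};

fn open_readonly(ledger_path: &Path) -> Result<Blockstore, Box<dyn std::error::Error>> {
    // The nofile check is no longer performed here; it is now the caller's
    // (validator / ledger-tool) responsibility via adjust_nofile_limit().
    let blockstore = Blockstore::open_with_options(
        ledger_path,
        BlockstoreOptions {
            access_type: AccessType::Secondary,
            ..BlockstoreOptions::default()
        },
    )?;
    Ok(blockstore)
}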
 

+ 0 - 2
local-cluster/src/integration_tests.rs

@@ -106,7 +106,6 @@ pub fn open_blockstore(ledger_path: &Path) -> Blockstore {
         BlockstoreOptions {
             access_type: AccessType::Primary,
             recovery_mode: None,
-            enforce_ulimit_nofile: true,
             ..BlockstoreOptions::default()
         },
     )
@@ -118,7 +117,6 @@ pub fn open_blockstore(ledger_path: &Path) -> Blockstore {
             BlockstoreOptions {
                 access_type: AccessType::Secondary,
                 recovery_mode: None,
-                enforce_ulimit_nofile: true,
                 ..BlockstoreOptions::default()
             },
         )

+ 1 - 0
local-cluster/src/validator_configs.rs

@@ -43,6 +43,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
         no_os_network_stats_reporting: config.no_os_network_stats_reporting,
         no_os_cpu_stats_reporting: config.no_os_cpu_stats_reporting,
         no_os_disk_stats_reporting: config.no_os_disk_stats_reporting,
+        enforce_ulimit_nofile: config.enforce_ulimit_nofile,
         poh_pinned_cpu_core: config.poh_pinned_cpu_core,
         warp_slot: config.warp_slot,
         accounts_db_skip_shrink: config.accounts_db_skip_shrink,

+ 1 - 1
programs/sbf/Cargo.lock

@@ -6370,6 +6370,7 @@ dependencies = [
  "futures 0.3.31",
  "histogram",
  "itertools 0.12.1",
+ "libc",
  "log",
  "lru",
  "min-max-heap",
@@ -7084,7 +7085,6 @@ dependencies = [
  "futures 0.3.31",
  "itertools 0.12.1",
  "lazy-lru",
- "libc",
  "log",
  "lru",
  "mockall",

+ 0 - 3
validator/src/commands/run/args/blockstore_options.rs

@@ -45,9 +45,6 @@ impl FromClapArgMatches for BlockstoreOptions {
         Ok(BlockstoreOptions {
             recovery_mode,
             column_options,
-            // The validator needs to open many files, check that the process has
-            // permission to do so in order to fail quickly and give a direct error
-            enforce_ulimit_nofile: true,
             // The validator needs primary (read/write)
             access_type: AccessType::Primary,
             num_rocksdb_compaction_threads: rocksdb_compaction_threads,

+ 3 - 0
validator/src/commands/run/execute.rs

@@ -568,6 +568,9 @@ pub fn execute(
         no_os_network_stats_reporting: matches.is_present("no_os_network_stats_reporting"),
         no_os_cpu_stats_reporting: matches.is_present("no_os_cpu_stats_reporting"),
         no_os_disk_stats_reporting: matches.is_present("no_os_disk_stats_reporting"),
+        // The validator needs to open many files, check that the process has
+        // permission to do so in order to fail quickly and give a direct error
+        enforce_ulimit_nofile: true,
         poh_pinned_cpu_core: value_of(matches, "poh_pinned_cpu_core")
             .unwrap_or(poh_service::DEFAULT_PINNED_CPU_CORE),
         poh_hashes_per_batch: value_of(matches, "poh_hashes_per_batch")