hardened_unpack.rs

use {
    agave_fs::file_io::{self, FileCreator},
    crossbeam_channel::Sender,
    log::*,
    rand::{thread_rng, Rng},
    solana_genesis_config::DEFAULT_GENESIS_FILE,
    std::{
        fs::{self, File},
        io::{self, Read},
        path::{
            Component::{self, CurDir, Normal},
            Path, PathBuf,
        },
        sync::Arc,
    },
    tar::{
        Archive,
        EntryType::{Directory, GNUSparse, Regular},
    },
    thiserror::Error,
};

#[derive(Error, Debug)]
pub enum UnpackError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Archive error: {0}")]
    Archive(String),
    #[error("Unpacking '{1}' failed: {0}")]
    Unpack(Box<UnpackError>, PathBuf),
}

pub type Result<T> = std::result::Result<T, UnpackError>;

// 64 TiB; some safe margin to the max 128 TiB in amd64 linux userspace VmSize
// (ref: https://unix.stackexchange.com/a/386555/364236)
// Note that this is directly related to the mmapped data size,
// so protect against insane values.
// This is the file size including holes for sparse files.
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE: u64 = 64 * 1024 * 1024 * 1024 * 1024;
// 4 TiB;
// This is the actually consumed disk usage for sparse files.
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE: u64 = 4 * 1024 * 1024 * 1024 * 1024;
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT: u64 = 5_000_000;
const MAX_GENESIS_ARCHIVE_UNPACKED_COUNT: u64 = 100;
// The buffer should be large enough to saturate write I/O bandwidth, while also accommodating:
// - Many small files: each file consumes at least one write-capacity-sized chunk (0.5-1 MiB).
// - Large files: their data may accumulate in backlog buffers while waiting for file open
//   operations to complete.
const MAX_UNPACK_WRITE_BUF_SIZE: usize = 512 * 1024 * 1024;
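
/// Adds `entry_size` to the running `total_size`, returning an error once the sum
/// exceeds `limit_size` (the addition saturates, so it cannot wrap around).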
fn checked_total_size_sum(total_size: u64, entry_size: u64, limit_size: u64) -> Result<u64> {
    trace!("checked_total_size_sum: {total_size} + {entry_size} < {limit_size}");
    let total_size = total_size.saturating_add(entry_size);
    if total_size > limit_size {
        return Err(UnpackError::Archive(format!(
            "too large archive: {total_size} than limit: {limit_size}",
        )));
    }
    Ok(total_size)
}
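
/// Increments the running file count, returning an error once it exceeds `limit_count`.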
#[allow(clippy::arithmetic_side_effects)]
fn checked_total_count_increment(total_count: u64, limit_count: u64) -> Result<u64> {
    let total_count = total_count + 1;
    if total_count > limit_count {
        return Err(UnpackError::Archive(format!(
            "too many files in snapshot: {total_count:?}"
        )));
    }
    Ok(total_count)
}
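
/// Converts a failed unpack result into an `UnpackError::Archive` that names the offending path.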
fn check_unpack_result(unpack_result: Result<()>, path: String) -> Result<()> {
    if let Err(err) = unpack_result {
        return Err(UnpackError::Archive(format!(
            "failed to unpack {path:?}: {err}"
        )));
    }
    Ok(())
}
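
/// Decision returned by an entry checker: unpack the entry into the given directory,
/// silently skip it, or reject the whole archive.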
#[derive(Debug, PartialEq, Eq)]
enum UnpackPath<'a> {
    Valid(&'a Path),
    Ignore,
    Invalid,
}
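
/// Unpacks the tar stream in `input`, enforcing the apparent/actual unpacked-size and
/// file-count limits. `entry_checker` decides where (or whether) each entry is unpacked,
/// and `file_path_processor` is invoked with the path of every file that was written.
/// `memlock_budget_size` bounds the in-memory write buffer used by the file creator.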
#[allow(clippy::arithmetic_side_effects)]
fn unpack_archive<'a, C, D>(
    input: impl Read,
    memlock_budget_size: usize,
    apparent_limit_size: u64,
    actual_limit_size: u64,
    limit_count: u64,
    mut entry_checker: C,   // checks if entry is valid
    file_path_processor: D, // processes file paths after writing
) -> Result<()>
where
    C: FnMut(&[&str], tar::EntryType) -> UnpackPath<'a>,
    D: FnMut(PathBuf),
{
    let mut apparent_total_size: u64 = 0;
    let mut actual_total_size: u64 = 0;
    let mut total_count: u64 = 0;
    let mut total_entries = 0;
    let mut open_dirs = Vec::new();
    // Bound the buffer based on the provided limit of unpacked data and the input archive size
    // (decompression multiplies content size, but buffering more than the origin isn't necessary).
    let buf_size =
        (memlock_budget_size.min(actual_limit_size as usize)).min(MAX_UNPACK_WRITE_BUF_SIZE);
    let mut files_creator = file_io::file_creator(buf_size, file_path_processor)?;
    let mut archive = Archive::new(input);
    for entry in archive.entries()? {
        let entry = entry?;
        let path = entry.path()?;
        let path_str = path.display().to_string();
        // Although the `tar` crate safely skips such entries at the actual unpacking step, fail
        // early ourselves when a path contains odd components like `..` or `/`, to keep our
        // pattern-matching reasoning clearer:
        // https://docs.rs/tar/0.4.26/src/tar/entry.rs.html#371
        let parts = path
            .components()
            .map(|p| match p {
                CurDir => Ok("."),
                Normal(c) => c.to_str().ok_or(()),
                _ => Err(()), // Prefix (for Windows) and RootDir are forbidden
            })
            .collect::<std::result::Result<Vec<_>, _>>();
        // Reject old-style BSD directory entries that aren't explicitly tagged as directories
        let legacy_dir_entry =
            entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/");
        let kind = entry.header().entry_type();
        let reject_legacy_dir_entry = legacy_dir_entry && (kind != Directory);
        let (Ok(parts), false) = (parts, reject_legacy_dir_entry) else {
            return Err(UnpackError::Archive(format!(
                "invalid path found: {path_str:?}"
            )));
        };
        let unpack_dir = match entry_checker(parts.as_slice(), kind) {
            UnpackPath::Invalid => {
                return Err(UnpackError::Archive(format!(
                    "extra entry found: {:?} {:?}",
                    path_str,
                    entry.header().entry_type(),
                )));
            }
            UnpackPath::Ignore => {
                continue;
            }
            UnpackPath::Valid(unpack_dir) => unpack_dir,
        };
        apparent_total_size = checked_total_size_sum(
            apparent_total_size,
            entry.header().size()?,
            apparent_limit_size,
        )?;
        actual_total_size = checked_total_size_sum(
            actual_total_size,
            entry.header().entry_size()?,
            actual_limit_size,
        )?;
        total_count = checked_total_count_increment(total_count, limit_count)?;
        let account_filename = match parts.as_slice() {
            ["accounts", account_filename] => Some(PathBuf::from(account_filename)),
            _ => None,
        };
        let entry_path = if let Some(account) = account_filename {
            // Special case account files. We're unpacking an account entry inside one of the
            // account_paths returned by `entry_checker`. We want to unpack into
            // account_path/<account> instead of account_path/accounts/<account>, so we strip
            // the accounts/ prefix.
            sanitize_path_and_open_dir(&account, unpack_dir, &mut open_dirs)
        } else {
            sanitize_path_and_open_dir(&path, unpack_dir, &mut open_dirs)
        }?; // ? handles file system errors
        let Some((entry_path, open_dir)) = entry_path else {
            continue; // skip it
        };
        let unpack = unpack_entry(&mut files_creator, entry, entry_path, open_dir);
        check_unpack_result(unpack, path_str)?;
        total_entries += 1;
    }
    files_creator.drain()?;
    info!("unpacked {total_entries} entries total");
    Ok(())
}
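
/// Writes a single tar entry to `dst`. Plain file contents are scheduled through
/// `files_creator`; directory-like and special entries fall back to tar-rs's own
/// `unpack`, followed by permission sanitization.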
fn unpack_entry<'a, R: Read>(
    files_creator: &mut Box<dyn FileCreator + 'a>,
    mut entry: tar::Entry<'_, R>,
    dst: PathBuf,
    dst_open_dir: Arc<File>,
) -> Result<()> {
    let mode = match entry.header().entry_type() {
        GNUSparse | Regular => 0o644,
        _ => 0o755,
    };
    if should_fallback_to_tar_unpack(&entry) {
        entry.unpack(&dst)?;
        // Sanitize permissions.
        file_io::set_path_permissions(&dst, mode)?;
        if !entry.header().entry_type().is_dir() {
            // Process file after setting permissions
            files_creator.file_complete(dst);
        }
        return Ok(());
    }
    files_creator.schedule_create_at_dir(dst, mode, dst_open_dir, &mut entry)?;
    Ok(())
}

fn should_fallback_to_tar_unpack<R: io::Read>(entry: &tar::Entry<'_, R>) -> bool {
    // These cases are handled as directories or in some other special way by the tar-rs
    // library; we only handle the cases where the library would write plain files with the
    // entry's content ourselves.
    matches!(
        entry.header().entry_type(),
        tar::EntryType::Directory
            | tar::EntryType::Link
            | tar::EntryType::Symlink
            | tar::EntryType::XGlobalHeader
            | tar::EntryType::XHeader
            | tar::EntryType::GNULongName
            | tar::EntryType::GNULongLink
    ) || entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/")
}

/// Returns `Err` on a file system error, `Ok(Some((path, open_dir)))` if the path is good,
/// and `Ok(None)` if this file should be skipped.
fn sanitize_path_and_open_dir(
    entry_path: &Path,
    dst: &Path,
    open_dirs: &mut Vec<(PathBuf, Arc<File>)>,
) -> Result<Option<(PathBuf, Arc<File>)>> {
    // We cannot call unpack_in because it errors if we try to use 2 account paths.
    // So, this code is borrowed from unpack_in
    // ref: https://docs.rs/tar/*/tar/struct.Entry.html#method.unpack_in
    let mut file_dst = dst.to_path_buf();
    const SKIP: Result<Option<(PathBuf, Arc<File>)>> = Ok(None);
    {
        let path = entry_path;
        for part in path.components() {
            match part {
                // Leading '/' characters, root paths, and '.'
                // components are just ignored and treated as "empty
                // components"
                Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,
                // If any part of the filename is '..', then skip over
                // unpacking the file to prevent directory traversal
                // security issues. See, e.g.: CVE-2001-1267,
                // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
                Component::ParentDir => return SKIP,
                Component::Normal(part) => file_dst.push(part),
            }
        }
    }
    // Skip cases where only slashes or '.' parts were seen, because
    // this is effectively an empty filename.
    if *dst == *file_dst {
        return SKIP;
    }
    // Skip entries without a parent (i.e. outside of FS root)
    let Some(parent) = file_dst.parent() else {
        return SKIP;
    };
    let open_dst_dir = match open_dirs.binary_search_by(|(key, _)| parent.cmp(key)) {
        Err(insert_at) => {
            fs::create_dir_all(parent)?;
            // Here we differ from tar's unpack_in: its internal call to unpack validates a
            // little differently.
            // Ignore the Ok return value here.
            validate_inside_dst(dst, parent)?;
            let opened_dir = Arc::new(File::open(parent)?);
            open_dirs.insert(insert_at, (parent.to_path_buf(), opened_dir.clone()));
            opened_dir
        }
        Ok(index) => open_dirs[index].1.clone(),
    };
    Ok(Some((file_dst, open_dst_dir)))
}

// copied from:
// https://github.com/alexcrichton/tar-rs/blob/d90a02f582c03dfa0fd11c78d608d0974625ae5d/src/entry.rs#L781
fn validate_inside_dst(dst: &Path, file_dst: &Path) -> Result<PathBuf> {
    // Abort if target (canonical) parent is outside of `dst`
    let canon_parent = file_dst.canonicalize().map_err(|err| {
        UnpackError::Archive(format!("{err} while canonicalizing {}", file_dst.display()))
    })?;
    let canon_target = dst.canonicalize().map_err(|err| {
        UnpackError::Archive(format!("{err} while canonicalizing {}", dst.display()))
    })?;
    if !canon_parent.starts_with(&canon_target) {
        return Err(UnpackError::Archive(format!(
            "trying to unpack outside of destination path: {}",
            canon_target.display()
        )));
    }
    Ok(canon_target)
}

/// Unpacks a snapshot from the (potentially partial) `input` archive and
/// sends entry file paths through the `sender` channel
pub(super) fn streaming_unpack_snapshot(
    input: impl Read,
    memlock_budget_size: usize,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    sender: &Sender<PathBuf>,
) -> Result<()> {
    unpack_snapshot_with_processors(
        input,
        memlock_budget_size,
        ledger_dir,
        account_paths,
        |_, _| {},
        |file_path| {
            let result = sender.send(file_path);
            if let Err(err) = result {
                panic!(
                    "failed to send path '{}' from unpacker to rebuilder: {err}",
                    err.0.display(),
                );
            }
        },
    )
}
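
/// Shared driver for snapshot unpacking: validates each entry, scatters account storage
/// files across `account_paths`, unpacks everything else under `ledger_dir`, and forwards
/// the path of every written file to `file_path_processor`. `accounts_path_processor`
/// observes each account file name together with its chosen destination directory.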
fn unpack_snapshot_with_processors<F, G>(
    input: impl Read,
    memlock_budget_size: usize,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    mut accounts_path_processor: F,
    file_path_processor: G,
) -> Result<()>
where
    F: FnMut(&str, &Path),
    G: FnMut(PathBuf),
{
    assert!(!account_paths.is_empty());
    unpack_archive(
        input,
        memlock_budget_size,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT,
        |parts, kind| {
            if is_valid_snapshot_archive_entry(parts, kind) {
                if let ["accounts", file] = parts {
                    // Randomly distribute the accounts files among the available `account_paths`
                    let path_index = thread_rng().gen_range(0..account_paths.len());
                    match account_paths
                        .get(path_index)
                        .map(|path_buf| path_buf.as_path())
                    {
                        Some(path) => {
                            accounts_path_processor(file, path);
                            UnpackPath::Valid(path)
                        }
                        None => UnpackPath::Invalid,
                    }
                } else {
                    UnpackPath::Valid(ledger_dir)
                }
            } else {
                UnpackPath::Invalid
            }
        },
        file_path_processor,
    )
}
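
/// Returns true if `v` is non-empty and consists solely of ASCII digits.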
fn all_digits(v: &str) -> bool {
    if v.is_empty() {
        return false;
    }
    for x in v.chars() {
        if !x.is_ascii_digit() {
            return false;
        }
    }
    true
}
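
/// Returns true if `v` looks like an account storage file name: ASCII digits, exactly one
/// '.', and digits on both sides of it. For example, "123.456" is accepted; "12.", ".12",
/// "232323", and "1.2.34" are rejected.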
#[allow(clippy::arithmetic_side_effects)]
fn like_storage(v: &str) -> bool {
    let mut periods = 0;
    let mut saw_numbers = false;
    for x in v.chars() {
        if !x.is_ascii_digit() {
            if x == '.' {
                if periods > 0 || !saw_numbers {
                    return false;
                }
                saw_numbers = false;
                periods += 1;
            } else {
                return false;
            }
        } else {
            saw_numbers = true;
        }
    }
    saw_numbers && periods == 1
}
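
/// Whitelist of entries permitted in a snapshot archive: the version file, the accounts
/// directory and its storage files, and the snapshots directory with its per-slot files
/// and status_cache.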
fn is_valid_snapshot_archive_entry(parts: &[&str], kind: tar::EntryType) -> bool {
    match (parts, kind) {
        (["version"], Regular) => true,
        (["accounts"], Directory) => true,
        (["accounts", file], GNUSparse) if like_storage(file) => true,
        (["accounts", file], Regular) if like_storage(file) => true,
        (["snapshots"], Directory) => true,
        (["snapshots", "status_cache"], GNUSparse) => true,
        (["snapshots", "status_cache"], Regular) => true,
        (["snapshots", dir, file], GNUSparse) if all_digits(dir) && all_digits(file) => true,
        (["snapshots", dir, file], Regular) if all_digits(dir) && all_digits(file) => true,
        (["snapshots", dir], Directory) if all_digits(dir) => true,
        _ => false,
    }
}
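
/// Unpacks a genesis archive into `unpack_dir`. Both size limits are set to
/// `max_genesis_archive_unpacked_size`, and IO is synchronous (no memlock budget)
/// since genesis archives are small.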
pub(super) fn unpack_genesis(
    input: impl Read,
    unpack_dir: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> Result<()> {
    unpack_archive(
        input,
        0, /* don't provide memlock budget (forces sync IO), since genesis archives are small */
        max_genesis_archive_unpacked_size,
        max_genesis_archive_unpacked_size,
        MAX_GENESIS_ARCHIVE_UNPACKED_COUNT,
        |p, k| is_valid_genesis_archive_entry(unpack_dir, p, k),
        |_| {},
    )
}
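
/// Whitelist for genesis archives: the genesis file itself unpacks into `unpack_dir`,
/// legacy rocksdb/rocksdb_fifo entries are ignored, and everything else is invalid.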
fn is_valid_genesis_archive_entry<'a>(
    unpack_dir: &'a Path,
    parts: &[&str],
    kind: tar::EntryType,
) -> UnpackPath<'a> {
    trace!("validating: {parts:?} {kind:?}");
    #[allow(clippy::match_like_matches_macro)]
    match (parts, kind) {
        ([DEFAULT_GENESIS_FILE], GNUSparse) => UnpackPath::Valid(unpack_dir),
        ([DEFAULT_GENESIS_FILE], Regular) => UnpackPath::Valid(unpack_dir),
        (["rocksdb"], Directory) => UnpackPath::Ignore,
        (["rocksdb", _], GNUSparse) => UnpackPath::Ignore,
        (["rocksdb", _], Regular) => UnpackPath::Ignore,
        (["rocksdb_fifo"], Directory) => UnpackPath::Ignore,
        (["rocksdb_fifo", _], GNUSparse) => UnpackPath::Ignore,
        (["rocksdb_fifo", _], Regular) => UnpackPath::Ignore,
        _ => UnpackPath::Invalid,
    }
}

#[cfg(test)]
mod tests {
    use {
        super::*,
        assert_matches::assert_matches,
        std::io::BufReader,
        tar::{Builder, Header},
    };

    #[test]
    fn test_archive_is_valid_entry() {
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", ""],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3"],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3", "3"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["version"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", ""],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "x0"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0x"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "①"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0", "aa"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["aaaa"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_valid_snapshot_accounts() {
        agave_logger::setup();
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "0.0"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "01829.077"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "1.2.34"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "12."],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", ".12"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "0x0"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "abc"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "232323"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "৬.¾"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_archive_is_valid_archive_entry() {
        let path = Path::new("");
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["genesis.bin"], tar::EntryType::Regular),
            UnpackPath::Valid(path)
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["genesis.bin"], tar::EntryType::GNUSparse,),
            UnpackPath::Valid(path)
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::Directory),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::Regular),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::GNUSparse,),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::Directory),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo", "foo"], tar::EntryType::Regular),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo"],
                tar::EntryType::GNUSparse,
            ),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["aaaa"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["aaaa"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::Directory,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::Regular
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::GNUSparse
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::Regular
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::GNUSparse
            ),
            UnpackPath::Invalid
        );
    }

    fn with_finalize_and_unpack<C>(archive: tar::Builder<Vec<u8>>, checker: C) -> Result<()>
    where
        C: Fn(&[u8], &Path) -> Result<()>,
    {
        let data = archive.into_inner().unwrap();
        let temp_dir = tempfile::TempDir::new().unwrap();
        checker(data.as_slice(), temp_dir.path())?;
        // Check that there are no bad permissions preventing deletion.
        let result = temp_dir.close();
        assert_matches!(result, Ok(()));
        Ok(())
    }

    fn finalize_and_unpack_snapshot(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| {
            unpack_snapshot_with_processors(a, 256, b, &[PathBuf::new()], |_, _| {}, |_| {})
                .map(|_| ())
        })
    }

    fn finalize_and_unpack_genesis(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| unpack_genesis(a, b, 1024))
    }

    #[test]
    fn test_archive_unpack_snapshot_ok() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_ok() {
        let mut header = Header::new_gnu();
        header.set_path("genesis.bin").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_bad_perms() {
        let mut archive = Builder::new(Vec::new());
        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();
        let mut header = Header::new_gnu();
        header.set_path("rocksdb/test").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        archive.append(&header, data).unwrap();
        // Removing all permissions makes it harder to delete this directory
        // or work with files inside it.
        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_mode(0o000);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_bad_rocksdb_subdir() {
        let mut archive = Builder::new(Vec::new());
        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();
        // tar-rs treats the following entry as a Directory to support old tar formats.
        let mut header = Header::new_gnu();
        header.set_path("rocksdb/test/").unwrap();
        header.set_entry_type(Regular);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"rocksdb/test/\"");
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_path() {
        let mut header = Header::new_gnu();
        // bypass the sanitization of the .set_path()
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(b"foo/../../../dangerous".iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"foo/../../../dangerous\"");
    }

    fn with_archive_unpack_snapshot_invalid_path(path: &str) -> Result<()> {
        let mut header = Header::new_gnu();
        // bypass the sanitization of the .set_path()
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(path.as_bytes().iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        with_finalize_and_unpack(archive, |data, path| {
            let mut unpacking_archive = Archive::new(BufReader::new(data));
            for entry in unpacking_archive.entries()? {
                if !entry?.unpack_in(path)? {
                    return Err(UnpackError::Archive("failed!".to_string()));
                } else if !path.join(path).exists() {
                    return Err(UnpackError::Archive("not existing!".to_string()));
                }
            }
            Ok(())
        })
    }

    #[test]
    fn test_archive_unpack_itself() {
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("ryoqun/work"),
            Ok(())
        );
        // Absolute paths are neutralized as relative
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("/etc/passwd"),
            Ok(())
        );
        assert_matches!(with_archive_unpack_snapshot_invalid_path("../../../dangerous"), Err(UnpackError::Archive(ref message)) if message == "failed!");
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_entry() {
        let mut header = Header::new_gnu();
        header.set_path("foo").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "extra entry found: \"foo\" Regular");
    }

    #[test]
    fn test_archive_unpack_snapshot_too_large() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(1024 * 1024 * 1024 * 1024 * 1024);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
            if message == &format!(
                "too large archive: 1125899906842624 than limit: {MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE}"
            )
        );
    }

    #[test]
    fn test_archive_unpack_snapshot_bad_unpack() {
        let result = check_unpack_result(
            Err(UnpackError::Io(io::ErrorKind::FileTooLarge.into())),
            "abc".to_string(),
        );
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "failed to unpack \"abc\": IO error: file too large");
    }

    #[test]
    fn test_archive_checked_total_size_sum() {
        let result = checked_total_size_sum(500, 500, MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE);
        assert_matches!(result, Ok(1000));
        let result =
            checked_total_size_sum(u64::MAX - 2, 2, MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
            if message == &format!(
                "too large archive: 18446744073709551615 than limit: {MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE}"
            )
        );
    }

    #[test]
    fn test_archive_checked_total_size_count() {
        let result = checked_total_count_increment(101, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(result, Ok(102));
        let result =
            checked_total_count_increment(999_999_999_999, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
            if message == "too many files in snapshot: 1000000000000"
        );
    }

    #[test]
    fn test_archive_unpack_account_path() {
        let mut header = Header::new_gnu();
        header.set_path("accounts/123.456").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = with_finalize_and_unpack(archive, |ar, tmp| {
            unpack_snapshot_with_processors(
                ar,
                256,
                tmp,
                &[tmp.join("accounts_dest")],
                |_, _| {},
                |path| assert_eq!(path, tmp.join("accounts_dest/123.456")),
            )
        });
        assert_matches!(result, Ok(()));
    }
}