rolling_bit_field.rs 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. //! functionally similar to a hashset
  2. //! Relies on there being a sliding window of key values. The key values continue to increase.
  3. //! Old key values are removed from the lesser values and do not accumulate.
  4. mod iterators;
  5. use {
  6. bv::BitVec, iterators::RollingBitFieldOnesIter, solana_clock::Slot,
  7. solana_nohash_hasher::IntSet,
  8. };
  9. #[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
  10. #[derive(Clone)]
  11. pub struct RollingBitField {
  12. max_width: u64,
  13. min: u64,
  14. max_exclusive: u64,
  15. bits: BitVec,
  16. count: usize,
  17. // These are items that are true and lower than min.
  18. // They would cause us to exceed max_width if we stored them in our bit field.
  19. // We only expect these items in conditions where there is some other bug in the system
  20. // or in testing when large ranges are created.
  21. excess: IntSet<u64>,
  22. }
  23. impl PartialEq<RollingBitField> for RollingBitField {
  24. fn eq(&self, other: &Self) -> bool {
  25. // 2 instances could have different internal data for the same values,
  26. // so we have to compare data.
  27. self.len() == other.len() && {
  28. for item in self.get_all() {
  29. if !other.contains(&item) {
  30. return false;
  31. }
  32. }
  33. true
  34. }
  35. }
  36. }
  37. impl std::fmt::Debug for RollingBitField {
  38. /// Custom debug formatter that outputs the possibly long and
  39. /// sparse vector of bits in a compact representation, where each
  40. /// sequence of equal values is compacted into value;length pair
  41. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  42. let mut bits = String::from("[");
  43. let mut prev = self.bits[0];
  44. bits.push_str(&format!("{prev}"));
  45. let mut index = 1;
  46. while index < self.bits.len() {
  47. if self.bits[index] != prev {
  48. prev = self.bits[index];
  49. break;
  50. }
  51. index += 1;
  52. }
  53. if index > 1 {
  54. bits.push_str(&format!(";{index}"));
  55. }
  56. if index < self.bits.len() {
  57. bits.push_str(&format!(", {prev}"));
  58. }
  59. let mut count = 0;
  60. while index < self.bits.len() {
  61. if self.bits[index] != prev {
  62. if count > 1 {
  63. bits.push_str(&format!(";{count}"));
  64. }
  65. count = 0;
  66. prev = self.bits[index];
  67. bits.push_str(&format!(", {prev}"));
  68. }
  69. count += 1;
  70. index += 1;
  71. }
  72. if count > 1 {
  73. bits.push_str(&format!(";{count}"));
  74. }
  75. bits.push(']');
  76. // The order of the `count` and `bits` fields is changed on
  77. // purpose so that possibly very long output of `bits` doesn't
  78. // make it more difficult to read the value of the `count`
  79. // field.
  80. f.debug_struct("RollingBitField")
  81. .field("max_width", &self.max_width)
  82. .field("min", &self.min)
  83. .field("max_exclusive", &self.max_exclusive)
  84. .field("count", &self.count)
  85. .field("bits", &bits)
  86. .field("excess", &self.excess)
  87. .finish()
  88. }
  89. }
  90. /// functionally similar to a hashset
  91. /// Relies on there being a sliding window of key values. The key values continue to increase.
  92. /// Old key values are removed from the lesser values and do not accumulate.
  93. impl RollingBitField {
  94. pub fn new(max_width: u64) -> Self {
  95. assert!(max_width > 0);
  96. assert!(max_width.is_power_of_two()); // power of 2 to make dividing a shift
  97. let bits = BitVec::new_fill(false, max_width);
  98. Self {
  99. max_width,
  100. bits,
  101. count: 0,
  102. min: 0,
  103. max_exclusive: 0,
  104. excess: IntSet::default(),
  105. }
  106. }
  107. // find the array index
  108. fn get_address(&self, key: &u64) -> u64 {
  109. key % self.max_width
  110. }
  111. pub fn range_width(&self) -> u64 {
  112. // note that max isn't updated on remove, so it can be above the current max
  113. self.max_exclusive - self.min
  114. }
  115. pub fn min(&self) -> Option<u64> {
  116. if self.is_empty() {
  117. None
  118. } else if self.excess.is_empty() {
  119. Some(self.min)
  120. } else {
  121. let excess_min = self.excess.iter().min().copied();
  122. if self.all_items_in_excess() {
  123. excess_min
  124. } else {
  125. Some(std::cmp::min(self.min, excess_min.unwrap_or(u64::MAX)))
  126. }
  127. }
  128. }
  129. pub fn insert(&mut self, key: u64) {
  130. let mut bits_empty = self.count == 0 || self.all_items_in_excess();
  131. let update_bits = if bits_empty {
  132. true // nothing in bits, so in range
  133. } else if key < self.min {
  134. // bits not empty and this insert is before min, so add to excess
  135. if self.excess.insert(key) {
  136. self.count += 1;
  137. }
  138. false
  139. } else if key < self.max_exclusive {
  140. true // fits current bit field range
  141. } else {
  142. // key is >= max
  143. let new_max = key + 1;
  144. loop {
  145. let new_width = new_max.saturating_sub(self.min);
  146. if new_width <= self.max_width {
  147. // this key will fit the max range
  148. break;
  149. }
  150. // move the min item from bits to excess and then purge from min to make room for this new max
  151. let inserted = self.excess.insert(self.min);
  152. assert!(inserted);
  153. let key = self.min;
  154. let address = self.get_address(&key);
  155. self.bits.set(address, false);
  156. self.purge(&key);
  157. if self.all_items_in_excess() {
  158. // if we moved the last existing item to excess, then we are ready to insert the new item in the bits
  159. bits_empty = true;
  160. break;
  161. }
  162. }
  163. true // moved things to excess if necessary, so update bits with the new entry
  164. };
  165. if update_bits {
  166. let address = self.get_address(&key);
  167. let value = self.bits.get(address);
  168. if !value {
  169. self.bits.set(address, true);
  170. if bits_empty {
  171. self.min = key;
  172. self.max_exclusive = key + 1;
  173. } else {
  174. self.min = std::cmp::min(self.min, key);
  175. self.max_exclusive = std::cmp::max(self.max_exclusive, key + 1);
  176. assert!(
  177. self.min + self.max_width >= self.max_exclusive,
  178. "min: {}, max: {}, max_width: {}",
  179. self.min,
  180. self.max_exclusive,
  181. self.max_width
  182. );
  183. }
  184. self.count += 1;
  185. }
  186. }
  187. }
  188. /// remove key from set, return if item was in the set
  189. pub fn remove(&mut self, key: &u64) -> bool {
  190. if key >= &self.min {
  191. // if asked to remove something bigger than max, then no-op
  192. if key < &self.max_exclusive {
  193. let address = self.get_address(key);
  194. let get = self.bits.get(address);
  195. if get {
  196. self.count -= 1;
  197. self.bits.set(address, false);
  198. self.purge(key);
  199. }
  200. get
  201. } else {
  202. false
  203. }
  204. } else {
  205. // asked to remove something < min. would be in excess if it exists
  206. let remove = self.excess.remove(key);
  207. if remove {
  208. self.count -= 1;
  209. }
  210. remove
  211. }
  212. }
  213. fn all_items_in_excess(&self) -> bool {
  214. self.excess.len() == self.count
  215. }
  216. // after removing 'key' where 'key' = min, make min the correct new min value
  217. fn purge(&mut self, key: &u64) {
  218. if self.count > 0 && !self.all_items_in_excess() {
  219. if key == &self.min {
  220. let start = self.min + 1; // min just got removed
  221. for key in start..self.max_exclusive {
  222. if self.contains_assume_in_range(&key) {
  223. self.min = key;
  224. break;
  225. }
  226. }
  227. }
  228. } else {
  229. // The idea is that there are no items in the bitfield anymore.
  230. // But, there MAY be items in excess. The model works such that items < min go into excess.
  231. // So, after purging all items from bitfield, we hold max to be what it previously was, but set min to max.
  232. // Thus, if we lookup >= max, answer is always false without having to look in excess.
  233. // If we changed max here to 0, we would lose the ability to know the range of items in excess (if any).
  234. // So, now, with min updated = max:
  235. // If we lookup < max, then we first check min.
  236. // If >= min, then we look in bitfield.
  237. // Otherwise, we look in excess since the request is < min.
  238. // So, resetting min like this after a remove results in the correct behavior for the model.
  239. // Later, if we insert and there are 0 items total (excess + bitfield), then we reset min/max to reflect the new item only.
  240. self.min = self.max_exclusive;
  241. }
  242. }
  243. fn contains_assume_in_range(&self, key: &u64) -> bool {
  244. // the result may be aliased. Caller is responsible for determining key is in range.
  245. let address = self.get_address(key);
  246. self.bits.get(address)
  247. }
  248. // This is the 99% use case.
  249. // This needs be fast for the most common case of asking for key >= min.
  250. pub fn contains(&self, key: &u64) -> bool {
  251. if key < &self.max_exclusive {
  252. if key >= &self.min {
  253. // in the bitfield range
  254. self.contains_assume_in_range(key)
  255. } else {
  256. self.excess.contains(key)
  257. }
  258. } else {
  259. false
  260. }
  261. }
  262. pub fn len(&self) -> usize {
  263. self.count
  264. }
  265. pub fn is_empty(&self) -> bool {
  266. self.len() == 0
  267. }
  268. pub fn max_exclusive(&self) -> u64 {
  269. self.max_exclusive
  270. }
  271. pub fn max_inclusive(&self) -> u64 {
  272. self.max_exclusive.saturating_sub(1)
  273. }
  274. /// return all items < 'max_slot_exclusive'
  275. pub fn get_all_less_than(&self, max_slot_exclusive: Slot) -> Vec<u64> {
  276. let mut all = Vec::with_capacity(self.count);
  277. self.excess.iter().for_each(|slot| {
  278. if slot < &max_slot_exclusive {
  279. all.push(*slot)
  280. }
  281. });
  282. for key in self.min..self.max_exclusive {
  283. if key >= max_slot_exclusive {
  284. break;
  285. }
  286. if self.contains_assume_in_range(&key) {
  287. all.push(key);
  288. }
  289. }
  290. all
  291. }
  292. /// return highest item < 'max_slot_exclusive'
  293. pub fn get_prior(&self, max_slot_exclusive: Slot) -> Option<Slot> {
  294. let mut slot = max_slot_exclusive.saturating_sub(1);
  295. self.min().and_then(|min| {
  296. loop {
  297. if self.contains(&slot) {
  298. return Some(slot);
  299. }
  300. slot = slot.saturating_sub(1);
  301. if slot == 0 || slot < min {
  302. break;
  303. }
  304. }
  305. None
  306. })
  307. }
  308. pub fn get_all(&self) -> Vec<u64> {
  309. let mut all = Vec::with_capacity(self.count);
  310. self.excess.iter().for_each(|slot| all.push(*slot));
  311. for key in self.min..self.max_exclusive {
  312. if self.contains_assume_in_range(&key) {
  313. all.push(key);
  314. }
  315. }
  316. all
  317. }
  318. /// Returns an iterator over the rolling bit field
  319. ///
  320. /// The iterator yields all the 'set' bits.
  321. /// Note, the iteration order of the bits in 'excess' is not deterministic.
  322. pub fn iter_ones(&self) -> RollingBitFieldOnesIter<'_> {
  323. RollingBitFieldOnesIter::new(self)
  324. }
  325. }
  326. #[cfg(test)]
  327. pub mod tests {
  328. use {super::*, log::*, solana_measure::measure::Measure, std::collections::HashSet};
  329. impl RollingBitField {
  330. pub fn clear(&mut self) {
  331. *self = Self::new(self.max_width);
  332. }
  333. }
  334. #[test]
  335. fn test_get_all_less_than() {
  336. agave_logger::setup();
  337. let len = 16;
  338. let mut bitfield = RollingBitField::new(len);
  339. assert!(bitfield.get_all_less_than(0).is_empty());
  340. bitfield.insert(0);
  341. assert!(bitfield.get_all_less_than(0).is_empty());
  342. assert_eq!(bitfield.get_all_less_than(1), vec![0]);
  343. bitfield.insert(1);
  344. assert_eq!(bitfield.get_all_less_than(1), vec![0]);
  345. let last_item_not_in_excess = len - 1;
  346. bitfield.insert(last_item_not_in_excess);
  347. assert!(bitfield.excess.is_empty());
  348. assert_eq!(
  349. bitfield.get_all_less_than(last_item_not_in_excess),
  350. vec![0, 1]
  351. );
  352. assert_eq!(
  353. bitfield.get_all_less_than(last_item_not_in_excess + 1),
  354. vec![0, 1, last_item_not_in_excess]
  355. );
  356. let first_item_in_excess = last_item_not_in_excess + 1;
  357. bitfield.insert(first_item_in_excess);
  358. assert!(bitfield.excess.contains(&0));
  359. assert_eq!(
  360. bitfield.get_all_less_than(last_item_not_in_excess),
  361. vec![0, 1]
  362. );
  363. assert_eq!(
  364. bitfield.get_all_less_than(last_item_not_in_excess + 1),
  365. vec![0, 1, last_item_not_in_excess]
  366. );
  367. assert_eq!(
  368. bitfield.get_all_less_than(first_item_in_excess + 1),
  369. vec![0, 1, last_item_not_in_excess, first_item_in_excess]
  370. );
  371. bitfield.insert(len * 2);
  372. let mut less = bitfield.get_all_less_than(len * 2);
  373. less.sort_unstable();
  374. assert_eq!(
  375. vec![0, 1, last_item_not_in_excess, first_item_in_excess],
  376. less
  377. );
  378. let mut less = bitfield.get_all_less_than(len * 2 + 1);
  379. less.sort_unstable();
  380. assert_eq!(
  381. vec![0, 1, last_item_not_in_excess, first_item_in_excess, len * 2],
  382. less
  383. );
  384. }
  385. #[test]
  386. fn test_bitfield_delete_non_excess() {
  387. agave_logger::setup();
  388. let len = 16;
  389. let mut bitfield = RollingBitField::new(len);
  390. assert_eq!(bitfield.min(), None);
  391. bitfield.insert(0);
  392. assert_eq!(bitfield.min(), Some(0));
  393. let too_big = len + 1;
  394. bitfield.insert(too_big);
  395. assert!(bitfield.contains(&0));
  396. assert!(bitfield.contains(&too_big));
  397. assert_eq!(bitfield.len(), 2);
  398. assert_eq!(bitfield.excess.len(), 1);
  399. assert_eq!(bitfield.min, too_big);
  400. assert_eq!(bitfield.min(), Some(0));
  401. assert_eq!(bitfield.max_exclusive, too_big + 1);
  402. // delete the thing that is NOT in excess
  403. bitfield.remove(&too_big);
  404. assert_eq!(bitfield.min, too_big + 1);
  405. assert_eq!(bitfield.max_exclusive, too_big + 1);
  406. let too_big_times_2 = too_big * 2;
  407. bitfield.insert(too_big_times_2);
  408. assert!(bitfield.contains(&0));
  409. assert!(bitfield.contains(&too_big_times_2));
  410. assert_eq!(bitfield.len(), 2);
  411. assert_eq!(bitfield.excess.len(), 1);
  412. assert_eq!(bitfield.min(), bitfield.excess.iter().min().copied());
  413. assert_eq!(bitfield.min, too_big_times_2);
  414. assert_eq!(bitfield.max_exclusive, too_big_times_2 + 1);
  415. bitfield.remove(&0);
  416. bitfield.remove(&too_big_times_2);
  417. assert!(bitfield.is_empty());
  418. let other = 5;
  419. bitfield.insert(other);
  420. assert!(bitfield.contains(&other));
  421. assert!(bitfield.excess.is_empty());
  422. assert_eq!(bitfield.min, other);
  423. assert_eq!(bitfield.max_exclusive, other + 1);
  424. }
  425. #[test]
  426. fn test_bitfield_insert_excess() {
  427. agave_logger::setup();
  428. let len = 16;
  429. let mut bitfield = RollingBitField::new(len);
  430. bitfield.insert(0);
  431. let too_big = len + 1;
  432. bitfield.insert(too_big);
  433. assert!(bitfield.contains(&0));
  434. assert!(bitfield.contains(&too_big));
  435. assert_eq!(bitfield.len(), 2);
  436. assert_eq!(bitfield.excess.len(), 1);
  437. assert!(bitfield.excess.contains(&0));
  438. assert_eq!(bitfield.min, too_big);
  439. assert_eq!(bitfield.max_exclusive, too_big + 1);
  440. // delete the thing that IS in excess
  441. // this does NOT affect min/max
  442. bitfield.remove(&0);
  443. assert_eq!(bitfield.min, too_big);
  444. assert_eq!(bitfield.max_exclusive, too_big + 1);
  445. // re-add to excess
  446. bitfield.insert(0);
  447. assert!(bitfield.contains(&0));
  448. assert!(bitfield.contains(&too_big));
  449. assert_eq!(bitfield.len(), 2);
  450. assert_eq!(bitfield.excess.len(), 1);
  451. assert_eq!(bitfield.min, too_big);
  452. assert_eq!(bitfield.max_exclusive, too_big + 1);
  453. }
  454. #[test]
  455. fn test_bitfield_permutations() {
  456. agave_logger::setup();
  457. let mut bitfield = RollingBitField::new(2097152);
  458. let mut hash = HashSet::new();
  459. let min = 101_000;
  460. let width = 400_000;
  461. let dead = 19;
  462. let mut slot = min;
  463. while hash.len() < width {
  464. slot += 1;
  465. if slot % dead == 0 {
  466. continue;
  467. }
  468. hash.insert(slot);
  469. bitfield.insert(slot);
  470. }
  471. compare(&hash, &bitfield);
  472. let max = slot + 1;
  473. let mut time = Measure::start("");
  474. let mut count = 0;
  475. for slot in (min - 10)..max + 100 {
  476. if hash.contains(&slot) {
  477. count += 1;
  478. }
  479. }
  480. time.stop();
  481. let mut time2 = Measure::start("");
  482. let mut count2 = 0;
  483. for slot in (min - 10)..max + 100 {
  484. if bitfield.contains(&slot) {
  485. count2 += 1;
  486. }
  487. }
  488. time2.stop();
  489. info!(
  490. "{}ms, {}ms, {} ratio",
  491. time.as_ms(),
  492. time2.as_ms(),
  493. time.as_ns() / time2.as_ns()
  494. );
  495. assert_eq!(count, count2);
  496. }
  497. #[test]
  498. #[should_panic(expected = "max_width.is_power_of_two()")]
  499. fn test_bitfield_power_2() {
  500. let _ = RollingBitField::new(3);
  501. }
  502. #[test]
  503. #[should_panic(expected = "max_width > 0")]
  504. fn test_bitfield_0() {
  505. let _ = RollingBitField::new(0);
  506. }
  507. fn setup_empty(width: u64) -> RollingBitFieldTester {
  508. let bitfield = RollingBitField::new(width);
  509. let hash_set = HashSet::new();
  510. RollingBitFieldTester { bitfield, hash_set }
  511. }
  512. struct RollingBitFieldTester {
  513. pub bitfield: RollingBitField,
  514. pub hash_set: HashSet<u64>,
  515. }
  516. impl RollingBitFieldTester {
  517. fn insert(&mut self, slot: u64) {
  518. self.bitfield.insert(slot);
  519. self.hash_set.insert(slot);
  520. assert!(self.bitfield.contains(&slot));
  521. compare(&self.hash_set, &self.bitfield);
  522. }
  523. fn remove(&mut self, slot: &u64) -> bool {
  524. let result = self.bitfield.remove(slot);
  525. assert_eq!(result, self.hash_set.remove(slot));
  526. assert!(!self.bitfield.contains(slot));
  527. self.compare();
  528. result
  529. }
  530. fn compare(&self) {
  531. compare(&self.hash_set, &self.bitfield);
  532. }
  533. }
  534. fn setup_wide(width: u64, start: u64) -> RollingBitFieldTester {
  535. let mut tester = setup_empty(width);
  536. tester.compare();
  537. tester.insert(start);
  538. tester.insert(start + 1);
  539. tester
  540. }
  541. #[test]
  542. fn test_bitfield_insert_wide() {
  543. agave_logger::setup();
  544. let width = 16;
  545. let start = 0;
  546. let mut tester = setup_wide(width, start);
  547. let slot = start + width;
  548. let all = tester.bitfield.get_all();
  549. // higher than max range by 1
  550. tester.insert(slot);
  551. let bitfield = tester.bitfield;
  552. for slot in all {
  553. assert!(bitfield.contains(&slot));
  554. }
  555. assert_eq!(bitfield.excess.len(), 1);
  556. assert_eq!(bitfield.count, 3);
  557. }
  558. #[test]
  559. fn test_bitfield_insert_wide_before() {
  560. agave_logger::setup();
  561. let width = 16;
  562. let start = 100;
  563. let mut bitfield = setup_wide(width, start).bitfield;
  564. let slot = start + 1 - width;
  565. // assert here - would make min too low, causing too wide of a range
  566. bitfield.insert(slot);
  567. assert_eq!(1, bitfield.excess.len());
  568. assert_eq!(3, bitfield.count);
  569. assert!(bitfield.contains(&slot));
  570. }
  571. #[test]
  572. fn test_bitfield_insert_wide_before_ok() {
  573. agave_logger::setup();
  574. let width = 16;
  575. let start = 100;
  576. let mut bitfield = setup_wide(width, start).bitfield;
  577. let slot = start + 2 - width; // this item would make our width exactly equal to what is allowed, but it is also inserting prior to min
  578. bitfield.insert(slot);
  579. assert_eq!(1, bitfield.excess.len());
  580. assert!(bitfield.contains(&slot));
  581. assert_eq!(3, bitfield.count);
  582. }
  583. #[test]
  584. fn test_bitfield_contains_wide_no_assert() {
  585. {
  586. let width = 16;
  587. let start = 0;
  588. let bitfield = setup_wide(width, start).bitfield;
  589. let mut slot = width;
  590. assert!(!bitfield.contains(&slot));
  591. slot += 1;
  592. assert!(!bitfield.contains(&slot));
  593. }
  594. {
  595. let width = 16;
  596. let start = 100;
  597. let bitfield = setup_wide(width, start).bitfield;
  598. // too large
  599. let mut slot = width;
  600. assert!(!bitfield.contains(&slot));
  601. slot += 1;
  602. assert!(!bitfield.contains(&slot));
  603. // too small, before min
  604. slot = 0;
  605. assert!(!bitfield.contains(&slot));
  606. }
  607. }
  608. #[test]
  609. fn test_bitfield_remove_wide() {
  610. let width = 16;
  611. let start = 0;
  612. let mut tester = setup_wide(width, start);
  613. let slot = width;
  614. assert!(!tester.remove(&slot));
  615. }
  616. #[test]
  617. fn test_bitfield_excess2() {
  618. agave_logger::setup();
  619. let width = 16;
  620. let mut tester = setup_empty(width);
  621. let slot = 100;
  622. // insert 1st slot
  623. tester.insert(slot);
  624. assert!(tester.bitfield.excess.is_empty());
  625. // insert a slot before the previous one. this is 'excess' since we don't use this pattern in normal operation
  626. let slot2 = slot - 1;
  627. tester.insert(slot2);
  628. assert_eq!(tester.bitfield.excess.len(), 1);
  629. // remove the 1st slot. we will be left with only excess
  630. tester.remove(&slot);
  631. assert!(tester.bitfield.contains(&slot2));
  632. assert_eq!(tester.bitfield.excess.len(), 1);
  633. // re-insert at valid range, making sure we don't insert into excess
  634. tester.insert(slot);
  635. assert_eq!(tester.bitfield.excess.len(), 1);
  636. // remove the excess slot.
  637. tester.remove(&slot2);
  638. assert!(tester.bitfield.contains(&slot));
  639. assert!(tester.bitfield.excess.is_empty());
  640. // re-insert the excess slot
  641. tester.insert(slot2);
  642. assert_eq!(tester.bitfield.excess.len(), 1);
  643. }
  644. #[test]
  645. fn test_bitfield_excess() {
  646. agave_logger::setup();
  647. // start at slot 0 or a separate, higher slot
  648. for width in [16, 4194304].iter() {
  649. let width = *width;
  650. let mut tester = setup_empty(width);
  651. for start in [0, width * 5].iter().cloned() {
  652. // recreate means create empty bitfield with each iteration, otherwise reuse
  653. for recreate in [false, true].iter().cloned() {
  654. let max = start + 3;
  655. // first root to add
  656. for slot in start..max {
  657. // subsequent roots to add
  658. for slot2 in (slot + 1)..max {
  659. // reverse_slots = 1 means add slots in reverse order (max to min). This causes us to add second and later slots to excess.
  660. for reverse_slots in [false, true].iter().cloned() {
  661. let maybe_reverse = |slot| {
  662. if reverse_slots {
  663. max - slot
  664. } else {
  665. slot
  666. }
  667. };
  668. if recreate {
  669. let recreated = setup_empty(width);
  670. tester = recreated;
  671. }
  672. // insert
  673. for slot in slot..=slot2 {
  674. let slot_use = maybe_reverse(slot);
  675. tester.insert(slot_use);
  676. /*
  677. this is noisy on build machine
  678. debug!(
  679. "slot: {}, bitfield: {:?}, reverse: {}, len: {}, excess: {:?}",
  680. slot_use,
  681. tester.bitfield,
  682. reverse_slots,
  683. tester.bitfield.len(),
  684. tester.bitfield.excess
  685. );*/
  686. assert!(
  687. (reverse_slots && tester.bitfield.len() > 1)
  688. ^ tester.bitfield.excess.is_empty()
  689. );
  690. }
  691. if start > width * 2 {
  692. assert!(!tester.bitfield.contains(&(start - width * 2)));
  693. }
  694. assert!(!tester.bitfield.contains(&(start + width * 2)));
  695. let len = (slot2 - slot + 1) as usize;
  696. assert_eq!(tester.bitfield.len(), len);
  697. assert_eq!(tester.bitfield.count, len);
  698. // remove
  699. for slot in slot..=slot2 {
  700. let slot_use = maybe_reverse(slot);
  701. assert!(tester.remove(&slot_use));
  702. assert!(
  703. (reverse_slots && !tester.bitfield.is_empty())
  704. ^ tester.bitfield.excess.is_empty()
  705. );
  706. }
  707. assert!(tester.bitfield.is_empty());
  708. assert_eq!(tester.bitfield.count, 0);
  709. if start > width * 2 {
  710. assert!(!tester.bitfield.contains(&(start - width * 2)));
  711. }
  712. assert!(!tester.bitfield.contains(&(start + width * 2)));
  713. }
  714. }
  715. }
  716. }
  717. }
  718. }
  719. }
  720. #[test]
  721. fn test_bitfield_remove_wide_before() {
  722. let width = 16;
  723. let start = 100;
  724. let mut tester = setup_wide(width, start);
  725. let slot = start + 1 - width;
  726. assert!(!tester.remove(&slot));
  727. }
  728. fn compare_internal(hashset: &HashSet<u64>, bitfield: &RollingBitField) {
  729. assert_eq!(hashset.len(), bitfield.len());
  730. assert_eq!(hashset.is_empty(), bitfield.is_empty());
  731. if !bitfield.is_empty() {
  732. let mut min = Slot::MAX;
  733. let mut overall_min = Slot::MAX;
  734. let mut max = Slot::MIN;
  735. for item in bitfield.get_all() {
  736. assert!(hashset.contains(&item));
  737. if !bitfield.excess.contains(&item) {
  738. min = std::cmp::min(min, item);
  739. max = std::cmp::max(max, item);
  740. }
  741. overall_min = std::cmp::min(overall_min, item);
  742. }
  743. assert_eq!(bitfield.min(), Some(overall_min));
  744. assert_eq!(bitfield.get_all().len(), hashset.len());
  745. // range isn't tracked for excess items
  746. if bitfield.excess.len() != bitfield.len() {
  747. let width = if bitfield.is_empty() {
  748. 0
  749. } else {
  750. max + 1 - min
  751. };
  752. assert!(
  753. bitfield.range_width() >= width,
  754. "hashset: {:?}, bitfield: {:?}, bitfield.range_width: {}, width: {}",
  755. hashset,
  756. bitfield.get_all(),
  757. bitfield.range_width(),
  758. width,
  759. );
  760. }
  761. } else {
  762. assert_eq!(bitfield.min(), None);
  763. }
  764. }
  765. fn compare(hashset: &HashSet<u64>, bitfield: &RollingBitField) {
  766. compare_internal(hashset, bitfield);
  767. let clone = bitfield.clone();
  768. compare_internal(hashset, &clone);
  769. assert!(clone.eq(bitfield));
  770. assert_eq!(clone, *bitfield);
  771. }
  772. #[test]
  773. fn test_bitfield_functionality() {
  774. agave_logger::setup();
  775. // bitfield sizes are powers of 2, cycle through values of 1, 2, 4, .. 2^9
  776. for power in 0..10 {
  777. let max_bitfield_width = 2u64.pow(power);
  778. let width_iteration_max = if max_bitfield_width > 1 {
  779. // add up to 2 items so we can test out multiple items
  780. 3
  781. } else {
  782. // 0 or 1 items is all we can fit with a width of 1 item
  783. 2
  784. };
  785. for width in 0..width_iteration_max {
  786. let mut tester = setup_empty(max_bitfield_width);
  787. let min = 101_000;
  788. let dead = 19;
  789. let mut slot = min;
  790. while tester.hash_set.len() < width {
  791. slot += 1;
  792. if max_bitfield_width > 2 && slot % dead == 0 {
  793. // with max_bitfield_width of 1 and 2, there is no room for dead slots
  794. continue;
  795. }
  796. tester.insert(slot);
  797. }
  798. let max = slot + 1;
  799. for slot in (min - 10)..max + 100 {
  800. assert_eq!(
  801. tester.bitfield.contains(&slot),
  802. tester.hash_set.contains(&slot)
  803. );
  804. }
  805. if width > 0 {
  806. assert!(tester.remove(&slot));
  807. assert!(!tester.remove(&slot));
  808. }
  809. let all = tester.bitfield.get_all();
  810. // remove the rest, including a call that removes slot again
  811. for item in all.iter() {
  812. assert!(tester.remove(item));
  813. assert!(!tester.remove(item));
  814. }
  815. let min = max + ((width * 2) as u64) + 3;
  816. let slot = min; // several widths past previous min
  817. let max = slot + 1;
  818. tester.insert(slot);
  819. for slot in (min - 10)..max + 100 {
  820. assert_eq!(
  821. tester.bitfield.contains(&slot),
  822. tester.hash_set.contains(&slot)
  823. );
  824. }
  825. }
  826. }
  827. }
  828. fn bitfield_insert_and_test(bitfield: &mut RollingBitField, slot: Slot) {
  829. let len = bitfield.len();
  830. let old_all = bitfield.get_all();
  831. let (new_min, new_max) = if bitfield.is_empty() {
  832. (slot, slot + 1)
  833. } else {
  834. (
  835. std::cmp::min(bitfield.min, slot),
  836. std::cmp::max(bitfield.max_exclusive, slot + 1),
  837. )
  838. };
  839. bitfield.insert(slot);
  840. assert_eq!(bitfield.min, new_min);
  841. assert_eq!(bitfield.max_exclusive, new_max);
  842. assert_eq!(bitfield.len(), len + 1);
  843. assert!(!bitfield.is_empty());
  844. assert!(bitfield.contains(&slot));
  845. // verify aliasing is what we expect
  846. assert!(bitfield.contains_assume_in_range(&(slot + bitfield.max_width)));
  847. let get_all = bitfield.get_all();
  848. old_all
  849. .into_iter()
  850. .for_each(|slot| assert!(get_all.contains(&slot)));
  851. assert!(get_all.contains(&slot));
  852. assert!(get_all.len() == len + 1);
  853. }
  854. #[test]
  855. fn test_bitfield_clear() {
  856. let mut bitfield = RollingBitField::new(4);
  857. assert_eq!(bitfield.len(), 0);
  858. assert!(bitfield.is_empty());
  859. bitfield_insert_and_test(&mut bitfield, 0);
  860. bitfield.clear();
  861. assert_eq!(bitfield.len(), 0);
  862. assert!(bitfield.is_empty());
  863. assert!(bitfield.get_all().is_empty());
  864. bitfield_insert_and_test(&mut bitfield, 1);
  865. bitfield.clear();
  866. assert_eq!(bitfield.len(), 0);
  867. assert!(bitfield.is_empty());
  868. assert!(bitfield.get_all().is_empty());
  869. bitfield_insert_and_test(&mut bitfield, 4);
  870. }
  871. #[test]
  872. fn test_bitfield_wrapping() {
  873. let mut bitfield = RollingBitField::new(4);
  874. assert_eq!(bitfield.len(), 0);
  875. assert!(bitfield.is_empty());
  876. bitfield_insert_and_test(&mut bitfield, 0);
  877. assert_eq!(bitfield.get_all(), vec![0]);
  878. bitfield_insert_and_test(&mut bitfield, 2);
  879. assert_eq!(bitfield.get_all(), vec![0, 2]);
  880. bitfield_insert_and_test(&mut bitfield, 3);
  881. bitfield.insert(3); // redundant insert
  882. assert_eq!(bitfield.get_all(), vec![0, 2, 3]);
  883. assert!(bitfield.remove(&0));
  884. assert!(!bitfield.remove(&0));
  885. assert_eq!(bitfield.min, 2);
  886. assert_eq!(bitfield.max_exclusive, 4);
  887. assert_eq!(bitfield.len(), 2);
  888. assert!(!bitfield.remove(&0)); // redundant remove
  889. assert_eq!(bitfield.len(), 2);
  890. assert_eq!(bitfield.get_all(), vec![2, 3]);
  891. bitfield.insert(4); // wrapped around value - same bit as '0'
  892. assert_eq!(bitfield.min, 2);
  893. assert_eq!(bitfield.max_exclusive, 5);
  894. assert_eq!(bitfield.len(), 3);
  895. assert_eq!(bitfield.get_all(), vec![2, 3, 4]);
  896. assert!(bitfield.remove(&2));
  897. assert_eq!(bitfield.min, 3);
  898. assert_eq!(bitfield.max_exclusive, 5);
  899. assert_eq!(bitfield.len(), 2);
  900. assert_eq!(bitfield.get_all(), vec![3, 4]);
  901. assert!(bitfield.remove(&3));
  902. assert_eq!(bitfield.min, 4);
  903. assert_eq!(bitfield.max_exclusive, 5);
  904. assert_eq!(bitfield.len(), 1);
  905. assert_eq!(bitfield.get_all(), vec![4]);
  906. assert!(bitfield.remove(&4));
  907. assert_eq!(bitfield.len(), 0);
  908. assert!(bitfield.is_empty());
  909. assert!(bitfield.get_all().is_empty());
  910. bitfield_insert_and_test(&mut bitfield, 8);
  911. assert!(bitfield.remove(&8));
  912. assert_eq!(bitfield.len(), 0);
  913. assert!(bitfield.is_empty());
  914. assert!(bitfield.get_all().is_empty());
  915. bitfield_insert_and_test(&mut bitfield, 9);
  916. assert!(bitfield.remove(&9));
  917. assert_eq!(bitfield.len(), 0);
  918. assert!(bitfield.is_empty());
  919. assert!(bitfield.get_all().is_empty());
  920. }
  921. #[test]
  922. fn test_bitfield_smaller() {
  923. // smaller bitfield, fewer entries, including 0
  924. agave_logger::setup();
  925. for width in 0..34 {
  926. let mut bitfield = RollingBitField::new(4096);
  927. let mut hash_set = HashSet::new();
  928. let min = 1_010_000;
  929. let dead = 19;
  930. let mut slot = min;
  931. while hash_set.len() < width {
  932. slot += 1;
  933. if slot % dead == 0 {
  934. continue;
  935. }
  936. hash_set.insert(slot);
  937. bitfield.insert(slot);
  938. }
  939. let max = slot + 1;
  940. let mut time = Measure::start("");
  941. let mut count = 0;
  942. for slot in (min - 10)..max + 100 {
  943. if hash_set.contains(&slot) {
  944. count += 1;
  945. }
  946. }
  947. time.stop();
  948. let mut time2 = Measure::start("");
  949. let mut count2 = 0;
  950. for slot in (min - 10)..max + 100 {
  951. if bitfield.contains(&slot) {
  952. count2 += 1;
  953. }
  954. }
  955. time2.stop();
  956. info!(
  957. "{}, {}, {}",
  958. time.as_ms(),
  959. time2.as_ms(),
  960. time.as_ns() / time2.as_ns()
  961. );
  962. assert_eq!(count, count2);
  963. }
  964. }
  965. #[test]
  966. fn test_debug_formatter() {
  967. let mut bitfield = RollingBitField::new(1);
  968. assert_eq!(
  969. "RollingBitField { max_width: 1, min: 0, max_exclusive: 0, count: 0, bits: \
  970. \"[false]\", excess: {} }",
  971. format!("{bitfield:?}")
  972. );
  973. bitfield.insert(0);
  974. assert_eq!(
  975. "RollingBitField { max_width: 1, min: 0, max_exclusive: 1, count: 1, bits: \
  976. \"[true]\", excess: {} }",
  977. format!("{bitfield:?}")
  978. );
  979. let mut bitfield = RollingBitField::new(2);
  980. assert_eq!(
  981. "RollingBitField { max_width: 2, min: 0, max_exclusive: 0, count: 0, bits: \
  982. \"[false;2]\", excess: {} }",
  983. format!("{bitfield:?}")
  984. );
  985. bitfield.insert(0);
  986. assert_eq!(
  987. "RollingBitField { max_width: 2, min: 0, max_exclusive: 1, count: 1, bits: \"[true, \
  988. false]\", excess: {} }",
  989. format!("{bitfield:?}")
  990. );
  991. bitfield.insert(1);
  992. assert_eq!(
  993. "RollingBitField { max_width: 2, min: 0, max_exclusive: 2, count: 2, bits: \
  994. \"[true;2]\", excess: {} }",
  995. format!("{bitfield:?}")
  996. );
  997. let mut bitfield = RollingBitField::new(4096);
  998. assert_eq!(
  999. "RollingBitField { max_width: 4096, min: 0, max_exclusive: 0, count: 0, bits: \
  1000. \"[false;4096]\", excess: {} }",
  1001. format!("{bitfield:?}")
  1002. );
  1003. bitfield.insert(4095);
  1004. assert_eq!(
  1005. "RollingBitField { max_width: 4096, min: 4095, max_exclusive: 4096, count: 1, bits: \
  1006. \"[false;4095, true]\", excess: {} }",
  1007. format!("{bitfield:?}")
  1008. );
  1009. bitfield.clear();
  1010. bitfield.insert(2);
  1011. bitfield.insert(3);
  1012. assert_eq!(
  1013. "RollingBitField { max_width: 4096, min: 2, max_exclusive: 4, count: 2, bits: \
  1014. \"[false;2, true;2, false;4092]\", excess: {} }",
  1015. format!("{bitfield:?}")
  1016. );
  1017. }
  1018. }