diff --git a/vm-migration/src/bitpos_iterator.rs b/vm-migration/src/bitpos_iterator.rs index 35d0886345..c23859f501 100644 --- a/vm-migration/src/bitpos_iterator.rs +++ b/vm-migration/src/bitpos_iterator.rs @@ -2,94 +2,274 @@ // // SPDX-License-Identifier: Apache-2.0 -use std::ops::Mul; +// The following is a 1:1 copy from https://github.com/phip1611/bit_ops/blob/ee5f576fae6e78ec036646977641a457bdf8bf8e/src/bitpos_iter.rs#L1 -/// An iterator that turns a sequence of u64s into a sequence of bit positions -/// that are set. +//! Module providing iterators to iterate over set bits in unsigned integers. +//! +//! See [`BitsIter`] and [`BitmapIter`]. The latter is included into Rust's +//! [`Iterator`] API via [`BitposIteratorExt`]. +//! +//! # Performance +//! +//! These iterators have been trimmed for performance and were benchmarked. See +//! the project's README on GitHub for more details. + +use core::fmt::Debug; +use core::ops::{Add, BitAndAssign, Sub}; + +/// **Internal helper** trait for [`BitsIter`]. +pub trait Uint: +Copy + Eq + Add + Sub + Sized + BitAndAssign + TryInto +{ + /// Number of bits of that type. + const BITS: usize; + /// `0` value of the underlying primitive type. + const ZERO: Self; + /// `1` value of the underlying primitive type. + const ONE: Self; + /// Number of trailing zeroes. + fn trailing_zeros(self) -> Self; +} + +/// Implements the relevant bit operations for the specified primitive type. /// -/// This is useful to iterate over dirty memory bitmaps. -struct BitposIterator { - underlying_it: I, +/// Note that the bit positions start at `0`. The highest `bit` position thus +/// is `BITS - 1`. +macro_rules! impl_uint_trait { + ($primitive_ty:ty) => { + impl Uint for $primitive_ty { + const BITS: usize = <$primitive_ty>::BITS as usize; + const ZERO: Self = 0; + const ONE: Self = 1; - /// How many `u64`'s we've already consumed. - /// - /// `u32` is sufficient. - word_pos: u32, + #[inline(always)] + fn trailing_zeros(self) -> Self { + <$primitive_ty>::trailing_zeros(self) as Self + } + } + }; +} + +impl_uint_trait!(u8); +impl_uint_trait!(u16); +impl_uint_trait!(u32); +impl_uint_trait!(u64); +impl_uint_trait!(u128); +impl_uint_trait!(usize); - /// If we already started working on a u64, it's here. Together with the bit - /// position where we have to continue. - current_word: Option<(u64 /* cur word */, u32 /* cur pos */)>, +/// Iterator over set bits of an unsigned integer. +/// +/// The index / bit position starts at `0`, the last bit position is +/// `n_bits - 1`. +/// +/// The iterator can be used with [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], +/// and [`usize`]. +/// +/// # Example +/// ```rust +/// # use bit_ops::BitsIter; +/// // also works with u16, u32, u64, u128, and usize +/// let iter = BitsIter::::new(0); +/// assert_eq!(&iter.collect::>(), &[]); +/// +/// let iter = BitsIter::::new(1); +/// assert_eq!(&iter.collect::>(), &[0]); +/// +/// let iter = BitsIter::::new(0b1010_1010); +/// assert_eq!(&iter.collect::>(), &[1, 3, 5, 7]); +/// ``` +#[derive(Debug)] +pub struct BitsIter { + value: U, } -impl Iterator for BitposIterator +impl BitsIter where - I: Iterator, + >::Error: Debug, { - type Item = u64; + /// Creates a new iterator. + pub const fn new(value: U) -> Self { + Self { value } + } +} +impl Iterator for BitsIter +where + >::Error: Debug, +{ + type Item = U; + + #[inline] fn next(&mut self) -> Option { - loop { - if self.current_word.is_none() { - self.current_word = self.underlying_it.next().map(|w| (w, 0)); - } + if self.value == U::ZERO { + return None; + } + let tz = self.value.trailing_zeros(); + self.value &= self.value - U::ONE; // clear lowest set bit + Some(tz) + } +} - let (word, word_bit) = self.current_word?; +/// Iterator over set bits in (large) bitmaps, i.e., collection of unsigned +/// integers. +/// +/// This wraps an iterator emitting corresponding unsigned integers and +/// uses [`BitsIter`] on each element. While doing so, [`BitmapIter`] keeps +/// track of consumed elements to properly report the bit position relative to +/// the very first bit. We basically treat the incoming [`Uint`]s as one +/// gigantic integer and just spit out which bits are set. +/// +/// The iterator can be used with [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], +/// and [`usize`]. The [`BitposIteratorExt`] offers a convenient way to +/// integrate this iterator in typical iterator chains. +/// +/// # Example +/// +/// ## Direct Usage of Iterator +/// ```rust +/// use bit_ops::BitmapIter; +/// +/// // also works with u16, u32, u64, u128, and usize +/// let iter = BitmapIter::::new([0b1111_0010, 0b1000, 1].into_iter()); +/// assert_eq!(&iter.collect::>(), &[1, 4, 5, 6, 7, 11, 16]); +/// ``` +/// +/// ## Use via Iterator Trait Extension +/// ```rust +/// use bit_ops::BitposIteratorExt; +/// +/// // also works with u16, u32, u64, u128, and usize +/// let bit_pos = [0b1111_0010_u8, 0b1000, 1].into_iter() +/// .bit_positions() +/// .collect::>(); +/// assert_eq!(&bit_pos, &[1, 4, 5, 6, 7, 11, 16]); +/// ``` +#[derive(Debug)] +pub struct BitmapIter { + bitmap_iter: I, + consumed_bits: usize, + current_element_it: BitsIter, +} - // Continue early if there is no chance to find something. - if word != 0 && word_bit < 64 { - let shifted_word = word >> word_bit; - if shifted_word != 0 { - let zeroes = shifted_word.trailing_zeros(); +impl> BitmapIter +where + >::Error: Debug, +{ + /// Creates a new iterator. + /// + /// This consumes everything that implements [`IntoIterator`] for an + /// [`Iterator`] of the corresponding [`Uint`]. + /// + /// # Example + /// ```rust + /// # use bit_ops::BitmapIter; + /// let _ = BitmapIter::::new([0_u8]); + /// let _ = BitmapIter::::new([0_u16].iter().copied()); + /// let _ = BitmapIter::::new((&[0_u16]).iter().copied()); + /// let _ = BitmapIter::::new((vec![42_usize])); + /// ``` + pub fn new>(bitmap_iter: In) -> Self { + let mut bitmap_iter = bitmap_iter.into_iter(); + let current_element_it = BitsIter::new(bitmap_iter.next().unwrap_or(U::ZERO)); + Self { + bitmap_iter, + consumed_bits: 0, + current_element_it, + } + } +} - self.current_word = Some((word, zeroes + word_bit + 1)); - let next_bitpos = (self.word_pos as u64) - .mul(64) - // the inner value can not overflow - .checked_add(word_bit as u64 + zeroes as u64) - .unwrap(); +impl> Iterator for BitmapIter +where + >::Error: Debug, +{ + type Item = usize; - return Some(next_bitpos); - } + #[inline] + fn next(&mut self) -> Option { + // PERFORMANCE: For performance-reasons, I refrain from a checked + // addition. 2^61 bytes / 2^64 bits (usize on a 64-bit system) are more + // than enough, and it is unlikely that users will ever have bitmaps + // that large. + loop { + // We return here, if we currently have an element. + if let Some(bit) = self.current_element_it.next() { + // Compiled will optimize this check in most cases away. + let bit: usize = bit.try_into().unwrap(); + // Unchecked add: see performance comment above + return Some(self.consumed_bits + bit); } - self.current_word = None; - self.word_pos += 1; + // Current byte exhausted: load next one or return `None` / exit. + let next_byte = self.bitmap_iter.next()?; + // Unchecked add: see performance comment above + self.consumed_bits += U::BITS; + self.current_element_it = BitsIter::new(next_byte); } } } -pub trait BitposIteratorExt: Iterator + Sized { - /// Turn an iterator over `u64` into an iterator over the bit positions of - /// all 1s. We basically treat the incoming `u64` as one gigantic integer - /// and just spit out which bits are set. - fn bit_positions(self) -> impl Iterator { - BitposIterator { - underlying_it: self, - word_pos: 0, - current_word: None, - } +/// Extension for the Rust standard libraries [`Iterator`] for convenient +/// integration of [`BitmapIter`]. +pub trait BitposIteratorExt: Iterator + Sized +where + >::Error: Debug, +{ + /// Creates an iterator that emits which bits are set. + /// + /// See [`BitmapIter`] for more details. + /// + /// # Example + /// ```rust + /// use bit_ops::BitposIteratorExt; + /// let ones = [0b101_u64, 0, 1].into_iter() + /// .bit_positions() + /// .collect::>(); + /// assert_eq!(&ones, &[0, 2, 2*64]); + /// ``` + fn bit_positions(self) -> BitmapIter { + BitmapIter::new(self) } } -impl + Sized> BitposIteratorExt for I {} +// Blanked implementation for all matching iterators. +impl + Sized> BitposIteratorExt for I where + >::Error: Debug +{ +} #[cfg(test)] -mod unit_tests { +mod tests { use super::*; + use std::vec::Vec; - fn bitpos_check(inp: &[u64], out: &[u64]) { - assert_eq!(inp.iter().copied().bit_positions().collect::>(), out); + #[test] + fn bits_iter() { + let iter = BitsIter::::new(0); + assert_eq!(&iter.collect::>(), &[]); + + let iter = BitsIter::::new(1); + assert_eq!(&iter.collect::>(), &[0]); + + let iter = BitsIter::::new(0b1010_1010); + assert_eq!(&iter.collect::>(), &[1, 3, 5, 7]); + + let iter = BitsIter::::new(0b1111_1111); + assert_eq!(&iter.collect::>(), &[0, 1, 2, 3, 4, 5, 6, 7]); + + let iter = BitsIter::::new(0b1111_1111); + assert_eq!(&iter.collect::>(), &[0, 1, 2, 3, 4, 5, 6, 7]); } #[test] - fn bitpos_iterator_works() { - bitpos_check(&[], &[]); - bitpos_check(&[0], &[]); - bitpos_check(&[1], &[0]); - bitpos_check(&[5], &[0, 2]); - bitpos_check(&[3 + 32], &[0, 1, 5]); - bitpos_check(&[1 << 63], &[63]); - - bitpos_check(&[1, 1 + 32], &[0, 64, 69]); + fn bitmap_iter() { + let iter = BitmapIter::::new([0_u8]); + assert_eq!(&iter.collect::>(), &[]); + + let iter = BitmapIter::::new([0b1111_0010, 0b1000, 1]); + assert_eq!(&iter.collect::>(), &[1, 4, 5, 6, 7, 11, 16]); + + let iter = BitmapIter::::new([0b10, 0b10, 0b11]); + assert_eq!(&iter.collect::>(), &[1, 129, 256, 257]); } } diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs index 1e43e171f2..0b08421ea0 100644 --- a/vm-migration/src/protocol.rs +++ b/vm-migration/src/protocol.rs @@ -405,8 +405,10 @@ impl MemoryRangeTable { bitmap .into_iter() .bit_positions() + // Compiler optimizes this away + .map(|v: usize| u64::try_from(v).unwrap()) // Turn them into single-element ranges for coalesce. - .map(|b| b..(b + 1)) + .map(|b: u64| b..(b + 1)) // Merge adjacent ranges. .coalesce(|prev, curr| { if prev.end == curr.start {