Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
298 changes: 239 additions & 59 deletions vm-migration/src/bitpos_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,94 +2,274 @@
//
// SPDX-License-Identifier: Apache-2.0

use std::ops::Mul;
// The following is a 1:1 copy from https://github.com/phip1611/bit_ops/blob/ee5f576fae6e78ec036646977641a457bdf8bf8e/src/bitpos_iter.rs#L1

/// An iterator that turns a sequence of u64s into a sequence of bit positions
/// that are set.
//! Module providing iterators to iterate over set bits in unsigned integers.
//!
//! See [`BitsIter`] and [`BitmapIter`]. The latter is included into Rust's
//! [`Iterator`] API via [`BitposIteratorExt`].
//!
//! # Performance
//!
//! These iterators have been trimmed for performance and were benchmarked. See
//! the project's README on GitHub for more details.

use core::fmt::Debug;
use core::ops::{Add, BitAndAssign, Sub};

/// **Internal helper** trait abstracting over the unsigned primitive integer
/// types that [`BitsIter`] can walk.
///
/// An implementor must be copyable and comparable, support `+`, `-`, and `&=`
/// on itself, and be convertible into a [`usize`] via [`TryInto`].
pub trait Uint:
    Sized + Copy + Eq + BitAndAssign + Add<Output = Self> + Sub<Output = Self> + TryInto<usize>
{
    /// Width of the type in bits.
    const BITS: usize;
    /// The value `0` of the underlying primitive type.
    const ZERO: Self;
    /// The value `1` of the underlying primitive type.
    const ONE: Self;

    /// Returns the number of trailing zero bits of `self`, as a value of the
    /// same type.
    fn trailing_zeros(self) -> Self;
}

/// Implements the relevant bit operations for the specified primitive type.
///
/// This is useful to iterate over dirty memory bitmaps.
struct BitposIterator<I> {
underlying_it: I,
/// Note that the bit positions start at `0`. The highest `bit` position thus
/// is `BITS - 1`.
macro_rules! impl_uint_trait {
($primitive_ty:ty) => {
impl Uint for $primitive_ty {
const BITS: usize = <$primitive_ty>::BITS as usize;
const ZERO: Self = 0;
const ONE: Self = 1;

/// How many `u64`'s we've already consumed.
///
/// `u32` is sufficient.
word_pos: u32,
#[inline(always)]
fn trailing_zeros(self) -> Self {
<$primitive_ty>::trailing_zeros(self) as Self
}
}
};
}

impl_uint_trait!(u8);
impl_uint_trait!(u16);
impl_uint_trait!(u32);
impl_uint_trait!(u64);
impl_uint_trait!(u128);
impl_uint_trait!(usize);

/// If we already started working on a u64, it's here. Together with the bit
/// position where we have to continue.
current_word: Option<(u64 /* cur word */, u32 /* cur pos */)>,
/// Iterator over set bits of an unsigned integer.
///
/// The index / bit position starts at `0`, the last bit position is
/// `n_bits - 1`. Positions are yielded in ascending order, starting with the
/// lowest set bit, as values of the integer type `U` itself.
///
/// The iterator can be used with [`u8`], [`u16`], [`u32`], [`u64`], [`u128`],
/// and [`usize`].
///
/// # Example
/// ```rust
/// # use bit_ops::BitsIter;
/// // also works with u16, u32, u64, u128, and usize
/// let iter = BitsIter::<u8>::new(0);
/// assert_eq!(&iter.collect::<Vec<_>>(), &[]);
///
/// let iter = BitsIter::<u8>::new(1);
/// assert_eq!(&iter.collect::<Vec<_>>(), &[0]);
///
/// let iter = BitsIter::<u8>::new(0b1010_1010);
/// assert_eq!(&iter.collect::<Vec<_>>(), &[1, 3, 5, 7]);
/// ```
#[derive(Debug)]
pub struct BitsIter<U> {
/// Bits that have not been reported yet. The lowest set bit is cleared
/// each time a position is yielded; the iterator is exhausted once this
/// reaches zero.
value: U,
}

impl<I> Iterator for BitposIterator<I>
impl<U: Uint> BitsIter<U>
where
I: Iterator<Item = u64>,
<U as TryInto<usize>>::Error: Debug,
{
type Item = u64;
/// Creates a new iterator.
pub const fn new(value: U) -> Self {
Self { value }
}
}

impl<U: Uint> Iterator for BitsIter<U>
where
<U as TryInto<usize>>::Error: Debug,
{
type Item = U;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.current_word.is_none() {
self.current_word = self.underlying_it.next().map(|w| (w, 0));
}
if self.value == U::ZERO {
return None;
}
let tz = self.value.trailing_zeros();
self.value &= self.value - U::ONE; // clear lowest set bit
Some(tz)
}
}

let (word, word_bit) = self.current_word?;
/// Iterator over set bits in (large) bitmaps, i.e., collection of unsigned
/// integers.
///
/// This wraps an iterator emitting corresponding unsigned integers and
/// uses [`BitsIter`] on each element. While doing so, [`BitmapIter`] keeps
/// track of consumed elements to properly report the bit position relative to
/// the very first bit. We basically treat the incoming [`Uint`]s as one
/// gigantic integer and just spit out which bits are set.
///
/// The iterator can be used with [`u8`], [`u16`], [`u32`], [`u64`], [`u128`],
/// and [`usize`]. The [`BitposIteratorExt`] offers a convenient way to
/// integrate this iterator in typical iterator chains.
///
/// # Example
///
/// ## Direct Usage of Iterator
/// ```rust
/// use bit_ops::BitmapIter;
///
/// // also works with u16, u32, u64, u128, and usize
/// let iter = BitmapIter::<u8, _>::new([0b1111_0010, 0b1000, 1].into_iter());
/// assert_eq!(&iter.collect::<Vec<_>>(), &[1, 4, 5, 6, 7, 11, 16]);
/// ```
///
/// ## Use via Iterator Trait Extension
/// ```rust
/// use bit_ops::BitposIteratorExt;
///
/// // also works with u16, u32, u64, u128, and usize
/// let bit_pos = [0b1111_0010_u8, 0b1000, 1].into_iter()
/// .bit_positions()
/// .collect::<Vec<_>>();
/// assert_eq!(&bit_pos, &[1, 4, 5, 6, 7, 11, 16]);
/// ```
#[derive(Debug)]
pub struct BitmapIter<U, I> {
/// Source of the bitmap elements that have not been loaded yet.
bitmap_iter: I,
/// Total bit width of all elements that precede the current one; added to
/// each position found in the current element to form the global position.
consumed_bits: usize,
/// Iterator over the set bits of the element currently being processed.
current_element_it: BitsIter<U>,
}

// Continue early if there is no chance to find something.
if word != 0 && word_bit < 64 {
let shifted_word = word >> word_bit;
if shifted_word != 0 {
let zeroes = shifted_word.trailing_zeros();
impl<U: Uint, I: Iterator<Item = U>> BitmapIter<U, I>
where
    <U as TryInto<usize>>::Error: Debug,
{
    /// Builds a bitmap iterator from any source of [`Uint`] elements.
    ///
    /// Accepts everything that implements [`IntoIterator`] and whose
    /// resulting [`Iterator`] yields the corresponding [`Uint`]. The first
    /// element is fetched eagerly during construction; an empty source is
    /// treated as if it started with a zero element, which has no set bits.
    ///
    /// # Example
    /// ```rust
    /// # use bit_ops::BitmapIter;
    /// let _ = BitmapIter::<u8, _>::new([0_u8]);
    /// let _ = BitmapIter::<u16, _>::new([0_u16].iter().copied());
    /// let _ = BitmapIter::<u16, _>::new((&[0_u16]).iter().copied());
    /// let _ = BitmapIter::<usize, _>::new(vec![42_usize]);
    /// ```
    pub fn new<In: IntoIterator<IntoIter = I>>(bitmap_iter: In) -> Self {
        let mut elements = bitmap_iter.into_iter();
        // Preload the first element so that `next()` can start right away.
        let first_element = match elements.next() {
            Some(element) => element,
            None => U::ZERO,
        };
        Self {
            bitmap_iter: elements,
            // Nothing precedes the first element.
            consumed_bits: 0,
            current_element_it: BitsIter::new(first_element),
        }
    }
}

self.current_word = Some((word, zeroes + word_bit + 1));
let next_bitpos = (self.word_pos as u64)
.mul(64)
// the inner value can not overflow
.checked_add(word_bit as u64 + zeroes as u64)
.unwrap();
impl<U: Uint, I: Iterator<Item = U>> Iterator for BitmapIter<U, I>
where
<U as TryInto<usize>>::Error: Debug,
{
type Item = usize;

return Some(next_bitpos);
}
#[inline]
fn next(&mut self) -> Option<Self::Item> {
// PERFORMANCE: For performance-reasons, I refrain from a checked
// addition. 2^61 bytes / 2^64 bits (usize on a 64-bit system) are more
// than enough, and it is unlikely that users will ever have bitmaps
// that large.
loop {
// We return here, if we currently have an element.
if let Some(bit) = self.current_element_it.next() {
// The compiler will optimize this check away in most cases.
let bit: usize = bit.try_into().unwrap();
// Unchecked add: see performance comment above
return Some(self.consumed_bits + bit);
}

self.current_word = None;
self.word_pos += 1;
// Current element exhausted: load the next one or return `None` / exit.
let next_byte = self.bitmap_iter.next()?;
// Unchecked add: see performance comment above
self.consumed_bits += U::BITS;
self.current_element_it = BitsIter::new(next_byte);
}
}
}

pub trait BitposIteratorExt: Iterator<Item = u64> + Sized {
/// Turn an iterator over `u64` into an iterator over the bit positions of
/// all 1s. We basically treat the incoming `u64` as one gigantic integer
/// and just spit out which bits are set.
fn bit_positions(self) -> impl Iterator<Item = u64> {
BitposIterator {
underlying_it: self,
word_pos: 0,
current_word: None,
}
/// Extension for the Rust standard library's [`Iterator`] for convenient
/// integration of [`BitmapIter`].
pub trait BitposIteratorExt<U: Uint>: Iterator<Item = U> + Sized
where
<U as TryInto<usize>>::Error: Debug,
{
/// Creates an iterator that emits which bits are set.
///
/// This consumes `self` and wraps it in a [`BitmapIter`]; see
/// [`BitmapIter`] for more details.
///
/// # Example
/// ```rust
/// use bit_ops::BitposIteratorExt;
/// let ones = [0b101_u64, 0, 1].into_iter()
/// .bit_positions()
/// .collect::<Vec<_>>();
/// assert_eq!(&ones, &[0, 2, 2*64]);
/// ```
fn bit_positions(self) -> BitmapIter<U, Self> {
BitmapIter::new(self)
}
}

impl<I: Iterator<Item = u64> + Sized> BitposIteratorExt for I {}
// Blanket implementation for all matching iterators, i.e. every iterator
// whose items are a `Uint`.
impl<U: Uint, I: Iterator<Item = U> + Sized> BitposIteratorExt<U> for I where
<U as TryInto<usize>>::Error: Debug
{
}

#[cfg(test)]
mod unit_tests {
mod tests {
use super::*;
use std::vec::Vec;

fn bitpos_check(inp: &[u64], out: &[u64]) {
assert_eq!(inp.iter().copied().bit_positions().collect::<Vec<_>>(), out);
#[test]
fn bits_iter() {
let iter = BitsIter::<u8>::new(0);
assert_eq!(&iter.collect::<Vec<_>>(), &[]);

let iter = BitsIter::<u8>::new(1);
assert_eq!(&iter.collect::<Vec<_>>(), &[0]);

let iter = BitsIter::<u8>::new(0b1010_1010);
assert_eq!(&iter.collect::<Vec<_>>(), &[1, 3, 5, 7]);

let iter = BitsIter::<u8>::new(0b1111_1111);
assert_eq!(&iter.collect::<Vec<_>>(), &[0, 1, 2, 3, 4, 5, 6, 7]);

let iter = BitsIter::<u128>::new(0b1111_1111);
assert_eq!(&iter.collect::<Vec<_>>(), &[0, 1, 2, 3, 4, 5, 6, 7]);
}

#[test]
fn bitpos_iterator_works() {
bitpos_check(&[], &[]);
bitpos_check(&[0], &[]);
bitpos_check(&[1], &[0]);
bitpos_check(&[5], &[0, 2]);
bitpos_check(&[3 + 32], &[0, 1, 5]);
bitpos_check(&[1 << 63], &[63]);

bitpos_check(&[1, 1 + 32], &[0, 64, 69]);
fn bitmap_iter() {
let iter = BitmapIter::<u8, _>::new([0_u8]);
assert_eq!(&iter.collect::<Vec<_>>(), &[]);

let iter = BitmapIter::<u8, _>::new([0b1111_0010, 0b1000, 1]);
assert_eq!(&iter.collect::<Vec<_>>(), &[1, 4, 5, 6, 7, 11, 16]);

let iter = BitmapIter::<u128, _>::new([0b10, 0b10, 0b11]);
assert_eq!(&iter.collect::<Vec<_>>(), &[1, 129, 256, 257]);
}
}
4 changes: 3 additions & 1 deletion vm-migration/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,10 @@ impl MemoryRangeTable {
bitmap
.into_iter()
.bit_positions()
// Compiler optimizes this away
.map(|v: usize| u64::try_from(v).unwrap())
// Turn them into single-element ranges for coalesce.
.map(|b| b..(b + 1))
.map(|b: u64| b..(b + 1))
// Merge adjacent ranges.
.coalesce(|prev, curr| {
if prev.end == curr.start {
Expand Down
Loading