From 5172eacc1283038fa0a5b0706d634eb6f4b702be Mon Sep 17 00:00:00 2001
From: Julian Schindel <julian.schindel@sap.com>
Date: Wed, 18 Mar 2026 08:47:33 +0100
Subject: [PATCH 1/4] vm-migration: skip zero pages during `MemoryRangeTable::partition`

`MemoryRangeTable::partition` now skips all pages that are completely
filled with zeroes. This reduces the amount of data that needs to be
transferred during a migration if the VM's memory contains zero pages.
It relies on the migration receiver initializing guest memory to zero.

On-behalf-of: SAP julian.schindel@sap.com
Signed-off-by: Julian Schindel <julian.schindel@sap.com>
---
 Cargo.lock                   |   1 +
 vm-migration/Cargo.toml      |   4 +
 vm-migration/src/protocol.rs | 264 ++++++++++++++++++++++++++++++++---
 vmm/src/lib.rs               |  28 +++-
 4 files changed, 271 insertions(+), 26 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5807eaff58..416ea98a56 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2806,6 +2806,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "itertools 0.14.0",
+ "libc",
  "rustls",
  "serde",
  "serde_json",
diff --git a/vm-migration/Cargo.toml b/vm-migration/Cargo.toml
index 2053afc472..2828bdae04 100644
--- a/vm-migration/Cargo.toml
+++ b/vm-migration/Cargo.toml
@@ -7,11 +7,15 @@ version = "0.1.0"
 
 [dependencies]
 anyhow = { workspace = true }
 itertools = { workspace = true }
+libc = { workspace = true }
 rustls = { workspace = true }
 serde = { workspace = true, features = ["derive", "rc"] }
 serde_json = { workspace = true }
 thiserror = { workspace = true }
 vm-memory = { workspace = true, features = ["backend-atomic", "backend-mmap"] }
 
+[dev-dependencies]
+vm-memory = { workspace = true, features = ["backend-bitmap"] }
+
 [lints]
 workspace = true
diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs
index 88e7e81a57..d8da0b3543 100644
--- a/vm-migration/src/protocol.rs
+++ b/vm-migration/src/protocol.rs
@@ -78,7 +78,7 @@ use std::io::{Read, Write};
 
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
-use vm_memory::ByteValued;
+use vm_memory::{Address, ByteValued, GuestAddress, GuestAddressSpace, GuestMemory};
 
 use crate::MigratableError;
 use crate::bitpos_iterator::BitposIteratorExt;
@@ -282,29 +282,218 @@ pub struct MemoryRangeTable {
     data: Vec<MemoryRange>,
 }
 
-#[derive(Debug, Clone, Default)]
-struct MemoryRangeTableIterator {
+/// Iterator that returns the next memory range in the table,
+/// making sure that the returned range is not larger than `chunk_size`.
+///
+/// If the iterator was configured to remove zero pages,
+/// memory pages filled with only zeroes are omitted to reduce the
+/// amount of data to be transmitted in a migration.
+/// This relies on the migration receiver to initialize the guest
+/// memory with zeroed pages.
+///
+/// **Note**: Do not rely on the order of the ranges returned by this
+/// iterator. This allows for a more efficient implementation.
+#[derive(Debug, Clone)]
+struct MemoryRangeTableIterator<'a, M>
+where
+    M: GuestAddressSpace,
+{
+    /// Maximum size of a [`MemoryRange`] returned by the iterator.
     chunk_size: u64,
-    data: Vec<MemoryRange>,
+    /// A zero-filled vector of the size of one memory page.
+    /// Guest memory pages are compared against it via [`libc::memcmp`].
+    zero_page: Vec<u8>,
+    /// [`MemoryRange`]s that haven't been checked for zero pages yet.
+    /// Only used if `self.skip_zero_pages == true`.
+    unprocessed_data: Vec<MemoryRange>,
+    /// Indicates whether zero pages should be skipped or not.
+    skip_zero_pages: bool,
+    /// [`MemoryRange`]s to be given out by the iterator.
+    /// Depending on whether zero pages should be skipped, this contains either all
+    /// ranges or only the ranges that remain after their zero pages were removed.
+    processed_data: Vec<MemoryRange>,
+    /// A reference to the memory of the guest.
+    /// Used to check whether a [`MemoryRange`] contains zero pages.
+    guest_memory: &'a M,
 }
 
-impl MemoryRangeTableIterator {
-    pub fn new(table: &MemoryRangeTable, chunk_size: u64) -> Self {
-        MemoryRangeTableIterator {
-            chunk_size,
-            data: table.data.clone(),
+impl<'a, M> MemoryRangeTableIterator<'a, M>
+where
+    M: GuestAddressSpace,
+{
+    /// Creates a new [`MemoryRangeTableIterator`].
+    ///
+    /// The size of the [`MemoryRangeTable`]s returned by the iterator is limited by
+    /// `chunk_size`.
+    ///
+    /// If `skip_zero_pages == true`, the iterator checks whether a memory
+    /// page is filled with zeroes and omits all zero-filled pages.
+    pub fn new(
+        table: &MemoryRangeTable,
+        chunk_size: u64,
+        page_size: u64,
+        skip_zero_pages: bool,
+        guest_memory: &'a M,
+    ) -> Self {
+        if skip_zero_pages {
+            MemoryRangeTableIterator {
+                chunk_size,
+                zero_page: vec![0; page_size as usize],
+                unprocessed_data: table.data.clone(),
+                skip_zero_pages,
+                processed_data: Vec::new(),
+                guest_memory,
+            }
+        } else {
+            MemoryRangeTableIterator {
+                chunk_size,
+                zero_page: Vec::new(),
+                unprocessed_data: Vec::new(),
+                skip_zero_pages,
+                processed_data: table.data.clone(),
+                guest_memory,
+            }
+        }
+    }
+
+    /// Takes the next range from [`MemoryRangeTableIterator::unprocessed_data`], strips
+    /// all of its zero pages, and pushes the remaining sub-ranges to
+    /// [`MemoryRangeTableIterator::processed_data`].
+    ///
+    /// Returns `false` if zero page skipping is disabled or no unprocessed ranges are
+    /// left, `true` otherwise.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a memory range is not valid for [`MemoryRangeTableIterator::guest_memory`].
+    fn fill_zero_removed_data(&mut self) -> bool {
+        /// Checks whether a guest memory region is byte-for-byte equal to the provided
+        /// `comparison_memory`.
+        ///
+        /// # Panics
+        ///
+        /// Panics if the region starting at `guest_memory_start` with a length of
+        /// `comparison_memory.len()` is not valid for `guest_memory`.
+        fn memory_is_equal<M>(
+            guest_memory_start: u64,
+            comparison_memory: &[u8],
+            guest_memory: &M,
+        ) -> bool
+        where
+            M: GuestAddressSpace,
+        {
+            let page_size = comparison_memory.len();
+            let mem = guest_memory.memory();
+            let volatile_slice = mem
+                .get_slice(GuestAddress::new(guest_memory_start), page_size)
+                .unwrap();
+            let slice_ptr = volatile_slice.ptr_guard();
+            // Shadow `slice_ptr` so the guard cannot be dropped until the end of the scope.
+            let slice_ptr = slice_ptr.as_ptr().cast();
+            let zero_page_ptr = comparison_memory.as_ptr().cast();
+
+            // Potential data races between the guest writing to memory and the check whether
+            // a page is all zero are handled by the page dirty logging.
+            // SAFETY: Both pointers point to valid memory of length `page_size` and
+            // neither is modified by `memcmp`.
+            // See: https://man7.org/linux/man-pages/man3/memcmp.3.html
+            let page_is_zero = unsafe { libc::memcmp(slice_ptr, zero_page_ptr, page_size) };
+            page_is_zero == 0
+        }
+
+        if !self.skip_zero_pages {
+            return false;
+        }
+
+        if let Some(memory_range) = self.unprocessed_data.pop() {
+            let page_size = self.zero_page.len();
+            // Avoids a bunch of `as u64` in the code.
+            let page_size_u64 = page_size as u64;
+
+            // As far as I can tell, `MemoryRange` should always start and end on page
+            // boundaries, but there are no type-level guarantees, so we handle partial
+            // pages at both ends of the range to be safe.
+
+            // Amount of bytes by which the gpa undershoots the next page boundary.
+            let gpa_page_undershoot = {
+                // Amount of bytes by which the gpa overshoots the previous page boundary.
+                let offset = memory_range.gpa % page_size_u64;
+                if offset > 0 {
+                    page_size_u64 - offset
+                } else {
+                    0
+                }
+            };
+
+            // Amount of bytes by which the length overshoots the last page boundary.
+            let length_page_overshoot =
+                (memory_range.length - gpa_page_undershoot) % page_size_u64;
+
+            let first_page_boundary = memory_range.gpa + gpa_page_undershoot;
+            let last_page_boundary =
+                memory_range.gpa + memory_range.length - length_page_overshoot;
+            let page_amount = (last_page_boundary - first_page_boundary) / page_size_u64;
+
+            // The gpa of the memory range currently being built.
+            let mut current_gpa = memory_range.gpa;
+            // The length of the memory range currently being built.
+            let mut current_length = 0;
+
+            // If the partial region before the first page boundary is non-zero, keep it
+            // so that it gets combined with the first page; otherwise skip it and start
+            // the next range at the first page boundary.
+            if gpa_page_undershoot != 0 {
+                if memory_is_equal(
+                    current_gpa,
+                    &self.zero_page[..gpa_page_undershoot as usize],
+                    self.guest_memory,
+                ) {
+                    current_gpa += gpa_page_undershoot;
+                } else {
+                    current_length += gpa_page_undershoot;
+                }
+            }
+
+            for page_start in
+                (0..page_amount).map(|page_index| page_index * page_size_u64 + first_page_boundary)
+            {
+                // If the current page is zero, we push all previously gathered non-zero
+                // pages to `processed_data` and set `current_gpa` to the end of the zero
+                // page while resetting the length.
+                if memory_is_equal(page_start, self.zero_page.as_slice(), self.guest_memory) {
+                    if current_length != 0 {
+                        self.processed_data.push(MemoryRange {
+                            gpa: current_gpa,
+                            length: current_length,
+                        });
+                    }
+                    current_gpa += current_length + page_size_u64;
+                    current_length = 0;
+                } else {
+                    current_length += page_size_u64;
+                }
+            }
+
+            // If the partial region past the last page boundary is non-zero, append it
+            // to the range currently being built.
+            if length_page_overshoot != 0
+                && !memory_is_equal(
+                    current_gpa + current_length,
+                    &self.zero_page[..length_page_overshoot as usize],
+                    self.guest_memory,
+                )
+            {
+                current_length += length_page_overshoot;
+            }
+
+            // If the current length is zero, everything up to the end of the range was
+            // zero and there is nothing left to push.
+            if current_length != 0 {
+                self.processed_data.push(MemoryRange {
+                    gpa: current_gpa,
+                    length: current_length,
+                });
+            }
+
+            true
+        } else {
+            false
+        }
+    }
 }
 
-impl Iterator for MemoryRangeTableIterator {
+impl<'a, M> Iterator for MemoryRangeTableIterator<'a, M>
+where
+    M: GuestAddressSpace,
+{
     type Item = MemoryRangeTable;
 
-    /// Return the next memory range in the table, making sure that
-    /// the returned range is not larger than `chunk_size`.
-    ///
-    /// **Note**: Do not rely on the order of the ranges returned by this
-    /// iterator. This allows for a more efficient implementation.
     fn next(&mut self) -> Option<Self::Item> {
         let mut ranges: Vec<MemoryRange> = vec![];
         let mut ranges_size: u64 = 0;
@@ -312,11 +501,15 @@ impl Iterator for MemoryRangeTableIterator {
         loop {
             assert!(ranges_size <= self.chunk_size);
 
-            if ranges_size == self.chunk_size || self.data.is_empty() {
+            if self.processed_data.is_empty() && !self.fill_zero_removed_data() {
+                break;
+            }
+
+            if ranges_size == self.chunk_size {
                 break;
             }
 
-            if let Some(range) = self.data.pop() {
+            if let Some(range) = self.processed_data.pop() {
                 let next_range: MemoryRange = if ranges_size + range.length > self.chunk_size {
                     // How many bytes we need to put back into the table.
let leftover_bytes = ranges_size + range.length - self.chunk_size; @@ -325,7 +518,7 @@ impl Iterator for MemoryRangeTableIterator { assert!(returned_bytes <= range.length); assert_eq!(leftover_bytes + returned_bytes, range.length); - self.data.push(MemoryRange { + self.processed_data.push(MemoryRange { gpa: range.gpa + returned_bytes, length: leftover_bytes, }); @@ -356,8 +549,17 @@ impl MemoryRangeTable { } /// Partitions the table into chunks of at most `chunk_size` bytes. - pub fn partition(&self, chunk_size: u64) -> impl Iterator { - MemoryRangeTableIterator::new(self, chunk_size) + pub fn partition( + &self, + chunk_size: u64, + page_size: u64, + skip_zero_pages: bool, + guest_memory: &M, + ) -> impl Iterator + where + M: GuestAddressSpace, + { + MemoryRangeTableIterator::new(self, chunk_size, page_size, skip_zero_pages, guest_memory) } /// Converts an iterator over a dirty bitmap into an iterator of dirty @@ -462,6 +664,9 @@ impl MemoryRangeTable { #[cfg(test)] mod unit_tests { + use vm_memory::bitmap::AtomicBitmap; + use vm_memory::{Address, GuestAddress, GuestMemoryAtomic, GuestMemoryMmap}; + use crate::protocol::{MemoryRange, MemoryRangeTable}; #[test] @@ -521,11 +726,18 @@ mod unit_tests { ]; assert_eq!(table.regions(), &expected_regions); + let ranges = expected_regions + .clone() + .map(|range| (GuestAddress::new(range.gpa), range.length as usize)); + let guest_memory_map: GuestMemoryMmap = + GuestMemoryMmap::from_ranges(&ranges).unwrap(); + let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map); + // In the first test, we expect to see the exact same result as above, as we use the length // of every region (which is fixed!). { let chunks = table - .partition(page_size * 2) + .partition(page_size * 2, page_size, false, &atomic_guest_memory_map) .map(|table| table.data) .collect::>(); @@ -548,10 +760,18 @@ mod unit_tests { ); } + let ranges = expected_regions + .clone() + .map(|range| (GuestAddress(range.gpa), range.length as usize)); + + let guest_memory_map: GuestMemoryMmap = + GuestMemoryMmap::from_ranges(&ranges).unwrap(); + let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map); + // Next, we have a more sophisticated test with a chunk size of 5 pages. { let chunks = table - .partition(page_size * 5) + .partition(page_size * 5, page_size, false, &atomic_guest_memory_map) .map(|table| table.data) .collect::>(); diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index c6139d60c6..0820d665a6 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1648,6 +1648,7 @@ impl SendAdditionalConnections { fn send_memory( &self, table: &MemoryRangeTable, + skip_zero_pages: bool, socket: &mut SocketStream, return_if_cancelled_cb: &impl Fn(&mut SocketStream) -> result::Result<(), MigratableError>, ) -> std::result::Result<(), MigratableError> { @@ -1658,7 +1659,12 @@ impl SendAdditionalConnections { // because we wait for a response after each chunk instead of sending // everything in one go. if thread_len == 0 { - for chunk in table.partition(Self::CHUNK_SIZE) { + for chunk in table.partition( + Self::CHUNK_SIZE, + PAGE_SIZE as u64, + skip_zero_pages, + &self.guest_memory, + ) { return_if_cancelled_cb(socket) .inspect_err(|_| info!("cancelling migration during memory iteration"))?; vm_send_memory(&self.guest_memory, socket, &chunk)?; @@ -1668,7 +1674,12 @@ impl SendAdditionalConnections { // The chunk size is chosen to be big enough so that even very fast // links need some milliseconds to send it. 
-        'next_chunk: for chunk in table.partition(Self::CHUNK_SIZE) {
+        'next_chunk: for chunk in table.partition(
+            Self::CHUNK_SIZE,
+            PAGE_SIZE as u64,
+            skip_zero_pages,
+            &self.guest_memory,
+        ) {
             let mut chunk = SendMemoryThreadMessage::Memory(chunk);
             // The channel we put work into has a limited size. Thus it may happen that we have to
             // retry putting this chunk into it.
@@ -2593,7 +2604,16 @@ impl Vmm {
 
                 // Send the current dirty pages
                 s.transmit_start_time = Instant::now();
-                mem_send.send_memory(&iteration_table, socket, return_if_cancelled_cb)?;
+                // Only skip zero pages on the first iteration: if we skipped zero pages
+                // found by dirty logging, pages that the guest zeroed after the first
+                // iteration would never reach the destination.
+                let skip_zero_pages = s.iteration == 0;
+                mem_send.send_memory(
+                    &iteration_table,
+                    skip_zero_pages,
+                    socket,
+                    return_if_cancelled_cb,
+                )?;
                 s.transmit_duration = s.transmit_start_time.elapsed();
 
                 s.total_transferred_bytes += s.bytes_to_transmit;
@@ -2684,7 +2704,7 @@ impl Vmm {
                 // Send last batch of dirty pages
                 let mut final_table = vm.dirty_log()?;
                 final_table.extend(iteration_table.clone());
-                mem_send.send_memory(&final_table, socket, return_if_cancelled_cb)?;
+                mem_send.send_memory(&final_table, false, socket, return_if_cancelled_cb)?;
 
                 // Update statistics
                 s.bytes_to_transmit = final_table.regions().iter().map(|range| range.length).sum();

From e6b28045cc08dda16bd8dd11f7b2cf5ea5e34b73 Mon Sep 17 00:00:00 2001
From: Julian Schindel <julian.schindel@sap.com>
Date: Wed, 18 Mar 2026 09:03:56 +0100
Subject: [PATCH 2/4] vm-migration: add unit tests for zero page skipping

On-behalf-of: SAP julian.schindel@sap.com
Signed-off-by: Julian Schindel <julian.schindel@sap.com>
---
 vm-migration/src/protocol.rs | 279 ++++++++++++++++++++++++++++++++++-
 1 file changed, 278 insertions(+), 1 deletion(-)

diff --git a/vm-migration/src/protocol.rs b/vm-migration/src/protocol.rs
index d8da0b3543..f7bd1ac041 100644
--- a/vm-migration/src/protocol.rs
+++ b/vm-migration/src/protocol.rs
@@ -665,7 +665,7 @@ impl MemoryRangeTable {
 #[cfg(test)]
 mod unit_tests {
     use vm_memory::bitmap::AtomicBitmap;
-    use vm_memory::{Address, GuestAddress, GuestMemoryAtomic, GuestMemoryMmap};
+    use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryAtomic, GuestMemoryMmap};
 
     use crate::protocol::{MemoryRange, MemoryRangeTable};
 
@@ -813,4 +813,281 @@ mod unit_tests {
             );
         }
     }
+
+    #[test]
+    fn test_memory_range_table_iter_skip_zero_pages_all() {
+        let input = [0b11_0011_0011_0011];
+
+        let start_gpa = 0x1000;
+        let page_size = 0x1000;
+
+        let table = MemoryRangeTable::from_dirty_bitmap(input, start_gpa, page_size);
+        let expected_regions = [
+            MemoryRange {
+                gpa: start_gpa,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 4 * page_size,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 8 * page_size,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 12 * page_size,
+                length: page_size * 2,
+            },
+        ];
+        assert_eq!(table.regions(), &expected_regions);
+
+        let ranges = expected_regions
+            .clone()
+            .map(|range| (GuestAddress::new(range.gpa), range.length as usize));
+        let guest_memory_map: GuestMemoryMmap<AtomicBitmap> =
+            GuestMemoryMmap::from_ranges(&ranges).unwrap();
+        let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map);
+
+        let chunks = table
+            .partition(page_size * 2, page_size, true, &atomic_guest_memory_map)
+            .map(|table| table.data)
+            .collect::<Vec<_>>();
+
+        assert!(chunks.is_empty());
+    }
+
+    #[test]
+    fn test_memory_range_table_iter_skip_zero_pages_some() {
+        let input = [0b11_0011_0011_0011];
+
+        let start_gpa = 0x1000;
+        let page_size = 0x1000;
+
+        let table = MemoryRangeTable::from_dirty_bitmap(input, start_gpa, page_size);
+        let expected_regions = [
+            MemoryRange {
+                gpa: start_gpa,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 4 * page_size,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 8 * page_size,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 12 * page_size,
+                length: page_size * 2,
+            },
+        ];
+        assert_eq!(table.regions(), &expected_regions);
+
+        let ranges = expected_regions
+            .clone()
+            .map(|range| (GuestAddress(range.gpa), range.length as usize));
+
+        let guest_memory_map: GuestMemoryMmap<AtomicBitmap> =
+            GuestMemoryMmap::from_ranges(&ranges).unwrap();
+
+        expected_regions.iter().step_by(2).for_each(|memory_range| {
+            let buffer = vec![1_u8; memory_range.length as usize];
+            guest_memory_map
+                .read_volatile_from(
+                    GuestAddress::new(memory_range.gpa),
+                    &mut buffer.as_slice(),
+                    memory_range.length as usize,
+                )
+                .unwrap();
+        });
+
+        let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map);
+
+        // Use a large chunk_size so that the iterator returns a single chunk.
+        let chunks = table
+            .partition(page_size * 20, page_size, true, &atomic_guest_memory_map)
+            .map(|table| table.data)
+            .collect::<Vec<_>>();
+
+        assert_eq!(
+            chunks,
+            &[vec![
+                MemoryRange {
+                    gpa: start_gpa + 8 * page_size,
+                    length: page_size * 2,
+                },
+                MemoryRange {
+                    gpa: start_gpa,
+                    length: page_size * 2,
+                },
+            ],]
+        );
+    }
+
+    #[test]
+    fn test_memory_range_table_iter_skip_zero_pages_within_range_table() {
+        let input = [0b0111];
+
+        let start_gpa = 0x1000;
+        let page_size = 0x1000;
+
+        let table = MemoryRangeTable::from_dirty_bitmap(input, start_gpa, page_size);
+        let expected_regions = [MemoryRange {
+            gpa: start_gpa,
+            length: page_size * 3,
+        }];
+        assert_eq!(table.regions(), &expected_regions);
+
+        let ranges = expected_regions
+            .clone()
+            .map(|range| (GuestAddress(range.gpa), range.length as usize));
+
+        let guest_memory_map: GuestMemoryMmap<AtomicBitmap> =
+            GuestMemoryMmap::from_ranges(&ranges).unwrap();
+
+        let buffer = vec![1_u8; page_size as usize];
+
+        guest_memory_map
+            .read_volatile_from(
+                GuestAddress::new(expected_regions[0].gpa),
+                &mut buffer.as_slice(),
+                page_size as usize,
+            )
+            .unwrap();
+
+        guest_memory_map
+            .read_volatile_from(
+                GuestAddress::new(expected_regions[0].gpa + 2 * page_size),
+                &mut buffer.as_slice(),
+                page_size as usize,
+            )
+            .unwrap();
+
+        let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map);
+
+        // Use a large chunk_size so that the iterator returns a single chunk.
+        let chunks = table
+            .partition(page_size * 20, page_size, true, &atomic_guest_memory_map)
+            .map(|table| table.data)
+            .collect::<Vec<_>>();
+
+        assert_eq!(
+            chunks,
+            &[vec![
+                MemoryRange {
+                    gpa: start_gpa + 2 * page_size,
+                    length: page_size
+                },
+                MemoryRange {
+                    gpa: start_gpa,
+                    length: page_size
+                }
+            ],]
+        );
+    }
+
+    #[test]
+    fn test_memory_range_table_iter_skip_zero_pages_non_page_boundaries_all_zero() {
+        let input = [0b11_0011_0000_1111];
+
+        let start_gpa = 0x1000;
+        let page_size = 0x1000;
+
+        let mut table = MemoryRangeTable::from_dirty_bitmap(input, start_gpa, page_size);
+        table.data.iter_mut().for_each(|entry| {
+            entry.gpa += 10;
+        });
+        let expected_regions = [
+            MemoryRange {
+                gpa: start_gpa + 10,
+                length: page_size * 4,
+            },
+            MemoryRange {
+                gpa: start_gpa + 8 * page_size + 10,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 12 * page_size + 10,
+                length: page_size * 2,
+            },
+        ];
+        assert_eq!(table.regions(), &expected_regions);
+
+        let ranges = expected_regions
+            .clone()
+            .map(|range| (GuestAddress(range.gpa), range.length as usize));
+
+        let guest_memory_map: GuestMemoryMmap<AtomicBitmap> =
+            GuestMemoryMmap::from_ranges(&ranges).unwrap();
+        let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map);
+
+        // Use a large chunk_size so that the iterator returns a single chunk.
+        let chunks = table
+            .partition(page_size * 20, page_size, true, &atomic_guest_memory_map)
+            .map(|table| table.data)
+            .collect::<Vec<_>>();
+
+        assert!(chunks.is_empty());
+    }
+
+    #[test]
+    fn test_memory_range_table_iter_skip_zero_pages_non_page_boundaries_all_non_zero() {
+        let input = [0b11_0011_0000_1111];
+
+        let start_gpa = 0x1000;
+        let page_size = 0x1000;
+
+        let mut table = MemoryRangeTable::from_dirty_bitmap(input, start_gpa, page_size);
+        table.data.iter_mut().for_each(|entry| {
+            entry.gpa += 10;
+        });
+        let expected_regions = [
+            MemoryRange {
+                gpa: start_gpa + 10,
+                length: page_size * 4,
+            },
+            MemoryRange {
+                gpa: start_gpa + 8 * page_size + 10,
+                length: page_size * 2,
+            },
+            MemoryRange {
+                gpa: start_gpa + 12 * page_size + 10,
+                length: page_size * 2,
+            },
+        ];
+        assert_eq!(table.regions(), &expected_regions);
+
+        let ranges = expected_regions
+            .clone()
+            .map(|range| (GuestAddress(range.gpa), range.length as usize));
+
+        let guest_memory_map: GuestMemoryMmap<AtomicBitmap> =
+            GuestMemoryMmap::from_ranges(&ranges).unwrap();
+
+        expected_regions.iter().for_each(|memory_range| {
+            let buffer = vec![1_u8; memory_range.length as usize];
+
+            guest_memory_map
+                .read_volatile_from(
+                    GuestAddress::new(memory_range.gpa),
+                    &mut buffer.as_slice(),
+                    memory_range.length as usize,
+                )
+                .unwrap();
+        });
+
+        let atomic_guest_memory_map = GuestMemoryAtomic::new(guest_memory_map);
+
+        // Use a large chunk_size so that the iterator returns a single chunk.
+        let chunks = table
+            .partition(page_size * 20, page_size, true, &atomic_guest_memory_map)
+            .map(|table| table.data)
+            .collect::<Vec<_>>();
+
+        let mut expected_chunks = expected_regions.clone();
+        expected_chunks.reverse();
+        assert_eq!(chunks, &[expected_chunks]);
+    }
 }

From 7f596a183bc7c61ed8c0ecd49e1fc17884673196 Mon Sep 17 00:00:00 2001
From: Julian Schindel <julian.schindel@sap.com>
Date: Wed, 18 Mar 2026 10:51:56 +0100
Subject: [PATCH 3/4] vm-migration: log total migration time

On-behalf-of: SAP julian.schindel@sap.com
Signed-off-by: Julian Schindel <julian.schindel@sap.com>
---
 vmm/src/lib.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs
index 0820d665a6..8d2823205e 100644
--- a/vmm/src/lib.rs
+++ b/vmm/src/lib.rs
@@ -2712,8 +2712,10 @@ impl Vmm {
                 s.total_transferred_bytes += s.bytes_to_transmit;
                 s.total_transferred_pages += s.pages_to_transmit;
 
+                let migration_duration = s.migration_start_time.elapsed();
                 info!(
-                    "Memory Migration finished: iter={},throttle={}%,size={}MiB,dirtyrate={}pps,bandwidth={:.2}MiBs,downtime(expected)={}ms",
+                    "Memory Migration finished: took={}ms,iter={},throttle={}%,size={}MiB,dirtyrate={}pps,bandwidth={:.2}MiBs,downtime(expected)={}ms",
+                    migration_duration.as_millis(),
                     (s.iteration_duration - s.transmit_duration).as_millis(),
                     vm.throttle_percent(),
                     s.bytes_to_transmit.div_ceil(1024).div_ceil(1024),

From 46c65f2a4579cec400ffb8e992623fb1158bb2e3 Mon Sep 17 00:00:00 2001
From: Julian Schindel <julian.schindel@sap.com>
Date: Wed, 18 Mar 2026 10:52:15 +0100
Subject: [PATCH 4/4] ch-remote: add skip zero pages option to API/CLI

On-behalf-of: SAP julian.schindel@sap.com
Signed-off-by: Julian Schindel <julian.schindel@sap.com>
---
 cloud-hypervisor/src/bin/ch-remote.rs     | 14 ++++++++++++++
 vmm/src/api/mod.rs                        |  2 ++
 vmm/src/api/openapi/cloud-hypervisor.yaml |  2 ++
 vmm/src/lib.rs                            |  4 +++-
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/cloud-hypervisor/src/bin/ch-remote.rs b/cloud-hypervisor/src/bin/ch-remote.rs
index 9e913320b0..43f3d36444 100644
--- a/cloud-hypervisor/src/bin/ch-remote.rs
+++ b/cloud-hypervisor/src/bin/ch-remote.rs
@@ -536,6 +536,11 @@ fn rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu
                     .get_one::<String>("tls-dir")
                     .cloned(),
                 wait_for_migration,
+                *matches
+                    .subcommand_matches("send-migration")
+                    .unwrap()
+                    .get_one::<bool>("skip-zero-pages")
+                    .unwrap(),
             );
 
             simple_api_command(socket, "PUT", "send-migration", Some(&send_migration_data))
@@ -1032,6 +1037,7 @@ fn receive_migration_data(url: String, tls_dir: Option<String>) -> String {
     serde_json::to_string(&receive_migration_data).unwrap()
 }
 
+#[allow(clippy::too_many_arguments)]
 fn send_migration_data(
     url: String,
     local: bool,
@@ -1040,6 +1046,7 @@ fn send_migration_data(
     connections: NonZeroU32,
     tls_dir: Option<String>,
     keep_alive: bool,
+    skip_zero_pages: bool,
 ) -> String {
     let send_migration_data = vmm::api::VmSendMigrationData {
         destination_url: url,
@@ -1049,6 +1056,7 @@ fn send_migration_data(
         connections,
         tls_dir,
         keep_alive,
+        skip_zero_pages,
     };
 
     serde_json::to_string(&send_migration_data).unwrap()
@@ -1275,6 +1283,12 @@ fn get_cli_commands_sorted() -> Box<[Command]> {
                         .num_args(0)
                         .action(ArgAction::SetTrue),
                 )
+                .arg(
+                    Arg::new("skip-zero-pages")
+                        .long("skip-zero-pages")
+                        .help("skip zero-filled pages when sending VM memory to the receiver")
+                        .num_args(0)
+                        .action(ArgAction::SetTrue),
+                )
                 .arg(
                     Arg::new("tls-dir")
                         .long("tls-dir")
diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs
index 7436ec8b00..ad9936bd7b 100644
--- a/vmm/src/api/mod.rs
+++ b/vmm/src/api/mod.rs
@@ -310,6 +310,8 @@ pub struct VmSendMigrationData {
     pub tls_dir: Option<String>,
     /// Keep the VMM alive.
     pub keep_alive: bool,
+    /// Skip zero-filled pages when sending VM memory to the receiver.
+    pub skip_zero_pages: bool,
 }
 
 // Default value for downtime the same as qemu.
diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml
index 66e0f76ec8..40168439ff 100644
--- a/vmm/src/api/openapi/cloud-hypervisor.yaml
+++ b/vmm/src/api/openapi/cloud-hypervisor.yaml
@@ -1332,6 +1332,8 @@ components:
           format: int64
           description: Total timeout for migration in milliseconds (0 = no limit)
          default: 0
+        skip-zero-pages:
+          type: boolean
+          description: Skip zero-filled pages when sending VM memory to the receiver
 
     VmAddUserDevice:
       required:
diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs
index 8d2823205e..fd356efbc7 100644
--- a/vmm/src/lib.rs
+++ b/vmm/src/lib.rs
@@ -2471,6 +2471,7 @@ impl Vmm {
         migrate_downtime_limit: Duration,
         postponed_lifecycle_event: &Mutex>,
         return_if_cancelled_cb: &impl Fn(&mut SocketStream) -> result::Result<(), MigratableError>,
+        skip_zero_pages: bool,
     ) -> result::Result {
@@ -2607,7 +2608,7 @@ impl Vmm {
                 // Only skip zero pages on the first iteration: if we skipped zero pages
                 // found by dirty logging, pages that the guest zeroed after the first
                 // iteration would never reach the destination.
-                let skip_zero_pages = s.iteration == 0;
+                let skip_zero_pages = s.iteration == 0 && skip_zero_pages;
                 mem_send.send_memory(
                     &iteration_table,
                     skip_zero_pages,
@@ -2689,6 +2690,7 @@ impl Vmm {
             migrate_downtime_limit,
             postponed_lifecycle_event,
             return_if_cancelled_cb,
+            send_data_migration.skip_zero_pages,
         )?;
 
         info!("Entering downtime phase");
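
The following standalone sketches illustrate the mechanisms used in this series; none of them are part of the patches. First, the zero-page check that `memory_is_equal` performs, reduced to plain host buffers so it can run as a normal binary (with the `libc` crate as a dependency). The `PAGE_SIZE` constant and the `page_is_zero` helper are illustrative names; only the `libc::memcmp` comparison mirrors the patched code.

const PAGE_SIZE: usize = 4096;

/// Returns true if `page` contains only zero bytes, by comparing it
/// against a pre-allocated zero-filled buffer of the same length.
fn page_is_zero(page: &[u8], zero_page: &[u8]) -> bool {
    assert_eq!(page.len(), zero_page.len());
    // SAFETY: both slices are valid for `page.len()` bytes and are not
    // modified by `memcmp`.
    unsafe { libc::memcmp(page.as_ptr().cast(), zero_page.as_ptr().cast(), page.len()) == 0 }
}

fn main() {
    let zero_page = vec![0u8; PAGE_SIZE];
    let mut page = vec![0u8; PAGE_SIZE];
    assert!(page_is_zero(&page, &zero_page));

    page[123] = 1;
    assert!(!page_is_zero(&page, &zero_page));
}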
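
Second, the undershoot/overshoot arithmetic in `fill_zero_removed_data`, replayed with the numbers from the non-page-boundary tests (a range shifted 10 bytes past a 4 KiB boundary). The variable names match the patch; the `main` harness is only for illustration.

fn main() {
    let page_size: u64 = 0x1000;
    let (gpa, length): (u64, u64) = (0x1000 + 10, 4 * page_size);

    // Bytes from `gpa` up to the next page boundary.
    let offset = gpa % page_size;
    let gpa_page_undershoot = if offset > 0 { page_size - offset } else { 0 };

    // Bytes extending past the last page boundary covered by the range.
    let length_page_overshoot = (length - gpa_page_undershoot) % page_size;

    let first_page_boundary = gpa + gpa_page_undershoot;
    let last_page_boundary = gpa + length - length_page_overshoot;
    let page_amount = (last_page_boundary - first_page_boundary) / page_size;

    // The range [0x100a, 0x500a) covers three full pages plus a 10-byte
    // partial page at each end.
    assert_eq!(gpa_page_undershoot, page_size - 10);
    assert_eq!(length_page_overshoot, 10);
    assert_eq!(first_page_boundary, 0x2000);
    assert_eq!(last_page_boundary, 0x5000);
    assert_eq!(page_amount, 3);
}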
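
Third, the splitting step in `next` when a range would exceed the remaining chunk budget: the head is emitted now and the tail is pushed back for a later chunk. The `split` helper below does not exist in the patch; it isolates the `leftover_bytes`/`returned_bytes` computation on a local copy of `MemoryRange`.

#[derive(Debug, Clone, PartialEq)]
struct MemoryRange {
    gpa: u64,
    length: u64,
}

/// Splits `range` so that at most `budget` bytes are emitted now; the
/// remainder, if any, is returned separately to be re-queued.
fn split(range: MemoryRange, budget: u64) -> (MemoryRange, Option<MemoryRange>) {
    if range.length <= budget {
        return (range, None);
    }
    let returned_bytes = budget;
    let leftover_bytes = range.length - returned_bytes;
    (
        MemoryRange {
            gpa: range.gpa,
            length: returned_bytes,
        },
        Some(MemoryRange {
            gpa: range.gpa + returned_bytes,
            length: leftover_bytes,
        }),
    )
}

fn main() {
    // A 3-page range against a 2-page budget: 2 pages go out now, the
    // remaining page is pushed back for the next chunk.
    let (now, later) = split(
        MemoryRange {
            gpa: 0x1000,
            length: 0x3000,
        },
        0x2000,
    );
    assert_eq!(
        now,
        MemoryRange {
            gpa: 0x1000,
            length: 0x2000
        }
    );
    assert_eq!(
        later,
        Some(MemoryRange {
            gpa: 0x3000,
            length: 0x1000
        })
    );
}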
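
Finally, the bitmap-to-range expansion the unit tests rely on: bit i of the dirty bitmap marks page i, and runs of set bits coalesce into one range. The function below is a local re-implementation for illustration, not the `MemoryRangeTable::from_dirty_bitmap` from vm-migration; it shows why `0b11_0011_0011_0011` yields the four two-page regions asserted in the tests.

#[derive(Debug, Clone, PartialEq)]
struct MemoryRange {
    gpa: u64,
    length: u64,
}

fn ranges_from_bitmap(bitmap: u64, start_gpa: u64, page_size: u64) -> Vec<MemoryRange> {
    let mut ranges: Vec<MemoryRange> = Vec::new();
    for bit in 0..64u64 {
        if (bitmap >> bit) & 1 == 1 {
            let gpa = start_gpa + bit * page_size;
            match ranges.last_mut() {
                // Extend the previous range if this page is adjacent to it.
                Some(last) if last.gpa + last.length == gpa => last.length += page_size,
                _ => ranges.push(MemoryRange {
                    gpa,
                    length: page_size,
                }),
            }
        }
    }
    ranges
}

fn main() {
    // Pages 0, 1, 4, 5, 8, 9, 12 and 13 are dirty.
    let ranges = ranges_from_bitmap(0b11_0011_0011_0011, 0x1000, 0x1000);
    assert_eq!(ranges.len(), 4);
    assert_eq!(
        ranges[0],
        MemoryRange {
            gpa: 0x1000,
            length: 0x2000
        }
    );
    assert_eq!(
        ranges[3],
        MemoryRange {
            gpa: 0x1000 + 12 * 0x1000,
            length: 0x2000
        }
    );
}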