From 14d2a310bb1211ec903fca0e54158902ef8fd164 Mon Sep 17 00:00:00 2001 From: David Scott Date: Thu, 26 Feb 2026 13:29:06 +0000 Subject: [PATCH 1/6] vm/libkrun: call krun_free_ctx before dlClose in Shutdown Call krun_free_ctx (via vmc.Shutdown) before unloading the library with dlClose/FreeLibrary. krun_free_ctx is synchronous: it joins all vCPU and virtio worker threads and closes all file handles held by the VM. Previously, dlClose was called without stopping the VM first. On Unix this was mostly harmless (the process was exiting anyway), but on Windows FreeLibrary immediately unloads the DLL while threads are still running against it, and leaves file handles open. The open handles prevent containerd from renaming/deleting the bundle directory, which blocks container restart. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Derek McGowan --- internal/vm/libkrun/instance.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/internal/vm/libkrun/instance.go b/internal/vm/libkrun/instance.go index 18b6f83..69c3d2a 100644 --- a/internal/vm/libkrun/instance.go +++ b/internal/vm/libkrun/instance.go @@ -433,6 +433,16 @@ func (v *vmInstance) Shutdown(ctx context.Context) error { if v.handler == 0 { return fmt.Errorf("libkrun already closed") } + // Stop the VM and wait for all threads (vCPU, virtio workers) to exit + // before unloading the library. krun_free_ctx is synchronous: it joins + // all threads and closes all file handles. Without this, dlClose rips + // the code out from under running threads and leaves file handles open, + // preventing containerd from cleaning up the bundle directory. + if v.vmc != nil { + if err := v.vmc.Shutdown(); err != nil { + log.G(ctx).WithError(err).Warn("krun_free_ctx failed during shutdown") + } + } err := dlClose(v.handler) if err != nil { return err From e694569134d0e213f2f0bd0337b4617ca907a2df Mon Sep 17 00:00:00 2001 From: David Scott Date: Thu, 26 Feb 2026 13:29:30 +0000 Subject: [PATCH 2/6] shim/task: remove rootfs directory during shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the bundle's rootfs directory during graceful shim shutdown, before containerd's bundle cleanup runs. On Windows, containerd's UnmountAll calls bindfilter.RemoveFileBinding on the rootfs directory. This fails with ERROR_ACCESS_DENIED because nerdbox never sets up a bind filter mount — it passes the rootfs into the VM via virtio block devices. The rootfs directory in the bundle is just an empty mount point that containerd created. Removing it before cleanup makes UnmountAll a no-op, allowing the bundle to be deleted and the container to restart. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Derek McGowan --- internal/shim/task/service.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/shim/task/service.go b/internal/shim/task/service.go index ed887ed..09f901a 100644 --- a/internal/shim/task/service.go +++ b/internal/shim/task/service.go @@ -167,6 +167,15 @@ func (s *service) shutdown(ctx context.Context) error { } } + // Remove the rootfs directory so containerd's bundle cleanup doesn't + // attempt a bind filter unmount. On Windows, Unmount calls + // bindfilter.RemoveFileBinding which fails with ERROR_ACCESS_DENIED + // on directories that were never bind filter mounts (like our VM-based + // rootfs). Removing the directory makes UnmountAll a no-op. + if err := os.RemoveAll("rootfs"); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove rootfs directory during shutdown") + } + // Signal last event and stop forwarding s.events <- nil From 6c8e6781d7364d9776785b111756372cb39ff476 Mon Sep 17 00:00:00 2001 From: David Scott Date: Thu, 26 Feb 2026 13:29:56 +0000 Subject: [PATCH 3/6] shim/manager: fix Stop() for Windows delete subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix three issues in the Windows manager Stop() method, which runs in the delete subcommand after a shim disconnects: 1. Use absolute bundle path for file operations. On Windows, containerd deliberately does not set the delete subcommand's CWD to the bundle directory (to avoid holding an implicit directory handle that would block cleanup). The previous code used relative paths like "shim.pid", which resolved against the wrong directory. Extract the bundle path from the context (-bundle flag) and use it explicitly. 2. Wait for the shim process to exit before returning. After reading the shim PID, wait for the process to fully terminate (up to 10s) so that all file handles — from the VM, krun DLL, virtio devices, and the shim's own CWD — are released. Without this, containerd's bundle rename races with handle cleanup and fails with ERROR_SHARING_VIOLATION. 3. Remove the rootfs directory before returning, same as commit "shim/task: remove rootfs directory during shutdown" does for the graceful path. This handles the case where the shim was killed before its shutdown handler ran (e.g. SIGKILL after stop timeout), or where the shim's cleanup races with containerd's under load. Also handle the case where shim.pid does not exist: the shim already exited and cleaned up. Return a synthetic exit status instead of failing, and still remove the rootfs directory. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Derek McGowan --- pkg/shim/manager/manager_windows.go | 33 ++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pkg/shim/manager/manager_windows.go b/pkg/shim/manager/manager_windows.go index a75e8f8..776da90 100644 --- a/pkg/shim/manager/manager_windows.go +++ b/pkg/shim/manager/manager_windows.go @@ -157,9 +157,38 @@ func (manager) Start(ctx context.Context, bparams *bootapi.BootstrapParams) (_ * }, nil } +// bundlePath extracts the bundle path from the context. The shim framework +// stores it as shim.Opts{BundlePath: ...} via the -bundle flag. +func bundlePath(ctx context.Context) string { + if o, ok := ctx.Value(shim.OptsKey{}).(shim.Opts); ok { + return o.BundlePath + } + return "" +} + +// removeRootfs removes the rootfs directory from the bundle so that +// containerd's bundle cleanup doesn't attempt a bind filter unmount. +// On Windows, Unmount calls bindfilter.RemoveFileBinding which fails with +// ERROR_ACCESS_DENIED on directories that were never bind filter mounts +// (nerdbox uses VM-based virtio block devices instead). Removing the +// directory makes UnmountAll a no-op. +func removeRootfs(ctx context.Context) { + if bp := bundlePath(ctx); bp != "" { + os.RemoveAll(filepath.Join(bp, "rootfs")) + } +} + func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) { - p, err := os.ReadFile("shim.pid") + p, err := os.ReadFile(filepath.Join(bundlePath(ctx), "shim.pid")) if err != nil { + if os.IsNotExist(err) { + // The shim already exited and cleaned up its pid file. + removeRootfs(ctx) + return shim.StopStatus{ + ExitedAt: time.Now(), + ExitStatus: 128 + 9, + }, nil + } return shim.StopStatus{}, err } pid, err := strconv.Atoi(strings.TrimSpace(string(p))) @@ -203,6 +232,8 @@ func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) { return shim.StopStatus{}, fmt.Errorf("wait for shim process: %w", err) } + removeRootfs(ctx) + return shim.StopStatus{ ExitedAt: time.Now(), ExitStatus: 128 + 9, From 4f7d9281a5f1c5b60a31c8ba5984f7398becb0bb Mon Sep 17 00:00:00 2001 From: Craig Gumbley Date: Thu, 26 Feb 2026 12:01:52 +0000 Subject: [PATCH 4/6] Force LF line endings for Dockerfile Prevents CRLF issues with heredoc shell scripts on Windows. Signed-off-by: Derek McGowan --- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..16de697 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +Dockerfile text eol=lf From 2e2c8875da1bd5340586d33dd20dfc6d730168f7 Mon Sep 17 00:00:00 2001 From: Craig Gumbley Date: Thu, 26 Feb 2026 12:02:02 +0000 Subject: [PATCH 5/6] Update runtime to find arch-suffixed initrd and libkrun Falls back to non-suffixed libkrun names for system-installed libraries. Signed-off-by: Derek McGowan --- internal/vm/libkrun/instance.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/vm/libkrun/instance.go b/internal/vm/libkrun/instance.go index 69c3d2a..8b36a71 100644 --- a/internal/vm/libkrun/instance.go +++ b/internal/vm/libkrun/instance.go @@ -72,10 +72,11 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e if runtime.GOOS != "windows" && len(p2) == 0 { p2 = []string{"/usr/local/lib", "/usr/local/lib64", "/usr/lib", "/lib"} } - sharedNames := []string{"libkrun.so"} + arch := kernelArch() + sharedNames := []string{fmt.Sprintf("libkrun-%s.so", arch), "libkrun.so"} switch runtime.GOOS { case "darwin": - sharedNames = []string{"libkrun.dylib", "libkrun-efi.dylib"} + sharedNames = []string{fmt.Sprintf("libkrun-%s.dylib", arch), "libkrun.dylib", fmt.Sprintf("libkrun-efi-%s.dylib", arch), "libkrun-efi.dylib"} p2 = append(p2, "/opt/homebrew/lib") case "windows": sharedNames = []string{"krun.dll"} @@ -103,7 +104,7 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e } } if initrdPath == "" { - path = filepath.Join(dir, "nerdbox-initrd") + path = filepath.Join(dir, fmt.Sprintf("nerdbox-initrd-%s", arch)) if _, err := os.Stat(path); err == nil { initrdPath = path } @@ -116,7 +117,7 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e return nil, fmt.Errorf("nerdbox-kernel not found in PATH or LIBKRUN_PATH") } if initrdPath == "" { - return nil, fmt.Errorf("nerdbox-initrd not found in PATH or LIBKRUN_PATH") + return nil, fmt.Errorf("nerdbox-initrd-%s not found in PATH or LIBKRUN_PATH", arch) } lib, handler, err := openLibkrun(krunPath) From 5b8afb29dbde5f93c712a637ea968366e85f3bc8 Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Thu, 14 May 2026 23:57:58 -0700 Subject: [PATCH 6/6] Cleanup changes for Windows - vm/libkrun: fall back to 'nerdbox-initrd' when arch-suffixed file not found, fixing integration tests on CIs that still ship the old name; update the not-found error to mention both candidates - shim/task: move rootfs removal into platform-specific files so it only runs on Windows (bind-filter workaround), not on Linux/macOS where removing through a live mountpoint can corrupt data - shim/manager: write shim.pid via bundle path in Start() so it is consistent with Stop() which already reads it from the bundle path Signed-off-by: Derek McGowan --- internal/shim/task/service.go | 11 +++----- internal/shim/task/service_other.go | 25 ++++++++++++++++++ internal/shim/task/service_windows.go | 38 +++++++++++++++++++++++++++ internal/vm/libkrun/instance.go | 11 +++++--- pkg/shim/manager/manager_windows.go | 2 +- 5 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 internal/shim/task/service_other.go create mode 100644 internal/shim/task/service_windows.go diff --git a/internal/shim/task/service.go b/internal/shim/task/service.go index 09f901a..1e63ea4 100644 --- a/internal/shim/task/service.go +++ b/internal/shim/task/service.go @@ -167,14 +167,9 @@ func (s *service) shutdown(ctx context.Context) error { } } - // Remove the rootfs directory so containerd's bundle cleanup doesn't - // attempt a bind filter unmount. On Windows, Unmount calls - // bindfilter.RemoveFileBinding which fails with ERROR_ACCESS_DENIED - // on directories that were never bind filter mounts (like our VM-based - // rootfs). Removing the directory makes UnmountAll a no-op. - if err := os.RemoveAll("rootfs"); err != nil { - log.G(ctx).WithError(err).Warn("failed to remove rootfs directory during shutdown") - } + // Remove the rootfs directory on Windows so containerd's bundle cleanup + // doesn't attempt a bind filter unmount (no-op on other platforms). + removeRootfsDir(ctx) // Signal last event and stop forwarding s.events <- nil diff --git a/internal/shim/task/service_other.go b/internal/shim/task/service_other.go new file mode 100644 index 0000000..6b40f4d --- /dev/null +++ b/internal/shim/task/service_other.go @@ -0,0 +1,25 @@ +//go:build !windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package task + +import "context" + +// removeRootfsDir is a no-op on non-Windows platforms. The bind-filter +// unmount issue that necessitates rootfs removal only affects Windows. +func removeRootfsDir(_ context.Context) {} diff --git a/internal/shim/task/service_windows.go b/internal/shim/task/service_windows.go new file mode 100644 index 0000000..1ccce87 --- /dev/null +++ b/internal/shim/task/service_windows.go @@ -0,0 +1,38 @@ +//go:build windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package task + +import ( + "context" + "os" + + "github.com/containerd/log" +) + +// removeRootfsDir removes the rootfs directory from the bundle so that +// containerd's bundle cleanup doesn't attempt a bind filter unmount. +// On Windows, Unmount calls bindfilter.RemoveFileBinding which fails with +// ERROR_ACCESS_DENIED on directories that were never bind filter mounts +// (nerdbox uses VM-based virtio block devices instead). Removing the +// directory makes UnmountAll a no-op. +func removeRootfsDir(ctx context.Context) { + if err := os.RemoveAll("rootfs"); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove rootfs directory during shutdown") + } +} diff --git a/internal/vm/libkrun/instance.go b/internal/vm/libkrun/instance.go index 8b36a71..0d731c4 100644 --- a/internal/vm/libkrun/instance.go +++ b/internal/vm/libkrun/instance.go @@ -104,9 +104,12 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e } } if initrdPath == "" { - path = filepath.Join(dir, fmt.Sprintf("nerdbox-initrd-%s", arch)) - if _, err := os.Stat(path); err == nil { - initrdPath = path + for _, name := range []string{fmt.Sprintf("nerdbox-initrd-%s", arch), "nerdbox-initrd"} { + path = filepath.Join(dir, name) + if _, err := os.Stat(path); err == nil { + initrdPath = path + break + } } } } @@ -117,7 +120,7 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e return nil, fmt.Errorf("nerdbox-kernel not found in PATH or LIBKRUN_PATH") } if initrdPath == "" { - return nil, fmt.Errorf("nerdbox-initrd-%s not found in PATH or LIBKRUN_PATH", arch) + return nil, fmt.Errorf("nerdbox-initrd-%s or nerdbox-initrd not found in PATH or LIBKRUN_PATH", arch) } lib, handler, err := openLibkrun(krunPath) diff --git a/pkg/shim/manager/manager_windows.go b/pkg/shim/manager/manager_windows.go index 776da90..34a3ffe 100644 --- a/pkg/shim/manager/manager_windows.go +++ b/pkg/shim/manager/manager_windows.go @@ -124,7 +124,7 @@ func (manager) Start(ctx context.Context, bparams *bootapi.BootstrapParams) (_ * // make sure to wait after start go cmd.Wait() - if err = shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { + if err = shim.WritePidFile(filepath.Join(bundlePath(ctx), "shim.pid"), cmd.Process.Pid); err != nil { return nil, err }