diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..16de697 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +Dockerfile text eol=lf diff --git a/internal/shim/task/service.go b/internal/shim/task/service.go index ed887ed..1e63ea4 100644 --- a/internal/shim/task/service.go +++ b/internal/shim/task/service.go @@ -167,6 +167,10 @@ func (s *service) shutdown(ctx context.Context) error { } } + // Remove the rootfs directory on Windows so containerd's bundle cleanup + // doesn't attempt a bind filter unmount (no-op on other platforms). + removeRootfsDir(ctx) + // Signal last event and stop forwarding s.events <- nil diff --git a/internal/shim/task/service_other.go b/internal/shim/task/service_other.go new file mode 100644 index 0000000..6b40f4d --- /dev/null +++ b/internal/shim/task/service_other.go @@ -0,0 +1,25 @@ +//go:build !windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package task + +import "context" + +// removeRootfsDir is a no-op on non-Windows platforms. The bind-filter +// unmount issue that necessitates rootfs removal only affects Windows. +func removeRootfsDir(_ context.Context) {} diff --git a/internal/shim/task/service_windows.go b/internal/shim/task/service_windows.go new file mode 100644 index 0000000..1ccce87 --- /dev/null +++ b/internal/shim/task/service_windows.go @@ -0,0 +1,38 @@ +//go:build windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package task + +import ( + "context" + "os" + + "github.com/containerd/log" +) + +// removeRootfsDir removes the rootfs directory from the bundle so that +// containerd's bundle cleanup doesn't attempt a bind filter unmount. +// On Windows, Unmount calls bindfilter.RemoveFileBinding which fails with +// ERROR_ACCESS_DENIED on directories that were never bind filter mounts +// (nerdbox uses VM-based virtio block devices instead). Removing the +// directory makes UnmountAll a no-op. +func removeRootfsDir(ctx context.Context) { + if err := os.RemoveAll("rootfs"); err != nil { + log.G(ctx).WithError(err).Warn("failed to remove rootfs directory during shutdown") + } +} diff --git a/internal/vm/libkrun/instance.go b/internal/vm/libkrun/instance.go index 18b6f83..0d731c4 100644 --- a/internal/vm/libkrun/instance.go +++ b/internal/vm/libkrun/instance.go @@ -72,10 +72,11 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e if runtime.GOOS != "windows" && len(p2) == 0 { p2 = []string{"/usr/local/lib", "/usr/local/lib64", "/usr/lib", "/lib"} } - sharedNames := []string{"libkrun.so"} + arch := kernelArch() + sharedNames := []string{fmt.Sprintf("libkrun-%s.so", arch), "libkrun.so"} switch runtime.GOOS { case "darwin": - sharedNames = []string{"libkrun.dylib", "libkrun-efi.dylib"} + sharedNames = []string{fmt.Sprintf("libkrun-%s.dylib", arch), "libkrun.dylib", fmt.Sprintf("libkrun-efi-%s.dylib", arch), "libkrun-efi.dylib"} p2 = append(p2, "/opt/homebrew/lib") case "windows": sharedNames = []string{"krun.dll"} @@ -103,9 +104,12 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e } } if initrdPath == "" { - path = filepath.Join(dir, "nerdbox-initrd") - if _, err := os.Stat(path); err == nil { - initrdPath = path + for _, name := range []string{fmt.Sprintf("nerdbox-initrd-%s", arch), "nerdbox-initrd"} { + path = filepath.Join(dir, name) + if _, err := os.Stat(path); err == nil { + initrdPath = path + break + } } } } @@ -116,7 +120,7 @@ func (*vmManager) NewInstance(ctx context.Context, state string) (vm.Instance, e return nil, fmt.Errorf("nerdbox-kernel not found in PATH or LIBKRUN_PATH") } if initrdPath == "" { - return nil, fmt.Errorf("nerdbox-initrd not found in PATH or LIBKRUN_PATH") + return nil, fmt.Errorf("nerdbox-initrd-%s or nerdbox-initrd not found in PATH or LIBKRUN_PATH", arch) } lib, handler, err := openLibkrun(krunPath) @@ -433,6 +437,16 @@ func (v *vmInstance) Shutdown(ctx context.Context) error { if v.handler == 0 { return fmt.Errorf("libkrun already closed") } + // Stop the VM and wait for all threads (vCPU, virtio workers) to exit + // before unloading the library. krun_free_ctx is synchronous: it joins + // all threads and closes all file handles. Without this, dlClose rips + // the code out from under running threads and leaves file handles open, + // preventing containerd from cleaning up the bundle directory. + if v.vmc != nil { + if err := v.vmc.Shutdown(); err != nil { + log.G(ctx).WithError(err).Warn("krun_free_ctx failed during shutdown") + } + } err := dlClose(v.handler) if err != nil { return err diff --git a/pkg/shim/manager/manager_windows.go b/pkg/shim/manager/manager_windows.go index a75e8f8..34a3ffe 100644 --- a/pkg/shim/manager/manager_windows.go +++ b/pkg/shim/manager/manager_windows.go @@ -124,7 +124,7 @@ func (manager) Start(ctx context.Context, bparams *bootapi.BootstrapParams) (_ * // make sure to wait after start go cmd.Wait() - if err = shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { + if err = shim.WritePidFile(filepath.Join(bundlePath(ctx), "shim.pid"), cmd.Process.Pid); err != nil { return nil, err } @@ -157,9 +157,38 @@ func (manager) Start(ctx context.Context, bparams *bootapi.BootstrapParams) (_ * }, nil } +// bundlePath extracts the bundle path from the context. The shim framework +// stores it as shim.Opts{BundlePath: ...} via the -bundle flag. +func bundlePath(ctx context.Context) string { + if o, ok := ctx.Value(shim.OptsKey{}).(shim.Opts); ok { + return o.BundlePath + } + return "" +} + +// removeRootfs removes the rootfs directory from the bundle so that +// containerd's bundle cleanup doesn't attempt a bind filter unmount. +// On Windows, Unmount calls bindfilter.RemoveFileBinding which fails with +// ERROR_ACCESS_DENIED on directories that were never bind filter mounts +// (nerdbox uses VM-based virtio block devices instead). Removing the +// directory makes UnmountAll a no-op. +func removeRootfs(ctx context.Context) { + if bp := bundlePath(ctx); bp != "" { + os.RemoveAll(filepath.Join(bp, "rootfs")) + } +} + func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) { - p, err := os.ReadFile("shim.pid") + p, err := os.ReadFile(filepath.Join(bundlePath(ctx), "shim.pid")) if err != nil { + if os.IsNotExist(err) { + // The shim already exited and cleaned up its pid file. + removeRootfs(ctx) + return shim.StopStatus{ + ExitedAt: time.Now(), + ExitStatus: 128 + 9, + }, nil + } return shim.StopStatus{}, err } pid, err := strconv.Atoi(strings.TrimSpace(string(p))) @@ -203,6 +232,8 @@ func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) { return shim.StopStatus{}, fmt.Errorf("wait for shim process: %w", err) } + removeRootfs(ctx) + return shim.StopStatus{ ExitedAt: time.Now(), ExitStatus: 128 + 9,