diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6644d4df..ee67a379 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,8 +37,11 @@ jobs: steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: go-task/setup-task@01a4adf9db2d14c1de7a560f09170b6e0df736aa # v2.1.0 + with: + version: 3.x - uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0 - - run: make validate + - run: task validate # # Project checks @@ -58,6 +61,10 @@ jobs: with: go-version-file: 'src/github.com/containerd/nerdbox/.github/.tool-versions' + - uses: go-task/setup-task@01a4adf9db2d14c1de7a560f09170b6e0df736aa # v2.1.0 + with: + version: 3.x + - uses: containerd/project-checks@d7751f3c375b8fe4a84c02a068184ee4c1f59bc4 # v1.2.2 if: github.repository == 'containerd/nerdbox' with: @@ -65,8 +72,7 @@ jobs: repo-access-token: ${{ secrets.GITHUB_TOKEN }} - name: verify go modules and vendor directory - run: | - make verify-vendor + run: task verify-vendor working-directory: src/github.com/containerd/nerdbox # @@ -89,9 +95,12 @@ jobs: with: go-version-file: '.github/.tool-versions' + - uses: go-task/setup-task@01a4adf9db2d14c1de7a560f09170b6e0df736aa # v2.1.0 + with: + version: 3.x + - name: Run unit tests - shell: bash - run: make test-unit + run: task test:unit # # Protobuf checks @@ -126,9 +135,14 @@ jobs: echo "GOPATH=${{ github.workspace }}" >> $GITHUB_ENV echo "${{ github.workspace }}/bin" >> $GITHUB_PATH + - uses: go-task/setup-task@01a4adf9db2d14c1de7a560f09170b6e0df736aa # v2.1.0 + with: + version: 3.x + - run: script/install-proto-tools - - run: make proto-fmt - - run: make check-protos check-api-descriptors + - run: task proto-fmt + - run: task check-protos + - run: task check-api-descriptors # # Build kernels on cache miss @@ -259,10 +273,17 @@ jobs: with: go-version-file: '.github/.tool-versions' + - uses: go-task/setup-task@01a4adf9db2d14c1de7a560f09170b6e0df736aa # v2.1.0 + with: + version: 3.x + + - name: Install gotestsum + run: go install gotest.tools/gotestsum@v1.13.0 + - name: Verify user namespaces not restricted run: | go build -o _output/userns-check ./script/userns-check _output/userns-check - name: Run integration tests - run: go test -v ./integration/... + run: task test:integration diff --git a/.gitignore b/.gitignore index 481f6846..d34e77c9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ _output kernel/*.old +.task diff --git a/Makefile b/Makefile index 875574f4..304ec765 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,10 @@ GO ?= go DOCKER ?= docker BUILDX ?= $(DOCKER) buildx +ifeq (,$(shell command -v task 2>/dev/null)) +$(error 'task' is required to build nerdbox. Install from https://taskfile.dev) +endif + ROOTDIR=$(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))) WHALE = "🇩" @@ -58,15 +62,10 @@ API_PACKAGES=$(shell ($(GO) list ${GO_TAGS} ./... | grep /api/ )) all: build build: - @echo "$(WHALE) $@" - HOST_OS=$(shell uname -s | tr '[:upper:]' '[:lower:]') KERNEL_ARCH=$(ARCH) $(BUILDX) bake + @task build _output/containerd-shim-nerdbox-v1: cmd/containerd-shim-nerdbox-v1 FORCE - @echo "$(WHALE) $@" - $(GO) build ${DEBUG_GO_GCFLAGS} ${GO_GCFLAGS} ${GO_BUILD_FLAGS} -o $@ ${GO_LDFLAGS} ${GO_TAGS} ./$< -ifeq ($(OS),Darwin) - codesign --entitlements cmd/containerd-shim-nerdbox-v1/containerd-shim-nerdbox-v1.entitlements --force -s - $@ -endif + @task build:shim _output/containerd-shim-nerdbox-v1.exe: cmd/containerd-shim-nerdbox-v1 FORCE @echo "$(WHALE) $@" @@ -77,15 +76,10 @@ _output/vminitd: cmd/vminitd FORCE $(GO) build ${DEBUG_GO_GCFLAGS} ${GO_GCFLAGS} ${GO_BUILD_FLAGS} -o $@ ${GO_STATIC_LDFLAGS} ${GO_STATIC_TAGS} ./$< _output/nerdbox-initrd: cmd/vminitd FORCE - @echo "$(WHALE) $@" - $(BUILDX) bake initrd + @task build:initrd _output/integration.test: integration FORCE - @echo "$(WHALE) $@" - $(GO) test -c -o $@ ${GO_LDFLAGS} ${GO_TAGS} ./integration -ifeq ($(OS),Darwin) - codesign --entitlements cmd/containerd-shim-nerdbox-v1/containerd-shim-nerdbox-v1.entitlements --force -s - $@ -endif + @task build:integration _output/test_vminitd: cmd/test_vminitd FORCE @echo "$(WHALE) $@" @@ -106,32 +100,19 @@ _output/libkrun.so: FORCE generate: protos - @echo "$(WHALE) $@" - @PATH="${ROOTDIR}/bin:${PATH}" $(GO) generate -x ${PACKAGES} + @task generate protos: - @echo "$(WHALE) $@" - @(cd ${ROOTDIR}/api && PATH="${ROOTDIR}/bin:${PATH}" buf generate) - @(cd ${ROOTDIR}/api && buf build --exclude-imports -o next.txtpb) - go-fix-acronym -w -a '^Os' $(shell find api/ -name '*.pb.go') - go-fix-acronym -w -a '(Id|Io|Uuid|Os)$$' $(shell find api/ -name '*.pb.go') + @task protos check-protos: protos ## check if protobufs needs to be generated again - @echo "$(WHALE) $@" - @test -z "$$(git status --short | grep ".pb.go" | tee /dev/stderr)" || \ - ((git diff | cat) && \ - (echo "$(ONI) please run 'make protos' when making changes to proto files" && false)) + @task check-protos check-api-descriptors: protos ## check that protobuf changes aren't present. - @echo "$(WHALE) $@" - @test -z "$$(git status --short | grep ".txtpb" | tee /dev/stderr)" || \ - ((git diff $$(find . -name '*.txtpb') | cat) && \ - (echo "$(ONI) please run 'make protos' when making changes to proto files and check-in the generated descriptor file changes" && false)) + @task check-api-descriptors proto-fmt: ## check format of proto files - @echo "$(WHALE) $@" - @test -z "$$(find . -name '*.proto' -type f -exec grep -Hn -e "^ " {} \; | tee /dev/stderr)" || \ - (echo "$(ONI) please indent proto files with tabs only" && false) + @task proto-fmt menuconfig: ifeq ($(KERNEL_VERSION),) @@ -152,13 +133,13 @@ endif FORCE: validate: - @$(BUILDX) bake validate + @task validate lint: - @$(BUILDX) bake lint + @task lint clean: - rm -rf _output + @task clean shell: @echo "$(WHALE) $@" @@ -174,17 +155,10 @@ shell: nerdbox-dev verify-vendor: ## verify if all the go.mod/go.sum files are up-to-date - @echo "$(WHALE) $@" - $(eval TMPDIR := $(shell mktemp -d)) - @cp -R ${ROOTDIR} ${TMPDIR} - @(cd ${TMPDIR}/nerdbox && ${GO} mod tidy) - @(cd ${TMPDIR}/nerdbox && ${GO} mod verify) - diff -r -u ${ROOTDIR} ${TMPDIR}/nerdbox - @rm -rf ${TMPDIR} + @task verify-vendor test-unit: - go test -count=1 $(shell go list ./... | grep -v /integration) + @task test:unit test-integration: _output/integration.test - @echo "$(WHALE) $@" - gotestsum -f testname --raw-command ./integration/test.sh + @task test:integration diff --git a/Taskfile.yml b/Taskfile.yml new file mode 100644 index 00000000..b67a97bf --- /dev/null +++ b/Taskfile.yml @@ -0,0 +1,149 @@ +version: '3' + +vars: + OUTPUT_DIR: '_output' + GO_TAGS: '-tags "no_grpc"' + LDFLAGS: '-s -w' + GO_LDFLAGS: '-ldflags "{{.LDFLAGS}}"' + # Map Go's GOARCH to the kernel arch naming convention (amd64 → x86_64). + KERNEL_ARCH: + sh: 'a=$(go env GOARCH); case "$a" in amd64) echo x86_64;; *) echo "$a";; esac' + +tasks: + default: + desc: Build all outputs via Docker Buildx Bake + deps: [build] + + # + # Build tasks + # + build: + desc: Build all outputs via Docker Buildx Bake + cmds: + - HOST_OS={{OS}} KERNEL_ARCH={{.KERNEL_ARCH}} docker buildx bake + + build:shim: + desc: Build containerd-shim-nerdbox-v1 for the current platform + cmds: + - mkdir -p {{.OUTPUT_DIR}} + - go build {{.GO_TAGS}} {{.GO_LDFLAGS}} -o {{.OUTPUT_DIR}}/containerd-shim-nerdbox-v1{{if eq OS "windows"}}.exe{{end}} ./cmd/containerd-shim-nerdbox-v1 + - cmd: codesign --entitlements cmd/containerd-shim-nerdbox-v1/containerd-shim-nerdbox-v1.entitlements --force -s - {{.OUTPUT_DIR}}/containerd-shim-nerdbox-v1 + platforms: [darwin] + + build:guest: + desc: Build guest artifacts (kernel and initrd) via Docker Buildx Bake + cmds: + - KERNEL_ARCH={{.KERNEL_ARCH}} docker buildx bake kernel initrd + + build:initrd: + desc: Build the nerdbox initrd via Docker Buildx Bake + cmds: + - KERNEL_ARCH={{.KERNEL_ARCH}} docker buildx bake initrd + + build:integration: + desc: Build the integration test binary + cmds: + - mkdir -p {{.OUTPUT_DIR}} + - go test -c -o {{.OUTPUT_DIR}}/integration.test{{if eq OS "windows"}}.exe{{end}} {{.GO_LDFLAGS}} {{.GO_TAGS}} ./integration + - cmd: codesign --entitlements cmd/containerd-shim-nerdbox-v1/containerd-shim-nerdbox-v1.entitlements --force -s - {{.OUTPUT_DIR}}/integration.test + platforms: [darwin] + + # + # Test tasks + # + test:unit: + desc: Run unit tests (excludes integration package) + cmds: + - go test -count=1 ./api/... ./cmd/... ./internal/... ./pkg/... ./plugins/... + + test:integration: + desc: "Run integration tests (each test in its own process). Extra flags are forwarded to the test binary: task test:integration -- -run TestSystemInfo -v" + deps: [build:integration] + vars: + # When -v is in the extra flags, switch gotestsum to standard-verbose so + # t.Log() output is shown; otherwise testname format suppresses it. + GOTESTSUM_FORMAT: + sh: | + case " {{.CLI_ARGS}} " in *" -v "*|*" -v") echo standard-verbose ;; *) echo testname ;; esac + env: + TESTFLAGS: '{{.CLI_ARGS}}' + cmds: + - cmd: gotestsum -f {{.GOTESTSUM_FORMAT}} --raw-command bash integration/test.sh + platforms: [darwin, linux] + - cmd: gotestsum -f {{.GOTESTSUM_FORMAT}} --raw-command powershell -ExecutionPolicy Bypass -File integration/test.ps1 + platforms: [windows] + + # + # Code quality tasks + # + validate: + desc: Validate via Docker Buildx Bake + cmds: + - docker buildx bake validate + + lint: + desc: Run linters via Docker Buildx Bake + cmds: + - docker buildx bake lint + + protos: + desc: Regenerate protobuf bindings + dir: api + cmds: + - buf generate + - buf build --exclude-imports -o next.txtpb + - go-fix-acronym -w -a '^Os' $(find . -name '*.pb.go') + - go-fix-acronym -w -a '(Id|Io|Uuid|Os)$' $(find . -name '*.pb.go') + + generate: + desc: Regenerate all derived artifacts (protobuf) + deps: [protos] + + check-protos: + desc: Verify protobuf bindings are up to date + deps: [protos] + cmds: + - cmd: | + if [ -n "$(git status --short | grep ".pb.go")" ]; then + git diff | cat + echo "please run 'task protos' when making changes to proto files" + exit 1 + fi + + check-api-descriptors: + desc: Verify protobuf descriptor files are up to date + deps: [protos] + cmds: + - cmd: | + if [ -n "$(git status --short | grep ".txtpb")" ]; then + git diff $(find . -name '*.txtpb') | cat + echo "please run 'task protos' when making changes to proto files and check-in the generated descriptor file changes" + exit 1 + fi + + proto-fmt: + desc: Check proto files use tabs (not spaces) for indentation + cmds: + - cmd: | + if [ -n "$(find . -name '*.proto' -type f -exec grep -Hn -e "^ " {} \;)" ]; then + echo "please indent proto files with tabs only" + exit 1 + fi + + verify-vendor: + desc: Verify go.mod/go.sum and vendor directory are up to date + cmds: + - cmd: | + tmpdir=$(mktemp -d) + cp -R . "$tmpdir/nerdbox" + (cd "$tmpdir/nerdbox" && go mod tidy && go mod verify) + diff -r -u . "$tmpdir/nerdbox" || (rm -rf "$tmpdir" && exit 1) + rm -rf "$tmpdir" + + # + # Clean + # + clean: + desc: Remove all build outputs + cmds: + - rm -rf {{.OUTPUT_DIR}} diff --git a/integration/main_test.go b/integration/main_test.go index 637756d1..aba13158 100644 --- a/integration/main_test.go +++ b/integration/main_test.go @@ -18,15 +18,49 @@ package integration import ( "log" + "log/slog" "os" "path/filepath" "strings" + "sync" "testing" - "github.com/containerd/nerdbox/pkg/vm" + "github.com/containerd/log/logtest" + "github.com/containerd/nerdbox/internal/vm/libkrun" + "github.com/containerd/nerdbox/pkg/logging" + "github.com/containerd/nerdbox/pkg/vm" ) +// tLogWriter routes slog records to t.Log() so they are suppressed by the +// test framework unless the test fails or -v is used. +type tLogWriter struct { + mu sync.Mutex + t testing.TB +} + +func (w *tLogWriter) Write(p []byte) (int, error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.t == nil { + // Test has completed; silently discard to avoid "Log after test + // finished" panics from any background goroutine still draining + // the console FIFO. + return len(p), nil + } + w.t.Log(strings.TrimRight(string(p), "\n")) + return len(p), nil +} + +// disable clears the reference to t so that subsequent Write calls are +// discarded. Call this after vm.Shutdown() in t.Cleanup to ensure no +// background log goroutine can call t.Log() on a completed test. +func (w *tLogWriter) disable() { + w.mu.Lock() + defer w.mu.Unlock() + w.t = nil +} + func TestMain(m *testing.M) { e, err := os.Executable() if err != nil { @@ -67,6 +101,19 @@ func runWithVM(t *testing.T, runTest func(*testing.T, vm.Instance)) { func runWithVMOpts(t *testing.T, startOpts []vm.StartOpt, runTest func(*testing.T, vm.Instance)) { for _, tc := range vmBackends { t.Run(tc.name, func(t *testing.T) { + // Route all log output through t.Log() so it is suppressed + // unless the test fails or -v is used. + // + // logtest.WithT redirects logrus (log.G(ctx)) to t.Log(). + ctx := logtest.WithT(t.Context(), t) + // SetBaseHandler redirects slog (ForwardConsoleLogs: kernel + // kmsg + vminitd JSON) to t.Log() via a TextHandler. + lw := &tLogWriter{t: t} + logging.SetBaseHandler(slog.NewTextHandler( + lw, + &slog.HandlerOptions{Level: slog.LevelDebug}, + )) + td := t.TempDir() t.Chdir(td) // Use Getwd to resolve symlinks (e.g., /var -> /private/var on macOS) @@ -74,17 +121,21 @@ func runWithVMOpts(t *testing.T, startOpts []vm.StartOpt, runTest func(*testing. if err != nil { t.Fatal("Failed to get current working directory:", err) } - vm, err := tc.vmm.NewInstance(t.Context(), resolvedTd) + vm, err := tc.vmm.NewInstance(ctx, resolvedTd) if err != nil { t.Fatal("Failed to create VM instance:", err) } - if err := vm.Start(t.Context(), startOpts...); err != nil { + if err := vm.Start(ctx, startOpts...); err != nil { t.Fatal("Failed to start VM instance:", err) } t.Cleanup(func() { - vm.Shutdown(t.Context()) + vm.Shutdown(ctx) + // Disable the log writer after shutdown so any background + // goroutine still draining the console FIFO cannot call + // t.Log() on a completed test. + lw.disable() }) runTest(t, vm) diff --git a/integration/test.ps1 b/integration/test.ps1 new file mode 100644 index 00000000..f50f9966 --- /dev/null +++ b/integration/test.ps1 @@ -0,0 +1,75 @@ +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +$ErrorActionPreference = 'Stop' + +Set-Location -LiteralPath $PSScriptRoot + +# Build the test binary if it doesn't exist +$outputDir = Join-Path $PSScriptRoot "..\\_output" +$testBin = Join-Path $outputDir "integration.test.exe" +if (-not (Test-Path -LiteralPath $testBin)) { + New-Item -ItemType Directory -Force -Path $outputDir | Out-Null + & go test -c -o $testBin . + if ($LASTEXITCODE -ne 0) { exit 1 } +} + +# Parse TESTFLAGS forwarded from the task invocation. +# -run passed to -test.list for test discovery (uses Go's RE2) +# -v changes gotestsum output format (handled in Taskfile; skip here) +# anything else is normalised to -test. and forwarded to the binary +$runPattern = $null +$binaryFlags = @() +if ($env:TESTFLAGS) { + $tokens = ($env:TESTFLAGS -split '\s+') | Where-Object { $_ } + for ($i = 0; $i -lt $tokens.Count; $i++) { + switch -Regex ($tokens[$i]) { + '^-run$' { $runPattern = $tokens[++$i]; break } + '^-run=(.+)' { $runPattern = $Matches[1]; break } + '^-v$' { break } # handled by gotestsum format in Taskfile + '^-test\.' { $binaryFlags += $tokens[$i]; break } + '^-(.+)' { + $binaryFlags += "-test.$($Matches[1])" + # If the next token isn't a flag, treat it as the value. + if (($i + 1) -lt $tokens.Count -and $tokens[$i + 1] -notmatch '^-') { + $i++ + $binaryFlags += $tokens[$i] + } + break + } + } + } +} + +# Discover tests using -test.list so Go's own RE2 engine applies the -run +# pattern (avoids PowerShell/.NET regex vs RE2 discrepancies). +$listPattern = if ($runPattern) { $runPattern } else { '.*' } +$tests = & $testBin @('-test.list', $listPattern) 2>$null | Where-Object { $_ -match '^Test' } + +if (-not $tests) { + Write-Error "error: no tests found matching '$listPattern'" + exit 1 +} + +# Run each test individually. Each test gets its own process so that the WHP +# hypervisor partition is fully released between tests (Windows allows only one +# VM partition per process with the current libkrun build). +$failed = $false +foreach ($test in $tests) { + $testFlags = @("-test.parallel", "1", "-test.v", "-test.run", $test) + $binaryFlags + & go tool test2json -t -p "github.com/containerd/nerdbox/integration" $testBin @testFlags + if ($LASTEXITCODE -ne 0) { $failed = $true } +} + +if ($failed) { exit 1 } diff --git a/integration/test.sh b/integration/test.sh index 72e223de..3b1a2326 100755 --- a/integration/test.sh +++ b/integration/test.sh @@ -28,13 +28,56 @@ if [[ ! -f ../_output/integration.test ]]; then fi fi -# Run each test individually -tests=( - "TestSystemInfo" - "TestStreamInitialization" - "TestTransferEcho" -) +# Parse TESTFLAGS forwarded from the task invocation. +# -run passed to -test.list for test discovery (uses Go's RE2) +# -v changes gotestsum output format (handled in Taskfile; skip here) +# anything else is normalised to -test. and forwarded to the binary +run_pattern="" +binary_flags=() +if [ -n "${TESTFLAGS:-}" ]; then + read -ra tokens <<< "$TESTFLAGS" + i=0 + while [ $i -lt ${#tokens[@]} ]; do + tok="${tokens[$i]}" + case "$tok" in + -run) i=$((i+1)); run_pattern="${tokens[$i]}" ;; + -run=*) run_pattern="${tok#-run=}" ;; + -v) ;; # handled by gotestsum format in Taskfile + -test.*) binary_flags+=("$tok") ;; + -*) + binary_flags+=("-test.${tok#-}") + # If the next token isn't a flag, treat it as the value. + if [ $((i+1)) -lt ${#tokens[@]} ] && [[ "${tokens[$((i+1))]}" != -* ]]; then + i=$((i+1)) + binary_flags+=("${tokens[$i]}") + fi + ;; + esac + i=$((i+1)) + done +fi + +# Discover tests from the binary using -test.list so Go's own RE2 engine +# applies the -run pattern (avoids bash-regex vs RE2 discrepancies). +# Use a while-read loop instead of readarray for bash 3.2 compatibility +# (macOS ships bash 3.2 by default). +list_pattern="${run_pattern:-.*}" +tests=() +while IFS= read -r t; do + tests+=("$t") +done < <(../_output/integration.test -test.list "$list_pattern" 2>/dev/null | grep '^Test') + +if [ ${#tests[@]} -eq 0 ]; then + echo "error: no tests found matching '${list_pattern}'" >&2 + exit 1 +fi +# Run each test individually. Exit code accumulates failures so all tests +# run even when one fails; the final exit code signals gotestsum. +failed=0 for test in "${tests[@]}"; do - go tool test2json -t -p "github.com/containerd/nerdbox/integration" ../_output/integration.test -test.parallel 1 -test.v -test.run "$test" + go tool test2json -t -p "github.com/containerd/nerdbox/integration" \ + ../_output/integration.test -test.parallel 1 -test.v -test.run "$test" \ + "${binary_flags[@]}" || failed=1 done +exit $failed diff --git a/internal/vm/libkrun/dlfcn_windows.go b/internal/vm/libkrun/dlfcn_windows.go index 6b4fb39e..d055c88d 100644 --- a/internal/vm/libkrun/dlfcn_windows.go +++ b/internal/vm/libkrun/dlfcn_windows.go @@ -33,8 +33,16 @@ func dlOpen(path string) (uintptr, error) { return uintptr(h), nil } -func dlClose(handle uintptr) error { - return syscall.FreeLibrary(syscall.Handle(handle)) +func dlClose(_ uintptr) error { + // On Windows, krun_start_enter may still be executing inside the library + // when Shutdown is called — krun_free_ctx initiates the VM stop but does + // not synchronously join the calling goroutine. Calling FreeLibrary while + // a goroutine is still inside the DLL causes an access violation. + // + // Since nerdbox runs exactly one VM per process (the shim model), the + // library handle is released naturally when the process exits. There is + // no need to call FreeLibrary explicitly. + return nil } func registerLibFunc(fn interface{}, handle uintptr, name string) { diff --git a/internal/vm/libkrun/instance.go b/internal/vm/libkrun/instance.go index 0d731c4d..edb4edd1 100644 --- a/internal/vm/libkrun/instance.go +++ b/internal/vm/libkrun/instance.go @@ -164,8 +164,8 @@ type vmInstance struct { lib *libkrun handler uintptr - client *ttrpc.Client - shutdownCallbacks []func(context.Context) error + client *ttrpc.Client + conn net.Conn // underlying TTRPC connection; closed in Shutdown } func (v *vmInstance) AddFS(ctx context.Context, tag, mountPath string, opts ...vm.MountOpt) error { @@ -312,7 +312,7 @@ func (v *vmInstance) Start(ctx context.Context, opts ...vm.StartOpt) (err error) preVMStart := time.Now() // Start it - errC := make(chan error) + errC := make(chan error, 1) go func() { defer close(errC) if err := v.vmc.Start(); err != nil { @@ -320,20 +320,6 @@ func (v *vmInstance) Start(ctx context.Context, opts ...vm.StartOpt) (err error) } }() - v.shutdownCallbacks = []func(context.Context) error{ - func(context.Context) error { - cerr := v.vmc.Shutdown() - select { - case err := <-errC: - if err != nil { - return fmt.Errorf("failure running vm: %w", err) - } - default: - } - return cerr - }, - } - // Accept a single connection from vminitd connecting back via vsock. type acceptResult struct { conn net.Conn @@ -370,10 +356,7 @@ func (v *vmInstance) Start(ctx context.Context, opts ...vm.StartOpt) (err error) "t_total": time.Since(startedAt), }).Info("VM connection established") - v.shutdownCallbacks = append(v.shutdownCallbacks, func(context.Context) error { - return conn.Close() - }) - + v.conn = conn v.client = ttrpc.NewClient(conn) return nil @@ -437,21 +420,39 @@ func (v *vmInstance) Shutdown(ctx context.Context) error { if v.handler == 0 { return fmt.Errorf("libkrun already closed") } - // Stop the VM and wait for all threads (vCPU, virtio workers) to exit - // before unloading the library. krun_free_ctx is synchronous: it joins - // all threads and closes all file handles. Without this, dlClose rips - // the code out from under running threads and leaves file handles open, - // preventing containerd from cleaning up the bundle directory. + + // Close the TTRPC client so in-flight RPCs fail fast and its background + // goroutines are stopped before we tear down the connection underneath. + if v.client != nil { + v.client.Close() + v.client = nil + } + + // Close the underlying TTRPC net.Conn to vminitd. This must happen + // before krun_free_ctx to avoid leaving file handles open, which would + // prevent containerd from cleaning up the bundle directory. + if v.conn != nil { + if err := v.conn.Close(); err != nil { + log.G(ctx).WithError(err).Warn("failed to close TTRPC connection") + } + v.conn = nil + } + + // Stop the VM. krun_free_ctx joins all VM threads (vCPU, virtio workers) + // on most platforms. On Windows WHP it initiates the stop but may return + // before krun_start_enter unblocks; the goroutine is cleaned up on exit. if v.vmc != nil { if err := v.vmc.Shutdown(); err != nil { log.G(ctx).WithError(err).Warn("krun_free_ctx failed during shutdown") } } - err := dlClose(v.handler) - if err != nil { + + // On Unix, dlClose unloads the library after krun_free_ctx has joined all + // VM threads. On Windows it is a no-op (see dlfcn_windows.go). + if err := dlClose(v.handler); err != nil { return err } - v.handler = 0 // Mark as closed + v.handler = 0 return nil }