From 9516d1b9c193bf7bda29df405563fc26d907a90a Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Mon, 6 Apr 2026 04:40:54 +0300 Subject: [PATCH 01/10] Add registry error diagnostics for d8 mirror pull/push - Classify 11 types of registry errors (EOF, TLS, auth, DNS, timeout, network, rate limit, server errors, image/repo not found, OCI artifacts) and show user-friendly diagnostics with possible causes and solutions - Detect errors via errors.Is/errors.As through go-containerregistry's error chain including multierrs from HTTPS/HTTP fallback - Output colored diagnostics to stderr only when TTY is detected, respecting NO_COLOR and FORCE_COLOR environment variables - Integration tests verify classification against real HTTP responses from httptest servers (no Docker required) Signed-off-by: Roman Berezkin --- cmd/d8/root.go | 9 +- internal/mirror/cmd/pull/pull.go | 4 + internal/mirror/cmd/push/push.go | 13 +- internal/mirror/pusher/pusher.go | 6 +- pkg/libmirror/util/errorutil/errors.go | 60 --- pkg/libmirror/util/registryerr/doc.go | 75 +++ pkg/libmirror/util/registryerr/errors.go | 428 ++++++++++++++++++ .../registryerr/errors_integration_test.go | 241 ++++++++++ pkg/libmirror/util/registryerr/errors_test.go | 364 +++++++++++++++ pkg/libmirror/util/registryerr/format.go | 122 +++++ pkg/libmirror/validation/registry_access.go | 4 +- 11 files changed, 1259 insertions(+), 67 deletions(-) delete mode 100644 pkg/libmirror/util/errorutil/errors.go create mode 100644 pkg/libmirror/util/registryerr/doc.go create mode 100644 pkg/libmirror/util/registryerr/errors.go create mode 100644 pkg/libmirror/util/registryerr/errors_integration_test.go create mode 100644 pkg/libmirror/util/registryerr/errors_test.go create mode 100644 pkg/libmirror/util/registryerr/format.go diff --git a/cmd/d8/root.go b/cmd/d8/root.go index ca955b79..0c68538a 100644 --- a/cmd/d8/root.go +++ b/cmd/d8/root.go @@ -48,6 +48,7 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/tools" useroperation "github.com/deckhouse/deckhouse-cli/internal/useroperation/cmd" "github.com/deckhouse/deckhouse-cli/internal/version" + "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" ) type RootCommand struct { @@ -174,7 +175,13 @@ func (r *RootCommand) Execute() error { func execute() { rootCmd := NewRootCommand() if err := rootCmd.Execute(); err != nil { - fmt.Fprintf(os.Stderr, "Error executing command: %v\n", err) + var diag *registryerr.Diagnostic + // If registry error - show formatted diagnostic with causes and solutions if possible + if errors.As(err, &diag) { + fmt.Fprint(os.Stderr, diag.Format()) + } else { + fmt.Fprintf(os.Stderr, "Error executing command: %v\n", err) + } os.Exit(1) } } diff --git a/internal/mirror/cmd/pull/pull.go b/internal/mirror/cmd/pull/pull.go index 8866c970..3d567ddb 100644 --- a/internal/mirror/cmd/pull/pull.go +++ b/internal/mirror/cmd/pull/pull.go @@ -46,6 +46,7 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" + "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" registryservice "github.com/deckhouse/deckhouse-cli/pkg/registry/service" @@ -119,6 +120,9 @@ func pull(cmd *cobra.Command, _ []string) error { puller.logger.WarnLn("Operation cancelled by user") return nil } + if diag := registryerr.Classify(err); diag != nil { + return diag + } return fmt.Errorf("pull failed: %w", err) } diff --git a/internal/mirror/cmd/push/push.go b/internal/mirror/cmd/push/push.go index a6190233..71c9a482 100644 --- a/internal/mirror/cmd/push/push.go +++ b/internal/mirror/cmd/push/push.go @@ -38,6 +38,7 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" + "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" ) @@ -178,10 +179,20 @@ func (p *Pusher) Execute() error { } if err := p.validateRegistryAccess(); err != nil { + if diag := registryerr.Classify(err); diag != nil { + return diag + } + return err + } + + if err := p.executeNewPush(); err != nil { + if diag := registryerr.Classify(err); diag != nil { + return diag + } return err } - return p.executeNewPush() + return nil } // executeNewPush runs the push using the push service. diff --git a/internal/mirror/pusher/pusher.go b/internal/mirror/pusher/pusher.go index 49c41062..e359aeb1 100644 --- a/internal/mirror/pusher/pusher.go +++ b/internal/mirror/pusher/pusher.go @@ -30,8 +30,8 @@ import ( dkplog "github.com/deckhouse/deckhouse/pkg/log" "github.com/deckhouse/deckhouse-cli/internal/mirror/chunked" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/errorutil" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" + "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/retry" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/retry/task" client "github.com/deckhouse/deckhouse-cli/pkg/registry" @@ -106,8 +106,8 @@ func (s *Service) PushLayout(ctx context.Context, layoutPath layout.Path, client fmt.Sprintf("[%d / %d] Pushing %s", i+1, len(indexManifest.Manifests), imageReferenceString), task.WithConstantRetries(pushRetryAttempts, pushRetryDelay, func(ctx context.Context) error { if err := client.PushImage(ctx, tag, img); err != nil { - if errorutil.IsTrivyMediaTypeNotAllowedError(err) { - return fmt.Errorf(errorutil.CustomTrivyMediaTypesWarning) + if registryerr.IsTrivyMediaTypeNotAllowed(err) { + return fmt.Errorf(registryerr.TrivyMediaTypesWarning) } return fmt.Errorf("write %s:%s to registry: %w", client.GetRegistry(), tag, err) } diff --git a/pkg/libmirror/util/errorutil/errors.go b/pkg/libmirror/util/errorutil/errors.go deleted file mode 100644 index 1ab00ff7..00000000 --- a/pkg/libmirror/util/errorutil/errors.go +++ /dev/null @@ -1,60 +0,0 @@ -/* -Copyright 2024 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package errorutil - -import "strings" - -const CustomTrivyMediaTypesWarning = `` + - "It looks like you are using Project Quay registry and it is not configured correctly for hosting Deckhouse.\n" + - "See the docs at https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry for more details.\n\n" + - "TL;DR: You should retry push after allowing some additional types of OCI artifacts in your config.yaml as follows:\n" + - `FEATURE_GENERAL_OCI_SUPPORT: true -ALLOWED_OCI_ARTIFACT_TYPES: - "application/octet-stream": - - "application/deckhouse.io.bdu.layer.v1.tar+gzip" - - "application/vnd.cncf.openpolicyagent.layer.v1.tar+gzip" - "application/vnd.aquasec.trivy.config.v1+json": - - "application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip" - - "application/vnd.aquasec.trivy.db.layer.v1.tar+gzip"` - -func IsImageNotFoundError(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "MANIFEST_UNKNOWN") || strings.Contains(errMsg, "404 Not Found") -} - -func IsRepoNotFoundError(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "NAME_UNKNOWN") -} - -func IsTrivyMediaTypeNotAllowedError(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "MANIFEST_INVALID") && - (strings.Contains(errMsg, "vnd.aquasec.trivy") || strings.Contains(errMsg, "application/octet-stream")) -} diff --git a/pkg/libmirror/util/registryerr/doc.go b/pkg/libmirror/util/registryerr/doc.go new file mode 100644 index 00000000..6192eae6 --- /dev/null +++ b/pkg/libmirror/util/registryerr/doc.go @@ -0,0 +1,75 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package registryerr classifies container registry errors and provides +// user-friendly diagnostics for d8 mirror operations. +// +// When users encounter registry errors during "d8 mirror pull/push", raw Go +// errors like "EOF" or "x509: certificate signed by unknown authority" are +// often not actionable. This package analyzes such errors, determines the +// root cause category, and produces a formatted diagnostic with possible +// causes and concrete solutions. +// +// # Usage +// +// if diag := registryerr.Classify(err); diag != nil { +// return diag // implements error interface +// } +// +// The returned [Diagnostic] implements the error interface with two representations: +// - [Diagnostic.Error] returns plain text suitable for logging +// - [Diagnostic.Format] returns colored terminal output (when stderr is a TTY) +// +// # Output example +// +// [Diagnostic.Format] produces output like this: +// +// error: TLS/certificate verification failed +// ╰─▶ Get "https://registry.example.com/v2/": x509: certificate signed by unknown authority +// +// Possible causes: +// * Self-signed certificate without proper trust chain +// * Certificate expired or not yet valid +// * Corporate proxy or middleware intercepting HTTPS connections +// +// How to fix: +// * Use --tls-skip-verify flag to skip TLS verification (not recommended for production) +// * Add the registry's CA certificate to your system trust store +// +// # Classification +// +// [Classify] uses Go's [errors.Is] and [errors.As] to detect error types from +// the standard library (crypto/x509, net, io) and go-containerregistry +// (transport.Error). Detection order matters - more specific checks run first: +// +// 1. EOF - io.EOF, io.ErrUnexpectedEOF (proxy/middleware termination) +// 2. TLS/Certificate - x509.UnknownAuthorityError, HostnameError, etc. +// 3. Authentication - transport.Error with HTTP 401/403 +// 4. Rate limiting - transport.Error with HTTP 429 +// 5. Server errors - transport.Error with HTTP 500/502/503/504 +// 6. DNS - net.DNSError +// 7. Timeout - context.DeadlineExceeded +// 8. Network - net.OpError, syscall.ECONNREFUSED, etc. +// 9. Image not found - error message contains "MANIFEST_UNKNOWN" +// 10. Repo not found - error message contains "NAME_UNKNOWN" +// 11. Unsupported OCI - Trivy media type rejection (Project Quay) +// +// DNS is checked before Network because [net.DNSError] satisfies [net.Error]. +// Timeout is checked before Network for the same reason. +// +// Error type checkers ([IsImageNotFound], [IsRepoNotFound], [IsTrivyMediaTypeNotAllowed]) +// are used for flow control in mirror operations (e.g., skipping optional images). +package registryerr diff --git a/pkg/libmirror/util/registryerr/errors.go b/pkg/libmirror/util/registryerr/errors.go new file mode 100644 index 00000000..5e97b082 --- /dev/null +++ b/pkg/libmirror/util/registryerr/errors.go @@ -0,0 +1,428 @@ +/* +Copyright 2024 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registryerr + +import ( + "context" + "crypto/x509" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + "strings" + "syscall" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" +) + +const TrivyMediaTypesWarning = `` + + "It looks like you are using Project Quay registry and it is not configured correctly for hosting Deckhouse.\n" + + "See the docs at https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry for more details.\n\n" + + "TL;DR: You should retry push after allowing some additional types of OCI artifacts in your config.yaml as follows:\n" + + `FEATURE_GENERAL_OCI_SUPPORT: true +ALLOWED_OCI_ARTIFACT_TYPES: + "application/octet-stream": + - "application/deckhouse.io.bdu.layer.v1.tar+gzip" + - "application/vnd.cncf.openpolicyagent.layer.v1.tar+gzip" + "application/vnd.aquasec.trivy.config.v1+json": + - "application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip" + - "application/vnd.aquasec.trivy.db.layer.v1.tar+gzip"` + +func IsImageNotFound(err error) bool { + if err == nil { + return false + } + + errMsg := err.Error() + return strings.Contains(errMsg, "MANIFEST_UNKNOWN") || strings.Contains(errMsg, "404 Not Found") +} + +func IsRepoNotFound(err error) bool { + if err == nil { + return false + } + + errMsg := err.Error() + return strings.Contains(errMsg, "NAME_UNKNOWN") +} + +func IsTrivyMediaTypeNotAllowed(err error) bool { + if err == nil { + return false + } + + errMsg := err.Error() + return strings.Contains(errMsg, "MANIFEST_INVALID") && + (strings.Contains(errMsg, "vnd.aquasec.trivy") || strings.Contains(errMsg, "application/octet-stream")) +} + +// Error category names displayed to the user after "error: ". +// Some categories are extended with dynamic details (hostname, port, HTTP code) +// at classification time via fmt.Sprintf. +const ( + CategoryEOF = "Connection terminated unexpectedly (EOF)" + CategoryTLS = "TLS/certificate verification failed" + CategoryAuth = "Authentication failed" + CategoryAuth401 = "Authentication failed (HTTP 401 Unauthorized)" + CategoryAuth403 = "Access denied (HTTP 403 Forbidden)" + CategoryRateLimit = "Rate limited by registry (HTTP 429 Too Many Requests)" + CategoryServerError = "Registry server error" + CategoryDNS = "DNS resolution failed" + CategoryTimeout = "Operation timed out" + CategoryNetwork = "Network connection failed" + CategoryImageNotFound = "Image not found in registry" + CategoryRepoNotFound = "Repository not found in registry" + CategoryUnsupportedOCI = "Unsupported OCI artifact type" +) + +// Classify analyzes an error and returns a Diagnostic if +// the error can be classified into a known category, or nil otherwise. +// Detection order matters: more specific checks come before general ones. +func Classify(err error) *Diagnostic { + if err == nil { + return nil + } + + switch { + case isEOFError(err): + return &Diagnostic{ + Category: CategoryEOF, + OriginalErr: err, + Causes: []string{ + "Corporate proxy or middleware intercepting and terminating HTTPS connections", + "Registry server closed the connection unexpectedly", + "Network device (firewall, load balancer) dropping packets", + }, + Solutions: []string{ + "Check if a corporate proxy is intercepting HTTPS traffic", + "If using a proxy, ensure it is configured to pass through registry traffic", + "Use --tls-skip-verify flag if a proxy is replacing TLS certificates", + "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", + }, + } + + case isCertificateError(err): + return &Diagnostic{ + Category: CategoryTLS, + OriginalErr: err, + Causes: []string{ + "Self-signed certificate without proper trust chain", + "Certificate expired or not yet valid", + "Hostname mismatch between certificate and registry URL", + "Corporate proxy or middleware intercepting HTTPS connections", + }, + Solutions: []string{ + "Use --tls-skip-verify flag to skip TLS verification (not recommended for production)", + "Add the registry's CA certificate to your system trust store", + "Verify the registry URL hostname matches the certificate", + "Verify system clock is correct (expired certificates can be caused by wrong time)", + }, + } + + case isAuthenticationError(err): + var transportErr *transport.Error + category := CategoryAuth + if errors.As(err, &transportErr) { + switch transportErr.StatusCode { + case http.StatusUnauthorized: + category = CategoryAuth401 + case http.StatusForbidden: + category = CategoryAuth403 + } + } + + return &Diagnostic{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Invalid or expired credentials", + "License key or registry credentials are incorrect or not provided", + "Insufficient permissions for the requested operation", + }, + Solutions: []string{ + "For pull: verify your license key and pass it with --license flag", + "For push: verify --registry-login and --registry-password are correct", + "Contact registry administrator to verify access rights", + }, + } + + case isRateLimitError(err): + return &Diagnostic{ + Category: CategoryRateLimit, + OriginalErr: err, + Causes: []string{ + "Too many requests to the registry in a short time", + "Registry-side rate limiting policy", + }, + Solutions: []string{ + "Wait a few minutes and retry the operation", + "Contact registry administrator to increase rate limits", + }, + } + + case isServerError(err): + var transportErr *transport.Error + category := CategoryServerError + if errors.As(err, &transportErr) { + category = fmt.Sprintf("%s (HTTP %d)", CategoryServerError, transportErr.StatusCode) + } + + return &Diagnostic{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Registry server is experiencing internal errors", + "Backend storage is temporarily unavailable", + "Registry is overloaded or being maintained", + }, + Solutions: []string{ + "Wait a few minutes and retry the operation", + "Check registry server status and health", + "Contact registry administrator if the problem persists", + }, + } + + case isDNSError(err): + var dnsErr *net.DNSError + category := CategoryDNS + if errors.As(err, &dnsErr) && dnsErr.Name != "" { + category = fmt.Sprintf("%s for '%s'", CategoryDNS, dnsErr.Name) + } + + return &Diagnostic{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Registry hostname cannot be resolved by DNS", + "DNS server is unreachable or not responding", + "Incorrect registry URL or typo in hostname", + }, + Solutions: []string{ + "Verify the registry URL is spelled correctly", + "Check your DNS server configuration", + "Try using the registry's IP address instead of hostname", + }, + } + + case isTimeoutError(err): + return &Diagnostic{ + Category: CategoryTimeout, + OriginalErr: err, + Causes: []string{ + "Registry server took too long to respond", + "Network latency is too high", + "Firewall silently dropping packets (no RST, no ICMP)", + }, + Solutions: []string{ + "Check network connectivity to the registry", + "Try increasing the timeout with --timeout flag", + "Verify firewall rules allow outbound HTTPS (port 443)", + }, + } + + case isNetworkError(err): + var opErr *net.OpError + category := CategoryNetwork + if errors.As(err, &opErr) && opErr.Addr != nil { + category = fmt.Sprintf("%s to %s", CategoryNetwork, opErr.Addr.String()) + } + + return &Diagnostic{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Network connectivity issues or no internet connection", + "Firewall or security group blocking the connection", + "Registry server is down or unreachable", + }, + Solutions: []string{ + "Check your network connection and internet access", + "Verify firewall rules allow outbound HTTPS (port 443)", + "Test connectivity with: curl -v https://", + }, + } + + case IsImageNotFound(err): + return &Diagnostic{ + Category: CategoryImageNotFound, + OriginalErr: err, + Causes: []string{ + "Image tag doesn't exist in the registry", + "Incorrect image name or tag specified", + }, + Solutions: []string{ + "Verify the image name and tag are correct", + "Check if you have permission to access this image", + }, + } + + case IsRepoNotFound(err): + return &Diagnostic{ + Category: CategoryRepoNotFound, + OriginalErr: err, + Causes: []string{ + "Repository doesn't exist in the registry", + "Incorrect repository path or name", + }, + Solutions: []string{ + "Verify the repository path is correct", + "Ensure you have permission to access this repository", + }, + } + + case IsTrivyMediaTypeNotAllowed(err): + return &Diagnostic{ + Category: CategoryUnsupportedOCI, + OriginalErr: err, + Causes: []string{ + "Registry doesn't support required media types for Trivy security databases", + "Project Quay registry not configured for Deckhouse artifacts", + }, + Solutions: []string{ + "Configure registry to allow custom OCI artifact types", + "See: https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry", + }, + } + } + + return nil +} + +func isEOFError(err error) bool { + return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) +} + +func isCertificateError(err error) bool { + var ( + unknownAuthErr x509.UnknownAuthorityError + certInvalidErr x509.CertificateInvalidError + hostnameErr x509.HostnameError + systemRootsErr x509.SystemRootsError + constraintErr x509.ConstraintViolationError + insecureAlgErr x509.InsecureAlgorithmError + ) + + return errors.As(err, &unknownAuthErr) || + errors.As(err, &certInvalidErr) || + errors.As(err, &hostnameErr) || + errors.As(err, &systemRootsErr) || + errors.As(err, &constraintErr) || + errors.As(err, &insecureAlgErr) +} + +func isAuthenticationError(err error) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + if transportErr.StatusCode == http.StatusUnauthorized || transportErr.StatusCode == http.StatusForbidden { + return true + } + + for _, diag := range transportErr.Errors { + if diag.Code == transport.UnauthorizedErrorCode || diag.Code == transport.DeniedErrorCode { + return true + } + } + + return false +} + +func isRateLimitError(err error) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + if transportErr.StatusCode == http.StatusTooManyRequests { + return true + } + + for _, diag := range transportErr.Errors { + if diag.Code == transport.TooManyRequestsErrorCode { + return true + } + } + + return false +} + +func isServerError(err error) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + switch transportErr.StatusCode { + case http.StatusInternalServerError, + http.StatusBadGateway, + http.StatusServiceUnavailable, + http.StatusGatewayTimeout: + return true + } + + for _, diag := range transportErr.Errors { + if diag.Code == transport.UnavailableErrorCode { + return true + } + } + + return false +} + +func isDNSError(err error) bool { + var dnsErr *net.DNSError + return errors.As(err, &dnsErr) +} + +func isTimeoutError(err error) bool { + return errors.Is(err, context.DeadlineExceeded) || errors.Is(err, os.ErrDeadlineExceeded) +} + +func isNetworkError(err error) bool { + // DNS and timeout are checked before this, so we skip them here + if isDNSError(err) || isTimeoutError(err) { + return false + } + + var ( + netErr net.Error + opErr *net.OpError + syscallErr syscall.Errno + ) + + if errors.As(err, &opErr) { + return true + } + + if errors.As(err, &netErr) { + return true + } + + if errors.As(err, &syscallErr) { + return syscallErr == syscall.ECONNREFUSED || + syscallErr == syscall.ECONNRESET || + syscallErr == syscall.ETIMEDOUT || + syscallErr == syscall.ENETUNREACH || + syscallErr == syscall.EHOSTUNREACH + } + + return false +} diff --git a/pkg/libmirror/util/registryerr/errors_integration_test.go b/pkg/libmirror/util/registryerr/errors_integration_test.go new file mode 100644 index 00000000..8c453246 --- /dev/null +++ b/pkg/libmirror/util/registryerr/errors_integration_test.go @@ -0,0 +1,241 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registryerr + +import ( + "context" + "encoding/json" + "fmt" + "net" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// headImage performs remote.Head against the given host with insecure (plain HTTP) mode. +func headImage(ctx context.Context, host string) error { + ref, err := name.ParseReference(host+"/test:latest", name.Insecure) + if err != nil { + return fmt.Errorf("parse reference: %w", err) + } + _, err = remote.Head(ref, remote.WithContext(ctx)) + return err +} + +// newRegistryErrorHandler returns an http.Handler that responds with +// a Docker V2 API error in JSON format. +func newRegistryErrorHandler(statusCode int, code, message string) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(statusCode) + _ = json.NewEncoder(w).Encode(struct { + Errors []struct { + Code string `json:"code"` + Message string `json:"message"` + } `json:"errors"` + }{ + Errors: []struct { + Code string `json:"code"` + Message string `json:"message"` + }{{code, message}}, + }) + }) +} + +// classifyFromServer starts an httptest server with the given handler, +// makes a real remote.Head call, and returns the Classify result. +func classifyFromServer(t *testing.T, handler http.Handler) *Diagnostic { + t.Helper() + server := httptest.NewServer(handler) + defer server.Close() + + err := headImage(context.Background(), strings.TrimPrefix(server.URL, "http://")) + require.Error(t, err) + return Classify(err) +} + +// --- HTTP Status Code Tests --- +// Verify that real go-containerregistry errors from HTTP responses +// are correctly classified using the Category constants. + +func TestIntegration_Auth401(t *testing.T) { + diag := classifyFromServer(t, newRegistryErrorHandler( + http.StatusUnauthorized, "UNAUTHORIZED", "authentication required", + )) + require.NotNil(t, diag) + assert.Equal(t, CategoryAuth401, diag.Category) +} + +func TestIntegration_Auth403(t *testing.T) { + diag := classifyFromServer(t, newRegistryErrorHandler( + http.StatusForbidden, "DENIED", "access denied", + )) + require.NotNil(t, diag) + assert.Equal(t, CategoryAuth403, diag.Category) +} + +func TestIntegration_RateLimit429(t *testing.T) { + diag := classifyFromServer(t, newRegistryErrorHandler( + http.StatusTooManyRequests, "TOOMANYREQUESTS", "rate limit exceeded", + )) + require.NotNil(t, diag) + assert.Equal(t, CategoryRateLimit, diag.Category) +} + +func TestIntegration_ServerErrors(t *testing.T) { + tests := []struct { + name string + statusCode int + errCode string + }{ + {"500", http.StatusInternalServerError, "UNKNOWN"}, + {"502", http.StatusBadGateway, ""}, + {"503", http.StatusServiceUnavailable, "UNAVAILABLE"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + if tt.errCode != "" { + newRegistryErrorHandler(tt.statusCode, tt.errCode, "server error").ServeHTTP(w, nil) + } else { + w.WriteHeader(tt.statusCode) + } + }) + + diag := classifyFromServer(t, handler) + require.NotNil(t, diag) + // Category is dynamic: "Registry server error (HTTP 500)" + assert.Contains(t, diag.Category, CategoryServerError) + assert.Contains(t, diag.Category, tt.name) + }) + } +} + +// --- TLS Error Test --- + +func TestIntegration_TLSCertificateError(t *testing.T) { + // httptest.NewTLSServer uses a self-signed cert not in the system trust store. + // Connecting without TLS skip produces an x509 certificate error. + server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + host := strings.TrimPrefix(server.URL, "https://") + ref, err := name.ParseReference(host+"/test:latest", name.StrictValidation) + require.NoError(t, err) + + _, err = remote.Head(ref) + require.Error(t, err) + + diag := Classify(err) + require.NotNil(t, diag, "expected TLS error to be classified, got raw: %v", err) + assert.Equal(t, CategoryTLS, diag.Category) +} + +// --- EOF Error Test --- + +func TestIntegration_EOF(t *testing.T) { + // Server accepts the TCP connection then closes it immediately, + // producing an EOF on the client side. + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + hijacker, ok := w.(http.Hijacker) + if !ok { + t.Fatal("server does not support hijacking") + } + conn, _, err := hijacker.Hijack() + if err != nil { + t.Fatalf("hijack failed: %v", err) + } + conn.Close() + })) + defer server.Close() + + err := headImage(context.Background(), strings.TrimPrefix(server.URL, "http://")) + require.Error(t, err) + + diag := Classify(err) + require.NotNil(t, diag, "expected EOF error to be classified, got raw: %v", err) + assert.Equal(t, CategoryEOF, diag.Category) +} + +// --- Connection Refused Test --- + +func TestIntegration_ConnectionRefused(t *testing.T) { + // Bind a port, then close the listener so nothing accepts connections. + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + addr := listener.Addr().String() + listener.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + err = headImage(ctx, addr) + require.Error(t, err) + + diag := Classify(err) + require.NotNil(t, diag, "expected network error to be classified, got raw: %v", err) + // Category is dynamic: "Network connection failed to 127.0.0.1:XXXXX" + assert.Contains(t, diag.Category, CategoryNetwork) +} + +// --- Timeout Test --- + +func TestIntegration_Timeout(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + select { + case <-time.After(10 * time.Second): + case <-r.Context().Done(): + } + })) + defer server.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + + err := headImage(ctx, strings.TrimPrefix(server.URL, "http://")) + require.Error(t, err) + + diag := Classify(err) + require.NotNil(t, diag, "expected timeout error to be classified, got raw: %v", err) + assert.Equal(t, CategoryTimeout, diag.Category) +} + +// --- DNS Error Test --- + +func TestIntegration_DNSResolutionFailure(t *testing.T) { + // .invalid TLD is reserved by RFC 2606 and guaranteed to never resolve. + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := headImage(ctx, "nonexistent.invalid:443") + require.Error(t, err) + + diag := Classify(err) + require.NotNil(t, diag, "expected DNS error to be classified, got raw: %v", err) + // Category is dynamic: "DNS resolution failed for 'nonexistent.invalid'" + assert.Contains(t, diag.Category, CategoryDNS) +} diff --git a/pkg/libmirror/util/registryerr/errors_test.go b/pkg/libmirror/util/registryerr/errors_test.go new file mode 100644 index 00000000..1a9aeb0b --- /dev/null +++ b/pkg/libmirror/util/registryerr/errors_test.go @@ -0,0 +1,364 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registryerr + +import ( + "context" + "crypto/x509" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + "strings" + "syscall" + "testing" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestClassify_Nil(t *testing.T) { + assert.Nil(t, Classify(nil)) +} + +func TestClassify_Unclassified(t *testing.T) { + assert.Nil(t, Classify(errors.New("some random error"))) +} + +func TestClassify_EOF(t *testing.T) { + diag := Classify(io.EOF) + require.NotNil(t, diag) + assert.Equal(t, CategoryEOF, diag.Category) +} + +func TestClassify_UnexpectedEOF(t *testing.T) { + diag := Classify(io.ErrUnexpectedEOF) + require.NotNil(t, diag) + assert.Equal(t, CategoryEOF, diag.Category) +} + +func TestClassify_WrappedEOF(t *testing.T) { + err := fmt.Errorf("pull from registry: %w", fmt.Errorf("get manifest: %w", io.EOF)) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryEOF, diag.Category) +} + +func TestClassify_TLS_UnknownAuthority(t *testing.T) { + err := fmt.Errorf("registry: %w", x509.UnknownAuthorityError{}) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryTLS, diag.Category) +} + +func TestClassify_TLS_CertificateInvalid(t *testing.T) { + err := fmt.Errorf("registry: %w", x509.CertificateInvalidError{}) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryTLS, diag.Category) +} + +func TestClassify_TLS_Hostname(t *testing.T) { + err := fmt.Errorf("registry: %w", x509.HostnameError{Host: "example.com"}) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryTLS, diag.Category) +} + +func TestClassify_Auth_401(t *testing.T) { + err := &transport.Error{StatusCode: http.StatusUnauthorized} + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryAuth401, diag.Category) +} + +func TestClassify_Auth_403(t *testing.T) { + err := &transport.Error{StatusCode: http.StatusForbidden} + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryAuth403, diag.Category) +} + +func TestClassify_Auth_DiagnosticCode(t *testing.T) { + err := &transport.Error{ + StatusCode: http.StatusOK, + Errors: []transport.Diagnostic{ + {Code: transport.UnauthorizedErrorCode}, + }, + } + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryAuth, diag.Category) +} + +func TestClassify_RateLimit_429(t *testing.T) { + err := &transport.Error{StatusCode: http.StatusTooManyRequests} + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryRateLimit, diag.Category) +} + +func TestClassify_RateLimit_DiagnosticCode(t *testing.T) { + err := &transport.Error{ + StatusCode: http.StatusOK, + Errors: []transport.Diagnostic{ + {Code: transport.TooManyRequestsErrorCode}, + }, + } + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryRateLimit, diag.Category) +} + +func TestClassify_ServerErrors(t *testing.T) { + tests := []struct { + name string + statusCode int + }{ + {"500", http.StatusInternalServerError}, + {"502", http.StatusBadGateway}, + {"503", http.StatusServiceUnavailable}, + {"504", http.StatusGatewayTimeout}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diag := Classify(&transport.Error{StatusCode: tt.statusCode}) + require.NotNil(t, diag) + // Category is dynamic: "Registry server error (HTTP 500)" + assert.Contains(t, diag.Category, CategoryServerError) + assert.Contains(t, diag.Category, tt.name) + }) + } +} + +func TestClassify_ServerError_Unavailable(t *testing.T) { + err := &transport.Error{ + StatusCode: http.StatusOK, + Errors: []transport.Diagnostic{ + {Code: transport.UnavailableErrorCode}, + }, + } + diag := Classify(err) + require.NotNil(t, diag) + assert.Contains(t, diag.Category, CategoryServerError) +} + +func TestClassify_DNS(t *testing.T) { + err := &net.DNSError{Name: "registry.example.com", Err: "no such host"} + diag := Classify(err) + require.NotNil(t, diag) + // Category is dynamic: "DNS resolution failed for 'registry.example.com'" + assert.Contains(t, diag.Category, CategoryDNS) + assert.Contains(t, diag.Category, "registry.example.com") +} + +func TestClassify_Timeout_Context(t *testing.T) { + err := fmt.Errorf("validate access: %w", context.DeadlineExceeded) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryTimeout, diag.Category) +} + +func TestClassify_Timeout_OS(t *testing.T) { + err := fmt.Errorf("validate access: %w", os.ErrDeadlineExceeded) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryTimeout, diag.Category) +} + +func TestClassify_Network_ConnRefused(t *testing.T) { + err := &net.OpError{ + Op: "dial", + Net: "tcp", + Addr: &net.TCPAddr{ + IP: net.IPv4(127, 0, 0, 1), + Port: 443, + }, + Err: &os.SyscallError{ + Syscall: "connect", + Err: syscall.ECONNREFUSED, + }, + } + diag := Classify(err) + require.NotNil(t, diag) + // Category is dynamic: "Network connection failed to 127.0.0.1:443" + assert.Contains(t, diag.Category, CategoryNetwork) + assert.Contains(t, diag.Category, "127.0.0.1:443") +} + +func TestClassify_Network_ConnReset(t *testing.T) { + err := &net.OpError{ + Op: "read", + Net: "tcp", + Err: &os.SyscallError{ + Syscall: "read", + Err: syscall.ECONNRESET, + }, + } + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryNetwork, diag.Category) +} + +func TestClassify_ImageNotFound(t *testing.T) { + err := errors.New("MANIFEST_UNKNOWN: manifest unknown") + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryImageNotFound, diag.Category) +} + +func TestClassify_RepoNotFound(t *testing.T) { + err := errors.New("NAME_UNKNOWN: repository not found") + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryRepoNotFound, diag.Category) +} + +func TestClassify_TrivyMediaType(t *testing.T) { + err := errors.New("MANIFEST_INVALID: media type vnd.aquasec.trivy not allowed") + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryUnsupportedOCI, diag.Category) +} + +func TestClassify_DeepWrapping(t *testing.T) { + inner := x509.UnknownAuthorityError{} + err := fmt.Errorf("l1: %w", fmt.Errorf("l2: %w", fmt.Errorf("l3: %w", fmt.Errorf("l4: %w", fmt.Errorf("l5: %w", inner))))) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryTLS, diag.Category) +} + +func TestDiagnostic_Unwrap(t *testing.T) { + originalErr := io.EOF + diag := Classify(fmt.Errorf("wrap: %w", originalErr)) + require.NotNil(t, diag) + assert.True(t, errors.Is(diag, originalErr)) +} + +func TestDiagnostic_Format_NoColor(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &Diagnostic{ + Category: CategoryNetwork, + OriginalErr: errors.New("test"), + Causes: []string{"cause1"}, + Solutions: []string{"fix1"}, + } + + output := diag.Format() + assert.NotContains(t, output, "\033[") + assert.Contains(t, output, CategoryNetwork) + assert.Contains(t, output, "cause1") + assert.Contains(t, output, "fix1") +} + +func TestDiagnostic_Format_Structure(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &Diagnostic{ + Category: CategoryNetwork, + OriginalErr: errors.New("connection refused"), + Causes: []string{"Network down", "Firewall blocking"}, + Solutions: []string{"Check network", "Check firewall"}, + } + + output := diag.Format() + assert.Contains(t, output, "error: "+CategoryNetwork) + assert.Contains(t, output, "connection refused") + assert.Contains(t, output, "Possible causes:") + assert.Contains(t, output, "Network down") + assert.Contains(t, output, "Firewall blocking") + assert.Contains(t, output, "How to fix:") + assert.Contains(t, output, "Check network") + assert.Contains(t, output, "Check firewall") +} + +func TestClassify_Priority_DNSOverNetwork(t *testing.T) { + err := &net.DNSError{Name: "example.com", Err: "no such host"} + diag := Classify(err) + require.NotNil(t, diag) + assert.Contains(t, diag.Category, CategoryDNS) + assert.NotContains(t, diag.Category, CategoryNetwork) +} + +func TestClassify_Priority_TimeoutOverNetwork(t *testing.T) { + diag := Classify(context.DeadlineExceeded) + require.NotNil(t, diag) + assert.Equal(t, CategoryTimeout, diag.Category) +} + +func TestClassify_WrappedAuth(t *testing.T) { + inner := &transport.Error{StatusCode: http.StatusUnauthorized} + err := fmt.Errorf("validate access: %w", inner) + diag := Classify(err) + require.NotNil(t, diag) + assert.Equal(t, CategoryAuth401, diag.Category) +} + +func TestIsImageNotFound(t *testing.T) { + assert.True(t, IsImageNotFound(errors.New("MANIFEST_UNKNOWN: not found"))) + assert.True(t, IsImageNotFound(errors.New("404 Not Found"))) + assert.False(t, IsImageNotFound(errors.New("some other error"))) + assert.False(t, IsImageNotFound(nil)) +} + +func TestIsRepoNotFound(t *testing.T) { + assert.True(t, IsRepoNotFound(errors.New("NAME_UNKNOWN: repo"))) + assert.False(t, IsRepoNotFound(errors.New("some other error"))) + assert.False(t, IsRepoNotFound(nil)) +} + +func TestIsTrivyMediaTypeNotAllowed(t *testing.T) { + assert.True(t, IsTrivyMediaTypeNotAllowed(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy"))) + assert.True(t, IsTrivyMediaTypeNotAllowed(errors.New("MANIFEST_INVALID: application/octet-stream"))) + assert.False(t, IsTrivyMediaTypeNotAllowed(errors.New("MANIFEST_INVALID: other"))) + assert.False(t, IsTrivyMediaTypeNotAllowed(nil)) +} + +func TestDiagnostic_Error_PlainText(t *testing.T) { + diag := &Diagnostic{ + Category: CategoryNetwork, + OriginalErr: errors.New("connection refused"), + Causes: []string{"cause"}, + Solutions: []string{"fix"}, + } + + errStr := diag.Error() + assert.Equal(t, CategoryNetwork+": connection refused", errStr) + assert.NotContains(t, errStr, "\033[") + assert.NotContains(t, errStr, "Possible causes") +} + +func TestDiagnostic_Format_ForceColor(t *testing.T) { + t.Setenv("FORCE_COLOR", "1") + t.Setenv("NO_COLOR", "") + + diag := &Diagnostic{ + Category: CategoryEOF, + OriginalErr: errors.New("test"), + Causes: []string{"cause1"}, + Solutions: []string{"fix1"}, + } + + output := diag.Format() + assert.True(t, strings.Contains(output, "\033[")) +} diff --git a/pkg/libmirror/util/registryerr/format.go b/pkg/libmirror/util/registryerr/format.go new file mode 100644 index 00000000..9207e168 --- /dev/null +++ b/pkg/libmirror/util/registryerr/format.go @@ -0,0 +1,122 @@ +/* +Copyright 2024 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registryerr + +import ( + "os" + "strings" + + "golang.org/x/term" +) + +// ANSI escape codes for terminal color output. +const ( + ansiReset = "\033[0m" + ansiRed = "\033[31m" + ansiYellow = "\033[33m" + ansiCyan = "\033[36m" + ansiBold = "\033[1m" +) + +// textStyler controls whether styled output uses ANSI codes or plain text. +type textStyler struct { + enabled bool +} + +// style wraps text with ANSI codes when enabled, returns plain text otherwise. +func (t textStyler) style(text string, ansiCodes ...string) string { + if !t.enabled { + return text + } + return strings.Join(ansiCodes, "") + text + ansiReset +} + +// Semantic text styles used by Diagnostic.Format(). +func (t textStyler) danger(s string) string { return t.style(s, ansiBold, ansiRed) } // error labels +func (t textStyler) header(s string) string { return t.style(s, ansiBold) } // category name +func (t textStyler) hint(s string) string { return t.style(s, ansiCyan) } // arrows, solutions +func (t textStyler) warn(s string) string { return t.style(s, ansiYellow) } // possible causes + +// newTextStyler returns a textStyler configured for the current environment. +// Colors are enabled when stderr is a TTY, unless overridden by NO_COLOR or FORCE_COLOR. +func newTextStyler() textStyler { + if os.Getenv("NO_COLOR") != "" { + return textStyler{} + } + return textStyler{ + enabled: os.Getenv("FORCE_COLOR") != "" || term.IsTerminal(int(os.Stderr.Fd())), + } +} + +// Diagnostic is a classified registry error with user-friendly diagnostics. +// It implements the error interface so it can propagate up the call chain and be +// printed once at the top level, avoiding double output. +type Diagnostic struct { + Category string // e.g. "DNS resolution failed for 'registry.example.com'" + OriginalErr error // the underlying error from go-containerregistry or net + Causes []string // "Possible causes" shown to the user + Solutions []string // "How to fix" shown to the user +} + +// Error returns a plain-text representation suitable for logging and error wrapping. +// Use Format() for user-facing colored terminal output. +func (d *Diagnostic) Error() string { + return d.Category + ": " + d.OriginalErr.Error() +} + +// Unwrap returns the original error so errors.Is/errors.As work through the diagnostic. +func (d *Diagnostic) Unwrap() error { + return d.OriginalErr +} + +// Format returns the formatted diagnostic string with colors if stderr is a TTY. +// +// Example: +// +// error: Network connection failed to 127.0.0.1:443 +// ╰─▶ dial tcp 127.0.0.1:443: connect: connection refused +// +// Possible causes: +// * Network connectivity issues or no internet connection +// +// How to fix: +// * Check your network connection and internet access +func (d *Diagnostic) Format() string { + t := newTextStyler() + + var b strings.Builder + b.WriteString("\n" + t.danger("error") + t.header(": "+d.Category) + "\n") + b.WriteString(t.hint(" ╰─▶ ") + d.OriginalErr.Error() + "\n\n") + + if len(d.Causes) > 0 { + b.WriteString(t.warn(" Possible causes:") + "\n") + for _, cause := range d.Causes { + b.WriteString(" * " + cause + "\n") + } + b.WriteString("\n") + } + + if len(d.Solutions) > 0 { + b.WriteString(t.hint(" How to fix:") + "\n") + for _, solution := range d.Solutions { + b.WriteString(" * " + solution + "\n") + } + } + + b.WriteString("\n") + return b.String() +} diff --git a/pkg/libmirror/validation/registry_access.go b/pkg/libmirror/validation/registry_access.go index da2d37cb..62c7e3b3 100644 --- a/pkg/libmirror/validation/registry_access.go +++ b/pkg/libmirror/validation/registry_access.go @@ -27,7 +27,7 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/auth" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/errorutil" + "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" ) var ErrImageUnavailable = errors.New("required image is not present in registry") @@ -88,7 +88,7 @@ func (v *RemoteRegistryAccessValidator) ValidateReadAccessForImage(ctx context.C _, err = remote.Head(ref, remoteOpts...) if err != nil { - if errorutil.IsImageNotFoundError(err) { + if registryerr.IsImageNotFound(err) { return ErrImageUnavailable } return err From b898286833d89ef413a37cc84b5e22e37f48da87 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 14:10:14 +0300 Subject: [PATCH 02/10] Refactor registry error diagnostics into layered architecture - Split monolithic registryerr into three packages: pkg/diagnostic (generic HelpfulError), pkg/registry/errdiag (registry classifier), pkg/registry/errmatch (flow-control matchers) - Each command classifies errors with its own domain classifier; root.go only catches HelpfulError via errors.As, preventing false diagnostics across unrelated commands - Extend OCI media type detection to cover all Deckhouse artifact types, not just Trivy - Unify Trivy error handling: errors propagate to Classify instead of being intercepted and replaced with a warning string in the pusher retry loop - Add typed transport.Error checks in errmatch with string fallback for HEAD responses - Guard against double classification in Classify via errors.As check Signed-off-by: Roman Berezkin --- cmd/d8/root.go | 12 +- internal/mirror/cmd/pull/pull.go | 4 +- internal/mirror/cmd/push/push.go | 6 +- internal/mirror/pusher/pusher.go | 4 - pkg/diagnostic/README.md | 145 ++++++++ pkg/diagnostic/diagnostic.go | 41 +++ pkg/diagnostic/diagnostic_test.go | 138 ++++++++ pkg/diagnostic/doc.go | 101 ++++++ .../util/registryerr => diagnostic}/format.go | 46 +-- pkg/libmirror/util/registryerr/doc.go | 75 ---- pkg/libmirror/validation/registry_access.go | 4 +- .../errdiag/classify.go} | 127 +++---- .../errdiag/classify_integration_test.go} | 40 +-- pkg/registry/errdiag/classify_network_test.go | 41 +++ .../errdiag/classify_test.go} | 156 ++------- pkg/registry/errdiag/doc.go | 49 +++ pkg/registry/errmatch/errmatch.go | 77 +++++ pkg/registry/errmatch/errmatch_test.go | 77 +++++ scripts/demo-registry-errors/main.go | 323 ++++++++++++++++++ 19 files changed, 1119 insertions(+), 347 deletions(-) create mode 100644 pkg/diagnostic/README.md create mode 100644 pkg/diagnostic/diagnostic.go create mode 100644 pkg/diagnostic/diagnostic_test.go create mode 100644 pkg/diagnostic/doc.go rename pkg/{libmirror/util/registryerr => diagnostic}/format.go (66%) delete mode 100644 pkg/libmirror/util/registryerr/doc.go rename pkg/{libmirror/util/registryerr/errors.go => registry/errdiag/classify.go} (82%) rename pkg/{libmirror/util/registryerr/errors_integration_test.go => registry/errdiag/classify_integration_test.go} (81%) create mode 100644 pkg/registry/errdiag/classify_network_test.go rename pkg/{libmirror/util/registryerr/errors_test.go => registry/errdiag/classify_test.go} (60%) create mode 100644 pkg/registry/errdiag/doc.go create mode 100644 pkg/registry/errmatch/errmatch.go create mode 100644 pkg/registry/errmatch/errmatch_test.go create mode 100644 scripts/demo-registry-errors/main.go diff --git a/cmd/d8/root.go b/cmd/d8/root.go index 0c68538a..9044df3a 100644 --- a/cmd/d8/root.go +++ b/cmd/d8/root.go @@ -48,7 +48,7 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/tools" useroperation "github.com/deckhouse/deckhouse-cli/internal/useroperation/cmd" "github.com/deckhouse/deckhouse-cli/internal/version" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" ) type RootCommand struct { @@ -175,10 +175,12 @@ func (r *RootCommand) Execute() error { func execute() { rootCmd := NewRootCommand() if err := rootCmd.Execute(); err != nil { - var diag *registryerr.Diagnostic - // If registry error - show formatted diagnostic with causes and solutions if possible - if errors.As(err, &diag) { - fmt.Fprint(os.Stderr, diag.Format()) + // If a command returned a HelpfulError, show formatted diagnostic. + // Commands are responsible for classifying their own errors using + // domain-specific classifiers (e.g. errdiag.Classify for registry). + var helpErr *diagnostic.HelpfulError + if errors.As(err, &helpErr) { + fmt.Fprint(os.Stderr, helpErr.Format()) } else { fmt.Fprintf(os.Stderr, "Error executing command: %v\n", err) } diff --git a/internal/mirror/cmd/pull/pull.go b/internal/mirror/cmd/pull/pull.go index 3d567ddb..bdbabf4e 100644 --- a/internal/mirror/cmd/pull/pull.go +++ b/internal/mirror/cmd/pull/pull.go @@ -46,9 +46,9 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" + "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" registryservice "github.com/deckhouse/deckhouse-cli/pkg/registry/service" "github.com/deckhouse/deckhouse-cli/pkg/stub" ) @@ -120,7 +120,7 @@ func pull(cmd *cobra.Command, _ []string) error { puller.logger.WarnLn("Operation cancelled by user") return nil } - if diag := registryerr.Classify(err); diag != nil { + if diag := errdiag.Classify(err); diag != nil { return diag } return fmt.Errorf("pull failed: %w", err) diff --git a/internal/mirror/cmd/push/push.go b/internal/mirror/cmd/push/push.go index 71c9a482..783eb02f 100644 --- a/internal/mirror/cmd/push/push.go +++ b/internal/mirror/cmd/push/push.go @@ -38,9 +38,9 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" + "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" ) // CLI Parameters @@ -179,14 +179,14 @@ func (p *Pusher) Execute() error { } if err := p.validateRegistryAccess(); err != nil { - if diag := registryerr.Classify(err); diag != nil { + if diag := errdiag.Classify(err); diag != nil { return diag } return err } if err := p.executeNewPush(); err != nil { - if diag := registryerr.Classify(err); diag != nil { + if diag := errdiag.Classify(err); diag != nil { return diag } return err diff --git a/internal/mirror/pusher/pusher.go b/internal/mirror/pusher/pusher.go index e359aeb1..c065c9fa 100644 --- a/internal/mirror/pusher/pusher.go +++ b/internal/mirror/pusher/pusher.go @@ -31,7 +31,6 @@ import ( "github.com/deckhouse/deckhouse-cli/internal/mirror/chunked" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/retry" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/retry/task" client "github.com/deckhouse/deckhouse-cli/pkg/registry" @@ -106,9 +105,6 @@ func (s *Service) PushLayout(ctx context.Context, layoutPath layout.Path, client fmt.Sprintf("[%d / %d] Pushing %s", i+1, len(indexManifest.Manifests), imageReferenceString), task.WithConstantRetries(pushRetryAttempts, pushRetryDelay, func(ctx context.Context) error { if err := client.PushImage(ctx, tag, img); err != nil { - if registryerr.IsTrivyMediaTypeNotAllowed(err) { - return fmt.Errorf(registryerr.TrivyMediaTypesWarning) - } return fmt.Errorf("write %s:%s to registry: %w", client.GetRegistry(), tag, err) } return nil diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md new file mode 100644 index 00000000..a95fed64 --- /dev/null +++ b/pkg/diagnostic/README.md @@ -0,0 +1,145 @@ +# pkg/diagnostic + +User-friendly error diagnostics for d8 CLI. Known errors get formatted +with possible causes and solutions instead of raw Go error text: + +``` +error: TLS/certificate verification failed + ╰─▶ x509: certificate signed by unknown authority + + Possible causes: + * Self-signed certificate without proper trust chain + * Corporate proxy intercepting HTTPS connections + + How to fix: + * Use --tls-skip-verify flag to skip TLS verification + * Add the registry's CA certificate to your system trust store +``` + +## How it works + +``` + root.go mirror/cmd/pull (RunE) + ─────── ────────────────────── + + rootCmd.Execute() + | + | cobra dispatches + | to subcommand ──────────────> err := puller.Execute() + | | + | [Classify err] -> is it HelpfulError? + | | + | yes | no + | | | + | *HelpfulError <-+ +-> fmt.Errorf("pull failed: %w", err) + | | | + | error returns <───────────────+─────+ + | + [errors.As HelpfulError?] + | + yes | no + | | + v v + .Format() "Error executing command: ..." (as usual) + (colored) (plain) +``` + +Each command classifies errors with its own domain classifier. +`root.go` only catches `*HelpfulError` via `errors.As` - it does not +import any classifier, so unrelated commands never get false diagnostics. + +## HelpfulError + +```go +type HelpfulError struct { + Category string // what went wrong: "TLS/certificate verification failed" + OriginalErr error // the underlying error (required, used by Unwrap/Error/Format) + Causes []string // why it might have happened (optional) + Solutions []string // how to fix it (optional) +} +``` + +| Field | Required | What happens if empty | +|-------|----------|----------------------| +| `Category` | yes | output shows `error: ` with no description | +| `OriginalErr` | yes | safe (no panic), but `Unwrap` returns nil and `Format` skips the error line | +| `Causes` | no | "Possible causes" section is omitted | +| `Solutions` | no | "How to fix" section is omitted | + +How fields map to output (`Format()`): + +``` +error: TLS/certificate verification failed <-- Category + ╰─▶ x509: certificate signed by unknown authority <-- OriginalErr.Error() + + Possible causes: <-- Causes + * Self-signed certificate without proper trust chain + * Corporate proxy intercepting HTTPS connections + + How to fix: <-- Solutions + * Use --tls-skip-verify flag + * Add the registry's CA certificate to your system trust store +``` + +`Error()` returns plain text for logs: `"Category: OriginalErr.Error()"`. +`Unwrap()` returns `OriginalErr` so `errors.Is`/`errors.As` work through it. + +## Example Package layout + +``` +pkg/diagnostic/ HelpfulError + Format (generic, no domain logic) +pkg/registry/errdiag/ Classify - registry error classifier +pkg/registry/errmatch/ IsImageNotFound, IsRepoNotFound - for flow control +``` + +## Adding diagnostics to a new command + +**1. Create a classifier** for your domain: + +```go +// internal/backup/errdiag/classify.go +package errdiag + +import ( + "errors" + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" +) + +func Classify(err error) *diagnostic.HelpfulError { + var helpErr *diagnostic.HelpfulError + if errors.As(err, &helpErr) { + return nil // already classified, don't wrap twice + } + + if isETCDError(err) { + return &diagnostic.HelpfulError{ + Category: "ETCD connection failed", + OriginalErr: err, + Causes: []string{"ETCD cluster is unreachable"}, + Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, + } + } + return nil +} +``` + +**2. Call it in RunE** of your leaf command: + +```go +if err := doSnapshot(); err != nil { + if diag := errdiag.Classify(err); diag != nil { + return diag + } + return fmt.Errorf("snapshot failed: %w", err) +} +``` + +No changes to `root.go` needed - it catches any `*HelpfulError` +regardless of which classifier produced it. + +## Rules (Best Practice) + +- Classify in the **leaf command** (RunE), not in libraries or root.go +- Each domain uses its **own classifier** - prevents false diagnostics +- Skip classification if the error is already a `*HelpfulError` (see guard in the example above) +- `Causes` and `Solutions` are optional - empty slices are omitted from output, but highly recommended to provide those diff --git a/pkg/diagnostic/diagnostic.go b/pkg/diagnostic/diagnostic.go new file mode 100644 index 00000000..503695bd --- /dev/null +++ b/pkg/diagnostic/diagnostic.go @@ -0,0 +1,41 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package diagnostic + +// HelpfulError is an error enriched with possible causes and actionable solutions. +// It implements the error interface so it can propagate up the call chain +// and be printed once at the top level, avoiding double output. +type HelpfulError struct { + Category string // e.g. "DNS resolution failed for 'registry.example.com'" + OriginalErr error // the underlying error + Causes []string // "Possible causes" shown to the user + Solutions []string // "How to fix" shown to the user +} + +// Error returns a plain-text representation suitable for logging and error wrapping. +// Use Format() for user-facing terminal output. +func (e *HelpfulError) Error() string { + if e.OriginalErr == nil { + return e.Category + } + return e.Category + ": " + e.OriginalErr.Error() +} + +// Unwrap returns the original error so errors.Is/errors.As work through the wrapper. +func (e *HelpfulError) Unwrap() error { + return e.OriginalErr +} diff --git a/pkg/diagnostic/diagnostic_test.go b/pkg/diagnostic/diagnostic_test.go new file mode 100644 index 00000000..be4e0c8a --- /dev/null +++ b/pkg/diagnostic/diagnostic_test.go @@ -0,0 +1,138 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package diagnostic + +import ( + "errors" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHelpfulError_Error_PlainText(t *testing.T) { + diag := &HelpfulError{ + Category: "Network connection failed", + OriginalErr: errors.New("connection refused"), + Causes: []string{"cause"}, + Solutions: []string{"fix"}, + } + + errStr := diag.Error() + assert.Equal(t, "Network connection failed: connection refused", errStr) + assert.NotContains(t, errStr, "\033[") + assert.NotContains(t, errStr, "Possible causes") +} + +func TestHelpfulError_Unwrap(t *testing.T) { + originalErr := io.EOF + diag := &HelpfulError{ + Category: "Test", + OriginalErr: originalErr, + } + require.True(t, errors.Is(diag, originalErr)) +} + +func TestHelpfulError_Format_NoColor(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Network connection failed", + OriginalErr: errors.New("test"), + Causes: []string{"cause1"}, + Solutions: []string{"fix1"}, + } + + output := diag.Format() + assert.NotContains(t, output, "\033[") + assert.Contains(t, output, "Network connection failed") + assert.Contains(t, output, "cause1") + assert.Contains(t, output, "fix1") +} + +func TestHelpfulError_Format_Structure(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Network connection failed", + OriginalErr: errors.New("connection refused"), + Causes: []string{"Network down", "Firewall blocking"}, + Solutions: []string{"Check network", "Check firewall"}, + } + + output := diag.Format() + assert.Contains(t, output, "error: Network connection failed") + assert.Contains(t, output, "connection refused") + assert.Contains(t, output, "Possible causes:") + assert.Contains(t, output, "Network down") + assert.Contains(t, output, "Firewall blocking") + assert.Contains(t, output, "How to fix:") + assert.Contains(t, output, "Check network") + assert.Contains(t, output, "Check firewall") +} + +func TestHelpfulError_Error_NilOriginalErr(t *testing.T) { + diag := &HelpfulError{Category: "Something failed"} + assert.Equal(t, "Something failed", diag.Error()) + assert.Nil(t, diag.Unwrap()) +} + +func TestHelpfulError_Format_NilOriginalErr(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Something failed", + Solutions: []string{"Try again"}, + } + + output := diag.Format() + assert.Contains(t, output, "Something failed") + assert.Contains(t, output, "Try again") + assert.NotContains(t, output, "╰─▶") +} + +func TestHelpfulError_Format_NoCausesNoSolutions(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Something failed", + OriginalErr: errors.New("oops"), + } + + output := diag.Format() + assert.Contains(t, output, "Something failed") + assert.Contains(t, output, "oops") + assert.NotContains(t, output, "Possible causes") + assert.NotContains(t, output, "How to fix") +} + +func TestHelpfulError_Format_ForceColor(t *testing.T) { + t.Setenv("FORCE_COLOR", "1") + t.Setenv("NO_COLOR", "") + + diag := &HelpfulError{ + Category: "Test error", + OriginalErr: errors.New("test"), + Causes: []string{"cause1"}, + Solutions: []string{"fix1"}, + } + + output := diag.Format() + assert.True(t, strings.Contains(output, "\033[")) +} diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go new file mode 100644 index 00000000..53732604 --- /dev/null +++ b/pkg/diagnostic/doc.go @@ -0,0 +1,101 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package diagnostic provides [HelpfulError] - a wrapper around standard Go errors +// that adds possible causes and actionable solutions for the user. +// +// When a command returns a [HelpfulError], the top-level handler in cmd/d8/root.go +// detects it via [errors.As] and prints a formatted diagnostic instead of a raw error. +// If an error is not wrapped in [HelpfulError], it is printed as usual. +// +// # Creating a HelpfulError +// +// Option 1: use a domain-specific classifier that wraps known errors +// (see [github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag] for the registry +// implementation): +// +// if diag := errdiag.Classify(err); diag != nil { +// return diag +// } +// +// Option 2: wrap an error directly: +// +// return &diagnostic.HelpfulError{ +// Category: "ETCD snapshot failed", +// OriginalErr: err, +// Causes: []string{"ETCD cluster is unreachable"}, +// Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, +// } +// +// Causes and Solutions are optional - empty slices are silently omitted from output. +// +// # How fields map to Format() output +// +// error: ETCD snapshot failed <-- Category +// ╰─▶ dial tcp 10.0.0.1:2379: connection refused <-- OriginalErr.Error() +// +// Possible causes: <-- Causes +// * ETCD cluster is unreachable +// +// How to fix: <-- Solutions +// * Check ETCD health: etcdctl endpoint health +// +// # How it propagates +// +// [HelpfulError] implements the error interface. It propagates up the call chain +// like any other error. The original error is preserved via [HelpfulError.Unwrap], +// so [errors.Is] and [errors.As] work through the wrapper. +// +// In cmd/d8/root.go: +// +// var helpErr *diagnostic.HelpfulError +// if errors.As(err, &helpErr) { +// fmt.Fprint(os.Stderr, helpErr.Format()) // colored output, once +// } +// +// [HelpfulError.Error] returns plain text (safe for logs). +// [HelpfulError.Format] returns colored terminal output (TTY-aware, respects NO_COLOR). +// +// # Adding a new domain classifier +// +// To add diagnostics for a new domain (e.g. backup), create a Classify function +// that wraps known errors into *[HelpfulError]: +// +// // pkg/backup/errdiag/classify.go +// func Classify(err error) *diagnostic.HelpfulError { +// if isETCDError(err) { +// return &diagnostic.HelpfulError{ +// Category: "ETCD connection failed", OriginalErr: err, +// Causes: []string{"ETCD cluster is unreachable"}, +// Solutions: []string{"Check ETCD health"}, +// } +// } +// return nil +// } +// +// Then call it at the command level, same pattern as registry: +// +// if diag := errdiag.Classify(err); diag != nil { +// return diag +// } +// +// # Important: classify at the command level, not in root.go +// +// Each command must call its own domain classifier. root.go only catches +// [HelpfulError] via [errors.As] - it does not import or call any classifier. +// This prevents false classification: a DNS error from "d8 backup" must not +// be classified with registry-specific advice like "--tls-skip-verify". +package diagnostic diff --git a/pkg/libmirror/util/registryerr/format.go b/pkg/diagnostic/format.go similarity index 66% rename from pkg/libmirror/util/registryerr/format.go rename to pkg/diagnostic/format.go index 9207e168..a3d8c808 100644 --- a/pkg/libmirror/util/registryerr/format.go +++ b/pkg/diagnostic/format.go @@ -1,5 +1,5 @@ /* -Copyright 2024 Flant JSC +Copyright 2026 Flant JSC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package registryerr +package diagnostic import ( "os" @@ -45,7 +45,7 @@ func (t textStyler) style(text string, ansiCodes ...string) string { return strings.Join(ansiCodes, "") + text + ansiReset } -// Semantic text styles used by Diagnostic.Format(). +// Semantic text styles used by HelpfulError.Format(). func (t textStyler) danger(s string) string { return t.style(s, ansiBold, ansiRed) } // error labels func (t textStyler) header(s string) string { return t.style(s, ansiBold) } // category name func (t textStyler) hint(s string) string { return t.style(s, ansiCyan) } // arrows, solutions @@ -62,31 +62,8 @@ func newTextStyler() textStyler { } } -// Diagnostic is a classified registry error with user-friendly diagnostics. -// It implements the error interface so it can propagate up the call chain and be -// printed once at the top level, avoiding double output. -type Diagnostic struct { - Category string // e.g. "DNS resolution failed for 'registry.example.com'" - OriginalErr error // the underlying error from go-containerregistry or net - Causes []string // "Possible causes" shown to the user - Solutions []string // "How to fix" shown to the user -} - -// Error returns a plain-text representation suitable for logging and error wrapping. -// Use Format() for user-facing colored terminal output. -func (d *Diagnostic) Error() string { - return d.Category + ": " + d.OriginalErr.Error() -} - -// Unwrap returns the original error so errors.Is/errors.As work through the diagnostic. -func (d *Diagnostic) Unwrap() error { - return d.OriginalErr -} - // Format returns the formatted diagnostic string with colors if stderr is a TTY. // -// Example: -// // error: Network connection failed to 127.0.0.1:443 // ╰─▶ dial tcp 127.0.0.1:443: connect: connection refused // @@ -95,24 +72,27 @@ func (d *Diagnostic) Unwrap() error { // // How to fix: // * Check your network connection and internet access -func (d *Diagnostic) Format() string { +func (e *HelpfulError) Format() string { t := newTextStyler() var b strings.Builder - b.WriteString("\n" + t.danger("error") + t.header(": "+d.Category) + "\n") - b.WriteString(t.hint(" ╰─▶ ") + d.OriginalErr.Error() + "\n\n") + b.WriteString("\n" + t.danger("error") + t.header(": "+e.Category) + "\n") + if e.OriginalErr != nil { + b.WriteString(t.hint(" ╰─▶ ") + e.OriginalErr.Error() + "\n") + } + b.WriteString("\n") - if len(d.Causes) > 0 { + if len(e.Causes) > 0 { b.WriteString(t.warn(" Possible causes:") + "\n") - for _, cause := range d.Causes { + for _, cause := range e.Causes { b.WriteString(" * " + cause + "\n") } b.WriteString("\n") } - if len(d.Solutions) > 0 { + if len(e.Solutions) > 0 { b.WriteString(t.hint(" How to fix:") + "\n") - for _, solution := range d.Solutions { + for _, solution := range e.Solutions { b.WriteString(" * " + solution + "\n") } } diff --git a/pkg/libmirror/util/registryerr/doc.go b/pkg/libmirror/util/registryerr/doc.go deleted file mode 100644 index 6192eae6..00000000 --- a/pkg/libmirror/util/registryerr/doc.go +++ /dev/null @@ -1,75 +0,0 @@ -/* -Copyright 2026 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Package registryerr classifies container registry errors and provides -// user-friendly diagnostics for d8 mirror operations. -// -// When users encounter registry errors during "d8 mirror pull/push", raw Go -// errors like "EOF" or "x509: certificate signed by unknown authority" are -// often not actionable. This package analyzes such errors, determines the -// root cause category, and produces a formatted diagnostic with possible -// causes and concrete solutions. -// -// # Usage -// -// if diag := registryerr.Classify(err); diag != nil { -// return diag // implements error interface -// } -// -// The returned [Diagnostic] implements the error interface with two representations: -// - [Diagnostic.Error] returns plain text suitable for logging -// - [Diagnostic.Format] returns colored terminal output (when stderr is a TTY) -// -// # Output example -// -// [Diagnostic.Format] produces output like this: -// -// error: TLS/certificate verification failed -// ╰─▶ Get "https://registry.example.com/v2/": x509: certificate signed by unknown authority -// -// Possible causes: -// * Self-signed certificate without proper trust chain -// * Certificate expired or not yet valid -// * Corporate proxy or middleware intercepting HTTPS connections -// -// How to fix: -// * Use --tls-skip-verify flag to skip TLS verification (not recommended for production) -// * Add the registry's CA certificate to your system trust store -// -// # Classification -// -// [Classify] uses Go's [errors.Is] and [errors.As] to detect error types from -// the standard library (crypto/x509, net, io) and go-containerregistry -// (transport.Error). Detection order matters - more specific checks run first: -// -// 1. EOF - io.EOF, io.ErrUnexpectedEOF (proxy/middleware termination) -// 2. TLS/Certificate - x509.UnknownAuthorityError, HostnameError, etc. -// 3. Authentication - transport.Error with HTTP 401/403 -// 4. Rate limiting - transport.Error with HTTP 429 -// 5. Server errors - transport.Error with HTTP 500/502/503/504 -// 6. DNS - net.DNSError -// 7. Timeout - context.DeadlineExceeded -// 8. Network - net.OpError, syscall.ECONNREFUSED, etc. -// 9. Image not found - error message contains "MANIFEST_UNKNOWN" -// 10. Repo not found - error message contains "NAME_UNKNOWN" -// 11. Unsupported OCI - Trivy media type rejection (Project Quay) -// -// DNS is checked before Network because [net.DNSError] satisfies [net.Error]. -// Timeout is checked before Network for the same reason. -// -// Error type checkers ([IsImageNotFound], [IsRepoNotFound], [IsTrivyMediaTypeNotAllowed]) -// are used for flow control in mirror operations (e.g., skipping optional images). -package registryerr diff --git a/pkg/libmirror/validation/registry_access.go b/pkg/libmirror/validation/registry_access.go index 62c7e3b3..b5c46bc5 100644 --- a/pkg/libmirror/validation/registry_access.go +++ b/pkg/libmirror/validation/registry_access.go @@ -27,7 +27,7 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/auth" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/registryerr" + "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" ) var ErrImageUnavailable = errors.New("required image is not present in registry") @@ -88,7 +88,7 @@ func (v *RemoteRegistryAccessValidator) ValidateReadAccessForImage(ctx context.C _, err = remote.Head(ref, remoteOpts...) if err != nil { - if registryerr.IsImageNotFound(err) { + if errmatch.IsImageNotFound(err) { return ErrImageUnavailable } return err diff --git a/pkg/libmirror/util/registryerr/errors.go b/pkg/registry/errdiag/classify.go similarity index 82% rename from pkg/libmirror/util/registryerr/errors.go rename to pkg/registry/errdiag/classify.go index 5e97b082..bbc83790 100644 --- a/pkg/libmirror/util/registryerr/errors.go +++ b/pkg/registry/errdiag/classify.go @@ -1,5 +1,5 @@ /* -Copyright 2024 Flant JSC +Copyright 2026 Flant JSC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package registryerr +package errdiag import ( "context" @@ -29,48 +29,10 @@ import ( "syscall" "github.com/google/go-containerregistry/pkg/v1/remote/transport" -) - -const TrivyMediaTypesWarning = `` + - "It looks like you are using Project Quay registry and it is not configured correctly for hosting Deckhouse.\n" + - "See the docs at https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry for more details.\n\n" + - "TL;DR: You should retry push after allowing some additional types of OCI artifacts in your config.yaml as follows:\n" + - `FEATURE_GENERAL_OCI_SUPPORT: true -ALLOWED_OCI_ARTIFACT_TYPES: - "application/octet-stream": - - "application/deckhouse.io.bdu.layer.v1.tar+gzip" - - "application/vnd.cncf.openpolicyagent.layer.v1.tar+gzip" - "application/vnd.aquasec.trivy.config.v1+json": - - "application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip" - - "application/vnd.aquasec.trivy.db.layer.v1.tar+gzip"` - -func IsImageNotFound(err error) bool { - if err == nil { - return false - } - errMsg := err.Error() - return strings.Contains(errMsg, "MANIFEST_UNKNOWN") || strings.Contains(errMsg, "404 Not Found") -} - -func IsRepoNotFound(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "NAME_UNKNOWN") -} - -func IsTrivyMediaTypeNotAllowed(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "MANIFEST_INVALID") && - (strings.Contains(errMsg, "vnd.aquasec.trivy") || strings.Contains(errMsg, "application/octet-stream")) -} + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" + "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" +) // Error category names displayed to the user after "error: ". // Some categories are extended with dynamic details (hostname, port, HTTP code) @@ -91,17 +53,23 @@ const ( CategoryUnsupportedOCI = "Unsupported OCI artifact type" ) -// Classify analyzes an error and returns a Diagnostic if +// Classify analyzes an error and returns a *diagnostic.HelpfulError if // the error can be classified into a known category, or nil otherwise. // Detection order matters: more specific checks come before general ones. -func Classify(err error) *Diagnostic { +func Classify(err error) *diagnostic.HelpfulError { if err == nil { return nil } + // Already classified - don't wrap twice. + var helpErr *diagnostic.HelpfulError + if errors.As(err, &helpErr) { + return nil + } + switch { case isEOFError(err): - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: CategoryEOF, OriginalErr: err, Causes: []string{ @@ -118,7 +86,7 @@ func Classify(err error) *Diagnostic { } case isCertificateError(err): - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: CategoryTLS, OriginalErr: err, Causes: []string{ @@ -147,7 +115,7 @@ func Classify(err error) *Diagnostic { } } - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ @@ -163,7 +131,7 @@ func Classify(err error) *Diagnostic { } case isRateLimitError(err): - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: CategoryRateLimit, OriginalErr: err, Causes: []string{ @@ -183,7 +151,7 @@ func Classify(err error) *Diagnostic { category = fmt.Sprintf("%s (HTTP %d)", CategoryServerError, transportErr.StatusCode) } - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ @@ -205,7 +173,7 @@ func Classify(err error) *Diagnostic { category = fmt.Sprintf("%s for '%s'", CategoryDNS, dnsErr.Name) } - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ @@ -221,7 +189,7 @@ func Classify(err error) *Diagnostic { } case isTimeoutError(err): - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: CategoryTimeout, OriginalErr: err, Causes: []string{ @@ -243,7 +211,7 @@ func Classify(err error) *Diagnostic { category = fmt.Sprintf("%s to %s", CategoryNetwork, opErr.Addr.String()) } - return &Diagnostic{ + return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ @@ -258,8 +226,8 @@ func Classify(err error) *Diagnostic { }, } - case IsImageNotFound(err): - return &Diagnostic{ + case errmatch.IsImageNotFound(err): + return &diagnostic.HelpfulError{ Category: CategoryImageNotFound, OriginalErr: err, Causes: []string{ @@ -272,8 +240,8 @@ func Classify(err error) *Diagnostic { }, } - case IsRepoNotFound(err): - return &Diagnostic{ + case errmatch.IsRepoNotFound(err): + return &diagnostic.HelpfulError{ Category: CategoryRepoNotFound, OriginalErr: err, Causes: []string{ @@ -286,17 +254,26 @@ func Classify(err error) *Diagnostic { }, } - case IsTrivyMediaTypeNotAllowed(err): - return &Diagnostic{ + case isUnsupportedOCIMediaType(err): + return &diagnostic.HelpfulError{ Category: CategoryUnsupportedOCI, OriginalErr: err, Causes: []string{ - "Registry doesn't support required media types for Trivy security databases", - "Project Quay registry not configured for Deckhouse artifacts", + "Registry doesn't support required OCI media types for Deckhouse artifacts", + "Project Quay or similar registry not configured for custom artifact types", }, Solutions: []string{ "Configure registry to allow custom OCI artifact types", "See: https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry", + "For Project Quay, add the following to config.yaml and retry push:\n" + + " FEATURE_GENERAL_OCI_SUPPORT: true\n" + + " ALLOWED_OCI_ARTIFACT_TYPES:\n" + + " \"application/octet-stream\":\n" + + " - \"application/deckhouse.io.bdu.layer.v1.tar+gzip\"\n" + + " - \"application/vnd.cncf.openpolicyagent.layer.v1.tar+gzip\"\n" + + " \"application/vnd.aquasec.trivy.config.v1+json\":\n" + + " - \"application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip\"\n" + + " - \"application/vnd.aquasec.trivy.db.layer.v1.tar+gzip\"", }, } } @@ -426,3 +403,31 @@ func isNetworkError(err error) bool { return false } + +// unsupportedOCIMediaTypes lists media type substrings whose rejection by a +// registry (via MANIFEST_INVALID) indicates an OCI artifact configuration issue. +var unsupportedOCIMediaTypes = []string{ + "vnd.aquasec.trivy", + "application/octet-stream", + "deckhouse.io.bdu", + "vnd.cncf.openpolicyagent", +} + +func isUnsupportedOCIMediaType(err error) bool { + if err == nil { + return false + } + + errMsg := err.Error() + if !strings.Contains(errMsg, "MANIFEST_INVALID") { + return false + } + + for _, mediaType := range unsupportedOCIMediaTypes { + if strings.Contains(errMsg, mediaType) { + return true + } + } + + return false +} diff --git a/pkg/libmirror/util/registryerr/errors_integration_test.go b/pkg/registry/errdiag/classify_integration_test.go similarity index 81% rename from pkg/libmirror/util/registryerr/errors_integration_test.go rename to pkg/registry/errdiag/classify_integration_test.go index 8c453246..81740ca1 100644 --- a/pkg/libmirror/util/registryerr/errors_integration_test.go +++ b/pkg/registry/errdiag/classify_integration_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package registryerr +package errdiag import ( "context" @@ -31,6 +31,8 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" ) // headImage performs remote.Head against the given host with insecure (plain HTTP) mode. @@ -65,7 +67,7 @@ func newRegistryErrorHandler(statusCode int, code, message string) http.Handler // classifyFromServer starts an httptest server with the given handler, // makes a real remote.Head call, and returns the Classify result. -func classifyFromServer(t *testing.T, handler http.Handler) *Diagnostic { +func classifyFromServer(t *testing.T, handler http.Handler) *diagnostic.HelpfulError { t.Helper() server := httptest.NewServer(handler) defer server.Close() @@ -75,10 +77,6 @@ func classifyFromServer(t *testing.T, handler http.Handler) *Diagnostic { return Classify(err) } -// --- HTTP Status Code Tests --- -// Verify that real go-containerregistry errors from HTTP responses -// are correctly classified using the Category constants. - func TestIntegration_Auth401(t *testing.T) { diag := classifyFromServer(t, newRegistryErrorHandler( http.StatusUnauthorized, "UNAUTHORIZED", "authentication required", @@ -126,18 +124,13 @@ func TestIntegration_ServerErrors(t *testing.T) { diag := classifyFromServer(t, handler) require.NotNil(t, diag) - // Category is dynamic: "Registry server error (HTTP 500)" assert.Contains(t, diag.Category, CategoryServerError) assert.Contains(t, diag.Category, tt.name) }) } } -// --- TLS Error Test --- - func TestIntegration_TLSCertificateError(t *testing.T) { - // httptest.NewTLSServer uses a self-signed cert not in the system trust store. - // Connecting without TLS skip produces an x509 certificate error. server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -155,11 +148,7 @@ func TestIntegration_TLSCertificateError(t *testing.T) { assert.Equal(t, CategoryTLS, diag.Category) } -// --- EOF Error Test --- - func TestIntegration_EOF(t *testing.T) { - // Server accepts the TCP connection then closes it immediately, - // producing an EOF on the client side. server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { hijacker, ok := w.(http.Hijacker) if !ok { @@ -181,10 +170,7 @@ func TestIntegration_EOF(t *testing.T) { assert.Equal(t, CategoryEOF, diag.Category) } -// --- Connection Refused Test --- - func TestIntegration_ConnectionRefused(t *testing.T) { - // Bind a port, then close the listener so nothing accepts connections. listener, err := net.Listen("tcp", "127.0.0.1:0") require.NoError(t, err) addr := listener.Addr().String() @@ -198,12 +184,9 @@ func TestIntegration_ConnectionRefused(t *testing.T) { diag := Classify(err) require.NotNil(t, diag, "expected network error to be classified, got raw: %v", err) - // Category is dynamic: "Network connection failed to 127.0.0.1:XXXXX" assert.Contains(t, diag.Category, CategoryNetwork) } -// --- Timeout Test --- - func TestIntegration_Timeout(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { select { @@ -224,18 +207,3 @@ func TestIntegration_Timeout(t *testing.T) { assert.Equal(t, CategoryTimeout, diag.Category) } -// --- DNS Error Test --- - -func TestIntegration_DNSResolutionFailure(t *testing.T) { - // .invalid TLD is reserved by RFC 2606 and guaranteed to never resolve. - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - err := headImage(ctx, "nonexistent.invalid:443") - require.Error(t, err) - - diag := Classify(err) - require.NotNil(t, diag, "expected DNS error to be classified, got raw: %v", err) - // Category is dynamic: "DNS resolution failed for 'nonexistent.invalid'" - assert.Contains(t, diag.Category, CategoryDNS) -} diff --git a/pkg/registry/errdiag/classify_network_test.go b/pkg/registry/errdiag/classify_network_test.go new file mode 100644 index 00000000..f47c5ba7 --- /dev/null +++ b/pkg/registry/errdiag/classify_network_test.go @@ -0,0 +1,41 @@ +//go:build dfrunnetwork + +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package errdiag + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIntegration_DNSResolutionFailure(t *testing.T) { + // .invalid TLD is reserved by RFC 2606 and guaranteed to never resolve. + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := headImage(ctx, "nonexistent.invalid:443") + require.Error(t, err) + + diag := Classify(err) + require.NotNil(t, diag, "expected DNS error to be classified, got raw: %v", err) + assert.Contains(t, diag.Category, CategoryDNS) +} diff --git a/pkg/libmirror/util/registryerr/errors_test.go b/pkg/registry/errdiag/classify_test.go similarity index 60% rename from pkg/libmirror/util/registryerr/errors_test.go rename to pkg/registry/errdiag/classify_test.go index 1a9aeb0b..715a5333 100644 --- a/pkg/libmirror/util/registryerr/errors_test.go +++ b/pkg/registry/errdiag/classify_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package registryerr +package errdiag import ( "context" @@ -25,7 +25,6 @@ import ( "net" "net/http" "os" - "strings" "syscall" "testing" @@ -83,15 +82,13 @@ func TestClassify_TLS_Hostname(t *testing.T) { } func TestClassify_Auth_401(t *testing.T) { - err := &transport.Error{StatusCode: http.StatusUnauthorized} - diag := Classify(err) + diag := Classify(&transport.Error{StatusCode: http.StatusUnauthorized}) require.NotNil(t, diag) assert.Equal(t, CategoryAuth401, diag.Category) } func TestClassify_Auth_403(t *testing.T) { - err := &transport.Error{StatusCode: http.StatusForbidden} - diag := Classify(err) + diag := Classify(&transport.Error{StatusCode: http.StatusForbidden}) require.NotNil(t, diag) assert.Equal(t, CategoryAuth403, diag.Category) } @@ -99,9 +96,7 @@ func TestClassify_Auth_403(t *testing.T) { func TestClassify_Auth_DiagnosticCode(t *testing.T) { err := &transport.Error{ StatusCode: http.StatusOK, - Errors: []transport.Diagnostic{ - {Code: transport.UnauthorizedErrorCode}, - }, + Errors: []transport.Diagnostic{{Code: transport.UnauthorizedErrorCode}}, } diag := Classify(err) require.NotNil(t, diag) @@ -109,8 +104,7 @@ func TestClassify_Auth_DiagnosticCode(t *testing.T) { } func TestClassify_RateLimit_429(t *testing.T) { - err := &transport.Error{StatusCode: http.StatusTooManyRequests} - diag := Classify(err) + diag := Classify(&transport.Error{StatusCode: http.StatusTooManyRequests}) require.NotNil(t, diag) assert.Equal(t, CategoryRateLimit, diag.Category) } @@ -118,9 +112,7 @@ func TestClassify_RateLimit_429(t *testing.T) { func TestClassify_RateLimit_DiagnosticCode(t *testing.T) { err := &transport.Error{ StatusCode: http.StatusOK, - Errors: []transport.Diagnostic{ - {Code: transport.TooManyRequestsErrorCode}, - }, + Errors: []transport.Diagnostic{{Code: transport.TooManyRequestsErrorCode}}, } diag := Classify(err) require.NotNil(t, diag) @@ -142,7 +134,6 @@ func TestClassify_ServerErrors(t *testing.T) { t.Run(tt.name, func(t *testing.T) { diag := Classify(&transport.Error{StatusCode: tt.statusCode}) require.NotNil(t, diag) - // Category is dynamic: "Registry server error (HTTP 500)" assert.Contains(t, diag.Category, CategoryServerError) assert.Contains(t, diag.Category, tt.name) }) @@ -152,9 +143,7 @@ func TestClassify_ServerErrors(t *testing.T) { func TestClassify_ServerError_Unavailable(t *testing.T) { err := &transport.Error{ StatusCode: http.StatusOK, - Errors: []transport.Diagnostic{ - {Code: transport.UnavailableErrorCode}, - }, + Errors: []transport.Diagnostic{{Code: transport.UnavailableErrorCode}}, } diag := Classify(err) require.NotNil(t, diag) @@ -165,7 +154,6 @@ func TestClassify_DNS(t *testing.T) { err := &net.DNSError{Name: "registry.example.com", Err: "no such host"} diag := Classify(err) require.NotNil(t, diag) - // Category is dynamic: "DNS resolution failed for 'registry.example.com'" assert.Contains(t, diag.Category, CategoryDNS) assert.Contains(t, diag.Category, "registry.example.com") } @@ -186,32 +174,20 @@ func TestClassify_Timeout_OS(t *testing.T) { func TestClassify_Network_ConnRefused(t *testing.T) { err := &net.OpError{ - Op: "dial", - Net: "tcp", - Addr: &net.TCPAddr{ - IP: net.IPv4(127, 0, 0, 1), - Port: 443, - }, - Err: &os.SyscallError{ - Syscall: "connect", - Err: syscall.ECONNREFUSED, - }, + Op: "dial", Net: "tcp", + Addr: &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 443}, + Err: &os.SyscallError{Syscall: "connect", Err: syscall.ECONNREFUSED}, } diag := Classify(err) require.NotNil(t, diag) - // Category is dynamic: "Network connection failed to 127.0.0.1:443" assert.Contains(t, diag.Category, CategoryNetwork) assert.Contains(t, diag.Category, "127.0.0.1:443") } func TestClassify_Network_ConnReset(t *testing.T) { err := &net.OpError{ - Op: "read", - Net: "tcp", - Err: &os.SyscallError{ - Syscall: "read", - Err: syscall.ECONNRESET, - }, + Op: "read", Net: "tcp", + Err: &os.SyscallError{Syscall: "read", Err: syscall.ECONNRESET}, } diag := Classify(err) require.NotNil(t, diag) @@ -219,22 +195,19 @@ func TestClassify_Network_ConnReset(t *testing.T) { } func TestClassify_ImageNotFound(t *testing.T) { - err := errors.New("MANIFEST_UNKNOWN: manifest unknown") - diag := Classify(err) + diag := Classify(errors.New("MANIFEST_UNKNOWN: manifest unknown")) require.NotNil(t, diag) assert.Equal(t, CategoryImageNotFound, diag.Category) } func TestClassify_RepoNotFound(t *testing.T) { - err := errors.New("NAME_UNKNOWN: repository not found") - diag := Classify(err) + diag := Classify(errors.New("NAME_UNKNOWN: repository not found")) require.NotNil(t, diag) assert.Equal(t, CategoryRepoNotFound, diag.Category) } -func TestClassify_TrivyMediaType(t *testing.T) { - err := errors.New("MANIFEST_INVALID: media type vnd.aquasec.trivy not allowed") - diag := Classify(err) +func TestClassify_UnsupportedOCIMediaType(t *testing.T) { + diag := Classify(errors.New("MANIFEST_INVALID: media type vnd.aquasec.trivy not allowed")) require.NotNil(t, diag) assert.Equal(t, CategoryUnsupportedOCI, diag.Category) } @@ -247,51 +220,13 @@ func TestClassify_DeepWrapping(t *testing.T) { assert.Equal(t, CategoryTLS, diag.Category) } -func TestDiagnostic_Unwrap(t *testing.T) { +func TestClassify_Unwrap(t *testing.T) { originalErr := io.EOF diag := Classify(fmt.Errorf("wrap: %w", originalErr)) require.NotNil(t, diag) assert.True(t, errors.Is(diag, originalErr)) } -func TestDiagnostic_Format_NoColor(t *testing.T) { - t.Setenv("NO_COLOR", "1") - - diag := &Diagnostic{ - Category: CategoryNetwork, - OriginalErr: errors.New("test"), - Causes: []string{"cause1"}, - Solutions: []string{"fix1"}, - } - - output := diag.Format() - assert.NotContains(t, output, "\033[") - assert.Contains(t, output, CategoryNetwork) - assert.Contains(t, output, "cause1") - assert.Contains(t, output, "fix1") -} - -func TestDiagnostic_Format_Structure(t *testing.T) { - t.Setenv("NO_COLOR", "1") - - diag := &Diagnostic{ - Category: CategoryNetwork, - OriginalErr: errors.New("connection refused"), - Causes: []string{"Network down", "Firewall blocking"}, - Solutions: []string{"Check network", "Check firewall"}, - } - - output := diag.Format() - assert.Contains(t, output, "error: "+CategoryNetwork) - assert.Contains(t, output, "connection refused") - assert.Contains(t, output, "Possible causes:") - assert.Contains(t, output, "Network down") - assert.Contains(t, output, "Firewall blocking") - assert.Contains(t, output, "How to fix:") - assert.Contains(t, output, "Check network") - assert.Contains(t, output, "Check firewall") -} - func TestClassify_Priority_DNSOverNetwork(t *testing.T) { err := &net.DNSError{Name: "example.com", Err: "no such host"} diag := Classify(err) @@ -314,51 +249,20 @@ func TestClassify_WrappedAuth(t *testing.T) { assert.Equal(t, CategoryAuth401, diag.Category) } -func TestIsImageNotFound(t *testing.T) { - assert.True(t, IsImageNotFound(errors.New("MANIFEST_UNKNOWN: not found"))) - assert.True(t, IsImageNotFound(errors.New("404 Not Found"))) - assert.False(t, IsImageNotFound(errors.New("some other error"))) - assert.False(t, IsImageNotFound(nil)) -} +func TestClassify_AlreadyClassified(t *testing.T) { + first := Classify(io.EOF) + require.NotNil(t, first) -func TestIsRepoNotFound(t *testing.T) { - assert.True(t, IsRepoNotFound(errors.New("NAME_UNKNOWN: repo"))) - assert.False(t, IsRepoNotFound(errors.New("some other error"))) - assert.False(t, IsRepoNotFound(nil)) + second := Classify(first) + assert.Nil(t, second, "must not wrap an already classified error") } -func TestIsTrivyMediaTypeNotAllowed(t *testing.T) { - assert.True(t, IsTrivyMediaTypeNotAllowed(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy"))) - assert.True(t, IsTrivyMediaTypeNotAllowed(errors.New("MANIFEST_INVALID: application/octet-stream"))) - assert.False(t, IsTrivyMediaTypeNotAllowed(errors.New("MANIFEST_INVALID: other"))) - assert.False(t, IsTrivyMediaTypeNotAllowed(nil)) -} - -func TestDiagnostic_Error_PlainText(t *testing.T) { - diag := &Diagnostic{ - Category: CategoryNetwork, - OriginalErr: errors.New("connection refused"), - Causes: []string{"cause"}, - Solutions: []string{"fix"}, - } - - errStr := diag.Error() - assert.Equal(t, CategoryNetwork+": connection refused", errStr) - assert.NotContains(t, errStr, "\033[") - assert.NotContains(t, errStr, "Possible causes") -} - -func TestDiagnostic_Format_ForceColor(t *testing.T) { - t.Setenv("FORCE_COLOR", "1") - t.Setenv("NO_COLOR", "") - - diag := &Diagnostic{ - Category: CategoryEOF, - OriginalErr: errors.New("test"), - Causes: []string{"cause1"}, - Solutions: []string{"fix1"}, - } - - output := diag.Format() - assert.True(t, strings.Contains(output, "\033[")) +func TestIsUnsupportedOCIMediaType(t *testing.T) { + assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy"))) + assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: application/octet-stream"))) + assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: deckhouse.io.bdu.layer"))) + assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: vnd.cncf.openpolicyagent"))) + assert.False(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: other"))) + assert.False(t, isUnsupportedOCIMediaType(errors.New("some error without manifest invalid"))) + assert.False(t, isUnsupportedOCIMediaType(nil)) } diff --git a/pkg/registry/errdiag/doc.go b/pkg/registry/errdiag/doc.go new file mode 100644 index 00000000..3e3fa08d --- /dev/null +++ b/pkg/registry/errdiag/doc.go @@ -0,0 +1,49 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package errdiag classifies container registry errors and returns +// [diagnostic.HelpfulError] with user-friendly causes and solutions. +// +// # Usage +// +// if diag := errdiag.Classify(err); diag != nil { +// return diag // implements error interface via *diagnostic.HelpfulError +// } +// +// # Classification +// +// [Classify] uses Go's [errors.Is] and [errors.As] to detect error types from +// the standard library (crypto/x509, net, io) and go-containerregistry +// (transport.Error). Detection order matters - more specific checks run first: +// +// 1. EOF - io.EOF, io.ErrUnexpectedEOF (proxy/middleware termination) +// 2. TLS/Certificate - x509.UnknownAuthorityError, HostnameError, etc. +// 3. Authentication - transport.Error with HTTP 401/403 +// 4. Rate limiting - transport.Error with HTTP 429 +// 5. Server errors - transport.Error with HTTP 500/502/503/504 +// 6. DNS - net.DNSError +// 7. Timeout - context.DeadlineExceeded +// 8. Network - net.OpError, syscall.ECONNREFUSED, etc. +// 9. Image not found - error message contains "MANIFEST_UNKNOWN" +// 10. Repo not found - error message contains "NAME_UNKNOWN" +// 11. Unsupported OCI - unsupported OCI media types (e.g. Project Quay) +// +// DNS is checked before Network because [net.DNSError] satisfies [net.Error]. +// Timeout is checked before Network for the same reason. +// +// Error matchers for flow control (e.g., skipping optional images) are in the +// sibling package [github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch]. +package errdiag diff --git a/pkg/registry/errmatch/errmatch.go b/pkg/registry/errmatch/errmatch.go new file mode 100644 index 00000000..0e65652e --- /dev/null +++ b/pkg/registry/errmatch/errmatch.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package errmatch provides error matchers for container registry responses. +// These are used for flow control in mirror operations (e.g., skipping optional images). +package errmatch + +import ( + "errors" + "strings" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" +) + +// IsImageNotFound returns true if the error indicates that the requested image +// tag or manifest does not exist in the registry. +func IsImageNotFound(err error) bool { + if err == nil { + return false + } + + // Typed check: works for GET responses where registry returns JSON with error codes. + if hasDiagnosticCode(err, transport.ManifestUnknownErrorCode) { + return true + } + + // String fallback: HEAD responses have no body per HTTP spec, so transport.Error.Errors + // is empty. Also covers registries that return plain text instead of structured JSON. + errMsg := err.Error() + return strings.Contains(errMsg, "MANIFEST_UNKNOWN") || strings.Contains(errMsg, "404 Not Found") +} + +// IsRepoNotFound returns true if the error indicates that the requested +// repository does not exist in the registry. +func IsRepoNotFound(err error) bool { + if err == nil { + return false + } + + // Typed check: works for GET responses with structured JSON error codes. + if hasDiagnosticCode(err, transport.NameUnknownErrorCode) { + return true + } + + // String fallback: same as IsImageNotFound - covers HEAD responses and plain text errors. + return strings.Contains(err.Error(), "NAME_UNKNOWN") +} + +// hasDiagnosticCode checks if err is a *transport.Error containing +// a Diagnostic with the given error code. +func hasDiagnosticCode(err error, code transport.ErrorCode) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + for _, diag := range transportErr.Errors { + if diag.Code == code { + return true + } + } + + return false +} diff --git a/pkg/registry/errmatch/errmatch_test.go b/pkg/registry/errmatch/errmatch_test.go new file mode 100644 index 00000000..f6e2db2f --- /dev/null +++ b/pkg/registry/errmatch/errmatch_test.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package errmatch + +import ( + "errors" + "fmt" + "testing" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/stretchr/testify/assert" +) + +func TestIsImageNotFound_TypedError(t *testing.T) { + err := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.ManifestUnknownErrorCode, Message: "manifest unknown"}}, + } + assert.True(t, IsImageNotFound(err)) +} + +func TestIsImageNotFound_WrappedTypedError(t *testing.T) { + inner := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.ManifestUnknownErrorCode}}, + } + assert.True(t, IsImageNotFound(fmt.Errorf("get manifest: %w", inner))) +} + +func TestIsImageNotFound_FallbackString(t *testing.T) { + assert.True(t, IsImageNotFound(errors.New("MANIFEST_UNKNOWN: not found"))) + assert.True(t, IsImageNotFound(errors.New("404 Not Found"))) +} + +func TestIsImageNotFound_Negative(t *testing.T) { + assert.False(t, IsImageNotFound(errors.New("some other error"))) + assert.False(t, IsImageNotFound(nil)) +} + +func TestIsRepoNotFound_TypedError(t *testing.T) { + err := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.NameUnknownErrorCode, Message: "repository name not known"}}, + } + assert.True(t, IsRepoNotFound(err)) +} + +func TestIsRepoNotFound_WrappedTypedError(t *testing.T) { + inner := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.NameUnknownErrorCode}}, + } + assert.True(t, IsRepoNotFound(fmt.Errorf("check repo: %w", inner))) +} + +func TestIsRepoNotFound_FallbackString(t *testing.T) { + assert.True(t, IsRepoNotFound(errors.New("NAME_UNKNOWN: repo"))) +} + +func TestIsRepoNotFound_Negative(t *testing.T) { + assert.False(t, IsRepoNotFound(errors.New("some other error"))) + assert.False(t, IsRepoNotFound(nil)) +} diff --git a/scripts/demo-registry-errors/main.go b/scripts/demo-registry-errors/main.go new file mode 100644 index 00000000..2f2949b7 --- /dev/null +++ b/scripts/demo-registry-errors/main.go @@ -0,0 +1,323 @@ +// Standalone demo tool for showcasing registry error diagnostics. +// Starts local test servers that simulate various registry failure modes +// and runs errdiag.Classify against real errors from go-containerregistry. +// +// Usage: go run ./scripts/demo-registry-errors/ +// +// Press Enter to step through each scenario. +// NOT intended to be committed to the project. +package main + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "net/http/httptest" + "os" + "strings" + "time" + + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/remote" + + "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" +) + +const ( + bold = "\033[1m" + dim = "\033[2m" + reset = "\033[0m" + cyan = "\033[36m" +) + +type demo struct { + name string // scenario title + request string // what request is being made + setup string // how the server is configured + fn func() error // actual demo function +} + +var demos = []demo{ + { + name: "EOF (proxy/middleware terminating connection)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server accepts TCP, then closes connection immediately (Hijack + Close)", + fn: demoEOF, + }, + { + name: "TLS/certificate verification failed", + request: "HEAD https:///v2/test/manifests/latest (no CA trust)", + setup: "httptest.NewTLSServer with self-signed cert, client does NOT skip TLS verify", + fn: demoTLSCert, + }, + { + name: "Authentication failed (HTTP 401)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server returns 401 with {\"errors\":[{\"code\":\"UNAUTHORIZED\"}]}", + fn: demoAuth401, + }, + { + name: "Access denied (HTTP 403)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server returns 403 with {\"errors\":[{\"code\":\"DENIED\"}]}", + fn: demoAuth403, + }, + { + name: "Rate limited (HTTP 429)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server returns 429 with {\"errors\":[{\"code\":\"TOOMANYREQUESTS\"}]}", + fn: demoRateLimit, + }, + { + name: "Server error (HTTP 500)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server returns 500 with {\"errors\":[{\"code\":\"UNKNOWN\"}]}", + fn: demoServerError500, + }, + { + name: "Server error (HTTP 502)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server returns 502 Bad Gateway (raw HTML body)", + fn: demoServerError502, + }, + { + name: "Server error (HTTP 503)", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Server returns 503 with {\"errors\":[{\"code\":\"UNAVAILABLE\"}]}", + fn: demoServerError503, + }, + { + name: "DNS resolution failure", + request: "HEAD https://nonexistent.invalid:443/v2/test/manifests/latest", + setup: "No server - hostname uses .invalid TLD (RFC 2606, guaranteed unresolvable)", + fn: demoDNS, + }, + { + name: "Connection refused", + request: "HEAD http:///v2/test/manifests/latest", + setup: "Bind port with net.Listen, close it, then connect - nothing accepts", + fn: demoConnRefused, + }, + { + name: "Operation timeout", + request: "HEAD http:///v2/test/manifests/latest (timeout 1s)", + setup: "Server handler sleeps forever, client has context.WithTimeout(1s)", + fn: demoTimeout, + }, +} + +func main() { + _ = os.Setenv("FORCE_COLOR", "1") + scanner := bufio.NewScanner(os.Stdin) + + fmt.Println(bold + "=== Registry Error Diagnostics Demo ===" + reset) + fmt.Println(dim + "Press Enter to step through each scenario" + reset) + fmt.Println() + + for i, d := range demos { + // Clear screen and move cursor to top + fmt.Print("\033[2J\033[H") + + fmt.Println(bold + "=== Registry Error Diagnostics Demo ===" + reset) + fmt.Printf(dim+"Scenario %d/%d"+reset+"\n\n", i+1, len(demos)) + + fmt.Printf(bold+"--- %s ---"+reset+"\n", d.name) + fmt.Printf(cyan+" Request: "+reset+"%s\n", d.request) + fmt.Printf(cyan+" Setup: "+reset+"%s\n", d.setup) + + if err := d.fn(); err != nil { + fmt.Fprintf(os.Stderr, " demo setup error: %v\n", err) + } + + if i < len(demos)-1 { + fmt.Print(dim + "Press Enter for next..." + reset) + scanner.Scan() + } + } + + fmt.Println("\n" + bold + "=== Done ===" + reset) +} + +func headImage(ctx context.Context, host string, insecure bool) error { + var opts []name.Option + if insecure { + opts = append(opts, name.Insecure) + } + ref, err := name.ParseReference(host+"/test:latest", opts...) + if err != nil { + return err + } + _, err = remote.Head(ref, remote.WithContext(ctx)) + return err +} + +func showDiagnostic(err error) { + diag := errdiag.Classify(err) + if diag != nil { + fmt.Fprint(os.Stderr, diag.Format()) + } else { + fmt.Fprintf(os.Stderr, " [unclassified] %v\n", err) + } +} + +func writeError(w http.ResponseWriter, status int, code, msg string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(struct { + Errors []struct { + Code string `json:"code"` + Message string `json:"message"` + } `json:"errors"` + }{ + Errors: []struct { + Code string `json:"code"` + Message string `json:"message"` + }{{code, msg}}, + }) +} + +// --- Demo functions --- + +func demoEOF() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + h, ok := w.(http.Hijacker) + if !ok { + return + } + conn, _, _ := h.Hijack() + conn.Close() + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoTLSCert() error { + server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = io.WriteString(w, "ok") + })) + defer server.Close() + + ref, _ := name.ParseReference(trimHTTPS(server.URL)+"/test:latest", name.StrictValidation) + _, err := remote.Head(ref) + showDiagnostic(err) + return nil +} + +func demoAuth401() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + writeError(w, http.StatusUnauthorized, "UNAUTHORIZED", "authentication required") + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoAuth403() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + writeError(w, http.StatusForbidden, "DENIED", "requested access to the resource is denied") + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoRateLimit() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + writeError(w, http.StatusTooManyRequests, "TOOMANYREQUESTS", "rate limit exceeded") + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoServerError500() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + writeError(w, http.StatusInternalServerError, "UNKNOWN", "internal server error") + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoServerError502() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusBadGateway) + _, _ = io.WriteString(w, "502 Bad Gateway") + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoServerError503() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + writeError(w, http.StatusServiceUnavailable, "UNAVAILABLE", "service temporarily unavailable") + })) + defer server.Close() + + err := headImage(context.Background(), trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func demoDNS() error { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := headImage(ctx, "nonexistent.invalid:443", false) + showDiagnostic(err) + return nil +} + +func demoConnRefused() error { + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return err + } + addr := listener.Addr().String() + listener.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + connErr := headImage(ctx, addr, true) + showDiagnostic(connErr) + return nil +} + +func demoTimeout() error { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + select { + case <-time.After(30 * time.Second): + case <-r.Context().Done(): + } + })) + defer server.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + err := headImage(ctx, trimHTTP(server.URL), true) + showDiagnostic(err) + return nil +} + +func trimHTTP(url string) string { return strings.TrimPrefix(url, "http://") } +func trimHTTPS(url string) string { return strings.TrimPrefix(url, "https://") } From da62565a5022d5ddb30a1c6101a2d8b8e0105756 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 14:22:15 +0300 Subject: [PATCH 03/10] Move validation package to internal/mirror - Package is only used by mirror pull/push, no need to keep it in pkg Signed-off-by: Roman Berezkin --- internal/mirror/cmd/pull/pull.go | 2 +- internal/mirror/cmd/pull/pull_test.go | 2 +- internal/mirror/cmd/push/push.go | 2 +- .../libmirror => internal/mirror}/validation/registry_access.go | 0 .../mirror}/validation/registry_access_test.go | 0 5 files changed, 3 insertions(+), 3 deletions(-) rename {pkg/libmirror => internal/mirror}/validation/registry_access.go (100%) rename {pkg/libmirror => internal/mirror}/validation/registry_access_test.go (100%) diff --git a/internal/mirror/cmd/pull/pull.go b/internal/mirror/cmd/pull/pull.go index bdbabf4e..2224af3a 100644 --- a/internal/mirror/cmd/pull/pull.go +++ b/internal/mirror/cmd/pull/pull.go @@ -43,10 +43,10 @@ import ( pullflags "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/flags" "github.com/deckhouse/deckhouse-cli/internal/mirror/gostsums" "github.com/deckhouse/deckhouse-cli/internal/mirror/modules" + "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" registryservice "github.com/deckhouse/deckhouse-cli/pkg/registry/service" diff --git a/internal/mirror/cmd/pull/pull_test.go b/internal/mirror/cmd/pull/pull_test.go index a26a10d4..4bc488ce 100644 --- a/internal/mirror/cmd/pull/pull_test.go +++ b/internal/mirror/cmd/pull/pull_test.go @@ -37,7 +37,7 @@ import ( pullflags "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/flags" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" + "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" ) func TestNewCommand(t *testing.T) { diff --git a/internal/mirror/cmd/push/push.go b/internal/mirror/cmd/push/push.go index 783eb02f..422b93f1 100644 --- a/internal/mirror/cmd/push/push.go +++ b/internal/mirror/cmd/push/push.go @@ -35,10 +35,10 @@ import ( regclient "github.com/deckhouse/deckhouse/pkg/registry/client" "github.com/deckhouse/deckhouse-cli/internal/mirror" + "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" ) diff --git a/pkg/libmirror/validation/registry_access.go b/internal/mirror/validation/registry_access.go similarity index 100% rename from pkg/libmirror/validation/registry_access.go rename to internal/mirror/validation/registry_access.go diff --git a/pkg/libmirror/validation/registry_access_test.go b/internal/mirror/validation/registry_access_test.go similarity index 100% rename from pkg/libmirror/validation/registry_access_test.go rename to internal/mirror/validation/registry_access_test.go From 34428854f35d3db5b881284e982647841aada4f6 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 14:59:01 +0300 Subject: [PATCH 04/10] Remove demo script from diffs Signed-off-by: Roman Berezkin --- scripts/demo-registry-errors/main.go | 323 --------------------------- 1 file changed, 323 deletions(-) delete mode 100644 scripts/demo-registry-errors/main.go diff --git a/scripts/demo-registry-errors/main.go b/scripts/demo-registry-errors/main.go deleted file mode 100644 index 2f2949b7..00000000 --- a/scripts/demo-registry-errors/main.go +++ /dev/null @@ -1,323 +0,0 @@ -// Standalone demo tool for showcasing registry error diagnostics. -// Starts local test servers that simulate various registry failure modes -// and runs errdiag.Classify against real errors from go-containerregistry. -// -// Usage: go run ./scripts/demo-registry-errors/ -// -// Press Enter to step through each scenario. -// NOT intended to be committed to the project. -package main - -import ( - "bufio" - "context" - "encoding/json" - "fmt" - "io" - "net" - "net/http" - "net/http/httptest" - "os" - "strings" - "time" - - "github.com/google/go-containerregistry/pkg/name" - "github.com/google/go-containerregistry/pkg/v1/remote" - - "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" -) - -const ( - bold = "\033[1m" - dim = "\033[2m" - reset = "\033[0m" - cyan = "\033[36m" -) - -type demo struct { - name string // scenario title - request string // what request is being made - setup string // how the server is configured - fn func() error // actual demo function -} - -var demos = []demo{ - { - name: "EOF (proxy/middleware terminating connection)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server accepts TCP, then closes connection immediately (Hijack + Close)", - fn: demoEOF, - }, - { - name: "TLS/certificate verification failed", - request: "HEAD https:///v2/test/manifests/latest (no CA trust)", - setup: "httptest.NewTLSServer with self-signed cert, client does NOT skip TLS verify", - fn: demoTLSCert, - }, - { - name: "Authentication failed (HTTP 401)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server returns 401 with {\"errors\":[{\"code\":\"UNAUTHORIZED\"}]}", - fn: demoAuth401, - }, - { - name: "Access denied (HTTP 403)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server returns 403 with {\"errors\":[{\"code\":\"DENIED\"}]}", - fn: demoAuth403, - }, - { - name: "Rate limited (HTTP 429)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server returns 429 with {\"errors\":[{\"code\":\"TOOMANYREQUESTS\"}]}", - fn: demoRateLimit, - }, - { - name: "Server error (HTTP 500)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server returns 500 with {\"errors\":[{\"code\":\"UNKNOWN\"}]}", - fn: demoServerError500, - }, - { - name: "Server error (HTTP 502)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server returns 502 Bad Gateway (raw HTML body)", - fn: demoServerError502, - }, - { - name: "Server error (HTTP 503)", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Server returns 503 with {\"errors\":[{\"code\":\"UNAVAILABLE\"}]}", - fn: demoServerError503, - }, - { - name: "DNS resolution failure", - request: "HEAD https://nonexistent.invalid:443/v2/test/manifests/latest", - setup: "No server - hostname uses .invalid TLD (RFC 2606, guaranteed unresolvable)", - fn: demoDNS, - }, - { - name: "Connection refused", - request: "HEAD http:///v2/test/manifests/latest", - setup: "Bind port with net.Listen, close it, then connect - nothing accepts", - fn: demoConnRefused, - }, - { - name: "Operation timeout", - request: "HEAD http:///v2/test/manifests/latest (timeout 1s)", - setup: "Server handler sleeps forever, client has context.WithTimeout(1s)", - fn: demoTimeout, - }, -} - -func main() { - _ = os.Setenv("FORCE_COLOR", "1") - scanner := bufio.NewScanner(os.Stdin) - - fmt.Println(bold + "=== Registry Error Diagnostics Demo ===" + reset) - fmt.Println(dim + "Press Enter to step through each scenario" + reset) - fmt.Println() - - for i, d := range demos { - // Clear screen and move cursor to top - fmt.Print("\033[2J\033[H") - - fmt.Println(bold + "=== Registry Error Diagnostics Demo ===" + reset) - fmt.Printf(dim+"Scenario %d/%d"+reset+"\n\n", i+1, len(demos)) - - fmt.Printf(bold+"--- %s ---"+reset+"\n", d.name) - fmt.Printf(cyan+" Request: "+reset+"%s\n", d.request) - fmt.Printf(cyan+" Setup: "+reset+"%s\n", d.setup) - - if err := d.fn(); err != nil { - fmt.Fprintf(os.Stderr, " demo setup error: %v\n", err) - } - - if i < len(demos)-1 { - fmt.Print(dim + "Press Enter for next..." + reset) - scanner.Scan() - } - } - - fmt.Println("\n" + bold + "=== Done ===" + reset) -} - -func headImage(ctx context.Context, host string, insecure bool) error { - var opts []name.Option - if insecure { - opts = append(opts, name.Insecure) - } - ref, err := name.ParseReference(host+"/test:latest", opts...) - if err != nil { - return err - } - _, err = remote.Head(ref, remote.WithContext(ctx)) - return err -} - -func showDiagnostic(err error) { - diag := errdiag.Classify(err) - if diag != nil { - fmt.Fprint(os.Stderr, diag.Format()) - } else { - fmt.Fprintf(os.Stderr, " [unclassified] %v\n", err) - } -} - -func writeError(w http.ResponseWriter, status int, code, msg string) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(struct { - Errors []struct { - Code string `json:"code"` - Message string `json:"message"` - } `json:"errors"` - }{ - Errors: []struct { - Code string `json:"code"` - Message string `json:"message"` - }{{code, msg}}, - }) -} - -// --- Demo functions --- - -func demoEOF() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - h, ok := w.(http.Hijacker) - if !ok { - return - } - conn, _, _ := h.Hijack() - conn.Close() - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoTLSCert() error { - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - _, _ = io.WriteString(w, "ok") - })) - defer server.Close() - - ref, _ := name.ParseReference(trimHTTPS(server.URL)+"/test:latest", name.StrictValidation) - _, err := remote.Head(ref) - showDiagnostic(err) - return nil -} - -func demoAuth401() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - writeError(w, http.StatusUnauthorized, "UNAUTHORIZED", "authentication required") - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoAuth403() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - writeError(w, http.StatusForbidden, "DENIED", "requested access to the resource is denied") - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoRateLimit() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - writeError(w, http.StatusTooManyRequests, "TOOMANYREQUESTS", "rate limit exceeded") - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoServerError500() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - writeError(w, http.StatusInternalServerError, "UNKNOWN", "internal server error") - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoServerError502() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusBadGateway) - _, _ = io.WriteString(w, "502 Bad Gateway") - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoServerError503() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - writeError(w, http.StatusServiceUnavailable, "UNAVAILABLE", "service temporarily unavailable") - })) - defer server.Close() - - err := headImage(context.Background(), trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func demoDNS() error { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - err := headImage(ctx, "nonexistent.invalid:443", false) - showDiagnostic(err) - return nil -} - -func demoConnRefused() error { - listener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - return err - } - addr := listener.Addr().String() - listener.Close() - - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - connErr := headImage(ctx, addr, true) - showDiagnostic(connErr) - return nil -} - -func demoTimeout() error { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - select { - case <-time.After(30 * time.Second): - case <-r.Context().Done(): - } - })) - defer server.Close() - - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() - - err := headImage(ctx, trimHTTP(server.URL), true) - showDiagnostic(err) - return nil -} - -func trimHTTP(url string) string { return strings.TrimPrefix(url, "http://") } -func trimHTTPS(url string) string { return strings.TrimPrefix(url, "https://") } From 953dd8a4ec840e898893dc97cd8207932889e071 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 15:11:35 +0300 Subject: [PATCH 05/10] Move error classifier to internal/mirror - Classifier contains application-level advice (CLI flags, docs links) specific to mirror commands - Document classifier placement convention in README and doc.go Signed-off-by: Roman Berezkin --- internal/mirror/cmd/pull/pull.go | 2 +- internal/mirror/cmd/push/push.go | 2 +- .../mirror}/errdiag/classify.go | 0 .../errdiag/classify_integration_test.go | 0 .../mirror}/errdiag/classify_network_test.go | 0 .../mirror}/errdiag/classify_test.go | 0 .../mirror}/errdiag/doc.go | 4 ++-- pkg/diagnostic/README.md | 23 +++++++++++++------ pkg/diagnostic/doc.go | 2 +- 9 files changed, 21 insertions(+), 12 deletions(-) rename {pkg/registry => internal/mirror}/errdiag/classify.go (100%) rename {pkg/registry => internal/mirror}/errdiag/classify_integration_test.go (100%) rename {pkg/registry => internal/mirror}/errdiag/classify_network_test.go (100%) rename {pkg/registry => internal/mirror}/errdiag/classify_test.go (100%) rename {pkg/registry => internal/mirror}/errdiag/doc.go (95%) diff --git a/internal/mirror/cmd/pull/pull.go b/internal/mirror/cmd/pull/pull.go index 2224af3a..0100da1b 100644 --- a/internal/mirror/cmd/pull/pull.go +++ b/internal/mirror/cmd/pull/pull.go @@ -41,6 +41,7 @@ import ( "github.com/deckhouse/deckhouse-cli/internal" "github.com/deckhouse/deckhouse-cli/internal/mirror" pullflags "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/flags" + "github.com/deckhouse/deckhouse-cli/internal/mirror/errdiag" "github.com/deckhouse/deckhouse-cli/internal/mirror/gostsums" "github.com/deckhouse/deckhouse-cli/internal/mirror/modules" "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" @@ -48,7 +49,6 @@ import ( "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" - "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" registryservice "github.com/deckhouse/deckhouse-cli/pkg/registry/service" "github.com/deckhouse/deckhouse-cli/pkg/stub" ) diff --git a/internal/mirror/cmd/push/push.go b/internal/mirror/cmd/push/push.go index 422b93f1..edbd04dd 100644 --- a/internal/mirror/cmd/push/push.go +++ b/internal/mirror/cmd/push/push.go @@ -35,12 +35,12 @@ import ( regclient "github.com/deckhouse/deckhouse/pkg/registry/client" "github.com/deckhouse/deckhouse-cli/internal/mirror" + "github.com/deckhouse/deckhouse-cli/internal/mirror/errdiag" "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" - "github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag" ) // CLI Parameters diff --git a/pkg/registry/errdiag/classify.go b/internal/mirror/errdiag/classify.go similarity index 100% rename from pkg/registry/errdiag/classify.go rename to internal/mirror/errdiag/classify.go diff --git a/pkg/registry/errdiag/classify_integration_test.go b/internal/mirror/errdiag/classify_integration_test.go similarity index 100% rename from pkg/registry/errdiag/classify_integration_test.go rename to internal/mirror/errdiag/classify_integration_test.go diff --git a/pkg/registry/errdiag/classify_network_test.go b/internal/mirror/errdiag/classify_network_test.go similarity index 100% rename from pkg/registry/errdiag/classify_network_test.go rename to internal/mirror/errdiag/classify_network_test.go diff --git a/pkg/registry/errdiag/classify_test.go b/internal/mirror/errdiag/classify_test.go similarity index 100% rename from pkg/registry/errdiag/classify_test.go rename to internal/mirror/errdiag/classify_test.go diff --git a/pkg/registry/errdiag/doc.go b/internal/mirror/errdiag/doc.go similarity index 95% rename from pkg/registry/errdiag/doc.go rename to internal/mirror/errdiag/doc.go index 3e3fa08d..00dfee76 100644 --- a/pkg/registry/errdiag/doc.go +++ b/internal/mirror/errdiag/doc.go @@ -44,6 +44,6 @@ limitations under the License. // DNS is checked before Network because [net.DNSError] satisfies [net.Error]. // Timeout is checked before Network for the same reason. // -// Error matchers for flow control (e.g., skipping optional images) are in the -// sibling package [github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch]. +// Error matchers for flow control (e.g., skipping optional images) are in +// [github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch]. package errdiag diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md index a95fed64..ff705398 100644 --- a/pkg/diagnostic/README.md +++ b/pkg/diagnostic/README.md @@ -84,17 +84,25 @@ error: TLS/certificate verification failed <-- Category `Error()` returns plain text for logs: `"Category: OriginalErr.Error()"`. `Unwrap()` returns `OriginalErr` so `errors.Is`/`errors.As` work through it. -## Example Package layout +## Where classifiers live + +Classifiers are **application/UI logic**, not library code. They contain +user-facing advice (CLI flags, links to docs) that is specific to a command group. +Place them in `internal/` next to the commands they serve. ``` -pkg/diagnostic/ HelpfulError + Format (generic, no domain logic) -pkg/registry/errdiag/ Classify - registry error classifier -pkg/registry/errmatch/ IsImageNotFound, IsRepoNotFound - for flow control +pkg/diagnostic/ HelpfulError + Format (generic, reusable) +pkg/registry/errmatch/ error matchers (generic, reusable) +internal/mirror/errdiag/ mirror classifier (advice about --tls-skip-verify, --license) +internal/backup/errdiag/ backup classifier (advice about etcdctl, kubeconfig) ``` +Why not `pkg/`: solutions mention specific CLI flags (`--tls-skip-verify`, `--license`). +A different command group working with the same registry would need different advice. + ## Adding diagnostics to a new command -**1. Create a classifier** for your domain: +**1. Create a classifier** next to your command group: ```go // internal/backup/errdiag/classify.go @@ -139,7 +147,8 @@ regardless of which classifier produced it. ## Rules (Best Practice) +- Classifiers go in `internal//errdiag/` - they are application logic, not libraries - Classify in the **leaf command** (RunE), not in libraries or root.go -- Each domain uses its **own classifier** - prevents false diagnostics +- Each command group uses its **own classifier** - prevents false diagnostics - Skip classification if the error is already a `*HelpfulError` (see guard in the example above) -- `Causes` and `Solutions` are optional - empty slices are omitted from output, but highly recommended to provide those +- `Causes` and `Solutions` are optional but highly recommended diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go index 53732604..f7a62758 100644 --- a/pkg/diagnostic/doc.go +++ b/pkg/diagnostic/doc.go @@ -74,7 +74,7 @@ limitations under the License. // To add diagnostics for a new domain (e.g. backup), create a Classify function // that wraps known errors into *[HelpfulError]: // -// // pkg/backup/errdiag/classify.go +// // internal/backup/errdiag/classify.go // func Classify(err error) *diagnostic.HelpfulError { // if isETCDError(err) { // return &diagnostic.HelpfulError{ From e91f118b4c72f6801473c8b6dd5750f140242bfa Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 19:16:40 +0300 Subject: [PATCH 06/10] Split error diagnostics into per-command classifiers - Pull and push each have their own errdetect package with command-specific advice - Pull auth suggests --license and --source-login, push suggests --registry-login - Push references positional argument, pull references --source flag Signed-off-by: Roman Berezkin --- .../pull/errdetect}/classify.go | 226 +++++----- .../cmd/pull/errdetect/classify_test.go | 96 +++++ internal/mirror/cmd/pull/pull.go | 4 +- .../mirror/cmd/push/errdetect/classify.go | 399 ++++++++++++++++++ .../cmd/push/errdetect/classify_test.go | 92 ++++ internal/mirror/cmd/push/push.go | 6 +- .../errdiag/classify_integration_test.go | 209 --------- .../mirror/errdiag/classify_network_test.go | 41 -- internal/mirror/errdiag/classify_test.go | 268 ------------ internal/mirror/errdiag/doc.go | 49 --- 10 files changed, 688 insertions(+), 702 deletions(-) rename internal/mirror/{errdiag => cmd/pull/errdetect}/classify.go (54%) create mode 100644 internal/mirror/cmd/pull/errdetect/classify_test.go create mode 100644 internal/mirror/cmd/push/errdetect/classify.go create mode 100644 internal/mirror/cmd/push/errdetect/classify_test.go delete mode 100644 internal/mirror/errdiag/classify_integration_test.go delete mode 100644 internal/mirror/errdiag/classify_network_test.go delete mode 100644 internal/mirror/errdiag/classify_test.go delete mode 100644 internal/mirror/errdiag/doc.go diff --git a/internal/mirror/errdiag/classify.go b/internal/mirror/cmd/pull/errdetect/classify.go similarity index 54% rename from internal/mirror/errdiag/classify.go rename to internal/mirror/cmd/pull/errdetect/classify.go index bbc83790..14547452 100644 --- a/internal/mirror/errdiag/classify.go +++ b/internal/mirror/cmd/pull/errdetect/classify.go @@ -14,7 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -package errdiag +// Package errdetect classifies registry errors for d8 mirror pull +// with pull-specific causes and solutions. +package errdetect import ( "context" @@ -25,7 +27,6 @@ import ( "net" "net/http" "os" - "strings" "syscall" "github.com/google/go-containerregistry/pkg/v1/remote/transport" @@ -34,108 +35,95 @@ import ( "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" ) -// Error category names displayed to the user after "error: ". -// Some categories are extended with dynamic details (hostname, port, HTTP code) -// at classification time via fmt.Sprintf. const ( - CategoryEOF = "Connection terminated unexpectedly (EOF)" - CategoryTLS = "TLS/certificate verification failed" - CategoryAuth = "Authentication failed" - CategoryAuth401 = "Authentication failed (HTTP 401 Unauthorized)" - CategoryAuth403 = "Access denied (HTTP 403 Forbidden)" - CategoryRateLimit = "Rate limited by registry (HTTP 429 Too Many Requests)" - CategoryServerError = "Registry server error" - CategoryDNS = "DNS resolution failed" - CategoryTimeout = "Operation timed out" - CategoryNetwork = "Network connection failed" - CategoryImageNotFound = "Image not found in registry" - CategoryRepoNotFound = "Repository not found in registry" - CategoryUnsupportedOCI = "Unsupported OCI artifact type" + categoryEOF = "Connection terminated unexpectedly (EOF)" + categoryTLS = "TLS/certificate verification failed" + categoryAuth = "Authentication failed" + categoryAuth401 = "Authentication failed (HTTP 401 Unauthorized)" + categoryAuth403 = "Access denied (HTTP 403 Forbidden)" + categoryRateLimit = "Rate limited by registry (HTTP 429 Too Many Requests)" + categoryServerError = "Registry server error" + categoryDNS = "DNS resolution failed" + categoryTimeout = "Operation timed out" + categoryNetwork = "Network connection failed" + categoryImageNotFound = "Image not found in registry" + categoryRepoNotFound = "Repository not found in registry" ) -// Classify analyzes an error and returns a *diagnostic.HelpfulError if -// the error can be classified into a known category, or nil otherwise. -// Detection order matters: more specific checks come before general ones. -func Classify(err error) *diagnostic.HelpfulError { +// Diagnose analyzes an error and returns a *diagnostic.HelpfulError +// with pull-specific causes and solutions, or nil if the error is not recognized. +func Diagnose(err error) *diagnostic.HelpfulError { if err == nil { return nil } - // Already classified - don't wrap twice. var helpErr *diagnostic.HelpfulError if errors.As(err, &helpErr) { return nil } switch { - case isEOFError(err): + case isEOF(err): return &diagnostic.HelpfulError{ - Category: CategoryEOF, + Category: categoryEOF, OriginalErr: err, Causes: []string{ "Corporate proxy or middleware intercepting and terminating HTTPS connections", - "Registry server closed the connection unexpectedly", + "Source registry closed the connection unexpectedly", "Network device (firewall, load balancer) dropping packets", }, Solutions: []string{ "Check if a corporate proxy is intercepting HTTPS traffic", "If using a proxy, ensure it is configured to pass through registry traffic", - "Use --tls-skip-verify flag if a proxy is replacing TLS certificates", "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", }, } case isCertificateError(err): return &diagnostic.HelpfulError{ - Category: CategoryTLS, + Category: categoryTLS, OriginalErr: err, Causes: []string{ - "Self-signed certificate without proper trust chain", + "Self-signed certificate on the source registry", "Certificate expired or not yet valid", - "Hostname mismatch between certificate and registry URL", "Corporate proxy or middleware intercepting HTTPS connections", }, Solutions: []string{ "Use --tls-skip-verify flag to skip TLS verification (not recommended for production)", - "Add the registry's CA certificate to your system trust store", - "Verify the registry URL hostname matches the certificate", + "Add the source registry's CA certificate to your system trust store", "Verify system clock is correct (expired certificates can be caused by wrong time)", }, } case isAuthenticationError(err): - var transportErr *transport.Error - category := CategoryAuth - if errors.As(err, &transportErr) { - switch transportErr.StatusCode { - case http.StatusUnauthorized: - category = CategoryAuth401 - case http.StatusForbidden: - category = CategoryAuth403 - } + category := categoryAuth + if code := authStatusCode(err); code == http.StatusUnauthorized { + category = categoryAuth401 + } else if code == http.StatusForbidden { + category = categoryAuth403 } return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ - "Invalid or expired credentials", - "License key or registry credentials are incorrect or not provided", - "Insufficient permissions for the requested operation", + "License key is invalid, expired, or not provided", + "Source registry credentials are incorrect", + "Insufficient permissions for the requested images", }, Solutions: []string{ - "For pull: verify your license key and pass it with --license flag", - "For push: verify --registry-login and --registry-password are correct", + "Verify your license key and pass it with --license flag", + "For custom source registries, use --source-login and --source-password", "Contact registry administrator to verify access rights", }, } case isRateLimitError(err): return &diagnostic.HelpfulError{ - Category: CategoryRateLimit, + Category: categoryRateLimit, OriginalErr: err, Causes: []string{ - "Too many requests to the registry in a short time", + "Too many requests to the source registry in a short time", "Registry-side rate limiting policy", }, Solutions: []string{ @@ -145,44 +133,42 @@ func Classify(err error) *diagnostic.HelpfulError { } case isServerError(err): - var transportErr *transport.Error - category := CategoryServerError - if errors.As(err, &transportErr) { - category = fmt.Sprintf("%s (HTTP %d)", CategoryServerError, transportErr.StatusCode) + category := categoryServerError + if code := serverStatusCode(err); code != 0 { + category = fmt.Sprintf("%s (HTTP %d)", categoryServerError, code) } return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ - "Registry server is experiencing internal errors", + "Source registry is experiencing internal errors", "Backend storage is temporarily unavailable", "Registry is overloaded or being maintained", }, Solutions: []string{ "Wait a few minutes and retry the operation", - "Check registry server status and health", + "Check source registry status and health", "Contact registry administrator if the problem persists", }, } case isDNSError(err): - var dnsErr *net.DNSError - category := CategoryDNS - if errors.As(err, &dnsErr) && dnsErr.Name != "" { - category = fmt.Sprintf("%s for '%s'", CategoryDNS, dnsErr.Name) + category := categoryDNS + if name := dnsHostname(err); name != "" { + category = fmt.Sprintf("%s for '%s'", categoryDNS, name) } return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ - "Registry hostname cannot be resolved by DNS", + "Source registry hostname cannot be resolved by DNS", "DNS server is unreachable or not responding", - "Incorrect registry URL or typo in hostname", + "Incorrect source registry URL or typo in hostname", }, Solutions: []string{ - "Verify the registry URL is spelled correctly", + "Verify the --source registry URL is spelled correctly", "Check your DNS server configuration", "Try using the registry's IP address instead of hostname", }, @@ -190,98 +176,75 @@ func Classify(err error) *diagnostic.HelpfulError { case isTimeoutError(err): return &diagnostic.HelpfulError{ - Category: CategoryTimeout, + Category: categoryTimeout, OriginalErr: err, Causes: []string{ - "Registry server took too long to respond", + "Source registry took too long to respond", "Network latency is too high", "Firewall silently dropping packets (no RST, no ICMP)", }, Solutions: []string{ - "Check network connectivity to the registry", - "Try increasing the timeout with --timeout flag", + "Check network connectivity to the source registry", "Verify firewall rules allow outbound HTTPS (port 443)", }, } case isNetworkError(err): - var opErr *net.OpError - category := CategoryNetwork - if errors.As(err, &opErr) && opErr.Addr != nil { - category = fmt.Sprintf("%s to %s", CategoryNetwork, opErr.Addr.String()) + category := categoryNetwork + if addr := networkAddr(err); addr != "" { + category = fmt.Sprintf("%s to %s", categoryNetwork, addr) } return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, Causes: []string{ - "Network connectivity issues or no internet connection", + "No network connection to the source registry", "Firewall or security group blocking the connection", - "Registry server is down or unreachable", + "Source registry is down or unreachable", }, Solutions: []string{ "Check your network connection and internet access", "Verify firewall rules allow outbound HTTPS (port 443)", - "Test connectivity with: curl -v https://", + "Test connectivity with: curl -v https://", }, } case errmatch.IsImageNotFound(err): return &diagnostic.HelpfulError{ - Category: CategoryImageNotFound, + Category: categoryImageNotFound, OriginalErr: err, Causes: []string{ - "Image tag doesn't exist in the registry", + "Image tag doesn't exist in the source registry", "Incorrect image name or tag specified", }, Solutions: []string{ - "Verify the image name and tag are correct", - "Check if you have permission to access this image", + "Verify the source registry path with --source flag", + "Check if the requested Deckhouse version exists", }, } case errmatch.IsRepoNotFound(err): return &diagnostic.HelpfulError{ - Category: CategoryRepoNotFound, + Category: categoryRepoNotFound, OriginalErr: err, Causes: []string{ - "Repository doesn't exist in the registry", - "Incorrect repository path or name", + "Repository doesn't exist in the source registry", + "Incorrect source registry path", }, Solutions: []string{ - "Verify the repository path is correct", + "Verify the --source registry path is correct", "Ensure you have permission to access this repository", }, } - - case isUnsupportedOCIMediaType(err): - return &diagnostic.HelpfulError{ - Category: CategoryUnsupportedOCI, - OriginalErr: err, - Causes: []string{ - "Registry doesn't support required OCI media types for Deckhouse artifacts", - "Project Quay or similar registry not configured for custom artifact types", - }, - Solutions: []string{ - "Configure registry to allow custom OCI artifact types", - "See: https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry", - "For Project Quay, add the following to config.yaml and retry push:\n" + - " FEATURE_GENERAL_OCI_SUPPORT: true\n" + - " ALLOWED_OCI_ARTIFACT_TYPES:\n" + - " \"application/octet-stream\":\n" + - " - \"application/deckhouse.io.bdu.layer.v1.tar+gzip\"\n" + - " - \"application/vnd.cncf.openpolicyagent.layer.v1.tar+gzip\"\n" + - " \"application/vnd.aquasec.trivy.config.v1+json\":\n" + - " - \"application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip\"\n" + - " - \"application/vnd.aquasec.trivy.db.layer.v1.tar+gzip\"", - }, - } } return nil } -func isEOFError(err error) bool { +// --- detection functions --- + +func isEOF(err error) bool { return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) } @@ -322,6 +285,14 @@ func isAuthenticationError(err error) bool { return false } +func authStatusCode(err error) int { + var transportErr *transport.Error + if errors.As(err, &transportErr) { + return transportErr.StatusCode + } + return 0 +} + func isRateLimitError(err error) bool { var transportErr *transport.Error if !errors.As(err, &transportErr) { @@ -364,17 +335,32 @@ func isServerError(err error) bool { return false } +func serverStatusCode(err error) int { + var transportErr *transport.Error + if errors.As(err, &transportErr) { + return transportErr.StatusCode + } + return 0 +} + func isDNSError(err error) bool { var dnsErr *net.DNSError return errors.As(err, &dnsErr) } +func dnsHostname(err error) string { + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + return dnsErr.Name + } + return "" +} + func isTimeoutError(err error) bool { return errors.Is(err, context.DeadlineExceeded) || errors.Is(err, os.ErrDeadlineExceeded) } func isNetworkError(err error) bool { - // DNS and timeout are checked before this, so we skip them here if isDNSError(err) || isTimeoutError(err) { return false } @@ -404,30 +390,10 @@ func isNetworkError(err error) bool { return false } -// unsupportedOCIMediaTypes lists media type substrings whose rejection by a -// registry (via MANIFEST_INVALID) indicates an OCI artifact configuration issue. -var unsupportedOCIMediaTypes = []string{ - "vnd.aquasec.trivy", - "application/octet-stream", - "deckhouse.io.bdu", - "vnd.cncf.openpolicyagent", -} - -func isUnsupportedOCIMediaType(err error) bool { - if err == nil { - return false +func networkAddr(err error) string { + var opErr *net.OpError + if errors.As(err, &opErr) && opErr.Addr != nil { + return opErr.Addr.String() } - - errMsg := err.Error() - if !strings.Contains(errMsg, "MANIFEST_INVALID") { - return false - } - - for _, mediaType := range unsupportedOCIMediaTypes { - if strings.Contains(errMsg, mediaType) { - return true - } - } - - return false + return "" } diff --git a/internal/mirror/cmd/pull/errdetect/classify_test.go b/internal/mirror/cmd/pull/errdetect/classify_test.go new file mode 100644 index 00000000..648f5dc7 --- /dev/null +++ b/internal/mirror/cmd/pull/errdetect/classify_test.go @@ -0,0 +1,96 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package errdetect + +import ( + "crypto/x509" + "errors" + "fmt" + "io" + "net/http" + "strings" + "testing" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" +) + +func TestDiagnose_Nil(t *testing.T) { + assert.Nil(t, Diagnose(nil)) +} + +func TestDiagnose_Unclassified(t *testing.T) { + assert.Nil(t, Diagnose(errors.New("some random error"))) +} + +func TestDiagnose_AlreadyClassified(t *testing.T) { + first := Diagnose(io.EOF) + require.NotNil(t, first) + assert.Nil(t, Diagnose(first)) +} + +func TestDiagnose_AllCategories(t *testing.T) { + tests := []struct { + name string + err error + category string + }{ + {"EOF", io.EOF, categoryEOF}, + {"TLS", fmt.Errorf("reg: %w", x509.UnknownAuthorityError{}), categoryTLS}, + {"Auth401", &transport.Error{StatusCode: http.StatusUnauthorized}, categoryAuth401}, + {"Auth403", &transport.Error{StatusCode: http.StatusForbidden}, categoryAuth403}, + {"RateLimit", &transport.Error{StatusCode: http.StatusTooManyRequests}, categoryRateLimit}, + {"Server500", &transport.Error{StatusCode: http.StatusInternalServerError}, categoryServerError}, + {"ImageNotFound", errors.New("MANIFEST_UNKNOWN: not found"), categoryImageNotFound}, + {"RepoNotFound", errors.New("NAME_UNKNOWN: repo"), categoryRepoNotFound}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diag := Diagnose(tt.err) + require.NotNil(t, diag) + assert.Contains(t, diag.Category, tt.category) + }) + } +} + +func TestDiagnose_PullSpecificAuth(t *testing.T) { + diag := Diagnose(&transport.Error{StatusCode: http.StatusUnauthorized}) + require.NotNil(t, diag) + + solutions := strings.Join(diag.Solutions, " ") + assert.Contains(t, solutions, "--license") + assert.Contains(t, solutions, "--source-login") + assert.NotContains(t, solutions, "--registry-login") + assert.NotContains(t, solutions, "--registry-password") +} + +func TestDiagnose_NoUnsupportedOCI(t *testing.T) { + assert.Nil(t, Diagnose(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy"))) +} + +func TestDiagnose_Unwrap(t *testing.T) { + diag := Diagnose(io.EOF) + require.NotNil(t, diag) + + var helpErr *diagnostic.HelpfulError + require.True(t, errors.As(diag, &helpErr)) + assert.True(t, errors.Is(diag, io.EOF)) +} diff --git a/internal/mirror/cmd/pull/pull.go b/internal/mirror/cmd/pull/pull.go index 0100da1b..f7a0f015 100644 --- a/internal/mirror/cmd/pull/pull.go +++ b/internal/mirror/cmd/pull/pull.go @@ -40,8 +40,8 @@ import ( "github.com/deckhouse/deckhouse-cli/internal" "github.com/deckhouse/deckhouse-cli/internal/mirror" + "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/errdetect" pullflags "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/flags" - "github.com/deckhouse/deckhouse-cli/internal/mirror/errdiag" "github.com/deckhouse/deckhouse-cli/internal/mirror/gostsums" "github.com/deckhouse/deckhouse-cli/internal/mirror/modules" "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" @@ -120,7 +120,7 @@ func pull(cmd *cobra.Command, _ []string) error { puller.logger.WarnLn("Operation cancelled by user") return nil } - if diag := errdiag.Classify(err); diag != nil { + if diag := errdetect.Diagnose(err); diag != nil { return diag } return fmt.Errorf("pull failed: %w", err) diff --git a/internal/mirror/cmd/push/errdetect/classify.go b/internal/mirror/cmd/push/errdetect/classify.go new file mode 100644 index 00000000..c62a1f10 --- /dev/null +++ b/internal/mirror/cmd/push/errdetect/classify.go @@ -0,0 +1,399 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package errdetect classifies registry errors for d8 mirror push +// with push-specific causes and solutions. +package errdetect + +import ( + "context" + "crypto/x509" + "errors" + "fmt" + "io" + "net" + "net/http" + "os" + "syscall" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" + "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" +) + +const ( + categoryEOF = "Connection terminated unexpectedly (EOF)" + categoryTLS = "TLS/certificate verification failed" + categoryAuth = "Authentication failed" + categoryAuth401 = "Authentication failed (HTTP 401 Unauthorized)" + categoryAuth403 = "Access denied (HTTP 403 Forbidden)" + categoryRateLimit = "Rate limited by registry (HTTP 429 Too Many Requests)" + categoryServerError = "Registry server error" + categoryDNS = "DNS resolution failed" + categoryTimeout = "Operation timed out" + categoryNetwork = "Network connection failed" + categoryImageNotFound = "Image not found in registry" + categoryRepoNotFound = "Repository not found in registry" +) + +// Diagnose analyzes an error and returns a *diagnostic.HelpfulError +// with push-specific causes and solutions, or nil if the error is not recognized. +func Diagnose(err error) *diagnostic.HelpfulError { + if err == nil { + return nil + } + + var helpErr *diagnostic.HelpfulError + if errors.As(err, &helpErr) { + return nil + } + + switch { + case isEOF(err): + return &diagnostic.HelpfulError{ + Category: categoryEOF, + OriginalErr: err, + Causes: []string{ + "Corporate proxy or middleware intercepting and terminating HTTPS connections", + "Target registry closed the connection unexpectedly", + "Network device (firewall, load balancer) dropping packets", + }, + Solutions: []string{ + "Check if a corporate proxy is intercepting HTTPS traffic", + "If using a proxy, ensure it is configured to pass through registry traffic", + "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", + }, + } + + case isCertificateError(err): + return &diagnostic.HelpfulError{ + Category: categoryTLS, + OriginalErr: err, + Causes: []string{ + "Self-signed certificate on the target registry", + "Certificate expired or not yet valid", + "Corporate proxy or middleware intercepting HTTPS connections", + }, + Solutions: []string{ + "Use --tls-skip-verify flag to skip TLS verification (not recommended for production)", + "Add the target registry's CA certificate to your system trust store", + "Verify system clock is correct (expired certificates can be caused by wrong time)", + }, + } + + case isAuthenticationError(err): + category := categoryAuth + if code := authStatusCode(err); code == http.StatusUnauthorized { + category = categoryAuth401 + } else if code == http.StatusForbidden { + category = categoryAuth403 + } + + return &diagnostic.HelpfulError{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Registry credentials are invalid or not provided", + "Account does not have push permissions", + "Repository path requires different access rights", + }, + Solutions: []string{ + "Verify --registry-login and --registry-password are correct", + "Ensure the account has write access to the target repository", + "Contact registry administrator to verify push permissions", + }, + } + + case isRateLimitError(err): + return &diagnostic.HelpfulError{ + Category: categoryRateLimit, + OriginalErr: err, + Causes: []string{ + "Too many requests to the target registry in a short time", + "Registry-side rate limiting policy", + }, + Solutions: []string{ + "Wait a few minutes and retry the operation", + "Contact registry administrator to increase rate limits", + }, + } + + case isServerError(err): + category := categoryServerError + if code := serverStatusCode(err); code != 0 { + category = fmt.Sprintf("%s (HTTP %d)", categoryServerError, code) + } + + return &diagnostic.HelpfulError{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Target registry is experiencing internal errors", + "Backend storage is temporarily unavailable", + "Registry is overloaded or being maintained", + }, + Solutions: []string{ + "Wait a few minutes and retry the operation", + "Check target registry status and health", + "Contact registry administrator if the problem persists", + }, + } + + case isDNSError(err): + category := categoryDNS + if name := dnsHostname(err); name != "" { + category = fmt.Sprintf("%s for '%s'", categoryDNS, name) + } + + return &diagnostic.HelpfulError{ + Category: category, + OriginalErr: err, + Causes: []string{ + "Target registry hostname cannot be resolved by DNS", + "DNS server is unreachable or not responding", + "Incorrect registry address or typo in hostname", + }, + Solutions: []string{ + "Verify the argument is spelled correctly", + "Check your DNS server configuration", + "Try using the registry's IP address instead of hostname", + }, + } + + case isTimeoutError(err): + return &diagnostic.HelpfulError{ + Category: categoryTimeout, + OriginalErr: err, + Causes: []string{ + "Target registry took too long to respond", + "Network latency is too high", + "Firewall silently dropping packets (no RST, no ICMP)", + }, + Solutions: []string{ + "Check network connectivity to the target registry", + "Verify firewall rules allow outbound HTTPS (port 443)", + }, + } + + case isNetworkError(err): + category := categoryNetwork + if addr := networkAddr(err); addr != "" { + category = fmt.Sprintf("%s to %s", categoryNetwork, addr) + } + + return &diagnostic.HelpfulError{ + Category: category, + OriginalErr: err, + Causes: []string{ + "No network connection to the target registry", + "Firewall or security group blocking the connection", + "Target registry is down or unreachable", + }, + Solutions: []string{ + "Check your network connection", + "Verify firewall rules allow outbound HTTPS (port 443)", + "Test connectivity with: curl -v https://", + }, + } + + case errmatch.IsImageNotFound(err): + return &diagnostic.HelpfulError{ + Category: categoryImageNotFound, + OriginalErr: err, + Causes: []string{ + "Expected image is missing in the target registry", + "Previous push may have been interrupted", + }, + Solutions: []string{ + "Retry the push operation", + "Verify the argument is correct", + }, + } + + case errmatch.IsRepoNotFound(err): + return &diagnostic.HelpfulError{ + Category: categoryRepoNotFound, + OriginalErr: err, + Causes: []string{ + "Repository doesn't exist in the target registry", + "Incorrect argument", + }, + Solutions: []string{ + "Verify the argument is correct", + "Ensure the repository is created in the target registry", + }, + } + } + + return nil +} + +// --- detection functions --- + +func isEOF(err error) bool { + return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) +} + +func isCertificateError(err error) bool { + var ( + unknownAuthErr x509.UnknownAuthorityError + certInvalidErr x509.CertificateInvalidError + hostnameErr x509.HostnameError + systemRootsErr x509.SystemRootsError + constraintErr x509.ConstraintViolationError + insecureAlgErr x509.InsecureAlgorithmError + ) + + return errors.As(err, &unknownAuthErr) || + errors.As(err, &certInvalidErr) || + errors.As(err, &hostnameErr) || + errors.As(err, &systemRootsErr) || + errors.As(err, &constraintErr) || + errors.As(err, &insecureAlgErr) +} + +func isAuthenticationError(err error) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + if transportErr.StatusCode == http.StatusUnauthorized || transportErr.StatusCode == http.StatusForbidden { + return true + } + + for _, diag := range transportErr.Errors { + if diag.Code == transport.UnauthorizedErrorCode || diag.Code == transport.DeniedErrorCode { + return true + } + } + + return false +} + +func authStatusCode(err error) int { + var transportErr *transport.Error + if errors.As(err, &transportErr) { + return transportErr.StatusCode + } + return 0 +} + +func isRateLimitError(err error) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + if transportErr.StatusCode == http.StatusTooManyRequests { + return true + } + + for _, diag := range transportErr.Errors { + if diag.Code == transport.TooManyRequestsErrorCode { + return true + } + } + + return false +} + +func isServerError(err error) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + switch transportErr.StatusCode { + case http.StatusInternalServerError, + http.StatusBadGateway, + http.StatusServiceUnavailable, + http.StatusGatewayTimeout: + return true + } + + for _, diag := range transportErr.Errors { + if diag.Code == transport.UnavailableErrorCode { + return true + } + } + + return false +} + +func serverStatusCode(err error) int { + var transportErr *transport.Error + if errors.As(err, &transportErr) { + return transportErr.StatusCode + } + return 0 +} + +func isDNSError(err error) bool { + var dnsErr *net.DNSError + return errors.As(err, &dnsErr) +} + +func dnsHostname(err error) string { + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + return dnsErr.Name + } + return "" +} + +func isTimeoutError(err error) bool { + return errors.Is(err, context.DeadlineExceeded) || errors.Is(err, os.ErrDeadlineExceeded) +} + +func isNetworkError(err error) bool { + if isDNSError(err) || isTimeoutError(err) { + return false + } + + var ( + netErr net.Error + opErr *net.OpError + syscallErr syscall.Errno + ) + + if errors.As(err, &opErr) { + return true + } + + if errors.As(err, &netErr) { + return true + } + + if errors.As(err, &syscallErr) { + return syscallErr == syscall.ECONNREFUSED || + syscallErr == syscall.ECONNRESET || + syscallErr == syscall.ETIMEDOUT || + syscallErr == syscall.ENETUNREACH || + syscallErr == syscall.EHOSTUNREACH + } + + return false +} + +func networkAddr(err error) string { + var opErr *net.OpError + if errors.As(err, &opErr) && opErr.Addr != nil { + return opErr.Addr.String() + } + return "" +} diff --git a/internal/mirror/cmd/push/errdetect/classify_test.go b/internal/mirror/cmd/push/errdetect/classify_test.go new file mode 100644 index 00000000..3e57564d --- /dev/null +++ b/internal/mirror/cmd/push/errdetect/classify_test.go @@ -0,0 +1,92 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package errdetect + +import ( + "crypto/x509" + "errors" + "fmt" + "io" + "net/http" + "strings" + "testing" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" +) + +func TestDiagnose_Nil(t *testing.T) { + assert.Nil(t, Diagnose(nil)) +} + +func TestDiagnose_Unclassified(t *testing.T) { + assert.Nil(t, Diagnose(errors.New("some random error"))) +} + +func TestDiagnose_AlreadyClassified(t *testing.T) { + first := Diagnose(io.EOF) + require.NotNil(t, first) + assert.Nil(t, Diagnose(first)) +} + +func TestDiagnose_AllCategories(t *testing.T) { + tests := []struct { + name string + err error + category string + }{ + {"EOF", io.EOF, categoryEOF}, + {"TLS", fmt.Errorf("reg: %w", x509.UnknownAuthorityError{}), categoryTLS}, + {"Auth401", &transport.Error{StatusCode: http.StatusUnauthorized}, categoryAuth401}, + {"Auth403", &transport.Error{StatusCode: http.StatusForbidden}, categoryAuth403}, + {"RateLimit", &transport.Error{StatusCode: http.StatusTooManyRequests}, categoryRateLimit}, + {"Server500", &transport.Error{StatusCode: http.StatusInternalServerError}, categoryServerError}, + {"ImageNotFound", errors.New("MANIFEST_UNKNOWN: not found"), categoryImageNotFound}, + {"RepoNotFound", errors.New("NAME_UNKNOWN: repo"), categoryRepoNotFound}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diag := Diagnose(tt.err) + require.NotNil(t, diag) + assert.Contains(t, diag.Category, tt.category) + }) + } +} + +func TestDiagnose_PushSpecificAuth(t *testing.T) { + diag := Diagnose(&transport.Error{StatusCode: http.StatusUnauthorized}) + require.NotNil(t, diag) + + solutions := strings.Join(diag.Solutions, " ") + assert.Contains(t, solutions, "--registry-login") + assert.Contains(t, solutions, "--registry-password") + assert.NotContains(t, solutions, "--license") + assert.NotContains(t, solutions, "--source-login") +} + +func TestDiagnose_Unwrap(t *testing.T) { + diag := Diagnose(io.EOF) + require.NotNil(t, diag) + + var helpErr *diagnostic.HelpfulError + require.True(t, errors.As(diag, &helpErr)) + assert.True(t, errors.Is(diag, io.EOF)) +} diff --git a/internal/mirror/cmd/push/push.go b/internal/mirror/cmd/push/push.go index edbd04dd..ba75e0a8 100644 --- a/internal/mirror/cmd/push/push.go +++ b/internal/mirror/cmd/push/push.go @@ -35,7 +35,7 @@ import ( regclient "github.com/deckhouse/deckhouse/pkg/registry/client" "github.com/deckhouse/deckhouse-cli/internal/mirror" - "github.com/deckhouse/deckhouse-cli/internal/mirror/errdiag" + "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/push/errdetect" "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" @@ -179,14 +179,14 @@ func (p *Pusher) Execute() error { } if err := p.validateRegistryAccess(); err != nil { - if diag := errdiag.Classify(err); diag != nil { + if diag := errdetect.Diagnose(err); diag != nil { return diag } return err } if err := p.executeNewPush(); err != nil { - if diag := errdiag.Classify(err); diag != nil { + if diag := errdetect.Diagnose(err); diag != nil { return diag } return err diff --git a/internal/mirror/errdiag/classify_integration_test.go b/internal/mirror/errdiag/classify_integration_test.go deleted file mode 100644 index 81740ca1..00000000 --- a/internal/mirror/errdiag/classify_integration_test.go +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright 2026 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package errdiag - -import ( - "context" - "encoding/json" - "fmt" - "net" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" - - "github.com/google/go-containerregistry/pkg/name" - "github.com/google/go-containerregistry/pkg/v1/remote" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" -) - -// headImage performs remote.Head against the given host with insecure (plain HTTP) mode. -func headImage(ctx context.Context, host string) error { - ref, err := name.ParseReference(host+"/test:latest", name.Insecure) - if err != nil { - return fmt.Errorf("parse reference: %w", err) - } - _, err = remote.Head(ref, remote.WithContext(ctx)) - return err -} - -// newRegistryErrorHandler returns an http.Handler that responds with -// a Docker V2 API error in JSON format. -func newRegistryErrorHandler(statusCode int, code, message string) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(statusCode) - _ = json.NewEncoder(w).Encode(struct { - Errors []struct { - Code string `json:"code"` - Message string `json:"message"` - } `json:"errors"` - }{ - Errors: []struct { - Code string `json:"code"` - Message string `json:"message"` - }{{code, message}}, - }) - }) -} - -// classifyFromServer starts an httptest server with the given handler, -// makes a real remote.Head call, and returns the Classify result. -func classifyFromServer(t *testing.T, handler http.Handler) *diagnostic.HelpfulError { - t.Helper() - server := httptest.NewServer(handler) - defer server.Close() - - err := headImage(context.Background(), strings.TrimPrefix(server.URL, "http://")) - require.Error(t, err) - return Classify(err) -} - -func TestIntegration_Auth401(t *testing.T) { - diag := classifyFromServer(t, newRegistryErrorHandler( - http.StatusUnauthorized, "UNAUTHORIZED", "authentication required", - )) - require.NotNil(t, diag) - assert.Equal(t, CategoryAuth401, diag.Category) -} - -func TestIntegration_Auth403(t *testing.T) { - diag := classifyFromServer(t, newRegistryErrorHandler( - http.StatusForbidden, "DENIED", "access denied", - )) - require.NotNil(t, diag) - assert.Equal(t, CategoryAuth403, diag.Category) -} - -func TestIntegration_RateLimit429(t *testing.T) { - diag := classifyFromServer(t, newRegistryErrorHandler( - http.StatusTooManyRequests, "TOOMANYREQUESTS", "rate limit exceeded", - )) - require.NotNil(t, diag) - assert.Equal(t, CategoryRateLimit, diag.Category) -} - -func TestIntegration_ServerErrors(t *testing.T) { - tests := []struct { - name string - statusCode int - errCode string - }{ - {"500", http.StatusInternalServerError, "UNKNOWN"}, - {"502", http.StatusBadGateway, ""}, - {"503", http.StatusServiceUnavailable, "UNAVAILABLE"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - if tt.errCode != "" { - newRegistryErrorHandler(tt.statusCode, tt.errCode, "server error").ServeHTTP(w, nil) - } else { - w.WriteHeader(tt.statusCode) - } - }) - - diag := classifyFromServer(t, handler) - require.NotNil(t, diag) - assert.Contains(t, diag.Category, CategoryServerError) - assert.Contains(t, diag.Category, tt.name) - }) - } -} - -func TestIntegration_TLSCertificateError(t *testing.T) { - server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - })) - defer server.Close() - - host := strings.TrimPrefix(server.URL, "https://") - ref, err := name.ParseReference(host+"/test:latest", name.StrictValidation) - require.NoError(t, err) - - _, err = remote.Head(ref) - require.Error(t, err) - - diag := Classify(err) - require.NotNil(t, diag, "expected TLS error to be classified, got raw: %v", err) - assert.Equal(t, CategoryTLS, diag.Category) -} - -func TestIntegration_EOF(t *testing.T) { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - hijacker, ok := w.(http.Hijacker) - if !ok { - t.Fatal("server does not support hijacking") - } - conn, _, err := hijacker.Hijack() - if err != nil { - t.Fatalf("hijack failed: %v", err) - } - conn.Close() - })) - defer server.Close() - - err := headImage(context.Background(), strings.TrimPrefix(server.URL, "http://")) - require.Error(t, err) - - diag := Classify(err) - require.NotNil(t, diag, "expected EOF error to be classified, got raw: %v", err) - assert.Equal(t, CategoryEOF, diag.Category) -} - -func TestIntegration_ConnectionRefused(t *testing.T) { - listener, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - addr := listener.Addr().String() - listener.Close() - - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - err = headImage(ctx, addr) - require.Error(t, err) - - diag := Classify(err) - require.NotNil(t, diag, "expected network error to be classified, got raw: %v", err) - assert.Contains(t, diag.Category, CategoryNetwork) -} - -func TestIntegration_Timeout(t *testing.T) { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - select { - case <-time.After(10 * time.Second): - case <-r.Context().Done(): - } - })) - defer server.Close() - - ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) - defer cancel() - - err := headImage(ctx, strings.TrimPrefix(server.URL, "http://")) - require.Error(t, err) - - diag := Classify(err) - require.NotNil(t, diag, "expected timeout error to be classified, got raw: %v", err) - assert.Equal(t, CategoryTimeout, diag.Category) -} - diff --git a/internal/mirror/errdiag/classify_network_test.go b/internal/mirror/errdiag/classify_network_test.go deleted file mode 100644 index f47c5ba7..00000000 --- a/internal/mirror/errdiag/classify_network_test.go +++ /dev/null @@ -1,41 +0,0 @@ -//go:build dfrunnetwork - -/* -Copyright 2026 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package errdiag - -import ( - "context" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestIntegration_DNSResolutionFailure(t *testing.T) { - // .invalid TLD is reserved by RFC 2606 and guaranteed to never resolve. - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - err := headImage(ctx, "nonexistent.invalid:443") - require.Error(t, err) - - diag := Classify(err) - require.NotNil(t, diag, "expected DNS error to be classified, got raw: %v", err) - assert.Contains(t, diag.Category, CategoryDNS) -} diff --git a/internal/mirror/errdiag/classify_test.go b/internal/mirror/errdiag/classify_test.go deleted file mode 100644 index 715a5333..00000000 --- a/internal/mirror/errdiag/classify_test.go +++ /dev/null @@ -1,268 +0,0 @@ -/* -Copyright 2026 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package errdiag - -import ( - "context" - "crypto/x509" - "errors" - "fmt" - "io" - "net" - "net/http" - "os" - "syscall" - "testing" - - "github.com/google/go-containerregistry/pkg/v1/remote/transport" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestClassify_Nil(t *testing.T) { - assert.Nil(t, Classify(nil)) -} - -func TestClassify_Unclassified(t *testing.T) { - assert.Nil(t, Classify(errors.New("some random error"))) -} - -func TestClassify_EOF(t *testing.T) { - diag := Classify(io.EOF) - require.NotNil(t, diag) - assert.Equal(t, CategoryEOF, diag.Category) -} - -func TestClassify_UnexpectedEOF(t *testing.T) { - diag := Classify(io.ErrUnexpectedEOF) - require.NotNil(t, diag) - assert.Equal(t, CategoryEOF, diag.Category) -} - -func TestClassify_WrappedEOF(t *testing.T) { - err := fmt.Errorf("pull from registry: %w", fmt.Errorf("get manifest: %w", io.EOF)) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryEOF, diag.Category) -} - -func TestClassify_TLS_UnknownAuthority(t *testing.T) { - err := fmt.Errorf("registry: %w", x509.UnknownAuthorityError{}) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryTLS, diag.Category) -} - -func TestClassify_TLS_CertificateInvalid(t *testing.T) { - err := fmt.Errorf("registry: %w", x509.CertificateInvalidError{}) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryTLS, diag.Category) -} - -func TestClassify_TLS_Hostname(t *testing.T) { - err := fmt.Errorf("registry: %w", x509.HostnameError{Host: "example.com"}) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryTLS, diag.Category) -} - -func TestClassify_Auth_401(t *testing.T) { - diag := Classify(&transport.Error{StatusCode: http.StatusUnauthorized}) - require.NotNil(t, diag) - assert.Equal(t, CategoryAuth401, diag.Category) -} - -func TestClassify_Auth_403(t *testing.T) { - diag := Classify(&transport.Error{StatusCode: http.StatusForbidden}) - require.NotNil(t, diag) - assert.Equal(t, CategoryAuth403, diag.Category) -} - -func TestClassify_Auth_DiagnosticCode(t *testing.T) { - err := &transport.Error{ - StatusCode: http.StatusOK, - Errors: []transport.Diagnostic{{Code: transport.UnauthorizedErrorCode}}, - } - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryAuth, diag.Category) -} - -func TestClassify_RateLimit_429(t *testing.T) { - diag := Classify(&transport.Error{StatusCode: http.StatusTooManyRequests}) - require.NotNil(t, diag) - assert.Equal(t, CategoryRateLimit, diag.Category) -} - -func TestClassify_RateLimit_DiagnosticCode(t *testing.T) { - err := &transport.Error{ - StatusCode: http.StatusOK, - Errors: []transport.Diagnostic{{Code: transport.TooManyRequestsErrorCode}}, - } - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryRateLimit, diag.Category) -} - -func TestClassify_ServerErrors(t *testing.T) { - tests := []struct { - name string - statusCode int - }{ - {"500", http.StatusInternalServerError}, - {"502", http.StatusBadGateway}, - {"503", http.StatusServiceUnavailable}, - {"504", http.StatusGatewayTimeout}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - diag := Classify(&transport.Error{StatusCode: tt.statusCode}) - require.NotNil(t, diag) - assert.Contains(t, diag.Category, CategoryServerError) - assert.Contains(t, diag.Category, tt.name) - }) - } -} - -func TestClassify_ServerError_Unavailable(t *testing.T) { - err := &transport.Error{ - StatusCode: http.StatusOK, - Errors: []transport.Diagnostic{{Code: transport.UnavailableErrorCode}}, - } - diag := Classify(err) - require.NotNil(t, diag) - assert.Contains(t, diag.Category, CategoryServerError) -} - -func TestClassify_DNS(t *testing.T) { - err := &net.DNSError{Name: "registry.example.com", Err: "no such host"} - diag := Classify(err) - require.NotNil(t, diag) - assert.Contains(t, diag.Category, CategoryDNS) - assert.Contains(t, diag.Category, "registry.example.com") -} - -func TestClassify_Timeout_Context(t *testing.T) { - err := fmt.Errorf("validate access: %w", context.DeadlineExceeded) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryTimeout, diag.Category) -} - -func TestClassify_Timeout_OS(t *testing.T) { - err := fmt.Errorf("validate access: %w", os.ErrDeadlineExceeded) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryTimeout, diag.Category) -} - -func TestClassify_Network_ConnRefused(t *testing.T) { - err := &net.OpError{ - Op: "dial", Net: "tcp", - Addr: &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 443}, - Err: &os.SyscallError{Syscall: "connect", Err: syscall.ECONNREFUSED}, - } - diag := Classify(err) - require.NotNil(t, diag) - assert.Contains(t, diag.Category, CategoryNetwork) - assert.Contains(t, diag.Category, "127.0.0.1:443") -} - -func TestClassify_Network_ConnReset(t *testing.T) { - err := &net.OpError{ - Op: "read", Net: "tcp", - Err: &os.SyscallError{Syscall: "read", Err: syscall.ECONNRESET}, - } - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryNetwork, diag.Category) -} - -func TestClassify_ImageNotFound(t *testing.T) { - diag := Classify(errors.New("MANIFEST_UNKNOWN: manifest unknown")) - require.NotNil(t, diag) - assert.Equal(t, CategoryImageNotFound, diag.Category) -} - -func TestClassify_RepoNotFound(t *testing.T) { - diag := Classify(errors.New("NAME_UNKNOWN: repository not found")) - require.NotNil(t, diag) - assert.Equal(t, CategoryRepoNotFound, diag.Category) -} - -func TestClassify_UnsupportedOCIMediaType(t *testing.T) { - diag := Classify(errors.New("MANIFEST_INVALID: media type vnd.aquasec.trivy not allowed")) - require.NotNil(t, diag) - assert.Equal(t, CategoryUnsupportedOCI, diag.Category) -} - -func TestClassify_DeepWrapping(t *testing.T) { - inner := x509.UnknownAuthorityError{} - err := fmt.Errorf("l1: %w", fmt.Errorf("l2: %w", fmt.Errorf("l3: %w", fmt.Errorf("l4: %w", fmt.Errorf("l5: %w", inner))))) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryTLS, diag.Category) -} - -func TestClassify_Unwrap(t *testing.T) { - originalErr := io.EOF - diag := Classify(fmt.Errorf("wrap: %w", originalErr)) - require.NotNil(t, diag) - assert.True(t, errors.Is(diag, originalErr)) -} - -func TestClassify_Priority_DNSOverNetwork(t *testing.T) { - err := &net.DNSError{Name: "example.com", Err: "no such host"} - diag := Classify(err) - require.NotNil(t, diag) - assert.Contains(t, diag.Category, CategoryDNS) - assert.NotContains(t, diag.Category, CategoryNetwork) -} - -func TestClassify_Priority_TimeoutOverNetwork(t *testing.T) { - diag := Classify(context.DeadlineExceeded) - require.NotNil(t, diag) - assert.Equal(t, CategoryTimeout, diag.Category) -} - -func TestClassify_WrappedAuth(t *testing.T) { - inner := &transport.Error{StatusCode: http.StatusUnauthorized} - err := fmt.Errorf("validate access: %w", inner) - diag := Classify(err) - require.NotNil(t, diag) - assert.Equal(t, CategoryAuth401, diag.Category) -} - -func TestClassify_AlreadyClassified(t *testing.T) { - first := Classify(io.EOF) - require.NotNil(t, first) - - second := Classify(first) - assert.Nil(t, second, "must not wrap an already classified error") -} - -func TestIsUnsupportedOCIMediaType(t *testing.T) { - assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy"))) - assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: application/octet-stream"))) - assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: deckhouse.io.bdu.layer"))) - assert.True(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: vnd.cncf.openpolicyagent"))) - assert.False(t, isUnsupportedOCIMediaType(errors.New("MANIFEST_INVALID: other"))) - assert.False(t, isUnsupportedOCIMediaType(errors.New("some error without manifest invalid"))) - assert.False(t, isUnsupportedOCIMediaType(nil)) -} diff --git a/internal/mirror/errdiag/doc.go b/internal/mirror/errdiag/doc.go deleted file mode 100644 index 00dfee76..00000000 --- a/internal/mirror/errdiag/doc.go +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright 2026 Flant JSC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Package errdiag classifies container registry errors and returns -// [diagnostic.HelpfulError] with user-friendly causes and solutions. -// -// # Usage -// -// if diag := errdiag.Classify(err); diag != nil { -// return diag // implements error interface via *diagnostic.HelpfulError -// } -// -// # Classification -// -// [Classify] uses Go's [errors.Is] and [errors.As] to detect error types from -// the standard library (crypto/x509, net, io) and go-containerregistry -// (transport.Error). Detection order matters - more specific checks run first: -// -// 1. EOF - io.EOF, io.ErrUnexpectedEOF (proxy/middleware termination) -// 2. TLS/Certificate - x509.UnknownAuthorityError, HostnameError, etc. -// 3. Authentication - transport.Error with HTTP 401/403 -// 4. Rate limiting - transport.Error with HTTP 429 -// 5. Server errors - transport.Error with HTTP 500/502/503/504 -// 6. DNS - net.DNSError -// 7. Timeout - context.DeadlineExceeded -// 8. Network - net.OpError, syscall.ECONNREFUSED, etc. -// 9. Image not found - error message contains "MANIFEST_UNKNOWN" -// 10. Repo not found - error message contains "NAME_UNKNOWN" -// 11. Unsupported OCI - unsupported OCI media types (e.g. Project Quay) -// -// DNS is checked before Network because [net.DNSError] satisfies [net.Error]. -// Timeout is checked before Network for the same reason. -// -// Error matchers for flow control (e.g., skipping optional images) are in -// [github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch]. -package errdiag From afa0bf03fa037a401a538e1872dcb29987c61316 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 19:20:41 +0300 Subject: [PATCH 07/10] Update diagnostic docs to reflect per-command errdetect structure Signed-off-by: Roman Berezkin --- pkg/diagnostic/README.md | 45 ++++++++++++++++++++-------------------- pkg/diagnostic/doc.go | 26 +++++++++++------------ 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md index ff705398..c9cee4be 100644 --- a/pkg/diagnostic/README.md +++ b/pkg/diagnostic/README.md @@ -27,7 +27,7 @@ error: TLS/certificate verification failed | cobra dispatches | to subcommand ──────────────> err := puller.Execute() | | - | [Classify err] -> is it HelpfulError? + | [Diagnose err] -> is it HelpfulError? | | | yes | no | | | @@ -44,9 +44,9 @@ error: TLS/certificate verification failed (colored) (plain) ``` -Each command classifies errors with its own domain classifier. +Each command diagnoses errors with its own errdetect package. `root.go` only catches `*HelpfulError` via `errors.As` - it does not -import any classifier, so unrelated commands never get false diagnostics. +import any errdetect, so unrelated commands never get false diagnostics. ## HelpfulError @@ -87,36 +87,37 @@ error: TLS/certificate verification failed <-- Category ## Where classifiers live Classifiers are **application/UI logic**, not library code. They contain -user-facing advice (CLI flags, links to docs) that is specific to a command group. -Place them in `internal/` next to the commands they serve. +user-facing advice (CLI flags, links to docs) that is specific to each command. +Place them in `internal/` next to the command they serve. ``` -pkg/diagnostic/ HelpfulError + Format (generic, reusable) -pkg/registry/errmatch/ error matchers (generic, reusable) -internal/mirror/errdiag/ mirror classifier (advice about --tls-skip-verify, --license) -internal/backup/errdiag/ backup classifier (advice about etcdctl, kubeconfig) +pkg/diagnostic/ HelpfulError + Format (generic, reusable) +pkg/registry/errmatch/ error matchers (generic, reusable) +internal/mirror/cmd/pull/errdetect/ pull-specific diagnostics +internal/mirror/cmd/push/errdetect/ push-specific diagnostics ``` -Why not `pkg/`: solutions mention specific CLI flags (`--tls-skip-verify`, `--license`). -A different command group working with the same registry would need different advice. +Why per-command: pull advises `--license`/`--source-login`, push advises +`--registry-login`/`--registry-password`. Shared classifier would give +ambiguous advice. ## Adding diagnostics to a new command -**1. Create a classifier** next to your command group: +**1. Create an errdetect package** next to your command: ```go -// internal/backup/errdiag/classify.go -package errdiag +// internal/backup/cmd/snapshot/errdetect/classify.go +package errdetect import ( "errors" "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" ) -func Classify(err error) *diagnostic.HelpfulError { +func Diagnose(err error) *diagnostic.HelpfulError { var helpErr *diagnostic.HelpfulError if errors.As(err, &helpErr) { - return nil // already classified, don't wrap twice + return nil // already diagnosed, don't wrap twice } if isETCDError(err) { @@ -135,7 +136,7 @@ func Classify(err error) *diagnostic.HelpfulError { ```go if err := doSnapshot(); err != nil { - if diag := errdiag.Classify(err); diag != nil { + if diag := errdetect.Diagnose(err); diag != nil { return diag } return fmt.Errorf("snapshot failed: %w", err) @@ -143,12 +144,12 @@ if err := doSnapshot(); err != nil { ``` No changes to `root.go` needed - it catches any `*HelpfulError` -regardless of which classifier produced it. +regardless of which errdetect produced it. ## Rules (Best Practice) -- Classifiers go in `internal//errdiag/` - they are application logic, not libraries -- Classify in the **leaf command** (RunE), not in libraries or root.go -- Each command group uses its **own classifier** - prevents false diagnostics -- Skip classification if the error is already a `*HelpfulError` (see guard in the example above) +- Classifiers go in `internal//errdetect/` - they are application logic, not libraries +- Diagnose in the **leaf command** (RunE), not in libraries or root.go +- Each command uses its **own errdetect** - prevents false diagnostics +- Skip diagnosis if the error is already a `*HelpfulError` (see guard in the example above) - `Causes` and `Solutions` are optional but highly recommended diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go index f7a62758..871fcdcf 100644 --- a/pkg/diagnostic/doc.go +++ b/pkg/diagnostic/doc.go @@ -23,11 +23,10 @@ limitations under the License. // // # Creating a HelpfulError // -// Option 1: use a domain-specific classifier that wraps known errors -// (see [github.com/deckhouse/deckhouse-cli/pkg/registry/errdiag] for the registry -// implementation): +// Option 1: use a command-specific errdetect package +// (see internal/mirror/cmd/pull/errdetect for an example): // -// if diag := errdiag.Classify(err); diag != nil { +// if diag := errdetect.Diagnose(err); diag != nil { // return diag // } // @@ -69,13 +68,12 @@ limitations under the License. // [HelpfulError.Error] returns plain text (safe for logs). // [HelpfulError.Format] returns colored terminal output (TTY-aware, respects NO_COLOR). // -// # Adding a new domain classifier +// # Adding diagnostics to a new command // -// To add diagnostics for a new domain (e.g. backup), create a Classify function -// that wraps known errors into *[HelpfulError]: +// Create an errdetect package next to your command with a Diagnose function: // -// // internal/backup/errdiag/classify.go -// func Classify(err error) *diagnostic.HelpfulError { +// // internal/backup/cmd/snapshot/errdetect/classify.go +// func Diagnose(err error) *diagnostic.HelpfulError { // if isETCDError(err) { // return &diagnostic.HelpfulError{ // Category: "ETCD connection failed", OriginalErr: err, @@ -86,16 +84,16 @@ limitations under the License. // return nil // } // -// Then call it at the command level, same pattern as registry: +// Then call it at the command level: // -// if diag := errdiag.Classify(err); diag != nil { +// if diag := errdetect.Diagnose(err); diag != nil { // return diag // } // -// # Important: classify at the command level, not in root.go +// # Important: diagnose at the command level, not in root.go // -// Each command must call its own domain classifier. root.go only catches +// Each command must call its own errdetect package. root.go only catches // [HelpfulError] via [errors.As] - it does not import or call any classifier. // This prevents false classification: a DNS error from "d8 backup" must not -// be classified with registry-specific advice like "--tls-skip-verify". +// be diagnosed with registry-specific advice like "--tls-skip-verify". package diagnostic From 094bc2fb7840daa0e9e4b42508a1cb2d6e9d6375 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Fri, 10 Apr 2026 19:29:46 +0300 Subject: [PATCH 08/10] Move errmatch to internal/mirror and align file naming - Error matchers are only used by mirror commands, belong in internal - File names match function names: classify.go -> diagnose.go (better name for clarity in general) Signed-off-by: Roman Berezkin --- cmd/d8/root.go | 2 +- internal/mirror/cmd/pull/errdetect/{classify.go => diagnose.go} | 2 +- .../cmd/pull/errdetect/{classify_test.go => diagnose_test.go} | 0 internal/mirror/cmd/push/errdetect/{classify.go => diagnose.go} | 2 +- .../cmd/push/errdetect/{classify_test.go => diagnose_test.go} | 0 {pkg/registry => internal/mirror}/errmatch/errmatch.go | 0 {pkg/registry => internal/mirror}/errmatch/errmatch_test.go | 0 internal/mirror/validation/registry_access.go | 2 +- pkg/diagnostic/README.md | 2 +- pkg/diagnostic/doc.go | 2 +- 10 files changed, 6 insertions(+), 6 deletions(-) rename internal/mirror/cmd/pull/errdetect/{classify.go => diagnose.go} (99%) rename internal/mirror/cmd/pull/errdetect/{classify_test.go => diagnose_test.go} (100%) rename internal/mirror/cmd/push/errdetect/{classify.go => diagnose.go} (99%) rename internal/mirror/cmd/push/errdetect/{classify_test.go => diagnose_test.go} (100%) rename {pkg/registry => internal/mirror}/errmatch/errmatch.go (100%) rename {pkg/registry => internal/mirror}/errmatch/errmatch_test.go (100%) diff --git a/cmd/d8/root.go b/cmd/d8/root.go index 9044df3a..3581426e 100644 --- a/cmd/d8/root.go +++ b/cmd/d8/root.go @@ -177,7 +177,7 @@ func execute() { if err := rootCmd.Execute(); err != nil { // If a command returned a HelpfulError, show formatted diagnostic. // Commands are responsible for classifying their own errors using - // domain-specific classifiers (e.g. errdiag.Classify for registry). + // domain-specific errdetect packages (e.g. errdetect.Diagnose for mirror). var helpErr *diagnostic.HelpfulError if errors.As(err, &helpErr) { fmt.Fprint(os.Stderr, helpErr.Format()) diff --git a/internal/mirror/cmd/pull/errdetect/classify.go b/internal/mirror/cmd/pull/errdetect/diagnose.go similarity index 99% rename from internal/mirror/cmd/pull/errdetect/classify.go rename to internal/mirror/cmd/pull/errdetect/diagnose.go index 14547452..5e9aa96d 100644 --- a/internal/mirror/cmd/pull/errdetect/classify.go +++ b/internal/mirror/cmd/pull/errdetect/diagnose.go @@ -31,8 +31,8 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/deckhouse/deckhouse-cli/internal/mirror/errmatch" "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" - "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" ) const ( diff --git a/internal/mirror/cmd/pull/errdetect/classify_test.go b/internal/mirror/cmd/pull/errdetect/diagnose_test.go similarity index 100% rename from internal/mirror/cmd/pull/errdetect/classify_test.go rename to internal/mirror/cmd/pull/errdetect/diagnose_test.go diff --git a/internal/mirror/cmd/push/errdetect/classify.go b/internal/mirror/cmd/push/errdetect/diagnose.go similarity index 99% rename from internal/mirror/cmd/push/errdetect/classify.go rename to internal/mirror/cmd/push/errdetect/diagnose.go index c62a1f10..f2c34052 100644 --- a/internal/mirror/cmd/push/errdetect/classify.go +++ b/internal/mirror/cmd/push/errdetect/diagnose.go @@ -31,8 +31,8 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/deckhouse/deckhouse-cli/internal/mirror/errmatch" "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" - "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" ) const ( diff --git a/internal/mirror/cmd/push/errdetect/classify_test.go b/internal/mirror/cmd/push/errdetect/diagnose_test.go similarity index 100% rename from internal/mirror/cmd/push/errdetect/classify_test.go rename to internal/mirror/cmd/push/errdetect/diagnose_test.go diff --git a/pkg/registry/errmatch/errmatch.go b/internal/mirror/errmatch/errmatch.go similarity index 100% rename from pkg/registry/errmatch/errmatch.go rename to internal/mirror/errmatch/errmatch.go diff --git a/pkg/registry/errmatch/errmatch_test.go b/internal/mirror/errmatch/errmatch_test.go similarity index 100% rename from pkg/registry/errmatch/errmatch_test.go rename to internal/mirror/errmatch/errmatch_test.go diff --git a/internal/mirror/validation/registry_access.go b/internal/mirror/validation/registry_access.go index b5c46bc5..b10f424a 100644 --- a/internal/mirror/validation/registry_access.go +++ b/internal/mirror/validation/registry_access.go @@ -26,8 +26,8 @@ import ( "github.com/google/go-containerregistry/pkg/v1/random" "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/deckhouse/deckhouse-cli/internal/mirror/errmatch" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/auth" - "github.com/deckhouse/deckhouse-cli/pkg/registry/errmatch" ) var ErrImageUnavailable = errors.New("required image is not present in registry") diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md index c9cee4be..a222fcda 100644 --- a/pkg/diagnostic/README.md +++ b/pkg/diagnostic/README.md @@ -106,7 +106,7 @@ ambiguous advice. **1. Create an errdetect package** next to your command: ```go -// internal/backup/cmd/snapshot/errdetect/classify.go +// internal/backup/cmd/snapshot/errdetect/diagnose.go package errdetect import ( diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go index 871fcdcf..11ed0d09 100644 --- a/pkg/diagnostic/doc.go +++ b/pkg/diagnostic/doc.go @@ -72,7 +72,7 @@ limitations under the License. // // Create an errdetect package next to your command with a Diagnose function: // -// // internal/backup/cmd/snapshot/errdetect/classify.go +// // internal/backup/cmd/snapshot/errdetect/diagnose.go // func Diagnose(err error) *diagnostic.HelpfulError { // if isETCDError(err) { // return &diagnostic.HelpfulError{ From 929733f79f62a3a7524a9765def3cb7b790cda1f Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Sun, 12 Apr 2026 18:49:06 +0300 Subject: [PATCH 09/10] Unwrap error chain into indented tree in diagnostic output - Display each wrapped error level on its own line with increasing indentation for easier root-cause identification - Walk errors.Unwrap() to extract per-level context instead of showing a single flattened message - Update doc examples in README.md and doc.go to reflect the chain format Signed-off-by: Roman Berezkin --- pkg/diagnostic/README.md | 10 ++++++---- pkg/diagnostic/doc.go | 10 ++++++---- pkg/diagnostic/format.go | 40 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md index a222fcda..1928fdfb 100644 --- a/pkg/diagnostic/README.md +++ b/pkg/diagnostic/README.md @@ -69,14 +69,16 @@ type HelpfulError struct { How fields map to output (`Format()`): ``` -error: TLS/certificate verification failed <-- Category - ╰─▶ x509: certificate signed by unknown authority <-- OriginalErr.Error() +error: TLS/certificate verification failed <-- Category + ╰─▶ Get "https://registry.example.com/v2/" <-- OriginalErr chain + ╰─▶ tls: failed to verify certificate (unwrapped level by level) + ╰─▶ x509: certificate signed by unknown authority - Possible causes: <-- Causes + Possible causes: <-- Causes * Self-signed certificate without proper trust chain * Corporate proxy intercepting HTTPS connections - How to fix: <-- Solutions + How to fix: <-- Solutions * Use --tls-skip-verify flag * Add the registry's CA certificate to your system trust store ``` diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go index 11ed0d09..57bde054 100644 --- a/pkg/diagnostic/doc.go +++ b/pkg/diagnostic/doc.go @@ -43,13 +43,15 @@ limitations under the License. // // # How fields map to Format() output // -// error: ETCD snapshot failed <-- Category -// ╰─▶ dial tcp 10.0.0.1:2379: connection refused <-- OriginalErr.Error() +// error: ETCD snapshot failed <-- Category +// ╰─▶ save snapshot <-- OriginalErr chain (unwrapped) +// ╰─▶ dial tcp 10.0.0.1:2379 +// ╰─▶ connection refused // -// Possible causes: <-- Causes +// Possible causes: <-- Causes // * ETCD cluster is unreachable // -// How to fix: <-- Solutions +// How to fix: <-- Solutions // * Check ETCD health: etcdctl endpoint health // // # How it propagates diff --git a/pkg/diagnostic/format.go b/pkg/diagnostic/format.go index a3d8c808..6f397dd4 100644 --- a/pkg/diagnostic/format.go +++ b/pkg/diagnostic/format.go @@ -17,6 +17,7 @@ limitations under the License. package diagnostic import ( + "errors" "os" "strings" @@ -65,7 +66,10 @@ func newTextStyler() textStyler { // Format returns the formatted diagnostic string with colors if stderr is a TTY. // // error: Network connection failed to 127.0.0.1:443 -// ╰─▶ dial tcp 127.0.0.1:443: connect: connection refused +// ╰─▶ pull from registry +// ╰─▶ validate platform access +// ╰─▶ dial tcp 127.0.0.1:443 +// ╰─▶ connect: connection refused // // Possible causes: // * Network connectivity issues or no internet connection @@ -78,7 +82,11 @@ func (e *HelpfulError) Format() string { var b strings.Builder b.WriteString("\n" + t.danger("error") + t.header(": "+e.Category) + "\n") if e.OriginalErr != nil { - b.WriteString(t.hint(" ╰─▶ ") + e.OriginalErr.Error() + "\n") + chain := unwrapChain(e.OriginalErr) + for i, msg := range chain { + indent := strings.Repeat(" ", i) + b.WriteString(" " + indent + t.hint("╰─▶ ") + msg + "\n") + } } b.WriteString("\n") @@ -100,3 +108,31 @@ func (e *HelpfulError) Format() string { b.WriteString("\n") return b.String() } + +// unwrapChain walks errors.Unwrap() and extracts each level's unique context. +// For "a: b: c" wrapped via fmt.Errorf("%w"), returns ["a", "b", "c"]. +func unwrapChain(err error) []string { + var chain []string + + for err != nil { + inner := errors.Unwrap(err) + if inner == nil { + chain = append(chain, err.Error()) + break + } + + full := err.Error() + innerText := inner.Error() + context := strings.TrimSuffix(full, ": "+innerText) + if context == full { + // Can't extract prefix cleanly - use full message and stop + chain = append(chain, full) + break + } + + chain = append(chain, context) + err = inner + } + + return chain +} From c1066a54a067252f5f7ee07234b4084404f8cc25 Mon Sep 17 00:00:00 2001 From: Roman Berezkin Date: Sun, 12 Apr 2026 20:57:14 +0300 Subject: [PATCH 10/10] Link each diagnostic cause with its specific solutions - Each cause now carries its own solutions so the user sees actionable advice next to the relevant problem, not two disconnected lists - Add Suggestion type pairing Cause with Solutions in HelpfulError - Causes without solutions (e.g. "registry closed connection") render cleanly without empty arrows - Regroup all pull and push classifier suggestions into cause-solution pairs Signed-off-by: Roman Berezkin --- .../mirror/cmd/pull/errdetect/diagnose.go | 199 ++++++++++-------- .../cmd/pull/errdetect/diagnose_test.go | 10 +- .../mirror/cmd/push/errdetect/diagnose.go | 199 ++++++++++-------- .../cmd/push/errdetect/diagnose_test.go | 10 +- pkg/diagnostic/README.md | 55 +++-- pkg/diagnostic/diagnostic.go | 13 +- pkg/diagnostic/diagnostic_test.go | 33 +-- pkg/diagnostic/doc.go | 27 +-- pkg/diagnostic/format.go | 30 +-- 9 files changed, 329 insertions(+), 247 deletions(-) diff --git a/internal/mirror/cmd/pull/errdetect/diagnose.go b/internal/mirror/cmd/pull/errdetect/diagnose.go index 5e9aa96d..25888fc2 100644 --- a/internal/mirror/cmd/pull/errdetect/diagnose.go +++ b/internal/mirror/cmd/pull/errdetect/diagnose.go @@ -67,15 +67,17 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryEOF, OriginalErr: err, - Causes: []string{ - "Corporate proxy or middleware intercepting and terminating HTTPS connections", - "Source registry closed the connection unexpectedly", - "Network device (firewall, load balancer) dropping packets", - }, - Solutions: []string{ - "Check if a corporate proxy is intercepting HTTPS traffic", - "If using a proxy, ensure it is configured to pass through registry traffic", - "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Corporate proxy or middleware intercepting and terminating HTTPS connections", + Solutions: []string{ + "Check if a corporate proxy is intercepting HTTPS traffic", + "If using a proxy, ensure it is configured to pass through registry traffic", + "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", + }, + }, + {Cause: "Source registry closed the connection unexpectedly"}, + {Cause: "Network device (firewall, load balancer) dropping packets"}, }, } @@ -83,15 +85,22 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryTLS, OriginalErr: err, - Causes: []string{ - "Self-signed certificate on the source registry", - "Certificate expired or not yet valid", - "Corporate proxy or middleware intercepting HTTPS connections", - }, - Solutions: []string{ - "Use --tls-skip-verify flag to skip TLS verification (not recommended for production)", - "Add the source registry's CA certificate to your system trust store", - "Verify system clock is correct (expired certificates can be caused by wrong time)", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Self-signed certificate on the source registry", + Solutions: []string{"Use --tls-skip-verify flag to skip TLS verification (not recommended for production)"}, + }, + { + Cause: "Certificate expired or not yet valid", + Solutions: []string{ + "Verify system clock is correct (expired certificates can be caused by wrong time)", + "Add the source registry's CA certificate to your system trust store", + }, + }, + { + Cause: "Corporate proxy or middleware intercepting HTTPS connections", + Solutions: []string{"Check if a corporate proxy is intercepting HTTPS traffic"}, + }, }, } @@ -106,15 +115,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "License key is invalid, expired, or not provided", - "Source registry credentials are incorrect", - "Insufficient permissions for the requested images", - }, - Solutions: []string{ - "Verify your license key and pass it with --license flag", - "For custom source registries, use --source-login and --source-password", - "Contact registry administrator to verify access rights", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "License key is invalid, expired, or not provided", + Solutions: []string{"Verify your license key and pass it with --license flag"}, + }, + { + Cause: "Source registry credentials are incorrect", + Solutions: []string{"For custom source registries, use --source-login and --source-password"}, + }, + { + Cause: "Insufficient permissions for the requested images", + Solutions: []string{"Contact registry administrator to verify access rights"}, + }, }, } @@ -122,13 +135,15 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryRateLimit, OriginalErr: err, - Causes: []string{ - "Too many requests to the source registry in a short time", - "Registry-side rate limiting policy", - }, - Solutions: []string{ - "Wait a few minutes and retry the operation", - "Contact registry administrator to increase rate limits", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Too many requests to the source registry in a short time", + Solutions: []string{"Wait a few minutes and retry the operation"}, + }, + { + Cause: "Registry-side rate limiting policy", + Solutions: []string{"Contact registry administrator to increase rate limits"}, + }, }, } @@ -141,15 +156,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "Source registry is experiencing internal errors", - "Backend storage is temporarily unavailable", - "Registry is overloaded or being maintained", - }, - Solutions: []string{ - "Wait a few minutes and retry the operation", - "Check source registry status and health", - "Contact registry administrator if the problem persists", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Source registry is experiencing internal errors", + Solutions: []string{"Wait a few minutes and retry the operation"}, + }, + { + Cause: "Backend storage is temporarily unavailable", + Solutions: []string{"Check source registry status and health"}, + }, + { + Cause: "Registry is overloaded or being maintained", + Solutions: []string{"Contact registry administrator if the problem persists"}, + }, }, } @@ -162,15 +181,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "Source registry hostname cannot be resolved by DNS", - "DNS server is unreachable or not responding", - "Incorrect source registry URL or typo in hostname", - }, - Solutions: []string{ - "Verify the --source registry URL is spelled correctly", - "Check your DNS server configuration", - "Try using the registry's IP address instead of hostname", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Incorrect source registry URL or typo in hostname", + Solutions: []string{"Verify the --source registry URL is spelled correctly"}, + }, + { + Cause: "DNS server is unreachable or not responding", + Solutions: []string{"Check your DNS server configuration"}, + }, + { + Cause: "Source registry hostname cannot be resolved by DNS", + Solutions: []string{"Try using the registry's IP address instead of hostname"}, + }, }, } @@ -178,14 +201,16 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryTimeout, OriginalErr: err, - Causes: []string{ - "Source registry took too long to respond", - "Network latency is too high", - "Firewall silently dropping packets (no RST, no ICMP)", - }, - Solutions: []string{ - "Check network connectivity to the source registry", - "Verify firewall rules allow outbound HTTPS (port 443)", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Source registry took too long to respond", + Solutions: []string{"Check network connectivity to the source registry"}, + }, + { + Cause: "Firewall silently dropping packets (no RST, no ICMP)", + Solutions: []string{"Verify firewall rules allow outbound HTTPS (port 443)"}, + }, + {Cause: "Network latency is too high"}, }, } @@ -198,15 +223,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "No network connection to the source registry", - "Firewall or security group blocking the connection", - "Source registry is down or unreachable", - }, - Solutions: []string{ - "Check your network connection and internet access", - "Verify firewall rules allow outbound HTTPS (port 443)", - "Test connectivity with: curl -v https://", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "No network connection to the source registry", + Solutions: []string{"Check your network connection and internet access"}, + }, + { + Cause: "Firewall or security group blocking the connection", + Solutions: []string{"Verify firewall rules allow outbound HTTPS (port 443)"}, + }, + { + Cause: "Source registry is down or unreachable", + Solutions: []string{"Test connectivity with: curl -v https://"}, + }, }, } @@ -214,13 +243,15 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryImageNotFound, OriginalErr: err, - Causes: []string{ - "Image tag doesn't exist in the source registry", - "Incorrect image name or tag specified", - }, - Solutions: []string{ - "Verify the source registry path with --source flag", - "Check if the requested Deckhouse version exists", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Image tag doesn't exist in the source registry", + Solutions: []string{"Check if the requested Deckhouse version exists"}, + }, + { + Cause: "Incorrect image name or tag specified", + Solutions: []string{"Verify the source registry path with --source flag"}, + }, }, } @@ -228,13 +259,15 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryRepoNotFound, OriginalErr: err, - Causes: []string{ - "Repository doesn't exist in the source registry", - "Incorrect source registry path", - }, - Solutions: []string{ - "Verify the --source registry path is correct", - "Ensure you have permission to access this repository", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Repository doesn't exist in the source registry", + Solutions: []string{"Verify the --source registry path is correct"}, + }, + { + Cause: "Incorrect source registry path", + Solutions: []string{"Ensure you have permission to access this repository"}, + }, }, } } diff --git a/internal/mirror/cmd/pull/errdetect/diagnose_test.go b/internal/mirror/cmd/pull/errdetect/diagnose_test.go index 648f5dc7..78a617ec 100644 --- a/internal/mirror/cmd/pull/errdetect/diagnose_test.go +++ b/internal/mirror/cmd/pull/errdetect/diagnose_test.go @@ -75,13 +75,21 @@ func TestDiagnose_PullSpecificAuth(t *testing.T) { diag := Diagnose(&transport.Error{StatusCode: http.StatusUnauthorized}) require.NotNil(t, diag) - solutions := strings.Join(diag.Solutions, " ") + solutions := allSolutions(diag) assert.Contains(t, solutions, "--license") assert.Contains(t, solutions, "--source-login") assert.NotContains(t, solutions, "--registry-login") assert.NotContains(t, solutions, "--registry-password") } +func allSolutions(diag *diagnostic.HelpfulError) string { + var parts []string + for _, s := range diag.Suggestions { + parts = append(parts, s.Solutions...) + } + return strings.Join(parts, " ") +} + func TestDiagnose_NoUnsupportedOCI(t *testing.T) { assert.Nil(t, Diagnose(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy"))) } diff --git a/internal/mirror/cmd/push/errdetect/diagnose.go b/internal/mirror/cmd/push/errdetect/diagnose.go index f2c34052..64eb9dfd 100644 --- a/internal/mirror/cmd/push/errdetect/diagnose.go +++ b/internal/mirror/cmd/push/errdetect/diagnose.go @@ -67,15 +67,17 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryEOF, OriginalErr: err, - Causes: []string{ - "Corporate proxy or middleware intercepting and terminating HTTPS connections", - "Target registry closed the connection unexpectedly", - "Network device (firewall, load balancer) dropping packets", - }, - Solutions: []string{ - "Check if a corporate proxy is intercepting HTTPS traffic", - "If using a proxy, ensure it is configured to pass through registry traffic", - "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Corporate proxy or middleware intercepting and terminating HTTPS connections", + Solutions: []string{ + "Check if a corporate proxy is intercepting HTTPS traffic", + "If using a proxy, ensure it is configured to pass through registry traffic", + "Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY", + }, + }, + {Cause: "Target registry closed the connection unexpectedly"}, + {Cause: "Network device (firewall, load balancer) dropping packets"}, }, } @@ -83,15 +85,22 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryTLS, OriginalErr: err, - Causes: []string{ - "Self-signed certificate on the target registry", - "Certificate expired or not yet valid", - "Corporate proxy or middleware intercepting HTTPS connections", - }, - Solutions: []string{ - "Use --tls-skip-verify flag to skip TLS verification (not recommended for production)", - "Add the target registry's CA certificate to your system trust store", - "Verify system clock is correct (expired certificates can be caused by wrong time)", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Self-signed certificate on the target registry", + Solutions: []string{"Use --tls-skip-verify flag to skip TLS verification (not recommended for production)"}, + }, + { + Cause: "Certificate expired or not yet valid", + Solutions: []string{ + "Verify system clock is correct (expired certificates can be caused by wrong time)", + "Add the target registry's CA certificate to your system trust store", + }, + }, + { + Cause: "Corporate proxy or middleware intercepting HTTPS connections", + Solutions: []string{"Check if a corporate proxy is intercepting HTTPS traffic"}, + }, }, } @@ -106,15 +115,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "Registry credentials are invalid or not provided", - "Account does not have push permissions", - "Repository path requires different access rights", - }, - Solutions: []string{ - "Verify --registry-login and --registry-password are correct", - "Ensure the account has write access to the target repository", - "Contact registry administrator to verify push permissions", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Registry credentials are invalid or not provided", + Solutions: []string{"Verify --registry-login and --registry-password are correct"}, + }, + { + Cause: "Account does not have push permissions", + Solutions: []string{"Ensure the account has write access to the target repository"}, + }, + { + Cause: "Repository path requires different access rights", + Solutions: []string{"Contact registry administrator to verify push permissions"}, + }, }, } @@ -122,13 +135,15 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryRateLimit, OriginalErr: err, - Causes: []string{ - "Too many requests to the target registry in a short time", - "Registry-side rate limiting policy", - }, - Solutions: []string{ - "Wait a few minutes and retry the operation", - "Contact registry administrator to increase rate limits", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Too many requests to the target registry in a short time", + Solutions: []string{"Wait a few minutes and retry the operation"}, + }, + { + Cause: "Registry-side rate limiting policy", + Solutions: []string{"Contact registry administrator to increase rate limits"}, + }, }, } @@ -141,15 +156,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "Target registry is experiencing internal errors", - "Backend storage is temporarily unavailable", - "Registry is overloaded or being maintained", - }, - Solutions: []string{ - "Wait a few minutes and retry the operation", - "Check target registry status and health", - "Contact registry administrator if the problem persists", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Target registry is experiencing internal errors", + Solutions: []string{"Wait a few minutes and retry the operation"}, + }, + { + Cause: "Backend storage is temporarily unavailable", + Solutions: []string{"Check target registry status and health"}, + }, + { + Cause: "Registry is overloaded or being maintained", + Solutions: []string{"Contact registry administrator if the problem persists"}, + }, }, } @@ -162,15 +181,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "Target registry hostname cannot be resolved by DNS", - "DNS server is unreachable or not responding", - "Incorrect registry address or typo in hostname", - }, - Solutions: []string{ - "Verify the argument is spelled correctly", - "Check your DNS server configuration", - "Try using the registry's IP address instead of hostname", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Incorrect registry address or typo in hostname", + Solutions: []string{"Verify the argument is spelled correctly"}, + }, + { + Cause: "DNS server is unreachable or not responding", + Solutions: []string{"Check your DNS server configuration"}, + }, + { + Cause: "Target registry hostname cannot be resolved by DNS", + Solutions: []string{"Try using the registry's IP address instead of hostname"}, + }, }, } @@ -178,14 +201,16 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryTimeout, OriginalErr: err, - Causes: []string{ - "Target registry took too long to respond", - "Network latency is too high", - "Firewall silently dropping packets (no RST, no ICMP)", - }, - Solutions: []string{ - "Check network connectivity to the target registry", - "Verify firewall rules allow outbound HTTPS (port 443)", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Target registry took too long to respond", + Solutions: []string{"Check network connectivity to the target registry"}, + }, + { + Cause: "Firewall silently dropping packets (no RST, no ICMP)", + Solutions: []string{"Verify firewall rules allow outbound HTTPS (port 443)"}, + }, + {Cause: "Network latency is too high"}, }, } @@ -198,15 +223,19 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: category, OriginalErr: err, - Causes: []string{ - "No network connection to the target registry", - "Firewall or security group blocking the connection", - "Target registry is down or unreachable", - }, - Solutions: []string{ - "Check your network connection", - "Verify firewall rules allow outbound HTTPS (port 443)", - "Test connectivity with: curl -v https://", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "No network connection to the target registry", + Solutions: []string{"Check your network connection"}, + }, + { + Cause: "Firewall or security group blocking the connection", + Solutions: []string{"Verify firewall rules allow outbound HTTPS (port 443)"}, + }, + { + Cause: "Target registry is down or unreachable", + Solutions: []string{"Test connectivity with: curl -v https://"}, + }, }, } @@ -214,13 +243,15 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryImageNotFound, OriginalErr: err, - Causes: []string{ - "Expected image is missing in the target registry", - "Previous push may have been interrupted", - }, - Solutions: []string{ - "Retry the push operation", - "Verify the argument is correct", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Expected image is missing in the target registry", + Solutions: []string{"Retry the push operation"}, + }, + { + Cause: "Previous push may have been interrupted", + Solutions: []string{"Verify the argument is correct"}, + }, }, } @@ -228,13 +259,15 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: categoryRepoNotFound, OriginalErr: err, - Causes: []string{ - "Repository doesn't exist in the target registry", - "Incorrect argument", - }, - Solutions: []string{ - "Verify the argument is correct", - "Ensure the repository is created in the target registry", + Suggestions: []diagnostic.Suggestion{ + { + Cause: "Repository doesn't exist in the target registry", + Solutions: []string{"Verify the argument is correct"}, + }, + { + Cause: "Incorrect argument", + Solutions: []string{"Ensure the repository is created in the target registry"}, + }, }, } } diff --git a/internal/mirror/cmd/push/errdetect/diagnose_test.go b/internal/mirror/cmd/push/errdetect/diagnose_test.go index 3e57564d..5857dedb 100644 --- a/internal/mirror/cmd/push/errdetect/diagnose_test.go +++ b/internal/mirror/cmd/push/errdetect/diagnose_test.go @@ -75,13 +75,21 @@ func TestDiagnose_PushSpecificAuth(t *testing.T) { diag := Diagnose(&transport.Error{StatusCode: http.StatusUnauthorized}) require.NotNil(t, diag) - solutions := strings.Join(diag.Solutions, " ") + solutions := allSolutions(diag) assert.Contains(t, solutions, "--registry-login") assert.Contains(t, solutions, "--registry-password") assert.NotContains(t, solutions, "--license") assert.NotContains(t, solutions, "--source-login") } +func allSolutions(diag *diagnostic.HelpfulError) string { + var parts []string + for _, s := range diag.Suggestions { + parts = append(parts, s.Solutions...) + } + return strings.Join(parts, " ") +} + func TestDiagnose_Unwrap(t *testing.T) { diag := Diagnose(io.EOF) require.NotNil(t, diag) diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md index 1928fdfb..334a5ca0 100644 --- a/pkg/diagnostic/README.md +++ b/pkg/diagnostic/README.md @@ -7,13 +7,10 @@ with possible causes and solutions instead of raw Go error text: error: TLS/certificate verification failed ╰─▶ x509: certificate signed by unknown authority - Possible causes: - * Self-signed certificate without proper trust chain - * Corporate proxy intercepting HTTPS connections - - How to fix: - * Use --tls-skip-verify flag to skip TLS verification - * Add the registry's CA certificate to your system trust store + * Self-signed certificate on the source registry + -> Use --tls-skip-verify flag to skip TLS verification + * Corporate proxy intercepting HTTPS connections + -> Check if a corporate proxy is intercepting HTTPS traffic ``` ## How it works @@ -51,11 +48,15 @@ import any errdetect, so unrelated commands never get false diagnostics. ## HelpfulError ```go +type Suggestion struct { + Cause string // why it might have happened + Solutions []string // how to fix this specific cause +} + type HelpfulError struct { - Category string // what went wrong: "TLS/certificate verification failed" - OriginalErr error // the underlying error (required, used by Unwrap/Error/Format) - Causes []string // why it might have happened (optional) - Solutions []string // how to fix it (optional) + Category string // what went wrong: "TLS/certificate verification failed" + OriginalErr error // the underlying error (required, used by Unwrap/Error/Format) + Suggestions []Suggestion // cause-solution pairs (optional) } ``` @@ -63,24 +64,18 @@ type HelpfulError struct { |-------|----------|----------------------| | `Category` | yes | output shows `error: ` with no description | | `OriginalErr` | yes | safe (no panic), but `Unwrap` returns nil and `Format` skips the error line | -| `Causes` | no | "Possible causes" section is omitted | -| `Solutions` | no | "How to fix" section is omitted | +| `Suggestions` | no | suggestions section is omitted | How fields map to output (`Format()`): ``` -error: TLS/certificate verification failed <-- Category - ╰─▶ Get "https://registry.example.com/v2/" <-- OriginalErr chain - ╰─▶ tls: failed to verify certificate (unwrapped level by level) - ╰─▶ x509: certificate signed by unknown authority - - Possible causes: <-- Causes - * Self-signed certificate without proper trust chain - * Corporate proxy intercepting HTTPS connections - - How to fix: <-- Solutions - * Use --tls-skip-verify flag - * Add the registry's CA certificate to your system trust store +error: TLS/certificate verification failed <-- Category + ╰─▶ x509: certificate signed by ... <-- OriginalErr (unwrapped chain) + + * Self-signed certificate <-- Suggestion.Cause + -> Use --tls-skip-verify flag <-- Suggestion.Solutions + * Corporate proxy intercepting HTTPS <-- next Suggestion.Cause + -> Check if proxy is intercepting ... <-- its Solutions ``` `Error()` returns plain text for logs: `"Category: OriginalErr.Error()"`. @@ -126,8 +121,12 @@ func Diagnose(err error) *diagnostic.HelpfulError { return &diagnostic.HelpfulError{ Category: "ETCD connection failed", OriginalErr: err, - Causes: []string{"ETCD cluster is unreachable"}, - Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, + Suggestions: []diagnostic.Suggestion{ + { + Cause: "ETCD cluster is unreachable", + Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, + }, + }, } } return nil @@ -154,4 +153,4 @@ regardless of which errdetect produced it. - Diagnose in the **leaf command** (RunE), not in libraries or root.go - Each command uses its **own errdetect** - prevents false diagnostics - Skip diagnosis if the error is already a `*HelpfulError` (see guard in the example above) -- `Causes` and `Solutions` are optional but highly recommended +- `Suggestions` are optional but highly recommended diff --git a/pkg/diagnostic/diagnostic.go b/pkg/diagnostic/diagnostic.go index 503695bd..96e6627d 100644 --- a/pkg/diagnostic/diagnostic.go +++ b/pkg/diagnostic/diagnostic.go @@ -16,14 +16,19 @@ limitations under the License. package diagnostic +// Suggestion pairs a possible cause with its specific solutions. +type Suggestion struct { + Cause string // why it might have happened + Solutions []string // how to fix this specific cause +} + // HelpfulError is an error enriched with possible causes and actionable solutions. // It implements the error interface so it can propagate up the call chain // and be printed once at the top level, avoiding double output. type HelpfulError struct { - Category string // e.g. "DNS resolution failed for 'registry.example.com'" - OriginalErr error // the underlying error - Causes []string // "Possible causes" shown to the user - Solutions []string // "How to fix" shown to the user + Category string // e.g. "DNS resolution failed for 'registry.example.com'" + OriginalErr error // the underlying error + Suggestions []Suggestion // cause-solution pairs shown to the user } // Error returns a plain-text representation suitable for logging and error wrapping. diff --git a/pkg/diagnostic/diagnostic_test.go b/pkg/diagnostic/diagnostic_test.go index be4e0c8a..ea9784cb 100644 --- a/pkg/diagnostic/diagnostic_test.go +++ b/pkg/diagnostic/diagnostic_test.go @@ -30,14 +30,15 @@ func TestHelpfulError_Error_PlainText(t *testing.T) { diag := &HelpfulError{ Category: "Network connection failed", OriginalErr: errors.New("connection refused"), - Causes: []string{"cause"}, - Solutions: []string{"fix"}, + Suggestions: []Suggestion{ + {Cause: "cause", Solutions: []string{"fix"}}, + }, } errStr := diag.Error() assert.Equal(t, "Network connection failed: connection refused", errStr) assert.NotContains(t, errStr, "\033[") - assert.NotContains(t, errStr, "Possible causes") + assert.NotContains(t, errStr, "cause") } func TestHelpfulError_Unwrap(t *testing.T) { @@ -55,8 +56,9 @@ func TestHelpfulError_Format_NoColor(t *testing.T) { diag := &HelpfulError{ Category: "Network connection failed", OriginalErr: errors.New("test"), - Causes: []string{"cause1"}, - Solutions: []string{"fix1"}, + Suggestions: []Suggestion{ + {Cause: "cause1", Solutions: []string{"fix1"}}, + }, } output := diag.Format() @@ -72,18 +74,18 @@ func TestHelpfulError_Format_Structure(t *testing.T) { diag := &HelpfulError{ Category: "Network connection failed", OriginalErr: errors.New("connection refused"), - Causes: []string{"Network down", "Firewall blocking"}, - Solutions: []string{"Check network", "Check firewall"}, + Suggestions: []Suggestion{ + {Cause: "Network down", Solutions: []string{"Check network"}}, + {Cause: "Firewall blocking", Solutions: []string{"Check firewall"}}, + }, } output := diag.Format() assert.Contains(t, output, "error: Network connection failed") assert.Contains(t, output, "connection refused") - assert.Contains(t, output, "Possible causes:") assert.Contains(t, output, "Network down") - assert.Contains(t, output, "Firewall blocking") - assert.Contains(t, output, "How to fix:") assert.Contains(t, output, "Check network") + assert.Contains(t, output, "Firewall blocking") assert.Contains(t, output, "Check firewall") } @@ -97,8 +99,10 @@ func TestHelpfulError_Format_NilOriginalErr(t *testing.T) { t.Setenv("NO_COLOR", "1") diag := &HelpfulError{ - Category: "Something failed", - Solutions: []string{"Try again"}, + Category: "Something failed", + Suggestions: []Suggestion{ + {Cause: "Unknown", Solutions: []string{"Try again"}}, + }, } output := diag.Format() @@ -129,8 +133,9 @@ func TestHelpfulError_Format_ForceColor(t *testing.T) { diag := &HelpfulError{ Category: "Test error", OriginalErr: errors.New("test"), - Causes: []string{"cause1"}, - Solutions: []string{"fix1"}, + Suggestions: []Suggestion{ + {Cause: "cause1", Solutions: []string{"fix1"}}, + }, } output := diag.Format() diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go index 57bde054..99df9f06 100644 --- a/pkg/diagnostic/doc.go +++ b/pkg/diagnostic/doc.go @@ -35,24 +35,26 @@ limitations under the License. // return &diagnostic.HelpfulError{ // Category: "ETCD snapshot failed", // OriginalErr: err, -// Causes: []string{"ETCD cluster is unreachable"}, -// Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, +// Suggestions: []diagnostic.Suggestion{ +// { +// Cause: "ETCD cluster is unreachable", +// Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, +// }, +// }, // } // -// Causes and Solutions are optional - empty slices are silently omitted from output. +// Suggestions are optional - an empty slice is silently omitted from output. +// Each Suggestion pairs a cause with its specific solutions. // // # How fields map to Format() output // -// error: ETCD snapshot failed <-- Category -// ╰─▶ save snapshot <-- OriginalErr chain (unwrapped) +// error: ETCD snapshot failed <-- Category +// ╰─▶ save snapshot <-- OriginalErr chain (unwrapped) // ╰─▶ dial tcp 10.0.0.1:2379 // ╰─▶ connection refused // -// Possible causes: <-- Causes -// * ETCD cluster is unreachable -// -// How to fix: <-- Solutions -// * Check ETCD health: etcdctl endpoint health +// * ETCD cluster is unreachable <-- Suggestion.Cause +// -> Check ETCD health: etcdctl ... <-- Suggestion.Solutions // // # How it propagates // @@ -79,8 +81,9 @@ limitations under the License. // if isETCDError(err) { // return &diagnostic.HelpfulError{ // Category: "ETCD connection failed", OriginalErr: err, -// Causes: []string{"ETCD cluster is unreachable"}, -// Solutions: []string{"Check ETCD health"}, +// Suggestions: []diagnostic.Suggestion{ +// {Cause: "ETCD cluster is unreachable", Solutions: []string{"Check ETCD health"}}, +// }, // } // } // return nil diff --git a/pkg/diagnostic/format.go b/pkg/diagnostic/format.go index 6f397dd4..164f7ee6 100644 --- a/pkg/diagnostic/format.go +++ b/pkg/diagnostic/format.go @@ -66,16 +66,12 @@ func newTextStyler() textStyler { // Format returns the formatted diagnostic string with colors if stderr is a TTY. // // error: Network connection failed to 127.0.0.1:443 -// ╰─▶ pull from registry -// ╰─▶ validate platform access -// ╰─▶ dial tcp 127.0.0.1:443 -// ╰─▶ connect: connection refused +// ╰─▶ dial tcp 127.0.0.1:443: connect: connection refused // -// Possible causes: -// * Network connectivity issues or no internet connection -// -// How to fix: -// * Check your network connection and internet access +// * Firewall or security group blocking the connection +// -> Verify firewall rules allow outbound HTTPS (port 443) +// * Registry is down or unreachable +// -> Test connectivity with: curl -v https:// func (e *HelpfulError) Format() string { t := newTextStyler() @@ -90,18 +86,10 @@ func (e *HelpfulError) Format() string { } b.WriteString("\n") - if len(e.Causes) > 0 { - b.WriteString(t.warn(" Possible causes:") + "\n") - for _, cause := range e.Causes { - b.WriteString(" * " + cause + "\n") - } - b.WriteString("\n") - } - - if len(e.Solutions) > 0 { - b.WriteString(t.hint(" How to fix:") + "\n") - for _, solution := range e.Solutions { - b.WriteString(" * " + solution + "\n") + for _, s := range e.Suggestions { + b.WriteString(" " + t.warn("* "+s.Cause) + "\n") + for _, sol := range s.Solutions { + b.WriteString(" " + t.hint("-> ") + sol + "\n") } }