diff --git a/cmd/d8/root.go b/cmd/d8/root.go
index ca955b79..3581426e 100644
--- a/cmd/d8/root.go
+++ b/cmd/d8/root.go
@@ -48,6 +48,7 @@ import (
 	"github.com/deckhouse/deckhouse-cli/internal/tools"
 	useroperation "github.com/deckhouse/deckhouse-cli/internal/useroperation/cmd"
 	"github.com/deckhouse/deckhouse-cli/internal/version"
+	"github.com/deckhouse/deckhouse-cli/pkg/diagnostic"
 )
 
 type RootCommand struct {
@@ -174,7 +175,15 @@ func (r *RootCommand) Execute() error {
 func execute() {
 	rootCmd := NewRootCommand()
 	if err := rootCmd.Execute(); err != nil {
-		fmt.Fprintf(os.Stderr, "Error executing command: %v\n", err)
+		// If a command returned a HelpfulError, show formatted diagnostic.
+		// Commands are responsible for classifying their own errors using
+		// domain-specific errdetect packages (e.g. errdetect.Diagnose for mirror).
+		var helpErr *diagnostic.HelpfulError
+		if errors.As(err, &helpErr) {
+			fmt.Fprint(os.Stderr, helpErr.Format())
+		} else {
+			fmt.Fprintf(os.Stderr, "Error executing command: %v\n", err)
+		}
 		os.Exit(1)
 	}
 }
diff --git a/internal/mirror/cmd/pull/errdetect/diagnose.go b/internal/mirror/cmd/pull/errdetect/diagnose.go
new file mode 100644
index 00000000..6930ce11
--- /dev/null
+++ b/internal/mirror/cmd/pull/errdetect/diagnose.go
@@ -0,0 +1,544 @@
+/*
+Copyright 2026 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package errdetect classifies registry errors for d8 mirror pull
+// with pull-specific causes and solutions.
+package errdetect
+
+import (
+	"context"
+	"crypto/x509"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"os"
+	"syscall"
+
+	"github.com/google/go-containerregistry/pkg/v1/remote/transport"
+
+	"github.com/deckhouse/deckhouse-cli/internal/mirror/errmatch"
+	"github.com/deckhouse/deckhouse-cli/pkg/diagnostic"
+)
+
+// Category texts that Diagnose stores in diagnostic.HelpfulError.Category.
+const (
+	categoryEOF           = "Connection terminated unexpectedly (EOF)"
+	categoryTLS           = "TLS/certificate verification failed"
+	categoryAuth          = "Authentication failed"
+	categoryAuth401       = "Authentication failed (HTTP 401 Unauthorized)"
+	categoryAuth403       = "Access denied (HTTP 403 Forbidden)"
+	categoryRateLimit     = "Rate limited by registry (HTTP 429 Too Many Requests)"
+	categoryServerError   = "Registry server error"
+	categoryDNS           = "DNS resolution failed"
+	categoryTimeout       = "Operation timed out"
+	categoryNetwork       = "Network connection failed"
+	categoryDiskFull      = "Disk space exhausted"
+	categoryPermission    = "Permission denied"
+	categoryImageNotFound = "Image not found in registry"
+	categoryRepoNotFound  = "Repository not found in registry"
+)
+
+// Diagnose analyzes an error and returns a *diagnostic.HelpfulError
+// with pull-specific causes and solutions, or nil if the error is not recognized.
+//
+// nil is also returned when err is nil or already wraps a *diagnostic.HelpfulError,
+// so an error is never classified twice. The detectors are tried in the order of
+// the switch and the first match wins.
+func Diagnose(err error) *diagnostic.HelpfulError {
+	if err == nil {
+		return nil
+	}
+
+	var helpErr *diagnostic.HelpfulError
+	if errors.As(err, &helpErr) {
+		return nil
+	}
+
+	switch {
+	case isEOF(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryEOF,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Large layer transfer interrupted by an intermediate timeout",
+					Solutions: []string{
+						"Increase the timeout with D8_MIRROR_TIMEOUT env variable, e.g.: export D8_MIRROR_TIMEOUT=4h",
+						"Pull supports resuming — simply re-run the same command to continue from where it stopped",
+					},
+				},
+				{
+					Cause: "Corporate proxy or middleware intercepting and terminating HTTPS connections",
+					Solutions: []string{
+						"Check if a corporate proxy is intercepting HTTPS traffic",
+						"If using a proxy, ensure it is configured to pass through registry traffic",
+						"Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY",
+					},
+				},
+				{Cause: "Source registry closed the connection unexpectedly"},
+				{Cause: "Network device (firewall, load balancer) dropping packets"},
+			},
+		}
+
+	case isCertificateError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryTLS,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Self-signed or private CA certificate on the source registry",
+					Solutions: []string{
+						"Add the source registry's CA certificate to your system trust store",
+						"Use --tls-skip-verify flag to skip TLS verification (not recommended for production)",
+					},
+				},
+				{
+					Cause: "Certificate expired or not yet valid",
+					Solutions: []string{
+						"Verify system clock is correct (wrong time is a common cause of certificate errors)",
+						"Renew the registry certificate if it has expired",
+					},
+				},
+				{
+					Cause:     "Source registry is serving plain HTTP, not HTTPS",
+					Solutions: []string{"Use --insecure flag if the source registry uses HTTP instead of HTTPS"},
+				},
+				{
+					Cause: "Corporate proxy or middleware intercepting and replacing TLS certificates",
+					Solutions: []string{
+						"Add the proxy's CA certificate to your system trust store",
+						"Check if a corporate proxy is intercepting HTTPS traffic",
+					},
+				},
+			},
+		}
+
+	case isAuthenticationError(err):
+		category := categoryAuth
+		if code := authStatusCode(err); code == http.StatusUnauthorized {
+			category = categoryAuth401
+		} else if code == http.StatusForbidden {
+			category = categoryAuth403
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "License key is invalid, expired, or not provided",
+					Solutions: []string{"Verify your license key and pass it with --license flag"},
+				},
+				{
+					Cause:     "Source registry credentials are incorrect",
+					Solutions: []string{"For custom source registries, use --source-login and --source-password"},
+				},
+				{
+					Cause:     "Insufficient permissions for the requested images",
+					Solutions: []string{"Contact registry administrator to verify access rights"},
+				},
+			},
+		}
+
+	case isRateLimitError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryRateLimit,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Too many requests to the source registry in a short time",
+					Solutions: []string{"Wait a few minutes and retry the operation"},
+				},
+				{
+					Cause:     "Registry-side rate limiting policy",
+					Solutions: []string{"Contact registry administrator to increase rate limits"},
+				},
+			},
+		}
+
+	case isServerError(err):
+		category := categoryServerError
+		if code := serverStatusCode(err); code != 0 {
+			category = fmt.Sprintf("%s (HTTP %d)", categoryServerError, code)
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Source registry is experiencing internal errors",
+					Solutions: []string{"Wait a few minutes and retry the operation"},
+				},
+				{
+					Cause:     "Backend storage is temporarily unavailable",
+					Solutions: []string{"Check source registry status and health"},
+				},
+				{
+					Cause:     "Registry is overloaded or being maintained",
+					Solutions: []string{"Contact registry administrator if the problem persists"},
+				},
+			},
+		}
+
+	case isDNSError(err):
+		category := categoryDNS
+		if name := dnsHostname(err); name != "" {
+			category = fmt.Sprintf("%s for '%s'", categoryDNS, name)
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Incorrect source registry URL or typo in hostname",
+					Solutions: []string{"Verify the --source registry URL is spelled correctly"},
+				},
+				{
+					Cause:     "DNS server is unreachable or not responding",
+					Solutions: []string{"Check your DNS server configuration"},
+				},
+				{
+					Cause:     "Source registry hostname cannot be resolved by DNS",
+					Solutions: []string{"Try using the registry's IP address instead of hostname"},
+				},
+			},
+		}
+
+	case isTimeoutError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryTimeout,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Large image layers require more time to transfer than the default timeout",
+					Solutions: []string{
+						"Increase the timeout with D8_MIRROR_TIMEOUT env variable, e.g.: export D8_MIRROR_TIMEOUT=4h",
+						"Pull supports resuming — re-run the same command to continue from where it stopped",
+					},
+				},
+				{
+					Cause:     "Firewall silently dropping packets (no RST, no ICMP)",
+					Solutions: []string{"Verify firewall rules allow outbound HTTPS (port 443) to the source registry"},
+				},
+				{
+					Cause:     "Source registry took too long to respond",
+					Solutions: []string{"Check network connectivity and latency to the source registry"},
+				},
+			},
+		}
+
+	case isDiskFullError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryDiskFull,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Not enough free disk space for the bundle output",
+					Solutions: []string{
+						"Free up disk space on the output partition",
+						"Use --images-bundle-chunk-size to split the bundle into smaller chunks",
+					},
+				},
+				{
+					Cause: "Temporary working directory is on a partition with insufficient space",
+					Solutions: []string{
+						"Mirror operations require free space approximately equal to the full bundle size",
+						"Use --tmp-dir to point to a partition with more free space",
+					},
+				},
+			},
+		}
+
+	case isPermissionError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryPermission,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Output directory is not writable by the current user",
+					Solutions: []string{
+						"Check write permissions on the output path",
+						"Run with a user that has write access, or change directory permissions",
+					},
+				},
+				{
+					Cause:     "Temporary directory is not writable",
+					Solutions: []string{"Use --tmp-dir to specify a writable temporary directory"},
+				},
+			},
+		}
+
+	case isNetworkError(err):
+		category := categoryNetwork
+		if addr := networkAddr(err); addr != "" {
+			category = fmt.Sprintf("%s to %s", categoryNetwork, addr)
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Source registry is serving plain HTTP, not HTTPS",
+					Solutions: []string{"Use --insecure flag if the source registry uses HTTP instead of HTTPS"},
+				},
+				{
+					Cause: "Firewall or security group blocking outbound connections",
+					Solutions: []string{
+						"Verify firewall rules allow outbound HTTPS (port 443) to the source registry",
+						"Test connectivity with: curl -v https://REGISTRY_HOST/v2/",
+					},
+				},
+				{
+					Cause:     "Source registry is down or temporarily unreachable",
+					Solutions: []string{"Check your network connection and the source registry status"},
+				},
+			},
+		}
+
+	case errmatch.IsImageNotFound(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryImageNotFound,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Requested Deckhouse version or tag does not exist in the source registry",
+					Solutions: []string{
+						"Check --deckhouse-tag or --since-version value for typos or non-existent versions",
+						"Browse available release versions in the source registry",
+					},
+				},
+				{
+					Cause:     "License key does not have access to the requested edition or version",
+					Solutions: []string{"Verify the --license key grants access to the requested Deckhouse edition"},
+				},
+			},
+		}
+
+	case errmatch.IsRepoNotFound(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryRepoNotFound,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Source registry path is incorrect or the repository does not exist",
+					Solutions: []string{
+						"Double-check the --source flag value for typos or extra path segments",
+						"Default source is registry.deckhouse.ru/deckhouse/ee",
+					},
+				},
+				{
+					Cause: "Account does not have read access to the repository",
+					Solutions: []string{
+						"Verify the --license key or --source-login credentials have read permissions",
+						"Contact registry administrator to confirm your access rights",
+					},
+				},
+			},
+		}
+	}
+
+	return nil
+}
+
+// --- detection functions ---
+
+// isEOF reports whether err wraps io.EOF or io.ErrUnexpectedEOF.
+func isEOF(err error) bool {
+	return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)
+}
+
+// isCertificateError reports whether err wraps one of the crypto/x509 verification error types.
+func isCertificateError(err error) bool {
+	var (
+		unknownAuthErr x509.UnknownAuthorityError
+		certInvalidErr x509.CertificateInvalidError
+		hostnameErr    x509.HostnameError
+		systemRootsErr x509.SystemRootsError
+		constraintErr  x509.ConstraintViolationError
+		insecureAlgErr x509.InsecureAlgorithmError
+	)
+
+	return errors.As(err, &unknownAuthErr) ||
+		errors.As(err, &certInvalidErr) ||
+		errors.As(err, &hostnameErr) ||
+		errors.As(err, &systemRootsErr) ||
+		errors.As(err, &constraintErr) ||
+		errors.As(err, &insecureAlgErr)
+}
+
+// isAuthenticationError reports whether err wraps a *transport.Error with HTTP status
+// 401/403 or with an Unauthorized/Denied registry error code.
+func isAuthenticationError(err error) bool {
+	var transportErr *transport.Error
+	if !errors.As(err, &transportErr) {
+		return false
+	}
+
+	if transportErr.StatusCode == http.StatusUnauthorized || transportErr.StatusCode == http.StatusForbidden {
+		return true
+	}
+
+	for _, diag := range transportErr.Errors {
+		if diag.Code == transport.UnauthorizedErrorCode || diag.Code == transport.DeniedErrorCode {
+			return true
+		}
+	}
+
+	return false
+}
+
+// authStatusCode returns the HTTP status of the wrapped *transport.Error, or 0 if there is none.
+func authStatusCode(err error) int {
+	var transportErr *transport.Error
+	if errors.As(err, &transportErr) {
+		return transportErr.StatusCode
+	}
+	return 0
+}
+
+// isRateLimitError reports whether err wraps a *transport.Error with HTTP status 429
+// or with a TooManyRequests registry error code.
+func isRateLimitError(err error) bool {
+	var transportErr *transport.Error
+	if !errors.As(err, &transportErr) {
+		return false
+	}
+
+	if transportErr.StatusCode == http.StatusTooManyRequests {
+		return true
+	}
+
+	for _, diag := range transportErr.Errors {
+		if diag.Code == transport.TooManyRequestsErrorCode {
+			return true
+		}
+	}
+
+	return false
+}
+
+// isServerError reports whether err wraps a *transport.Error with HTTP status
+// 500, 502, 503 or 504, or with an Unavailable registry error code.
+func isServerError(err error) bool {
+	var transportErr *transport.Error
+	if !errors.As(err, &transportErr) {
+		return false
+	}
+
+	switch transportErr.StatusCode {
+	case http.StatusInternalServerError,
+		http.StatusBadGateway,
+		http.StatusServiceUnavailable,
+		http.StatusGatewayTimeout:
+		return true
+	}
+
+	for _, diag := range transportErr.Errors {
+		if diag.Code == transport.UnavailableErrorCode {
+			return true
+		}
+	}
+
+	return false
+}
+
+// serverStatusCode returns the HTTP status of the wrapped *transport.Error, or 0 if there is none.
+func serverStatusCode(err error) int {
+	var transportErr *transport.Error
+	if errors.As(err, &transportErr) {
+		return transportErr.StatusCode
+	}
+	return 0
+}
+
+// isDNSError reports whether err wraps a *net.DNSError.
+func isDNSError(err error) bool {
+	var dnsErr *net.DNSError
+	return errors.As(err, &dnsErr)
+}
+
+// dnsHostname returns the name from the wrapped *net.DNSError, or "" if there is none.
+func dnsHostname(err error) string {
+	var dnsErr *net.DNSError
+	if errors.As(err, &dnsErr) {
+		return dnsErr.Name
+	}
+	return ""
+}
+
+// isTimeoutError reports whether err wraps context.DeadlineExceeded or os.ErrDeadlineExceeded.
+func isTimeoutError(err error) bool {
+	return errors.Is(err, context.DeadlineExceeded) || errors.Is(err, os.ErrDeadlineExceeded)
+}
+
+// isNetworkError reports whether err is a connection-level failure that is
+// neither a DNS error nor a deadline timeout (both have their own category).
+//
+// syscall.Errno has Timeout and Temporary methods and therefore satisfies
+// net.Error. The errno allow-list is consulted before the generic net.Error
+// check so that unrelated OS errors (for example ENOENT) found in the chain
+// are not reported as network failures.
+func isNetworkError(err error) bool {
+	if isDNSError(err) || isTimeoutError(err) {
+		return false
+	}
+
+	var (
+		netErr     net.Error
+		opErr      *net.OpError
+		syscallErr syscall.Errno
+	)
+
+	if errors.As(err, &opErr) {
+		return true
+	}
+
+	if errors.As(err, &syscallErr) {
+		return syscallErr == syscall.ECONNREFUSED ||
+			syscallErr == syscall.ECONNRESET ||
+			syscallErr == syscall.ETIMEDOUT ||
+			syscallErr == syscall.ENETUNREACH ||
+			syscallErr == syscall.EHOSTUNREACH
+	}
+
+	return errors.As(err, &netErr)
+}
+
+// networkAddr returns the address from the wrapped *net.OpError, or "" if it is absent.
+func networkAddr(err error) string {
+	var opErr *net.OpError
+	if errors.As(err, &opErr) && opErr.Addr != nil {
+		return opErr.Addr.String()
+	}
+	return ""
+}
+
+// isDiskFullError reports whether err wraps syscall.ENOSPC.
+func isDiskFullError(err error) bool {
+	return errors.Is(err, syscall.ENOSPC)
+}
+
+// isPermissionError reports whether err wraps os.ErrPermission, syscall.EACCES or syscall.EPERM.
+func isPermissionError(err error) bool {
+	return errors.Is(err, os.ErrPermission) ||
+		errors.Is(err, syscall.EACCES) ||
+		errors.Is(err, syscall.EPERM)
+}
diff --git a/internal/mirror/cmd/pull/errdetect/diagnose_test.go b/internal/mirror/cmd/pull/errdetect/diagnose_test.go
new file mode 100644
index 00000000..dea522a4
--- /dev/null
+++ b/internal/mirror/cmd/pull/errdetect/diagnose_test.go
@@ -0,0 +1,129 @@
+/*
+Copyright 2026 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package errdetect
+
+import (
+	"crypto/x509"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"syscall"
+	"testing"
+
+	"github.com/google/go-containerregistry/pkg/v1/remote/transport"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/deckhouse/deckhouse-cli/pkg/diagnostic"
+)
+
+func TestDiagnose_Nil(t *testing.T) {
+	assert.Nil(t, Diagnose(nil))
+}
+
+func TestDiagnose_Unclassified(t *testing.T) {
+	assert.Nil(t, Diagnose(errors.New("some random error")))
+}
+
+func TestDiagnose_AlreadyClassified(t *testing.T) {
+	first := Diagnose(io.EOF)
+	require.NotNil(t, first)
+	assert.Nil(t, Diagnose(first))
+}
+
+func TestDiagnose_AllCategories(t *testing.T) {
+	tests := []struct {
+		name     string
+		err      error
+		category string
+	}{
+		{"EOF", io.EOF, categoryEOF},
+		{"TLS", fmt.Errorf("reg: %w", x509.UnknownAuthorityError{}), categoryTLS},
+		{"Auth401", &transport.Error{StatusCode: http.StatusUnauthorized}, categoryAuth401},
+		{"Auth403", &transport.Error{StatusCode: http.StatusForbidden}, categoryAuth403},
+		{"RateLimit", &transport.Error{StatusCode: http.StatusTooManyRequests}, categoryRateLimit},
+		{"Server500", &transport.Error{StatusCode: http.StatusInternalServerError}, categoryServerError},
+		{"DiskFull", fmt.Errorf("write bundle: %w", syscall.ENOSPC), categoryDiskFull},
+		{"Permission", fmt.Errorf("create file: %w", os.ErrPermission), categoryPermission},
+		{"ImageNotFound", errors.New("MANIFEST_UNKNOWN: not found"), categoryImageNotFound},
+		{"RepoNotFound", errors.New("NAME_UNKNOWN: repo"), categoryRepoNotFound},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			diag := Diagnose(tt.err)
+			require.NotNil(t, diag)
+			assert.Contains(t, diag.Category, tt.category)
+		})
+	}
+}
+
+func TestDiagnose_PullSpecificAuth(t *testing.T) {
+	diag := Diagnose(&transport.Error{StatusCode: http.StatusUnauthorized})
+	require.NotNil(t, diag)
+
+	solutions := allSolutions(diag)
+	assert.Contains(t, solutions, "--license")
+	assert.Contains(t, solutions, "--source-login")
+	assert.NotContains(t, solutions, "--registry-login")
+	assert.NotContains(t, solutions, "--registry-password")
+}
+
+func TestDiagnose_DiskFull(t *testing.T) {
+	diag := Diagnose(fmt.Errorf("write bundle: %w", syscall.ENOSPC))
+	require.NotNil(t, diag)
+	assert.Equal(t, categoryDiskFull, diag.Category)
+	assert.NotEmpty(t, diag.Suggestions)
+}
+
+func TestDiagnose_PermissionDenied(t *testing.T) {
+	diag := Diagnose(fmt.Errorf("create file: %w", os.ErrPermission))
+	require.NotNil(t, diag)
+	assert.Equal(t, categoryPermission, diag.Category)
+	assert.NotEmpty(t, diag.Suggestions)
+}
+
+func allSolutions(diag *diagnostic.HelpfulError) string {
+	var parts []string
+	for _, s := range diag.Suggestions {
+		parts = append(parts, s.Solutions...)
+	}
+	return strings.Join(parts, " ")
+}
+
+func TestDiagnose_NoUnsupportedOCI(t *testing.T) {
+	assert.Nil(t, Diagnose(errors.New("MANIFEST_INVALID: vnd.aquasec.trivy")))
+}
+
+func TestDiagnose_Unwrap(t *testing.T) {
+	diag := Diagnose(io.EOF)
+	require.NotNil(t, diag)
+
+	var helpErr *diagnostic.HelpfulError
+	require.True(t, errors.As(diag, &helpErr))
+	assert.True(t, errors.Is(diag, io.EOF))
+}
+
+// TestIsNetworkError_Errno guards against syscall.Errno values being accepted
+// as network failures merely because Errno satisfies net.Error.
+func TestIsNetworkError_Errno(t *testing.T) {
+	assert.True(t, isNetworkError(fmt.Errorf("dial: %w", syscall.ECONNREFUSED)))
+	assert.False(t, isNetworkError(fmt.Errorf("open bundle: %w", syscall.ENOENT)))
+}
diff --git a/internal/mirror/cmd/pull/pull.go b/internal/mirror/cmd/pull/pull.go
index 8866c970..f7a0f015 100644
--- a/internal/mirror/cmd/pull/pull.go
+++ b/internal/mirror/cmd/pull/pull.go
@@ -40,13 +40,14 @@ import (
 
 	"github.com/deckhouse/deckhouse-cli/internal"
 	"github.com/deckhouse/deckhouse-cli/internal/mirror"
+	"github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/errdetect"
 	pullflags "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/flags"
 	"github.com/deckhouse/deckhouse-cli/internal/mirror/gostsums"
 	"github.com/deckhouse/deckhouse-cli/internal/mirror/modules"
+	"github.com/deckhouse/deckhouse-cli/internal/mirror/validation"
 	"github.com/deckhouse/deckhouse-cli/internal/version"
 	"github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params"
 	"github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log"
-	"github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation"
 	pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client"
 	registryservice "github.com/deckhouse/deckhouse-cli/pkg/registry/service"
 	"github.com/deckhouse/deckhouse-cli/pkg/stub"
@@ -119,6 +120,9 @@ func pull(cmd *cobra.Command, _ []string) error {
 			puller.logger.WarnLn("Operation cancelled by user")
 			return nil
 		}
+		if diag := errdetect.Diagnose(err); diag != nil {
+			return diag
+		}
 		return fmt.Errorf("pull failed: %w", err)
 	}
 
diff --git a/internal/mirror/cmd/pull/pull_test.go b/internal/mirror/cmd/pull/pull_test.go
index a26a10d4..4bc488ce 100644
--- a/internal/mirror/cmd/pull/pull_test.go
+++ b/internal/mirror/cmd/pull/pull_test.go
@@ -37,7 +37,7 @@ import (
 	pullflags "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/pull/flags"
+	"github.com/deckhouse/deckhouse-cli/internal/mirror/validation"
 	"github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params"
 	"github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log"
-	"github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation"
 )
 
 func TestNewCommand(t *testing.T) {
diff --git a/internal/mirror/cmd/push/errdetect/diagnose.go b/internal/mirror/cmd/push/errdetect/diagnose.go
new file mode 100644
index 00000000..49e4e276
--- /dev/null
+++ b/internal/mirror/cmd/push/errdetect/diagnose.go
@@ -0,0 +1,534 @@
+/*
+Copyright 2026 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package errdetect classifies registry errors for d8 mirror push
+// with push-specific causes and solutions.
+package errdetect
+
+import (
+	"context"
+	"crypto/x509"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"os"
+	"syscall"
+
+	"github.com/google/go-containerregistry/pkg/v1/remote/transport"
+
+	"github.com/deckhouse/deckhouse-cli/internal/mirror/errmatch"
+	"github.com/deckhouse/deckhouse-cli/pkg/diagnostic"
+)
+
+// Category texts that Diagnose stores in diagnostic.HelpfulError.Category.
+const (
+	categoryEOF           = "Connection terminated unexpectedly (EOF)"
+	categoryTLS           = "TLS/certificate verification failed"
+	categoryAuth          = "Authentication failed"
+	categoryAuth401       = "Authentication failed (HTTP 401 Unauthorized)"
+	categoryAuth403       = "Access denied (HTTP 403 Forbidden)"
+	categoryRateLimit     = "Rate limited by registry (HTTP 429 Too Many Requests)"
+	categoryServerError   = "Registry server error"
+	categoryDNS           = "DNS resolution failed"
+	categoryTimeout       = "Operation timed out"
+	categoryNetwork       = "Network connection failed"
+	categoryDiskFull      = "Disk space exhausted"
+	categoryPermission    = "Permission denied"
+	categoryImageNotFound = "Image not found in registry"
+	categoryRepoNotFound  = "Repository not found in registry"
+)
+
+// Diagnose analyzes an error and returns a *diagnostic.HelpfulError
+// with push-specific causes and solutions, or nil if the error is not recognized.
+//
+// nil is also returned when err is nil or already wraps a *diagnostic.HelpfulError,
+// so an error is never classified twice. The detectors are tried in the order of
+// the switch and the first match wins.
+func Diagnose(err error) *diagnostic.HelpfulError {
+	if err == nil {
+		return nil
+	}
+
+	var helpErr *diagnostic.HelpfulError
+	if errors.As(err, &helpErr) {
+		return nil
+	}
+
+	switch {
+	case isEOF(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryEOF,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Large layer upload interrupted by an intermediate timeout",
+					Solutions: []string{
+						"Increase the timeout with D8_MIRROR_TIMEOUT env variable, e.g.: export D8_MIRROR_TIMEOUT=4h",
+						"Re-run the same push command to retry the failed uploads",
+					},
+				},
+				{
+					Cause: "Corporate proxy or middleware intercepting and terminating HTTPS connections",
+					Solutions: []string{
+						"Check if a corporate proxy is intercepting HTTPS traffic",
+						"If using a proxy, ensure it is configured to pass through registry traffic",
+						"Try connecting directly without proxy: unset HTTP_PROXY HTTPS_PROXY",
+					},
+				},
+				{Cause: "Target registry closed the connection unexpectedly"},
+				{Cause: "Network device (firewall, load balancer) dropping packets"},
+			},
+		}
+
+	case isCertificateError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryTLS,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Self-signed or private CA certificate on the target registry",
+					Solutions: []string{
+						"Add the target registry's CA certificate to your system trust store",
+						"Use --tls-skip-verify flag to skip TLS verification (not recommended for production)",
+					},
+				},
+				{
+					Cause: "Certificate expired or not yet valid",
+					Solutions: []string{
+						"Verify system clock is correct (wrong time is a common cause of certificate errors)",
+						"Renew the registry certificate if it has expired",
+					},
+				},
+				{
+					Cause:     "Target registry is serving plain HTTP, not HTTPS",
+					Solutions: []string{"Use --insecure flag if the target registry uses HTTP instead of HTTPS"},
+				},
+				{
+					Cause: "Corporate proxy or middleware intercepting and replacing TLS certificates",
+					Solutions: []string{
+						"Add the proxy's CA certificate to your system trust store",
+						"Check if a corporate proxy is intercepting HTTPS traffic",
+					},
+				},
+			},
+		}
+
+	case isAuthenticationError(err):
+		category := categoryAuth
+		if code := authStatusCode(err); code == http.StatusUnauthorized {
+			category = categoryAuth401
+		} else if code == http.StatusForbidden {
+			category = categoryAuth403
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Registry credentials are invalid or not provided",
+					Solutions: []string{"Verify --registry-login and --registry-password are correct"},
+				},
+				{
+					Cause:     "Account does not have push permissions",
+					Solutions: []string{"Ensure the account has write access to the target repository"},
+				},
+				{
+					Cause:     "Repository path requires different access rights",
+					Solutions: []string{"Contact registry administrator to verify push permissions"},
+				},
+			},
+		}
+
+	case isRateLimitError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryRateLimit,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Too many requests to the target registry in a short time",
+					Solutions: []string{"Wait a few minutes and retry the operation"},
+				},
+				{
+					Cause:     "Registry-side rate limiting policy",
+					Solutions: []string{"Contact registry administrator to increase rate limits"},
+				},
+			},
+		}
+
+	case isServerError(err):
+		category := categoryServerError
+		if code := serverStatusCode(err); code != 0 {
+			category = fmt.Sprintf("%s (HTTP %d)", categoryServerError, code)
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Target registry is experiencing internal errors",
+					Solutions: []string{"Wait a few minutes and retry the operation"},
+				},
+				{
+					Cause:     "Backend storage is temporarily unavailable",
+					Solutions: []string{"Check target registry status and health"},
+				},
+				{
+					Cause:     "Registry is overloaded or being maintained",
+					Solutions: []string{"Contact registry administrator if the problem persists"},
+				},
+			},
+		}
+
+	case isDNSError(err):
+		category := categoryDNS
+		if name := dnsHostname(err); name != "" {
+			category = fmt.Sprintf("%s for '%s'", categoryDNS, name)
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Incorrect registry address or typo in hostname",
+					Solutions: []string{"Verify the registry address argument is spelled correctly"},
+				},
+				{
+					Cause:     "DNS server is unreachable or not responding",
+					Solutions: []string{"Check your DNS server configuration"},
+				},
+				{
+					Cause:     "Target registry hostname cannot be resolved by DNS",
+					Solutions: []string{"Try using the registry's IP address instead of hostname"},
+				},
+			},
+		}
+
+	case isTimeoutError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryTimeout,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Large image layers require more time to upload than the default timeout",
+					Solutions: []string{
+						"Increase the timeout with D8_MIRROR_TIMEOUT env variable, e.g.: export D8_MIRROR_TIMEOUT=4h",
+						"Re-run the same push command to retry the failed uploads",
+					},
+				},
+				{
+					Cause:     "Firewall silently dropping packets (no RST, no ICMP)",
+					Solutions: []string{"Verify firewall rules allow outbound HTTPS (port 443) to the target registry"},
+				},
+				{
+					Cause:     "Target registry took too long to respond",
+					Solutions: []string{"Check network connectivity and latency to the target registry"},
+				},
+			},
+		}
+
+	case isDiskFullError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryDiskFull,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Not enough free disk space in the temporary working directory",
+					Solutions: []string{
+						"Free up disk space on the partition",
+						"Use --tmp-dir to point to a partition with more free space",
+					},
+				},
+			},
+		}
+
+	case isPermissionError(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryPermission,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Bundle directory or temporary directory is not readable by the current user",
+					Solutions: []string{
+						"Check read permissions on the bundle path",
+						"Run with a user that has read access, or change directory permissions",
+					},
+				},
+				{
+					Cause:     "Temporary directory is not writable",
+					Solutions: []string{"Use --tmp-dir to specify a writable temporary directory"},
+				},
+			},
+		}
+
+	case isNetworkError(err):
+		category := categoryNetwork
+		if addr := networkAddr(err); addr != "" {
+			category = fmt.Sprintf("%s to %s", categoryNetwork, addr)
+		}
+
+		return &diagnostic.HelpfulError{
+			Category:    category,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause:     "Target registry is serving plain HTTP, not HTTPS",
+					Solutions: []string{"Use --insecure flag if the target registry uses HTTP instead of HTTPS"},
+				},
+				{
+					Cause: "Firewall or security group blocking outbound connections",
+					Solutions: []string{
+						"Verify firewall rules allow outbound HTTPS (port 443) to the target registry",
+						"Test connectivity with: curl -v https://REGISTRY_HOST/v2/",
+					},
+				},
+				{
+					Cause:     "Target registry is down or temporarily unreachable",
+					Solutions: []string{"Check your network connection and the target registry status"},
+				},
+			},
+		}
+
+	case errmatch.IsImageNotFound(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryImageNotFound,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Bundle is incomplete or corrupted — expected image manifest was not found",
+					Solutions: []string{
+						"Re-run d8 mirror pull to download a fresh complete bundle",
+						"If using a split bundle, ensure all chunk files are present and intact",
+					},
+				},
+				{
+					Cause:     "Registry inconsistency during concurrent push operations",
+					Solutions: []string{"Retry the push operation"},
+				},
+			},
+		}
+
+	case errmatch.IsRepoNotFound(err):
+		return &diagnostic.HelpfulError{
+			Category:    categoryRepoNotFound,
+			OriginalErr: err,
+			Suggestions: []diagnostic.Suggestion{
+				{
+					Cause: "Repository does not exist in the target registry",
+					Solutions: []string{
+						"Some registries require the repository to be created before pushing",
+						"Create the target repository in the registry's web interface or API",
+					},
+				},
+				{
+					Cause: "Incorrect target registry path",
+					Solutions: []string{
+						"Verify the registry address argument is spelled correctly",
+						"Check for extra path segments, leading slashes, or typos",
+					},
+				},
+			},
+		}
+	}
+
+	return nil
+}
+
+// --- detection functions ---
+
+// isEOF reports whether err wraps io.EOF or io.ErrUnexpectedEOF.
+func isEOF(err error) bool {
+	return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)
+}
+
+// isCertificateError reports whether err wraps one of the crypto/x509 verification error types.
+func isCertificateError(err error) bool {
+	var (
+		unknownAuthErr x509.UnknownAuthorityError
+		certInvalidErr x509.CertificateInvalidError
+		hostnameErr    x509.HostnameError
+		systemRootsErr x509.SystemRootsError
+		constraintErr  x509.ConstraintViolationError
+		insecureAlgErr x509.InsecureAlgorithmError
+	)
+
+	return errors.As(err, &unknownAuthErr) ||
+		errors.As(err, &certInvalidErr) ||
+		errors.As(err, &hostnameErr) ||
+		errors.As(err, &systemRootsErr) ||
+		errors.As(err, &constraintErr) ||
+		errors.As(err, &insecureAlgErr)
+}
+
+// isAuthenticationError reports whether err wraps a *transport.Error with HTTP status
+// 401/403 or with an Unauthorized/Denied registry error code.
+func isAuthenticationError(err error) bool {
+	var transportErr *transport.Error
+	if !errors.As(err, &transportErr) {
+		return false
+	}
+
+	if transportErr.StatusCode == http.StatusUnauthorized || transportErr.StatusCode == http.StatusForbidden {
+		return true
+	}
+
+	for _, diag := range transportErr.Errors {
+		if diag.Code == transport.UnauthorizedErrorCode || diag.Code == transport.DeniedErrorCode {
+			return true
+		}
+	}
+
+	return false
+}
+
+// authStatusCode returns the HTTP status of the wrapped *transport.Error, or 0 if there is none.
+func authStatusCode(err error) int {
+	var transportErr *transport.Error
+	if errors.As(err, &transportErr) {
+		return transportErr.StatusCode
+	}
+	return 0
+}
+
+// isRateLimitError reports whether err wraps a *transport.Error with HTTP status 429
+// or with a TooManyRequests registry error code.
+func isRateLimitError(err error) bool {
+	var transportErr *transport.Error
+	if !errors.As(err, &transportErr) {
+		return false
+	}
+
+	if transportErr.StatusCode == http.StatusTooManyRequests {
+		return true
+	}
+
+	for _, diag := range transportErr.Errors {
+		if diag.Code == transport.TooManyRequestsErrorCode {
+			return true
+		}
+	}
+
+	return false
+}
+
+// isServerError reports whether err wraps a *transport.Error with HTTP status
+// 500, 502, 503 or 504, or with an Unavailable registry error code.
+func isServerError(err error) bool {
+	var transportErr *transport.Error
+	if !errors.As(err, &transportErr) {
+		return false
+	}
+
+	switch transportErr.StatusCode {
+	case http.StatusInternalServerError,
+		http.StatusBadGateway,
+		http.StatusServiceUnavailable,
+		http.StatusGatewayTimeout:
+		return true
+	}
+
+	for _, diag := range transportErr.Errors {
+		if diag.Code == transport.UnavailableErrorCode {
+			return true
+		}
+	}
+
+	return false
+}
+
+// serverStatusCode returns the HTTP status of the wrapped *transport.Error, or 0 if there is none.
+func serverStatusCode(err error) int {
+	var transportErr *transport.Error
+	if errors.As(err, &transportErr) {
+		return transportErr.StatusCode
+	}
+	return 0
+}
+
+// isDNSError reports whether err wraps a *net.DNSError.
+func isDNSError(err error) bool {
+	var dnsErr *net.DNSError
+	return errors.As(err, &dnsErr)
+}
+
+// dnsHostname returns the name from the wrapped *net.DNSError, or "" if there is none.
+func dnsHostname(err error) string {
+	var dnsErr *net.DNSError
+	if errors.As(err, &dnsErr) {
+		return dnsErr.Name
+	}
+	return ""
+}
+
+// isTimeoutError reports whether err wraps context.DeadlineExceeded or os.ErrDeadlineExceeded.
+func isTimeoutError(err error) bool {
+	return errors.Is(err, context.DeadlineExceeded) || errors.Is(err, os.ErrDeadlineExceeded)
+}
+
+// isNetworkError reports whether err is a connection-level failure that is
+// neither a DNS error nor a deadline timeout (both have their own category).
+//
+// syscall.Errno has Timeout and Temporary methods and therefore satisfies
+// net.Error. The errno allow-list is consulted before the generic net.Error
+// check so that unrelated OS errors (for example ENOENT) found in the chain
+// are not reported as network failures.
+func isNetworkError(err error) bool {
+	if isDNSError(err) || isTimeoutError(err) {
+		return false
+	}
+
+	var (
+		netErr     net.Error
+		opErr      *net.OpError
+		syscallErr syscall.Errno
+	)
+
+	if errors.As(err, &opErr) {
+		return true
+	}
+
+	if errors.As(err, &syscallErr) {
+		return syscallErr == syscall.ECONNREFUSED ||
+			syscallErr == syscall.ECONNRESET ||
+			syscallErr == syscall.ETIMEDOUT ||
+			syscallErr == syscall.ENETUNREACH ||
+			syscallErr == syscall.EHOSTUNREACH
+	}
+
+	return errors.As(err, &netErr)
+}
+
++func networkAddr(err error) string { + var opErr *net.OpError + if errors.As(err, &opErr) && opErr.Addr != nil { + return opErr.Addr.String() + } + return
"" +} + +func isDiskFullError(err error) bool { + return errors.Is(err, syscall.ENOSPC) +} + +func isPermissionError(err error) bool { + return errors.Is(err, os.ErrPermission) || + errors.Is(err, syscall.EACCES) || + errors.Is(err, syscall.EPERM) +} diff --git a/internal/mirror/cmd/push/errdetect/diagnose_test.go b/internal/mirror/cmd/push/errdetect/diagnose_test.go new file mode 100644 index 00000000..e87cc9f4 --- /dev/null +++ b/internal/mirror/cmd/push/errdetect/diagnose_test.go @@ -0,0 +1,118 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package errdetect + +import ( + "crypto/x509" + "errors" + "fmt" + "io" + "net/http" + "os" + "strings" + "syscall" + "testing" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" +) + +func TestDiagnose_Nil(t *testing.T) { + assert.Nil(t, Diagnose(nil)) +} + +func TestDiagnose_Unclassified(t *testing.T) { + assert.Nil(t, Diagnose(errors.New("some random error"))) +} + +func TestDiagnose_AlreadyClassified(t *testing.T) { + first := Diagnose(io.EOF) + require.NotNil(t, first) + assert.Nil(t, Diagnose(first)) +} + +func TestDiagnose_AllCategories(t *testing.T) { + tests := []struct { + name string + err error + category string + }{ + {"EOF", io.EOF, categoryEOF}, + {"TLS", fmt.Errorf("reg: %w", x509.UnknownAuthorityError{}), categoryTLS}, + {"Auth401", &transport.Error{StatusCode: http.StatusUnauthorized}, categoryAuth401}, + {"Auth403", &transport.Error{StatusCode: http.StatusForbidden}, categoryAuth403}, + {"RateLimit", &transport.Error{StatusCode: http.StatusTooManyRequests}, categoryRateLimit}, + {"Server500", &transport.Error{StatusCode: http.StatusInternalServerError}, categoryServerError}, + {"DiskFull", fmt.Errorf("write temp: %w", syscall.ENOSPC), categoryDiskFull}, + {"Permission", fmt.Errorf("open bundle: %w", os.ErrPermission), categoryPermission}, + {"ImageNotFound", errors.New("MANIFEST_UNKNOWN: not found"), categoryImageNotFound}, + {"RepoNotFound", errors.New("NAME_UNKNOWN: repo"), categoryRepoNotFound}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diag := Diagnose(tt.err) + require.NotNil(t, diag) + assert.Contains(t, diag.Category, tt.category) + }) + } +} + +func TestDiagnose_PushSpecificAuth(t *testing.T) { + diag := Diagnose(&transport.Error{StatusCode: http.StatusUnauthorized}) + require.NotNil(t, diag) + + solutions := allSolutions(diag) + assert.Contains(t, 
solutions, "--registry-login") + assert.Contains(t, solutions, "--registry-password") + assert.NotContains(t, solutions, "--license") + assert.NotContains(t, solutions, "--source-login") +} + +func TestDiagnose_DiskFull(t *testing.T) { + diag := Diagnose(fmt.Errorf("write temp: %w", syscall.ENOSPC)) + require.NotNil(t, diag) + assert.Equal(t, categoryDiskFull, diag.Category) + assert.NotEmpty(t, diag.Suggestions) +} + +func TestDiagnose_PermissionDenied(t *testing.T) { + diag := Diagnose(fmt.Errorf("open bundle: %w", os.ErrPermission)) + require.NotNil(t, diag) + assert.Equal(t, categoryPermission, diag.Category) + assert.NotEmpty(t, diag.Suggestions) +} + +func allSolutions(diag *diagnostic.HelpfulError) string { + var parts []string + for _, s := range diag.Suggestions { + parts = append(parts, s.Solutions...) + } + return strings.Join(parts, " ") +} + +func TestDiagnose_Unwrap(t *testing.T) { + diag := Diagnose(io.EOF) + require.NotNil(t, diag) + + var helpErr *diagnostic.HelpfulError + require.True(t, errors.As(diag, &helpErr)) + assert.True(t, errors.Is(diag, io.EOF)) +} diff --git a/internal/mirror/cmd/push/push.go b/internal/mirror/cmd/push/push.go index a6190233..ba75e0a8 100644 --- a/internal/mirror/cmd/push/push.go +++ b/internal/mirror/cmd/push/push.go @@ -35,10 +35,11 @@ import ( regclient "github.com/deckhouse/deckhouse/pkg/registry/client" "github.com/deckhouse/deckhouse-cli/internal/mirror" + "github.com/deckhouse/deckhouse-cli/internal/mirror/cmd/push/errdetect" + "github.com/deckhouse/deckhouse-cli/internal/mirror/validation" "github.com/deckhouse/deckhouse-cli/internal/version" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/operations/params" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/validation" pkgclient "github.com/deckhouse/deckhouse-cli/pkg/registry/client" ) @@ -178,10 +179,20 @@ func (p *Pusher) Execute() error { } if err := p.validateRegistryAccess(); err != nil { + if 
diag := errdetect.Diagnose(err); diag != nil { + return diag + } + return err + } + + if err := p.executeNewPush(); err != nil { + if diag := errdetect.Diagnose(err); diag != nil { + return diag + } return err } - return p.executeNewPush() + return nil } // executeNewPush runs the push using the push service. diff --git a/internal/mirror/errmatch/errmatch.go b/internal/mirror/errmatch/errmatch.go new file mode 100644 index 00000000..0e65652e --- /dev/null +++ b/internal/mirror/errmatch/errmatch.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package errmatch provides error matchers for container registry responses. +// These are used for flow control in mirror operations (e.g., skipping optional images). +package errmatch + +import ( + "errors" + "strings" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" +) + +// IsImageNotFound returns true if the error indicates that the requested image +// tag or manifest does not exist in the registry. +func IsImageNotFound(err error) bool { + if err == nil { + return false + } + + // Typed check: works for GET responses where registry returns JSON with error codes. + if hasDiagnosticCode(err, transport.ManifestUnknownErrorCode) { + return true + } + + // String fallback: HEAD responses have no body per HTTP spec, so transport.Error.Errors + // is empty. Also covers registries that return plain text instead of structured JSON. 
+ errMsg := err.Error() + return strings.Contains(errMsg, "MANIFEST_UNKNOWN") || strings.Contains(errMsg, "404 Not Found") +} + +// IsRepoNotFound returns true if the error indicates that the requested +// repository does not exist in the registry. +func IsRepoNotFound(err error) bool { + if err == nil { + return false + } + + // Typed check: works for GET responses with structured JSON error codes. + if hasDiagnosticCode(err, transport.NameUnknownErrorCode) { + return true + } + + // String fallback: same as IsImageNotFound - covers HEAD responses and plain text errors. + return strings.Contains(err.Error(), "NAME_UNKNOWN") +} + +// hasDiagnosticCode checks if err is a *transport.Error containing +// a Diagnostic with the given error code. +func hasDiagnosticCode(err error, code transport.ErrorCode) bool { + var transportErr *transport.Error + if !errors.As(err, &transportErr) { + return false + } + + for _, diag := range transportErr.Errors { + if diag.Code == code { + return true + } + } + + return false +} diff --git a/internal/mirror/errmatch/errmatch_test.go b/internal/mirror/errmatch/errmatch_test.go new file mode 100644 index 00000000..f6e2db2f --- /dev/null +++ b/internal/mirror/errmatch/errmatch_test.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package errmatch + +import ( + "errors" + "fmt" + "testing" + + "github.com/google/go-containerregistry/pkg/v1/remote/transport" + "github.com/stretchr/testify/assert" +) + +func TestIsImageNotFound_TypedError(t *testing.T) { + err := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.ManifestUnknownErrorCode, Message: "manifest unknown"}}, + } + assert.True(t, IsImageNotFound(err)) +} + +func TestIsImageNotFound_WrappedTypedError(t *testing.T) { + inner := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.ManifestUnknownErrorCode}}, + } + assert.True(t, IsImageNotFound(fmt.Errorf("get manifest: %w", inner))) +} + +func TestIsImageNotFound_FallbackString(t *testing.T) { + assert.True(t, IsImageNotFound(errors.New("MANIFEST_UNKNOWN: not found"))) + assert.True(t, IsImageNotFound(errors.New("404 Not Found"))) +} + +func TestIsImageNotFound_Negative(t *testing.T) { + assert.False(t, IsImageNotFound(errors.New("some other error"))) + assert.False(t, IsImageNotFound(nil)) +} + +func TestIsRepoNotFound_TypedError(t *testing.T) { + err := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.NameUnknownErrorCode, Message: "repository name not known"}}, + } + assert.True(t, IsRepoNotFound(err)) +} + +func TestIsRepoNotFound_WrappedTypedError(t *testing.T) { + inner := &transport.Error{ + StatusCode: 404, + Errors: []transport.Diagnostic{{Code: transport.NameUnknownErrorCode}}, + } + assert.True(t, IsRepoNotFound(fmt.Errorf("check repo: %w", inner))) +} + +func TestIsRepoNotFound_FallbackString(t *testing.T) { + assert.True(t, IsRepoNotFound(errors.New("NAME_UNKNOWN: repo"))) +} + +func TestIsRepoNotFound_Negative(t *testing.T) { + assert.False(t, IsRepoNotFound(errors.New("some other error"))) + assert.False(t, IsRepoNotFound(nil)) +} diff --git a/internal/mirror/pusher/pusher.go b/internal/mirror/pusher/pusher.go index 49c41062..c065c9fa 100644 --- 
a/internal/mirror/pusher/pusher.go +++ b/internal/mirror/pusher/pusher.go @@ -30,7 +30,6 @@ import ( dkplog "github.com/deckhouse/deckhouse/pkg/log" "github.com/deckhouse/deckhouse-cli/internal/mirror/chunked" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/errorutil" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/log" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/retry" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/retry/task" @@ -106,9 +105,6 @@ func (s *Service) PushLayout(ctx context.Context, layoutPath layout.Path, client fmt.Sprintf("[%d / %d] Pushing %s", i+1, len(indexManifest.Manifests), imageReferenceString), task.WithConstantRetries(pushRetryAttempts, pushRetryDelay, func(ctx context.Context) error { if err := client.PushImage(ctx, tag, img); err != nil { - if errorutil.IsTrivyMediaTypeNotAllowedError(err) { - return fmt.Errorf(errorutil.CustomTrivyMediaTypesWarning) - } return fmt.Errorf("write %s:%s to registry: %w", client.GetRegistry(), tag, err) } return nil diff --git a/pkg/libmirror/validation/registry_access.go b/internal/mirror/validation/registry_access.go similarity index 97% rename from pkg/libmirror/validation/registry_access.go rename to internal/mirror/validation/registry_access.go index da2d37cb..b10f424a 100644 --- a/pkg/libmirror/validation/registry_access.go +++ b/internal/mirror/validation/registry_access.go @@ -26,8 +26,8 @@ import ( "github.com/google/go-containerregistry/pkg/v1/random" "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/deckhouse/deckhouse-cli/internal/mirror/errmatch" "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/auth" - "github.com/deckhouse/deckhouse-cli/pkg/libmirror/util/errorutil" ) var ErrImageUnavailable = errors.New("required image is not present in registry") @@ -88,7 +88,7 @@ func (v *RemoteRegistryAccessValidator) ValidateReadAccessForImage(ctx context.C _, err = remote.Head(ref, remoteOpts...) 
if err != nil { - if errorutil.IsImageNotFoundError(err) { + if errmatch.IsImageNotFound(err) { return ErrImageUnavailable } return err diff --git a/pkg/libmirror/validation/registry_access_test.go b/internal/mirror/validation/registry_access_test.go similarity index 100% rename from pkg/libmirror/validation/registry_access_test.go rename to internal/mirror/validation/registry_access_test.go diff --git a/pkg/diagnostic/README.md b/pkg/diagnostic/README.md new file mode 100644 index 00000000..334a5ca0 --- /dev/null +++ b/pkg/diagnostic/README.md @@ -0,0 +1,156 @@ +# pkg/diagnostic + +User-friendly error diagnostics for d8 CLI. Known errors get formatted +with possible causes and solutions instead of raw Go error text: + +``` +error: TLS/certificate verification failed + ╰─▶ x509: certificate signed by unknown authority + + * Self-signed certificate on the source registry + -> Use --tls-skip-verify flag to skip TLS verification + * Corporate proxy intercepting HTTPS connections + -> Check if a corporate proxy is intercepting HTTPS traffic +``` + +## How it works + +``` + root.go mirror/cmd/pull (RunE) + ─────── ────────────────────── + + rootCmd.Execute() + | + | cobra dispatches + | to subcommand ──────────────> err := puller.Execute() + | | + | [Diagnose err] -> is it HelpfulError? + | | + | yes | no + | | | + | *HelpfulError <-+ +-> fmt.Errorf("pull failed: %w", err) + | | | + | error returns <───────────────+─────+ + | + [errors.As HelpfulError?] + | + yes | no + | | + v v + .Format() "Error executing command: ..." (as usual) + (colored) (plain) +``` + +Each command diagnoses errors with its own errdetect package. +`root.go` only catches `*HelpfulError` via `errors.As` - it does not +import any errdetect, so unrelated commands never get false diagnostics. 
+ +## HelpfulError + +```go +type Suggestion struct { + Cause string // why it might have happened + Solutions []string // how to fix this specific cause +} + +type HelpfulError struct { + Category string // what went wrong: "TLS/certificate verification failed" + OriginalErr error // the underlying error (required, used by Unwrap/Error/Format) + Suggestions []Suggestion // cause-solution pairs (optional) +} +``` + +| Field | Required | What happens if empty | +|-------|----------|----------------------| +| `Category` | yes | output shows `error: ` with no description | +| `OriginalErr` | yes | safe (no panic), but `Unwrap` returns nil and `Format` skips the error line | +| `Suggestions` | no | suggestions section is omitted | + +How fields map to output (`Format()`): + +``` +error: TLS/certificate verification failed <-- Category + ╰─▶ x509: certificate signed by ... <-- OriginalErr (unwrapped chain) + + * Self-signed certificate <-- Suggestion.Cause + -> Use --tls-skip-verify flag <-- Suggestion.Solutions + * Corporate proxy intercepting HTTPS <-- next Suggestion.Cause + -> Check if proxy is intercepting ... <-- its Solutions +``` + +`Error()` returns plain text for logs: `"Category: OriginalErr.Error()"`. +`Unwrap()` returns `OriginalErr` so `errors.Is`/`errors.As` work through it. + +## Where classifiers live + +Classifiers are **application/UI logic**, not library code. They contain +user-facing advice (CLI flags, links to docs) that is specific to each command. +Place them in `internal/` next to the command they serve. + +``` +pkg/diagnostic/ HelpfulError + Format (generic, reusable) +internal/mirror/errmatch/ error matchers (generic, reusable) +internal/mirror/cmd/pull/errdetect/ pull-specific diagnostics +internal/mirror/cmd/push/errdetect/ push-specific diagnostics +``` + +Why per-command: pull advises `--license`/`--source-login`, push advises +`--registry-login`/`--registry-password`. Shared classifier would give +ambiguous advice.
+ +## Adding diagnostics to a new command + +**1. Create an errdetect package** next to your command: + +```go +// internal/backup/cmd/snapshot/errdetect/diagnose.go +package errdetect + +import ( + "errors" + "github.com/deckhouse/deckhouse-cli/pkg/diagnostic" +) + +func Diagnose(err error) *diagnostic.HelpfulError { + var helpErr *diagnostic.HelpfulError + if errors.As(err, &helpErr) { + return nil // already diagnosed, don't wrap twice + } + + if isETCDError(err) { + return &diagnostic.HelpfulError{ + Category: "ETCD connection failed", + OriginalErr: err, + Suggestions: []diagnostic.Suggestion{ + { + Cause: "ETCD cluster is unreachable", + Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, + }, + }, + } + } + return nil +} +``` + +**2. Call it in RunE** of your leaf command: + +```go +if err := doSnapshot(); err != nil { + if diag := errdetect.Diagnose(err); diag != nil { + return diag + } + return fmt.Errorf("snapshot failed: %w", err) +} +``` + +No changes to `root.go` needed - it catches any `*HelpfulError` +regardless of which errdetect produced it. + +## Rules (Best Practice) + +- Classifiers go in `internal/<command>/errdetect/` - they are application logic, not libraries +- Diagnose in the **leaf command** (RunE), not in libraries or root.go +- Each command uses its **own errdetect** - prevents false diagnostics +- Skip diagnosis if the error is already a `*HelpfulError` (see guard in the example above) +- `Suggestions` are optional but highly recommended diff --git a/pkg/diagnostic/diagnostic.go b/pkg/diagnostic/diagnostic.go new file mode 100644 index 00000000..96e6627d --- /dev/null +++ b/pkg/diagnostic/diagnostic.go @@ -0,0 +1,46 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package diagnostic + +// Suggestion pairs a possible cause with its specific solutions. +type Suggestion struct { + Cause string // why it might have happened + Solutions []string // how to fix this specific cause +} + +// HelpfulError is an error enriched with possible causes and actionable solutions. +// It implements the error interface so it can propagate up the call chain +// and be printed once at the top level, avoiding double output. +type HelpfulError struct { + Category string // e.g. "DNS resolution failed for 'registry.example.com'" + OriginalErr error // the underlying error + Suggestions []Suggestion // cause-solution pairs shown to the user +} + +// Error returns a plain-text representation suitable for logging and error wrapping. +// Use Format() for user-facing terminal output. +func (e *HelpfulError) Error() string { + if e.OriginalErr == nil { + return e.Category + } + return e.Category + ": " + e.OriginalErr.Error() +} + +// Unwrap returns the original error so errors.Is/errors.As work through the wrapper. +func (e *HelpfulError) Unwrap() error { + return e.OriginalErr +} diff --git a/pkg/diagnostic/diagnostic_test.go b/pkg/diagnostic/diagnostic_test.go new file mode 100644 index 00000000..ea9784cb --- /dev/null +++ b/pkg/diagnostic/diagnostic_test.go @@ -0,0 +1,143 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package diagnostic + +import ( + "errors" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHelpfulError_Error_PlainText(t *testing.T) { + diag := &HelpfulError{ + Category: "Network connection failed", + OriginalErr: errors.New("connection refused"), + Suggestions: []Suggestion{ + {Cause: "cause", Solutions: []string{"fix"}}, + }, + } + + errStr := diag.Error() + assert.Equal(t, "Network connection failed: connection refused", errStr) + assert.NotContains(t, errStr, "\033[") + assert.NotContains(t, errStr, "cause") +} + +func TestHelpfulError_Unwrap(t *testing.T) { + originalErr := io.EOF + diag := &HelpfulError{ + Category: "Test", + OriginalErr: originalErr, + } + require.True(t, errors.Is(diag, originalErr)) +} + +func TestHelpfulError_Format_NoColor(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Network connection failed", + OriginalErr: errors.New("test"), + Suggestions: []Suggestion{ + {Cause: "cause1", Solutions: []string{"fix1"}}, + }, + } + + output := diag.Format() + assert.NotContains(t, output, "\033[") + assert.Contains(t, output, "Network connection failed") + assert.Contains(t, output, "cause1") + assert.Contains(t, output, "fix1") +} + +func TestHelpfulError_Format_Structure(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Network connection failed", + OriginalErr: errors.New("connection refused"), + Suggestions: []Suggestion{ + {Cause: "Network down", Solutions: []string{"Check network"}}, + {Cause: 
"Firewall blocking", Solutions: []string{"Check firewall"}}, + }, + } + + output := diag.Format() + assert.Contains(t, output, "error: Network connection failed") + assert.Contains(t, output, "connection refused") + assert.Contains(t, output, "Network down") + assert.Contains(t, output, "Check network") + assert.Contains(t, output, "Firewall blocking") + assert.Contains(t, output, "Check firewall") +} + +func TestHelpfulError_Error_NilOriginalErr(t *testing.T) { + diag := &HelpfulError{Category: "Something failed"} + assert.Equal(t, "Something failed", diag.Error()) + assert.Nil(t, diag.Unwrap()) +} + +func TestHelpfulError_Format_NilOriginalErr(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Something failed", + Suggestions: []Suggestion{ + {Cause: "Unknown", Solutions: []string{"Try again"}}, + }, + } + + output := diag.Format() + assert.Contains(t, output, "Something failed") + assert.Contains(t, output, "Try again") + assert.NotContains(t, output, "╰─▶") +} + +func TestHelpfulError_Format_NoCausesNoSolutions(t *testing.T) { + t.Setenv("NO_COLOR", "1") + + diag := &HelpfulError{ + Category: "Something failed", + OriginalErr: errors.New("oops"), + } + + output := diag.Format() + assert.Contains(t, output, "Something failed") + assert.Contains(t, output, "oops") + assert.NotContains(t, output, "Possible causes") + assert.NotContains(t, output, "How to fix") +} + +func TestHelpfulError_Format_ForceColor(t *testing.T) { + t.Setenv("FORCE_COLOR", "1") + t.Setenv("NO_COLOR", "") + + diag := &HelpfulError{ + Category: "Test error", + OriginalErr: errors.New("test"), + Suggestions: []Suggestion{ + {Cause: "cause1", Solutions: []string{"fix1"}}, + }, + } + + output := diag.Format() + assert.True(t, strings.Contains(output, "\033[")) +} diff --git a/pkg/diagnostic/doc.go b/pkg/diagnostic/doc.go new file mode 100644 index 00000000..99df9f06 --- /dev/null +++ b/pkg/diagnostic/doc.go @@ -0,0 +1,104 @@ +/* +Copyright 2026 Flant JSC + 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package diagnostic provides [HelpfulError] - a wrapper around standard Go errors +// that adds possible causes and actionable solutions for the user. +// +// When a command returns a [HelpfulError], the top-level handler in cmd/d8/root.go +// detects it via [errors.As] and prints a formatted diagnostic instead of a raw error. +// If an error is not wrapped in [HelpfulError], it is printed as usual. +// +// # Creating a HelpfulError +// +// Option 1: use a command-specific errdetect package +// (see internal/mirror/cmd/pull/errdetect for an example): +// +// if diag := errdetect.Diagnose(err); diag != nil { +// return diag +// } +// +// Option 2: wrap an error directly: +// +// return &diagnostic.HelpfulError{ +// Category: "ETCD snapshot failed", +// OriginalErr: err, +// Suggestions: []diagnostic.Suggestion{ +// { +// Cause: "ETCD cluster is unreachable", +// Solutions: []string{"Check ETCD health: etcdctl endpoint health"}, +// }, +// }, +// } +// +// Suggestions are optional - an empty slice is silently omitted from output. +// Each Suggestion pairs a cause with its specific solutions. +// +// # How fields map to Format() output +// +// error: ETCD snapshot failed <-- Category +// ╰─▶ save snapshot <-- OriginalErr chain (unwrapped) +// ╰─▶ dial tcp 10.0.0.1:2379 +// ╰─▶ connection refused +// +// * ETCD cluster is unreachable <-- Suggestion.Cause +// -> Check ETCD health: etcdctl ... 
<-- Suggestion.Solutions +// +// # How it propagates +// +// [HelpfulError] implements the error interface. It propagates up the call chain +// like any other error. The original error is preserved via [HelpfulError.Unwrap], +// so [errors.Is] and [errors.As] work through the wrapper. +// +// In cmd/d8/root.go: +// +// var helpErr *diagnostic.HelpfulError +// if errors.As(err, &helpErr) { +// fmt.Fprint(os.Stderr, helpErr.Format()) // colored output, once +// } +// +// [HelpfulError.Error] returns plain text (safe for logs). +// [HelpfulError.Format] returns colored terminal output (TTY-aware, respects NO_COLOR). +// +// # Adding diagnostics to a new command +// +// Create an errdetect package next to your command with a Diagnose function: +// +// // internal/backup/cmd/snapshot/errdetect/diagnose.go +// func Diagnose(err error) *diagnostic.HelpfulError { +// if isETCDError(err) { +// return &diagnostic.HelpfulError{ +// Category: "ETCD connection failed", OriginalErr: err, +// Suggestions: []diagnostic.Suggestion{ +// {Cause: "ETCD cluster is unreachable", Solutions: []string{"Check ETCD health"}}, +// }, +// } +// } +// return nil +// } +// +// Then call it at the command level: +// +// if diag := errdetect.Diagnose(err); diag != nil { +// return diag +// } +// +// # Important: diagnose at the command level, not in root.go +// +// Each command must call its own errdetect package. root.go only catches +// [HelpfulError] via [errors.As] - it does not import or call any classifier. +// This prevents false classification: a DNS error from "d8 backup" must not +// be diagnosed with registry-specific advice like "--tls-skip-verify". 
+package diagnostic diff --git a/pkg/diagnostic/format.go b/pkg/diagnostic/format.go new file mode 100644 index 00000000..164f7ee6 --- /dev/null +++ b/pkg/diagnostic/format.go @@ -0,0 +1,126 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package diagnostic + +import ( + "errors" + "os" + "strings" + + "golang.org/x/term" +) + +// ANSI escape codes for terminal color output. +const ( + ansiReset = "\033[0m" + ansiRed = "\033[31m" + ansiYellow = "\033[33m" + ansiCyan = "\033[36m" + ansiBold = "\033[1m" +) + +// textStyler controls whether styled output uses ANSI codes or plain text. +type textStyler struct { + enabled bool +} + +// style wraps text with ANSI codes when enabled, returns plain text otherwise. +func (t textStyler) style(text string, ansiCodes ...string) string { + if !t.enabled { + return text + } + return strings.Join(ansiCodes, "") + text + ansiReset +} + +// Semantic text styles used by HelpfulError.Format(). +func (t textStyler) danger(s string) string { return t.style(s, ansiBold, ansiRed) } // error labels +func (t textStyler) header(s string) string { return t.style(s, ansiBold) } // category name +func (t textStyler) hint(s string) string { return t.style(s, ansiCyan) } // arrows, solutions +func (t textStyler) warn(s string) string { return t.style(s, ansiYellow) } // possible causes + +// newTextStyler returns a textStyler configured for the current environment. 
+// A non-empty NO_COLOR always disables colors; otherwise they are enabled when
+// FORCE_COLOR is non-empty or stderr is a terminal.
+func newTextStyler() textStyler {
+	var styler textStyler
+	switch {
+	case os.Getenv("NO_COLOR") != "":
+		// NO_COLOR wins over everything else: leave styling disabled.
+	case os.Getenv("FORCE_COLOR") != "":
+		styler.enabled = true
+	default:
+		styler.enabled = term.IsTerminal(int(os.Stderr.Fd()))
+	}
+	return styler
+}
+
+// Format renders the diagnostic as a multi-line report: the category headline,
+// one arrow line per level of the wrapped error chain, then every possible
+// cause followed by its suggested solutions. Colors are applied according to
+// newTextStyler.
+//
+//	error: Network connection failed to 127.0.0.1:443
+//	 ╰─▶ dial tcp 127.0.0.1:443: connect: connection refused
+//
+//	 * Firewall or security group blocking the connection
+//	 -> Verify firewall rules allow outbound HTTPS (port 443)
+//	 * Registry is down or unreachable
+//	 -> Test connectivity with: curl -v https://registry.example.com
+func (e *HelpfulError) Format() string {
+	styler := newTextStyler()
+
+	var out strings.Builder
+
+	// Headline: a blank line, then "error: <Category>".
+	out.WriteString("\n")
+	out.WriteString(styler.danger("error"))
+	out.WriteString(styler.header(": " + e.Category))
+	out.WriteString("\n")
+
+	// One arrow line per level of the original error chain; each level is
+	// indented one step further than the one above it.
+	if e.OriginalErr != nil {
+		for depth, msg := range unwrapChain(e.OriginalErr) {
+			out.WriteString(" " + strings.Repeat(" ", depth))
+			out.WriteString(styler.hint("╰─▶ "))
+			out.WriteString(msg)
+			out.WriteString("\n")
+		}
+	}
+	out.WriteString("\n")
+
+	// Every possible cause is followed by the solutions suggested for it.
+	for _, suggestion := range e.Suggestions {
+		out.WriteString(" " + styler.warn("* "+suggestion.Cause) + "\n")
+		for _, solution := range suggestion.Solutions {
+			out.WriteString(" " + styler.hint("-> ") + solution + "\n")
+		}
+	}
+
+	out.WriteString("\n")
+	return out.String()
+}
+
+// unwrapChain walks errors.Unwrap() and extracts each level's unique context.
+// For "a: b: c" wrapped via fmt.Errorf("%w"), returns ["a", "b", "c"].
+func unwrapChain(err error) []string {
+	var chain []string
+
+	for err != nil {
+		inner := errors.Unwrap(err)
+		if inner == nil {
+			// Innermost error (or one that does not expose Unwrap() error):
+			// its message is the last element of the chain.
+			chain = append(chain, err.Error())
+			break
+		}
+
+		full := err.Error()
+		innerText := inner.Error()
+		if full == innerText {
+			// Transparent wrapper (e.g. fmt.Errorf("%w", err)): it adds no
+			// text of its own, so skip this level and keep splitting the
+			// levels below it instead of stopping here.
+			err = inner
+			continue
+		}
+
+		// A conventional wrapper renders as "<prefix>: <inner>"; strip the
+		// inner part to get the text contributed by this level alone.
+		prefix := strings.TrimSuffix(full, ": "+innerText)
+		if prefix == full {
+			// Can't extract prefix cleanly - use full message and stop
+			chain = append(chain, full)
+			break
+		}
+
+		chain = append(chain, prefix)
+		err = inner
+	}
+
+	return chain
+}
diff --git a/pkg/libmirror/util/errorutil/errors.go b/pkg/libmirror/util/errorutil/errors.go
deleted file mode 100644
index 1ab00ff7..00000000
--- a/pkg/libmirror/util/errorutil/errors.go
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-Copyright 2024 Flant JSC
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/ - -package errorutil - -import "strings" - -const CustomTrivyMediaTypesWarning = `` + - "It looks like you are using Project Quay registry and it is not configured correctly for hosting Deckhouse.\n" + - "See the docs at https://deckhouse.io/products/kubernetes-platform/documentation/v1/supported_versions.html#container-registry for more details.\n\n" + - "TL;DR: You should retry push after allowing some additional types of OCI artifacts in your config.yaml as follows:\n" + - `FEATURE_GENERAL_OCI_SUPPORT: true -ALLOWED_OCI_ARTIFACT_TYPES: - "application/octet-stream": - - "application/deckhouse.io.bdu.layer.v1.tar+gzip" - - "application/vnd.cncf.openpolicyagent.layer.v1.tar+gzip" - "application/vnd.aquasec.trivy.config.v1+json": - - "application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip" - - "application/vnd.aquasec.trivy.db.layer.v1.tar+gzip"` - -func IsImageNotFoundError(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "MANIFEST_UNKNOWN") || strings.Contains(errMsg, "404 Not Found") -} - -func IsRepoNotFoundError(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "NAME_UNKNOWN") -} - -func IsTrivyMediaTypeNotAllowedError(err error) bool { - if err == nil { - return false - } - - errMsg := err.Error() - return strings.Contains(errMsg, "MANIFEST_INVALID") && - (strings.Contains(errMsg, "vnd.aquasec.trivy") || strings.Contains(errMsg, "application/octet-stream")) -}