From 8485131914a556e3b496222744e2d201ab3f16cf Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:24:09 +0100 Subject: [PATCH 01/15] modelerrors: remove numeric status-code patterns from string matching ExtractHTTPStatusCode + IsRetryableStatusCode already handle numeric status codes (500, 502, 429, 401, etc.) via regex with word boundaries. Having the same codes as bare substrings in the retryable/non-retryable pattern lists caused false positives (e.g., "processed 500 items" would match). Remove the numeric patterns and keep only the descriptive text patterns. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 4283262e7..34ecab17b 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -277,14 +277,12 @@ func IsRetryableModelError(err error) bool { // Fall back to message-pattern matching for errors without structured status codes errMsg := strings.ToLower(err.Error()) - // Retryable patterns (5xx, timeout, network issues) - // NOTE: 429 is explicitly NOT in this list - we skip to next model for rate limits + // Retryable patterns (timeout, network issues) + // NOTE: Numeric status codes (500, 502, etc.) are already handled by + // ExtractHTTPStatusCode + IsRetryableStatusCode above; they are not + // duplicated here to avoid false positives on arbitrary numbers in + // error messages (e.g., "processed 500 items"). retryablePatterns := []string{ - "500", // Internal server error - "502", // Bad gateway - "503", // Service unavailable - "504", // Gateway timeout - "408", // Request timeout "timeout", // Generic timeout "connection reset", // Connection reset "connection refused", // Connection refused @@ -311,17 +309,14 @@ func IsRetryableModelError(err error) bool { } // Non-retryable patterns (skip to next model immediately) + // NOTE: Numeric status codes (429, 401, etc.) are already handled by + // ExtractHTTPStatusCode above; they are not duplicated here. nonRetryablePatterns := []string{ - "429", // Rate limit - skip to next model "rate limit", // Rate limit message "too many requests", // Rate limit message "throttl", // Throttling (rate limiting) "quota", // Quota exceeded "capacity", // Capacity issues (often rate-limit related) - "401", // Unauthorized - "403", // Forbidden - "404", // Not found - "400", // Bad request "invalid", // Invalid request "unauthorized", // Auth error "authentication", // Auth error From f18f5b5546e309da18841b2cd4f8a38cb297d02b Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:25:00 +0100 Subject: [PATCH 02/15] modelerrors: unexport IsRetryableModelError The only caller outside the package is ClassifyModelError itself (as a fallback path). ClassifyModelError is the intended public API and provides strictly more information (retryable + rateLimited + retryAfter). Rename to isRetryableModelError to reduce API surface. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 7 ++++--- pkg/modelerrors/modelerrors_test.go | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 34ecab17b..ed1be15c3 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -220,7 +220,8 @@ func IsRetryableStatusCode(statusCode int) bool { } } -// IsRetryableModelError determines if an error should trigger a retry of the SAME model. +// isRetryableModelError determines if an error should trigger a retry of the SAME model. +// It is used as a fallback by ClassifyModelError when no *StatusError is present. // // Retryable errors (retry same model with backoff): // - Network timeouts @@ -238,7 +239,7 @@ func IsRetryableStatusCode(statusCode int) bool { // // The key distinction is: 429 means "you're calling too fast, slow down" which // suggests we should try a different model, not keep hammering the same one. -func IsRetryableModelError(err error) bool { +func isRetryableModelError(err error) bool { if err == nil { return false } @@ -404,7 +405,7 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time if statusCode != 0 { return IsRetryableStatusCode(statusCode), false, 0 } - return IsRetryableModelError(err), false, 0 + return isRetryableModelError(err), false, 0 } // CalculateBackoff returns the backoff duration for a given attempt (0-indexed). diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 361c89f34..211477108 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -66,7 +66,7 @@ func TestIsRetryableModelError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - assert.Equal(t, tt.expected, IsRetryableModelError(tt.err), "IsRetryableModelError(%v)", tt.err) + assert.Equal(t, tt.expected, isRetryableModelError(tt.err), "isRetryableModelError(%v)", tt.err) }) } } @@ -252,7 +252,7 @@ func TestIsRetryableModelError_ContextOverflow(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - assert.False(t, IsRetryableModelError(tt.err), "context overflow errors should not be retryable: %v", tt.err) + assert.False(t, isRetryableModelError(tt.err), "context overflow errors should not be retryable: %v", tt.err) }) } } From 747c989df64955860110b92eaad4925c65d9c144 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:26:01 +0100 Subject: [PATCH 03/15] modelerrors: remove provider SDK imports from ExtractHTTPStatusCode Now that all providers (Anthropic, OpenAI, Gemini) wrap their errors in *StatusError via WrapHTTPError, the provider-specific type assertions (anthropic.Error, genai.APIError) in ExtractHTTPStatusCode are redundant. ClassifyModelError already checks *StatusError first. Replace the SDK-specific checks with a *StatusError check, keeping the regex fallback for unwrapped errors. This removes the coupling between the error classification package and provider-specific SDKs. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index ed1be15c3..be640453f 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -16,9 +16,6 @@ import ( "strconv" "strings" "time" - - "github.com/anthropics/anthropic-sdk-go" - "google.golang.org/genai" ) // Backoff and retry-after configuration constants. @@ -161,27 +158,25 @@ func IsContextOverflowError(err error) bool { // statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ") var statusCodeRegex = regexp.MustCompile(`\b([45]\d{2})\b`) -// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error. -// Checks in order: -// 1. Known provider SDK error types (Anthropic, Gemini) -// 2. Regex parsing of error message (fallback for OpenAI and others) +// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error +// using regex parsing of the error message. This is a fallback for providers +// whose errors are not yet wrapped in *StatusError (the preferred path). +// +// The regex matches 4xx/5xx codes at word boundaries +// (e.g., "429 Too Many Requests", "500 Internal Server Error"). // Returns 0 if no status code found. func ExtractHTTPStatusCode(err error) int { if err == nil { return 0 } - // Check Anthropic SDK error type (public) - if anthropicErr, ok := errors.AsType[*anthropic.Error](err); ok { - return anthropicErr.StatusCode - } - - // Check Google Gemini SDK error type (public) - if geminiErr, ok := errors.AsType[*genai.APIError](err); ok { - return geminiErr.Code + // Check for *StatusError first (preferred structured path). + var statusErr *StatusError + if errors.As(err, &statusErr) { + return statusErr.StatusCode } - // For other providers (OpenAI, etc.), extract from error message using regex + // Fallback: extract from error message using regex. // OpenAI SDK error format: `POST "/v1/...": 429 Too Many Requests {...}` matches := statusCodeRegex.FindStringSubmatch(err.Error()) if len(matches) >= 2 { From c303c34dfa6ef9d9992974ff8c7c7bbd573ac3e0 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:26:41 +0100 Subject: [PATCH 04/15] modelerrors: use strconv.Atoi instead of fmt.Sscanf in ExtractHTTPStatusCode The regex match is a simple integer string. strconv.Atoi is simpler, faster, and already imported. This also removes the fmt import. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index be640453f..0dae04fad 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -7,7 +7,6 @@ package modelerrors import ( "context" "errors" - "fmt" "log/slog" "math/rand" "net" @@ -180,8 +179,7 @@ func ExtractHTTPStatusCode(err error) int { // OpenAI SDK error format: `POST "/v1/...": 429 Too Many Requests {...}` matches := statusCodeRegex.FindStringSubmatch(err.Error()) if len(matches) >= 2 { - var code int - if _, err := fmt.Sscanf(matches[1], "%d", &code); err == nil { + if code, err := strconv.Atoi(matches[1]); err == nil { return code } } From f182f8de4329b9e9af132b811159d4d3c7713f08 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:27:09 +0100 Subject: [PATCH 05/15] modelerrors: switch from math/rand to math/rand/v2 math/rand/v2 is the preferred package since Go 1.22. The API is compatible (rand.Float64 exists in both). Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 0dae04fad..44b0fa03f 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -8,7 +8,7 @@ import ( "context" "errors" "log/slog" - "math/rand" + "math/rand/v2" "net" "net/http" "regexp" From d13541e364293d4870f3507737e22b2370f6b0a2 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:27:56 +0100 Subject: [PATCH 06/15] modelerrors: promote retryable/non-retryable patterns to package-level vars The retryablePatterns and nonRetryablePatterns slices were local variables inside isRetryableModelError, causing a new allocation on every call. Promote them to package-level vars (like contextOverflowPatterns) so they are allocated once. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 77 ++++++++++++++++------------------ 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 44b0fa03f..ed7ae6bf1 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -213,6 +213,43 @@ func IsRetryableStatusCode(statusCode int) bool { } } +// retryablePatterns contains error message substrings that indicate a +// transient/retryable failure. Numeric status codes (500, 502, etc.) are +// handled separately by ExtractHTTPStatusCode + IsRetryableStatusCode. +var retryablePatterns = []string{ + "timeout", // Generic timeout + "connection reset", // Connection reset + "connection refused", // Connection refused + "no such host", // DNS failure + "temporary failure", // Temporary failure + "service unavailable", // Service unavailable + "internal server error", // Server error + "bad gateway", // Gateway error + "gateway timeout", // Gateway timeout + "overloaded", // Server overloaded + "overloaded_error", // Server overloaded + "other side closed", // Connection closed by peer + "fetch failed", // Network fetch failure + "reset before headers", // Connection reset before headers received + "upstream connect", // Upstream connection error + "internal_error", // HTTP/2 INTERNAL_ERROR (stream-level) +} + +// nonRetryablePatterns contains error message substrings that indicate a +// permanent/non-retryable failure. Numeric status codes (429, 401, etc.) are +// handled separately by ExtractHTTPStatusCode. +var nonRetryablePatterns = []string{ + "rate limit", // Rate limit message + "too many requests", // Rate limit message + "throttl", // Throttling (rate limiting) + "quota", // Quota exceeded + "capacity", // Capacity issues (often rate-limit related) + "invalid", // Invalid request + "unauthorized", // Auth error + "authentication", // Auth error + "api key", // API key error +} + // isRetryableModelError determines if an error should trigger a retry of the SAME model. // It is used as a fallback by ClassifyModelError when no *StatusError is present. // @@ -268,33 +305,8 @@ func isRetryableModelError(err error) bool { } } - // Fall back to message-pattern matching for errors without structured status codes errMsg := strings.ToLower(err.Error()) - // Retryable patterns (timeout, network issues) - // NOTE: Numeric status codes (500, 502, etc.) are already handled by - // ExtractHTTPStatusCode + IsRetryableStatusCode above; they are not - // duplicated here to avoid false positives on arbitrary numbers in - // error messages (e.g., "processed 500 items"). - retryablePatterns := []string{ - "timeout", // Generic timeout - "connection reset", // Connection reset - "connection refused", // Connection refused - "no such host", // DNS failure - "temporary failure", // Temporary failure - "service unavailable", // Service unavailable - "internal server error", // Server error - "bad gateway", // Gateway error - "gateway timeout", // Gateway timeout - "overloaded", // Server overloaded - "overloaded_error", // Server overloaded - "other side closed", // Connection closed by peer - "fetch failed", // Network fetch failure - "reset before headers", // Connection reset before headers received - "upstream connect", // Upstream connection error - "internal_error", // HTTP/2 INTERNAL_ERROR (stream-level) - } - for _, pattern := range retryablePatterns { if strings.Contains(errMsg, pattern) { slog.Debug("Matched retryable error pattern", "pattern", pattern) @@ -302,21 +314,6 @@ func isRetryableModelError(err error) bool { } } - // Non-retryable patterns (skip to next model immediately) - // NOTE: Numeric status codes (429, 401, etc.) are already handled by - // ExtractHTTPStatusCode above; they are not duplicated here. - nonRetryablePatterns := []string{ - "rate limit", // Rate limit message - "too many requests", // Rate limit message - "throttl", // Throttling (rate limiting) - "quota", // Quota exceeded - "capacity", // Capacity issues (often rate-limit related) - "invalid", // Invalid request - "unauthorized", // Auth error - "authentication", // Auth error - "api key", // API key error - } - for _, pattern := range nonRetryablePatterns { if strings.Contains(errMsg, pattern) { slog.Debug("Matched non-retryable error pattern", "pattern", pattern) From 4fd44ed62decd1ee953aa0654a9685f171865cad Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:31:49 +0100 Subject: [PATCH 07/15] modelerrors: move backoff and sleep utilities to pkg/backoff SleepWithContext and CalculateBackoff are general-purpose utilities not specific to model errors. Move them to a new pkg/backoff package for clarity and reusability. MaxRetryAfterWait moves there too. Deprecated wrapper variables are left in modelerrors to maintain backward compatibility. The runtime/fallback package now imports pkg/backoff directly. Assisted-By: docker-agent --- pkg/backoff/backoff.go | 60 +++++++++++++++++++++++++ pkg/backoff/backoff_test.go | 70 +++++++++++++++++++++++++++++ pkg/modelerrors/modelerrors.go | 49 ++++---------------- pkg/modelerrors/modelerrors_test.go | 60 ------------------------- pkg/runtime/fallback.go | 21 ++++----- 5 files changed, 150 insertions(+), 110 deletions(-) create mode 100644 pkg/backoff/backoff.go create mode 100644 pkg/backoff/backoff_test.go diff --git a/pkg/backoff/backoff.go b/pkg/backoff/backoff.go new file mode 100644 index 000000000..722fb1d4e --- /dev/null +++ b/pkg/backoff/backoff.go @@ -0,0 +1,60 @@ +// Package backoff provides exponential backoff calculation and +// context-aware sleep utilities. +package backoff + +import ( + "context" + "math/rand/v2" + "time" +) + +// Configuration constants for exponential backoff. +const ( + baseDelay = 200 * time.Millisecond + maxDelay = 2 * time.Second + factor = 2.0 + jitter = 0.1 + + // MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent + // a misbehaving server from blocking the agent for an unreasonable amount of time. + MaxRetryAfterWait = 60 * time.Second +) + +// Calculate returns the backoff duration for a given attempt (0-indexed). +// Uses exponential backoff with jitter. +func Calculate(attempt int) time.Duration { + if attempt < 0 { + attempt = 0 + } + + // Calculate exponential delay + delay := float64(baseDelay) + for range attempt { + delay *= factor + } + + // Cap at max delay + if delay > float64(maxDelay) { + delay = float64(maxDelay) + } + + // Add jitter (±10%) + j := delay * jitter * (2*rand.Float64() - 1) + delay += j + + return time.Duration(delay) +} + +// SleepWithContext sleeps for the specified duration, returning early if context is cancelled. +// Returns true if the sleep completed, false if it was interrupted by context cancellation. +func SleepWithContext(ctx context.Context, d time.Duration) bool { + timer := time.NewTimer(d) + defer timer.Stop() + + select { + case <-timer.C: + return true + case <-ctx.Done(): + return false + } +} diff --git a/pkg/backoff/backoff_test.go b/pkg/backoff/backoff_test.go new file mode 100644 index 000000000..e0a61d9c5 --- /dev/null +++ b/pkg/backoff/backoff_test.go @@ -0,0 +1,70 @@ +package backoff + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestCalculate(t *testing.T) { + t.Parallel() + + tests := []struct { + attempt int + minExpected time.Duration + maxExpected time.Duration + }{ + {attempt: 0, minExpected: 180 * time.Millisecond, maxExpected: 220 * time.Millisecond}, + {attempt: 1, minExpected: 360 * time.Millisecond, maxExpected: 440 * time.Millisecond}, + {attempt: 2, minExpected: 720 * time.Millisecond, maxExpected: 880 * time.Millisecond}, + {attempt: 3, minExpected: 1440 * time.Millisecond, maxExpected: 1760 * time.Millisecond}, + {attempt: 10, minExpected: 1800 * time.Millisecond, maxExpected: 2200 * time.Millisecond}, // capped at 2s + } + + for _, tt := range tests { + t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) { + t.Parallel() + b := Calculate(tt.attempt) + assert.GreaterOrEqual(t, b, tt.minExpected, "backoff should be at least %v", tt.minExpected) + assert.LessOrEqual(t, b, tt.maxExpected, "backoff should be at most %v", tt.maxExpected) + }) + } + + t.Run("negative attempt treated as 0", func(t *testing.T) { + t.Parallel() + b := Calculate(-1) + assert.GreaterOrEqual(t, b, 180*time.Millisecond) + assert.LessOrEqual(t, b, 220*time.Millisecond) + }) +} + +func TestSleepWithContext(t *testing.T) { + t.Parallel() + + t.Run("completes normally", func(t *testing.T) { + t.Parallel() + ctx := t.Context() + start := time.Now() + completed := SleepWithContext(ctx, 10*time.Millisecond) + elapsed := time.Since(start) + + assert.True(t, completed, "should complete normally") + assert.GreaterOrEqual(t, elapsed, 10*time.Millisecond) + }) + + t.Run("interrupted by context", func(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(t.Context()) + time.AfterFunc(10*time.Millisecond, cancel) + + start := time.Now() + completed := SleepWithContext(ctx, 1*time.Second) + elapsed := time.Since(start) + + assert.False(t, completed, "should be interrupted") + assert.Less(t, elapsed, 100*time.Millisecond, "should return quickly after cancel") + }) +} diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index ed7ae6bf1..41544e921 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -8,25 +8,21 @@ import ( "context" "errors" "log/slog" - "math/rand/v2" "net" "net/http" "regexp" "strconv" "strings" "time" + + "github.com/docker/docker-agent/pkg/backoff" ) // Backoff and retry-after configuration constants. const ( - backoffBaseDelay = 200 * time.Millisecond - backoffMaxDelay = 2 * time.Second - backoffFactor = 2.0 - backoffJitter = 0.1 - // MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent // a misbehaving server from blocking the agent for an unreasonable amount of time. - MaxRetryAfterWait = 60 * time.Second + MaxRetryAfterWait = backoff.MaxRetryAfterWait ) // StatusError wraps an HTTP API error with structured metadata for retry decisions. @@ -400,42 +396,15 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time // CalculateBackoff returns the backoff duration for a given attempt (0-indexed). // Uses exponential backoff with jitter. -func CalculateBackoff(attempt int) time.Duration { - if attempt < 0 { - attempt = 0 - } - - // Calculate exponential delay - delay := float64(backoffBaseDelay) - for range attempt { - delay *= backoffFactor - } - - // Cap at max delay - if delay > float64(backoffMaxDelay) { - delay = float64(backoffMaxDelay) - } - - // Add jitter (±10%) - jitter := delay * backoffJitter * (2*rand.Float64() - 1) - delay += jitter - - return time.Duration(delay) -} +// +// Deprecated: Use [backoff.Calculate] directly. +var CalculateBackoff = backoff.Calculate // SleepWithContext sleeps for the specified duration, returning early if context is cancelled. // Returns true if the sleep completed, false if it was interrupted by context cancellation. -func SleepWithContext(ctx context.Context, d time.Duration) bool { - timer := time.NewTimer(d) - defer timer.Stop() - - select { - case <-timer.C: - return true - case <-ctx.Done(): - return false - } -} +// +// Deprecated: Use [backoff.SleepWithContext] directly. +var SleepWithContext = backoff.SleepWithContext // FormatError returns a user-friendly error message for model errors. // Context overflow gets a dedicated actionable message; all other errors diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 211477108..7a9ab3922 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -71,66 +71,6 @@ func TestIsRetryableModelError(t *testing.T) { } } -func TestCalculateBackoff(t *testing.T) { - t.Parallel() - - tests := []struct { - attempt int - minExpected time.Duration - maxExpected time.Duration - }{ - {attempt: 0, minExpected: 180 * time.Millisecond, maxExpected: 220 * time.Millisecond}, - {attempt: 1, minExpected: 360 * time.Millisecond, maxExpected: 440 * time.Millisecond}, - {attempt: 2, minExpected: 720 * time.Millisecond, maxExpected: 880 * time.Millisecond}, - {attempt: 3, minExpected: 1440 * time.Millisecond, maxExpected: 1760 * time.Millisecond}, - {attempt: 10, minExpected: 1800 * time.Millisecond, maxExpected: 2200 * time.Millisecond}, // capped at 2s - } - - for _, tt := range tests { - t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) { - t.Parallel() - backoff := CalculateBackoff(tt.attempt) - assert.GreaterOrEqual(t, backoff, tt.minExpected, "backoff should be at least %v", tt.minExpected) - assert.LessOrEqual(t, backoff, tt.maxExpected, "backoff should be at most %v", tt.maxExpected) - }) - } - - t.Run("negative attempt treated as 0", func(t *testing.T) { - t.Parallel() - backoff := CalculateBackoff(-1) - assert.GreaterOrEqual(t, backoff, 180*time.Millisecond) - assert.LessOrEqual(t, backoff, 220*time.Millisecond) - }) -} - -func TestSleepWithContext(t *testing.T) { - t.Parallel() - - t.Run("completes normally", func(t *testing.T) { - t.Parallel() - ctx := t.Context() - start := time.Now() - completed := SleepWithContext(ctx, 10*time.Millisecond) - elapsed := time.Since(start) - - assert.True(t, completed, "should complete normally") - assert.GreaterOrEqual(t, elapsed, 10*time.Millisecond) - }) - - t.Run("interrupted by context", func(t *testing.T) { - t.Parallel() - ctx, cancel := context.WithCancel(t.Context()) - time.AfterFunc(10*time.Millisecond, cancel) - - start := time.Now() - completed := SleepWithContext(ctx, 1*time.Second) - elapsed := time.Since(start) - - assert.False(t, completed, "should be interrupted") - assert.Less(t, elapsed, 100*time.Millisecond, "should return quickly after cancel") - }) -} - func TestExtractHTTPStatusCode(t *testing.T) { t.Parallel() diff --git a/pkg/runtime/fallback.go b/pkg/runtime/fallback.go index 546b3e32f..0adec41a6 100644 --- a/pkg/runtime/fallback.go +++ b/pkg/runtime/fallback.go @@ -8,6 +8,7 @@ import ( "time" "github.com/docker/docker-agent/pkg/agent" + "github.com/docker/docker-agent/pkg/backoff" "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/model/provider" "github.com/docker/docker-agent/pkg/modelerrors" @@ -69,12 +70,12 @@ func logFallbackAttempt(agentName string, model modelWithFallback, attempt, maxR } // logRetryBackoff logs when we're backing off before a retry -func logRetryBackoff(agentName, modelID string, attempt int, backoff time.Duration) { +func logRetryBackoff(agentName, modelID string, attempt int, backoffDelay time.Duration) { slog.Debug("Backing off before retry", "agent", agentName, "model", modelID, "attempt", attempt+1, - "backoff", backoff) + "backoff", backoffDelay) } // getCooldownState returns the current cooldown state for an agent (thread-safe). @@ -222,9 +223,9 @@ func (r *LocalRuntime) tryModelWithFallback( // Apply backoff before retry (not on first attempt of each model) if attempt > 0 { - backoff := modelerrors.CalculateBackoff(attempt - 1) - logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoff) - if !modelerrors.SleepWithContext(ctx, backoff) { + backoffDelay := backoff.Calculate(attempt - 1) + logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoffDelay) + if !backoff.SleepWithContext(ctx, backoffDelay) { return streamResult{}, nil, ctx.Err() } } @@ -382,14 +383,14 @@ func (r *LocalRuntime) handleModelError( // Opt-in enabled, no fallbacks → retry same model after honouring Retry-After (or backoff). waitDuration := retryAfter if waitDuration <= 0 { - waitDuration = modelerrors.CalculateBackoff(attempt) - } else if waitDuration > modelerrors.MaxRetryAfterWait { + waitDuration = backoff.Calculate(attempt) + } else if waitDuration > backoff.MaxRetryAfterWait { slog.Warn("Retry-After exceeds maximum, capping", "agent", a.Name(), "model", modelEntry.provider.ID(), "retry_after", retryAfter, - "max", modelerrors.MaxRetryAfterWait) - waitDuration = modelerrors.MaxRetryAfterWait + "max", backoff.MaxRetryAfterWait) + waitDuration = backoff.MaxRetryAfterWait } slog.Warn("Rate limited, retrying (opt-in enabled)", "agent", a.Name(), @@ -398,7 +399,7 @@ func (r *LocalRuntime) handleModelError( "wait", waitDuration, "retry_after_from_header", retryAfter > 0, "error", err) - if !modelerrors.SleepWithContext(ctx, waitDuration) { + if !backoff.SleepWithContext(ctx, waitDuration) { return retryDecisionReturn } return retryDecisionContinue From 744db2501d5857ba9e57717e4f62f87ab4b0fc6c Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:33:01 +0100 Subject: [PATCH 08/15] modelerrors: include HTTP status code in StatusError.Error() StatusError.Error() previously delegated directly to the wrapped error, discarding the status code. This made debugging harder since log lines wouldn't reveal the HTTP status. Now Error() returns a message like "HTTP 429: rate limit exceeded" for better observability. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 3 ++- pkg/modelerrors/modelerrors_test.go | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 41544e921..c53465b30 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -7,6 +7,7 @@ package modelerrors import ( "context" "errors" + "fmt" "log/slog" "net" "net/http" @@ -38,7 +39,7 @@ type StatusError struct { } func (e *StatusError) Error() string { - return e.Err.Error() + return fmt.Sprintf("HTTP %d: %s", e.StatusCode, e.Err.Error()) } func (e *StatusError) Unwrap() error { diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 7a9ab3922..17247538f 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -265,11 +265,11 @@ func TestParseRetryAfterHeader(t *testing.T) { func TestStatusError(t *testing.T) { t.Parallel() - t.Run("Error() delegates to wrapped error", func(t *testing.T) { + t.Run("Error() includes status code and wrapped message", func(t *testing.T) { t.Parallel() underlying := errors.New("rate limit exceeded") se := &StatusError{StatusCode: 429, Err: underlying} - assert.Equal(t, underlying.Error(), se.Error()) + assert.Equal(t, "HTTP 429: rate limit exceeded", se.Error()) }) t.Run("Unwrap() allows errors.Is traversal", func(t *testing.T) { @@ -315,7 +315,7 @@ func TestWrapHTTPError(t *testing.T) { require.ErrorAs(t, result, &se) assert.Equal(t, 429, se.StatusCode) assert.Equal(t, time.Duration(0), se.RetryAfter) - assert.Equal(t, origErr.Error(), se.Error()) + assert.Equal(t, "HTTP 429: rate limited", se.Error()) }) t.Run("429 with Retry-After header sets RetryAfter", func(t *testing.T) { From 6512b13dd406ed0f062de1186f21b53e05c20561 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:33:48 +0100 Subject: [PATCH 09/15] modelerrors: add NewContextOverflowError constructor Callers were constructing &ContextOverflowError{Underlying: err} directly. Adding a constructor improves encapsulation and allows future field additions without breaking callers. Update runtime/fallback.go to use the constructor. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 7 +++++++ pkg/runtime/fallback.go | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index c53465b30..6ea294412 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -84,6 +84,13 @@ type ContextOverflowError struct { Underlying error } +// NewContextOverflowError creates a ContextOverflowError wrapping the given +// underlying error. Use this constructor rather than building the struct +// directly so that future field additions don't break callers. +func NewContextOverflowError(underlying error) *ContextOverflowError { + return &ContextOverflowError{Underlying: underlying} +} + func (e *ContextOverflowError) Error() string { if e.Underlying == nil { return "context window overflow" diff --git a/pkg/runtime/fallback.go b/pkg/runtime/fallback.go index 0adec41a6..b6eb347d6 100644 --- a/pkg/runtime/fallback.go +++ b/pkg/runtime/fallback.go @@ -326,7 +326,7 @@ func (r *LocalRuntime) tryModelWithFallback( if lastErr != nil { wrapped := fmt.Errorf("all models failed: %w", lastErr) if modelerrors.IsContextOverflowError(lastErr) { - return streamResult{}, nil, &modelerrors.ContextOverflowError{Underlying: wrapped} + return streamResult{}, nil, modelerrors.NewContextOverflowError(wrapped) } return streamResult{}, nil, wrapped } From 4a94178f9d2328a5cd30d91c9f18d2fe0b84e046 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:35:24 +0100 Subject: [PATCH 10/15] modelerrors: add missing test coverage Add tests for: - ClassifyModelError with ContextOverflowError wrapping a StatusError - ExtractHTTPStatusCode with *StatusError in the error chain - FormatError with a wrapped ContextOverflowError (via fmt.Errorf) - NewContextOverflowError constructor usage - ContextOverflowError with nil Underlying (Unwrap returns nil) Assisted-By: docker-agent --- pkg/modelerrors/modelerrors_test.go | 41 +++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 17247538f..274fe99e4 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -85,6 +85,10 @@ func TestExtractHTTPStatusCode(t *testing.T) { {name: "502 in message", err: errors.New("502 bad gateway"), expected: 502}, {name: "401 in message", err: errors.New("401 unauthorized"), expected: 401}, {name: "no status code", err: errors.New("connection refused"), expected: 0}, + // StatusError structural path + {name: "StatusError 429", err: &StatusError{StatusCode: 429, Err: errors.New("rate limited")}, expected: 429}, + {name: "StatusError 500", err: &StatusError{StatusCode: 500, Err: errors.New("server error")}, expected: 500}, + {name: "wrapped StatusError", err: fmt.Errorf("outer: %w", &StatusError{StatusCode: 503, Err: errors.New("unavailable")}), expected: 503}, } for _, tt := range tests { @@ -159,20 +163,28 @@ func TestContextOverflowError(t *testing.T) { t.Run("wraps underlying error", func(t *testing.T) { t.Parallel() underlying := errors.New("prompt is too long: 226360 tokens > 200000 maximum") - ctxErr := &ContextOverflowError{Underlying: underlying} + ctxErr := NewContextOverflowError(underlying) assert.Contains(t, ctxErr.Error(), "context window overflow") assert.Contains(t, ctxErr.Error(), "prompt is too long") assert.ErrorIs(t, ctxErr, underlying) }) - t.Run("errors.As works", func(t *testing.T) { + t.Run("nil underlying returns fallback message", func(t *testing.T) { + t.Parallel() + ctxErr := NewContextOverflowError(nil) + assert.Equal(t, "context window overflow", ctxErr.Error()) + assert.NoError(t, ctxErr.Unwrap()) + }) + + t.Run("errors.As works through wrapping", func(t *testing.T) { t.Parallel() underlying := errors.New("test error") - wrapped := fmt.Errorf("all models failed: %w", &ContextOverflowError{Underlying: underlying}) + wrapped := fmt.Errorf("all models failed: %w", NewContextOverflowError(underlying)) var ctxErr *ContextOverflowError - assert.ErrorAs(t, wrapped, &ctxErr) + require.ErrorAs(t, wrapped, &ctxErr) + assert.Equal(t, underlying, ctxErr.Underlying) }) } @@ -207,13 +219,21 @@ func TestFormatError(t *testing.T) { t.Run("context overflow shows user-friendly message", func(t *testing.T) { t.Parallel() - err := &ContextOverflowError{Underlying: errors.New("prompt is too long")} + err := NewContextOverflowError(errors.New("prompt is too long")) msg := FormatError(err) assert.Contains(t, msg, "context window") assert.Contains(t, msg, "/compact") assert.NotContains(t, msg, "prompt is too long") }) + t.Run("wrapped context overflow shows user-friendly message", func(t *testing.T) { + t.Parallel() + err := fmt.Errorf("outer: %w", NewContextOverflowError(errors.New("prompt is too long"))) + msg := FormatError(err) + assert.Contains(t, msg, "context window") + assert.Contains(t, msg, "/compact") + }) + t.Run("generic error preserves message", func(t *testing.T) { t.Parallel() err := errors.New("authentication failed") @@ -404,4 +424,15 @@ func TestClassifyModelError(t *testing.T) { assert.True(t, rateLimited) assert.Equal(t, 15*time.Second, retryAfterOut) }) + + t.Run("ContextOverflowError wrapping a StatusError is not retryable", func(t *testing.T) { + t.Parallel() + // A 400 StatusError whose message also triggers context overflow detection + statusErr := &StatusError{StatusCode: 400, Err: errors.New("prompt is too long")} + ctxErr := NewContextOverflowError(statusErr) + retryable, rateLimited, retryAfter := ClassifyModelError(ctxErr) + assert.False(t, retryable, "context overflow should never be retryable") + assert.False(t, rateLimited) + assert.Equal(t, time.Duration(0), retryAfter) + }) } From 53b9c8f35664a7e7ba0bc42200b9918c1948533c Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:38:25 +0100 Subject: [PATCH 11/15] bedrock: add error wrapping in StatusError for retry classification Bedrock was the only provider not wrapping SDK errors in *modelerrors.StatusError. This meant ClassifyModelError had to fall back to regex parsing for Bedrock errors. Add wrapBedrockError() which extracts the HTTP status code and Retry-After header from smithyhttp.ResponseError, and call it from both the stream adapter Recv() and CreateChatCompletionStream(). Assisted-By: docker-agent --- go.mod | 2 +- pkg/model/provider/bedrock/adapter.go | 2 +- pkg/model/provider/bedrock/client.go | 2 +- pkg/model/provider/bedrock/wrap.go | 35 ++++++++ pkg/model/provider/bedrock/wrap_test.go | 110 ++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 pkg/model/provider/bedrock/wrap.go create mode 100644 pkg/model/provider/bedrock/wrap_test.go diff --git a/go.mod b/go.mod index ea7add929..9b542daa2 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/aws/aws-sdk-go-v2/credentials v1.19.12 github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.2 github.com/aws/aws-sdk-go-v2/service/sts v1.41.9 + github.com/aws/smithy-go v1.24.2 github.com/aymanbagabas/go-udiff v0.4.1 github.com/blevesearch/bleve/v2 v2.5.7 github.com/bmatcuk/doublestar/v4 v4.10.0 @@ -90,7 +91,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/signin v1.0.8 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 // indirect - github.com/aws/smithy-go v1.24.2 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/bits-and-blooms/bitset v1.24.4 // indirect diff --git a/pkg/model/provider/bedrock/adapter.go b/pkg/model/provider/bedrock/adapter.go index 8f29cd78e..0c6552fd9 100644 --- a/pkg/model/provider/bedrock/adapter.go +++ b/pkg/model/provider/bedrock/adapter.go @@ -71,7 +71,7 @@ func (a *streamAdapter) Recv() (chat.MessageStreamResponse, error) { // Check for errors if err := a.stream.Err(); err != nil { slog.Debug("Bedrock stream: error on channel close", "error", err) - return chat.MessageStreamResponse{}, err + return chat.MessageStreamResponse{}, wrapBedrockError(err) } // If we have a pending finish reason but never got metadata, emit it now if a.pendingFinishReason != "" { diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index a083f1d47..8086d41de 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -219,7 +219,7 @@ func (c *Client) CreateChatCompletionStream( output, err := c.bedrockClient.ConverseStream(ctx, input) if err != nil { slog.Error("Bedrock ConverseStream failed", "error", err) - return nil, fmt.Errorf("bedrock converse stream failed: %w", err) + return nil, wrapBedrockError(fmt.Errorf("bedrock converse stream failed: %w", err)) } trackUsage := c.ModelConfig.TrackUsage == nil || *c.ModelConfig.TrackUsage diff --git a/pkg/model/provider/bedrock/wrap.go b/pkg/model/provider/bedrock/wrap.go new file mode 100644 index 000000000..84c06c313 --- /dev/null +++ b/pkg/model/provider/bedrock/wrap.go @@ -0,0 +1,35 @@ +package bedrock + +import ( + "errors" + + smithyhttp "github.com/aws/smithy-go/transport/http" + + "github.com/docker/docker-agent/pkg/modelerrors" +) + +// wrapBedrockError wraps an AWS Bedrock SDK error in a *modelerrors.StatusError +// to carry HTTP status code metadata for the retry loop. +// The AWS SDK v2 exposes HTTP status via smithyhttp.ResponseError. +// Non-AWS errors (e.g., io.EOF, network errors) pass through unchanged. +func wrapBedrockError(err error) error { + if err == nil { + return nil + } + + var respErr *smithyhttp.ResponseError + if !errors.As(err, &respErr) { + return err + } + + var resp *smithyhttp.Response + if respErr.HTTPResponse() != nil { + resp = respErr.HTTPResponse() + } + + statusCode := respErr.HTTPStatusCode() + if resp != nil { + return modelerrors.WrapHTTPError(statusCode, resp.Response, err) + } + return modelerrors.WrapHTTPError(statusCode, nil, err) +} diff --git a/pkg/model/provider/bedrock/wrap_test.go b/pkg/model/provider/bedrock/wrap_test.go new file mode 100644 index 000000000..13f1a24d4 --- /dev/null +++ b/pkg/model/provider/bedrock/wrap_test.go @@ -0,0 +1,110 @@ +package bedrock + +import ( + "errors" + "fmt" + "net/http" + "testing" + "time" + + smithy "github.com/aws/smithy-go" + smithyhttp "github.com/aws/smithy-go/transport/http" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/modelerrors" +) + +func makeTestBedrockError(statusCode int, retryAfterValue string) error { + header := http.Header{} + if retryAfterValue != "" { + header.Set("Retry-After", retryAfterValue) + } + + httpResp := &http.Response{ + StatusCode: statusCode, + Header: header, + } + resp := &smithyhttp.Response{Response: httpResp} + + return &smithy.OperationError{ + ServiceID: "BedrockRuntime", + OperationName: "ConverseStream", + Err: &smithyhttp.ResponseError{ + Response: resp, + Err: &smithy.GenericAPIError{ + Code: "ThrottlingException", + Message: "Rate exceeded", + }, + }, + } +} + +func TestWrapBedrockError(t *testing.T) { + t.Parallel() + + t.Run("nil returns nil", func(t *testing.T) { + t.Parallel() + assert.NoError(t, wrapBedrockError(nil)) + }) + + t.Run("non-AWS error passes through unchanged", func(t *testing.T) { + t.Parallel() + orig := errors.New("some network error") + result := wrapBedrockError(orig) + assert.Equal(t, orig, result) + var se *modelerrors.StatusError + assert.NotErrorAs(t, result, &se) + }) + + t.Run("429 without Retry-After wraps with zero RetryAfter", func(t *testing.T) { + t.Parallel() + awsErr := makeTestBedrockError(429, "") + result := wrapBedrockError(awsErr) + var se *modelerrors.StatusError + require.ErrorAs(t, result, &se) + assert.Equal(t, 429, se.StatusCode) + assert.Equal(t, time.Duration(0), se.RetryAfter) + // Original error still accessible + assert.ErrorIs(t, result, awsErr) + }) + + t.Run("429 with Retry-After header sets RetryAfter", func(t *testing.T) { + t.Parallel() + awsErr := makeTestBedrockError(429, "20") + result := wrapBedrockError(awsErr) + var se *modelerrors.StatusError + require.ErrorAs(t, result, &se) + assert.Equal(t, 429, se.StatusCode) + assert.Equal(t, 20*time.Second, se.RetryAfter) + }) + + t.Run("500 wraps with correct status code", func(t *testing.T) { + t.Parallel() + awsErr := makeTestBedrockError(500, "") + result := wrapBedrockError(awsErr) + var se *modelerrors.StatusError + require.ErrorAs(t, result, &se) + assert.Equal(t, 500, se.StatusCode) + assert.Equal(t, time.Duration(0), se.RetryAfter) + }) + + t.Run("wrapped error is classified correctly by ClassifyModelError", func(t *testing.T) { + t.Parallel() + awsErr := makeTestBedrockError(429, "15") + result := wrapBedrockError(awsErr) + retryable, rateLimited, retryAfter := modelerrors.ClassifyModelError(result) + assert.False(t, retryable) + assert.True(t, rateLimited) + assert.Equal(t, 15*time.Second, retryAfter) + }) + + t.Run("wrapped in fmt.Errorf still classified correctly", func(t *testing.T) { + t.Parallel() + awsErr := makeTestBedrockError(500, "") + wrapped := fmt.Errorf("bedrock converse stream failed: %w", wrapBedrockError(awsErr)) + retryable, rateLimited, _ := modelerrors.ClassifyModelError(wrapped) + assert.True(t, retryable) + assert.False(t, rateLimited) + }) +} From e76494ef0abb5aab199350991c0b474ea16cfe37 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:44:15 +0100 Subject: [PATCH 12/15] modelerrors: remove unused deprecated wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CalculateBackoff and SleepWithContext were deprecated in favor of backoff.Calculate and backoff.SleepWithContext. No callers use the modelerrors versions — all code imports pkg/backoff directly. Remove the deprecated wrappers to reduce API surface. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 6ea294412..764325d3a 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -402,18 +402,6 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time return isRetryableModelError(err), false, 0 } -// CalculateBackoff returns the backoff duration for a given attempt (0-indexed). -// Uses exponential backoff with jitter. -// -// Deprecated: Use [backoff.Calculate] directly. -var CalculateBackoff = backoff.Calculate - -// SleepWithContext sleeps for the specified duration, returning early if context is cancelled. -// Returns true if the sleep completed, false if it was interrupted by context cancellation. -// -// Deprecated: Use [backoff.SleepWithContext] directly. -var SleepWithContext = backoff.SleepWithContext - // FormatError returns a user-friendly error message for model errors. // Context overflow gets a dedicated actionable message; all other errors // pass through their original message. From f69980672997ba3c32a359f1bbabcfb297368c8b Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:46:59 +0100 Subject: [PATCH 13/15] modelerrors: unexport ParseRetryAfterHeader Only called within the package (by WrapHTTPError and tests). Rename to parseRetryAfterHeader to reduce the public API surface. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 6 +++--- pkg/modelerrors/modelerrors_test.go | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 764325d3a..9f3abd0a1 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -55,7 +55,7 @@ func WrapHTTPError(statusCode int, resp *http.Response, err error) error { } var retryAfter time.Duration if resp != nil { - retryAfter = ParseRetryAfterHeader(resp.Header.Get("Retry-After")) + retryAfter = parseRetryAfterHeader(resp.Header.Get("Retry-After")) } return &StatusError{ StatusCode: statusCode, @@ -330,10 +330,10 @@ func isRetryableModelError(err error) bool { return false } -// ParseRetryAfterHeader parses a Retry-After header value. +// parseRetryAfterHeader parses a Retry-After header value. // Supports both seconds (integer) and HTTP-date formats per RFC 7231 §7.1.3. // Returns 0 if the value is empty, invalid, or results in a non-positive duration. -func ParseRetryAfterHeader(value string) time.Duration { +func parseRetryAfterHeader(value string) time.Duration { if value == "" { return 0 } diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 274fe99e4..5e2ed85b3 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -261,15 +261,15 @@ func TestParseRetryAfterHeader(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - got := ParseRetryAfterHeader(tt.value) - assert.Equal(t, tt.expected, got, "ParseRetryAfterHeader(%q)", tt.value) + got := parseRetryAfterHeader(tt.value) + assert.Equal(t, tt.expected, got, "parseRetryAfterHeader(%q)", tt.value) }) } t.Run("HTTP-date in the future", func(t *testing.T) { t.Parallel() future := time.Now().Add(10 * time.Second).UTC().Format(http.TimeFormat) - got := ParseRetryAfterHeader(future) + got := parseRetryAfterHeader(future) assert.Greater(t, got, 0*time.Second, "should return positive duration for future HTTP-date") assert.LessOrEqual(t, got, 11*time.Second, "should not exceed ~10s for near-future date") }) @@ -277,7 +277,7 @@ func TestParseRetryAfterHeader(t *testing.T) { t.Run("HTTP-date in the past", func(t *testing.T) { t.Parallel() past := time.Now().Add(-10 * time.Second).UTC().Format(http.TimeFormat) - got := ParseRetryAfterHeader(past) + got := parseRetryAfterHeader(past) assert.Equal(t, 0*time.Second, got, "should return 0 for past HTTP-date") }) } From dcfb3c76d5c2dbe5dcc058bda7de10f9872e93e1 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:47:36 +0100 Subject: [PATCH 14/15] modelerrors: unexport IsRetryableStatusCode and ExtractHTTPStatusCode Both are only used within the package. Rename to isRetryableStatusCode and extractHTTPStatusCode to reduce the public API surface. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 22 +++++++++++----------- pkg/modelerrors/modelerrors_test.go | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index 9f3abd0a1..b5c4fdaad 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -161,14 +161,14 @@ func IsContextOverflowError(err error) bool { // statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ") var statusCodeRegex = regexp.MustCompile(`\b([45]\d{2})\b`) -// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error +// extractHTTPStatusCode attempts to extract an HTTP status code from the error // using regex parsing of the error message. This is a fallback for providers // whose errors are not yet wrapped in *StatusError (the preferred path). // // The regex matches 4xx/5xx codes at word boundaries // (e.g., "429 Too Many Requests", "500 Internal Server Error"). // Returns 0 if no status code found. -func ExtractHTTPStatusCode(err error) int { +func extractHTTPStatusCode(err error) int { if err == nil { return 0 } @@ -191,7 +191,7 @@ func ExtractHTTPStatusCode(err error) int { return 0 } -// IsRetryableStatusCode determines if an HTTP status code is retryable. +// isRetryableStatusCode determines if an HTTP status code is retryable. // Retryable means we should retry the SAME model with exponential backoff. // // Retryable status codes: @@ -202,7 +202,7 @@ func ExtractHTTPStatusCode(err error) int { // Non-retryable status codes (skip to next model immediately): // - 429 (rate limit) - provider is explicitly telling us to back off // - 4xx client errors (400, 401, 403, 404) - won't get better with retry -func IsRetryableStatusCode(statusCode int) bool { +func isRetryableStatusCode(statusCode int) bool { switch statusCode { case 500, 502, 503, 504: // Server errors return true @@ -219,7 +219,7 @@ func IsRetryableStatusCode(statusCode int) bool { // retryablePatterns contains error message substrings that indicate a // transient/retryable failure. Numeric status codes (500, 502, etc.) are -// handled separately by ExtractHTTPStatusCode + IsRetryableStatusCode. +// handled separately by extractHTTPStatusCode + isRetryableStatusCode. var retryablePatterns = []string{ "timeout", // Generic timeout "connection reset", // Connection reset @@ -241,7 +241,7 @@ var retryablePatterns = []string{ // nonRetryablePatterns contains error message substrings that indicate a // permanent/non-retryable failure. Numeric status codes (429, 401, etc.) are -// handled separately by ExtractHTTPStatusCode. +// handled separately by extractHTTPStatusCode. var nonRetryablePatterns = []string{ "rate limit", // Rate limit message "too many requests", // Rate limit message @@ -292,8 +292,8 @@ func isRetryableModelError(err error) bool { } // First, try to extract HTTP status code from known SDK error types - if statusCode := ExtractHTTPStatusCode(err); statusCode != 0 { - retryable := IsRetryableStatusCode(statusCode) + if statusCode := extractHTTPStatusCode(err); statusCode != 0 { + retryable := isRetryableStatusCode(statusCode) slog.Debug("Classified error by status code", "status_code", statusCode, "retryable", retryable) @@ -387,17 +387,17 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time if statusErr.StatusCode == http.StatusTooManyRequests { return false, true, statusErr.RetryAfter } - return IsRetryableStatusCode(statusErr.StatusCode), false, 0 + return isRetryableStatusCode(statusErr.StatusCode), false, 0 } // Fallback: providers that don't yet wrap (e.g. Bedrock), or non-provider // errors (network, pattern matching). - statusCode := ExtractHTTPStatusCode(err) + statusCode := extractHTTPStatusCode(err) if statusCode == http.StatusTooManyRequests { return false, true, 0 // No Retry-After without StatusError } if statusCode != 0 { - return IsRetryableStatusCode(statusCode), false, 0 + return isRetryableStatusCode(statusCode), false, 0 } return isRetryableModelError(err), false, 0 } diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go index 5e2ed85b3..3df53d1bd 100644 --- a/pkg/modelerrors/modelerrors_test.go +++ b/pkg/modelerrors/modelerrors_test.go @@ -94,7 +94,7 @@ func TestExtractHTTPStatusCode(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - assert.Equal(t, tt.expected, ExtractHTTPStatusCode(tt.err), "ExtractHTTPStatusCode(%v)", tt.err) + assert.Equal(t, tt.expected, extractHTTPStatusCode(tt.err), "extractHTTPStatusCode(%v)", tt.err) }) } } @@ -118,7 +118,7 @@ func TestIsRetryableStatusCode(t *testing.T) { for _, tt := range tests { t.Run(fmt.Sprintf("status_%d", tt.statusCode), func(t *testing.T) { t.Parallel() - assert.Equal(t, tt.expected, IsRetryableStatusCode(tt.statusCode), "IsRetryableStatusCode(%d)", tt.statusCode) + assert.Equal(t, tt.expected, isRetryableStatusCode(tt.statusCode), "isRetryableStatusCode(%d)", tt.statusCode) }) } } From 20400137a0840cc162a39a3726bd8b2896098184 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Thu, 19 Mar 2026 19:48:34 +0100 Subject: [PATCH 15/15] modelerrors: remove unused MaxRetryAfterWait re-export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No callers reference modelerrors.MaxRetryAfterWait — all usage goes through backoff.MaxRetryAfterWait directly. Remove the constant and the now-unused backoff import. Assisted-By: docker-agent --- pkg/modelerrors/modelerrors.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go index b5c4fdaad..a24d8edb6 100644 --- a/pkg/modelerrors/modelerrors.go +++ b/pkg/modelerrors/modelerrors.go @@ -15,15 +15,6 @@ import ( "strconv" "strings" "time" - - "github.com/docker/docker-agent/pkg/backoff" -) - -// Backoff and retry-after configuration constants. -const ( - // MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent - // a misbehaving server from blocking the agent for an unreasonable amount of time. - MaxRetryAfterWait = backoff.MaxRetryAfterWait ) // StatusError wraps an HTTP API error with structured metadata for retry decisions.