From 8485131914a556e3b496222744e2d201ab3f16cf Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:24:09 +0100
Subject: [PATCH 01/15] modelerrors: remove numeric status-code patterns from
 string matching

ExtractHTTPStatusCode + IsRetryableStatusCode already handle numeric
status codes (500, 502, 429, 401, etc.) via regex with word boundaries.
Having the same codes as bare substrings in the retryable/non-retryable
pattern lists caused false positives (e.g., "processed 500 items"
would match). Remove the numeric patterns and keep only the descriptive
text patterns.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 4283262e7..34ecab17b 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -277,14 +277,12 @@ func IsRetryableModelError(err error) bool {
 	// Fall back to message-pattern matching for errors without structured status codes
 	errMsg := strings.ToLower(err.Error())
 
-	// Retryable patterns (5xx, timeout, network issues)
-	// NOTE: 429 is explicitly NOT in this list - we skip to next model for rate limits
+	// Retryable patterns (timeout, network issues)
+	// NOTE: Numeric status codes (500, 502, etc.) are already handled by
+	// ExtractHTTPStatusCode + IsRetryableStatusCode above; they are not
+	// duplicated here to avoid false positives on arbitrary numbers in
+	// error messages (e.g., "processed 500 items").
 	retryablePatterns := []string{
-		"500",                   // Internal server error
-		"502",                   // Bad gateway
-		"503",                   // Service unavailable
-		"504",                   // Gateway timeout
-		"408",                   // Request timeout
 		"timeout",               // Generic timeout
 		"connection reset",      // Connection reset
 		"connection refused",    // Connection refused
@@ -311,17 +309,14 @@ func IsRetryableModelError(err error) bool {
 	}
 
 	// Non-retryable patterns (skip to next model immediately)
+	// NOTE: Numeric status codes (429, 401, etc.) are already handled by
+	// ExtractHTTPStatusCode above; they are not duplicated here.
 	nonRetryablePatterns := []string{
-		"429",               // Rate limit - skip to next model
 		"rate limit",        // Rate limit message
 		"too many requests", // Rate limit message
 		"throttl",           // Throttling (rate limiting)
 		"quota",             // Quota exceeded
 		"capacity",          // Capacity issues (often rate-limit related)
-		"401",               // Unauthorized
-		"403",               // Forbidden
-		"404",               // Not found
-		"400",               // Bad request
 		"invalid",           // Invalid request
 		"unauthorized",      // Auth error
 		"authentication",    // Auth error

From f18f5b5546e309da18841b2cd4f8a38cb297d02b Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:25:00 +0100
Subject: [PATCH 02/15] modelerrors: unexport IsRetryableModelError

The only caller outside the package is ClassifyModelError itself
(as a fallback path). ClassifyModelError is the intended public API
and provides strictly more information (retryable + rateLimited +
retryAfter). Rename to isRetryableModelError to reduce API surface.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go      | 7 ++++---
 pkg/modelerrors/modelerrors_test.go | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 34ecab17b..ed1be15c3 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -220,7 +220,8 @@ func IsRetryableStatusCode(statusCode int) bool {
 	}
 }
 
-// IsRetryableModelError determines if an error should trigger a retry of the SAME model.
+// isRetryableModelError determines if an error should trigger a retry of the SAME model.
+// It is used as a fallback by ClassifyModelError when no *StatusError is present.
 //
 // Retryable errors (retry same model with backoff):
 // - Network timeouts
@@ -238,7 +239,7 @@ func IsRetryableStatusCode(statusCode int) bool {
 //
 // The key distinction is: 429 means "you're calling too fast, slow down" which
 // suggests we should try a different model, not keep hammering the same one.
-func IsRetryableModelError(err error) bool {
+func isRetryableModelError(err error) bool {
 	if err == nil {
 		return false
 	}
@@ -404,7 +405,7 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time
 	if statusCode != 0 {
 		return IsRetryableStatusCode(statusCode), false, 0
 	}
-	return IsRetryableModelError(err), false, 0
+	return isRetryableModelError(err), false, 0
 }
 
 // CalculateBackoff returns the backoff duration for a given attempt (0-indexed).
diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 361c89f34..211477108 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -66,7 +66,7 @@ func TestIsRetryableModelError(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			assert.Equal(t, tt.expected, IsRetryableModelError(tt.err), "IsRetryableModelError(%v)", tt.err)
+			assert.Equal(t, tt.expected, isRetryableModelError(tt.err), "isRetryableModelError(%v)", tt.err)
 		})
 	}
 }
@@ -252,7 +252,7 @@ func TestIsRetryableModelError_ContextOverflow(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			assert.False(t, IsRetryableModelError(tt.err), "context overflow errors should not be retryable: %v", tt.err)
+			assert.False(t, isRetryableModelError(tt.err), "context overflow errors should not be retryable: %v", tt.err)
 		})
 	}
 }

From 747c989df64955860110b92eaad4925c65d9c144 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:26:01 +0100
Subject: [PATCH 03/15] modelerrors: remove provider SDK imports from
 ExtractHTTPStatusCode

Now that all providers (Anthropic, OpenAI, Gemini) wrap their errors in
*StatusError via WrapHTTPError, the provider-specific type assertions
(anthropic.Error, genai.APIError) in ExtractHTTPStatusCode are redundant.
ClassifyModelError already checks *StatusError first.

Replace the SDK-specific checks with a *StatusError check, keeping the
regex fallback for unwrapped errors. This removes the coupling between
the error classification package and provider-specific SDKs.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index ed1be15c3..be640453f 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -16,9 +16,6 @@ import (
 	"strconv"
 	"strings"
 	"time"
-
-	"github.com/anthropics/anthropic-sdk-go"
-	"google.golang.org/genai"
 )
 
 // Backoff and retry-after configuration constants.
@@ -161,27 +158,25 @@ func IsContextOverflowError(err error) bool {
 // statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ")
 var statusCodeRegex = regexp.MustCompile(`\b([45]\d{2})\b`)
 
-// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error.
-// Checks in order:
-// 1. Known provider SDK error types (Anthropic, Gemini)
-// 2. Regex parsing of error message (fallback for OpenAI and others)
+// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error
+// using regex parsing of the error message. This is a fallback for providers
+// whose errors are not yet wrapped in *StatusError (the preferred path).
+//
+// The regex matches 4xx/5xx codes at word boundaries
+// (e.g., "429 Too Many Requests", "500 Internal Server Error").
 // Returns 0 if no status code found.
 func ExtractHTTPStatusCode(err error) int {
 	if err == nil {
 		return 0
 	}
 
-	// Check Anthropic SDK error type (public)
-	if anthropicErr, ok := errors.AsType[*anthropic.Error](err); ok {
-		return anthropicErr.StatusCode
-	}
-
-	// Check Google Gemini SDK error type (public)
-	if geminiErr, ok := errors.AsType[*genai.APIError](err); ok {
-		return geminiErr.Code
+	// Check for *StatusError first (preferred structured path).
+	var statusErr *StatusError
+	if errors.As(err, &statusErr) {
+		return statusErr.StatusCode
 	}
 
-	// For other providers (OpenAI, etc.), extract from error message using regex
+	// Fallback: extract from error message using regex.
 	// OpenAI SDK error format: `POST "/v1/...": 429 Too Many Requests {...}`
 	matches := statusCodeRegex.FindStringSubmatch(err.Error())
 	if len(matches) >= 2 {

From c303c34dfa6ef9d9992974ff8c7c7bbd573ac3e0 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:26:41 +0100
Subject: [PATCH 04/15] modelerrors: use strconv.Atoi instead of fmt.Sscanf in
 ExtractHTTPStatusCode

The regex match is a simple integer string. strconv.Atoi is simpler,
faster, and already imported. This also removes the fmt import.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index be640453f..0dae04fad 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -7,7 +7,6 @@ package modelerrors
 import (
 	"context"
 	"errors"
-	"fmt"
 	"log/slog"
 	"math/rand"
 	"net"
@@ -180,8 +179,7 @@ func ExtractHTTPStatusCode(err error) int {
 	// OpenAI SDK error format: `POST "/v1/...": 429 Too Many Requests {...}`
 	matches := statusCodeRegex.FindStringSubmatch(err.Error())
 	if len(matches) >= 2 {
-		var code int
-		if _, err := fmt.Sscanf(matches[1], "%d", &code); err == nil {
+		if code, err := strconv.Atoi(matches[1]); err == nil {
 			return code
 		}
 	}

From f182f8de4329b9e9af132b811159d4d3c7713f08 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:27:09 +0100
Subject: [PATCH 05/15] modelerrors: switch from math/rand to math/rand/v2

math/rand/v2 is the preferred package since Go 1.22. The API is
compatible (rand.Float64 exists in both).

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 0dae04fad..44b0fa03f 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -8,7 +8,7 @@ import (
 	"context"
 	"errors"
 	"log/slog"
-	"math/rand"
+	"math/rand/v2"
 	"net"
 	"net/http"
 	"regexp"

From d13541e364293d4870f3507737e22b2370f6b0a2 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:27:56 +0100
Subject: [PATCH 06/15] modelerrors: promote retryable/non-retryable patterns
 to package-level vars

The retryablePatterns and nonRetryablePatterns slices were local
variables inside isRetryableModelError, causing a new allocation on
every call. Promote them to package-level vars (like
contextOverflowPatterns) so they are allocated once.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 77 ++++++++++++++++------------------
 1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 44b0fa03f..ed7ae6bf1 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -213,6 +213,43 @@ func IsRetryableStatusCode(statusCode int) bool {
 	}
 }
 
+// retryablePatterns contains error message substrings that indicate a
+// transient/retryable failure. Numeric status codes (500, 502, etc.) are
+// handled separately by ExtractHTTPStatusCode + IsRetryableStatusCode.
+var retryablePatterns = []string{
+	"timeout",               // Generic timeout
+	"connection reset",      // Connection reset
+	"connection refused",    // Connection refused
+	"no such host",          // DNS failure
+	"temporary failure",     // Temporary failure
+	"service unavailable",   // Service unavailable
+	"internal server error", // Server error
+	"bad gateway",           // Gateway error
+	"gateway timeout",       // Gateway timeout
+	"overloaded",            // Server overloaded
+	"overloaded_error",      // Server overloaded
+	"other side closed",     // Connection closed by peer
+	"fetch failed",          // Network fetch failure
+	"reset before headers",  // Connection reset before headers received
+	"upstream connect",      // Upstream connection error
+	"internal_error",        // HTTP/2 INTERNAL_ERROR (stream-level)
+}
+
+// nonRetryablePatterns contains error message substrings that indicate a
+// permanent/non-retryable failure. Numeric status codes (429, 401, etc.) are
+// handled separately by ExtractHTTPStatusCode.
+var nonRetryablePatterns = []string{
+	"rate limit",        // Rate limit message
+	"too many requests", // Rate limit message
+	"throttl",           // Throttling (rate limiting)
+	"quota",             // Quota exceeded
+	"capacity",          // Capacity issues (often rate-limit related)
+	"invalid",           // Invalid request
+	"unauthorized",      // Auth error
+	"authentication",    // Auth error
+	"api key",           // API key error
+}
+
 // isRetryableModelError determines if an error should trigger a retry of the SAME model.
 // It is used as a fallback by ClassifyModelError when no *StatusError is present.
 //
@@ -268,33 +305,8 @@ func isRetryableModelError(err error) bool {
 		}
 	}
 
-	// Fall back to message-pattern matching for errors without structured status codes
 	errMsg := strings.ToLower(err.Error())
 
-	// Retryable patterns (timeout, network issues)
-	// NOTE: Numeric status codes (500, 502, etc.) are already handled by
-	// ExtractHTTPStatusCode + IsRetryableStatusCode above; they are not
-	// duplicated here to avoid false positives on arbitrary numbers in
-	// error messages (e.g., "processed 500 items").
-	retryablePatterns := []string{
-		"timeout",               // Generic timeout
-		"connection reset",      // Connection reset
-		"connection refused",    // Connection refused
-		"no such host",          // DNS failure
-		"temporary failure",     // Temporary failure
-		"service unavailable",   // Service unavailable
-		"internal server error", // Server error
-		"bad gateway",           // Gateway error
-		"gateway timeout",       // Gateway timeout
-		"overloaded",            // Server overloaded
-		"overloaded_error",      // Server overloaded
-		"other side closed",     // Connection closed by peer
-		"fetch failed",          // Network fetch failure
-		"reset before headers",  // Connection reset before headers received
-		"upstream connect",      // Upstream connection error
-		"internal_error",        // HTTP/2 INTERNAL_ERROR (stream-level)
-	}
-
 	for _, pattern := range retryablePatterns {
 		if strings.Contains(errMsg, pattern) {
 			slog.Debug("Matched retryable error pattern", "pattern", pattern)
@@ -302,21 +314,6 @@ func isRetryableModelError(err error) bool {
 		}
 	}
 
-	// Non-retryable patterns (skip to next model immediately)
-	// NOTE: Numeric status codes (429, 401, etc.) are already handled by
-	// ExtractHTTPStatusCode above; they are not duplicated here.
-	nonRetryablePatterns := []string{
-		"rate limit",        // Rate limit message
-		"too many requests", // Rate limit message
-		"throttl",           // Throttling (rate limiting)
-		"quota",             // Quota exceeded
-		"capacity",          // Capacity issues (often rate-limit related)
-		"invalid",           // Invalid request
-		"unauthorized",      // Auth error
-		"authentication",    // Auth error
-		"api key",           // API key error
-	}
-
 	for _, pattern := range nonRetryablePatterns {
 		if strings.Contains(errMsg, pattern) {
 			slog.Debug("Matched non-retryable error pattern", "pattern", pattern)

From 4fd44ed62decd1ee953aa0654a9685f171865cad Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:31:49 +0100
Subject: [PATCH 07/15] modelerrors: move backoff and sleep utilities to
 pkg/backoff

SleepWithContext and CalculateBackoff are general-purpose utilities not
specific to model errors. Move them to a new pkg/backoff package for
clarity and reusability. MaxRetryAfterWait moves there too.

Deprecated wrapper variables are left in modelerrors to maintain
backward compatibility. The runtime/fallback package now imports
pkg/backoff directly.

Assisted-By: docker-agent
---
 pkg/backoff/backoff.go              | 60 +++++++++++++++++++++++++
 pkg/backoff/backoff_test.go         | 70 +++++++++++++++++++++++++++++
 pkg/modelerrors/modelerrors.go      | 49 ++++----------------
 pkg/modelerrors/modelerrors_test.go | 60 -------------------------
 pkg/runtime/fallback.go             | 21 ++++-----
 5 files changed, 150 insertions(+), 110 deletions(-)
 create mode 100644 pkg/backoff/backoff.go
 create mode 100644 pkg/backoff/backoff_test.go

diff --git a/pkg/backoff/backoff.go b/pkg/backoff/backoff.go
new file mode 100644
index 000000000..722fb1d4e
--- /dev/null
+++ b/pkg/backoff/backoff.go
@@ -0,0 +1,60 @@
+// Package backoff provides exponential backoff calculation and
+// context-aware sleep utilities.
+package backoff
+
+import (
+	"context"
+	"math/rand/v2"
+	"time"
+)
+
+// Configuration constants for exponential backoff.
+const (
+	baseDelay = 200 * time.Millisecond
+	maxDelay  = 2 * time.Second
+	factor    = 2.0
+	jitter    = 0.1
+
+	// MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent
+	// a misbehaving server from blocking the agent for an unreasonable amount of time.
+	MaxRetryAfterWait = 60 * time.Second
+)
+
+// Calculate returns the backoff duration for a given attempt (0-indexed).
+// Uses exponential backoff with jitter.
+func Calculate(attempt int) time.Duration {
+	if attempt < 0 {
+		attempt = 0
+	}
+
+	// Calculate exponential delay
+	delay := float64(baseDelay)
+	for range attempt {
+		delay *= factor
+	}
+
+	// Cap at max delay
+	if delay > float64(maxDelay) {
+		delay = float64(maxDelay)
+	}
+
+	// Add jitter (±10%)
+	j := delay * jitter * (2*rand.Float64() - 1)
+	delay += j
+
+	return time.Duration(delay)
+}
+
+// SleepWithContext sleeps for the specified duration, returning early if context is cancelled.
+// Returns true if the sleep completed, false if it was interrupted by context cancellation.
+func SleepWithContext(ctx context.Context, d time.Duration) bool {
+	timer := time.NewTimer(d)
+	defer timer.Stop()
+
+	select {
+	case <-timer.C:
+		return true
+	case <-ctx.Done():
+		return false
+	}
+}
diff --git a/pkg/backoff/backoff_test.go b/pkg/backoff/backoff_test.go
new file mode 100644
index 000000000..e0a61d9c5
--- /dev/null
+++ b/pkg/backoff/backoff_test.go
@@ -0,0 +1,70 @@
+package backoff
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCalculate(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		attempt     int
+		minExpected time.Duration
+		maxExpected time.Duration
+	}{
+		{attempt: 0, minExpected: 180 * time.Millisecond, maxExpected: 220 * time.Millisecond},
+		{attempt: 1, minExpected: 360 * time.Millisecond, maxExpected: 440 * time.Millisecond},
+		{attempt: 2, minExpected: 720 * time.Millisecond, maxExpected: 880 * time.Millisecond},
+		{attempt: 3, minExpected: 1440 * time.Millisecond, maxExpected: 1760 * time.Millisecond},
+		{attempt: 10, minExpected: 1800 * time.Millisecond, maxExpected: 2200 * time.Millisecond}, // capped at 2s
+	}
+
+	for _, tt := range tests {
+		t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) {
+			t.Parallel()
+			b := Calculate(tt.attempt)
+			assert.GreaterOrEqual(t, b, tt.minExpected, "backoff should be at least %v", tt.minExpected)
+			assert.LessOrEqual(t, b, tt.maxExpected, "backoff should be at most %v", tt.maxExpected)
+		})
+	}
+
+	t.Run("negative attempt treated as 0", func(t *testing.T) {
+		t.Parallel()
+		b := Calculate(-1)
+		assert.GreaterOrEqual(t, b, 180*time.Millisecond)
+		assert.LessOrEqual(t, b, 220*time.Millisecond)
+	})
+}
+
+func TestSleepWithContext(t *testing.T) {
+	t.Parallel()
+
+	t.Run("completes normally", func(t *testing.T) {
+		t.Parallel()
+		ctx := t.Context()
+		start := time.Now()
+		completed := SleepWithContext(ctx, 10*time.Millisecond)
+		elapsed := time.Since(start)
+
+		assert.True(t, completed, "should complete normally")
+		assert.GreaterOrEqual(t, elapsed, 10*time.Millisecond)
+	})
+
+	t.Run("interrupted by context", func(t *testing.T) {
+		t.Parallel()
+		ctx, cancel := context.WithCancel(t.Context())
+		time.AfterFunc(10*time.Millisecond, cancel)
+
+		start := time.Now()
+		completed := SleepWithContext(ctx, 1*time.Second)
+		elapsed := time.Since(start)
+
+		assert.False(t, completed, "should be interrupted")
+		assert.Less(t, elapsed, 100*time.Millisecond, "should return quickly after cancel")
+	})
+}
diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index ed7ae6bf1..41544e921 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -8,25 +8,21 @@ import (
 	"context"
 	"errors"
 	"log/slog"
-	"math/rand/v2"
 	"net"
 	"net/http"
 	"regexp"
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/docker/docker-agent/pkg/backoff"
 )
 
 // Backoff and retry-after configuration constants.
 const (
-	backoffBaseDelay = 200 * time.Millisecond
-	backoffMaxDelay  = 2 * time.Second
-	backoffFactor    = 2.0
-	backoffJitter    = 0.1
-
 	// MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent
 	// a misbehaving server from blocking the agent for an unreasonable amount of time.
-	MaxRetryAfterWait = 60 * time.Second
+	MaxRetryAfterWait = backoff.MaxRetryAfterWait
 )
 
 // StatusError wraps an HTTP API error with structured metadata for retry decisions.
@@ -400,42 +396,15 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time
 
 // CalculateBackoff returns the backoff duration for a given attempt (0-indexed).
 // Uses exponential backoff with jitter.
-func CalculateBackoff(attempt int) time.Duration {
-	if attempt < 0 {
-		attempt = 0
-	}
-
-	// Calculate exponential delay
-	delay := float64(backoffBaseDelay)
-	for range attempt {
-		delay *= backoffFactor
-	}
-
-	// Cap at max delay
-	if delay > float64(backoffMaxDelay) {
-		delay = float64(backoffMaxDelay)
-	}
-
-	// Add jitter (±10%)
-	jitter := delay * backoffJitter * (2*rand.Float64() - 1)
-	delay += jitter
-
-	return time.Duration(delay)
-}
+//
+// Deprecated: Use [backoff.Calculate] directly.
+var CalculateBackoff = backoff.Calculate
 
 // SleepWithContext sleeps for the specified duration, returning early if context is cancelled.
 // Returns true if the sleep completed, false if it was interrupted by context cancellation.
-func SleepWithContext(ctx context.Context, d time.Duration) bool {
-	timer := time.NewTimer(d)
-	defer timer.Stop()
-
-	select {
-	case <-timer.C:
-		return true
-	case <-ctx.Done():
-		return false
-	}
-}
+//
+// Deprecated: Use [backoff.SleepWithContext] directly.
+var SleepWithContext = backoff.SleepWithContext
 
 // FormatError returns a user-friendly error message for model errors.
 // Context overflow gets a dedicated actionable message; all other errors
diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 211477108..7a9ab3922 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -71,66 +71,6 @@ func TestIsRetryableModelError(t *testing.T) {
 	}
 }
 
-func TestCalculateBackoff(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		attempt     int
-		minExpected time.Duration
-		maxExpected time.Duration
-	}{
-		{attempt: 0, minExpected: 180 * time.Millisecond, maxExpected: 220 * time.Millisecond},
-		{attempt: 1, minExpected: 360 * time.Millisecond, maxExpected: 440 * time.Millisecond},
-		{attempt: 2, minExpected: 720 * time.Millisecond, maxExpected: 880 * time.Millisecond},
-		{attempt: 3, minExpected: 1440 * time.Millisecond, maxExpected: 1760 * time.Millisecond},
-		{attempt: 10, minExpected: 1800 * time.Millisecond, maxExpected: 2200 * time.Millisecond}, // capped at 2s
-	}
-
-	for _, tt := range tests {
-		t.Run(fmt.Sprintf("attempt_%d", tt.attempt), func(t *testing.T) {
-			t.Parallel()
-			backoff := CalculateBackoff(tt.attempt)
-			assert.GreaterOrEqual(t, backoff, tt.minExpected, "backoff should be at least %v", tt.minExpected)
-			assert.LessOrEqual(t, backoff, tt.maxExpected, "backoff should be at most %v", tt.maxExpected)
-		})
-	}
-
-	t.Run("negative attempt treated as 0", func(t *testing.T) {
-		t.Parallel()
-		backoff := CalculateBackoff(-1)
-		assert.GreaterOrEqual(t, backoff, 180*time.Millisecond)
-		assert.LessOrEqual(t, backoff, 220*time.Millisecond)
-	})
-}
-
-func TestSleepWithContext(t *testing.T) {
-	t.Parallel()
-
-	t.Run("completes normally", func(t *testing.T) {
-		t.Parallel()
-		ctx := t.Context()
-		start := time.Now()
-		completed := SleepWithContext(ctx, 10*time.Millisecond)
-		elapsed := time.Since(start)
-
-		assert.True(t, completed, "should complete normally")
-		assert.GreaterOrEqual(t, elapsed, 10*time.Millisecond)
-	})
-
-	t.Run("interrupted by context", func(t *testing.T) {
-		t.Parallel()
-		ctx, cancel := context.WithCancel(t.Context())
-		time.AfterFunc(10*time.Millisecond, cancel)
-
-		start := time.Now()
-		completed := SleepWithContext(ctx, 1*time.Second)
-		elapsed := time.Since(start)
-
-		assert.False(t, completed, "should be interrupted")
-		assert.Less(t, elapsed, 100*time.Millisecond, "should return quickly after cancel")
-	})
-}
-
 func TestExtractHTTPStatusCode(t *testing.T) {
 	t.Parallel()
 
diff --git a/pkg/runtime/fallback.go b/pkg/runtime/fallback.go
index 546b3e32f..0adec41a6 100644
--- a/pkg/runtime/fallback.go
+++ b/pkg/runtime/fallback.go
@@ -8,6 +8,7 @@ import (
 	"time"
 
 	"github.com/docker/docker-agent/pkg/agent"
+	"github.com/docker/docker-agent/pkg/backoff"
 	"github.com/docker/docker-agent/pkg/chat"
 	"github.com/docker/docker-agent/pkg/model/provider"
 	"github.com/docker/docker-agent/pkg/modelerrors"
@@ -69,12 +70,12 @@ func logFallbackAttempt(agentName string, model modelWithFallback, attempt, maxR
 }
 
 // logRetryBackoff logs when we're backing off before a retry
-func logRetryBackoff(agentName, modelID string, attempt int, backoff time.Duration) {
+func logRetryBackoff(agentName, modelID string, attempt int, backoffDelay time.Duration) {
 	slog.Debug("Backing off before retry",
 		"agent", agentName,
 		"model", modelID,
 		"attempt", attempt+1,
-		"backoff", backoff)
+		"backoff", backoffDelay)
 }
 
 // getCooldownState returns the current cooldown state for an agent (thread-safe).
@@ -222,9 +223,9 @@ func (r *LocalRuntime) tryModelWithFallback(
 
 			// Apply backoff before retry (not on first attempt of each model)
 			if attempt > 0 {
-				backoff := modelerrors.CalculateBackoff(attempt - 1)
-				logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoff)
-				if !modelerrors.SleepWithContext(ctx, backoff) {
+				backoffDelay := backoff.Calculate(attempt - 1)
+				logRetryBackoff(a.Name(), modelEntry.provider.ID(), attempt, backoffDelay)
+				if !backoff.SleepWithContext(ctx, backoffDelay) {
 					return streamResult{}, nil, ctx.Err()
 				}
 			}
@@ -382,14 +383,14 @@ func (r *LocalRuntime) handleModelError(
 		// Opt-in enabled, no fallbacks → retry same model after honouring Retry-After (or backoff).
 		waitDuration := retryAfter
 		if waitDuration <= 0 {
-			waitDuration = modelerrors.CalculateBackoff(attempt)
-		} else if waitDuration > modelerrors.MaxRetryAfterWait {
+			waitDuration = backoff.Calculate(attempt)
+		} else if waitDuration > backoff.MaxRetryAfterWait {
 			slog.Warn("Retry-After exceeds maximum, capping",
 				"agent", a.Name(),
 				"model", modelEntry.provider.ID(),
 				"retry_after", retryAfter,
-				"max", modelerrors.MaxRetryAfterWait)
-			waitDuration = modelerrors.MaxRetryAfterWait
+				"max", backoff.MaxRetryAfterWait)
+			waitDuration = backoff.MaxRetryAfterWait
 		}
 		slog.Warn("Rate limited, retrying (opt-in enabled)",
 			"agent", a.Name(),
@@ -398,7 +399,7 @@ func (r *LocalRuntime) handleModelError(
 			"wait", waitDuration,
 			"retry_after_from_header", retryAfter > 0,
 			"error", err)
-		if !modelerrors.SleepWithContext(ctx, waitDuration) {
+		if !backoff.SleepWithContext(ctx, waitDuration) {
 			return retryDecisionReturn
 		}
 		return retryDecisionContinue

From 744db2501d5857ba9e57717e4f62f87ab4b0fc6c Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:33:01 +0100
Subject: [PATCH 08/15] modelerrors: include HTTP status code in
 StatusError.Error()

StatusError.Error() previously delegated directly to the wrapped error,
discarding the status code. This made debugging harder since log lines
wouldn't reveal the HTTP status. Now Error() returns a message like
"HTTP 429: rate limit exceeded" for better observability.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go      | 3 ++-
 pkg/modelerrors/modelerrors_test.go | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 41544e921..c53465b30 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -7,6 +7,7 @@ package modelerrors
 import (
 	"context"
 	"errors"
+	"fmt"
 	"log/slog"
 	"net"
 	"net/http"
@@ -38,7 +39,7 @@ type StatusError struct {
 }
 
 func (e *StatusError) Error() string {
-	return e.Err.Error()
+	return fmt.Sprintf("HTTP %d: %s", e.StatusCode, e.Err.Error())
 }
 
 func (e *StatusError) Unwrap() error {
diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 7a9ab3922..17247538f 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -265,11 +265,11 @@ func TestParseRetryAfterHeader(t *testing.T) {
 func TestStatusError(t *testing.T) {
 	t.Parallel()
 
-	t.Run("Error() delegates to wrapped error", func(t *testing.T) {
+	t.Run("Error() includes status code and wrapped message", func(t *testing.T) {
 		t.Parallel()
 		underlying := errors.New("rate limit exceeded")
 		se := &StatusError{StatusCode: 429, Err: underlying}
-		assert.Equal(t, underlying.Error(), se.Error())
+		assert.Equal(t, "HTTP 429: rate limit exceeded", se.Error())
 	})
 
 	t.Run("Unwrap() allows errors.Is traversal", func(t *testing.T) {
@@ -315,7 +315,7 @@ func TestWrapHTTPError(t *testing.T) {
 		require.ErrorAs(t, result, &se)
 		assert.Equal(t, 429, se.StatusCode)
 		assert.Equal(t, time.Duration(0), se.RetryAfter)
-		assert.Equal(t, origErr.Error(), se.Error())
+		assert.Equal(t, "HTTP 429: rate limited", se.Error())
 	})
 
 	t.Run("429 with Retry-After header sets RetryAfter", func(t *testing.T) {

From 6512b13dd406ed0f062de1186f21b53e05c20561 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:33:48 +0100
Subject: [PATCH 09/15] modelerrors: add NewContextOverflowError constructor

Callers were constructing &ContextOverflowError{Underlying: err}
directly. Adding a constructor improves encapsulation and allows
future field additions without breaking callers. Update
runtime/fallback.go to use the constructor.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 7 +++++++
 pkg/runtime/fallback.go        | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index c53465b30..6ea294412 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -84,6 +84,13 @@ type ContextOverflowError struct {
 	Underlying error
 }
 
+// NewContextOverflowError creates a ContextOverflowError wrapping the given
+// underlying error. Use this constructor rather than building the struct
+// directly so that future field additions don't break callers.
+func NewContextOverflowError(underlying error) *ContextOverflowError {
+	return &ContextOverflowError{Underlying: underlying}
+}
+
 func (e *ContextOverflowError) Error() string {
 	if e.Underlying == nil {
 		return "context window overflow"
diff --git a/pkg/runtime/fallback.go b/pkg/runtime/fallback.go
index 0adec41a6..b6eb347d6 100644
--- a/pkg/runtime/fallback.go
+++ b/pkg/runtime/fallback.go
@@ -326,7 +326,7 @@ func (r *LocalRuntime) tryModelWithFallback(
 	if lastErr != nil {
 		wrapped := fmt.Errorf("all models failed: %w", lastErr)
 		if modelerrors.IsContextOverflowError(lastErr) {
-			return streamResult{}, nil, &modelerrors.ContextOverflowError{Underlying: wrapped}
+			return streamResult{}, nil, modelerrors.NewContextOverflowError(wrapped)
 		}
 		return streamResult{}, nil, wrapped
 	}

From 4a94178f9d2328a5cd30d91c9f18d2fe0b84e046 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:35:24 +0100
Subject: [PATCH 10/15] modelerrors: add missing test coverage

Add tests for:
- ClassifyModelError with ContextOverflowError wrapping a StatusError
- ExtractHTTPStatusCode with *StatusError in the error chain
- FormatError with a wrapped ContextOverflowError (via fmt.Errorf)
- NewContextOverflowError constructor usage
- ContextOverflowError with nil Underlying (Unwrap returns nil)

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors_test.go | 41 +++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 17247538f..274fe99e4 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -85,6 +85,10 @@ func TestExtractHTTPStatusCode(t *testing.T) {
 		{name: "502 in message", err: errors.New("502 bad gateway"), expected: 502},
 		{name: "401 in message", err: errors.New("401 unauthorized"), expected: 401},
 		{name: "no status code", err: errors.New("connection refused"), expected: 0},
+		// StatusError structural path
+		{name: "StatusError 429", err: &StatusError{StatusCode: 429, Err: errors.New("rate limited")}, expected: 429},
+		{name: "StatusError 500", err: &StatusError{StatusCode: 500, Err: errors.New("server error")}, expected: 500},
+		{name: "wrapped StatusError", err: fmt.Errorf("outer: %w", &StatusError{StatusCode: 503, Err: errors.New("unavailable")}), expected: 503},
 	}
 
 	for _, tt := range tests {
@@ -159,20 +163,28 @@ func TestContextOverflowError(t *testing.T) {
 	t.Run("wraps underlying error", func(t *testing.T) {
 		t.Parallel()
 		underlying := errors.New("prompt is too long: 226360 tokens > 200000 maximum")
-		ctxErr := &ContextOverflowError{Underlying: underlying}
+		ctxErr := NewContextOverflowError(underlying)
 
 		assert.Contains(t, ctxErr.Error(), "context window overflow")
 		assert.Contains(t, ctxErr.Error(), "prompt is too long")
 		assert.ErrorIs(t, ctxErr, underlying)
 	})
 
-	t.Run("errors.As works", func(t *testing.T) {
+	t.Run("nil underlying returns fallback message", func(t *testing.T) {
+		t.Parallel()
+		ctxErr := NewContextOverflowError(nil)
+		assert.Equal(t, "context window overflow", ctxErr.Error())
+		assert.NoError(t, ctxErr.Unwrap())
+	})
+
+	t.Run("errors.As works through wrapping", func(t *testing.T) {
 		t.Parallel()
 		underlying := errors.New("test error")
-		wrapped := fmt.Errorf("all models failed: %w", &ContextOverflowError{Underlying: underlying})
+		wrapped := fmt.Errorf("all models failed: %w", NewContextOverflowError(underlying))
 
 		var ctxErr *ContextOverflowError
-		assert.ErrorAs(t, wrapped, &ctxErr)
+		require.ErrorAs(t, wrapped, &ctxErr)
+		assert.Equal(t, underlying, ctxErr.Underlying)
 	})
 }
 
@@ -207,13 +219,21 @@ func TestFormatError(t *testing.T) {
 
 	t.Run("context overflow shows user-friendly message", func(t *testing.T) {
 		t.Parallel()
-		err := &ContextOverflowError{Underlying: errors.New("prompt is too long")}
+		err := NewContextOverflowError(errors.New("prompt is too long"))
 		msg := FormatError(err)
 		assert.Contains(t, msg, "context window")
 		assert.Contains(t, msg, "/compact")
 		assert.NotContains(t, msg, "prompt is too long")
 	})
 
+	t.Run("wrapped context overflow shows user-friendly message", func(t *testing.T) {
+		t.Parallel()
+		err := fmt.Errorf("outer: %w", NewContextOverflowError(errors.New("prompt is too long")))
+		msg := FormatError(err)
+		assert.Contains(t, msg, "context window")
+		assert.Contains(t, msg, "/compact")
+	})
+
 	t.Run("generic error preserves message", func(t *testing.T) {
 		t.Parallel()
 		err := errors.New("authentication failed")
@@ -404,4 +424,15 @@ func TestClassifyModelError(t *testing.T) {
 		assert.True(t, rateLimited)
 		assert.Equal(t, 15*time.Second, retryAfterOut)
 	})
+
+	t.Run("ContextOverflowError wrapping a StatusError is not retryable", func(t *testing.T) {
+		t.Parallel()
+		// A 400 StatusError whose message also triggers context overflow detection
+		statusErr := &StatusError{StatusCode: 400, Err: errors.New("prompt is too long")}
+		ctxErr := NewContextOverflowError(statusErr)
+		retryable, rateLimited, retryAfter := ClassifyModelError(ctxErr)
+		assert.False(t, retryable, "context overflow should never be retryable")
+		assert.False(t, rateLimited)
+		assert.Equal(t, time.Duration(0), retryAfter)
+	})
 }

From 53b9c8f35664a7e7ba0bc42200b9918c1948533c Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:38:25 +0100
Subject: [PATCH 11/15] bedrock: add error wrapping in StatusError for retry
 classification

Bedrock was the only provider not wrapping SDK errors in
*modelerrors.StatusError. This meant ClassifyModelError had to fall back
to regex parsing for Bedrock errors.

Add wrapBedrockError() which extracts the HTTP status code and
Retry-After header from smithyhttp.ResponseError, and call it from both
the stream adapter Recv() and CreateChatCompletionStream().

Assisted-By: docker-agent
---
 go.mod                                  |   2 +-
 pkg/model/provider/bedrock/adapter.go   |   2 +-
 pkg/model/provider/bedrock/client.go    |   2 +-
 pkg/model/provider/bedrock/wrap.go      |  35 ++++++++
 pkg/model/provider/bedrock/wrap_test.go | 110 ++++++++++++++++++++++++
 5 files changed, 148 insertions(+), 3 deletions(-)
 create mode 100644 pkg/model/provider/bedrock/wrap.go
 create mode 100644 pkg/model/provider/bedrock/wrap_test.go

diff --git a/go.mod b/go.mod
index ea7add929..9b542daa2 100644
--- a/go.mod
+++ b/go.mod
@@ -19,6 +19,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/credentials v1.19.12
 	github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.2
 	github.com/aws/aws-sdk-go-v2/service/sts v1.41.9
+	github.com/aws/smithy-go v1.24.2
 	github.com/aymanbagabas/go-udiff v0.4.1
 	github.com/blevesearch/bleve/v2 v2.5.7
 	github.com/bmatcuk/doublestar/v4 v4.10.0
@@ -90,7 +91,6 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/signin v1.0.8 // indirect
 	github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 // indirect
 	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 // indirect
-	github.com/aws/smithy-go v1.24.2 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/bahlo/generic-list-go v0.2.0 // indirect
 	github.com/bits-and-blooms/bitset v1.24.4 // indirect
diff --git a/pkg/model/provider/bedrock/adapter.go b/pkg/model/provider/bedrock/adapter.go
index 8f29cd78e..0c6552fd9 100644
--- a/pkg/model/provider/bedrock/adapter.go
+++ b/pkg/model/provider/bedrock/adapter.go
@@ -71,7 +71,7 @@ func (a *streamAdapter) Recv() (chat.MessageStreamResponse, error) {
 		// Check for errors
 		if err := a.stream.Err(); err != nil {
 			slog.Debug("Bedrock stream: error on channel close", "error", err)
-			return chat.MessageStreamResponse{}, err
+			return chat.MessageStreamResponse{}, wrapBedrockError(err)
 		}
 		// If we have a pending finish reason but never got metadata, emit it now
 		if a.pendingFinishReason != "" {
diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go
index a083f1d47..8086d41de 100644
--- a/pkg/model/provider/bedrock/client.go
+++ b/pkg/model/provider/bedrock/client.go
@@ -219,7 +219,7 @@ func (c *Client) CreateChatCompletionStream(
 	output, err := c.bedrockClient.ConverseStream(ctx, input)
 	if err != nil {
 		slog.Error("Bedrock ConverseStream failed", "error", err)
-		return nil, fmt.Errorf("bedrock converse stream failed: %w", err)
+		return nil, wrapBedrockError(fmt.Errorf("bedrock converse stream failed: %w", err))
 	}
 
 	trackUsage := c.ModelConfig.TrackUsage == nil || *c.ModelConfig.TrackUsage
diff --git a/pkg/model/provider/bedrock/wrap.go b/pkg/model/provider/bedrock/wrap.go
new file mode 100644
index 000000000..84c06c313
--- /dev/null
+++ b/pkg/model/provider/bedrock/wrap.go
@@ -0,0 +1,35 @@
+package bedrock
+
+import (
+	"errors"
+
+	smithyhttp "github.com/aws/smithy-go/transport/http"
+
+	"github.com/docker/docker-agent/pkg/modelerrors"
+)
+
+// wrapBedrockError wraps an AWS Bedrock SDK error in a *modelerrors.StatusError
+// to carry HTTP status code metadata for the retry loop.
+// The AWS SDK v2 exposes HTTP status via smithyhttp.ResponseError.
+// Non-AWS errors (e.g., io.EOF, network errors) pass through unchanged.
+func wrapBedrockError(err error) error {
+	if err == nil {
+		return nil
+	}
+
+	var respErr *smithyhttp.ResponseError
+	if !errors.As(err, &respErr) {
+		return err
+	}
+
+	var resp *smithyhttp.Response
+	if respErr.HTTPResponse() != nil {
+		resp = respErr.HTTPResponse()
+	}
+
+	statusCode := respErr.HTTPStatusCode()
+	if resp != nil {
+		return modelerrors.WrapHTTPError(statusCode, resp.Response, err)
+	}
+	return modelerrors.WrapHTTPError(statusCode, nil, err)
+}
diff --git a/pkg/model/provider/bedrock/wrap_test.go b/pkg/model/provider/bedrock/wrap_test.go
new file mode 100644
index 000000000..13f1a24d4
--- /dev/null
+++ b/pkg/model/provider/bedrock/wrap_test.go
@@ -0,0 +1,110 @@
+package bedrock
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"testing"
+	"time"
+
+	smithy "github.com/aws/smithy-go"
+	smithyhttp "github.com/aws/smithy-go/transport/http"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/modelerrors"
+)
+
+func makeTestBedrockError(statusCode int, retryAfterValue string) error {
+	header := http.Header{}
+	if retryAfterValue != "" {
+		header.Set("Retry-After", retryAfterValue)
+	}
+
+	httpResp := &http.Response{
+		StatusCode: statusCode,
+		Header:     header,
+	}
+	resp := &smithyhttp.Response{Response: httpResp}
+
+	return &smithy.OperationError{
+		ServiceID:     "BedrockRuntime",
+		OperationName: "ConverseStream",
+		Err: &smithyhttp.ResponseError{
+			Response: resp,
+			Err: &smithy.GenericAPIError{
+				Code:    "ThrottlingException",
+				Message: "Rate exceeded",
+			},
+		},
+	}
+}
+
+func TestWrapBedrockError(t *testing.T) {
+	t.Parallel()
+
+	t.Run("nil returns nil", func(t *testing.T) {
+		t.Parallel()
+		assert.NoError(t, wrapBedrockError(nil))
+	})
+
+	t.Run("non-AWS error passes through unchanged", func(t *testing.T) {
+		t.Parallel()
+		orig := errors.New("some network error")
+		result := wrapBedrockError(orig)
+		assert.Equal(t, orig, result)
+		var se *modelerrors.StatusError
+		assert.NotErrorAs(t, result, &se)
+	})
+
+	t.Run("429 without Retry-After wraps with zero RetryAfter", func(t *testing.T) {
+		t.Parallel()
+		awsErr := makeTestBedrockError(429, "")
+		result := wrapBedrockError(awsErr)
+		var se *modelerrors.StatusError
+		require.ErrorAs(t, result, &se)
+		assert.Equal(t, 429, se.StatusCode)
+		assert.Equal(t, time.Duration(0), se.RetryAfter)
+		// Original error still accessible
+		assert.ErrorIs(t, result, awsErr)
+	})
+
+	t.Run("429 with Retry-After header sets RetryAfter", func(t *testing.T) {
+		t.Parallel()
+		awsErr := makeTestBedrockError(429, "20")
+		result := wrapBedrockError(awsErr)
+		var se *modelerrors.StatusError
+		require.ErrorAs(t, result, &se)
+		assert.Equal(t, 429, se.StatusCode)
+		assert.Equal(t, 20*time.Second, se.RetryAfter)
+	})
+
+	t.Run("500 wraps with correct status code", func(t *testing.T) {
+		t.Parallel()
+		awsErr := makeTestBedrockError(500, "")
+		result := wrapBedrockError(awsErr)
+		var se *modelerrors.StatusError
+		require.ErrorAs(t, result, &se)
+		assert.Equal(t, 500, se.StatusCode)
+		assert.Equal(t, time.Duration(0), se.RetryAfter)
+	})
+
+	t.Run("wrapped error is classified correctly by ClassifyModelError", func(t *testing.T) {
+		t.Parallel()
+		awsErr := makeTestBedrockError(429, "15")
+		result := wrapBedrockError(awsErr)
+		retryable, rateLimited, retryAfter := modelerrors.ClassifyModelError(result)
+		assert.False(t, retryable)
+		assert.True(t, rateLimited)
+		assert.Equal(t, 15*time.Second, retryAfter)
+	})
+
+	t.Run("wrapped in fmt.Errorf still classified correctly", func(t *testing.T) {
+		t.Parallel()
+		awsErr := makeTestBedrockError(500, "")
+		wrapped := fmt.Errorf("bedrock converse stream failed: %w", wrapBedrockError(awsErr))
+		retryable, rateLimited, _ := modelerrors.ClassifyModelError(wrapped)
+		assert.True(t, retryable)
+		assert.False(t, rateLimited)
+	})
+}

From e76494ef0abb5aab199350991c0b474ea16cfe37 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:44:15 +0100
Subject: [PATCH 12/15] modelerrors: remove unused deprecated wrappers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CalculateBackoff and SleepWithContext were deprecated in favor of
backoff.Calculate and backoff.SleepWithContext. No callers use the
modelerrors versions — all code imports pkg/backoff directly.

Remove the deprecated wrappers to reduce API surface.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 6ea294412..764325d3a 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -402,18 +402,6 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time
 	return isRetryableModelError(err), false, 0
 }
 
-// CalculateBackoff returns the backoff duration for a given attempt (0-indexed).
-// Uses exponential backoff with jitter.
-//
-// Deprecated: Use [backoff.Calculate] directly.
-var CalculateBackoff = backoff.Calculate
-
-// SleepWithContext sleeps for the specified duration, returning early if context is cancelled.
-// Returns true if the sleep completed, false if it was interrupted by context cancellation.
-//
-// Deprecated: Use [backoff.SleepWithContext] directly.
-var SleepWithContext = backoff.SleepWithContext
-
 // FormatError returns a user-friendly error message for model errors.
 // Context overflow gets a dedicated actionable message; all other errors
 // pass through their original message.

From f69980672997ba3c32a359f1bbabcfb297368c8b Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:46:59 +0100
Subject: [PATCH 13/15] modelerrors: unexport ParseRetryAfterHeader

Only called within the package (by WrapHTTPError and tests).
Rename to parseRetryAfterHeader to reduce the public API surface.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go      | 6 +++---
 pkg/modelerrors/modelerrors_test.go | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 764325d3a..9f3abd0a1 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -55,7 +55,7 @@ func WrapHTTPError(statusCode int, resp *http.Response, err error) error {
 	}
 	var retryAfter time.Duration
 	if resp != nil {
-		retryAfter = ParseRetryAfterHeader(resp.Header.Get("Retry-After"))
+		retryAfter = parseRetryAfterHeader(resp.Header.Get("Retry-After"))
 	}
 	return &StatusError{
 		StatusCode: statusCode,
@@ -330,10 +330,10 @@ func isRetryableModelError(err error) bool {
 	return false
 }
 
-// ParseRetryAfterHeader parses a Retry-After header value.
+// parseRetryAfterHeader parses a Retry-After header value.
 // Supports both seconds (integer) and HTTP-date formats per RFC 7231 §7.1.3.
 // Returns 0 if the value is empty, invalid, or results in a non-positive duration.
-func ParseRetryAfterHeader(value string) time.Duration {
+func parseRetryAfterHeader(value string) time.Duration {
 	if value == "" {
 		return 0
 	}
diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 274fe99e4..5e2ed85b3 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -261,15 +261,15 @@ func TestParseRetryAfterHeader(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			got := ParseRetryAfterHeader(tt.value)
-			assert.Equal(t, tt.expected, got, "ParseRetryAfterHeader(%q)", tt.value)
+			got := parseRetryAfterHeader(tt.value)
+			assert.Equal(t, tt.expected, got, "parseRetryAfterHeader(%q)", tt.value)
 		})
 	}
 
 	t.Run("HTTP-date in the future", func(t *testing.T) {
 		t.Parallel()
 		future := time.Now().Add(10 * time.Second).UTC().Format(http.TimeFormat)
-		got := ParseRetryAfterHeader(future)
+		got := parseRetryAfterHeader(future)
 		assert.Greater(t, got, 0*time.Second, "should return positive duration for future HTTP-date")
 		assert.LessOrEqual(t, got, 11*time.Second, "should not exceed ~10s for near-future date")
 	})
@@ -277,7 +277,7 @@ func TestParseRetryAfterHeader(t *testing.T) {
 	t.Run("HTTP-date in the past", func(t *testing.T) {
 		t.Parallel()
 		past := time.Now().Add(-10 * time.Second).UTC().Format(http.TimeFormat)
-		got := ParseRetryAfterHeader(past)
+		got := parseRetryAfterHeader(past)
 		assert.Equal(t, 0*time.Second, got, "should return 0 for past HTTP-date")
 	})
 }

From dcfb3c76d5c2dbe5dcc058bda7de10f9872e93e1 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:47:36 +0100
Subject: [PATCH 14/15] modelerrors: unexport IsRetryableStatusCode and
 ExtractHTTPStatusCode

Both are only used within the package. Rename to isRetryableStatusCode
and extractHTTPStatusCode to reduce the public API surface.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go      | 22 +++++++++++-----------
 pkg/modelerrors/modelerrors_test.go |  4 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index 9f3abd0a1..b5c4fdaad 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -161,14 +161,14 @@ func IsContextOverflowError(err error) bool {
 // statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ")
 var statusCodeRegex = regexp.MustCompile(`\b([45]\d{2})\b`)
 
-// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error
+// extractHTTPStatusCode attempts to extract an HTTP status code from the error
 // using regex parsing of the error message. This is a fallback for providers
 // whose errors are not yet wrapped in *StatusError (the preferred path).
 //
 // The regex matches 4xx/5xx codes at word boundaries
 // (e.g., "429 Too Many Requests", "500 Internal Server Error").
 // Returns 0 if no status code found.
-func ExtractHTTPStatusCode(err error) int {
+func extractHTTPStatusCode(err error) int {
 	if err == nil {
 		return 0
 	}
@@ -191,7 +191,7 @@ func ExtractHTTPStatusCode(err error) int {
 	return 0
 }
 
-// IsRetryableStatusCode determines if an HTTP status code is retryable.
+// isRetryableStatusCode determines if an HTTP status code is retryable.
 // Retryable means we should retry the SAME model with exponential backoff.
 //
 // Retryable status codes:
@@ -202,7 +202,7 @@ func ExtractHTTPStatusCode(err error) int {
 // Non-retryable status codes (skip to next model immediately):
 // - 429 (rate limit) - provider is explicitly telling us to back off
 // - 4xx client errors (400, 401, 403, 404) - won't get better with retry
-func IsRetryableStatusCode(statusCode int) bool {
+func isRetryableStatusCode(statusCode int) bool {
 	switch statusCode {
 	case 500, 502, 503, 504: // Server errors
 		return true
@@ -219,7 +219,7 @@ func IsRetryableStatusCode(statusCode int) bool {
 
 // retryablePatterns contains error message substrings that indicate a
 // transient/retryable failure. Numeric status codes (500, 502, etc.) are
-// handled separately by ExtractHTTPStatusCode + IsRetryableStatusCode.
+// handled separately by extractHTTPStatusCode + isRetryableStatusCode.
 var retryablePatterns = []string{
 	"timeout",               // Generic timeout
 	"connection reset",      // Connection reset
@@ -241,7 +241,7 @@ var retryablePatterns = []string{
 
 // nonRetryablePatterns contains error message substrings that indicate a
 // permanent/non-retryable failure. Numeric status codes (429, 401, etc.) are
-// handled separately by ExtractHTTPStatusCode.
+// handled separately by extractHTTPStatusCode.
 var nonRetryablePatterns = []string{
 	"rate limit",        // Rate limit message
 	"too many requests", // Rate limit message
@@ -292,8 +292,8 @@ func isRetryableModelError(err error) bool {
 	}
 
 	// First, try to extract HTTP status code from known SDK error types
-	if statusCode := ExtractHTTPStatusCode(err); statusCode != 0 {
-		retryable := IsRetryableStatusCode(statusCode)
+	if statusCode := extractHTTPStatusCode(err); statusCode != 0 {
+		retryable := isRetryableStatusCode(statusCode)
 		slog.Debug("Classified error by status code",
 			"status_code", statusCode,
 			"retryable", retryable)
@@ -387,17 +387,17 @@ func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time
 		if statusErr.StatusCode == http.StatusTooManyRequests {
 			return false, true, statusErr.RetryAfter
 		}
-		return IsRetryableStatusCode(statusErr.StatusCode), false, 0
+		return isRetryableStatusCode(statusErr.StatusCode), false, 0
 	}
 
 	// Fallback: providers that don't yet wrap (e.g. Bedrock), or non-provider
 	// errors (network, pattern matching).
-	statusCode := ExtractHTTPStatusCode(err)
+	statusCode := extractHTTPStatusCode(err)
 	if statusCode == http.StatusTooManyRequests {
 		return false, true, 0 // No Retry-After without StatusError
 	}
 	if statusCode != 0 {
-		return IsRetryableStatusCode(statusCode), false, 0
+		return isRetryableStatusCode(statusCode), false, 0
 	}
 	return isRetryableModelError(err), false, 0
 }
diff --git a/pkg/modelerrors/modelerrors_test.go b/pkg/modelerrors/modelerrors_test.go
index 5e2ed85b3..3df53d1bd 100644
--- a/pkg/modelerrors/modelerrors_test.go
+++ b/pkg/modelerrors/modelerrors_test.go
@@ -94,7 +94,7 @@ func TestExtractHTTPStatusCode(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
-			assert.Equal(t, tt.expected, ExtractHTTPStatusCode(tt.err), "ExtractHTTPStatusCode(%v)", tt.err)
+			assert.Equal(t, tt.expected, extractHTTPStatusCode(tt.err), "extractHTTPStatusCode(%v)", tt.err)
 		})
 	}
 }
@@ -118,7 +118,7 @@ func TestIsRetryableStatusCode(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(fmt.Sprintf("status_%d", tt.statusCode), func(t *testing.T) {
 			t.Parallel()
-			assert.Equal(t, tt.expected, IsRetryableStatusCode(tt.statusCode), "IsRetryableStatusCode(%d)", tt.statusCode)
+			assert.Equal(t, tt.expected, isRetryableStatusCode(tt.statusCode), "isRetryableStatusCode(%d)", tt.statusCode)
 		})
 	}
 }

From 20400137a0840cc162a39a3726bd8b2896098184 Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Thu, 19 Mar 2026 19:48:34 +0100
Subject: [PATCH 15/15] modelerrors: remove unused MaxRetryAfterWait re-export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No callers reference modelerrors.MaxRetryAfterWait — all usage goes
through backoff.MaxRetryAfterWait directly. Remove the constant and
the now-unused backoff import.

Assisted-By: docker-agent
---
 pkg/modelerrors/modelerrors.go | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/pkg/modelerrors/modelerrors.go b/pkg/modelerrors/modelerrors.go
index b5c4fdaad..a24d8edb6 100644
--- a/pkg/modelerrors/modelerrors.go
+++ b/pkg/modelerrors/modelerrors.go
@@ -15,15 +15,6 @@ import (
 	"strconv"
 	"strings"
 	"time"
-
-	"github.com/docker/docker-agent/pkg/backoff"
-)
-
-// Backoff and retry-after configuration constants.
-const (
-	// MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent
-	// a misbehaving server from blocking the agent for an unreasonable amount of time.
-	MaxRetryAfterWait = backoff.MaxRetryAfterWait
 )
 
 // StatusError wraps an HTTP API error with structured metadata for retry decisions.