From 2c377e2261e4042202c102f0f38b8874e78825c8 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Wed, 18 Mar 2026 16:52:32 -0400 Subject: [PATCH 1/7] improve deploymentrecord client to support rate limiting backoffs provided in headers Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 130 +++++++++++++++++++++++----- pkg/deploymentrecord/client_test.go | 117 +++++++++++++++++++++++++ 2 files changed, 226 insertions(+), 21 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index fa6286a..c8f783b 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -14,6 +14,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/bradleyfalzon/ghinstallation/v2" @@ -37,6 +38,10 @@ type Client struct { apiToken string transport *ghinstallation.Transport rateLimiter *rate.Limiter + + // rateLimitDelay is shared across workers + rateLimitDelayMu sync.Mutex + rateLimitDelay time.Time } // NewClient creates a new API client with the given base URL and @@ -197,23 +202,12 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { var lastErr error // The first attempt is not a retry! for attempt := range c.retries + 1 { - if attempt > 0 { - backoff := time.Duration(math.Pow(2, - float64(attempt))) * 100 * time.Millisecond - //nolint:gosec - jitter := time.Duration(rand.Int64N(50)) * time.Millisecond - delay := backoff + jitter - - if delay > 5*time.Second { - delay = 5 * time.Second - } + if err = waitForBackoff(ctx, attempt); err != nil { + return err + } - // Wait with context cancellation support - select { - case <-time.After(delay): - case <-ctx.Done(): - return fmt.Errorf("context cancelled during retry backoff: %w", ctx.Err()) - } + if err = c.waitForSecondaryRateLimit(ctx); err != nil { + return err } // Reset reader position for retries @@ -268,7 +262,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { switch { case resp.StatusCode == 404: - // No artifact found + // No artifact found - do not retry dtmetrics.PostDeploymentRecordNoAttestation.Inc() slog.Debug("no artifact attestation found, no record created", "attempt", attempt, @@ -279,14 +273,15 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { ) return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)} case resp.StatusCode >= 400 && resp.StatusCode < 500: - if resp.Header.Get("retry-after") != "" || resp.Header.Get("x-ratelimit-remaining") == "0" { - // Rate limited — retry with backoff - // Could be 403 or 429 + // Check headers that indicate rate limiting + if resp.Header.Get("Retry-After") != "" || resp.Header.Get("X-Ratelimit-Remaining") == "0" { + retryDelay := parseRateLimitDelay(resp) + c.setRetryAfter(retryDelay) dtmetrics.PostDeploymentRecordRateLimited.Inc() slog.Warn("rate limited, retrying", "attempt", attempt, "status_code", resp.StatusCode, - "retry_after", resp.Header.Get("Retry-After"), + "retry_delay", retryDelay.Seconds(), "container_name", record.Name, "resp_msg", string(respBody), ) @@ -323,3 +318,96 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { ) return fmt.Errorf("all retries exhausted: %w", lastErr) } + +// waitForSecondaryRateLimit blocks until the global secondary rate limit backoff has elapsed. +// All workers sharing this client observe the same deadline. +func (c *Client) waitForSecondaryRateLimit(ctx context.Context) error { + c.rateLimitDelayMu.Lock() + waitUntil := c.rateLimitDelay + c.rateLimitDelayMu.Unlock() + + delay := time.Until(waitUntil) + if delay <= 0 { + return nil + } + + slog.Info("waiting for secondary rate limit backoff", + "delay", delay.Round(time.Millisecond), + ) + + select { + case <-time.After(delay): + return nil + case <-ctx.Done(): + return fmt.Errorf("context cancelled during secondary rate limit wait: %w", ctx.Err()) + } +} + +// setRetryAfter records a global backoff deadline. +// Ensures deadline can only be extended, not shortened. +func (c *Client) setRetryAfter(d time.Duration) { + until := time.Now().Add(d) + c.rateLimitDelayMu.Lock() + defer c.rateLimitDelayMu.Unlock() + if until.After(c.rateLimitDelay) { + c.rateLimitDelay = until + } +} + +// parseRateLimitDelay extracts the backoff duration from a rate-limit response: +// Return largest delay from header options. +// If no headers are set, default to 1 minute. +func parseRateLimitDelay(resp *http.Response) time.Duration { + // GitHub docs show Retry-After header will always be an int + var replyAfterDelay *time.Duration + if ra := resp.Header.Get("Retry-After"); ra != "" { + if seconds, err := strconv.Atoi(ra); err == nil { + rad := time.Duration(seconds) * time.Second + replyAfterDelay = &rad + } + } + + var rateLimitResetDelay *time.Duration + if resp.Header.Get("X-Ratelimit-Remaining") == "0" { + if resetStr := resp.Header.Get("X-Ratelimit-Reset"); resetStr != "" { + if epoch, err := strconv.ParseInt(resetStr, 10, 64); err == nil { + if d := time.Until(time.Unix(epoch, 0)); d > 0 { + rateLimitResetDelay = &d + } + } + } + } + + switch { + case replyAfterDelay != nil && rateLimitResetDelay != nil: + return max(*replyAfterDelay, *rateLimitResetDelay) + case replyAfterDelay != nil: + return *replyAfterDelay + case rateLimitResetDelay != nil: + return *rateLimitResetDelay + default: + return time.Minute + } +} + +func waitForBackoff(ctx context.Context, attempt int) error { + if attempt > 0 { + backoff := time.Duration(math.Pow(2, + float64(attempt))) * 100 * time.Millisecond + //nolint:gosec + jitter := time.Duration(rand.Int64N(50)) * time.Millisecond + delay := backoff + jitter + + if delay > 5*time.Second { + delay = 5 * time.Second + } + + // Wait with context cancellation support + select { + case <-time.After(delay): + case <-ctx.Done(): + return fmt.Errorf("context cancelled during retry backoff: %w", ctx.Err()) + } + } + return nil +} diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index d72e54f..8b17272 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -5,7 +5,9 @@ import ( "errors" "net/http" "net/http/httptest" + "strconv" "strings" + "sync" "sync/atomic" "testing" "time" @@ -421,6 +423,7 @@ func TestPostOne(t *testing.T) { retries: 1, handler: func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("X-Ratelimit-Remaining", "0") + w.Header().Set("X-Ratelimit-Reset", strconv.FormatInt(time.Now().Add(1*time.Second).Unix(), 10)) w.WriteHeader(http.StatusForbidden) }, wantErr: true, @@ -604,3 +607,117 @@ func TestPostOneSendsCorrectRequest(t *testing.T) { t.Fatalf("unexpected error: %v", err) } } + +func TestParseRateLimitDelay(t *testing.T) { + tests := []struct { + name string + headers http.Header + wantMin time.Duration + wantMax time.Duration + }{ + { + name: "Retry-After in seconds", + headers: http.Header{"Retry-After": []string{"5"}}, + wantMin: 5 * time.Second, + wantMax: 5 * time.Second, + }, + { + name: "Retry-After zero seconds", + headers: http.Header{"Retry-After": []string{"0"}}, + wantMin: 0, + wantMax: 0, + }, + { + name: "X-Ratelimit-Remaining 0 with reset", + headers: http.Header{ + "X-Ratelimit-Remaining": []string{"0"}, + "X-Ratelimit-Reset": []string{strconv.FormatInt(time.Now().Add(10*time.Second).Unix(), 10)}, + }, + wantMin: 9 * time.Second, + wantMax: 11 * time.Second, + }, + { + name: "no relevant headers defaults to 1 minute", + headers: http.Header{}, + wantMin: time.Minute, + wantMax: time.Minute, + }, + { + name: "Largest delay takes precedence", + headers: http.Header{ + "Retry-After": []string{"3"}, + "X-Ratelimit-Remaining": []string{"0"}, + "X-Ratelimit-Reset": []string{strconv.FormatInt(time.Now().Add(60*time.Second).Unix(), 10)}, + }, + wantMin: 59 * time.Second, + wantMax: 61 * time.Second, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resp := &http.Response{Header: tt.headers} + result := parseRateLimitDelay(resp) + if result < tt.wantMin || result > tt.wantMax { + t.Errorf("parseRateLimitDelay() = %v, want between %v and %v", result, tt.wantMin, tt.wantMax) + } + }) + } +} + +func TestPostOneRespectsRetryAfterAcrossGoroutines(t *testing.T) { + var reqCount atomic.Int32 + firstReqDone := make(chan struct{}) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + count := reqCount.Add(1) + if count == 1 { + w.Header().Set("Retry-After", "2") + w.WriteHeader(http.StatusTooManyRequests) + close(firstReqDone) + return + } + w.WriteHeader(http.StatusOK) + })) + t.Cleanup(srv.Close) + + client, err := NewClient(srv.URL, "test-org", WithRetries(2)) + if err != nil { + t.Fatalf("failed to create client: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + var wg sync.WaitGroup + + // Goroutine 1: triggers the rate limit + wg.Add(1) + go func() { + defer wg.Done() + if err := client.PostOne(ctx, testRecord()); err != nil { + t.Errorf("goroutine 1 error: %v", err) + } + }() + + // Wait for the rate limit to be received and backoff set + <-firstReqDone + time.Sleep(50 * time.Millisecond) + + // Goroutine 2: must observe the shared backoff + start := time.Now() + wg.Add(1) + go func() { + defer wg.Done() + if err := client.PostOne(ctx, testRecord()); err != nil { + t.Errorf("goroutine 2 error: %v", err) + } + }() + + wg.Wait() + + elapsed := time.Since(start) + if elapsed < 1800*time.Millisecond { + t.Errorf("goroutine 2 should have waited for retry-after, but only waited %v", elapsed) + } +} From d8c8e95b97a2e7b3bef35cef18483405fe985618 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Wed, 18 Mar 2026 17:03:55 -0400 Subject: [PATCH 2/7] fix typo Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index c8f783b..74587ae 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -359,11 +359,11 @@ func (c *Client) setRetryAfter(d time.Duration) { // If no headers are set, default to 1 minute. func parseRateLimitDelay(resp *http.Response) time.Duration { // GitHub docs show Retry-After header will always be an int - var replyAfterDelay *time.Duration + var retryAfterDelay *time.Duration if ra := resp.Header.Get("Retry-After"); ra != "" { if seconds, err := strconv.Atoi(ra); err == nil { rad := time.Duration(seconds) * time.Second - replyAfterDelay = &rad + retryAfterDelay = &rad } } @@ -379,10 +379,10 @@ func parseRateLimitDelay(resp *http.Response) time.Duration { } switch { - case replyAfterDelay != nil && rateLimitResetDelay != nil: - return max(*replyAfterDelay, *rateLimitResetDelay) - case replyAfterDelay != nil: - return *replyAfterDelay + case retryAfterDelay != nil && rateLimitResetDelay != nil: + return max(*retryAfterDelay, *rateLimitResetDelay) + case retryAfterDelay != nil: + return *retryAfterDelay case rateLimitResetDelay != nil: return *rateLimitResetDelay default: From 95f96053b528f2d2b269686aca18b3f69cb10ac3 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Thu, 19 Mar 2026 12:00:25 -0400 Subject: [PATCH 3/7] add user agent Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 74587ae..80978a3 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -230,6 +230,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { } else if c.apiToken != "" { req.Header.Set("Authorization", "Bearer "+c.apiToken) } + req.Header.Set("User-Agent", "GitHub-Deployment-Tracker") start := time.Now() // nolint: gosec From 1c82254698d889d93d349f0914716bd27b1aabb4 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Thu, 19 Mar 2026 16:11:03 -0400 Subject: [PATCH 4/7] rename existing rateLimiter to requestThrottler and reduce throughput limits to match github secondary rate limits Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 42 +++++++++++++++-------------- pkg/deploymentrecord/client_test.go | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 80978a3..1815d92 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -31,13 +31,13 @@ var validOrgPattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`) // Client is an API client for posting deployment records. type Client struct { - baseURL string - org string - httpClient *http.Client - retries int - apiToken string - transport *ghinstallation.Transport - rateLimiter *rate.Limiter + baseURL string + org string + httpClient *http.Client + retries int + apiToken string + transport *ghinstallation.Transport + requestThrottler *rate.Limiter // rateLimitDelay is shared across workers rateLimitDelayMu sync.Mutex @@ -75,8 +75,8 @@ func NewClient(baseURL, org string, opts ...ClientOption) (*Client, error) { Timeout: 5 * time.Second, }, retries: 3, - // 20 req/sec with burst of 50 - rateLimiter: rate.NewLimiter(rate.Limit(20), 50), + // 3 req/sec (180 req/min) with burst of 20 + requestThrottler: rate.NewLimiter(rate.Limit(3), 20), } for _, opt := range opts { @@ -145,10 +145,10 @@ func WithGHApp(id, installID string, pkBytes []byte, pkPath string) ClientOption } } -// WithRateLimiter sets a custom rate limiter for API calls. -func WithRateLimiter(rps float64, burst int) ClientOption { +// WithRequestThrottler sets a custom rate limiter for API calls. +func WithRequestThrottler(rps float64, burst int) ClientOption { return func(c *Client) { - c.rateLimiter = rate.NewLimiter(rate.Limit(rps), burst) + c.requestThrottler = rate.NewLimiter(rate.Limit(rps), burst) } } @@ -185,9 +185,9 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return errors.New("record cannot be nil") } - // Wait for rate limiter - if err := c.rateLimiter.Wait(ctx); err != nil { - return fmt.Errorf("rate limiter wait failed: %w", err) + // Wait for request throttler + if err := c.requestThrottler.Wait(ctx); err != nil { + return fmt.Errorf("request throttler wait failed: %w", err) } url := fmt.Sprintf("%s/orgs/%s/artifacts/metadata/deployment-record", c.baseURL, c.org) @@ -206,7 +206,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return err } - if err = c.waitForSecondaryRateLimit(ctx); err != nil { + if err = c.waitForServerRateLimit(ctx); err != nil { return err } @@ -320,9 +320,9 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return fmt.Errorf("all retries exhausted: %w", lastErr) } -// waitForSecondaryRateLimit blocks until the global secondary rate limit backoff has elapsed. +// waitForServerRateLimit blocks until the global secondary rate limit backoff has elapsed. // All workers sharing this client observe the same deadline. -func (c *Client) waitForSecondaryRateLimit(ctx context.Context) error { +func (c *Client) waitForServerRateLimit(ctx context.Context) error { c.rateLimitDelayMu.Lock() waitUntil := c.rateLimitDelay c.rateLimitDelayMu.Unlock() @@ -332,7 +332,7 @@ func (c *Client) waitForSecondaryRateLimit(ctx context.Context) error { return nil } - slog.Info("waiting for secondary rate limit backoff", + slog.Info("waiting for server rate limit backoff", "delay", delay.Round(time.Millisecond), ) @@ -340,7 +340,7 @@ func (c *Client) waitForSecondaryRateLimit(ctx context.Context) error { case <-time.After(delay): return nil case <-ctx.Done(): - return fmt.Errorf("context cancelled during secondary rate limit wait: %w", ctx.Err()) + return fmt.Errorf("context cancelled during server rate limit wait: %w", ctx.Err()) } } @@ -363,6 +363,8 @@ func parseRateLimitDelay(resp *http.Response) time.Duration { var retryAfterDelay *time.Duration if ra := resp.Header.Get("Retry-After"); ra != "" { if seconds, err := strconv.Atoi(ra); err == nil { + // Max Retry-After of 60 seconds + seconds = min(seconds, 60) rad := time.Duration(seconds) * time.Second retryAfterDelay = &rad } diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index 8b17272..11e4ed5 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -717,7 +717,7 @@ func TestPostOneRespectsRetryAfterAcrossGoroutines(t *testing.T) { wg.Wait() elapsed := time.Since(start) - if elapsed < 1800*time.Millisecond { + if elapsed < 1500*time.Millisecond { t.Errorf("goroutine 2 should have waited for retry-after, but only waited %v", elapsed) } } From eca959eda84a39adfeea3eed9b10981c65aab2bd Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Thu, 19 Mar 2026 16:58:55 -0400 Subject: [PATCH 5/7] switch to atomic rate limit deadline Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 32 +++++++++++++++-------------- pkg/deploymentrecord/client_test.go | 12 ++++------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 1815d92..6a38ca9 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -14,7 +14,7 @@ import ( "regexp" "strconv" "strings" - "sync" + "sync/atomic" "time" "github.com/bradleyfalzon/ghinstallation/v2" @@ -39,9 +39,8 @@ type Client struct { transport *ghinstallation.Transport requestThrottler *rate.Limiter - // rateLimitDelay is shared across workers - rateLimitDelayMu sync.Mutex - rateLimitDelay time.Time + // rateLimitDeadline is a UnixNano timestamp shared across workers. + rateLimitDeadline atomic.Int64 } // NewClient creates a new API client with the given base URL and @@ -282,6 +281,8 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { slog.Warn("rate limited, retrying", "attempt", attempt, "status_code", resp.StatusCode, + "retry-after", resp.Header.Get("Retry-After"), + "x-ratelimit-remaining", resp.Header.Get("X-Ratelimit-Remaining"), "retry_delay", retryDelay.Seconds(), "container_name", record.Name, "resp_msg", string(respBody), @@ -320,14 +321,11 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return fmt.Errorf("all retries exhausted: %w", lastErr) } -// waitForServerRateLimit blocks until the global secondary rate limit backoff has elapsed. +// waitForServerRateLimit blocks until the global server rate limit backoff has elapsed. // All workers sharing this client observe the same deadline. func (c *Client) waitForServerRateLimit(ctx context.Context) error { - c.rateLimitDelayMu.Lock() - waitUntil := c.rateLimitDelay - c.rateLimitDelayMu.Unlock() - - delay := time.Until(waitUntil) + deadline := c.rateLimitDeadline.Load() + delay := time.Until(time.Unix(0, deadline)) if delay <= 0 { return nil } @@ -347,11 +345,15 @@ func (c *Client) waitForServerRateLimit(ctx context.Context) error { // setRetryAfter records a global backoff deadline. // Ensures deadline can only be extended, not shortened. func (c *Client) setRetryAfter(d time.Duration) { - until := time.Now().Add(d) - c.rateLimitDelayMu.Lock() - defer c.rateLimitDelayMu.Unlock() - if until.After(c.rateLimitDelay) { - c.rateLimitDelay = until + newDeadline := time.Now().Add(d).UnixNano() + for { + current := c.rateLimitDeadline.Load() + if newDeadline <= current { + return + } + if c.rateLimitDeadline.CompareAndSwap(current, newDeadline) { + return + } } } diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index 11e4ed5..d5ff421 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -692,13 +692,11 @@ func TestPostOneRespectsRetryAfterAcrossGoroutines(t *testing.T) { var wg sync.WaitGroup // Goroutine 1: triggers the rate limit - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { if err := client.PostOne(ctx, testRecord()); err != nil { t.Errorf("goroutine 1 error: %v", err) } - }() + }) // Wait for the rate limit to be received and backoff set <-firstReqDone @@ -706,13 +704,11 @@ func TestPostOneRespectsRetryAfterAcrossGoroutines(t *testing.T) { // Goroutine 2: must observe the shared backoff start := time.Now() - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { if err := client.PostOne(ctx, testRecord()); err != nil { t.Errorf("goroutine 2 error: %v", err) } - }() + }) wg.Wait() From 1605e533d9b09aa8296ee7f4348ad5d06c3821e8 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Thu, 19 Mar 2026 17:39:53 -0400 Subject: [PATCH 6/7] address comments Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 19 ++++++++++++------- pkg/deploymentrecord/client_test.go | 8 +++++--- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 6a38ca9..3e9c434 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -184,11 +184,6 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return errors.New("record cannot be nil") } - // Wait for request throttler - if err := c.requestThrottler.Wait(ctx); err != nil { - return fmt.Errorf("request throttler wait failed: %w", err) - } - url := fmt.Sprintf("%s/orgs/%s/artifacts/metadata/deployment-record", c.baseURL, c.org) body, err := json.Marshal(record) @@ -209,6 +204,10 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return err } + if err = c.requestThrottler.Wait(ctx); err != nil { + return fmt.Errorf("request throttler wait failed: %w", err) + } + // Reset reader position for retries bodyReader.Reset(body) @@ -334,8 +333,11 @@ func (c *Client) waitForServerRateLimit(ctx context.Context) error { "delay", delay.Round(time.Millisecond), ) + timer := time.NewTimer(delay) + defer timer.Stop() + select { - case <-time.After(delay): + case <-timer.C: return nil case <-ctx.Done(): return fmt.Errorf("context cancelled during server rate limit wait: %w", ctx.Err()) @@ -408,8 +410,11 @@ func waitForBackoff(ctx context.Context, attempt int) error { } // Wait with context cancellation support + timer := time.NewTimer(delay) + defer timer.Stop() + select { - case <-time.After(delay): + case <-timer.C: case <-ctx.Done(): return fmt.Errorf("context cancelled during retry backoff: %w", ctx.Err()) } diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index d5ff421..0fbbff3 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -703,17 +703,19 @@ func TestPostOneRespectsRetryAfterAcrossGoroutines(t *testing.T) { time.Sleep(50 * time.Millisecond) // Goroutine 2: must observe the shared backoff + secondReqDone := make(chan struct{}) start := time.Now() wg.Go(func() { + defer close(secondReqDone) if err := client.PostOne(ctx, testRecord()); err != nil { t.Errorf("goroutine 2 error: %v", err) } }) - - wg.Wait() - + // Measure only goroutine 2's duration + <-secondReqDone elapsed := time.Since(start) if elapsed < 1500*time.Millisecond { t.Errorf("goroutine 2 should have waited for retry-after, but only waited %v", elapsed) } + wg.Wait() } From 81100e5505fbf1cdfccd236a4588ca7a67bc01d0 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Fri, 20 Mar 2026 12:42:08 -0400 Subject: [PATCH 7/7] remove Retry-After max check Signed-off-by: Eric Pickard --- pkg/deploymentrecord/client.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 3e9c434..9726523 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -367,8 +367,6 @@ func parseRateLimitDelay(resp *http.Response) time.Duration { var retryAfterDelay *time.Duration if ra := resp.Header.Get("Retry-After"); ra != "" { if seconds, err := strconv.Atoi(ra); err == nil { - // Max Retry-After of 60 seconds - seconds = min(seconds, 60) rad := time.Duration(seconds) * time.Second retryAfterDelay = &rad }