diff --git a/pkg/utils/http_retry.go b/pkg/utils/http_retry.go index 135ea0ef5..514f9781b 100644 --- a/pkg/utils/http_retry.go +++ b/pkg/utils/http_retry.go @@ -4,12 +4,16 @@ import ( "context" "fmt" "net/http" + "strconv" "time" ) const maxRetries = 3 -var retryDelayUnit = time.Second +var ( + retryDelayUnit = time.Second + maxRetrySleepDuration = 1 * time.Minute +) func shouldRetry(statusCode int) bool { return statusCode == http.StatusTooManyRequests || @@ -36,7 +40,7 @@ func DoRequestWithRetry(client *http.Client, req *http.Request) (*http.Response, } if i < maxRetries-1 { - if err = sleepWithCtx(req.Context(), retryDelayUnit*time.Duration(i+1)); err != nil { + if err = sleepWithCtx(req.Context(), retryDelayForAttempt(resp, i)); err != nil { if resp != nil { resp.Body.Close() } @@ -47,6 +51,57 @@ func DoRequestWithRetry(client *http.Client, req *http.Request) (*http.Response, return resp, err } +func retryDelayForAttempt(resp *http.Response, attempt int) time.Duration { + fallback := retryDelayUnit * time.Duration(attempt+1) + if resp == nil || resp.StatusCode != http.StatusTooManyRequests { + return clampRetryDelay(fallback) + } + + retryAfter := resp.Header.Get("Retry-After") + if retryAfter == "" { + return clampRetryDelay(fallback) + } + + if delay, ok := numericRetryAfterDelay(retryAfter); ok { + return delay + } + + if when, err := http.ParseTime(retryAfter); err == nil { + delay := time.Until(when) + if serverDate, err := http.ParseTime(resp.Header.Get("Date")); err == nil { + delay = when.Sub(serverDate) + } + if delay < 0 { + return 0 + } + return clampRetryDelay(delay) + } + + return clampRetryDelay(fallback) +} + +func numericRetryAfterDelay(retryAfter string) (time.Duration, bool) { + seconds, err := strconv.ParseInt(retryAfter, 10, 64) + if err != nil || seconds < 0 { + return 0, false + } + maxSeconds := int64(maxRetrySleepDuration / time.Second) + if seconds > maxSeconds { + return maxRetrySleepDuration, true + } + return clampRetryDelay(time.Duration(seconds) * time.Second), true +} + +func clampRetryDelay(delay time.Duration) time.Duration { + if delay <= 0 { + return 0 + } + if delay > maxRetrySleepDuration { + return maxRetrySleepDuration + } + return delay +} + func sleepWithCtx(ctx context.Context, d time.Duration) error { timer := time.NewTimer(d) defer timer.Stop() diff --git a/pkg/utils/http_retry_test.go b/pkg/utils/http_retry_test.go index d64cd5eda..4d6021ff7 100644 --- a/pkg/utils/http_retry_test.go +++ b/pkg/utils/http_retry_test.go @@ -80,6 +80,81 @@ func TestDoRequestWithRetry(t *testing.T) { } } +func TestDoRequestWithRetry_RetryAfter429Honored(t *testing.T) { + retryDelayUnit = 10 * time.Millisecond + t.Cleanup(func() { retryDelayUnit = time.Second }) + + attempts := 0 + var firstAttemptAt time.Time + var secondAttemptAt time.Time + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + firstAttemptAt = time.Now() + w.Header().Set("Retry-After", "1") + w.WriteHeader(http.StatusTooManyRequests) + return + } + if attempts == 2 { + secondAttemptAt = time.Now() + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + client := &http.Client{Timeout: 5 * time.Second} + req, err := http.NewRequest(http.MethodGet, server.URL, nil) + require.NoError(t, err) + + resp, err := DoRequestWithRetry(client, req) + require.NoError(t, err) + require.NotNil(t, resp) + assert.Equal(t, http.StatusOK, resp.StatusCode) + resp.Body.Close() + require.Equal(t, 2, attempts) + + assert.GreaterOrEqual(t, secondAttemptAt.Sub(firstAttemptAt), 900*time.Millisecond) +} + +func TestDoRequestWithRetry_RetryAfter429InvalidFallsBack(t *testing.T) { + retryDelayUnit = 50 * time.Millisecond + t.Cleanup(func() { retryDelayUnit = time.Second }) + + attempts := 0 + var firstAttemptAt time.Time + var secondAttemptAt time.Time + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts == 1 { + firstAttemptAt = time.Now() + w.Header().Set("Retry-After", "invalid") + w.WriteHeader(http.StatusTooManyRequests) + return + } + if attempts == 2 { + secondAttemptAt = time.Now() + } + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + client := &http.Client{Timeout: 5 * time.Second} + req, err := http.NewRequest(http.MethodGet, server.URL, nil) + require.NoError(t, err) + + resp, err := DoRequestWithRetry(client, req) + require.NoError(t, err) + require.NotNil(t, resp) + assert.Equal(t, http.StatusOK, resp.StatusCode) + resp.Body.Close() + require.Equal(t, 2, attempts) + + assert.GreaterOrEqual(t, secondAttemptAt.Sub(firstAttemptAt), 45*time.Millisecond) + assert.Less(t, secondAttemptAt.Sub(firstAttemptAt), 500*time.Millisecond) +} + func TestDoRequestWithRetry_ContextCancel(t *testing.T) { // Use a long retry delay so cancellation always hits during sleepWithCtx. retryDelayUnit = 10 * time.Second @@ -204,3 +279,87 @@ func TestDoRequestWithRetry_Delay(t *testing.T) { assert.GreaterOrEqual(t, delays[2], time.Millisecond) } + +func TestRetryDelayForAttempt_DateRetryAfterUsesResponseDateHeader(t *testing.T) { + maxRetrySleepDuration = time.Minute + t.Cleanup(func() { maxRetrySleepDuration = time.Minute }) + + serverDate := time.Date(2000, 1, 2, 15, 4, 5, 0, time.UTC) + retryAfterAt := serverDate.Add(10 * time.Second) + resp := &http.Response{ + StatusCode: http.StatusTooManyRequests, + Header: http.Header{ + "Retry-After": []string{retryAfterAt.Format(http.TimeFormat)}, + "Date": []string{serverDate.Format(http.TimeFormat)}, + }, + } + + assert.Equal(t, 10*time.Second, retryDelayForAttempt(resp, 0)) +} + +func TestRetryDelayForAttempt_DateRetryAfterInvalidOrMissingDateFallsBackSafely(t *testing.T) { + maxRetrySleepDuration = 30 * time.Second + t.Cleanup(func() { maxRetrySleepDuration = time.Minute }) + + retryAfterAt := time.Now().UTC().Add(3 * time.Second).Format(http.TimeFormat) + testcases := []struct { + name string + header http.Header + }{ + { + name: "invalid-date-header", + header: http.Header{ + "Retry-After": []string{retryAfterAt}, + "Date": []string{"invalid-date"}, + }, + }, + { + name: "missing-date-header", + header: http.Header{ + "Retry-After": []string{retryAfterAt}, + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + resp := &http.Response{ + StatusCode: http.StatusTooManyRequests, + Header: tc.header, + } + + delay := retryDelayForAttempt(resp, 0) + assert.Greater(t, delay, time.Duration(0)) + assert.GreaterOrEqual(t, delay, 1500*time.Millisecond) + assert.LessOrEqual(t, delay, 5*time.Second) + }) + } +} + +func TestRetryDelayForAttempt_RetryAfterIsCapped(t *testing.T) { + maxRetrySleepDuration = 2 * time.Second + t.Cleanup(func() { maxRetrySleepDuration = time.Minute }) + + resp := &http.Response{ + StatusCode: http.StatusTooManyRequests, + Header: http.Header{ + "Retry-After": []string{"999999"}, + }, + } + + assert.Equal(t, 2*time.Second, retryDelayForAttempt(resp, 0)) +} + +func TestRetryDelayForAttempt_RetryAfterNumericOverflowStillCaps(t *testing.T) { + maxRetrySleepDuration = 2 * time.Second + t.Cleanup(func() { maxRetrySleepDuration = time.Minute }) + + resp := &http.Response{ + StatusCode: http.StatusTooManyRequests, + Header: http.Header{ + "Retry-After": []string{"9223372036854775807"}, + }, + } + + assert.Equal(t, 2*time.Second, retryDelayForAttempt(resp, 0)) +}