From ccdb24a08607298f8dafd748ee9e7fe8ba13d5fe Mon Sep 17 00:00:00 2001
From: Sayak Maity
Date: Thu, 5 Feb 2026 15:38:08 -0500
Subject: [PATCH] fix: add litellm retry with exponential backoff for rate
 limit errors

- Add `num_retries=3` default to LLMConfig so litellm retries on OpenAI
  429 rate limit errors with built-in exponential backoff
- Increase Temporal DEFAULT_RETRY_POLICY from 1 attempt (no retries) to
  3 attempts with exponential backoff (1s, 2s, 4s... up to 30s)

This complements the HTTPX connection limit reduction in agentex backend
(scaleapi/scale-agentex#144) to address OpenAI rate limiting under high
concurrent load.
---
 src/agentex/lib/adk/providers/_modules/litellm.py | 8 +++++++-
 src/agentex/lib/types/llm_messages.py             | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/agentex/lib/adk/providers/_modules/litellm.py b/src/agentex/lib/adk/providers/_modules/litellm.py
index 2f012f7e3..1bae7f3a5 100644
--- a/src/agentex/lib/adk/providers/_modules/litellm.py
+++ b/src/agentex/lib/adk/providers/_modules/litellm.py
@@ -26,7 +26,13 @@
 logger = make_logger(__name__)
 
 # Default retry policy for all LiteLLM operations
-DEFAULT_RETRY_POLICY = RetryPolicy(maximum_attempts=1)
+# Retries with exponential backoff: 1s, 2s, 4s, ... up to 30s between attempts
+DEFAULT_RETRY_POLICY = RetryPolicy(
+    maximum_attempts=3,
+    initial_interval=timedelta(seconds=1),
+    backoff_coefficient=2.0,
+    maximum_interval=timedelta(seconds=30),
+)
 
 
 class LiteLLMModule:
diff --git a/src/agentex/lib/types/llm_messages.py b/src/agentex/lib/types/llm_messages.py
index 706939f15..04192c003 100644
--- a/src/agentex/lib/types/llm_messages.py
+++ b/src/agentex/lib/types/llm_messages.py
@@ -58,6 +58,7 @@ class LLMConfig(BaseModel):
     parallel_tool_calls: bool | None = None
     logprobs: bool | None = None
     top_logprobs: int | None = None
+    num_retries: int | None = 3
 
 
 class ContentPartText(BaseModel):