From ccdb24a08607298f8dafd748ee9e7fe8ba13d5fe Mon Sep 17 00:00:00 2001
From: Sayak Maity
Date: Thu, 5 Feb 2026 15:38:08 -0500
Subject: [PATCH] fix: add litellm retry with exponential backoff for rate
 limit errors

- Add `num_retries=3` default to LLMConfig so litellm retries on OpenAI
  429 rate limit errors with built-in exponential backoff
- Increase Temporal DEFAULT_RETRY_POLICY from 1 attempt (no retries) to
  3 attempts with exponential backoff (1s, 2s, 4s... up to 30s)

This complements the HTTPX connection limit reduction in agentex backend
(scaleapi/scale-agentex#144) to address OpenAI rate limiting under high
concurrent load.
---
 src/agentex/lib/adk/providers/_modules/litellm.py | 8 +++++++-
 src/agentex/lib/types/llm_messages.py             | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/agentex/lib/adk/providers/_modules/litellm.py b/src/agentex/lib/adk/providers/_modules/litellm.py
index 2f012f7e3..1bae7f3a5 100644
--- a/src/agentex/lib/adk/providers/_modules/litellm.py
+++ b/src/agentex/lib/adk/providers/_modules/litellm.py
@@ -26,7 +26,13 @@
 logger = make_logger(__name__)
 
 # Default retry policy for all LiteLLM operations
-DEFAULT_RETRY_POLICY = RetryPolicy(maximum_attempts=1)
+# Retries with exponential backoff: 1s, 2s, 4s, ... up to 30s between attempts
+DEFAULT_RETRY_POLICY = RetryPolicy(
+    maximum_attempts=3,
+    initial_interval=timedelta(seconds=1),
+    backoff_coefficient=2.0,
+    maximum_interval=timedelta(seconds=30),
+)
 
 
 class LiteLLMModule:
diff --git a/src/agentex/lib/types/llm_messages.py b/src/agentex/lib/types/llm_messages.py
index 706939f15..04192c003 100644
--- a/src/agentex/lib/types/llm_messages.py
+++ b/src/agentex/lib/types/llm_messages.py
@@ -58,6 +58,7 @@ class LLMConfig(BaseModel):
     parallel_tool_calls: bool | None = None
     logprobs: bool | None = None
     top_logprobs: int | None = None
+    num_retries: int | None = 3
 
 
 class ContentPartText(BaseModel):