feat(auth): Add retry logic with exponential backoff for S2A and Codex API

- Add exponential backoff with jitter to S2A OAuth submission (3 attempts)
- Implement 5xx error retry mechanism in Codex API workspace selection (5 attempts)
- Add 429 rate limit handling with retry support in Codex API
- Improve team member processing with staggered delays to avoid concurrent conflicts
- Add per-attempt proxy rotation to avoid reusing failed proxies
- Enhance retry delay calculation with random jitter to prevent thundering herd
- Update logging to display retry attempts and delay durations
- Improve error messages with HTTP status codes and response body snippets
- Refactor retry loops to use consistent exponential backoff pattern across modules
This commit is contained in:
2026-02-07 21:37:50 +08:00
parent 231611af84
commit eaf6073cb2
3 changed files with 78 additions and 37 deletions

View File

@@ -519,12 +519,16 @@ func (c *CodexAPIAuth) obtainAuthorizationCodeInternal() (string, error) {
"workspace_id": c.workspaceID,
}
// 添加 500 错误重试机制 - 最多重试 3
// 添加 5xx 错误重试机制 - 最多尝试 5 次,递增退避 + 随机抖动
var lastErr error
for retry := 0; retry < 3; retry++ {
for retry := 0; retry < 5; retry++ {
if retry > 0 {
c.logStep(StepSelectWorkspace, "第 %d 次重试选择工作区...", retry+1)
time.Sleep(time.Duration(2+retry) * time.Second) // 递增延迟: 2s, 3s, 4s
// 递增退避: 5s, 7s, 9s, 11s 基础延迟 (3+retry*2 秒) + 0~3s 随机抖动
baseDelay := time.Duration(3+retry*2) * time.Second
jitter := time.Duration(rand.Intn(3000)) * time.Millisecond
delay := baseDelay + jitter
c.logStep(StepSelectWorkspace, "第 %d 次重试选择工作区 (等待 %.1fs)...", retry+1, delay.Seconds())
time.Sleep(delay)
// 重新获取 Sentinel token
if !c.callSentinelReq("password_verify__auto") {
@@ -575,6 +579,13 @@ func (c *CodexAPIAuth) obtainAuthorizationCodeInternal() (string, error) {
return "", fmt.Errorf("未能获取授权码")
}
// 429 限流,可重试
if resp.StatusCode == 429 {
c.logStep(StepSelectWorkspace, "请求限流 429将重试...")
lastErr = fmt.Errorf("请求限流: 429")
continue
}
// 5xx 服务器错误,可重试
if resp.StatusCode >= 500 && resp.StatusCode < 600 {
c.logStep(StepSelectWorkspace, "服务器错误 %d将重试...", resp.StatusCode)