From 5ed82db770fa31af0da7b89cdb11a2ace36ca6e0 Mon Sep 17 00:00:00 2001 From: Bryan Johnson Date: Wed, 27 May 2026 21:51:49 -0700 Subject: [PATCH] v0.8.8: force unconditional 429 header capture so headers.log always generates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bryan's MobaXterm work-box 429s never wrote headers.log because the v0.8.5 gate only fired on (OAuth + unified-*) OR retry-after — and his bare burst 429s carry neither. Detect 429 from the HTTP status line in the -D dump and ALWAYS write the full raw header block, exempt from the OAuth 50-call cap (own STATUS_429_HEADER_LOG_LIMIT budget), with a live phi/rl> stderr pointer. Non-stream path already reached the parser (call_api -D dump); the bug was the write-gate, not the call. Streaming path shares the same function. Co-Authored-By: Clover (Claude Opus 4.7) --- CHANGELOG.md | 51 +++++++++++++++++++++++++++ VERSION | 2 +- larry.sh | 99 ++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 136 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee40dae..a77db49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,57 @@ All notable changes to `cloverleaf-larry` / `larry-anywhere` are recorded here. Versioning is loose-semver; bumps trigger the in-process self-update on every running client via `LARRY_BASE_URL` + `MANIFEST`. +## v0.8.8 — 2026-05-27 + +Force unconditional 429 header capture (Clover). Symptom: Bryan's MobaXterm +work-box hits `rate_limit_error` repeatedly, but `$LARRY_HOME/log/headers.log` +NEVER generates — so we cannot diagnose which rate-limit rail / auth path is +failing. The single goal: guarantee the log generates on the NEXT 429 so Bryan +can `tail` it and paste it (manual paste is the plan; auto-sync is dropped). 
+ +**Call-flow trace (first, to disprove the deeper hypothesis).** Bryan's box +runs `LARRY_NO_STREAM=1` (auto-set on MobaXterm since v0.8.5), so `agent_turn` +takes `resp=$(call_api …)`. `call_api` (larry.sh) ALWAYS dumps response headers +via `curl -D` and ALWAYS calls `_parse_response_headers` on that dump after curl +returns — regardless of HTTP status (it explicitly comments "headers carry +rate-limit info even on 429s"). So the non-stream 429 path WAS reaching the +parser. The parser was NOT the missing call — the bug was entirely the +over-clever write-gate INSIDE the parser. + +**Root cause = the write-gate was too clever for its own purpose.** The v0.8.5 +gate wrote headers.log only if `(OAuth-mode AND a unified-* header was present) +OR (retry-after was non-empty)`. Bryan's 429s carry NEITHER: the backoff used +the exponential 2/4/8s fallback (proving no server `retry-after`), and a +per-minute burst 429 routinely omits the `unified-*` family. Neither branch +fired → no write → no log → no diagnosis. The capture defeated its own purpose. + +- **Unconditional write on ANY 429, detected from the status line.** + `_parse_response_headers` now greps the `-D` dump for `^HTTP/[0-9.]+[[:space:]]+429` + (CRLF-tolerant) and, on a match, ALWAYS writes the full raw header block to + `$LARRY_HOME/log/headers.log` — regardless of `retry-after`, `unified-*`, or + auth mode. A bare 429 with no diagnostic headers STILL logs; that absence is + itself the finding (signals a low/bare-tier limit). +- **429s exempt from the OAuth 50-call cap.** New `STATUS_429_headers_logged` + counter with its own budget (`STATUS_429_HEADER_LOG_LIMIT=200`), independent + of the 200-path OAuth sampling cap. A session that burned all 50 OAuth + captures on successful calls STILL logs its next (51st-call) 429.
+- **Full diagnostic dump.** The 429 block writes: a banner with `auth-mode` + (OAuth-Max vs API-key rail), the detected limit-rail, `retry-after`, org-id, + and request-id; then the HTTP status line, ALL `anthropic-*` headers (not just + `-ratelimit-*`), `retry-after`, `request-id`, and every `x-*` header — so the + auth-rail + which-limit question is answerable from one paste. +- **Live stderr pointer.** On every 429 capture, prints + `phi/rl> 429 headers logged to ~/.larry/log/headers.log (rail=<rail>, + retry-after=<value>) — paste for diagnosis` so Bryan knows the log now exists. +- **Same-pattern sweep.** Streaming path (`call_api_stream` → + `_drain_pending_stream_headers` → `_parse_response_headers`) shares the same + function, so Mac/Linux streaming users get identical 429 capture. The v0.8.0 + `tool_read_file` PHI path-block (which blocks `$LARRY_HOME/log/`) is + tool-dispatch-only — Bryan reading his own headers.log via interactive shell + `tail` is unaffected (verified: no shell-level block; `bash_exec` runs + `bash -c` directly without the path-block). The 200-path OAuth sampling cap is + unchanged. + ## v0.8.7 — 2026-05-27 Status-line render fix for MobaXterm/Cygwin (Clover). Symptom: the dim diff --git a/VERSION b/VERSION index 1e9b46b..6201b5f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.8.7 +0.8.8 diff --git a/larry.sh b/larry.sh index baa7e76..e71a2b1 100755 --- a/larry.sh +++ b/larry.sh @@ -65,7 +65,7 @@ set -o pipefail # ───────────────────────────────────────────────────────────────────────────── # Config # ───────────────────────────────────────────────────────────────────────────── -LARRY_VERSION="0.8.7" +LARRY_VERSION="0.8.8" LARRY_HOME="${LARRY_HOME:-$HOME/.larry}" # ───────────────────────────────────────────────────────────────────────────── @@ -2423,6 +2423,14 @@ STATUS_rl_reset_epoch="" # epoch when the tripped rail resets (best-effo # actual account. Auto-disables after 50 calls.
STATUS_oauth_headers_logged=0 STATUS_OAUTH_HEADER_LOG_LIMIT=50 +# v0.8.8: 429 captures get their OWN budget, separate from the OAuth 50-call +# cap above. The whole point of headers.log is diagnosing rate limits, so a +# session that burned its 50 OAuth captures on successful calls must STILL log +# the next 429. A bare 429 with no diagnostic headers is itself a finding +# (signals a low/bare-tier limit). 429 budget is large so a flap won't silence +# it, but bounded so a pathological retry storm can't grow the file unbounded. +STATUS_429_headers_logged=0 +STATUS_429_HEADER_LOG_LIMIT=200 # Model context-window lookup table (tokens). Source: Pax §4. # Default for unknown models: 200000 (safe lower bound for legacy releases). @@ -2587,26 +2595,87 @@ _parse_response_headers() { done fi - # ── Safety net: log raw OAuth headers for first 50 calls ───────────────── - # v0.8.5: ALSO log unconditionally whenever a `retry-after` header is present - # (i.e. a 429) so the NEXT rate-limit is always diagnosable from headers.log, - # regardless of auth mode or whether the unified-* family was emitted. The - # original v0.6.9 gate only fired in OAuth mode with a unified-* header — an - # API-key 429, or an OAuth 429 that omitted unified-*, would not be captured. - if { { [ "$LARRY_AUTH_MODE" = "oauth" ] \ - && [ -n "$STATUS_oauth_status$STATUS_oauth_5h_utilization$STATUS_oauth_7d_utilization" ]; } \ - || [ -n "$_ra" ]; } \ + # ── v0.8.8: detect a 429 straight from the status line in the -D dump ──── + # We do NOT rely on retry-after / unified-* being present (Bryan's box 429s + # carry neither — the exponential 2/4/8s fallback proves no server retry-after, + # and a per-minute burst 429 routinely omits the unified-* family). The HTTP + # status line is the ONE thing every 429 always has. Match `HTTP/1.1 429`, + # `HTTP/2 429`, etc. tolerant of the CRLF curl writes on Windows/MobaXterm.
+ local _is_429=0 + if grep -iqE '^HTTP/[0-9.]+[[:space:]]+429([[:space:]]|$|[^0-9])' "$f" 2>/dev/null; then + _is_429=1 + fi + + local log_dir="$LARRY_HOME/log" + + # ── ALWAYS-ON 429 CAPTURE (the whole point of headers.log) ─────────────── + # On ANY 429, write the FULL raw header block — regardless of retry-after, + # unified-*, auth mode, or the OAuth 50-call cap. The 429 has its own budget + # (STATUS_429_HEADER_LOG_LIMIT) so a session that exhausted the OAuth cap on + # successful calls still logs its next rate-limit. A bare 429 with no + # diagnostic headers STILL logs — that absence is itself the finding. + if [ "$_is_429" = "1" ] \ + && [ "$STATUS_429_headers_logged" -lt "$STATUS_429_HEADER_LOG_LIMIT" ]; then + mkdir -p "$log_dir" 2>/dev/null || true + if [ -d "$log_dir" ]; then + # Best-effort rail/account hints for the header line + live pointer. + local _rail_hint="${STATUS_rl_tripped_rail:-unknown}" + local _ra_hint="${_ra:-none}" + local _org_hint; _org_hint=$(strip_cr "$(_header_value "$f" "anthropic-organization-id")") + local _reqid_hint; _reqid_hint=$(strip_cr "$(_header_value "$f" "request-id")") + # Rail label: which AUTH RAIL authenticated this request. unified-* headers + # ⇒ OAuth/subscription (Max) rail; their absence on an API-key 429 ⇒ the + # API-key rail. We surface BOTH the limit-rail and the auth-mode. + local _auth_rail="$LARRY_AUTH_MODE" + [ -z "$_auth_rail" ] && _auth_rail="unknown" + { + printf '════ %s *** HTTP 429 RATE LIMIT *** ════\n' \ + "$(date -Iseconds 2>/dev/null || date)" + printf ' auth-mode=%s limit-rail=%s retry-after=%s org=%s request-id=%s model=%s 429#%d\n' \ + "$_auth_rail" "$_rail_hint" "$_ra_hint" \ + "${_org_hint:-—}" "${_reqid_hint:-—}" "$LARRY_MODEL" \ + "$((STATUS_429_headers_logged + 1))" + # Dump EVERYTHING useful for diagnosis. Order: status line, then the + # full anthropic-* family (NOT just ratelimit), retry-after, request-id, + # and every x-* header (account/proxy/edge hints live here).
De-dup the + # request-id line if it also matched x-* (harmless, kept simple). + grep -iE '^HTTP/' "$f" 2>/dev/null || true + grep -iE '^anthropic-' "$f" 2>/dev/null || true + grep -iE '^retry-after:' "$f" 2>/dev/null || true + grep -iE '^request-id:' "$f" 2>/dev/null || true + grep -iE '^x-' "$f" 2>/dev/null || true + printf '\n' + } >> "$log_dir/headers.log" 2>/dev/null || true + STATUS_429_headers_logged=$((STATUS_429_headers_logged + 1)) + + # ── Surface it LIVE so Bryan knows the log now exists (no hunting) ───── + printf '%sphi/rl>%s 429 headers logged to ~/.larry/log/headers.log (rail=%s, retry-after=%s) — paste for diagnosis\n' \ + "$C_YELLOW" "$C_RESET" "$_rail_hint" "$_ra_hint" >&2 + + if [ "$STATUS_429_headers_logged" -eq "$STATUS_429_HEADER_LOG_LIMIT" ]; then + printf '%s[v0.8.8 429-log] reached %d 429 captures this session; further 429 capture disabled. See %s%s\n' \ + "$C_DIM" "$STATUS_429_HEADER_LOG_LIMIT" "$log_dir/headers.log" "$C_RESET" >&2 + fi + fi + fi + + # ── Safety net: log raw OAuth headers for the first 50 SUCCESSFUL calls ─── + # Unchanged purpose from v0.6.9: sample the OAuth unified-* family on normal + # (non-429) traffic to verify Pax's spec against Bryan's real account. This + # arm is now strictly for the 200-path; 429s are handled above with their own + # budget and never consume this cap. We still skip it on a 429 we already + # logged (no double-write of the same dump). 
+ if [ "$_is_429" != "1" ] \ + && { [ "$LARRY_AUTH_MODE" = "oauth" ] \ + && [ -n "$STATUS_oauth_status$STATUS_oauth_5h_utilization$STATUS_oauth_7d_utilization" ]; } \ && [ "$STATUS_oauth_headers_logged" -lt "$STATUS_OAUTH_HEADER_LOG_LIMIT" ]; then - local log_dir="$LARRY_HOME/log" mkdir -p "$log_dir" 2>/dev/null || true if [ -d "$log_dir" ]; then { - local _tag="" - [ -n "$_ra" ] && _tag=" *** 429 retry-after=${_ra}s rail=${STATUS_rl_tripped_rail:-unknown} ***" - printf '── %s call #%d model=%s%s ──\n' \ + printf '── %s call #%d model=%s ──\n' \ "$(date -Iseconds 2>/dev/null || date)" \ "$((STATUS_oauth_headers_logged + 1))" \ - "$LARRY_MODEL" "$_tag" + "$LARRY_MODEL" grep -i '^anthropic-' "$f" 2>/dev/null || true grep -i '^retry-after:' "$f" 2>/dev/null || true grep -iE '^(http/|HTTP/)' "$f" 2>/dev/null || true