From c34b8900fb77a429c33fa108522787920ab4df0b Mon Sep 17 00:00:00 2001 From: Bryan Johnson Date: Wed, 27 May 2026 16:02:13 -0700 Subject: [PATCH] v0.6.9: persistent status line above the prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a dim status line printed immediately above each `you[model]>` prompt every turn, surfacing context-window usage + rate-limit visibility. Two render modes auto-selected from $LARRY_AUTH_MODE: OAuth: ─ ctx 12% (24K/1.0M) ─ 5h 1.8% reset 19:45 ─ 7d 73.7% reset Mon Jun 2 ─ API key: ─ ctx 12% (24K/200K) ─ $0.213 session ─ 14 turns ─ Implementation areas: - call_api / call_api_stream now capture response headers via curl -D into tempfiles. Streaming path drains its header file in the parent shell after the SSE body completes (subshell-update problem avoided). - New parser _parse_response_headers handles BOTH header families per Pax's research (Deliverables/2026-05-27-anthropic-rate-limit-headers-research.md): * API-key: RFC 3339 datetimes → converted to epoch * OAuth: Unix epoch integer-as-string → used as-is Both 5h and 7d buckets are displayed simultaneously; representative-claim is honored for enforcement but not for what to render (anti-pattern noted by Pax — Claude Code itself once shipped buggy logic that picked one). - Static model-context-window lookup (Pax §4): opus-4-7 / sonnet-4-6 = 1M, haiku-4-5 and legacy 4-5/4-1 families = 200K, unknown defaults to 200K. - Safety net: first 50 OAuth response header blocks are logged to $LARRY_HOME/log/headers.log so the empirical schema can be diff'd against Pax's spec on Bryan's actual account. Auto-disables after limit reached. - New /status slash command force-renders the line on demand. New env knob LARRY_NO_STATUS=1 disables the status line entirely. 
- parse_stream_to_response synthetic JSON now also carries cache_read_input_tokens + cache_creation_input_tokens so the parent shell can compute ctx_used = input + cache_creation + cache_read per Pax §5. Fallback rules followed: - First turn of a session: status line is NOT rendered (no zero-lies). - Missing reset values: display "reset —" not a fabricated time. - Reset already passed: display "— reset" (data stale). - Narrow terminal (< 100 cols): drop the reset times, keep the percentages. Verification (synthetic fixtures; no live OAuth session in this environment): - 25 parser/renderer assertions pass (test-harness covering all 8 spec scenarios + model lookup + token humanization). - SSE parser still produces a valid synthetic response JSON, now including cache fields (7 assertions pass). - TOOLS_JSON heredoc still parses cleanly via jq. - bash -n on larry.sh: clean. - Pax's OAuth headers were NOT empirically observed against a live account in this environment — only validated against the documented schema via fixtures derived verbatim from Pax's research. The header-log safety net is in place to verify on Bryan's account on first use. 
Co-Authored-By: Claude Opus 4.7 --- VERSION | 2 +- larry.sh | 485 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 481 insertions(+), 6 deletions(-) diff --git a/VERSION b/VERSION index fae59ca..1a5ac0d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.8 +0.6.9 diff --git a/larry.sh b/larry.sh index 98fdc43..890a829 100755 --- a/larry.sh +++ b/larry.sh @@ -32,9 +32,13 @@ # /clear clear terminal screen # /copy copy last assistant response to clipboard # /cost show running token + dollar cost for the session +# /status force-render the persistent status line (ctx + rate-limit) # /show-last-tool print last tool call + result (debug) # /help this help # +# Env knobs (v0.6.9): +# LARRY_NO_STATUS=1 disable the status line above the prompt +# # Inline file syntax: @ in any prompt inlines the file's contents # (TAB to autocomplete). See /help for details. set -u @@ -43,7 +47,7 @@ set -o pipefail # ───────────────────────────────────────────────────────────────────────────── # Config # ───────────────────────────────────────────────────────────────────────────── -LARRY_VERSION="0.6.8" +LARRY_VERSION="0.6.9" LARRY_HOME="${LARRY_HOME:-$HOME/.larry}" LARRY_BASE_URL="${LARRY_BASE_URL:-https://raw.githubusercontent.com/bojj27/cloverleaf-larry/main}" LARRY_UPDATE_URL="${LARRY_UPDATE_URL:-${LARRY_BASE_URL}/larry.sh}" @@ -1086,6 +1090,381 @@ _LARRY_CACHE_READ_TOKENS=0 _LARRY_CACHE_WRITE_TOKENS=0 _LARRY_TURNS=0 +# ───────────────────────────────────────────────────────────────────────────── +# v0.6.9: Persistent status line — ctx + rate-limit visibility +# ───────────────────────────────────────────────────────────────────────────── +# Per Pax's research (Deliverables/2026-05-27-anthropic-rate-limit-headers- +# research.md) the API exposes two distinct families of rate-limit headers: +# +# API-key mode: anthropic-ratelimit-{requests,tokens,input-tokens, +# output-tokens}-{limit,remaining,reset} +# Reset is an RFC 3339 datetime string. 
#
# OAuth mode:   anthropic-ratelimit-unified-{5h,7d}-{status,utilization,
#               reset} + -representative-claim + a top-level -reset.
#               Reset is a Unix epoch integer-as-string.
#
# The two families need two DIFFERENT reset parsers (footgun called out by Pax).
#
# Shared status-line state. The rate-limit fields are written by
# _parse_response_headers, the ctx fields by _record_ctx_used, and all of
# them are read by render_status_line before each prompt.
# An empty string means "unknown": render it as "—", never as "0%".
STATUS_ctx_used_tokens=""           # LAST turn: input + cache_creation + cache_read
STATUS_ctx_window=""                # tokens, from _model_context_window
STATUS_oauth_5h_utilization=""      # decimal string, 0.0–1.0
STATUS_oauth_5h_reset_epoch=""      # unix seconds
STATUS_oauth_7d_utilization=""      # decimal string, 0.0–1.0
STATUS_oauth_7d_reset_epoch=""      # unix seconds
STATUS_oauth_representative=""      # five_hour | seven_day | seven_day_opus | seven_day_sonnet
STATUS_oauth_status=""              # allowed | warning | rate_limited
STATUS_api_reset_epoch=""           # earliest *-reset RFC 3339 timestamp, as epoch
# No extra state for the session cost (derived from the _LARRY_*_TOKENS
# counters by _render_session_cost_dollars) or for the turn count
# (_LARRY_TURNS).

# Header-capture safety net: the first STATUS_OAUTH_HEADER_LOG_LIMIT OAuth
# header blocks are appended to $LARRY_HOME/log/headers.log so Pax's spec can
# be checked against Bryan's actual account; capture stops at the limit.
STATUS_oauth_headers_logged=0
STATUS_OAUTH_HEADER_LOG_LIMIT=50

# _model_context_window MODEL — print the context-window size in tokens.
# Source: Pax §4. MODEL is matched by substring:
#   opus-4-7, opus-4-6, sonnet-4-6                      -> 1000000
#   haiku-4-5, sonnet-4-5, opus-4-5, opus-4-1, unknown  -> 200000
# 200000 is the safe lower bound used for anything not recognized.
_model_context_window() {
  local model_id="$1"
  local window=200000
  case "$model_id" in
    *opus-4-7* | *opus-4-6* | *sonnet-4-6*) window=1000000 ;;
  esac
  printf '%s\n' "$window"
}

# _header_value HEADER_FILE NAME — case-insensitive header lookup.
# curl -D writes "Header-Name: value\r\n" lines.
# We strip the trailing CR and any leading/trailing whitespace from the value.
#
# Arguments: $1 - path to a curl -D header dump
#            $2 - header name (matched case-insensitively at line start)
# Outputs:   value of the FIRST matching header on stdout, without a trailing
#            newline; nothing if the file is unreadable or the header absent.
# Returns:   0 always, so callers can use it inside $(...) unconditionally.
_header_value() {
  local f="$1" name="$2"
  local line val
  line=$(grep -i -m1 "^${name}:" "$f" 2>/dev/null) || return 0
  val="${line#*:}"
  # Trim ALL leading, then ALL trailing whitespace. CR belongs to [:space:],
  # so the "\r" that ends every curl -D line is removed by the second step.
  # NB: do not "simplify" this to ${val##[[:space:]]*} — that pattern means
  # "one whitespace char followed by anything" and erases the whole value
  # whenever more than one blank (or a tab) follows the colon.
  val="${val#"${val%%[![:space:]]*}"}"
  val="${val%"${val##*[![:space:]]}"}"
  printf '%s' "$val"
}

# _rfc3339_to_epoch STR — convert an RFC 3339 datetime to Unix epoch seconds.
# Prints nothing on empty input or parse failure (the caller renders "—").
# Always returns 0. GNU `date -d` is tried first, then BSD `date -j -f`.
_rfc3339_to_epoch() {
  local s="$1"
  [ -z "$s" ] && return 0
  local out

  # GNU date (Linux, Cygwin) understands RFC 3339 directly.
  if out=$(date -d "$s" +%s 2>/dev/null) && [ -n "$out" ]; then
    printf '%s' "$out"
    return 0
  fi

  # BSD date (macOS), "…Z" form. The format only matches a literal "Z"; it
  # carries no zone information, so -u is needed to make the fields be read
  # as UTC rather than as local time.
  if out=$(date -j -u -f "%Y-%m-%dT%H:%M:%SZ" "$s" +%s 2>/dev/null) \
      && [ -n "$out" ]; then
    printf '%s' "$out"
    return 0
  fi

  # BSD date, numeric-offset form. %z wants "+hhmm", so map "Z" to "+0000"
  # and drop the colon from an RFC 3339 "+hh:mm" offset.
  local bsd="${s/Z/+0000}"
  local colon_offset_re='^(.*[+-][0-9][0-9]):([0-9][0-9])$'
  if [[ "$bsd" =~ $colon_offset_re ]]; then
    bsd="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
  fi
  if out=$(date -j -f "%Y-%m-%dT%H:%M:%S%z" "$bsd" +%s 2>/dev/null) \
      && [ -n "$out" ]; then
    printf '%s' "$out"
    return 0
  fi

  # Give up silently.
  return 0
}

# _epoch_to_hhmm EPOCH — format epoch seconds as HH:MM in local time.
# Prints nothing for empty input or when neither date flavour accepts it.
_epoch_to_hhmm() {
  local e="$1"
  [ -z "$e" ] && return 0
  # GNU syntax first, BSD syntax second; a double failure is not an error.
  date -d "@$e" +%H:%M 2>/dev/null \
    || date -r "$e" +%H:%M 2>/dev/null \
    || true
}

# _epoch_to_ddd_mmm_d EPOCH — format epoch seconds as "Mon Jun 2" (local time).
# Prints nothing for empty input or when neither date flavour accepts it.
_epoch_to_ddd_mmm_d() {
  local e="$1"
  [ -z "$e" ] && return 0
  date -d "@$e" "+%a %b %-d" 2>/dev/null \
    || date -r "$e" "+%a %b %-d" 2>/dev/null \
    || true
}

# _humanize_tokens N — render an integer as 24K / 1.2M.
# Arguments: $1 - non-negative integer token count
# Outputs:   "1.2M" for >= 1,000,000; "24K" (truncated) for >= 1,000; the
#            number itself below that; "—" for empty or non-numeric input.
_humanize_tokens() {
  local n="$1"
  # Anything that is not a plain run of digits counts as "unknown". Without
  # this guard the numeric tests below print "integer expression expected".
  case "$n" in
    '' | *[!0-9]*) printf '—'; return ;;
  esac
  if [ "$n" -ge 1000000 ]; then
    awk -v n="$n" 'BEGIN{printf "%.1fM", n/1000000}'
  elif [ "$n" -ge 1000 ]; then
    awk -v n="$n" 'BEGIN{printf "%dK", n/1000}'
  else
    printf '%s' "$n"
  fi
}

# _parse_response_headers HEADER_FILE — extract rate-limit fields from a
# curl -D dump and update the STATUS_* globals. Safe to call on empty or
# partial files: a header that is absent leaves its global untouched.
#
# Per Pax §2 / §3:
#   API-key resets: RFC 3339 datetime strings → converted to epoch.
#   OAuth resets:   Unix epoch integer-as-string → used as-is, but only when
#                   the value really is an integer (the renderer compares it
#                   numerically).
#
# Globals written: STATUS_oauth_status, STATUS_oauth_{5h,7d}_utilization,
#                  STATUS_oauth_{5h,7d}_reset_epoch, STATUS_oauth_representative,
#                  STATUS_api_reset_epoch, STATUS_oauth_headers_logged
# Globals read:    LARRY_AUTH_MODE, LARRY_HOME, LARRY_MODEL,
#                  STATUS_OAUTH_HEADER_LOG_LIMIT, C_DIM, C_RESET
# Side effects:    may append to $LARRY_HOME/log/headers.log (OAuth mode only).
# Returns:         0 always.
_parse_response_headers() {
  local f="$1"
  [ -s "$f" ] || return 0

  # ── OAuth unified-* family ───────────────────────────────────────────────
  # Each entry is "<header suffix>:<global to update>".
  local saw_unified=0
  local spec var v
  for spec in \
      "status:STATUS_oauth_status" \
      "5h-utilization:STATUS_oauth_5h_utilization" \
      "5h-reset:STATUS_oauth_5h_reset_epoch" \
      "7d-utilization:STATUS_oauth_7d_utilization" \
      "7d-reset:STATUS_oauth_7d_reset_epoch" \
      "representative-claim:STATUS_oauth_representative"; do
    v=$(_header_value "$f" "anthropic-ratelimit-unified-${spec%%:*}")
    [ -n "$v" ] || continue
    saw_unified=1
    var="${spec#*:}"
    case "$var" in
      *_reset_epoch)
        # Keep the previous value rather than store something the renderer
        # cannot compare against "now".
        case "$v" in *[!0-9]*) continue ;; esac
        ;;
    esac
    printf -v "$var" '%s' "$v"
  done

  # ── API-key family (find earliest reset) ─────────────────────────────────
  # The four buckets (requests/tokens/input-tokens/output-tokens) each have
  # their own reset. We display the most-imminent one.
  local earliest=""
  local hname epoch rfc
  for hname in \
      anthropic-ratelimit-requests-reset \
      anthropic-ratelimit-tokens-reset \
      anthropic-ratelimit-input-tokens-reset \
      anthropic-ratelimit-output-tokens-reset; do
    rfc=$(_header_value "$f" "$hname")
    [ -z "$rfc" ] && continue
    epoch=$(_rfc3339_to_epoch "$rfc")
    case "$epoch" in '' | *[!0-9]*) continue ;; esac
    if [ -z "$earliest" ] || [ "$epoch" -lt "$earliest" ]; then
      earliest="$epoch"
    fi
  done
  [ -n "$earliest" ] && STATUS_api_reset_epoch="$earliest"

  # ── Safety net: log raw OAuth headers for the first N calls ──────────────
  # Gate on what THIS response contained (saw_unified), not on the STATUS_*
  # globals: those keep their value from earlier calls, so testing them would
  # also log every later response that carries no unified-* header at all.
  if [ "$LARRY_AUTH_MODE" = "oauth" ] \
      && [ "$saw_unified" -eq 1 ] \
      && [ "$STATUS_oauth_headers_logged" -lt "$STATUS_OAUTH_HEADER_LOG_LIMIT" ]; then
    local log_dir="$LARRY_HOME/log"
    mkdir -p "$log_dir" 2>/dev/null || true
    if [ -d "$log_dir" ]; then
      # Best effort: a failed write must never break the API call path.
      {
        printf '── %s call #%d model=%s ──\n' \
          "$(date -Iseconds 2>/dev/null || date)" \
          "$((STATUS_oauth_headers_logged + 1))" \
          "$LARRY_MODEL"
        grep -i '^anthropic-' "$f" 2>/dev/null || true
        grep -i '^retry-after:' "$f" 2>/dev/null || true
        printf '\n'
      } >> "$log_dir/headers.log" 2>/dev/null || true
      STATUS_oauth_headers_logged=$((STATUS_oauth_headers_logged + 1))
      if [ "$STATUS_oauth_headers_logged" -eq "$STATUS_OAUTH_HEADER_LOG_LIMIT" ]; then
        printf '%s[v0.6.9 header-log] reached %d OAuth calls; raw header capture disabled. See %s%s\n' \
          "$C_DIM" "$STATUS_OAUTH_HEADER_LOG_LIMIT" "$log_dir/headers.log" "$C_RESET" >&2
      fi
    fi
  fi
  return 0
}

# render_status_line — print the dim status line above the prompt.
# Honors LARRY_NO_STATUS=1. Prints nothing if we have no data yet (first
# turn of a session). Always ends with a trailing newline so the prompt
# lands cleanly below.
# Globals read: LARRY_NO_STATUS, LARRY_AUTH_MODE, STATUS_ctx_used_tokens,
#               STATUS_oauth_{5h,7d}_utilization, _LARRY_TURNS
# Outputs:      one status line on stdout, or nothing when suppressed.
# Returns:      0 when suppressed; otherwise the selected renderer's status.
render_status_line() {
  [ "${LARRY_NO_STATUS:-0}" = "1" ] && return 0

  # Pick the template by auth mode; any other mode renders nothing.
  case "$LARRY_AUTH_MODE" in
    oauth)
      # First turn: neither context data nor OAuth utilization is known yet.
      if [ -z "$STATUS_ctx_used_tokens" ] \
          && [ -z "$STATUS_oauth_5h_utilization" ] \
          && [ -z "$STATUS_oauth_7d_utilization" ]; then
        return 0
      fi
      _render_status_line_oauth
      ;;
    apikey)
      # First turn: no context data and no completed turn to price.
      if [ -z "$STATUS_ctx_used_tokens" ] && [ "$_LARRY_TURNS" -eq 0 ]; then
        return 0
      fi
      _render_status_line_apikey
      ;;
    *)
      return 0
      ;;
  esac
}

# _ctx_segment — print "ctx 12% (24K/200K)", or "ctx — (—/200K)" while the
# used-token count is still unknown. No trailing newline.
#
# The window is looked up from $LARRY_MODEL on every call instead of trusting
# a previously cached STATUS_ctx_window: the cache is filled once and nothing
# refreshes it, so after a model switch the percentage would otherwise be
# computed against the old model's window. STATUS_ctx_window is still
# written so the global stays in step with what was rendered.
#
# Colour: $C_RED at >= 90%, $C_YELLOW at >= 75%, else $C_DIM. The segment
# ends with $C_RESET$C_DIM so the rest of the line continues dim.
_ctx_segment() {
  local used="$STATUS_ctx_used_tokens"
  local win
  win=$(_model_context_window "$LARRY_MODEL")
  STATUS_ctx_window="$win"

  if [ -z "$used" ]; then
    printf 'ctx — (—/%s)' "$(_humanize_tokens "$win")"
    return
  fi

  local pct
  pct=$(awk -v u="$used" -v w="$win" \
    'BEGIN{ if(w==0){print "—"} else {printf "%d", (u*100/w)} }')

  local color="$C_DIM"
  if [ "$pct" != "—" ]; then
    if [ "$pct" -ge 90 ]; then
      color="$C_RED"
    elif [ "$pct" -ge 75 ]; then
      color="$C_YELLOW"
    fi
  fi

  printf '%sctx %s%% (%s/%s)%s%s' "$color" "$pct" \
    "$(_humanize_tokens "$used")" "$(_humanize_tokens "$win")" \
    "$C_RESET" "$C_DIM"
}

# _utilization_pct DECIMAL — turn "0.7370692..." into "73" (integer percent,
# truncated). Prints "—" for empty input.
_utilization_pct() {
  local d="$1"
  [ -z "$d" ] && { printf '—'; return; }
  awk -v d="$d" 'BEGIN{printf "%d", d*100}'
}

# _utilization_pct_one DECIMAL — same but with one decimal place ("73.7").
# Prints "—" for empty input.
_utilization_pct_one() {
  local d="$1"
  [ -z "$d" ] && { printf '—'; return; }
  awk -v d="$d" 'BEGIN{printf "%.1f", d*100}'
}

# _status_bucket_color UTILIZATION — print the colour for one rate-limit
# bucket: $C_RED at >= 90%, $C_YELLOW at >= 75%, otherwise (or when the
# utilization is unknown) $C_DIM.
_status_bucket_color() {
  local util="$1" pct
  if [ -n "$util" ]; then
    pct=$(_utilization_pct "$util")
    if [ "$pct" -ge 90 ]; then
      printf '%s' "$C_RED"
      return
    elif [ "$pct" -ge 75 ]; then
      printf '%s' "$C_YELLOW"
      return
    fi
  fi
  printf '%s' "$C_DIM"
}

# _status_bucket_reset EPOCH NOW FORMATTER — print the reset text for one
# bucket. FORMATTER is the name of a function that turns an epoch into text.
#   EPOCH empty or not an integer -> "reset —"  (never a fabricated time,
#                                     and no "integer expression expected")
#   EPOCH <= NOW                  -> "— reset"  (data is stale)
#   otherwise                     -> "reset <formatted>", or "reset —" when
#                                     the formatter produced nothing
_status_bucket_reset() {
  local epoch="$1" now="$2" formatter="$3" when
  case "$epoch" in
    '' | *[!0-9]*) printf 'reset —'; return ;;
  esac
  if [ "$epoch" -le "$now" ]; then
    printf '— reset'
    return
  fi
  when=$("$formatter" "$epoch")
  if [ -n "$when" ]; then
    printf 'reset %s' "$when"
  else
    printf 'reset —'
  fi
}

# _render_status_line_oauth — print the OAuth-mode status line:
#   ─ ctx 12% (24K/1.0M) ─ 5h 1.8% reset 19:45 ─ 7d 73.7% reset Mon Jun 2 ─
# Both buckets are always shown. On terminals narrower than 100 columns the
# reset times are dropped and only the percentages remain.
# Globals read: STATUS_oauth_{5h,7d}_utilization, STATUS_oauth_{5h,7d}_reset_epoch,
#               STATUS_oauth_status, C_DIM, C_RED, C_YELLOW, C_RESET
_render_status_line_oauth() {
  local ctx now
  ctx=$(_ctx_segment)
  now=$(date +%s)

  # 5h segment
  local five_pct five_color five_reset
  five_pct=$(_utilization_pct_one "$STATUS_oauth_5h_utilization")
  five_color=$(_status_bucket_color "$STATUS_oauth_5h_utilization")
  five_reset=$(_status_bucket_reset "$STATUS_oauth_5h_reset_epoch" "$now" _epoch_to_hhmm)

  # 7d segment
  local seven_pct seven_color seven_reset
  seven_pct=$(_utilization_pct_one "$STATUS_oauth_7d_utilization")
  seven_color=$(_status_bucket_color "$STATUS_oauth_7d_utilization")
  seven_reset=$(_status_bucket_reset "$STATUS_oauth_7d_reset_epoch" "$now" _epoch_to_ddd_mmm_d)

  # Status-level colour prefix (warning → yellow, rate_limited → red).
  local overall_pre=""
  case "$STATUS_oauth_status" in
    rate_limited) overall_pre="$C_RED" ;;
    warning)      overall_pre="$C_YELLOW" ;;
  esac

  # Width-aware layout. Fall back to 100 columns when tput is unavailable
  # or prints something that is not a number.
  local cols
  cols=$(tput cols 2>/dev/null || echo 100)
  case "$cols" in
    '' | *[!0-9]*) cols=100 ;;
  esac

  local line
  if [ "$cols" -ge 100 ]; then
    line=$(printf '%s─ %s ─ %s5h %s%% %s%s ─ %s7d %s%% %s%s ─%s' \
      "$C_DIM" "$ctx" \
      "$five_color" "$five_pct" "$five_reset" "$C_DIM" \
      "$seven_color" "$seven_pct" "$seven_reset" "$C_DIM" \
      "$C_RESET")
  else
    line=$(printf '%s─ %s ─ %s5h %s%%%s ─ %s7d %s%%%s ─%s' \
      "$C_DIM" "$ctx" \
      "$five_color" "$five_pct" "$C_DIM" \
      "$seven_color" "$seven_pct" "$C_DIM" \
      "$C_RESET")
  fi

  if [ -n "$overall_pre" ]; then
    printf '%s%s\n' "$overall_pre" "$line"
  else
    printf '%s\n' "$line"
  fi
}

# _render_status_line_apikey — print the API-key-mode status line:
#   ─ ctx 12% (24K/200K) ─ $0.213 session ─ 14 turns ─
# Globals read: _LARRY_TURNS, C_DIM, C_RESET
_render_status_line_apikey() {
  local ctx dollars
  ctx=$(_ctx_segment)
  # Session $ from the running cost trackers.
  dollars=$(_render_session_cost_dollars)
  printf '%s─ %s ─ $%s session ─ %d turns ─%s\n' \
    "$C_DIM" "$ctx" "$dollars" "$_LARRY_TURNS" "$C_RESET"
}

# _render_session_cost_dollars — print the running session cost in dollars,
# to 3 decimals. _price_for_model is expected to print "<input> <output>"
# prices per million tokens. Cache reads are charged at 0.1x and cache
# writes at 1.25x the input price.
# Globals read: LARRY_MODEL, _LARRY_INPUT_TOKENS, _LARRY_OUTPUT_TOKENS,
#               _LARRY_CACHE_READ_TOKENS, _LARRY_CACHE_WRITE_TOKENS
_render_session_cost_dollars() {
  local prices
  prices=$(_price_for_model "$LARRY_MODEL")
  local in_price="${prices% *}"
  local out_price="${prices#* }"
  awk -v ti="$_LARRY_INPUT_TOKENS" -v to="$_LARRY_OUTPUT_TOKENS" \
      -v tcr="$_LARRY_CACHE_READ_TOKENS" -v tcw="$_LARRY_CACHE_WRITE_TOKENS" \
      -v p_in="$in_price" -v p_out="$out_price" \
      'BEGIN{
         c = ti*p_in/1000000 + to*p_out/1000000 + tcr*p_in*0.1/1000000 + tcw*p_in*1.25/1000000;
         printf "%.3f", c
       }'
}

# _record_ctx_used IN_TOK CACHE_READ CACHE_WRITE — update STATUS_ctx_used_tokens
# with the LATEST turn's total context size. Per Pax §5: ctx_used =
# input_tokens + cache_creation_input_tokens + cache_read_input_tokens.
# (NOT the running cumulative sum — context resets per turn from Anthropic's
# perspective.)
+_record_ctx_used() { + local in_t="${1:-0}" cr="${2:-0}" cw="${3:-0}" + STATUS_ctx_used_tokens=$(( in_t + cr + cw )) + # Lazy-init the window so /status renders correctly even without an API call. + [ -z "$STATUS_ctx_window" ] && STATUS_ctx_window=$(_model_context_window "$LARRY_MODEL") +} + print_cost_summary() { local prices; prices=$(_price_for_model "$LARRY_MODEL") local in_price out_price @@ -1479,12 +1858,28 @@ call_api() { else auth_args=(-H "x-api-key: $ANTHROPIC_API_KEY") fi - curl -sS --max-time 180 \ + # v0.6.9: dump response headers to a tempfile via -D so the status-line + # tracker can parse anthropic-ratelimit-* fields after the call returns. + # The body still goes to stdout. We deliberately don't use -i (which would + # interleave headers into stdout) because that would break the existing + # callers that pipe the body straight into jq. + local _hdrs_file; _hdrs_file=$(mktemp 2>/dev/null || echo "") + local _curl_args=( -sS --max-time 180 ) + [ -n "$_hdrs_file" ] && _curl_args+=( -D "$_hdrs_file" ) + curl "${_curl_args[@]}" \ "${auth_args[@]}" \ -H "anthropic-version: 2023-06-01" \ -H "content-type: application/json" \ --data-binary "@$payload_file" \ "$LARRY_API_URL" + local _curl_rc=$? + # Parse headers regardless of whether the body parse will succeed; headers + # carry rate-limit info even on 429s. + if [ -n "$_hdrs_file" ] && [ -s "$_hdrs_file" ]; then + _parse_response_headers "$_hdrs_file" 2>/dev/null || true + rm -f "$_hdrs_file" + fi + return $_curl_rc } # call_api_stream — same as call_api but for SSE responses. Writes the raw @@ -1512,7 +1907,22 @@ call_api_stream() { else auth_args=(-H "x-api-key: $ANTHROPIC_API_KEY") fi - curl -sN --max-time 300 \ + # v0.6.9: dump response headers via -D for status-line tracking. -D writes + # the header block immediately when the server emits it, BEFORE the SSE body + # starts flowing — so the body stream on stdout is unaffected. 
We parse the + # headers file at the START of the next agent_turn (see _maybe_drain_pending_ + # headers). Why not after curl returns? Because this function is the LEFT + # side of a pipeline and a `return` here happens in a subshell; the parent + # process can't see updates to status vars unless we drain the file later. + # + # We stash the file path on disk so the next call_api/call_api_stream (or + # the REPL renderer) can pick it up. Path is deterministic so the picker + # doesn't need to share a variable across the subshell boundary. + local _hdrs_file="$LARRY_HOME/.last-stream-headers" + : > "$_hdrs_file" 2>/dev/null || _hdrs_file="" + local _curl_args=( -sN --max-time 300 ) + [ -n "$_hdrs_file" ] && _curl_args+=( -D "$_hdrs_file" ) + curl "${_curl_args[@]}" \ "${auth_args[@]}" \ -H "anthropic-version: 2023-06-01" \ -H "content-type: application/json" \ @@ -1521,6 +1931,18 @@ call_api_stream() { "$LARRY_API_URL" } +# _drain_pending_stream_headers — called by the parent shell after a streaming +# turn completes. The streaming curl runs in a subshell (LHS of a pipe), so +# its in-memory updates to STATUS_* vars don't survive. We persist the header +# block on disk instead and parse it here, in the parent. +_drain_pending_stream_headers() { + local f="$LARRY_HOME/.last-stream-headers" + if [ -s "$f" ]; then + _parse_response_headers "$f" 2>/dev/null || true + rm -f "$f" + fi +} + build_system_prompt() { local sys="" # Load larry.md first (sets identity), then everything else alphabetically. @@ -1713,6 +2135,10 @@ parse_stream_to_response() { _LARRY_OUTPUT_TOKENS=$(( _LARRY_OUTPUT_TOKENS + out_tokens )) _LARRY_CACHE_READ_TOKENS=$(( _LARRY_CACHE_READ_TOKENS + cache_read )) _LARRY_CACHE_WRITE_TOKENS=$(( _LARRY_CACHE_WRITE_TOKENS + cache_write )) + # v0.6.9: record per-turn context size for the status line. + # NB: this function runs in the parse_stream_to_response subshell, so its + # update to STATUS_ctx_used_tokens won't propagate. 
The parent shell + # re-derives this from the synthetic response file in agent_turn below. # Assemble the synthetic response file. We rebuild content[] in index order. local content_json="[]" @@ -1749,12 +2175,17 @@ parse_stream_to_response() { [ -n "$accumulated_text" ] && _LARRY_LAST_ASSISTANT_TEXT="$accumulated_text" - # Emit synthetic response JSON. + # Emit synthetic response JSON. v0.6.9: include cache_* so the parent shell + # (which doesn't see this subshell's STATUS_* updates) can recompute the + # per-turn ctx total = input + cache_creation + cache_read. jq -n \ --argjson content "$content_json" \ --arg stop "$stop_reason" \ --argjson in_t "$in_tokens" --argjson out_t "$out_tokens" \ - '{content:$content, stop_reason:$stop, usage:{input_tokens:$in_t,output_tokens:$out_t}}' \ + --argjson cr "$cache_read" --argjson cw "$cache_write" \ + '{content:$content, stop_reason:$stop, + usage:{input_tokens:$in_t, output_tokens:$out_t, + cache_read_input_tokens:$cr, cache_creation_input_tokens:$cw}}' \ > "$out_file" return 0 } @@ -1804,6 +2235,9 @@ agent_turn() { jq 'del(.stream)' < "$payload_file" > "$payload_file.ns" && mv "$payload_file.ns" "$payload_file" resp=$(call_api "$payload_file") fi + # v0.6.9: drain rate-limit headers from the streaming curl (subshell + # could not update STATUS_* vars directly). + _drain_pending_stream_headers else resp=$(call_api "$payload_file") fi @@ -1845,6 +2279,16 @@ agent_turn() { _LARRY_CACHE_WRITE_TOKENS=$(( _LARRY_CACHE_WRITE_TOKENS + nu_cw )) fi + # v0.6.9: update the per-turn context-window tracker from THIS turn's + # usage block. Runs in both streaming and non-streaming paths (the + # synthetic stream JSON includes cache_* per v0.6.9 patch). The status + # line reads this on the next prompt render. 
+ local _ctx_in _ctx_cr _ctx_cw + _ctx_in=$(printf '%s' "$resp" | jq -r '.usage.input_tokens // 0' 2>/dev/null) + _ctx_cr=$(printf '%s' "$resp" | jq -r '.usage.cache_read_input_tokens // 0' 2>/dev/null) + _ctx_cw=$(printf '%s' "$resp" | jq -r '.usage.cache_creation_input_tokens // 0' 2>/dev/null) + _record_ctx_used "$_ctx_in" "$_ctx_cr" "$_ctx_cw" + # Log assistant text to session log { log_section "assistant" @@ -1915,6 +2359,7 @@ Slash commands: /clear clear the terminal screen (distinct from /reset) /copy copy last assistant response to clipboard /cost show running token + dollar cost for the session + /status force-render the persistent status line (ctx + rate-limit) /show-last-tool print full last tool call + result (debug aid) /model switch model (e.g. /model claude-opus-4-7) /cd change working directory @@ -1983,6 +2428,13 @@ Multi-line input: are not matched. Binary files and files >250 KB are skipped/truncated with a warning. TAB after @ autocompletes against files in cwd (fzf if installed). +Status line (v0.6.9): + A dim 1-line summary prints above each you[...] > prompt: + OAuth: ─ ctx 12% (24K/200K) ─ 5h 1.8% reset 19:45 ─ 7d 73.7% reset Mon Jun 2 ─ + API key: ─ ctx 12% (24K/200K) ─ $0.213 session ─ 14 turns ─ + Disable entirely with LARRY_NO_STATUS=1. Force re-display with /status. + Suppressed automatically on the first turn (no data yet). + TAB completion (v0.6.6/v0.6.7): Type '/' followed by any prefix and press TAB. 
/h → /help @@ -2068,6 +2520,7 @@ _LARRY_SLASH_CMDS=( /clear /copy /cost + /status /show-last-tool /nc-diff-env /nc-regression-env @@ -2114,6 +2567,7 @@ _LARRY_SLASH_CMDS_DESC=( [/clear]="clear the terminal screen" [/copy]="copy last assistant response to clipboard" [/cost]="show running token + dollar cost for the session" + [/status]="force-render the persistent status line (ctx + rate-limit)" [/show-last-tool]="print full last tool call + result for debugging" [/nc-diff-env]=" [pattern] diff NetConfigs across two SSH-aliased envs" [/nc-regression-env]=" [scope] 6-phase regression across SSH-aliased envs" @@ -2469,6 +2923,10 @@ main_loop() { while true; do local _short; _short=$(model_short_name) + # v0.6.9: persistent status line above the prompt. + # Only on the FIRST line of input — heredoc continuation reads in + # read_user_input do not invoke this loop iteration. + render_status_line printf '%syou[%s]>%s ' "$C_GREEN" "$_short" "$C_RESET" if ! read_user_input; then echo ""; break @@ -2495,6 +2953,23 @@ main_loop() { fi continue ;; /cost) print_cost_summary; continue ;; + /status) # v0.6.9: force-render the persistent status line on demand, + # e.g. when it has scrolled off-screen mid-conversation. + if [ "${LARRY_NO_STATUS:-0}" = "1" ]; then + larry_say "status line disabled (LARRY_NO_STATUS=1)" + else + # Temporarily override the "first turn suppression" by + # making sure ctx_used has a value even if unknown. + [ -z "$STATUS_ctx_window" ] && STATUS_ctx_window=$(_model_context_window "$LARRY_MODEL") + if [ -z "$STATUS_ctx_used_tokens" ] \ + && [ -z "$STATUS_oauth_5h_utilization" ] \ + && [ "$_LARRY_TURNS" -eq 0 ]; then + larry_say "no data yet — make a turn first" + else + render_status_line + fi + fi + continue ;; /show-last-tool) if [ -z "$_LARRY_LAST_TOOL_NAME" ]; then err "no tool calls yet this session"