#!/usr/bin/env bash # larry-anywhere — portable Larry for remote shells (Linux + MobaXterm) # Single file. No installs. curl + jq + bash. # # Usage: # larry.sh # interactive in $PWD # larry.sh /path/to/cloverleaf/root # interactive, cd into that path first # larry.sh tools list # list the manual Cloverleaf/HL7 tools # larry.sh tools [args] # run a tool by hand (no API/LLM needed) # larry.sh --no-update # skip self-update # larry.sh --version # print version and exit # larry.sh --help # print help and exit # # Manual-tools mode (v0.8.14): `larry tools …` runs the lib/ toolkit standalone, # with NO REPL, NO API and NO self-update — the operator's lifeline on a # locked-down box where the model API is blocked. See `larry tools help`. # # Env vars: # LARRY_HOME where to cache config/sessions (default: ~/.larry) # LARRY_BASE_URL root URL of the bundle on the server (default # as of v0.7.5: https://git.bjnoela.com/bryan/cloverleaf-larry/raw/branch/main). # Self-update pulls VERSION + MANIFEST from here and # refreshes every file listed in MANIFEST. # v0.7.4: single-source auto-update. The GitHub # fallback added in v0.7.2 was dropped after the GitHub # mirror was made private (anonymous raw fetches now # 401/403, so the fallback was functionally broken). # Users can pin/override via the /origin slash command # (see $LARRY_HOME/.origin). # LARRY_UPDATE_URL (legacy override) full URL of latest larry.sh # LARRY_AGENTS_URL (legacy override) base URL for agents/ # LARRY_MODEL Claude model (default: claude-sonnet-4-6) # LARRY_MAX_TOKENS max output tokens per turn (default: 8192) # LARRY_NO_UPDATE set to 1 to disable self-update # LARRY_GITEA_TOKEN optional Gitea PAT (read scope) for authenticated # fetch against a PRIVATE repo (alias: GITEA_TOKEN). # When set, update/install fetches add an # "Authorization: token " header. Never logged. # v0.8.4: lets the updater work against a private repo # without flipping it public. 
#                       If a fetch returns the Gitea HTML sign-in page
#                       (HTTP 200), the updater now FAILS LOUD instead of
#                       parsing HTML as file content.
#   ANTHROPIC_API_KEY   overrides $LARRY_HOME/.api-key and $LARRY_HOME/.env if set
#   LARRY_AUTH_MODE     auth rail. DEFAULT = apikey (sanctioned x-api-key rail).
#                       Set to "oauth" to opt INTO subscription OAuth — OFF by
#                       default because it requires impersonating the official
#                       Claude Code client, which Anthropic blocks and which
#                       flags your Max account. No silent OAuth fallback.
#   LARRY_API_KEY_FILE  per-client key store (default $LARRY_HOME/.api-key, 0600).
#                       Set via /set-api-key or larry-auth.sh --api-key.
#
# Slash commands during chat:
#   /quit /exit /q      exit
#   /model              switch model for this session
#   /cd                 change working directory
#   /reset              clear conversation history (keeps log file)
#   /load               paste a file's contents as your next user message
#   /sys                print the active system prompt
#   /clear              clear terminal screen
#   /copy               copy last assistant response to clipboard
#   /cost               show running token + dollar cost for the session
#   /status             force-render the persistent status line (ctx + rate-limit)
#   /show-last-tool     print last tool call + result (debug)
#   /help               this help
#
# Env knobs (v0.6.9):
#   LARRY_NO_STATUS=1   disable the between-turn status line
# Env knobs (v0.7.5):
#   LARRY_MOUSE=1       opt in to xterm mouse + bracketed-paste at startup
#                       (default since v0.7.5 is OFF — see /mouse in /help)
#   LARRY_NO_MOUSE=1    legacy hard-disable; still honoured
#
# Inline file syntax: @ in any prompt inlines the file's contents
# (TAB to autocomplete). See /help for details.

# Unset variables are errors; a pipeline fails if any stage fails.
set -uo pipefail

# ─────────────────────────────────────────────────────────────────────────────
# Config
# ─────────────────────────────────────────────────────────────────────────────
LARRY_VERSION='0.8.24'
: "${LARRY_HOME:=$HOME/.larry}"

# ─────────────────────────────────────────────────────────────────────────────
# Origin defaults (v0.7.4 — single-source).
# ---------------------------------------------------------------------------
# v0.7.2 introduced a Gitea-primary, GitHub-fallback model. v0.7.4 drops the
# fallback: the GitHub mirror was made private, so anonymous raw-URL reads
# now 401/403 — the fallback path was a silent failure waiting to happen.
# Auto-update is now single-source (Gitea). If Gitea is unreachable the
# client keeps running on the locally cached files and tries again next
# launch. The Gitea push-mirror still fans changes out to GitHub for
# read-only public consumption.
#
# The /origin slash-command family (see dispatcher below) lets the user pin
# a custom URL. Persisted pins live at $LARRY_HOME/.origin and are applied
# just below. Env-var overrides still win over the pinned file (mirroring
# how LARRY_HOME / LARRY_MODEL etc. behave).
#
# Migration: stale .origin files containing the legacy keyword "github" are
# treated as invalid (warn + revert to default). Translating to the GitHub
# raw URL would just trip a 401/403 on the next fetch.
# ─────────────────────────────────────────────────────────────────────────────
LARRY_ORIGIN_DEFAULT_GITEA='https://git.bjnoela.com/bryan/cloverleaf-larry/raw/branch/main'

# Read the pin file first; the env-var default applied afterwards still takes
# precedence over it. $LARRY_HOME may not exist on first launch, hence the
# readability guard.
_larry_pin_primary=""
if [[ -r "$LARRY_HOME/.origin" ]]; then
  _larry_pin_raw=$(tr -d '[:space:]' < "$LARRY_HOME/.origin" 2>/dev/null)
  if [[ "$_larry_pin_raw" == gitea ]]; then
    _larry_pin_primary="$LARRY_ORIGIN_DEFAULT_GITEA"
  elif [[ "$_larry_pin_raw" == github ]]; then
    # v0.7.4: GitHub is no longer a valid origin (repo went private). Warn and
    # leave the pin empty so the Gitea default applies. The file is NOT
    # rewritten; the user should re-run /origin explicitly.
    printf 'warn: %s/.origin pins legacy "github" origin; the GitHub mirror is private as of v0.7.4. Reverting to default (gitea). Run /origin auto to clear the pin.\n' "$LARRY_HOME" >&2
  elif [[ "$_larry_pin_raw" == https://* ]]; then
    _larry_pin_primary="$_larry_pin_raw"
  elif [[ -n "$_larry_pin_raw" ]]; then
    # Anything else that is non-empty is unrecognised (an empty file = no pin).
    printf 'warn: ignoring unrecognised value in %s/.origin: %s\n' "$LARRY_HOME" "$_larry_pin_raw" >&2
  fi
  unset _larry_pin_raw
fi
: "${LARRY_BASE_URL:=${_larry_pin_primary:-$LARRY_ORIGIN_DEFAULT_GITEA}}"
unset _larry_pin_primary

# Tracks which origin actually served the most recent self_update phase (set
# by sync_from_manifest / phase-B fetch). Read by /origin and the status
# line's optional origin badge. In v0.7.4 the only non-empty value is
# "primary" (single-source); the slot is kept for status-line compatibility.
_LARRY_LAST_ORIGIN=""
_LARRY_LAST_ORIGIN_URL=""

: "${LARRY_UPDATE_URL:=${LARRY_BASE_URL}/larry.sh}"
: "${LARRY_AGENTS_URL:=${LARRY_BASE_URL}/agents}"
: "${LARRY_MODEL:=claude-sonnet-4-6}"
: "${LARRY_MAX_TOKENS:=8192}"
: "${LARRY_API_URL:=https://api.anthropic.com/v1/messages}"
: "${LARRY_NO_UPDATE:=0}"

# v0.8.14: last-call diagnostics for API-block detection (graceful degradation
# into manual-tools mode on locked-down boxes). Set by call_api after each
# request; read by _diagnose_api_block. NOTHING here circumvents a block — it
# only RECOGNIZES one and guides the operator to run the tools manually.
LARRY_LAST_CURL_RC=''
LARRY_LAST_CURL_STDERR=''
LARRY_LAST_RESP_HEADERS=''

# ─────────────────────────────────────────────────────────────────────────────
# v0.8.10: API key is the DEFAULT / primary auth rail (Bryan's decision,
# 2026-05-27)
# ─────────────────────────────────────────────────────────────────────────────
# WHY API KEY IS DEFAULT (and OAuth-impersonation is OFF):
#   The OAuth (`sk-ant-oat01-`) rail bills a Claude Max/Pro subscription, but to
#   reach it from a non-Claude-Code client you must SPOOF the official client's
#   request fingerprint (the `anthropic-beta: claude-code-*` flag, the
#   `claude-cli/ (external, cli)` UA, the `x-app: cli` header, and a
#   "You are Claude Code, ..." system block). Anthropic is ACTIVELY enforcing
#   against exactly that impersonation: server-side fingerprint blocking live
#   since ~2026-01-09, formal ToS change 2026-02-19/20 (the "OpenClaw ban").
#   Every spoofed OAuth request flags the user's account. To protect Bryan's Max
#   account we do NOT impersonate Claude Code and we do NOT fire OAuth by default.
#
#   The API key (`sk-ant-api03-`) is the SANCTIONED programmatic rail: a plain
#   `x-api-key` request, billed pay-as-you-go, NOT subject to the impersonation
#   block and NOT edge-throttled as anomalous traffic. That is why it is the
#   durable default. See:
#     Deliverables/2026-05-27-claude-code-oauth-request-requirements-research.md
#     Deliverables/2026-05-27-cloverleaf-larry-api-key-default-rail.md
#
# OPT-IN OAUTH (discouraged): set LARRY_AUTH_MODE=oauth explicitly. larry prints
# a one-time account-risk warning and fires the OAuth rail. There is NO silent
# OAuth fallback — larry never auto-pokes the impersonation tripwire.
# Accepted values: "", "apikey", or "oauth"; resolved further down the file.
: "${LARRY_AUTH_MODE:=}"

# Per-client API-key file (the secure provisioning store; see /set-api-key).
# Mode 0600, owner-only, CR-stripped on read.
# Each client machine holds its OWN key — Bryan mints a separate,
# independently-revocable key per client at console.anthropic.com. The key
# never leaves the machine it is entered on.
: "${LARRY_API_KEY_FILE:=$LARRY_HOME/.api-key}"

# ─────────────────────────────────────────────────────────────────────────────
# Colors (only if stdout is a tty)
# ─────────────────────────────────────────────────────────────────────────────
# Start with every colour variable empty, then fill in the ANSI sequences only
# when stdout is a terminal.
C_RESET=''; C_BOLD=''; C_DIM=''
C_RED=''; C_GREEN=''; C_YELLOW=''
C_BLUE=''; C_MAGENTA=''; C_CYAN=''
if [[ -t 1 ]]; then
  C_RESET=$'\033[0m'
  C_BOLD=$'\033[1m'
  C_DIM=$'\033[2m'
  C_RED=$'\033[31m'
  C_GREEN=$'\033[32m'
  C_YELLOW=$'\033[33m'
  C_BLUE=$'\033[34m'
  C_MAGENTA=$'\033[35m'
  C_CYAN=$'\033[36m'
fi

# v0.8.5: log()/err()/warn() remove every carriage return from the joined
# message before printing. A CR that reaches the terminal moves the cursor to
# column 0 and the rest of the line overprints what came before (e.g.
# "API error" rendering as "ErrorPI"). The stripping is done with parameter
# expansion because lib/cygwin-safe.sh (strip_cr) is not sourced this early.
# All three write to stderr.

# log MSG... — dim [HH:MM:SS] timestamp followed by the message.
log() {
  local _text="$*"
  local _stamp
  _stamp="$(date +%H:%M:%S)"
  printf '%s[%s]%s %s\n' "$C_DIM" "$_stamp" "$C_RESET" "${_text//$'\r'/}" >&2
}

# err MSG... — red "error:" prefix followed by the message.
err() {
  local _text="$*"
  printf '%serror:%s %s\n' "$C_RED" "$C_RESET" "${_text//$'\r'/}" >&2
}

# warn MSG... — yellow "warn:" prefix followed by the message.
warn() {
  local _text="$*"
  printf '%swarn:%s %s\n' "$C_YELLOW" "$C_RESET" "${_text//$'\r'/}" >&2
}

# larry_say MSG... — bold magenta "larry>" prompt + message on stdout
# (no CR stripping here).
larry_say() {
  printf '%s%slarry>%s %s\n' "$C_MAGENTA" "$C_BOLD" "$C_RESET" "$*"
}

# >>> fetch-safe inline (keep in sync with lib/fetch-safe.sh) >>>
# self_update() (below) runs BEFORE lib/cygwin-safe.sh + lib/fetch-safe.sh are
# sourced (the source point is ~line 850, after the lib dir resolves). So the
# auto-updater carries a byte-identical inline copy of the fetch validators.
# Root cause + design: see lib/fetch-safe.sh's header and
# Deliverables/2026-05-27-cloverleaf-larry-stuck-update-and-tab-bug.md. The
# trap: Gitea answers an unauthenticated raw read with HTTP 200 + the HTML
# Sign-In page; `curl -fsSL` calls that success and the updater parses HTML as
# VERSION/MANIFEST/larry.sh content (silent abort, or overwrites real files
# with HTML). We detect + fail loud, never overwriting a real file.
# Optional LARRY_GITEA_TOKEN / GITEA_TOKEN env var enables authenticated fetch.
#
# NOTE(review): this copy now differs from what the banner above calls a
# byte-identical copy in lib/fetch-safe.sh (snippet-before-delete fix and the
# HTTP 4xx/5xx rejection in fetch_validate). Mirror both changes there.

# _fs_curl_auth_args — print the extra curl arguments for an authenticated
# fetch, one per line ("-H" then the header), or nothing when no token is set.
# The token comes from LARRY_GITEA_TOKEN, falling back to GITEA_TOKEN, with
# carriage returns removed.
_fs_curl_auth_args() {
  local _tok="${LARRY_GITEA_TOKEN:-${GITEA_TOKEN:-}}"
  _tok="${_tok//$'\r'/}"
  if [ -n "$_tok" ]; then
    printf '%s\n' '-H'
    printf '%s\n' "Authorization: token $_tok"
  fi
}

# _fs_html_trap_error URL — print the "got a sign-in page" diagnostic to stderr.
_fs_html_trap_error() {
  printf 'error: %s returned an HTML sign-in page, not file content. The Gitea repo is private or the instance requires sign-in. Either (a) make the repo public + set REQUIRE_SIGNIN_VIEW=false, or (b) set LARRY_GITEA_TOKEN= for authenticated fetch.\n' \
    "$1" >&2
}

# _fs_snippet FILE FALLBACK — print a quoted preview: the first 60 bytes of
# FILE with CR/LF removed, or FALLBACK when that yields nothing (file missing
# or empty). FILE must still exist when this is called.
_fs_snippet() {
  local f="$1" fb="$2" s
  s="$(head -c 60 "$f" 2>/dev/null | tr -d '\r\n' )"
  [ -z "$s" ] && s="$fb"
  printf '"%s..."' "$s"
}

# fetch_validate URL DEST KIND [MAX_TIME] — see lib/fetch-safe.sh for the full
# contract. KIND in {version,manifest,script,sh,text}. Writes DEST only on
# success; returns non-zero + leaves DEST untouched on any failure.
#
# Arguments:
#   URL       what to fetch
#   DEST      final path; written with mv only after every check passes
#   KIND      content check to apply (default: text = no content check)
#   MAX_TIME  curl --max-time in seconds (default: 15)
# Returns: 0 on success, 1 on any failure (diagnostic on stderr).
fetch_validate() {
  local url="$1" dest="$2" kind="${3:-text}" mt="${4:-15}"
  local tmp hdr code ctype first line1 snip
  # Body and response headers go to scratch files; if mktemp is unavailable,
  # fall back to PID-suffixed names next to DEST.
  tmp="$(mktemp 2>/dev/null || echo "${dest}.fs.$$")"
  hdr="$(mktemp 2>/dev/null || echo "${dest}.fsh.$$")"
  local _args=( -sSL --max-time "$mt" -o "$tmp" -D "$hdr" -w '%{http_code}' )
  local _auth_line
  while IFS= read -r _auth_line; do
    [ -n "$_auth_line" ] && _args+=( "$_auth_line" )
  done < <(_fs_curl_auth_args)
  code="$(curl "${_args[@]}" "$url" 2>/dev/null)"
  local rc=$?
  code="${code//$'\r'/}"
  if [ "$rc" -ne 0 ] || [ ! -s "$tmp" ]; then
    rm -f "$tmp" "$hdr"
    printf 'error: %s — fetch failed (curl rc=%s). Origin unreachable or timed out.\n' "$url" "$rc" >&2
    return 1
  fi

  # HTML sign-in trap: look at the last Content-Type header and at the first
  # 4 KiB of the body.
  ctype="$(grep -i '^content-type:' "$hdr" 2>/dev/null | tail -1 | tr -d '\r' | tr 'A-Z' 'a-z')"
  first="$(head -c 4096 "$tmp" 2>/dev/null | tr -d '\r')"
  if printf '%s' "$first" | grep -qi 'sign in'; then
    rm -f "$tmp" "$hdr"; _fs_html_trap_error "$url"; return 1
  fi
  case "$ctype" in
    *text/html*) rm -f "$tmp" "$hdr"; _fs_html_trap_error "$url"; return 1 ;;
  esac
  rm -f "$hdr"

  # curl runs without -f, so an HTTP error response with a body still exits 0.
  # Reject 4xx/5xx here so an error body never replaces DEST. Other values
  # (2xx, and "000" which curl reports for non-HTTP URLs) fall through.
  case "$code" in
    [45][0-9][0-9])
      rm -f "$tmp"
      printf 'error: %s — HTTP %s from origin; nothing written.\n' "$url" "$code" >&2
      return 1
      ;;
  esac

  line1="$(head -1 "$tmp" 2>/dev/null | tr -d '\r')"
  case "$kind" in
    version)
      local ver; ver="$(printf '%s' "$first" | tr -d '[:space:]')"
      if ! printf '%s' "$ver" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+'; then
        # Take the preview BEFORE deleting the scratch file; otherwise the
        # snippet is always empty and the whole 4 KiB fallback gets printed.
        snip="$(_fs_snippet "$tmp" "$first")"
        rm -f "$tmp"
        printf 'error: %s — expected a semver VERSION (e.g. 0.8.4), got %s.\n' "$url" "$snip" >&2
        return 1
      fi
      ;;
    manifest)
      if printf '%s' "$first" | grep -q '<'; then
        rm -f "$tmp"; printf 'error: %s — MANIFEST contains HTML markup ("<").\n' "$url" >&2; return 1
      fi
      # v0.8.11: accept BOTH the new "pathsha256" form AND the legacy
      # paths-only form. A plausible line starts with a path token, optionally
      # followed by whitespace + a 64-hex hash (the hash group is optional, so
      # legacy paths-only manifests still validate). The blanket grep -q '<'
      # HTML-trap guard above is unchanged and remains the real defense.
      if ! grep -Eq '^[A-Za-z0-9_][A-Za-z0-9_./-]*([[:space:]]+[0-9a-fA-F]{64})?[[:space:]]*$' "$tmp"; then
        rm -f "$tmp"; printf 'error: %s — MANIFEST has no plausible path line.\n' "$url" >&2; return 1
      fi
      ;;
    script)
      if [ "$line1" != '#!/usr/bin/env bash' ]; then
        # Same ordering rule as the version branch: preview first, then delete.
        snip="$(_fs_snippet "$tmp" "$first")"
        rm -f "$tmp"
        printf 'error: %s — larry.sh must start with `#!/usr/bin/env bash`, got %s.\n' "$url" "$snip" >&2
        return 1
      fi
      ;;
    sh|text|*) : ;;
  esac
  mkdir -p "$(dirname "$dest")" 2>/dev/null || true
  mv "$tmp" "$dest" || { rm -f "$tmp"; printf 'error: cannot write %s\n' "$dest" >&2; return 1; }
  return 0
}
# <<< fetch-safe inline <<<

# ─────────────────────────────────────────────────────────────────────────────
# `larry tools` — manual-tools mode (v0.8.14)
#
# A discoverable, low-friction entry point for the lib/ toolkit so a human can
# run any Cloverleaf/HL7 tool BY HAND — no REPL, no API, no LLM. This is the
# operator's lifeline on a locked-down box where the Anthropic API is blocked.
#
#   larry tools list — every manual tool + a one-line description
#   larry tools [args] — run a tool (no args → its --help/usage)
#   larry tools help — this help
#
# It runs HERE, before bootstrap / self-update / jq-check / any network call,
# so it works on a fresh install with the API unreachable. Only requirement:
# the lib/ directory next to larry.sh (or in $LARRY_HOME/lib).
# ─────────────────────────────────────────────────────────────────────────────

# Resolve the lib/ dir using ONLY what's defined this early (no $LARRY_LIB_DIR
# yet — that's set ~1000 lines down, after self-update). Mirrors the later
# _resolve_lib_dir but standalone-safe.
# Prints the first candidate directory that exists and contains nc-parse.sh
# (no trailing newline) and returns 0; returns 1 when neither candidate fits.
_tools_resolve_lib_dir() {
  local self_dir; self_dir=$(cd "$(dirname "$0")" 2>/dev/null && pwd)
  local candidate
  for candidate in "$self_dir/lib" "$LARRY_HOME/lib"; do
    [ -d "$candidate" ] && [ -f "$candidate/nc-parse.sh" ] && { printf '%s' "$candidate"; return 0; }
  done
  return 1
}

# Registry of the human-runnable manual tools, grouped.
# Each entry is "file.sh|one-line description". Internal plumbing (oauth.sh,
# phi-*.sh, fetch-safe.sh, cygwin-safe.sh, hl7-schema.sh, headers-sync.sh,
# journal.sh, lessons.sh, ssh-helper.sh, each.sh) is intentionally NOT listed —
# those are REPL/agent support, not operator-facing manual tools. Descriptions
# are the SSOT for `tools list`; keep them in sync when a tool's purpose changes.
#
# Output format: lines starting with "#" are group headings; every other line
# is one tool entry. The heredoc delimiter is quoted, so nothing is expanded.
_tools_registry() {
  cat <<'REG'
#NetConfig (read)
nc-parse.sh|Parse a NetConfig: list/inspect protocols & processes, fields, routes, xlate refs, one-hop destinations/sources
nc-paths.sh|Route-chain PATH tracer: enumerate full root-to-leaf chains for a thread or whole site. Intra-site hops follow the DATAXLATE DEST list (rendered `-->`); a DEST that names a `destination` block is the LOCAL OUTBOUND SENDER node (shown, never collapsed) that cross-site-links (rendered `==>`) to the remote { SITE }/{ THREAD } it names. Default output is the v1 chain form, one path per line: `site/thread --> site/thread ==> site/thread …` (field 1 = root node, pipe-first). Accepts a `site/thread` node OR `thread site` as input. Parses each NetConfig once into an in-memory graph. Usage: nc-paths.sh [--up|--down|--site-only] [--format v1|table|tsv|jsonl|nodes] | --all [--site NAME]
nc-find.sh|Cross-site search for threads/protocols by name/host/port/xlate across every site under $HCIROOT
nc-inbound.sh|List the inbound (server/listener) threads in a NetConfig
nc-status.sh|Engine runtime status (sites/threads/not-up/queued/connections) — wraps the shipped tstat binaries
nc-engine.sh|Engine process control (start/stop/cycle/status) — wraps the shipped hcienginerun binaries
nc-xlate.sh|Visualize and explore a Cloverleaf xlate (.xlt) file — the TCL nested mapping tree
nc-table.sh|Read and modify Cloverleaf lookup tables (.tbl) — every write is backed up and auditable
#NetConfig (write)
nc-create-thread.sh|High-level: create a new thread in a NetConfig (and optionally wire its route)
nc-insert-protocol.sh|Low-level write side: insert/replace a protocol block in a NetConfig
nc-make-jump.sh|Generate the 3-thread "jump" pattern for cross-environment data replay
nc-tclgen.sh|Generate annotated TCL UPOC scaffolding (skeletons for common Cloverleaf proc patterns)
nc-document.sh|Generate a markdown knowledge entry documenting a Cloverleaf subsystem/interface
#Diff & regression
nc-diff-interface.sh|Diff one Cloverleaf interface across two environments
nc-smat-diff.sh|Diff smat (message-archive) content across two environments
nc-regression.sh|End-to-end regression test orchestrator between two Cloverleaf environments
nc-msgs.sh|Native smat query: search/inspect archived messages without hcidbdump
#HL7
hl7-field.sh|Extract a field by path (PID.3, MSH.10, PV1-3-4 …) from an HL7 v2 message
hl7-diff.sh|HL7-aware diff with field-level normalization between two messages
len2nl.sh|Convert length-prefixed / MLLP-framed HL7 to newline-readable form
hl7-sanitize.sh|Tokenize PHI fields in HL7 v2 messages ([[CATEGORY_NNNN]] tokens)
hl7-desanitize.sh|Reverse hl7-sanitize: restore original values from a token map
#Site iteration & format
each-site.sh|Run a command once per site under $HCIROOT (exposes $HCISITE / $HCISITEDIR)
csv-to-table.sh|Convert a 2-column CSV into Cloverleaf .tbl format
table-to-csv.sh|Convert a Cloverleaf .tbl file to CSV
REG
}

# _tools_list — print the grouped tool catalogue from _tools_registry to
# stdout. When a lib/ directory resolves, entries whose script file is absent
# from it get a "(missing)" marker; when none resolves, an install hint is
# printed after the list instead of the usage tip.
_tools_list() {
  local lib_dir
  lib_dir=$(_tools_resolve_lib_dir) || lib_dir=""

  printf '%sLarry manual tools%s — run any of these by hand (no API/LLM needed):\n\n' "$C_BOLD" "$C_RESET"
  printf ' %slarry tools [args]%s run a tool (no args prints its --help)\n' "$C_CYAN" "$C_RESET"
  printf ' %slarry tools --help%s usage, flags, expected input/output + an example\n\n' "$C_CYAN" "$C_RESET"

  local entry script summary heading shown_name missing_tag
  while IFS= read -r entry; do
    if [[ "$entry" == '#'* ]]; then
      # Group heading: drop the leading "#" and print it bold.
      heading="${entry#\#}"
      printf '%s%s%s\n' "$C_BOLD" "$heading" "$C_RESET"
      continue
    fi
    script="${entry%%|*}"
    summary="${entry#*|}"
    shown_name="${script%.sh}"
    missing_tag=""
    # Flag a tool the registry lists but that isn't present on disk.
    if [[ -n "$lib_dir" && ! -f "$lib_dir/$script" ]]; then
      missing_tag=" ${C_YELLOW}(missing)${C_RESET}"
    fi
    printf ' %s%-22s%s %s%s\n' "$C_GREEN" "$shown_name" "$C_RESET" "$summary" "$missing_tag"
  done < <(_tools_registry)
  printf '\n'

  if [[ -n "$lib_dir" ]]; then
    printf '%sTip:%s the name is the script minus ".sh" (e.g. %slarry tools nc-parse list-protocols /path/NetConfig%s).\n' "$C_DIM" "$C_RESET" "$C_CYAN" "$C_RESET"
  else
    printf '%swarn:%s lib/ not found next to larry.sh or in %s/lib — the tools are not installed here.\n' "$C_YELLOW" "$C_RESET" "$LARRY_HOME"
    printf ' Run install-larry.sh, or scp the larry-anywhere/lib/ directory next to larry.sh.\n'
  fi
}

# Map a user-typed tool name to a lib file. Accepts "nc-parse" or "nc-parse.sh".
# _tools_resolve_name NAME — print NAME with a ".sh" suffix (added only when
# absent). Returns 1 and prints nothing for an empty NAME.
_tools_resolve_name() {
  local requested="$1"
  if [[ -z "$requested" ]]; then
    return 1
  fi
  if [[ "$requested" != *.sh ]]; then
    requested="$requested.sh"
  fi
  printf '%s' "$requested"
}

# larry_tools_main [SUBCOMMAND|TOOL] [ARGS...] — entry point for `larry tools`.
#   (none) | list | ls     print the catalogue
#   help | -h | --help     print the manual-tools banner from this file, then
#                          the catalogue
#   TOOL [ARGS...]         exec the tool with ARGS; with no ARGS, run its
#                          --help (falling back to `help`) and return
# Returns: 0 for list/help/no-args; 1 when lib/ cannot be located; 2 for an
# unknown tool. A tool invoked with arguments replaces this process.
larry_tools_main() {
  local action="${1:-list}"
  shift 2>/dev/null || true

  case "$action" in
    '' | list | ls)
      _tools_list
      return 0
      ;;
    help | -h | --help)
      sed -n '/^# `larry tools` — manual-tools mode/,/^# ───*$/p' "$0" | sed 's/^# \{0,1\}//'
      printf '\n'
      _tools_list
      return 0
      ;;
  esac

  # Anything else is treated as a tool name.
  local lib_dir
  lib_dir=$(_tools_resolve_lib_dir) || lib_dir=""
  if [[ -z "$lib_dir" ]]; then
    err "lib/ tools not found. Looked next to larry.sh and in $LARRY_HOME/lib."
    err "Run install-larry.sh, or scp the larry-anywhere/lib/ directory next to larry.sh."
    return 1
  fi

  local script
  script=$(_tools_resolve_name "$action")
  if [[ ! -f "$lib_dir/$script" ]]; then
    err "no such tool: $action"
    err "run 'larry tools list' to see available tools."
    return 2
  fi

  # With arguments: hand over to the tool entirely.
  if [[ "$#" -ne 0 ]]; then
    exec bash "$lib_dir/$script" "$@"
  fi

  # No args → show the tool's own --help so a human can learn it without
  # reading source.
  bash "$lib_dir/$script" --help 2>&1 || bash "$lib_dir/$script" help 2>&1 || true
  printf '\n%s(no args given — showed --help. Re-run with arguments to execute.)%s\n' "$C_DIM" "$C_RESET" >&2
  return 0
}

# `larry tools …` is dispatched here and never reaches the code below.
if [[ "${1:-}" == "tools" ]]; then
  shift
  larry_tools_main "$@"
  exit $?
fi

# ─────────────────────────────────────────────────────────────────────────────
# CLI args
# ─────────────────────────────────────────────────────────────────────────────
# Flags are handled as they are seen; any non-flag argument becomes ARG_DIR
# (the last one wins).
ARG_DIR=""
for arg in "$@"; do
  case "$arg" in
    --version | -V)
      echo "larry-anywhere $LARRY_VERSION"
      exit 0
      ;;
    --help | -h)
      sed -n '2,40p' "$0"
      exit 0
      ;;
    --no-update)
      LARRY_NO_UPDATE=1
      ;;
    -*)
      err "unknown flag: $arg"
      exit 2
      ;;
    *)
      ARG_DIR="$arg"
      ;;
  esac
done

# ─────────────────────────────────────────────────────────────────────────────
# Dependency check
# ─────────────────────────────────────────────────────────────────────────────
# need_cmd NAME — exit 1 with an error unless NAME resolves via `command -v`.
need_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    err "missing required command: $1"
    exit 1
  fi
}

need_cmd curl

# jq: allow a local copy in $LARRY_HOME/bin/jq as fallback
if command -v jq >/dev/null 2>&1; then
  :
elif [[ -x "$LARRY_HOME/bin/jq" ]]; then
  PATH="$LARRY_HOME/bin:$PATH"
else
  err "missing jq. Install via your shell's package mechanism, or place a static jq binary at $LARRY_HOME/bin/jq"
  err "Download: https://github.com/jqlang/jq/releases (pick the static binary for your OS)"
  exit 1
fi

# jqpath PATH — translate a path for jq's argv consumption.
# On MobaXterm/Cygwin/MSYS the bundled jq is a Windows-native jq.exe that
# rejects Cygwin paths like /tmp/tmp.X or /home/mobaxterm/.larry/... when
# they come in as argv arguments (it tries to open them as Windows paths
# and fails). cygpath -w translates Cygwin → Windows; jq.exe can then open
# the file. On Linux/macOS cygpath does not exist and we echo the path
# unchanged. Wrap EVERY --rawfile / --slurpfile path with $(jqpath "$p").
jqpath() {
  # Without cygpath there is nothing to translate: echo the path unchanged.
  if ! command -v cygpath >/dev/null 2>&1; then
    printf '%s' "$1"
    return
  fi
  cygpath -w "$1"
}

# ─────────────────────────────────────────────────────────────────────────────
# Bootstrap LARRY_HOME and API key
# ─────────────────────────────────────────────────────────────────────────────
# Create the cache tree; abort if it cannot be created. The chmod is
# best-effort.
if ! mkdir -p "$LARRY_HOME/agents" "$LARRY_HOME/sessions" "$LARRY_HOME/bin" 2>/dev/null; then
  err "cannot create $LARRY_HOME — set LARRY_HOME to a writable path and retry"
  exit 1
fi
chmod 700 "$LARRY_HOME" 2>/dev/null || true

# ─────────────────────────────────────────────────────────────────────────────
# Authentication — API key is the DEFAULT / primary rail (v0.8.10).
#   1. API key (`sk-ant-api03-`) — SANCTIONED programmatic billing, the default.
#      Stored per-client at $LARRY_HOME/.api-key (0600, CR-safe). Provisioned
#      via /set-api-key (or larry-auth.sh --api-key). Legacy: $LARRY_HOME/.env
#      with ANTHROPIC_API_KEY=... is still honored.
#   2. OAuth subscription auth — OPT-IN ONLY (LARRY_AUTH_MODE=oauth). Bills a
#      Claude Max/Pro subscription but requires impersonating the official
#      Claude Code client, which Anthropic actively blocks/flags. OFF by default
#      to protect the user's account. There is NO silent OAuth fallback.
# ─────────────────────────────────────────────────────────────────────────────

# _load_api_key_into_env — populate $ANTHROPIC_API_KEY (if not already set in the
# environment) from the per-client key file first, then legacy .env. CR-stripped
# inline (strip_cr from cygwin-safe.sh isn't sourced this early). An env-supplied
# ANTHROPIC_API_KEY always wins and is never overwritten.
_load_api_key_into_env() {
  # A key already present in the environment is kept as-is.
  [ -n "${ANTHROPIC_API_KEY:-}" ] && return 0
  if [ -f "$LARRY_API_KEY_FILE" ]; then
    local _k; _k=$(cat "$LARRY_API_KEY_FILE" 2>/dev/null)
    _k="${_k//$'\r'/}"; _k="${_k//$'\n'/}"   # strip CR and any stray newline
    if [ -n "$_k" ]; then ANTHROPIC_API_KEY="$_k"; export ANTHROPIC_API_KEY; return 0; fi
  fi
  if [ -f "$LARRY_HOME/.env" ]; then
    # allexport is switched on only while the legacy .env is sourced, so
    # everything it assigns ends up exported.
    # shellcheck disable=SC1091
    set -a; . "$LARRY_HOME/.env"; set +a
    # A CR-tainted .env (CRLF) can leave a trailing \r on the key; scrub it.
    [ -n "${ANTHROPIC_API_KEY:-}" ] && ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY//$'\r'/}"
  fi
  return 0
}

if [ "$LARRY_AUTH_MODE" = "oauth" ]; then
  # Explicit opt-in to the OAuth-impersonation rail. We honor it but DO load the
  # API key too (so /logout or a manual flip lands on a working rail) and warn
  # once (see _warn_oauth_optin_once, fired at first use). No spoofing happens
  # here — call_api's OAuth branch is the only place the OAuth token is sent.
  _load_api_key_into_env
else
  # DEFAULT: API key. Resolve a key from the per-client file or legacy .env.
  _load_api_key_into_env
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    LARRY_AUTH_MODE="apikey"
  else
    LARRY_AUTH_MODE=""   # no key yet → first-run prompt guides to /set-api-key
  fi
fi
# Snapshot the operator's CHOSEN primary auth mode for diagnostics/status.
LARRY_PRIMARY_AUTH_MODE="$LARRY_AUTH_MODE"

# _warn_oauth_optin_once — one-time account-risk warning printed the first time
# the OAuth rail is actually used. OAuth-impersonation flags the user's Max
# account (Anthropic's anti-impersonation enforcement); the API-key rail is the
# safe default. Guarded by a flag file so it prints at most once per LARRY_HOME.
_warn_oauth_optin_once() {
  local flag="$LARRY_HOME/.oauth-optin-warned"
  [ -f "$flag" ] && return 0
  warn "OAuth mode is OPT-IN and risks your Claude Max account."
  warn " Reaching the OAuth rail requires impersonating the official Claude Code"
  warn " client, which Anthropic actively fingerprints and blocks (ToS change"
  warn " 2026-02-19; enforcement live since ~2026-01-09). Each request can flag"
  warn " your account. The API key (sk-ant-api03-) is the sanctioned default —"
  warn " run /set-api-key and unset LARRY_AUTH_MODE to use it. (This warning"
  warn " shows once.)"
  # Best-effort: if the flag cannot be created the warning simply repeats.
  : > "$flag" 2>/dev/null || true
}

# _mask_api_key KEY — render a key for human display as the sk-ant-api03-
# prefix + "…" + last 4 chars. NEVER prints the middle. Used by every
# diagnostic so the full key is never echoed.
# Prints "(none)" for an empty key and only a length hint for keys of 12
# characters or fewer.
_mask_api_key() {
  local k="${1:-}"
  k="${k//$'\r'/}"
  [ -z "$k" ] && { printf '(none)'; return 0; }
  local len=${#k}
  if [ "$len" -le 12 ]; then
    # Too short to be a real key; show only a length hint, never the bytes.
    printf '(set, len=%d)' "$len"
    return 0
  fi
  local prefix last4
  prefix=$(printf '%s' "$k" | cut -c1-13)   # "sk-ant-api03-"
  last4=$(printf '%s' "$k" | tail -c 4)
  printf '%s…%s (len=%d)' "$prefix" "$last4" "$len"
}

# _validate_api_key KEY — one cheap test call to /v1/messages (max_tokens:1) to
# confirm the key authenticates before we store it.
# Returns: 0 on HTTP 200; 1 on any other HTTP status (the status code is
# printed on stdout for the caller's message); 2 when validation is skipped
# because curl is not installed or the curl call itself failed.
# The key travels ONLY in the x-api-key header of this curl (never in argv,
# never logged).
_validate_api_key() {
  local key="${1:-}"
  command -v curl >/dev/null 2>&1 || return 2
  key="${key//$'\r'/}"
  local body code
  body='{"model":"'"${LARRY_MODEL:-claude-sonnet-4-6}"'","max_tokens":1,"messages":[{"role":"user","content":"hi"}]}'
  # Key via --config on stdin so it never lands in curl's argv / the process table.
  code=$(printf 'header = "x-api-key: %s"\n' "$key" | curl -sS -o /dev/null -w '%{http_code}' --config - \
    -H "anthropic-version: 2023-06-01" \
    -H "content-type: application/json" \
    --max-time 20 \
    -d "$body" \
    "${LARRY_API_URL:-https://api.anthropic.com/v1/messages}" 2>/dev/null) || return 2
  [ "$code" = "200" ] && return 0
  # Only 200 counts as a pass here. Every other status is handed back on
  # stdout so the caller can show it.
  printf '%s' "$code"
  return 1
}

# set_api_key [--no-validate] — the canonical secure provisioning path. Prompts
# for the key with read -s (silent, NEVER echoed, NEVER in argv so it can't hit
# the process table or shell history), optionally validates it, then stores it
# CR-stripped at $LARRY_API_KEY_FILE with mode 0600. Used by the first-run
# prompt AND the /set-api-key slash command AND larry-auth.sh --api-key.
#
# On success it also exports ANTHROPIC_API_KEY and sets LARRY_AUTH_MODE and
# LARRY_PRIMARY_AUTH_MODE to "apikey".
# Returns: 0 when the key was stored; 1 when nothing was entered, validation
# failed, or the key file could not be written (nothing is changed then).
set_api_key() {
  local do_validate=1
  [ "${1:-}" = "--no-validate" ] && do_validate=0
  printf '%sSet Anthropic API key (per-client)%s\n' "$C_BOLD" "$C_RESET"
  echo " Mint a key for THIS machine at https://console.anthropic.com (one"
  echo " dedicated, independently-revocable key per client). It is stored at"
  echo " $LARRY_API_KEY_FILE (mode 0600) and never leaves this machine."
  echo ""
  printf ' Paste key (input hidden): '
  # read -s: silent, no echo. The key lands in $key only — never in argv, never
  # in the process table, never in shell history.
  local key=""
  read -rs key 2>/dev/null || read -r key   # -s unsupported on some shells → fallback
  echo ""
  # CR-strip (MobaXterm/Cygwin paste taints with \r; a \r in the key breaks the
  # x-api-key header). Also drop any stray surrounding whitespace/newline.
  key="${key//$'\r'/}"; key="${key//$'\n'/}"
  key="${key#"${key%%[![:space:]]*}"}"; key="${key%"${key##*[![:space:]]}"}"
  if [ -z "$key" ]; then err "no key entered — nothing changed"; return 1; fi
  case "$key" in
    sk-ant-*) : ;;
    *) warn "that doesn't look like an Anthropic key (expected sk-ant-...). Storing anyway." ;;
  esac

  if [ "$do_validate" = "1" ]; then
    printf ' Validating key (one cheap test call)… '
    local vrc vout
    vout=$(_validate_api_key "$key"); vrc=$?
    if [ "$vrc" = "0" ]; then
      printf '%svalid%s\n' "$C_GREEN" "$C_RESET"
    elif [ "$vrc" = "2" ]; then
      printf '%sskipped (curl/network unavailable)%s\n' "$C_YELLOW" "$C_RESET"
    else
      printf '%sFAILED (HTTP %s)%s\n' "$C_RED" "${vout:-?}" "$C_RESET"
      err "key did not authenticate — not stored. Check the key and retry, or use --no-validate to force."
      key=""   # scrub
      return 1
    fi
  fi

  # The restrictive umask is applied inside a subshell so it covers only this
  # write. Setting it in the main shell would silently make every file created
  # later in the session owner-only. A failed write is reported instead of
  # being announced as success.
  if ! ( umask 077 && printf '%s\n' "$key" > "$LARRY_API_KEY_FILE" ); then
    err "cannot write $LARRY_API_KEY_FILE — key not stored"
    key=""   # scrub
    return 1
  fi
  chmod 600 "$LARRY_API_KEY_FILE" 2>/dev/null || true
  ANTHROPIC_API_KEY="$key"; export ANTHROPIC_API_KEY
  LARRY_AUTH_MODE="apikey"
  LARRY_PRIMARY_AUTH_MODE="apikey"
  log "API key stored at $LARRY_API_KEY_FILE (0600) — $(_mask_api_key "$key")"
  key=""   # scrub local before return
  return 0
}

# clear_api_key — remove the stored per-client key and unset it from the env.
# The environment variable is only unset when a key file existed.
clear_api_key() {
  if [ -f "$LARRY_API_KEY_FILE" ]; then
    rm -f "$LARRY_API_KEY_FILE"
    unset ANTHROPIC_API_KEY
    log "API key cleared (removed $LARRY_API_KEY_FILE)."
  else
    echo "no API key file to remove ($LARRY_API_KEY_FILE)"
  fi
}

# show_api_key_status — masked status only. NEVER prints the full key.
show_api_key_status() { if [ -n "${ANTHROPIC_API_KEY:-}" ]; then printf ' API key: %s [source: %s]\n' \ "$(_mask_api_key "$ANTHROPIC_API_KEY")" \ "$([ -f "$LARRY_API_KEY_FILE" ] && echo "$LARRY_API_KEY_FILE" || echo 'env/.env')" elif [ -f "$LARRY_API_KEY_FILE" ]; then local k; k=$(cat "$LARRY_API_KEY_FILE" 2>/dev/null); k="${k//$'\r'/}"; k="${k//$'\n'/}" printf ' API key: %s [source: %s]\n' "$(_mask_api_key "$k")" "$LARRY_API_KEY_FILE" k="" else printf ' API key: (none set) — run /set-api-key\n' fi } prompt_first_run_auth() { printf '%sFirst-run authentication setup%s\n\n' "$C_BOLD" "$C_RESET" cat < "$LARRY_HOME/agents/larry.md" <<'AGENT_EOF' You are Larry, Bryan's team orchestrator at myPKA, running in portable mode on a remote shell. First sentence when asked who you are: "I'm Larry, your team orchestrator at myPKA (running portable mode)." Focus: Cloverleaf interface build and Netconfig analysis. No PHI involved. No production push. Tools available: read_file, list_dir, grep_files, glob_files, write_file (Y/N confirm), bash_exec (Y/N confirm). Style: concise, direct, cite path:line for code references. Ask one tight clarifying question only if a critical detail is missing. AGENT_EOF ;; clover.md) cat > "$LARRY_HOME/agents/clover.md" <<'AGENT_EOF' When the task is Cloverleaf-specific, channel Clover, Cloverleaf Integration Expert. Focus: UPOC TCL coding, interface specs, clean documentation. Idempotent, auditable, source-cited. Output: one-line status, artifact list, anomalies/open questions. AGENT_EOF ;; esac } fetch_agents_or_warn # ───────────────────────────────────────────────────────────────────────────── # Self-update — two-phase MANIFEST-driven sync. # # Phase A (local sync, no network if up-to-date): # If $LARRY_HOME/.last-sync-version != $LARRY_VERSION, the running larry.sh # is newer than the on-disk lib/agents/etc. files. Fetch MANIFEST from # $LARRY_BASE_URL and refresh every file listed. Stamp .last-sync-version. 
#
# Phase B (remote version check):
#   Fetch $LARRY_BASE_URL/VERSION. If the remote version differs from the
#   local one, pull the new larry.sh, replace self, and relaunch with
#   LARRY_JUST_UPDATED=1 so phase B is skipped on the relaunch (avoids an
#   infinite loop). Phase A on the relaunch then pulls every other file
#   matching the new version.
#
# Skip all of it via --no-update or LARRY_NO_UPDATE=1.
#
# v0.7.4: single-source auto-update. Every network step (manifest fetch in
# phase A, VERSION + larry.sh fetch in phase B) hits $LARRY_BASE_URL only.
# If it's unreachable (curl exit non-zero, HTTP error, timeout, DNS failure)
# the client logs a clear warn and proceeds with the locally cached files —
# no silent failover to a now-private GitHub mirror.
# ─────────────────────────────────────────────────────────────────────────────

# _origin_label URL — turn an origin URL into the short name shown in log
# lines: "gitea" when URL is exactly $LARRY_ORIGIN_DEFAULT_GITEA, otherwise
# the URL unchanged. Literal string comparison only; never touches the network.
# Output: the label on stdout, without a trailing newline.
_origin_label() {
  if [[ "$1" == "$LARRY_ORIGIN_DEFAULT_GITEA" ]]; then
    printf 'gitea'
  else
    printf '%s' "$1"
  fi
}

# _record_origin SLOT URL — note which origin just served bytes.
# Globals written: _LARRY_LAST_ORIGIN (SLOT), _LARRY_LAST_ORIGIN_URL (URL).
# SLOT is always "primary" since v0.7.4 (single-source); the argument is kept
# so status-line consumers keep working.
_record_origin() {
  _LARRY_LAST_ORIGIN="$1"
  _LARRY_LAST_ORIGIN_URL="$2"
}

# ─────────────────────────────────────────────────────────────────────────────
# v0.8.11: local sha256 for manifest-hash skip-unchanged.
#
# WHY: sync_from_manifest used to re-download EVERY manifest entry over an
# authenticated HTTPS round-trip (Gitea via proxy + Cloudflare) and `cmp`
# locally to find the few that changed — ~3 min on Bryan's work-box for a
# 3-file update. The MANIFEST now ships each file's expected sha256 (generated
# by scripts/make-manifest.sh at release). The client fetches MANIFEST once,
# hashes its LOCAL copy of each path, and downloads ONLY entries whose hash
# differs or are missing.
# # These run BEFORE lib/cygwin-safe.sh is sourced (self_update is early), so this # block is self-contained — no strip_cr / coerce_int dependency. # # sha256 TOOL FALLBACK CHAIN (priority): sha256sum, shasum -a 256, # openssl dgst -sha256. Detected ONCE and cached in _LARRY_SHA_TOOL. If NONE is # available, _LARRY_SHA_TOOL stays "none" and sync_from_manifest falls back to # the old full-download behaviour entirely (never breaks the updater). _LARRY_SHA_TOOL="" # "", then one of: sha256sum|shasum|openssl|none _detect_sha_tool() { [ -n "$_LARRY_SHA_TOOL" ] && return 0 if command -v sha256sum >/dev/null 2>&1; then _LARRY_SHA_TOOL="sha256sum" elif command -v shasum >/dev/null 2>&1; then _LARRY_SHA_TOOL="shasum" elif command -v openssl >/dev/null 2>&1; then _LARRY_SHA_TOOL="openssl" else _LARRY_SHA_TOOL="none" fi return 0 } # _local_sha256 FILE — print FILE's bare lowercase 64-hex sha256, or empty on # any failure (missing file, tool error, unparseable output). Empty result is # the caller's signal to DOWNLOAD (fail toward correctness — never skip). # Normalizes each tool's differing output shape to a bare 64-hex string: # sha256sum / shasum -> " " # openssl -> "SHA2-256()= " _local_sha256() { local f="$1" out="" [ -f "$f" ] || return 1 case "$_LARRY_SHA_TOOL" in sha256sum) out="$(sha256sum "$f" 2>/dev/null)" ;; shasum) out="$(shasum -a 256 "$f" 2>/dev/null)" ;; openssl) out="$(openssl dgst -sha256 "$f" 2>/dev/null)" ;; *) return 1 ;; esac # tr -d '\r' first: a Cygwin-built sha tool can emit a CR on stdout; the hex # grep would still match but belt-and-suspenders. Lowercase A-F for compare. out="$(printf '%s' "$out" | tr -d '\r' | grep -oE '[0-9a-fA-F]{64}' | head -1 | tr 'A-F' 'a-f')" [ -n "$out" ] || return 1 printf '%s' "$out" } # ───────────────────────────────────────────────────────────────────────────── # v0.8.9: manifest-sync progress indicator. 
#
# WHY: phase-A sync (sync_from_manifest) fetches EVERY manifest entry over an
# authenticated HTTPS round-trip (Gitea via corporate proxy + Cloudflare), then
# uses `cmp` to decide if the byte stream actually changed. With 48 entries that
# is 48 sequential round-trips — ~3 min on Bryan's work-box — and it was
# COMPLETELY SILENT between "update found … relaunching" and "manifest sync: …".
# Looked frozen. This indicator shows live per-file forward progress so a real
# hang is visible (you see WHICH file it stalls on) and a slow-but-working sync
# is obviously alive.
#
# MobaXterm-safety: uses ONLY carriage-return + clear-line (\r\033[K), the same
# primitive already used at the readline prompt (see read_user_input) and
# audited MobaXterm-safe in the v0.8.7 escape inventory. Deliberately NO
# DECSTBM scroll-region, cursor-save/restore, or absolute-row addressing — those
# are exactly the escapes MobaXterm mis-honors (see render_status_line note).
#
# These run BEFORE lib/cygwin-safe.sh is sourced (self_update is early), so we
# strip CRs inline via parameter expansion rather than calling strip_cr.
#
# Output goes to stderr (fd 2) to sit alongside log()/warn()/err(), and the TTY
# gate is `[ -t 2 ]`. Non-TTY (pipe/log redirect): no \r — we emit a plain
# newline-terminated heartbeat line every $_SYNC_PROGRESS_PLAIN_EVERY files so a
# captured log shows progress without \r garbage.
# ─────────────────────────────────────────────────────────────────────────────
_SYNC_PROGRESS_PLAIN_EVERY=10   # non-TTY: emit a plain heartbeat every N files

# _sync_progress PHASE CUR TOTAL LABEL — render one in-place progress frame.
#   PHASE : short verb, e.g. "syncing" / "checking" / "downloading"
#   CUR   : current 1-based index
#   TOTAL : denominator
#   LABEL : current filename (or any short context string)
# TTY: rewrites the current line via \r\033[K (no scroll, no newline).
# non-TTY: prints a plain line only on the first, every-Nth, and (caller uses
# _sync_progress_done for) final frame, so logs stay readable.
_sync_progress() {
  local phase="$1" cur="$2" total="$3" label="${4:-}"
  # Strip any CR that rode in on the label (defensive; pre-cygwin-safe).
  label="${label//$'\r'/}"
  if [ -t 2 ]; then
    printf '\r\033[K%s[%s]%s %s %s/%s %s' \
      "$C_DIM" "$(date +%H:%M:%S)" "$C_RESET" "$phase" "$cur" "$total" "$label" >&2
  else
    # Plain mode: heartbeat on first frame and every Nth frame. No \r.
    if [ "$cur" -eq 1 ] || [ $(( cur % _SYNC_PROGRESS_PLAIN_EVERY )) -eq 0 ]; then
      printf '%s[%s]%s %s %s/%s\n' \
        "$C_DIM" "$(date +%H:%M:%S)" "$C_RESET" "$phase" "$cur" "$total" >&2
    fi
  fi
}

# _sync_progress_done — clear the in-place progress line on a TTY so the
# following summary log line lands clean. No-op on a non-TTY (nothing to clear).
_sync_progress_done() {
  [ -t 2 ] && printf '\r\033[K' >&2
  return 0
}

# sync_from_manifest BASE_URL — phase-A sync: fetch BASE_URL/MANIFEST and bring
# every listed file under $LARRY_HOME up to date.
#
# Manifest format: one entry per line, "path" optionally followed by whitespace
# and the file's sha256; blank lines and '#' comments are ignored. CRLF-safe,
# and a final line without a trailing newline is still processed.
#
# Per entry: skip when the local sha256 equals the manifest hash, otherwise
# download to "<dest>.new", validate via fetch_validate, and move into place
# only if the bytes differ. larry.sh is never touched here (phase B owns it).
# Entries that would resolve outside $LARRY_HOME (absolute, or containing a
# ".." component) are rejected and counted as failed — the manifest is remote
# content and must not be able to write arbitrary paths.
#
# Globals written: LARRY_SYNC_UPDATED_COUNT, LARRY_SYNC_FAILED_COUNT,
#                  LARRY_SYNC_SKIPPED_COUNT.
# Returns: 1 if the manifest itself could not be fetched/validated, else 0
#          (per-file failures are reported through the failed count).
sync_from_manifest() {
  local base="$1"
  local manifest="$LARRY_HOME/.manifest.new"
  # v0.8.4: validate the MANIFEST fetch. If Gitea is private/sign-in-gated it
  # answers with the HTML login page at HTTP 200; the old `curl -fsSL` treated
  # that as success and the loop below then iterated HTML lines as file paths
  # and overwrote real on-disk files with HTML. fetch_validate fails loud and
  # leaves $manifest absent, so we abort cleanly without corrupting anything.
  fetch_validate "$base/MANIFEST" "$manifest" manifest 10 || {
    rm -f "$manifest"
    return 1
  }
  [ -s "$manifest" ] || { rm -f "$manifest"; return 1; }

  # v0.8.11: detect the sha256 tool ONCE. If none is available we set
  # _have_sha=0 and the per-file loop falls back to the OLD full-download
  # behaviour for every entry (download + cmp) — never skips on a missing tool.
  _detect_sha_tool
  local _have_sha=1
  [ "$_LARRY_SHA_TOOL" = "none" ] && _have_sha=0
  [ "$_have_sha" -eq 0 ] && warn "no sha256 tool (sha256sum/shasum/openssl) — manifest-hash skip disabled, full sync this launch"

  # v0.8.9: pre-count manifest entries so the progress indicator has a
  # denominator. Cheap local pass (no network) over the just-fetched manifest.
  # `|| [ -n "$_l" ]` keeps a final entry that lacks a trailing newline.
  local total=0 _l
  while IFS= read -r _l || [ -n "$_l" ]; do
    _l="${_l//$'\r'/}"
    case "$_l" in ''|'#'*) continue ;; esac
    _l="${_l%%[[:space:]]*}"
    [ -z "$_l" ] && continue
    total=$((total + 1))
  done < "$manifest"

  # v0.8.11: skipped = files whose LOCAL sha256 matched the manifest hash
  # (verified locally, no download). The "verifying (local)" phase covers this
  # fast local pass; the v0.8.9 "downloading" phase still covers the few real
  # fetches.
  local count=0 updated=0 failed=0 skipped=0 path mhash lhash tmp dest _rest
  local _can_skip _kind
  while IFS= read -r _l || [ -n "$_l" ]; do
    # CR-safety: strip CR from the whole line FIRST (the MANIFEST is fetched
    # from Gitea; a CRLF tail would taint the hash field — a CR-tainted hash
    # would never match a clean local hash and force needless re-downloads).
    # This is the v0.7.5/v0.8.5 CR-taint class. We are pre-source so strip via
    # parameter expansion (strip_cr not yet defined here).
    _l="${_l//$'\r'/}"
    case "$_l" in ''|'#'*) continue ;; esac
    # Split "path<whitespace>hash". path = first token; mhash = next token (if
    # any). A line with no second token (old paths-only format) -> mhash empty
    # -> treated as "can't verify -> download" below (fail-safe).
    path="${_l%%[[:space:]]*}"
    _rest="${_l#"$path"}"
    _rest="${_rest#"${_rest%%[![:space:]]*}"}"   # ltrim the separator whitespace
    mhash="${_rest%%[[:space:]]*}"               # first token of remainder
    [ -z "$path" ] && continue
    count=$((count + 1))
    # larry.sh is updated by phase B, not here — skip to avoid clobbering
    # the running script mid-execution.
    [ "$path" = "larry.sh" ] && continue
    # Containment: never let a manifest entry escape $LARRY_HOME.
    case "$path" in
      /*|..|../*|*/..|*/../*)
        warn "manifest entry rejected (would write outside \$LARRY_HOME): $path"
        failed=$((failed + 1))
        continue
        ;;
    esac
    dest="$LARRY_HOME/$path"
    tmp="$dest.new"
    mkdir -p "$(dirname "$dest")" 2>/dev/null

    # ── v0.8.11 LOCAL-SKIP DECISION (fail toward correctness) ────────────────
    # Skip the download ONLY when ALL of these hold:
    #   (a) we have a working sha256 tool,
    #   (b) the manifest line carried a syntactically valid 64-hex hash,
    #   (c) the local file exists and hashes to exactly that value.
    # ANY doubt — no tool, no/short/non-hex manifest hash, missing local file,
    # local-hash failure, or hash MISMATCH — falls through to download. A stale
    # or wrong hash can therefore never SKIP a real update; worst case is a
    # needless re-download.
    _can_skip=0
    if [ "$_have_sha" -eq 1 ] && printf '%s' "$mhash" | grep -qiE '^[0-9a-f]{64}$'; then
      if [ -f "$dest" ]; then
        lhash="$(_local_sha256 "$dest")"
        if [ -n "$lhash" ] && [ "$lhash" = "$(printf '%s' "$mhash" | tr 'A-F' 'a-f')" ]; then
          _can_skip=1
        fi
      fi
    fi

    if [ "$_can_skip" -eq 1 ]; then
      # Verified unchanged locally — no network round-trip. Show a fast local
      # progress frame so the operator sees forward motion through the verify.
      _sync_progress "verifying (local)" "$count" "$total" "$path"
      skipped=$((skipped + 1))
      continue
    fi

    # ── DOWNLOAD PATH (changed, missing, unverifiable, or no-tool fallback) ──
    # v0.8.9: live progress BEFORE the network round-trip, so a fetch that
    # hangs (slow file / proxy stall) shows exactly which file it is stuck on
    # rather than freezing silently. The 15 passed to fetch_validate below is
    # the per-file fetch timeout; a failed fetch counts as a fail and the loop
    # advances.
    _sync_progress downloading "$count" "$total" "$path"
    # v0.8.4: per-file content validation. Infer the shape contract from the
    # path so a sign-in-page (or any HTML) response can never be written over a
    # real lib/agent/metadata file.
    case "$path" in
      VERSION)  _kind=version ;;
      MANIFEST) _kind=manifest ;;
      *.sh)     _kind=sh ;;
      *)        _kind=text ;;
    esac
    if fetch_validate "$base/$path" "$tmp" "$_kind" 15 && [ -s "$tmp" ]; then
      # cmp guard retained: even after a download, only count + write if the
      # bytes actually differ (e.g. the manifest hash was stale-but-the-file-is-
      # actually-current, or we fell back here without a tool). Idempotent.
      if [ ! -f "$dest" ] || ! cmp -s "$dest" "$tmp"; then
        mv "$tmp" "$dest"
        case "$path" in *.sh) chmod +x "$dest" 2>/dev/null || true ;; esac
        updated=$((updated + 1))
      else
        rm -f "$tmp"
      fi
    else
      rm -f "$tmp"
      failed=$((failed + 1))
    fi
  done < "$manifest"
  rm -f "$manifest"
  # v0.8.9: clear the in-place progress line so the summary lands clean.
  _sync_progress_done
  if [ "$updated" -gt 0 ] || [ "$failed" -gt 0 ] || [ "$skipped" -gt 0 ]; then
    log "manifest sync: $updated updated, $skipped unchanged (local hash), $failed failed, $count total (from $base)"
  fi
  LARRY_SYNC_UPDATED_COUNT="$updated"
  LARRY_SYNC_FAILED_COUNT="$failed"
  LARRY_SYNC_SKIPPED_COUNT="$skipped"
  return 0
}

# sync_from_manifest_with_fallback — v0.7.4 retains the historical name for
# call-site compatibility but is now a single-source wrapper around
# sync_from_manifest. Returns 0 on success, non-zero if $LARRY_BASE_URL is
# unreachable (no silent failover — auto-update is just skipped this launch).
sync_from_manifest_with_fallback() {
  if sync_from_manifest "$LARRY_BASE_URL"; then
    _record_origin primary "$LARRY_BASE_URL"
    return 0
  fi
  warn "$(_origin_label "$LARRY_BASE_URL") unreachable, auto-update skipped this launch"
  return 1
}

# _fetch_with_fallback REL_PATH DEST [MAX_TIME] [KIND] — v0.7.4 single-source
# fetch (name kept for call-site compatibility). Returns 0 if the file pulled
# AND passed content validation, non-zero otherwise. Records the winning
# origin slot in $_LARRY_LAST_ORIGIN (always "primary" in single-source mode).
#
# v0.8.4: routes through fetch_validate so the Gitea HTML-sign-in-page trap
# (HTTP 200 + login HTML) is caught BEFORE the bytes are trusted. KIND defaults
# to a shape inferred from REL_PATH (VERSION->version, MANIFEST->manifest,
# larry.sh->script, *.sh->sh, else text).
# On failure DEST is removed so a partial download is never left behind.
_fetch_with_fallback() {
  local rel="$1" dest="$2" mt="${3:-15}" kind="${4:-}"
  if [ -z "$kind" ]; then
    case "$rel" in
      VERSION)  kind=version ;;
      MANIFEST) kind=manifest ;;
      larry.sh) kind=script ;;
      *.sh)     kind=sh ;;
      *)        kind=text ;;
    esac
  fi
  if fetch_validate "$LARRY_BASE_URL/$rel" "$dest" "$kind" "$mt" && [ -s "$dest" ]; then
    _record_origin primary "$LARRY_BASE_URL"
    return 0
  fi
  rm -f "$dest"
  return 1
}

# self_update — two-phase updater, run once at startup.
#   Phase A: if $LARRY_HOME/.last-sync-version differs from $LARRY_VERSION,
#            sync the bundle via sync_from_manifest_with_fallback and, on
#            success, stamp the version and set LARRY_UPDATE_NOTICE.
#   Phase B: fetch VERSION; if it differs from $LARRY_VERSION, fetch larry.sh,
#            copy it over the running script and re-exec with
#            LARRY_JUST_UPDATED=1 (which makes the relaunch skip phase B).
# No-ops when LARRY_NO_UPDATE=1, when LARRY_BASE_URL is empty, or (phase B)
# when the script file is not writable. Every failure path returns 0 so the
# session continues on the current version; this function only fails to return
# when the exec succeeds.
self_update() {
  [ "$LARRY_NO_UPDATE" = "1" ] && return 0
  [ -z "$LARRY_BASE_URL" ] && return 0

  local self="$0"
  case "$self" in /*) ;; *) self="$PWD/$self" ;; esac

  # Phase A: local file sync. Triggered when on-disk files are out of sync
  # with the running larry.sh version (e.g. just after a self-replace, or
  # on first launch after install).
  local last_sync=""
  [ -f "$LARRY_HOME/.last-sync-version" ] \
    && last_sync=$(tr -d '[:space:]' < "$LARRY_HOME/.last-sync-version" 2>/dev/null)
  if [ "$last_sync" != "$LARRY_VERSION" ]; then
    LARRY_SYNC_UPDATED_COUNT=0
    LARRY_SYNC_FAILED_COUNT=0
    if sync_from_manifest_with_fallback; then
      printf '%s\n' "$LARRY_VERSION" > "$LARRY_HOME/.last-sync-version" 2>/dev/null || true
      if [ "${LARRY_JUST_UPDATED:-0}" = "1" ] && [ -n "${LARRY_PREV_VERSION:-}" ]; then
        # We came in via a phase-B self-replace; phase A then synced the rest.
        LARRY_UPDATE_NOTICE="updated v${LARRY_PREV_VERSION} → v${LARRY_VERSION} (${LARRY_SYNC_UPDATED_COUNT} files synced from manifest)"
      elif [ "$LARRY_SYNC_UPDATED_COUNT" -gt 0 ]; then
        if [ -n "$last_sync" ]; then
          LARRY_UPDATE_NOTICE="manifest sync v${last_sync} → v${LARRY_VERSION} (${LARRY_SYNC_UPDATED_COUNT} files updated)"
        else
          LARRY_UPDATE_NOTICE="first-run sync at v${LARRY_VERSION} (${LARRY_SYNC_UPDATED_COUNT} files synced from manifest)"
        fi
      fi
    fi
  fi

  # Phase B: skip the network version check on the relaunch right after a
  # self-replace (we just pulled it; checking again is pointless and risks
  # loops if curl returns stale/partial content).
  [ "${LARRY_JUST_UPDATED:-0}" = "1" ] && return 0
  [ -w "$self" ] || return 0

  # VERSION fetch (single-source; v0.7.4).
  local ver_tmp="$LARRY_HOME/.VERSION.new" remote_ver=""
  if _fetch_with_fallback "VERSION" "$ver_tmp" 5; then
    remote_ver=$(tr -d '[:space:]' < "$ver_tmp" 2>/dev/null)
  fi
  rm -f "$ver_tmp"
  [ -z "$remote_ver" ] && return 0
  [ "$remote_ver" = "$LARRY_VERSION" ] && return 0

  local tmp="$LARRY_HOME/larry.sh.new"
  if ! _fetch_with_fallback "larry.sh" "$tmp" 15; then
    rm -f "$tmp"
    return 0
  fi
  if cmp -s "$self" "$tmp"; then
    rm -f "$tmp"
    return 0
  fi

  # The downloaded script must declare a version, else treat it as junk.
  local new_ver
  new_ver=$(grep -m1 '^LARRY_VERSION=' "$tmp" | sed 's/.*"\(.*\)".*/\1/')
  [ -z "$new_ver" ] && { rm -f "$tmp"; return 0; }

  log "update found: $LARRY_VERSION -> $new_ver (via $(_origin_label "$_LARRY_LAST_ORIGIN_URL")) — relaunching"
  # A failed copy must NOT fall through to the relaunch: that would wipe the
  # sync stamp and exec the un-updated script flagged as "just updated".
  if ! cp "$tmp" "$self"; then
    rm -f "$tmp"
    warn "self-update: could not replace $self — continuing on v$LARRY_VERSION"
    return 0
  fi
  chmod +x "$self"
  rm -f "$tmp"
  # Force phase A on the next launch by invalidating the sync stamp.
  rm -f "$LARRY_HOME/.last-sync-version" 2>/dev/null || true
  exec env LARRY_JUST_UPDATED=1 LARRY_PREV_VERSION="$LARRY_VERSION" "$self" ${ARG_DIR:+"$ARG_DIR"}
}

self_update

# ── Deferred auth prompt ────────────────────────────────────────────────────
# Now that self_update has had a chance to refresh lib/oauth.sh, gate on
# credentials. On a fresh box (no .oauth.json, no API key) this is the first
# interactive prompt the user sees.
if [ -z "$LARRY_AUTH_MODE" ]; then
  prompt_first_run_auth
fi

# ─────────────────────────────────────────────────────────────────────────────
# Cloverleaf environment detection — BOTH deployment modes (v0.8.13)
#
# Larry-Anywhere runs in one of two modes, and it must DETECT which and act
# proactively rather than nagging Bryan for a path:
#
#   MODE LOCAL  — larry is installed directly ON a Cloverleaf box. $HCIROOT is
#                 set by the local login profile, or a Cloverleaf install sits
#                 at a common path. No SSH needed; work the local tree.
#   MODE REMOTE — larry is on a client/local box; Cloverleaf is on a remote
#                 server reachable via a configured SSH alias (e.g. `qa`). The
#                 REMOTE env is discovered over SSH in a LOGIN shell (so the
#                 remote $HCIROOT populates) — see ssh-helper.sh `discover`.
#
# This function surfaces the detected MODE, the local env (if any), and any
# configured SSH aliases, so the model leads with what it found instead of
# asking the user to spoon-feed paths.
# ─────────────────────────────────────────────────────────────────────────────

# _local_cloverleaf_root — echo a local Cloverleaf $HCIROOT if one is set or
# auto-discoverable, else empty. Order: $HCIROOT (if it's a real dir) → common
# install paths that contain a Cloverleaf marker (a server/ dir, or an
# immediate-child site holding a NetConfig). Cheap, read-only, depth-limited.
# Returns 0 with the path on stdout (no newline), or 1 with no output.
_local_cloverleaf_root() {
  if [ -n "${HCIROOT:-}" ] && [ -d "$HCIROOT" ]; then printf '%s' "$HCIROOT"; return 0; fi
  local p hit
  for p in /quovadx/qdx*/integrator /quovadx/integrator /opt/cloverleaf/integrator \
           /cloverleaf/integrator /usr/local/cloverleaf "$HOME/integrator" /qdx/integrator; do
    [ -d "$p" ] || continue
    # Marker 1: a server/ dir.
    if [ -d "$p/server" ]; then
      printf '%s' "$p"; return 0
    fi
    # Marker 2: at least one immediate-child site with a NetConfig. Test the
    # captured TEXT, not the pipeline status: this script runs with
    # `set -o pipefail`, and `head` closing the pipe early can kill `find` with
    # SIGPIPE, which would make a status-based test fail on a real install.
    hit=$(find "$p" -mindepth 2 -maxdepth 2 -name NetConfig -type f 2>/dev/null | head -n 1) || true
    if [ -n "$hit" ]; then
      printf '%s' "$p"; return 0
    fi
  done
  return 1
}

# _ssh_aliases — echo configured SSH aliases (one per line) from the hosts TSV
# ($LARRY_HOME/.ssh-hosts.tsv: first row is a header, column 1 is the alias).
# Available even though LARRY_LIB_DIR isn't resolved yet (detection runs early).
# Prints nothing and returns 0 when the file does not exist.
_ssh_aliases() {
  local f="${LARRY_HOME:-$HOME/.larry}/.ssh-hosts.tsv"
  [ -f "$f" ] || return 0
  awk -F'\t' 'NR>1 && $1!="" { print $1 }' "$f" 2>/dev/null
}

# detect_cloverleaf_env — probe the local box and the SSH alias table, then
# publish the findings for the system prompt.
# Globals written: CLOVERLEAF_CTX (markdown bullet list); HCIROOT (exported,
#                  only when it was unset and a local install was discovered).
# Globals read:    HCIROOT, HCISITE, HCISITEDIR, LARRY_HOME, HOME, PWD.
detect_cloverleaf_env() {
  CLOVERLEAF_CTX=""
  local lines=()
  local c ln   # loop variables below — keep them out of the global scope

  # ── Mode determination ──────────────────────────────────────────────────
  local local_root; local_root=$(_local_cloverleaf_root || true)
  local aliases; aliases=$(_ssh_aliases)
  local alias_count=0
  [ -n "$aliases" ] && alias_count=$(printf '%s\n' "$aliases" | grep -c .)

  # If $HCIROOT wasn't already exported but we auto-discovered a local install,
  # adopt it so the rest of detection + the nc_* tools resolve against it.
  if [ -z "${HCIROOT:-}" ] && [ -n "$local_root" ]; then
    HCIROOT="$local_root"; export HCIROOT
  fi

  local mode="UNKNOWN"
  if [ -n "$local_root" ]; then
    mode="LOCAL"
  elif [ "$alias_count" -gt 0 ]; then
    mode="REMOTE"
  fi
  lines+=("MODE=$mode")
  case "$mode" in
    LOCAL)  lines+=("→ Cloverleaf is on THIS box. Work the local tree at \$HCIROOT directly — do NOT ask Bryan for a path.") ;;
    REMOTE) lines+=("→ Cloverleaf is on a REMOTE host. ${alias_count} SSH alias(es) configured: $(printf '%s' "$aliases" | tr '\n' ' ')") ;;
    *)      lines+=("→ No local Cloverleaf install and no SSH alias configured. If Bryan names a host, /ssh-add it; otherwise ask which mode applies.") ;;
  esac
  if [ -n "$aliases" ]; then
    lines+=("Configured SSH aliases: $(printf '%s' "$aliases" | tr '\n' ' ')")
    lines+=("For a remote alias: discover its env with the list_sites tool (alias=) — it resolves the remote \$HCIROOT (login shell, or an explicit pin if set) and walks NetConfigs. NEVER ask Bryan to export \$HCIROOT for a remote host. If list_sites reports HCIROOT empty with a sudo-gated-profile NOTE, have Bryan pin it once: /ssh-set-hciroot (e.g. qa → /hci/cis2025.01/integrator). If a remote op fails with 'read from master failed: Connection reset by peer' (the host rejects SSH session multiplexing), have Bryan switch that alias to DIRECT mode: /ssh-set-direct on — then ALL remote ops use a fresh per-command sshpass connection (no ControlMaster). In DIRECT mode the recovery for a discover failure is a stale password (/ssh-pass then /ssh-setup to re-validate), NOT a closed master.")
  fi

  if [ -n "${HCIROOT:-}" ]; then
    lines+=("HCIROOT=$HCIROOT (exists=$([ -d "$HCIROOT" ] && echo yes || echo no))")
  else
    lines+=("HCIROOT=")
  fi

  if [ -n "${HCISITE:-}" ]; then
    local sitedir="${HCISITEDIR:-${HCIROOT:-}/$HCISITE}"
    lines+=("HCISITE=$HCISITE")
    lines+=("HCISITEDIR=$sitedir (exists=$([ -d "$sitedir" ] && echo yes || echo no))")
    if [ -d "$sitedir" ]; then
      [ -f "$sitedir/NetConfig" ] && lines+=("NetConfig present: $(wc -l < "$sitedir/NetConfig" | tr -d ' ') lines, $(wc -c < "$sitedir/NetConfig" | tr -d ' ') bytes")
      [ -d "$sitedir/Xlate" ] && lines+=("Xlate/: $(find "$sitedir/Xlate" -maxdepth 1 -type f 2>/dev/null | wc -l | tr -d ' ') files")
      [ -d "$sitedir/tables" ] && lines+=("tables/: $(find "$sitedir/tables" -maxdepth 1 -type f 2>/dev/null | wc -l | tr -d ' ') files")
      [ -d "$sitedir/tclprocs" ] && lines+=("tclprocs/: $(find "$sitedir/tclprocs" -maxdepth 1 -type f 2>/dev/null | wc -l | tr -d ' ') files")
      [ -d "$sitedir/formats" ] && lines+=("formats/: $(find "$sitedir/formats" -maxdepth 1 -type f 2>/dev/null | wc -l | tr -d ' ') files")
    fi
  else
    lines+=("HCISITE=")
  fi

  # Count immediate subdirectories of $HCIROOT, excluding the names listed
  # below, as a rough "how many sites" figure.
  if [ -n "${HCIROOT:-}" ] && [ -d "$HCIROOT" ]; then
    local site_count
    site_count=$(find "$HCIROOT" -mindepth 1 -maxdepth 1 -type d \
      ! -name 'archiving' ! -name 'master' ! -name 'lib' ! -name 'tcl' ! -name 'server' \
      ! -name 'client' ! -name 'clgui' ! -name 'cchgs' ! -name 'epic*' ! -name 'beaker' \
      ! -name 'Alerts' ! -name 'AppDefaults' ! -name 'Tables' ! -name 'backup*' \
      2>/dev/null | wc -l | tr -d ' ')
    lines+=("HCIROOT site-like subdirs: $site_count")
  fi

  # Tool layer detection
  local pyz_path=""
  if command -v cloverleaf-tools.pyz >/dev/null 2>&1; then
    pyz_path=$(command -v cloverleaf-tools.pyz)
  elif [ -x "./cloverleaf-tools.pyz" ]; then
    pyz_path="$PWD/cloverleaf-tools.pyz"
  elif [ -n "${HCIROOT:-}" ] && [ -x "$HCIROOT/cloverleaf-tools.pyz" ]; then
    pyz_path="$HCIROOT/cloverleaf-tools.pyz"
  fi
  if [ -n "$pyz_path" ]; then
    lines+=("Modern tools: cloverleaf-tools.pyz at $pyz_path")
  fi

  # Classic Eric scripts — detect a representative few
  local classic_found=""
  for c in tbn tbp tbh tbpr hlq mr mp mg hl awkcut sites each_site list_full_routes dbExtract; do
    command -v "$c" >/dev/null 2>&1 && classic_found+="$c "
  done
  if [ -n "$classic_found" ]; then
    lines+=("Classic tools on PATH: $classic_found")
  fi

  if [ -z "$pyz_path" ] && [ -z "$classic_found" ]; then
    lines+=("No Cloverleaf-tooling on PATH — Larry will fall back to bash one-liners only.")
  fi

  # Compose for system prompt
  CLOVERLEAF_CTX=$'\n\n## Detected runtime context (read-only)\n'
  for ln in "${lines[@]}"; do
    CLOVERLEAF_CTX+="- $ln"$'\n'
  done
}

detect_cloverleaf_env

# ─────────────────────────────────────────────────────────────────────────────
# Session state
# ─────────────────────────────────────────────────────────────────────────────
SESSION_ID="$(date +%Y-%m-%d-%H%M%S)-$$"
MESSAGES_FILE="$LARRY_HOME/sessions/$SESSION_ID.messages.json"
LOG_FILE="$LARRY_HOME/sessions/$SESSION_ID.log.md"
printf '[]' > "$MESSAGES_FILE"
{
  echo "# Larry-Anywhere session $SESSION_ID"
  echo "- start: $(date -Iseconds 2>/dev/null || date)"
  echo "- model: $LARRY_MODEL"
  echo "- host: $(hostname 2>/dev/null || echo unknown)"
  echo "- pwd: $(pwd)"
  echo ""
} > "$LOG_FILE"

# log_section TITLE — append a "## TITLE" markdown heading to the session log.
log_section() { printf '\n## %s\n' "$1" >> "$LOG_FILE"; }
# log_append TEXT — append TEXT plus a newline to the session log.
log_append() { printf '%s\n' "$1" >> "$LOG_FILE"; }

# ─────────────────────────────────────────────────────────────────────────────
# Message store helpers
# ─────────────────────────────────────────────────────────────────────────────
# NOTE on jq file IO: pass files to jq via stdin redirection, not as argv.
# On MobaXterm/Cygwin the bundled jq is a Windows-native binary that can't
# resolve Cygwin paths like /home/mobaxterm/... when they come in as argv.
# Stdin redirection always works because bash does the path open() itself.

# Each of these passes the value through a tempfile (--rawfile / --slurpfile)
# rather than argv (--arg / --argjson). Argv overflow ("Argument list too
# long") on Cygwin's ~32KB total cap was the v0.6.1 bug for TOOLS_JSON; the
# same pattern applies to any value that could grow with user input or
# assistant output (multi-paragraph prompts, large tool results, etc.).

# _messages_append LOADER FILTER PAYLOAD — shared body of the add_* helpers.
#   LOADER  : jq option used to bind the payload file to $v
#             (--rawfile for text, --slurpfile for JSON)
#   FILTER  : jq program that appends one message built from $v
#   PAYLOAD : the value to store
# Rewrites $MESSAGES_FILE only when jq succeeds. Both tempfiles are removed on
# every path, and the jq/mv status is returned so a failed append is visible
# to the caller instead of being reported as success.
_messages_append() {
  local loader="$1" filter="$2" payload="$3"
  local pfile out rc=0
  pfile=$(mktemp) || return 1
  out=$(mktemp) || { rm -f "$pfile"; return 1; }
  printf '%s' "$payload" > "$pfile"
  if jq "$loader" v "$(jqpath "$pfile")" "$filter" < "$MESSAGES_FILE" > "$out"; then
    mv "$out" "$MESSAGES_FILE" || rc=$?
  else
    rc=$?
  fi
  rm -f "$pfile" "$out"
  return "$rc"
}

# add_user_text TEXT — append a user message holding one text block.
add_user_text() {
  _messages_append --rawfile \
    '. + [{"role":"user","content":[{"type":"text","text":$v}]}]' "$1"
}

# add_assistant_blocks JSON — append an assistant message whose content is the
# given JSON value (the content-block array).
add_assistant_blocks() {
  _messages_append --slurpfile '. + [{"role":"assistant","content":$v[0]}]' "$1"
}

# add_user_tool_results JSON — append a user message whose content is the
# given JSON value (the tool_result block array).
add_user_tool_results() {
  _messages_append --slurpfile '. + [{"role":"user","content":$v[0]}]' "$1"
}

# ─────────────────────────────────────────────────────────────────────────────
# Tool implementations
# ─────────────────────────────────────────────────────────────────────────────

# tool_read_file PATH — print PATH with a right-aligned 6-wide line number and
# a tab before each line. All outcomes are reported on stdout for the model:
#   - blocked path  -> one-line JSON error object
#   - missing / not a regular file / unreadable / over 250000 bytes
#                   -> "ERROR: …" line
tool_read_file() {
  local path="$1"
  # v0.8.0-a: PHI safety path-block list. Refuse reads under any path that
  # contains the desanitization key, OAuth tokens, env secrets, the auto-PHI
  # audit log (which stores PHI values in clear), or prior-session transcripts.
  # Returns a structured JSON error so the model surfaces the refusal explicitly
  # instead of treating it like an ordinary "not found". Closes V4, V6, V11
  # from Vera's audit (Deliverables/2026-05-27-cloverleaf-larry-phi-leak-audit.md).
  #
  # Block-list is computed AT CALL TIME (not script-parse time) so $LARRY_HOME
  # resolves against the running process's value. realpath normalization is
  # best-effort — symlinks resolve when realpath is available, otherwise we
  # fall back to literal-prefix comparison on both the user-supplied path
  # AND the canonicalized form.
  if _read_file_path_blocked "$path"; then
    printf '{"error":"path blocked by PHI safety policy","path":%s,"reason":"%s"}\n' \
      "$(printf '%s' "$path" | jq -Rs .)" \
      "this path is under \$LARRY_HOME/log, sanitize, sessions, or contains an OAuth/env secret file; access denied to prevent de-sanitization or credential leak"
    return
  fi
  if [ ! -e "$path" ]; then echo "ERROR: file not found: $path"; return; fi
  if [ ! -f "$path" ]; then echo "ERROR: not a regular file: $path"; return; fi
  # Without this, an unreadable file produced EMPTY output (size fell back to
  # 0, awk failed on stderr), which looks like an empty file to the model.
  if [ ! -r "$path" ]; then echo "ERROR: file not readable: $path"; return; fi
  local size; size=$(wc -c < "$path" 2>/dev/null || echo 0)
  # Some wc builds pad with spaces, and Cygwin tools can add a CR: keep digits.
  size="${size//[!0-9]/}"
  [ -n "$size" ] || size=0
  if [ "$size" -gt 250000 ]; then
    echo "ERROR: file too large ($size bytes, limit 250KB). Use grep_files to target sections."
    return
  fi
  # Feed the file on stdin: as an awk OPERAND a relative name such as
  # "k=v.txt" is parsed as a variable assignment, so awk would read stdin
  # instead of the file.
  awk '{printf "%6d\t%s\n", NR, $0}' < "$path"
}

# v0.8.0-a: True (0) if PATH is on the PHI-safety block list.
# Blocks (each compared against both the literal path and its canonicalized
# form, against both the literal $LARRY_HOME and its canonicalized form —
# four-way comparison handles macOS /tmp → /private/tmp symlinking and
# user-supplied symlinks alike):
#   $LARRY_HOME/log/        — auto-phi.log, oauth.log, headers.log, session logs
#   $LARRY_HOME/sanitize/   — lookup.tsv (the desanitization key)
#   $LARRY_HOME/sessions/   — prior transcript history
#   $LARRY_HOME/.oauth.json — OAuth subscription tokens
#   $LARRY_HOME/.env        — env-var secrets (if present)
#   $LARRY_HOME/.api-key    — per-client Anthropic API key (v0.8.10, 0600)
#
# Portability:
#   - GNU `realpath -m` resolves nonexistent paths; macOS `realpath` requires
#     the path to exist. We try `realpath -m` first, then plain `realpath`,
#     then a python3 fallback (os.path.realpath, which works on nonexistent
#     paths everywhere we ship). If all three fail, literal-prefix is the
#     only remaining defense — the block still works for direct attempts.

# _read_file_canon PATH
# Prints the canonical form of PATH (no trailing newline) and returns 0, or
# returns 1 with no output when PATH is empty or no resolver succeeded.
_read_file_canon() {
  local p="$1"
  [ -z "$p" ] && return 1
  local out
  if command -v realpath >/dev/null 2>&1; then
    out=$(realpath -m "$p" 2>/dev/null || true)
    if [ -n "$out" ]; then printf '%s' "$out"; return 0; fi
    out=$(realpath "$p" 2>/dev/null || true)
    if [ -n "$out" ]; then printf '%s' "$out"; return 0; fi
  fi
  if command -v python3 >/dev/null 2>&1; then
    out=$(python3 -c 'import os,sys; print(os.path.realpath(sys.argv[1]))' "$p" 2>/dev/null || true)
    if [ -n "$out" ]; then printf '%s' "$out"; return 0; fi
  fi
  return 1
}

# _read_file_path_blocked PATH
# Returns 0 when PATH (literal or canonical) is one of the protected
# locations listed above, or is the configured LARRY_API_KEY_FILE; 1 otherwise.
# Returns 1 (not blocked) when LARRY_HOME is empty.
_read_file_path_blocked() {
  local p="$1"
  local home="${LARRY_HOME%/}"
  [ -z "$home" ] && return 1
  local canon hcanon
  canon=$(_read_file_canon "$p" 2>/dev/null || true)
  hcanon=$(_read_file_canon "$home" 2>/dev/null || true)

  # Always block the configured per-client API-key file, even if LARRY_API_KEY_FILE
  # points outside $LARRY_HOME. Compare both literal and canonicalized forms so
  # a symlinked or relative request can't slip past.
  if [ -n "${LARRY_API_KEY_FILE:-}" ]; then
    local akf akf_canon
    akf="${LARRY_API_KEY_FILE}"
    akf_canon=$(_read_file_canon "$akf" 2>/dev/null || true)
    case "$p" in
      "$akf"|"$akf_canon") return 0 ;;
    esac
    [ -n "$canon" ] && case "$canon" in
      "$akf"|"$akf_canon") return 0 ;;
    esac
  fi

  local h hp
  for h in "$home" "$hcanon"; do
    [ -z "$h" ] && continue
    for hp in "$p" "$canon"; do
      [ -z "$hp" ] && continue
      case "$hp" in
        "$h"/log|"$h"/log/*) return 0 ;;
        "$h"/sanitize|"$h"/sanitize/*) return 0 ;;
        "$h"/sessions|"$h"/sessions/*) return 0 ;;
        "$h"/.oauth.json|"$h"/.oauth.json.*) return 0 ;;
        "$h"/.env|"$h"/.env.*) return 0 ;;
        "$h"/.api-key|"$h"/.api-key.*) return 0 ;;
      esac
    done
  done
  return 1
}

# tool_list_dir [PATH]
# `ls -la` of PATH (default "."). Protected paths yield a JSON error object.
tool_list_dir() {
  local path="${1:-.}"
  # v0.8.0-a sweep: list_dir of $LARRY_HOME/log etc. leaks filenames (e.g.
  # session-2026-05-27-deadbeef.log) and existence of .oauth.json. Block.
  if _read_file_path_blocked "$path"; then
    printf '{"error":"path blocked by PHI safety policy","path":%s,"reason":"%s"}\n' \
      "$(printf '%s' "$path" | jq -Rs .)" \
      "directory listing denied for \$LARRY_HOME/log, sanitize, sessions"
    return
  fi
  if [ ! -d "$path" ]; then echo "ERROR: not a directory: $path"; return; fi
  # --color=never is GNU-only; fall back to plain ls where it is rejected.
  ls -la --color=never "$path" 2>/dev/null || ls -la "$path"
}

# tool_grep_files PATTERN [PATH]
# Recursive, binary-skipping grep of PATH (default "."), capped at 300 lines
# with a trailer stating the total when the cap was hit.
tool_grep_files() {
  local pattern="$1"; local path="${2:-.}"
  # v0.8.0-a sweep: grep_files of $LARRY_HOME/log/auto-phi.log would emit
  # JSONL value/token pairs in clear (same de-sanitization risk as read_file).
  if _read_file_path_blocked "$path"; then
    printf '{"error":"path blocked by PHI safety policy","path":%s,"reason":"%s"}\n' \
      "$(printf '%s' "$path" | jq -Rs .)" \
      "grep denied for \$LARRY_HOME/log, sanitize, sessions, OAuth, env"
    return
  fi
  if [ ! -e "$path" ]; then echo "ERROR: path not found: $path"; return; fi
  # `-e PATTERN` and `--` keep a pattern or path that begins with "-" from
  # being parsed as grep options (previously "-foo" was read as `-f oo`).
  local total
  total=$(grep -rnI --color=never -c -e "$pattern" -- "$path" 2>/dev/null \
    | awk -F: '{s+=$NF} END {print s+0}')
  grep -rnI --color=never -e "$pattern" -- "$path" 2>/dev/null | head -300
  if [ "$total" -gt 300 ]; then
    echo "── shown 300 / $total total matches — narrow your pattern, or use bash_exec for counts ──"
  fi
}

# tool_glob_files PATTERN [PATH]
# Lists regular files under PATH (default ".") whose name matches the find
# -name PATTERN, capped at 300 entries.
tool_glob_files() {
  local pattern="$1"; local path="${2:-.}"
  # v0.8.0-a sweep: glob_files of $LARRY_HOME/sessions/ would enumerate every
  # past session log filename; block.
  if _read_file_path_blocked "$path"; then
    printf '{"error":"path blocked by PHI safety policy","path":%s,"reason":"%s"}\n' \
      "$(printf '%s' "$path" | jq -Rs .)" \
      "glob denied for \$LARRY_HOME/log, sanitize, sessions"
    return
  fi
  if [ ! -d "$path" ]; then echo "ERROR: not a directory: $path"; return; fi
  local all; all=$(find "$path" -type f -name "$pattern" 2>/dev/null)
  local total; total=$(printf '%s\n' "$all" | grep -c .)
  printf '%s\n' "$all" | head -300
  if [ "$total" -gt 300 ]; then
    echo "── shown 300 / $total total entries — narrow your pattern ──"
  fi
}

# tool_write_file PATH CONTENT
# Shows a diff (existing file) or a 40-line preview (new file) on stderr, asks
# the operator to approve, and writes CONTENT to PATH only on approval. The
# tool result (OK / ERROR / DENIED) goes to stdout.
#
# NOTE(review): the text between `read -r answer` and `/dev/null` was lost in
# this copy of the file. The tty read, the approval test and the parent-dir
# mkdir below are a reconstruction — confirm against the pristine source.
tool_write_file() {
  local path="$1"; local content="$2"
  local exists="no"; [ -f "$path" ] && exists="yes"
  printf '\n%s══ write_file ══%s\n' "$C_YELLOW" "$C_RESET" >&2
  printf ' path: %s\n' "$path" >&2
  printf ' exists: %s\n' "$exists" >&2
  printf ' bytes: %d\n' "${#content}" >&2
  if [ "$exists" = "yes" ]; then
    local tmp; tmp=$(mktemp)
    printf '%s' "$content" > "$tmp"
    printf '%s── diff ──%s\n' "$C_DIM" "$C_RESET" >&2
    diff -u "$path" "$tmp" >&2 || true
    rm -f "$tmp"
  else
    printf '%s── new file preview (first 40 lines) ──%s\n' "$C_DIM" "$C_RESET" >&2
    printf '%s' "$content" | head -40 >&2
    printf '\n' >&2
  fi
  printf '%sApprove write? [y/N]:%s ' "$C_BOLD" "$C_RESET" >&2
  local answer=""
  # No usable tty → empty answer → denied (fail closed).
  read -r answer </dev/tty 2>/dev/null || answer=""
  answer="${answer//$'\r'/}"   # Cygwin/MobaXterm may deliver a trailing CR
  case "$answer" in
    [Yy]|[Yy][Ee][Ss])
      mkdir -p "$(dirname "$path")" 2>/dev/null
      # Report success only when the write actually succeeded; previously
      # "OK" was printed even if the redirect failed.
      if printf '%s' "$content" > "$path"; then
        echo "OK: wrote $(printf '%s' "$content" | wc -l | tr -d ' ') lines to $path"
        log_section "write_file $path (approved)"; log_append '```'; log_append "$content"; log_append '```'
      else
        echo "ERROR: write failed: $path"
        log_section "write_file $path (FAILED)"
      fi
      ;;
    *)
      echo "DENIED by user. No write performed."
      log_section "write_file $path (DENIED)"
      ;;
  esac
}

# ─────────────────────────────────────────────────────────────────────────────
# v3 NetConfig tools — first-class native capabilities for Cloverleaf work.
# Implemented as small bash+awk scripts in lib/ (alongside this file or in
# $LARRY_HOME/lib). They invoke nothing from v1 scripts or v2 .pyz.
# ─────────────────────────────────────────────────────────────────────────────

# Prints the first directory among "<dir of $0>/lib" and "$LARRY_HOME/lib" that
# contains an executable nc-parse.sh; returns 1 when neither qualifies.
_resolve_lib_dir() {
  local self_dir candidate
  self_dir=$(cd "$(dirname "$0")" 2>/dev/null && pwd)
  for candidate in "$self_dir/lib" "$LARRY_HOME/lib"; do
    [ -d "$candidate" ] && [ -x "$candidate/nc-parse.sh" ] && { echo "$candidate"; return 0; }
  done
  return 1
}
LARRY_LIB_DIR="$(_resolve_lib_dir || echo '')"

# v0.7.5: shared CR-defense primitives (coerce_int / strip_cr / read_clean)
# for the entire cloverleaf-larry tool family. Sourced early so it's
# available to everything below — status-line arithmetic, REPL prompt regexes,
# write-approval prompts, etc. See lib/cygwin-safe.sh header for the full
# CR-taint diagnosis. Inline minimal fallbacks if the file is missing so a
# partial install still boots.
if [ -n "$LARRY_LIB_DIR" ] && [ -r "$LARRY_LIB_DIR/cygwin-safe.sh" ]; then
  # shellcheck disable=SC1090,SC1091
  . "$LARRY_LIB_DIR/cygwin-safe.sh"
else
  coerce_int() { local r="${1:-}" d="${2:-0}" c; c=$(printf '%s' "$r" | tr -cd '0-9'); printf '%s' "${c:-$d}"; }
  strip_cr() { local v="${1:-}"; printf '%s' "${v//$'\r'/}"; }
  rtrim() { local v="${1:-}"; printf '%s' "${v%"${v##*[![:space:]]}"}"; }
fi

# v0.7.0: HL7 v2.x schema for inline tab completion + /hl7 / /hl7-fields slash
# commands.
# Sourced (not executed) so the bash assoc arrays live in our shell.
# Silently no-ops on bash <4 (assoc arrays unavailable); the REPL still works,
# just without HL7 tab completion.
if [ -n "$LARRY_LIB_DIR" ] && [ -r "$LARRY_LIB_DIR/hl7-schema.sh" ]; then
  # shellcheck disable=SC1090,SC1091
  . "$LARRY_LIB_DIR/hl7-schema.sh" 2>/dev/null || true
fi

# Guard used by every lib-backed tool: returns 0 when LARRY_LIB_DIR is set,
# otherwise prints an operator-facing hint on stdout and returns 1.
_lib_err_if_missing() {
  [ -n "$LARRY_LIB_DIR" ] && return 0
  echo "ERROR: lib/ tools not found. Looked in \$(dirname \$0)/lib and \$LARRY_HOME/lib."
  echo " Run install-larry.sh or scp the larry-anywhere/lib/ directory next to larry.sh."
  return 1
}

# v0.8.18: _fence_aligned_table — wrap an already-space-aligned tool table in a
# ```text fence so the on-server LLM reproduces it VERBATIM in the monospace
# terminal instead of re-rendering it as a (mis-aligned) markdown table. Reads
# the tool's stdout/stderr on STDIN; the table tools (nc-find, nc-inbound) emit
# columns padded with %-*s, so the bytes are ALREADY aligned — we only need to
# fence them and tell the model not to touch them.
#
# Pass-through guarantee: if the captured text is empty, or looks like an error /
# usage line (so there is no real table to protect), we emit it UNCHANGED — the
# model must still see error text plainly. We never alter the table bytes; the
# fence and the two marker lines are the only additions.
_fence_aligned_table() {
  local body; body=$(cat)
  # Empty or obvious error/diagnostic → pass through untouched.
  # The body is fed to grep via a here-string rather than `printf | grep -q`:
  # this file runs with `set -o pipefail`, and grep -q exiting on an early
  # match can SIGPIPE the writer on large bodies, making the pipeline report
  # failure and wrongly fencing error output.
  if [ -z "$body" ] \
    || grep -qiE '^(ERROR|nc-[a-z]+:|usage:|\[)' <<<"$body"; then
    printf '%s\n' "$body"
    return 0
  fi
  printf '%s\n' "TABLE (monospace, pre-aligned by the tool — reproduce VERBATIM in a code block; do NOT convert to a markdown table):"
  printf '%s\n' '```text'
  printf '%s\n' "$body"
  printf '%s\n' '```'
}

# ── Thin nc-parse.sh wrappers. Each takes the NetConfig path first, merges
# stderr into stdout so the model sees diagnostics, and returns early when the
# lib directory is missing. ──
tool_nc_list_protocols() {
  local nc="$1"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" list-protocols "$nc" 2>&1
}

tool_nc_list_processes() {
  local nc="$1"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" list-processes "$nc" 2>&1
}

tool_nc_protocol_block() {
  local nc="$1" name="$2"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" protocol-block "$nc" "$name" 2>&1
}

tool_nc_protocol_field() {
  local nc="$1" name="$2" field="$3"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" protocol-field "$nc" "$name" "$field" 2>&1
}

tool_nc_protocol_nested() {
  local nc="$1" name="$2" path="$3"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" protocol-nested "$nc" "$name" "$path" 2>&1
}

# Optional second argument is forwarded as `--filter`.
tool_nc_protocol_summary() {
  local nc="$1" filter="${2:-}"
  _lib_err_if_missing || return
  if [ -n "$filter" ]; then
    "$LARRY_LIB_DIR/nc-parse.sh" protocol-summary "$nc" --filter "$filter" 2>&1
  else
    "$LARRY_LIB_DIR/nc-parse.sh" protocol-summary "$nc" 2>&1
  fi
}

tool_nc_destinations() {
  local nc="$1" name="$2"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" destinations "$nc" "$name" 2>&1
}

tool_nc_xlate_refs() {
  local nc="$1" name="${2:-}"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" xlate-refs "$nc" "$name" 2>&1
}

tool_nc_find_inbound() {
  local nc="$1" mode="${2:-all}" fmt="${3:-tsv}"
  _lib_err_if_missing || return
  # v0.8.18: fence the table format so the model reproduces it verbatim in the
  # monospace terminal. tsv/jsonl are data formats — passed through unfenced.
  if [ "$fmt" = "table" ]; then
    "$LARRY_LIB_DIR/nc-inbound.sh" "$nc" --mode "$mode" --format "$fmt" 2>&1 | _fence_aligned_table
  else
    "$LARRY_LIB_DIR/nc-inbound.sh" "$nc" --mode "$mode" --format "$fmt" 2>&1
  fi
}

# Positional args 5-7 are optional: inbound host (default 127.0.0.1), jump
# process name (default server_jump) and encoding (omitted when empty).
tool_nc_make_jump() {
  local nc="$1" inbound="$2" new_host="$3" jump_port="$4"
  local inbound_host="${5:-127.0.0.1}" proc_jump="${6:-server_jump}" encoding="${7:-}"
  _lib_err_if_missing || return
  local args=(--inbound "$inbound" --new-host "$new_host" --jump-port "$jump_port" \
    --inbound-host "$inbound_host" --process-jump "$proc_jump")
  [ -n "$encoding" ] && args+=(--encoding "$encoding")
  "$LARRY_LIB_DIR/nc-make-jump.sh" "$nc" "${args[@]}" 2>&1
}

tool_nc_sources() {
  local nc="$1" name="$2"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" sources "$nc" "$name" 2>&1
}

# nc_paths — deterministic route-chain path ENUMERATOR. The single walker
# backend; the model calls this ONCE instead of chaining nc_destinations +
# grep_files + read_file (the old ~$1 brute-force). INTRA-site, the next hop is
# resolved from the DATAXLATE DEST list (never an ICLSERVERPORT walk, so it
# cannot recur the old paths.tcl crash). CROSS-site (v0.8.20), threads link via
# named `destination` blocks: a DEST that names a destination block resolves to
# its { SITE } { THREAD } (the PORT corroborates the link; ICLSERVERPORT is read
# GUARDED). Each NetConfig is parsed EXACTLY ONCE into an in-memory graph
# (nc-parse.sh index) and the walk is pure in-memory lookups — no subprocess /
# re-parse per hop. --site-only disables cross-site linking. Either pass an
# explicit netconfig, or a (thread,site) pair, or --all for the whole-site /
# cross-site entry-chain inventory.
# tool_nc_paths NETCONFIG THREAD SITE [DIRECTION] [ALL] [SITE_ONLY] [FORMAT] [HCIROOT]
# Builds the nc-paths.sh argument list from the non-empty inputs.
#   DIRECTION  full (default) | up | down; anything else → error, return 1
#   ALL        "1" adds --all
#   SITE_ONLY  "1" adds --site-only
#   FORMAT     passed as --format (default v1); "table" output is fenced
#   HCIROOT    defaults to $HCIROOT when set
tool_nc_paths() {
  local netconfig="$1" thread="$2" site="$3" direction="${4:-full}"
  local all_mode="${5:-0}" site_only="${6:-0}" fmt="${7:-v1}" hciroot="${8:-${HCIROOT:-}}"
  _lib_err_if_missing || return
  local args=()
  [ -n "$netconfig" ] && args+=(--netconfig "$netconfig")
  [ -n "$thread" ] && args+=("$thread")
  [ -n "$site" ] && args+=(--site "$site")
  case "$direction" in
    up) args+=(--upstream) ;;
    down) args+=(--downstream) ;;
    full|"") : ;;
    *) echo "ERROR: unknown nc_paths direction: $direction (full|up|down)"; return 1 ;;
  esac
  [ "$all_mode" = "1" ] && args+=(--all)
  [ "$site_only" = "1" ] && args+=(--site-only)
  [ -n "$hciroot" ] && args+=(--hciroot "$hciroot")
  args+=(--format "$fmt")
  # v0.8.18 convention: fence aligned tables so the model reproduces them
  # verbatim in the monospace terminal. tsv/jsonl are data — passed unfenced.
  if [ "$fmt" = "table" ]; then
    "$LARRY_LIB_DIR/nc-paths.sh" "${args[@]}" 2>&1 | _fence_aligned_table
  else
    "$LARRY_LIB_DIR/nc-paths.sh" "${args[@]}" 2>&1
  fi
}

tool_nc_tclproc_refs() {
  local nc="$1" name="${2:-}"
  _lib_err_if_missing || return
  "$LARRY_LIB_DIR/nc-parse.sh" tclproc-refs "$nc" "$name" 2>&1
}

# tool_hl7_field MESSAGE FIELD_PATH
# Writes MESSAGE to a temp file and runs hl7-field.sh on it. Returns the
# tool's exit status (previously the status of the trailing `rm` leaked out,
# so failures always looked like success).
tool_hl7_field() {
  local message="$1" field_path="$2"
  _lib_err_if_missing || return
  local tmp; tmp=$(mktemp)
  printf '%s' "$message" > "$tmp"
  "$LARRY_LIB_DIR/hl7-field.sh" "$field_path" "$tmp" 2>&1
  local rc=$?
  rm -f "$tmp"
  return $rc
}

# tool_nc_msgs THREAD [AFTER] [BEFORE] [MRN_FIELD] [MRN_VALUE] [LIMIT] [FORMAT] [SITEDIR] [DB]
# Optional filters are forwarded only when non-empty; the --field filter needs
# both MRN_FIELD and MRN_VALUE. SITEDIR defaults to $HCISITEDIR when set.
tool_nc_msgs() {
  local thread="$1" after="${2:-}" before="${3:-}" mrn_field="${4:-}" mrn_value="${5:-}"
  local limit="${6:-10}" format="${7:-text}" sitedir="${8:-${HCISITEDIR:-}}" db_path="${9:-}"
  _lib_err_if_missing || return
  local args=("$thread" --limit "$limit" --format "$format")
  [ -n "$after" ] && args+=(--after "$after")
  [ -n "$before" ] && args+=(--before "$before")
  [ -n "$sitedir" ] && args+=(--sitedir "$sitedir")
  [ -n "$db_path" ] && args+=(--db "$db_path")
  if [ -n "$mrn_field" ] && [ -n "$mrn_value" ]; then
    args+=(--field "${mrn_field}=${mrn_value}")
  fi
  "$LARRY_LIB_DIR/nc-msgs.sh" "${args[@]}" 2>&1
}

# tool_nc_find MODE QUERY [FORMAT] [HCIROOT]
# MODE must be one of name|port|host|process|where|xlate|tclproc and becomes
# the --MODE flag; unknown modes print an error and return 1.
tool_nc_find() {
  local mode="$1" query="$2" format="${3:-table}" hciroot="${4:-${HCIROOT:-}}"
  _lib_err_if_missing || return
  local args=(--format "$format")
  [ -n "$hciroot" ] && args+=(--hciroot "$hciroot")
  case "$mode" in
    name|port|host|process|where|xlate|tclproc) args+=(--"$mode" "$query") ;;
    *) echo "ERROR: unknown nc_find mode: $mode"; return 1 ;;
  esac
  # v0.8.18: fence the table format so the model reproduces it verbatim in the
  # monospace terminal. tsv/jsonl are data formats — passed through unfenced.
  if [ "$format" = "table" ]; then
    "$LARRY_LIB_DIR/nc-find.sh" "${args[@]}" 2>&1 | _fence_aligned_table
  else
    "$LARRY_LIB_DIR/nc-find.sh" "${args[@]}" 2>&1
  fi
}

# tool_nc_insert_protocol NETCONFIG BLOCK_TEXT [MODE] [ANCHOR]
# Stages BLOCK_TEXT in a temp file and runs `nc-insert-protocol.sh insert`.
# Returns the script's exit status.
tool_nc_insert_protocol() {
  local nc="$1" block_text="$2" mode="${3:-end}" anchor="${4:-}"
  _lib_err_if_missing || return
  local tmp; tmp=$(mktemp)
  printf '%s' "$block_text" > "$tmp"
  local args=(insert "$nc" "$tmp" --mode "$mode")
  [ -n "$anchor" ] && args+=(--anchor "$anchor")
  # Inherit LARRY_SESSION_ID from the running session so journal entries group
  # together. SESSION_ID is defaulted to empty because this file runs with
  # `set -u`; an unset SESSION_ID must not abort the tool.
  LARRY_SESSION_ID="${LARRY_SESSION_ID:-${SESSION_ID:-}}" \
    "$LARRY_LIB_DIR/nc-insert-protocol.sh" "${args[@]}" 2>&1
  local rc=$?
  rm -f "$tmp"
  return $rc
}

# tool_nc_add_route NETCONFIG PROTOCOL_NAME ROUTE_TEXT
# Stages ROUTE_TEXT in a temp file and runs `nc-insert-protocol.sh add-route`.
# Returns the script's exit status.
tool_nc_add_route() {
  local nc="$1" protocol_name="$2" route_text="$3"
  _lib_err_if_missing || return
  local tmp; tmp=$(mktemp)
  printf '%s' "$route_text" > "$tmp"
  LARRY_SESSION_ID="${LARRY_SESSION_ID:-${SESSION_ID:-}}" \
    "$LARRY_LIB_DIR/nc-insert-protocol.sh" add-route "$nc" "$protocol_name" "$tmp" 2>&1
  local rc=$?
  rm -f "$tmp"
  return $rc
}

# tool_nc_regression SCOPE COUNT ENV_A SITE_A ENV_B SITE_B OUT_DIR
#                    [ROUTE_CMD] [IGNORE] [PHASE] [DRY_RUN] [SRC_SSH] [TGT_SSH]
# Required flags are always passed; optional ones only when non-empty
# (DRY_RUN only when "1"). IGNORE defaults to MSH.7, PHASE to all.
tool_nc_regression() {
  local scope="$1" count="$2" env_a="$3" site_a="$4" env_b="$5" site_b="$6" out_dir="$7"
  local route_cmd="${8:-}" ignore="${9:-MSH.7}" phase="${10:-all}" dry_run="${11:-0}"
  local source_ssh_alias="${12:-}" target_ssh_alias="${13:-}"
  _lib_err_if_missing || return
  local args=(--scope "$scope" --count "$count" --env-a "$env_a" --env-b "$env_b" --out "$out_dir" \
    --ignore "$ignore" --phase "$phase")
  [ -n "$site_a" ] && args+=(--site-a "$site_a")
  [ -n "$site_b" ] && args+=(--site-b "$site_b")
  [ -n "$route_cmd" ] && args+=(--route-test-cmd "$route_cmd")
  [ "$dry_run" = "1" ] && args+=(--dry-run)
  [ -n "$source_ssh_alias" ] && args+=(--source-ssh-alias "$source_ssh_alias")
  [ -n "$target_ssh_alias" ] && args+=(--target-ssh-alias "$target_ssh_alias")
  # Pass our resolved lib dir so the regression script can reach ssh-helper.sh
  # without re-resolving from its own $0.
  LARRY_LIB_DIR="$LARRY_LIB_DIR" "$LARRY_LIB_DIR/nc-regression.sh" "${args[@]}" 2>&1
}

# tool_hl7_diff LEFT RIGHT [IGNORE] [INCLUDE] [FORMAT]
# IGNORE defaults to MSH.7; pass an explicit empty string to disable it.
tool_hl7_diff() {
  local left_path="$1" right_path="$2" ignore="${3:-MSH.7}" include="${4:-}" format="${5:-text}"
  _lib_err_if_missing || return
  local args=()
  [ -n "$ignore" ] && args+=(--ignore "$ignore")
  [ -n "$include" ] && args+=(--include-fields "$include")
  args+=(--format "$format" "$left_path" "$right_path")
  "$LARRY_LIB_DIR/hl7-diff.sh" "${args[@]}" 2>&1
}

# ─────────────────────────────────────────────────────────────────────────────
# PHI preprocessing — replace {{phi:VALUE}} or {{phi:CATEGORY:VALUE}} in user
# input with a local deterministic token BEFORE sending to the API. Tokens
# come from the same lookup table hl7-sanitize.sh maintains, so they correlate
# with PHI sanitized out of file/smat content.
# ─────────────────────────────────────────────────────────────────────────────

# preprocess_phi_markers INPUT
# Echoes INPUT with every explicit PHI marker replaced by a token obtained from
# `hl7-sanitize.sh tokenize-value`. When the sanitizer is not executable the
# input is echoed unchanged. Each substitution is reported on stderr.
preprocess_phi_markers() {
  local input="$1"
  local sanitize_script="$LARRY_LIB_DIR/hl7-sanitize.sh"
  [ -x "$sanitize_script" ] || { printf '%s' "$input"; return; }

  # Three forms supported (processed in this order to avoid ambiguity):
  #   1. @@VALUE@@   bracketed; VALUE has no '@' and uses single-space word
  #                  separation. Use for values WITH spaces. Auto-detect category.
  #   2. @@VALUE     unbracketed; VALUE has no whitespace or '@'. Auto-detect.
  #   3. {{phi:V}} / {{phi:CAT:V}}   legacy, still supported.

  # Helper: tokenize one VALUE (optional category) and substitute MARKER → token.
  # Mutates the enclosing function's $input through dynamic scoping.
  _phi_sub() {
    local marker="$1" value="$2" category="${3:-}"
    local args=(tokenize-value)
    [ -n "$category" ] && args+=(--category "$category")
    args+=("$value")
    local token; token=$("$sanitize_script" "${args[@]}" 2>/dev/null)
    [ -z "$token" ] && token="[[PHI_ERROR]]"
    input="${input//"$marker"/"$token"}"
    printf '%sphi>%s %s → %s\n' "$C_YELLOW" "$C_RESET" "$marker" "$token" >&2
  }

  # Pass 1: bracketed @@VALUE@@ — value has no '@', allows internal single spaces
  # but no leading/trailing whitespace inside the brackets.
  local bracketed
  bracketed=$(printf '%s' "$input" | grep -oE '@@[^@[:space:]]+([ \t]+[^@[:space:]]+)*@@' 2>/dev/null | sort -u)
  while IFS= read -r marker; do
    [ -z "$marker" ] && continue
    local val="${marker#@@}"; val="${val%@@}"
    _phi_sub "$marker" "$val"
  done <<< "$bracketed"

  # Pass 2: unbracketed @@VALUE — value has no whitespace or '@'. Anything that
  # was inside a bracketed marker has already been replaced with a [[TOK]], so
  # it won't be re-matched here.
  # Markers are processed in reverse byte order so that a marker which is a
  # prefix of another (@@John vs @@Johnson) is substituted AFTER the longer
  # one; otherwise the shorter replacement corrupts the longer marker and
  # leaves part of the value ("son") in clear text.
  local unbracketed
  unbracketed=$(printf '%s' "$input" | grep -oE '@@[^@[:space:]]+' 2>/dev/null | LC_ALL=C sort -ru)
  while IFS= read -r marker; do
    [ -z "$marker" ] && continue
    local val="${marker#@@}"
    _phi_sub "$marker" "$val"
  done <<< "$unbracketed"

  # Pass 3: legacy {{phi:VALUE}} / {{phi:CATEGORY:VALUE}}.
  local legacy
  legacy=$(printf '%s' "$input" | grep -oE '\{\{phi:[^{}]+\}\}' 2>/dev/null | sort -u)
  while IFS= read -r marker; do
    [ -z "$marker" ] && continue
    local body="${marker#\{\{phi:}"; body="${body%\}\}}"
    local cat="" val=""
    # A leading UPPER_SNAKE word before the first ':' is taken as the category.
    if [[ "$body" == *:* ]] && [[ "${body%%:*}" =~ ^[A-Z][A-Z0-9_]+$ ]]; then
      cat="${body%%:*}"; val="${body#*:}"
    else
      val="$body"
    fi
    _phi_sub "$marker" "$val" "$cat"
  done <<< "$legacy"

  unset -f _phi_sub
  printf '%s' "$input"
}

# ─────────────────────────────────────────────────────────────────────────────
# v0.7.3 — Automatic PHI detection (supersedes the af2ffe8 prototype).
#
# Background
# ----------
# Bryan's directive: "Err on the side of caution and tokenize anything you
# think you may need to as long as it doesn't break the tools." Priorities,
# in order:
#   1. DO NOT break tools (constraint)
#   2. Catch all PHI (goal)
#   3. Minimize false positives (nice-to-have, secondary)
#
# Reference implementation: commit af2ffe8 (reverted with v0.7.1) — Bryan's
# own first pass. v0.7.3 supersedes it with:
#   * Four-tier confidence model (vs. af2ffe8's flat regex set) so we can
#     reason about WHY a value tokenizes and gate behavior accordingly.
#   * Explicit blacklist contexts (path-like, HL7 field refs like PID.18,
#     version strings, port keywords, error/status codes, JSON keys, fenced
#     code) — addresses the false-positive failure modes the spec calls out.
#   * Tool-result surface: HL7-shaped tool outputs (read_file of .hl7, .txt
#     with segments; nc_msgs output) get sanitized BEFORE entering message
#     history. Generic outputs (list_dir, grep_files, web search) are NOT
#     touched — the spec is explicit about this.
#   * Structured JSONL audit log at $LARRY_HOME/log/auto-phi.log.
#   * `/phi-auto on|off|confirm|status` slash command + LARRY_AUTO_PHI env.
#   * Per-turn override `!nophi <rest of prompt>`.
#
# Surfaces
# --------
#   1. USER INPUT: invoked AFTER preprocess_phi_markers in main_loop, so any
#      explicit @@VALUE / {{phi:V}} markers are already tokenized. Auto-PHI
#      fills the gaps Bryan didn't manually mark.
#   2. TOOL RESULTS: invoked from the tool dispatch path (agent_turn) when the
#      result text looks like HL7 (contains `\rMSH|` or starts with `MSH|`).
#
# Detection tiers (first match wins per token)
# --------------------------------------------
#   Tier 1 DEFINITE    SSN, email, phone (formatted), NPI (with context).
#                      High confidence. Always tokenize.
#   Tier 2 CONTEXTUAL  Numeric value immediately preceded by an MRN/Patient/
#                      DOB/Account/Visit/Acct/Record/Birth keyword (within 5
#                      chars). Always tokenize.
#   Tier 3 HL7-CTX     When the line/paragraph mentions a known-PHI HL7 field
#                      ref (PID.3, PID.5, PID.7, PID.11, PID.13, PID.18,
#                      NK1.*, GT1.*), be aggressive about plausibly-PHI-shaped
#                      values in the same line. Tokenize.
#   Tier 4 KNOWN       Value already exists in $LARRY_HOME/sanitize/lookup.tsv
#                      (Bryan has tokenized this exact value before). Always
#                      tokenize, reusing the existing token.
#
# Blacklist contexts (NEVER tokenize, even if a tier matches)
# -----------------------------------------------------------
#   * Path-like: starts with /, ./, ../, ~/, contains /
#   * HL7 field references THEMSELVES: [A-Z]{3}\.\d+ — the digit after the
#     dot is a field index, not PHI. (Critical: spec verification #5.)
#   * Version strings: vN.N.N, semver, ISO dates YYYY-MM-DD
#   * Port numbers: :NNNN, port NNNN, tcp:NNNN, PROTOCOL.PORT=
#   * Error / status codes: error NNN, code NNN, rc=N, HTTP NNN, status NNN
#   * JSON key position: token immediately followed by ":" with a string
#     value to the right (don't break tool argument JSON)
#   * Fenced code: anything inside ``` ... ``` is skipped wholesale
#
# Behavior controls
# -----------------
#   env LARRY_AUTO_PHI      1 (default, ON) | 0 (off) | confirm (prompt on
#                           Tier 3-4 matches) | strict (v0.8.0, fail-closed)
#   /phi-auto on|off|confirm|strict|status
#   !nophi <rest of prompt> per-turn override (strip prefix, skip auto-PHI)
#
# After each pass, a dim status line summarises what was caught:
#   phi> auto-tokenized 3 values: MRN×1 NAME×1 DOB×1
#
# Audit
# -----
# Every tokenization writes a JSONL line to $LARRY_HOME/log/auto-phi.log:
#   { "ts": "...", "value": "<raw value>", "category": "MRN", "token": "[[MRN_0042]]",
#     "tier": "contextual", "surface": "user_input"|"tool_result", "context": "..." }
# ─────────────────────────────────────────────────────────────────────────────

# Mode resolution. Env default per spec: ON unless 0 / off.
# Accepted env values: "1" / "on" / "" → on ; "0" / "off" → off ; "confirm" → confirm ;
# "strict" → fail-closed (v0.8.0-c).
# (aggressive accepted as an alias for "on" to preserve af2ffe8 muscle memory.)
# Any unrecognised value also resolves to "on".
_resolve_auto_phi_mode() {
  local v="${LARRY_AUTO_PHI:-1}"
  case "$v" in
    0|off|OFF) printf 'off' ;;
    confirm|CONFIRM) printf 'confirm' ;;
    strict|STRICT) printf 'strict' ;;
    1|on|ON|aggressive|"") printf 'on' ;;
    *) printf 'on' ;;
  esac
}
AUTO_PHI_MODE="$(_resolve_auto_phi_mode)"
AUTO_PHI_SESSION_COUNT=0
AUTO_PHI_LOG="${LARRY_HOME}/log/auto-phi.log"

# Per-session confirm cache. Keyed on canonical-normalized form so
# "John Smith" / "JOHN SMITH" share a single decision.
# bash >= 4 keeps the decisions in associative arrays; older shells fall back
# to "|"-delimited strings.
if (( BASH_VERSINFO[0] >= 4 )); then
  declare -A _AUTO_PHI_ACCEPTED 2>/dev/null
  declare -A _AUTO_PHI_DECLINED 2>/dev/null
else
  _AUTO_PHI_ACCEPTED_LIST=""
  _AUTO_PHI_DECLINED_LIST=""
fi

# Cache accessors. Each takes the normalized KEY. The *_check functions return
# 0 when KEY was recorded, 1 otherwise. An empty KEY is never recorded and never
# matches (guarded explicitly — an empty associative-array subscript makes bash
# emit "bad array subscript").
_auto_phi_accept_check() {
  local key="$1"
  [ -n "$key" ] || return 1
  if (( BASH_VERSINFO[0] >= 4 )); then
    [ -n "${_AUTO_PHI_ACCEPTED[$key]:-}" ]
  else
    [[ "|$_AUTO_PHI_ACCEPTED_LIST|" == *"|$key|"* ]]
  fi
}

_auto_phi_decline_check() {
  local key="$1"
  [ -n "$key" ] || return 1
  if (( BASH_VERSINFO[0] >= 4 )); then
    [ -n "${_AUTO_PHI_DECLINED[$key]:-}" ]
  else
    [[ "|$_AUTO_PHI_DECLINED_LIST|" == *"|$key|"* ]]
  fi
}

_auto_phi_mark_accept() {
  local key="$1"
  [ -n "$key" ] || return 1
  if (( BASH_VERSINFO[0] >= 4 )); then
    _AUTO_PHI_ACCEPTED[$key]=1
  else
    _AUTO_PHI_ACCEPTED_LIST="${_AUTO_PHI_ACCEPTED_LIST}|$key"
  fi
}

_auto_phi_mark_decline() {
  local key="$1"
  [ -n "$key" ] || return 1
  if (( BASH_VERSINFO[0] >= 4 )); then
    _AUTO_PHI_DECLINED[$key]=1
  else
    _AUTO_PHI_DECLINED_LIST="${_AUTO_PHI_DECLINED_LIST}|$key"
  fi
}

# Emit one JSONL audit entry. All fields jq-quoted to handle PHI characters
# safely. Best-effort — never fail the host call.
# Args: value category token tier surface context
_auto_phi_log() {
  local value="$1" category="$2" token="$3" tier="$4" surface="$5" context="$6"
  local logdir; logdir="$(dirname "$AUTO_PHI_LOG")"
  local ts; ts="$(date -Iseconds 2>/dev/null || date)"
  # Keep only the first 80 characters of context so we don't bloat the log.
  local ctx_short="${context:0:80}"
  # The entry holds the raw PHI value, so create the directory and file under
  # a restrictive umask (in a subshell, leaving the caller's umask alone)
  # instead of relying only on the chmod afterwards — that left a window where
  # a freshly created log was readable under the default umask.
  (
    umask 077
    mkdir -p "$logdir"
    jq -cn \
      --arg ts "$ts" --arg value "$value" --arg category "$category" \
      --arg token "$token" --arg tier "$tier" --arg surface "$surface" \
      --arg context "$ctx_short" \
      '{ts:$ts,value:$value,category:$category,token:$token,tier:$tier,surface:$surface,context:$context}' \
      >> "$AUTO_PHI_LOG"
  ) 2>/dev/null || true
  # Still tighten pre-existing dirs/files created by older versions.
  chmod 700 "$logdir" 2>/dev/null || true
  chmod 600 "$AUTO_PHI_LOG" 2>/dev/null || true
}

# ── Blacklist guards. Return 0 (true) when the token should be SKIPPED. ───
# Most guards take (token, left_context, full_line) so we can look at the
# surroundings without re-tokenizing the whole input.
# Path guard: anything containing a slash, or a drive-letter path such as
# C:\..., is treated as a filesystem path. (The explicit /, ./, ../ and ~/
# prefixes are all covered by the "contains /" test.)
_auto_phi_skip_path_like() {
  local candidate="$1"
  if [[ "$candidate" == */* || "$candidate" == [A-Z]:\\* ]]; then
    return 0
  fi
  return 1
}

# HL7 field-reference guard. The DIGIT in "PID.18" is a field index, NOT
# an MRN. We need to spot this BEFORE the contextual MRN/NPI checks fire.
# Skip when the token is all digits and the left context ends with three
# capitals followed by a dot.
_auto_phi_skip_hl7_fieldref() {
  local candidate="$1" before="$2"
  local digits_re='^[0-9]+$' segref_re='[A-Z]{3}\.$'
  if [[ "$candidate" =~ $digits_re && "$before" =~ $segref_re ]]; then
    return 0
  fi
  return 1
}

# Version / date guard. Skips, in order:
#   vN.N … vN.N.N.N, bare semver N.N.N, ISO date YYYY-MM-DD, and 8 pure digits
#   that parse as a plausible YYYYMMDD in the 1900-2099 range.
_auto_phi_skip_version() {
  local candidate="$1" shape_re
  for shape_re in \
    '^v[0-9]+(\.[0-9]+){1,3}$' \
    '^[0-9]+\.[0-9]+\.[0-9]+$' \
    '^[0-9]{4}-[0-9]{2}-[0-9]{2}$' \
    '^(19|20)[0-9]{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])$'; do
    if [[ "$candidate" =~ $shape_re ]]; then
      return 0
    fi
  done
  return 1
}

# Port guard: an all-digit token preceded by ":" (":NNNN") or by a port-ish
# keyword ("port NNNN" / "tcp:NNNN" / "PROTOCOL.PORT=NNNN" / "TCP PORT").
_auto_phi_skip_port() {
  local candidate="$1" before="$2"
  local digits_re='^[0-9]+$'
  local colon_re=':$'
  local keyword_re='([Pp]ort|PORT|tcp|TCP|udp|UDP|listen|LISTEN)[[:space:]:=]*$'
  if ! [[ "$candidate" =~ $digits_re ]]; then
    return 1
  fi
  if [[ "$before" =~ $colon_re || "$before" =~ $keyword_re ]]; then
    return 0
  fi
  return 1
}

# Error/status-code guard: an all-digit token preceded by an error, code,
# HTTP, status or rc= keyword.
_auto_phi_skip_errcode() {
  local candidate="$1" before="$2"
  local digits_re='^[0-9]+$'
  local keyword_re='([Ee]rror|ERROR|[Cc]ode|CODE|HTTP|http|[Ss]tatus|STATUS|rc=|RC=)[[:space:]:=]*$'
  if [[ "$candidate" =~ $digits_re && "$before" =~ $keyword_re ]]; then
    return 0
  fi
  return 1
}

# JSON key position: token immediately followed by `":` (or `: ` with a
# JSON-quoted value to the right). We test using the SURROUNDING line: the
# token is skipped when the right-hand context begins with `"` or `:`.
_auto_phi_skip_json_key() {
  local candidate="$1" after="$2"
  if [[ "$after" == \"* || "$after" == :* ]]; then
    return 0
  fi
  return 1
}

# Timestamp guard — 13+ digit epoch milliseconds, or 10 digits starting with
# '1' (epoch seconds in the 2001-2286 range). Carry-over from af2ffe8's
# detector.
_auto_phi_skip_timestamp() {
  local v="$1"
  [[ "$v" =~ ^[0-9]+$ ]] || return 1
  local n="${#v}"
  [ "$n" -ge 13 ] && return 0
  if [ "$n" -eq 10 ] && [[ "$v" == 1* ]]; then return 0; fi
  return 1
}

# Already-tokenized form: [[CAT_NNNN]]. Skip — we don't double-tokenize.
_auto_phi_skip_already_token() {
  [[ "$1" =~ ^\[\[[A-Z][A-Z0-9_]*_[0-9]+\]\]$ ]]
}

# Already inside a manual marker (@@…@@, @@…, {{phi:…}}). Caller can detect
# by checking that the marker has already been substituted to a token by
# preprocess_phi_markers — by the time auto_detect_phi runs, those are gone.
# This guard is defensive: skip leftover marker-shaped tokens.
_auto_phi_skip_marker_residue() {
  local v="$1"
  [[ "$v" == @@* ]] && return 0
  [[ "$v" == *@@ ]] && return 0
  [[ "$v" == \{\{phi:* ]] && return 0
  return 1
}

# ── Tier classifier. Returns "TIER|CATEGORY" or empty. ──
# Args: value, left_context, right_context, full_line.
# Always returns 0; "no classification" is signalled by empty stdout.
_auto_phi_classify_tiered() {
  local v="$1" left="$2" right="$3" line="$4"
  [ -z "$v" ] && return 0

  # Universal skips (apply before any tier).
  _auto_phi_skip_already_token "$v" && return 0
  _auto_phi_skip_marker_residue "$v" && return 0
  # Slash-separated dates (01/02/1980, optionally with one trailing
  # punctuation char) contain "/" and would be discarded by the path-like
  # guard before the DOB checks below — which explicitly accept "/" — ever
  # ran. Exempt that one shape; it is only tokenized further down when a
  # DOB keyword or an HL7 PHI field reference is present.
  local slash_date_re='^[0-9]{1,4}/[0-9]{1,2}/[0-9]{1,4}[.,;:!?)]?$'
  if ! [[ "$v" =~ $slash_date_re ]]; then
    _auto_phi_skip_path_like "$v" && return 0
  fi
  _auto_phi_skip_hl7_fieldref "$v" "$left" && return 0
  _auto_phi_skip_json_key "$v" "$right" && return 0
  # Version skip is overridden when a DOB/Birth keyword precedes — an ISO
  # date in DOB context IS the DOB, not a version string. Err on caution.
  if ! [[ "$left" =~ ([Dd][Oo][Bb]|[Bb]irth|BIRTH)[[:space:]:#=]*$ ]]; then
    _auto_phi_skip_version "$v" && return 0
  fi

  # Strip one trailing sentence-grammar char for classification.
  local trimmed="$v"
  case "$trimmed" in
    *[.,\;:\!\?\)]) trimmed="${trimmed%?}" ;;
  esac

  # URL — leave alone.
  case "$trimmed" in
    http://*|https://*|ssh://*|ftp://*|sftp://*|file://*|ws://*|wss://*) return 0 ;;
  esac

  # ── TIER 1: DEFINITE ──
  # Email
  if [[ "$trimmed" =~ ^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$ ]]; then
    printf 'definite|EMAIL'; return
  fi
  # SSN with dashes (definite shape)
  if [[ "$trimmed" =~ ^[0-9]{3}-[0-9]{2}-[0-9]{4}$ ]]; then
    printf 'definite|SSN'; return
  fi
  # Phone — formatted variants ((NNN) NNN-NNNN, NNN-NNN-NNNN, NNN.NNN.NNNN)
  if [[ "$trimmed" =~ ^\([0-9]{3}\)[[:space:]]?[0-9]{3}-[0-9]{4}$ ]] \
    || [[ "$trimmed" =~ ^[0-9]{3}-[0-9]{3}-[0-9]{4}$ ]] \
    || [[ "$trimmed" =~ ^[0-9]{3}\.[0-9]{3}\.[0-9]{4}$ ]]; then
    printf 'definite|PHONE'; return
  fi
  # NPI with explicit context (10 digits preceded by an NPI keyword)
  if [[ "$trimmed" =~ ^[0-9]{10}$ ]]; then
    if [[ "$left" =~ (NPI|npi)[[:space:]:=]*$ ]]; then
      printf 'definite|NPI'; return
    fi
  fi

  # Timestamp/port/errcode rejection (applies before contextual MRN check).
  _auto_phi_skip_timestamp "$trimmed" && return 0
  _auto_phi_skip_port "$trimmed" "$left" && return 0
  _auto_phi_skip_errcode "$trimmed" "$left" && return 0

  # ── TIER 2: CONTEXTUAL ──
  # Numeric value preceded by an MRN/Patient/DOB/Account/Visit/Acct/
  # Record/Birth keyword within 5 chars (i.e. immediately).
  if [[ "$trimmed" =~ ^[0-9]+$ ]]; then
    if [[ "$left" =~ ([Mm][Rr][Nn]|[Pp]atient|PATIENT|[Dd][Oo][Bb]|[Aa]ccount|ACCOUNT|[Vv]isit|VISIT|[Aa]cct|ACCT|[Rr]ecord|RECORD|[Bb]irth|BIRTH)[[:space:]:#=]*$ ]]; then
      local cat
      # The category patterns mirror the keyword regex above so every
      # spelling it accepts (e.g. "DoB") maps to the right category;
      # previously mixed-case DOB variants fell through to MRN.
      case "${BASH_REMATCH[1]}" in
        [Dd][Oo][Bb]|[Bb]irth|BIRTH) cat="DOB" ;;
        [Aa]ccount|ACCOUNT|[Aa]cct|ACCT|[Vv]isit|VISIT) cat="ACCT" ;;
        *) cat="MRN" ;;
      esac
      printf 'contextual|%s' "$cat"; return
    fi
  fi
  # Date-shaped value preceded by DOB/Birth → DOB tier 2.
  if [[ "$trimmed" =~ ^[0-9]{1,4}[/-][0-9]{1,2}[/-][0-9]{1,4}$ ]]; then
    if [[ "$left" =~ ([Dd][Oo][Bb]|[Bb]irth|BIRTH)[[:space:]:#=]*$ ]]; then
      printf 'contextual|DOB'; return
    fi
    # Bare M/D/Y outside of DOB context is still date-shaped; treat as Tier 3
    # only if HL7 PHI fields are mentioned in the line.
  fi
  # SSN without dashes (9 raw digits) needs context too — too easy to confuse
  # with other 9-digit IDs.
  if [[ "$trimmed" =~ ^[0-9]{9}$ ]]; then
    if [[ "$left" =~ ([Ss][Ss][Nn]|SSN)[[:space:]:#=]*$ ]]; then
      printf 'contextual|SSN'; return
    fi
  fi

  # ── TIER 3: HL7-CONTEXT ──
  # Only kicks in if the surrounding line mentions a known-PHI HL7 field ref.
  local hl7_ctx=0
  if [[ "$line" =~ (PID\.(3|5|7|11|13|18)|NK1\.[0-9]+|GT1\.[0-9]+|IN1\.(16|17|18|19|20)) ]]; then
    hl7_ctx=1
  fi
  if [ "$hl7_ctx" = "1" ]; then
    # HL7 caret-name (FAMILY^GIVEN^MIDDLE…)
    if [[ "$trimmed" =~ ^[A-Za-z][A-Za-z\'-]*\^[A-Za-z][A-Za-z\'-]* ]]; then
      printf 'hl7|NAME'; return
    fi
    # Date-shaped → DOB
    if [[ "$trimmed" =~ ^[0-9]{1,4}[/-][0-9]{1,2}[/-][0-9]{1,4}$ ]]; then
      printf 'hl7|DOB'; return
    fi
    # 6-12 pure digits in HL7 context → MRN
    if [[ "$trimmed" =~ ^[0-9]{6,12}$ ]]; then
      printf 'hl7|MRN'; return
    fi
  fi

  return 0
}

# Tier-4 ("already-known") check: is the value already in lookup.tsv?
# Returns 0 (true) + emits "CATEGORY|TOKEN" on hit; 1 + empty on miss.
# Reads the full set of categories actually present in lookup.tsv at the
# time of the call, so user-added categories (EMP, INSPOL, etc. from the
# default PHI rules) are all considered, not just a hardcoded shortlist.
_auto_phi_check_known() {
  local v="$1"
  local sanitize_script="$LARRY_LIB_DIR/hl7-sanitize.sh"
  [ -x "$sanitize_script" ] || return 1
  local table="${LARRY_HOME}/sanitize/lookup.tsv"
  [ -f "$table" ] || return 1
  # Discover categories present in the table (column 2, skip header).
  local cats
  cats=$(awk -F'\t' 'NR>1 && $2 != "" { print $2 }' "$table" 2>/dev/null | sort -u)
  [ -z "$cats" ] && return 1
  local cat token
  while IFS= read -r cat; do
    [ -z "$cat" ] && continue
    token=$("$sanitize_script" lookup-original "$v" "$cat" 2>/dev/null)
    if [ -n "$token" ]; then
      printf '%s|%s' "$cat" "$token"
      return 0
    fi
  done <<< "$cats"
  return 1
}

# Confirm-mode interactive Y/n. Returns 0 to proceed with tokenization, 1 to
# skip. Caches decision per-session under the normalized key.
# Args: value category tier
_auto_phi_confirm() {
  local value="$1" category="$2" tier="$3"
  local sanitize_script="$LARRY_LIB_DIR/hl7-sanitize.sh"
  local mem_key
  # Fall back to the raw value when the sanitizer cannot normalize it.
  mem_key=$("$sanitize_script" normalize-value "$value" "$category" 2>/dev/null) || mem_key="$value"
  [ -z "$mem_key" ] && mem_key="$value"

  if _auto_phi_decline_check "$mem_key"; then return 1; fi
  if _auto_phi_accept_check "$mem_key"; then return 0; fi

  # confirm mode only prompts on Tier 3 and Tier 4 per spec — Tier 1 & 2
  # are always tokenized even in confirm mode.
  if [ "$tier" != "hl7" ] && [ "$tier" != "known" ]; then return 0; fi

  printf '%sphi auto>%s tokenize "%s" as %s? [Y/n] ' \
    "$C_YELLOW" "$C_RESET" "$value" "$category" >&2
  # NOTE(review): the redirect on this read was stripped from this copy of
  # the file; `</dev/tty 2>/dev/null` is the reconstruction — confirm against
  # the pristine source. An unreadable tty leaves the answer empty, which
  # the default arm below treats as "yes" (fail towards tokenizing).
  local ans=""; IFS= read -r ans </dev/tty 2>/dev/null || ans=""
  case "$ans" in
    n|N|no|NO|No) _auto_phi_mark_decline "$mem_key"; return 1 ;;
    *) _auto_phi_mark_accept "$mem_key"; return 0 ;;
  esac
}

# Strip fenced code blocks (``` … ```) from a copy of the input before
# token scanning. The caller scans the redacted version; substitutions are
# applied against the ORIGINAL input via literal string replace, so any
# values inside a fenced block remain untouched.
_auto_phi_redact_fences() {
  local s="$1"
  # Blank out every fence-marker line and every line between a pair of
  # markers (each becomes an empty line), so the line count is preserved.
  printf '%s' "$s" | awk '
    BEGIN { in_fence=0 }
    /^[[:space:]]*```/ { in_fence = !in_fence; print ""; next }
    { if (in_fence) print ""; else print }
  '
}

# Detect HL7 shape — used to gate the tool-result surface.
_auto_phi_looks_like_hl7() { local s="$1" # Strong signal: contains a CR-separated MSH segment, or starts with MSH|. case "$s" in MSH\|*) return 0 ;; esac case "$s" in *$'\r'MSH\|*) return 0 ;; *$'\n'MSH\|*) return 0 ;; esac # Also accept "MSH|" near start (some tool outputs add a header line). printf '%s' "$s" | head -c 4096 | grep -qE '(^|[\r\n])(MSH|PID|EVN|PV1)\|' && return 0 return 1 } # v0.8.1-c: base64-wrapped HL7 round-trip. # Walks every base64-shaped run (>=200 chars, [A-Za-z0-9+/=], length%4==0) # in TEXT. For each: speculatively decode; if the decoded bytes look like # HL7, route through hl7-sanitize.sh and re-encode (base64 -w0 on GNU, # `base64 | tr -d \n` on BSD). Substitute the re-encoded form back into # TEXT. Echoes the rewritten text on stdout; empty stdout means "nothing # matched, leave the result alone" (callers MUST check for non-empty). # # Per Pax §V2-sub: do NOT use entropy as the trigger — HL7's repetitive # prefixes (`PID|1||...`) compress to LOW-entropy base64. Use length + # charset + modulo-4 as the candidate filter; speculative decode is the # decision point. False-positive cost is one extra base64-and-grep round # trip per matched run; cheap. _auto_phi_b64_roundtrip() { local text="$1" toolname="${2:-tool}" local sanitize_script="$LARRY_LIB_DIR/hl7-sanitize.sh" [ -x "$sanitize_script" ] || { printf ''; return 1; } # We use python3 for the walk because pure-bash regex over potentially # 400KB input with reliable run extraction is fragile; python3 is # present everywhere larry-anywhere runs (macOS, Linux, Cygwin). # The python helper: # - finds every [A-Za-z0-9+/]{200,}={0,2} run with length%4==0 # - speculatively decodes # - checks for HL7 shape in decoded bytes # - if shape matches, writes decoded to a tempfile, runs hl7-sanitize.sh, # re-encodes the sanitized output, and patches it back into the text # - on no matches OR no shape hits, prints nothing (caller leaves result alone) if ! 
command -v python3 >/dev/null 2>&1; then # Python3 unavailable. We do NOT attempt a half-implementation in # bash/awk — base64 round-trip with byte-faithful re-encode is fiddly # to get right and a buggy substitute could corrupt the tool result # mid-payload. Conservative: leave the result intact, log once per # session for visibility. Strict-mode escape is handled upstream; # default mode falls back to the plain HL7-shape branch which catches # cleartext MSH/PID anyway. # v0.8.5 (same-pattern sweep, PROBLEM 4 class): _auto_phi_b64_roundtrip is # called inside `$(...)` (see the tool-result path), so the old in-process # `_LARRY_B64_PY3_WARNED` flag died in the subshell and this notice nagged # on every tool-result turn lacking python3 — identical to the tier-5 bug. # Persist the flag on disk keyed to SESSION_ID so it fires once per session. local _b64_flag="$LARRY_HOME/.b64-py3-notice-shown" local _b64_prev="" [ -f "$_b64_flag" ] && _b64_prev=$(strip_cr "$(cat "$_b64_flag" 2>/dev/null)") if [ "$_b64_prev" != "$SESSION_ID" ]; then printf '%sphi>%s base64 unwrap pass skipped: python3 not on PATH (install python3 to enable v0.8.1-c)\n' \ "$C_DIM" "$C_RESET" >&2 printf '%s' "$SESSION_ID" > "$_b64_flag" 2>/dev/null || true fi printf '' return 1 fi # Write the python helper to a tempfile so we can pass text via stdin # (the `python3 - </dev/null || mktemp) cat > "$_b64py" <<'PY' import os, re, subprocess, sys, tempfile, base64 sanitize_script = sys.argv[1] text = sys.stdin.buffer.read() # Candidate b64 runs: length >= 200, only [A-Za-z0-9+/=], length % 4 == 0. 
pat = re.compile(rb"[A-Za-z0-9+/]{200,}={0,2}") data = text changed = False def is_hl7(b): head = b[:4096] if head.startswith(b"MSH|"): return True for sep in (b"\rMSH|", b"\nMSH|", b"\rPID|", b"\nPID|", b"\rEVN|", b"\nEVN|", b"\rPV1|", b"\nPV1|"): if sep in head: return True return False def replace(match): global changed run = match.group(0) if len(run) % 4 != 0: return run try: decoded = base64.b64decode(run, validate=True) except Exception: return run if not is_hl7(decoded): return run with tempfile.NamedTemporaryFile(delete=False) as tf: tf.write(decoded) tfname = tf.name try: proc = subprocess.run(["bash", sanitize_script, tfname], capture_output=True, timeout=30) finally: os.unlink(tfname) if proc.returncode != 0 or not proc.stdout: # Sanitize failure: keep original (fail-open). Strict-mode escape is # already handled upstream — this helper is best-effort cleanup. return run san = proc.stdout reencoded = base64.b64encode(san) changed = True return reencoded new_data = pat.sub(replace, data) if changed: sys.stdout.buffer.write(new_data) PY printf '%s' "$text" | python3 "$_b64py" "$sanitize_script" local _rc=$? rm -f "$_b64py" return $_rc } # Main detector. Args: surface ("user_input"|"tool_result"), input text. # Echoes the rewritten input. Status message goes to stderr. # # v0.8.0-c: in LARRY_AUTO_PHI=strict mode, this function may signal a # fail-closed abort by: # - returning exit code 42, AND # - leaving the explanatory message on stderr (no stdout content). # Callers MUST check the return code and abort the surrounding turn when # they observe 42. The surrounding turn does NOT proceed with the original # input on strict abort; that would defeat the whole point of fail-closed. auto_detect_phi() { local surface="$1" local input="$2" local sanitize_script="$LARRY_LIB_DIR/hl7-sanitize.sh" # v0.8.0-c: strict mode aborts when sanitizer is unavailable AND the input # is HL7-shaped (the case where leaking would be most likely). 
Non-HL7 inputs # in strict mode still get the best-effort pass; strict is about not # silently passing HL7 PHI through when the tokenizer is broken. if [ ! -x "$sanitize_script" ]; then if [ "$AUTO_PHI_MODE" = "strict" ] && _auto_phi_looks_like_hl7 "$input"; then printf 'error: auto-PHI sanitizer unavailable (missing or non-executable: %s); LARRY_AUTO_PHI=strict aborts turn (set LARRY_AUTO_PHI=on to fall back to best-effort)\n' \ "$sanitize_script" >&2 return 42 fi printf '%s' "$input" return 0 fi # Per-turn override (user-input surface only). if [ "$surface" = "user_input" ] && [[ "$input" == '!nophi '* ]]; then printf '%s' "${input#!nophi }" return 0 fi if [ "$AUTO_PHI_MODE" = "off" ]; then printf '%s' "$input" return 0 fi # Build a fence-redacted scan copy. Substitutions still happen on $input. local scan; scan=$(_auto_phi_redact_fences "$input") # Collect hits as TIER|CAT|VALUE rows. Use newline-separated for safety. local hits="" local line left_context right_context tok token_cat token_tier local i ch # Iterate line by line; tokenize by whitespace within each line so we can # compute left/right context. This is bash-only — no awk dependency for the # detection loop, only for fence redaction. while IFS= read -r line; do # Pass over whitespace-delimited tokens. We track an offset within the # line to compute left/right context. local offset=0 trimmed_line="$line" local -a words # shellcheck disable=SC2206 words=( $line ) local w widx=0 for w in "${words[@]}"; do [ -z "$w" ] && continue # Compute left context: ~20 chars before the word. Manual slice for # macOS bash 3.2 — `${pos: -20}` returns empty when len(pos) < 20. local pos="${line%%"$w"*}" local _plen=${#pos} local left if [ "$_plen" -le 20 ]; then left="$pos" else left="${pos:$((_plen-20))}" fi local right_pos="${line#*"$w"}" local right="${right_pos:0:20}" # Try comma-split sub-tokens too (e.g. "a@b.com,c@d.com"). 
local sub local IFS=',' for sub in $w; do [ -z "$sub" ] && continue unset IFS local result result=$(_auto_phi_classify_tiered "$sub" "$left" "$right" "$line") if [ -n "$result" ]; then token_tier="${result%%|*}" token_cat="${result##*|}" hits+="${token_tier}|${token_cat}|${sub}"$'\n' else # Try Tier-4 already-known. local known known=$(_auto_phi_check_known "$sub" 2>/dev/null) if [ -n "$known" ]; then token_cat="${known%%|*}" hits+="known|${token_cat}|${sub}"$'\n' fi fi IFS=',' done unset IFS widx=$((widx+1)) done done <<< "$scan" # v0.8.2: don't early-return when tiers 1-4 found nothing — tier-5 # (Presidio NER) is the WHOLE POINT of catching free-text gaps. We run # tier-5 below regardless of $hits. Per-category counters stay scoped # at function level so both tier-1-4 and tier-5 share the summary. local -A cat_count=() # Tier-1-4 substitution (skipped when no hits). if [ -n "$hits" ]; then # Dedupe hits (preserving first-seen order). local seen_hash="" local uniq_hits="" local h while IFS= read -r h; do [ -z "$h" ] && continue case $'\n'"$seen_hash"$'\n' in *$'\n'"$h"$'\n'*) continue ;; esac seen_hash+="$h"$'\n' uniq_hits+="$h"$'\n' done <<< "$hits" while IFS= read -r h; do [ -z "$h" ] && continue local tier="${h%%|*}"; local rest="${h#*|}" local cat="${rest%%|*}"; local orig="${rest#*|}" # Confirm mode gating (Tier 3-4 only). if [ "$AUTO_PHI_MODE" = "confirm" ]; then _auto_phi_confirm "$orig" "$cat" "$tier" || continue fi # Tokenize via the canonical pipeline. local token token=$("$sanitize_script" tokenize-value --category "$cat" "$orig" 2>/dev/null) # v0.8.0-c: strict mode aborts the whole turn if any single value's # tokenize-value call fails — passing the original value through would # be a silent leak, which is exactly what strict is opted-in to prevent. # Default ("on") and "confirm" still skip-and-continue (fail-open). 
if [ -z "$token" ]; then if [ "$AUTO_PHI_MODE" = "strict" ]; then printf 'error: auto-PHI tokenize-value returned empty for value (category=%s); LARRY_AUTO_PHI=strict aborts turn (run /phi-auto on to fall back to best-effort)\n' \ "$cat" >&2 return 42 fi continue fi # Substitute. Literal string replace catches all occurrences. input="${input//"$orig"/"$token"}" # Bookkeeping. cat_count[$cat]=$(( ${cat_count[$cat]:-0} + 1 )) AUTO_PHI_SESSION_COUNT=$(( AUTO_PHI_SESSION_COUNT + 1 )) # Audit. Context: short slice around the value from the ORIGINAL input. local ctx; ctx=$(printf '%s' "$scan" | grep -F -- "$orig" | head -1 | head -c 80) _auto_phi_log "$orig" "$cat" "$token" "$tier" "$surface" "$ctx" done <<< "$uniq_hits" fi # end: if [ -n "$hits" ] — v0.8.2 wrapper so tier-5 runs unconditionally # v0.8.2 — Tier-5: free-text NER via Presidio sidecar. # Runs AFTER tier-1/2/3/4 (so explicit-marker tokens stay stable and known # values already have their canonical tokens) but BEFORE the status summary. # Tier-5 catches what the regex+keyword tiers miss: bare patient names in # prose ("the patient John Doe..."), addresses without keyword context, # un-keyworded dates, generic phone numbers. Closes V1 from Vera's audit. # # Graceful degradation: if the sidecar isn't reachable (not installed, # not started, crashed), tier-5 silently no-ops — preserves v0.8.1 behavior. # The one exception is LARRY_AUTO_PHI=strict on HL7-shaped input — handled # at the top of this function already. if [ "$AUTO_PHI_MODE" != "off" ] \ && [ -r "$LARRY_LIB_DIR/phi-client.sh" ]; then # Source the client lazily (per-call). The functions are tiny and # sourcing each turn lets users update the client without restart. # shellcheck source=lib/phi-client.sh . "$LARRY_LIB_DIR/phi-client.sh" 2>/dev/null # v0.8.12 (slowness): phi_client_available does `curl -m1 .../health` on # EVERY turn. 
On MobaXterm/Cygwin the Presidio sidecar can NEVER run # (Mac/Linux-only, v0.8.2), so that's a guaranteed-to-fail curl fork + # up-to-1s connect wait added to every single turn — pure dead latency. # Cache the probe result for the session (the sidecar's up/down state does # not flip mid-session in practice) so we probe at most ONCE. Set # LARRY_PHI_REPROBE=1 to force a fresh probe each turn (Mac/Linux dev who # just started the sidecar). The cache lives in a process-global, so it does # NOT survive the auto_detect_phi $(...) subshell — we therefore back it with # a $LARRY_HOME/.phi-avail- file flag (same pattern as the v0.8.5 # phi once-notice fix), holding "1" (reachable) or "0" (not). local _phi_avail_flag="$LARRY_HOME/.phi-avail-${SESSION_ID:-nosession}" local _phi_ok="" if [ "${LARRY_PHI_REPROBE:-0}" != "1" ] && [ -f "$_phi_avail_flag" ]; then _phi_ok=$(strip_cr "$(cat "$_phi_avail_flag" 2>/dev/null)") fi if [ -z "$_phi_ok" ]; then if declare -F phi_client_available >/dev/null 2>&1 && phi_client_available; then _phi_ok=1 else _phi_ok=0 fi printf '%s' "$_phi_ok" > "$_phi_avail_flag" 2>/dev/null || true fi if [ "$_phi_ok" = "1" ]; then # Run Presidio on a copy where already-minted [[CAT_NNNN]] tokens are # masked to neutral fixed-width placeholders. This stops Presidio from # tagging text that spans an existing token (which would then corrupt # the token when we literal-replace). We map placeholder→token so the # entity offsets still align, but since we substitute by VALUE (not # offset) below, the mask just needs to remove tokens from Presidio's # view. We use a regex-neutral run of 'x' the same length per token. local _t5_scan="$input" # Replace each [[...]] token with same-length x-run so offsets are # preserved and Presidio sees no bracket structure. 
_t5_scan=$(printf '%s' "$_t5_scan" | sed -E 's/\[\[[A-Za-z0-9_]+\]\]/XXXXXXXXXX/g') local _t5_entities _t5_entities=$(phi_redact_entities "$_t5_scan" 2>/dev/null) || _t5_entities="" if [ -n "$_t5_entities" ]; then # Format: TYPE\tSTART\tEND\tSCORE\tVALUE per line. # Sort by descending start offset so substituting longest/latest first # doesn't shift earlier offsets (we're using literal string-replace, # but stable ordering keeps the audit log sensible). local _t5_count=0 _t5_line _t5_type _t5_value _t5_score _t5_cat _t5_token while IFS=$'\t' read -r _t5_type _t5_start _t5_end _t5_score _t5_value; do [ -z "$_t5_value" ] && continue # Drop low-confidence noise. Bryan's tier-3/4 strictness applies # equally here — confidence < 0.3 is too noisy for auto-tokenize. local _t5_int_score _t5_int_score=$(printf '%s' "$_t5_score" | awk '{print int($1*100)}') if [ "${_t5_int_score:-0}" -lt 30 ]; then continue; fi # Skip values that look like HL7 field refs or paths (shared # blacklists with the per-word classifier). if declare -F _auto_phi_skip_path_like >/dev/null 2>&1; then _auto_phi_skip_path_like "$_t5_value" && continue fi if declare -F _auto_phi_skip_version >/dev/null 2>&1; then _auto_phi_skip_version "$_t5_value" && continue fi # Skip if the value is already a token (don't double-tokenize). case "$_t5_value" in \[\[*\]\]) continue ;; *\[\[*) continue ;; # value spans/contains a token fragment *XXXXXXXXXX*) continue ;; # value spans a masked token placeholder esac # Noise guard: drop bare uppercase field-label acronyms Presidio # over-eagerly tags as ORGANIZATION ("SSN", "MRN", "DOB", "ED", # "Phone", "ADT"). These are HL7/clinical jargon, not PHI. We keep # them out of the tokenize set to avoid (a) noise and (b) the # substring-corruption class (a 3-letter value substring-matching # inside another token). A real name is mixed-case or multi-word. 
case "$_t5_value" in [A-Z][A-Z]|[A-Z][A-Z][A-Z]|[A-Z][A-Z][A-Z][A-Z]) continue ;; esac # Skip very short single tokens (< 3 chars) — too collision-prone # for literal-string replace. if [ "${#_t5_value}" -lt 3 ]; then continue; fi # Token-safe substitution guard: if the value occurs ONLY as a # substring of an existing [[...]] token in the current input, # skip it (replacing would corrupt the token). We check by # masking tokens and seeing if the value still appears. local _t5_masked _t5_masked=$(printf '%s' "$input" | sed -E 's/\[\[[A-Za-z0-9_]+\]\]/\x01/g') case "$_t5_masked" in *"$_t5_value"*) : ;; # appears outside any token — safe *) continue ;; # only inside tokens — skip esac # Map Presidio entity types to lookup.tsv categories. Prefix with # presidio_ so they stay distinguishable from rule-pack categories # in audit logs and the /tokens listing. _t5_cat="presidio_${_t5_type}" # Confirm mode (Tier 3/4 style) — prompt once per value. if [ "$AUTO_PHI_MODE" = "confirm" ]; then _auto_phi_confirm "$_t5_value" "$_t5_cat" "presidio" || continue fi _t5_token=$("$sanitize_script" tokenize-value --category "$_t5_cat" "$_t5_value" 2>/dev/null) if [ -z "$_t5_token" ]; then if [ "$AUTO_PHI_MODE" = "strict" ]; then printf 'error: auto-PHI tokenize-value returned empty for tier-5 value (category=%s); LARRY_AUTO_PHI=strict aborts turn\n' \ "$_t5_cat" >&2 return 42 fi continue fi # Token-protected literal substitution. Existing [[...]] tokens are # pulled out to numbered sentinels, the tier-5 value is replaced in # the remaining text, then the sentinels are restored. This is # robust against a value that happens to be a substring of an # existing token (e.g. a digit run that also appears in a token ID) # — tiers 1-4 use plain replace because their values are minted # fresh and can't collide, but tier-5 runs on already-tokenized text. local _t5_proto="$input" _t5_sentinel_map="" _t5_tok _t5_idx=0 # Extract existing tokens into sentinels of the form \x02\x02. 
while IFS= read -r _t5_tok; do [ -z "$_t5_tok" ] && continue local _t5_sent=$'\x02'"${_t5_idx}"$'\x02' _t5_proto="${_t5_proto//"$_t5_tok"/"$_t5_sent"}" _t5_sentinel_map+="${_t5_idx}"$'\t'"${_t5_tok}"$'\n' _t5_idx=$(( _t5_idx + 1 )) done < <(printf '%s' "$input" | grep -oE '\[\[[A-Za-z0-9_]+\]\]' | sort -u) # Replace the value in the protected (sentinel-bearing) text. _t5_proto="${_t5_proto//"$_t5_value"/"$_t5_token"}" # Restore sentinels back to their original tokens. local _t5_mline _t5_mid _t5_mtok while IFS=$'\t' read -r _t5_mid _t5_mtok; do [ -z "$_t5_mid" ] && continue local _t5_sent2=$'\x02'"${_t5_mid}"$'\x02' _t5_proto="${_t5_proto//"$_t5_sent2"/"$_t5_mtok"}" done <<< "$_t5_sentinel_map" input="$_t5_proto" cat_count[$_t5_cat]=$(( ${cat_count[$_t5_cat]:-0} + 1 )) AUTO_PHI_SESSION_COUNT=$(( AUTO_PHI_SESSION_COUNT + 1 )) _t5_count=$(( _t5_count + 1 )) _auto_phi_log "$_t5_value" "$_t5_cat" "$_t5_token" "presidio" "$surface" "score=$_t5_score" done <<< "$_t5_entities" if [ "$_t5_count" -gt 0 ]; then printf '%sphi>%s tier-5 (presidio NER) auto-tokenized %d additional value(s) [%s]\n' \ "$C_DIM" "$C_RESET" "$_t5_count" "$surface" >&2 fi fi else # Sidecar unreachable. The tier-5 "disabled" state is always recorded and # remains queryable via `/phi-auto status`; we only control the UNSOLICITED # per-session stderr notice here. # # v0.8.12 (Bryan's explicit ask — "I don't need to see it every time"): the # notice is now DEFAULT-SILENT and opt-in. Set LARRY_PHI_NOTICE=1 to # re-enable the one-time-per-session print. Default (unset/0) = no print. # # v0.8.5 (PROBLEM 4) history retained for the opt-in path: the old guard # used `export _LARRY_PHI_TIER5_WARNED=1`, but auto_detect_phi runs inside # `$(...)` command substitution (REPL: `input=$(auto_detect_phi …)`). An # export inside a subshell dies when the substitution returns, so the flag # reset EVERY turn and the notice nagged on every message. 
The disk flag # keyed to SESSION_ID survives the subshell boundary, keeping the opt-in # print to once per session. if [ "${LARRY_PHI_NOTICE:-0}" = "1" ] && [ -x "$LARRY_LIB_DIR/phi-sidecar.sh" ]; then local _phi_notice_flag="$LARRY_HOME/.phi-notice-shown" local _phi_notice_prev="" [ -f "$_phi_notice_flag" ] && _phi_notice_prev=$(strip_cr "$(cat "$_phi_notice_flag" 2>/dev/null)") if [ "$_phi_notice_prev" != "$SESSION_ID" ]; then printf '%sphi>%s tier-5 (presidio NER) disabled — sidecar not running (expected on MobaXterm/Windows). Mac/Linux: %s/phi-sidecar.sh ensure. /phi-auto status for state.\n' \ "$C_DIM" "$C_RESET" "$LARRY_LIB_DIR" >&2 printf '%s' "$SESSION_ID" > "$_phi_notice_flag" 2>/dev/null || true fi fi fi fi # Emit a single status summary if anything was tokenized. if [ ${#cat_count[@]} -gt 0 ]; then local summary="" total=0 k for k in "${!cat_count[@]}"; do summary+="${k}×${cat_count[$k]} " total=$(( total + cat_count[$k] )) done summary="${summary% }" printf '%sphi>%s auto-tokenized %d value(s) [%s]: %s\n' \ "$C_DIM" "$C_RESET" "$total" "$surface" "$summary" >&2 fi printf '%s' "$input" } tool_hl7_sanitize() { local input_path="$1" strict="${2:-0}" _lib_err_if_missing || return local args=() [ "$strict" = "1" ] && args+=(--strict) args+=("$input_path") "$LARRY_LIB_DIR/hl7-sanitize.sh" "${args[@]}" 2>&1 } # ───────────────────────────────────────────────────────────────────────────── # v0.6.7 — @file inline-file preprocessing # # Replaces @ tokens in user input with inlined file contents as fenced # code blocks appended after the prose. Runs BEFORE PHI tokenization so PHI # markers inside inlined files still get caught. # # Token grammar: # - @ : @ followed by non-whitespace chars; @ must be preceded by # whitespace, start-of-line, or punctuation. Skipped if # preceded by a non-whitespace word char (e.g. email). # - @{path} : bracketed form for paths with spaces. Closing } required. 
#
# Validation:
#   missing   → warn, leave literal
#   directory → warn, skip
#   binary    → warn, skip (first 8KB scanned for null bytes)
#   >250KB    → truncate to 250KB with footer
# ─────────────────────────────────────────────────────────────────────────────
# Arguments: $1 - raw user input
# Outputs:   the input, followed by one fenced block per distinct readable
#            file reference, on stdout; per-reference notices on stderr.
preprocess_atfile_refs() {
  local input="$1"

  # Quick reject: no @ → no work.
  case "$input" in
    *@*) ;;
    *) printf '%s' "$input"; return ;;
  esac

  # Pass 1 — collect the @-refs in order of appearance. Two grammars:
  #   1. @{path with spaces}
  #   2. @bare-token (stops at whitespace or closing punctuation)
  local refs=() # raw path strings, NOT including the @
  local seen=() # resolved paths already handled (for dedupe)
  local i=0 n=${#input}
  local prev_char=$'\n' # treat start-of-input as whitespace
  local ch nx token end j cj rest body
  while [ "$i" -lt "$n" ]; do
    ch="${input:i:1}"
    if [ "$ch" = "@" ]; then
      # Eligible only after whitespace, start-of-input, or punctuation — this
      # is what keeps "user@example.com" from being treated as a reference.
      case "$prev_char" in
        ''|[[:space:]]|'('|'['|','|';'|':'|'"'|"'"|'<'|'>'|'='|'`'|'|')
          nx="${input:i+1:1}"
          token=""
          end=$((i + 1))
          if [ "$nx" = "{" ]; then
            # @{...}: everything up to the first closing brace. With no
            # closing brace the @ stays literal.
            rest="${input:i+2}"
            case "$rest" in
              *'}'*)
                token=${rest%%\}*}
                end=$((i + 2 + ${#token} + 1))
                ;;
              *) token="" ;;
            esac
          else
            # @bare-token: most punctuation is allowed in paths; stop only at
            # whitespace or an obvious terminator.
            j=$((i + 1))
            while [ "$j" -lt "$n" ]; do
              cj="${input:j:1}"
              case "$cj" in
                [[:space:]] | ',' | ';' | ')' | ']' | '}' | '"' | "'" | '`') break ;;
              esac
              token+="$cj"
              j=$((j + 1))
            done
            # Sentence-final period: strip a trailing '.' only when the rest of
            # the token has no other dot ("see @notes." → notes), so names like
            # "foo.md" or "../foo." are left alone.
            if [ -n "$token" ] && [ "${token: -1}" = "." ]; then
              body="${token%.}"
              case "$body" in
                *.*) ;;
                *) token="$body" ;;
              esac
            fi
            end="$j"
          fi
          if [ -n "$token" ]; then
            refs+=("$token")
            i="$end"
            prev_char="${input:end-1:1}"
            continue
          fi
          ;;
      esac
    fi
    prev_char="$ch"
    i=$((i + 1))
  done

  if [ "${#refs[@]}" -eq 0 ]; then
    printf '%s' "$input"
    return
  fi

  # Pass 2 — resolve, dedupe, validate, and build the footer.
  local footer=""
  local r resolved canonical skip s
  for r in "${refs[@]}"; do
    # Resolve relative paths against the current directory.
    case "$r" in
      /*) resolved="$r" ;;
      *) resolved="$PWD/$r" ;;
    esac
    # Dedupe key: the resolved path as-is (no symlink resolution, to stay cheap).
    canonical="$resolved"
    skip=0
    # ${seen[@]+...}: `seen` is empty on the first reference, and expanding an
    # empty array is an "unbound variable" error under `set -u` on bash < 4.4.
    for s in ${seen[@]+"${seen[@]}"}; do
      [ "$s" = "$canonical" ] && { skip=1; break; }
    done
    [ "$skip" = "1" ] && continue
    seen+=("$canonical")

    if [ ! -e "$resolved" ]; then
      printf '%satfile>%s @%s not found; leaving literal\n' "$C_YELLOW" "$C_RESET" "$r" >&2
      continue
    fi
    if [ -d "$resolved" ]; then
      printf '%satfile>%s @%s is a directory; skipping\n' "$C_YELLOW" "$C_RESET" "$r" >&2
      continue
    fi
    if [ ! -f "$resolved" ]; then
      printf '%satfile>%s @%s not a regular file; skipping\n' "$C_YELLOW" "$C_RESET" "$r" >&2
      continue
    fi

    # Binary detection: scan the first 8KB for null bytes by comparing byte
    # counts before/after `tr -d '\0'` — grep with a literal NUL doesn't work
    # portably (NUL terminates the pattern string in many greps).
    local _head_bytes _stripped_bytes
    _head_bytes=$(head -c 8192 "$resolved" 2>/dev/null | wc -c | tr -d ' ')
    _stripped_bytes=$(head -c 8192 "$resolved" 2>/dev/null | LC_ALL=C tr -d '\0' | wc -c | tr -d ' ')
    if [ "$_head_bytes" != "$_stripped_bytes" ]; then
      printf '%satfile>%s @%s appears to be binary; skipping\n' "$C_YELLOW" "$C_RESET" "$r" >&2
      continue
    fi

    # Byte size; BSD `wc -c` pads with spaces, so normalise before comparing.
    local size
    size=$(wc -c < "$resolved" 2>/dev/null || echo 0)
    size="${size//[[:space:]]/}"
    [ -n "$size" ] || size=0

    local content footer_note=""
    if [ "$size" -gt 256000 ]; then
      content=$(head -c 256000 "$resolved" 2>/dev/null)
      footer_note=$'\n[file truncated at 250 KB; total size: '"$(( size / 1024 ))"' KB]'
    else
      content=$(cat "$resolved" 2>/dev/null)
    fi

    # Language hint for the fence: text after the last '.', or nothing when
    # the reference contains no dot.
    local ext="${r##*.}"
    case "$ext" in
      "$r"|"") ext="" ;;
    esac

    footer+=$'\n\n—————\n'"$r"$':\n```'"$ext"$'\n'"$content""$footer_note"$'\n```'
    printf '%satfile>%s @%s inlined (%d bytes)\n' "$C_YELLOW" "$C_RESET" "$r" "$size" >&2
  done

  if [ -z "$footer" ]; then
    printf '%s' "$input"
    return
  fi
  printf '%s%s' "$input" "$footer"
}

# Session-scope flag: print the @file tip once per session.
_LARRY_ATFILE_TIP_SHOWN=0

# Print the @file tip on stderr the first time an input containing '@' is seen.
# Arguments: $1 - user input
maybe_show_atfile_tip() {
  [ "$_LARRY_ATFILE_TIP_SHOWN" = "1" ] && return
  case "$1" in
    *@*)
      printf '%s(tip: @ attaches the file contents; TAB to autocomplete)%s\n' "$C_DIM" "$C_RESET" >&2
      _LARRY_ATFILE_TIP_SHOWN=1
      ;;
  esac
}

# ─────────────────────────────────────────────────────────────────────────────
# v0.6.7 — clipboard + cost + model-name + tool-display helpers
# ─────────────────────────────────────────────────────────────────────────────

# Detect clipboard command. Cached after first call.
_LARRY_CLIP_CMD=""
_LARRY_CLIP_DETECTED=0

# Print the clipboard command line for this host (empty when none is found).
# The first call probes, in priority order: pbcopy, wl-copy (only when
# WAYLAND_DISPLAY is set), xclip, xsel, /dev/clipboard, clip.exe. The result is
# stored in _LARRY_CLIP_CMD and later calls in the same shell just print it.
detect_clipboard() {
  if [ "$_LARRY_CLIP_DETECTED" != "1" ]; then
    _LARRY_CLIP_DETECTED=1
    if command -v pbcopy >/dev/null 2>&1; then
      _LARRY_CLIP_CMD="pbcopy"
    elif [ -n "${WAYLAND_DISPLAY:-}" ] && command -v wl-copy >/dev/null 2>&1; then
      _LARRY_CLIP_CMD="wl-copy"
    elif command -v xclip >/dev/null 2>&1; then
      _LARRY_CLIP_CMD="xclip -selection clipboard"
    elif command -v xsel >/dev/null 2>&1; then
      _LARRY_CLIP_CMD="xsel --clipboard --input"
    elif [ -e /dev/clipboard ]; then
      _LARRY_CLIP_CMD="tee /dev/clipboard >/dev/null"
    elif command -v clip.exe >/dev/null 2>&1; then
      _LARRY_CLIP_CMD="clip.exe"
    fi
  fi
  printf '%s' "$_LARRY_CLIP_CMD"
}

# Anthropic pricing per million tokens (USD), as of 2026-05.
# Source: https://platform.claude.com/docs/en/about-claude/pricing
# Refresh periodically — these are constants Bryan can hand-edit.
#
# Arguments: $1 - model name
# Outputs:   "input_price output_price" per MTok; any name that contains
#            neither "opus" nor "haiku" gets the sonnet rates.
_price_for_model() {
  local model="$1" rates
  case "$model" in
    *opus*) rates="15 75" ;;
    *haiku*) rates="1 5" ;;
    *) rates="3 15" ;;
  esac
  printf '%s\n' "$rates"
}

# Session cost tracker. Updated on each non-streaming response or message_delta.
_LARRY_INPUT_TOKENS=0
_LARRY_OUTPUT_TOKENS=0
_LARRY_CACHE_READ_TOKENS=0
_LARRY_CACHE_WRITE_TOKENS=0
_LARRY_TURNS=0
# v0.8.10: one-shot guard — set to 1 once the edge-429→API-key fallback has
# flipped LARRY_AUTH_MODE to apikey, so we never flip-flop within a session.
_LARRY_EDGE_FALLBACK_DONE=0

# ─────────────────────────────────────────────────────────────────────────────
# v0.6.9: Persistent status line — ctx + rate-limit visibility
# ─────────────────────────────────────────────────────────────────────────────
# Per Pax's research (Deliverables/2026-05-27-anthropic-rate-limit-headers-
# research.md) the API exposes two distinct families of rate-limit headers:
#
#   API-key mode:   anthropic-ratelimit-{requests,tokens,input-tokens,
#                   output-tokens}-{limit,remaining,reset}
#                   Reset is an RFC 3339 datetime string.
# # OAuth mode: anthropic-ratelimit-unified-{5h,7d}-{status,utilization, # reset} + -representative-claim + a top-level -reset. # Reset is a Unix epoch integer-as-string. # # Two DIFFERENT parsers needed (easy footgun called out by Pax). # # STATUS_* globals are updated by _parse_response_headers after every API # call, then read by render_status_line which (as of v0.7.1) is invoked # between turns — after the user submits input and before agent_turn runs. # Empty string = "unknown" — render as "—", never as "0%". STATUS_ctx_used_tokens="" # input + cache_creation + cache_read for LAST turn STATUS_ctx_window="" # from MODEL_CONTEXT_WINDOWS lookup STATUS_oauth_5h_utilization="" # 0.0–1.0 (decimal string) STATUS_oauth_5h_reset_epoch="" # unix seconds STATUS_oauth_7d_utilization="" STATUS_oauth_7d_reset_epoch="" STATUS_oauth_representative="" # five_hour | seven_day | seven_day_opus | seven_day_sonnet STATUS_oauth_status="" # allowed | warning | rate_limited STATUS_api_reset_epoch="" # earliest of the *-reset RFC3339 timestamps, as epoch # v0.8.5: rate-limit diagnosis state. Populated by _parse_response_headers on # EVERY response (200 or 429). _humanize_api_error reads these to render an # ACTIONABLE rate-limit message (which limit tripped + when it resets) instead # of a bare "rate_limit_error". See PROBLEM 1 in the v0.8.5 deliverable. STATUS_retry_after_secs="" # raw `retry-after` header value (seconds), if present STATUS_rl_tripped_rail="" # which bucket is at/over limit: requests|input-tokens|output-tokens|tokens|unified-5h|unified-7d STATUS_rl_reset_epoch="" # epoch when the tripped rail resets (best-effort) # v0.8.10: edge-rejection discrimination. Set to 1 by _parse_response_headers # when the response is the EDGE-429 signature: HTTP 429 + x-should-retry:true + # ZERO anthropic-ratelimit-* headers. That is Anthropic's gateway bouncing the # request BEFORE rate-limit accounting (an auth/fingerprint reject), NOT a quota # limit. 
# A real quota 429 carries anthropic-ratelimit-* headers → this stays 0
# and the normal backoff path runs. Reset per response so it never goes stale.
STATUS_rl_edge_reject=0

# session_cost is reused from _LARRY_INPUT/OUTPUT/CACHE_*_TOKENS via
# _render_session_cost_dollars (no new state needed).
# Session turns counter == _LARRY_TURNS (no new state needed).

# Header-capture safety net: log the first 50 OAuth response header blocks
# to $LARRY_HOME/log/headers.log so we can verify Pax's spec against Bryan's
# actual account. Auto-disables after 50 calls.
STATUS_oauth_headers_logged=0
STATUS_OAUTH_HEADER_LOG_LIMIT=50
# v0.8.8: 429 captures get their OWN budget, separate from the OAuth 200-call
# cap above. The whole point of headers.log is diagnosing rate limits, so a
# session that burned its 50 OAuth captures on successful calls must STILL log
# the next 429. A bare 429 with no diagnostic headers is itself a finding
# (signals a low/bare-tier limit). 429 budget is large so a flap won't silence
# it, but bounded so a pathological retry storm can't grow the file unbounded.
STATUS_429_headers_logged=0
STATUS_429_HEADER_LOG_LIMIT=200

# Model context-window lookup table (tokens). Source: Pax §4.
# Default for unknown models: 200000 (safe lower bound for legacy releases).
# Arguments: $1 - model name
# Outputs:   context window size in tokens
_model_context_window() {
  local m="$1"
  case "$m" in
    *opus-4-7* | *opus-4-6* | *sonnet-4-6*) echo 1000000 ;;
    *) echo 200000 ;; # haiku-4-5, sonnet-4-5, opus-4-5, opus-4-1, and unknowns
  esac
}

# _header_value HEADER_FILE NAME — case-insensitive header lookup.
# curl -D writes "Header-Name: value\r\n" lines. Prints the value of the first
# matching line with ALL leading/trailing whitespace removed (the trailing CR
# counts as whitespace). Prints nothing — and still returns 0 — when the header
# or the file is absent.
_header_value() {
  local f="$1" name="$2"
  local line val
  line=$(grep -i -m1 "^${name}:" "$f" 2>/dev/null) || return 0
  val="${line#*:}"
  # Trim by removing the longest all-whitespace prefix, then suffix. The
  # earlier extra step `${val##[[:space:]]*}` was dropped: its trailing `*`
  # matched the REST of the string, so any value still starting with
  # whitespace (two spaces or a tab after the colon) was wiped to empty.
  val="${val#"${val%%[![:space:]]*}"}"
  val="${val%"${val##*[![:space:]]}"}"
  printf '%s' "$val"
}

# _rfc3339_to_epoch STR — convert RFC 3339 datetime → Unix epoch seconds.
# Prints nothing on empty input or parse failure (caller renders "—"); always
# returns 0. GNU `date -d` is tried first, then BSD `date -j -f`.
_rfc3339_to_epoch() {
  local s="$1" out
  [ -z "$s" ] && return 0
  # GNU date (Linux, Cygwin).
  if out=$(date -d "$s" +%s 2>/dev/null) && [ -n "$out" ]; then
    printf '%s' "$out"
    return 0
  fi
  # BSD date (macOS), literal-Z form. -u is required: without it the parsed
  # fields are interpreted in the LOCAL zone, shifting the epoch by the UTC
  # offset even though the trailing Z says UTC.
  if out=$(date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$s" +%s 2>/dev/null) && [ -n "$out" ]; then
    printf '%s' "$out"
    return 0
  fi
  # BSD date with a numeric offset (a Z, if present, is rewritten to +0000).
  if out=$(date -j -f "%Y-%m-%dT%H:%M:%S%z" "${s/Z/+0000}" +%s 2>/dev/null) && [ -n "$out" ]; then
    printf '%s' "$out"
    return 0
  fi
  # Give up silently.
  return 0
}

# _epoch_to_hhmm EPOCH — format epoch as HH:MM in local time.
# Prints nothing for empty input or when neither date flavour accepts it.
_epoch_to_hhmm() {
  local e="$1"
  [ -z "$e" ] && return 0
  date -d "@$e" +%H:%M 2>/dev/null || date -r "$e" +%H:%M 2>/dev/null || true
}

# _epoch_to_ddd_mmm_d EPOCH — format epoch as "Mon Jun 2".
# Prints nothing for empty input or when neither date flavour accepts it.
_epoch_to_ddd_mmm_d() {
  local e="$1"
  [ -z "$e" ] && return 0
  date -d "@$e" "+%a %b %-d" 2>/dev/null || date -r "$e" "+%a %b %-d" 2>/dev/null || true
}

# _humanize_tokens N — render an integer as 24K / 1.2M.
# Empty input renders as "—"; values below 1000 are printed unchanged;
# thousands are truncated (1999 → 1K); millions keep one decimal.
_humanize_tokens() {
  local n="$1"
  [ -z "$n" ] && { printf '—'; return; }
  if [ "$n" -ge 1000000 ]; then
    awk -v n="$n" 'BEGIN{printf "%.1fM", n/1000000}'
  elif [ "$n" -ge 1000 ]; then
    awk -v n="$n" 'BEGIN{printf "%dK", n/1000}'
  else
    printf '%s' "$n"
  fi
}

# _parse_response_headers HEADER_FILE — extract rate-limit fields from a
# curl -D dump and update STATUS_* globals. Idempotent; safe to call on
# empty / partial files.
#
# Per Pax §2 / §3:
#   API-key resets: RFC 3339 datetime strings → convert to epoch.
#   OAuth resets:   Unix epoch integer-as-string → use as-is.
#
# Arguments: $1 - path to the header dump; an empty or missing file is a no-op.
# Globals written: STATUS_oauth_*, STATUS_api_reset_epoch,
#   STATUS_retry_after_secs, STATUS_rl_tripped_rail, STATUS_rl_reset_epoch,
#   STATUS_rl_edge_reject, STATUS_429_headers_logged,
#   STATUS_oauth_headers_logged.
# Side effects: appends to $LARRY_HOME/log/headers.log; prints a pointer to
#   that file on stderr whenever a 429 is captured.
_parse_response_headers() {
  local f="$1"
  [ -s "$f" ] || return 0

  # ── OAuth unified-* family ───────────────────────────────────────────────
  # Each global is only overwritten when the header is present, so values
  # from an earlier response survive a response that omits them.
  local v
  v=$(_header_value "$f" "anthropic-ratelimit-unified-status")
  [ -n "$v" ] && STATUS_oauth_status="$v"
  v=$(_header_value "$f" "anthropic-ratelimit-unified-5h-utilization")
  [ -n "$v" ] && STATUS_oauth_5h_utilization="$v"
  v=$(_header_value "$f" "anthropic-ratelimit-unified-5h-reset")
  [ -n "$v" ] && STATUS_oauth_5h_reset_epoch="$v"
  v=$(_header_value "$f" "anthropic-ratelimit-unified-7d-utilization")
  [ -n "$v" ] && STATUS_oauth_7d_utilization="$v"
  v=$(_header_value "$f" "anthropic-ratelimit-unified-7d-reset")
  [ -n "$v" ] && STATUS_oauth_7d_reset_epoch="$v"
  v=$(_header_value "$f" "anthropic-ratelimit-unified-representative-claim")
  [ -n "$v" ] && STATUS_oauth_representative="$v"

  # ── API-key family (find earliest reset) ─────────────────────────────────
  # The four buckets (requests/tokens/input-tokens/output-tokens) each have
  # their own reset. We display the most-imminent one.
  local earliest=""
  local hname epoch rfc
  for hname in \
    anthropic-ratelimit-requests-reset \
    anthropic-ratelimit-tokens-reset \
    anthropic-ratelimit-input-tokens-reset \
    anthropic-ratelimit-output-tokens-reset; do
    rfc=$(_header_value "$f" "$hname")
    [ -z "$rfc" ] && continue
    epoch=$(_rfc3339_to_epoch "$rfc")
    [ -z "$epoch" ] && continue
    if [ -z "$earliest" ] || [ "$epoch" -lt "$earliest" ]; then
      earliest="$epoch"
    fi
  done
  [ -n "$earliest" ] && STATUS_api_reset_epoch="$earliest"

  # ── v0.8.5: rate-limit diagnosis — retry-after + which rail tripped ──────
  # On a 429 the response carries `retry-after` (seconds) and the relevant
  # `*-remaining: 0` bucket. We capture both so _humanize_api_error can tell
  # Bryan EXACTLY which limit fired and when it resets, instead of echoing a
  # bare "rate_limit_error". strip_cr on every captured value — these flow
  # into case/printf paths and a CRLF response (MobaXterm) would taint them.
  local _ra
  _ra=$(_header_value "$f" "retry-after")
  _ra=$(strip_cr "$_ra")
  [ -n "$_ra" ] && STATUS_retry_after_secs="$_ra"

  # Identify the tripped rail. Prefer the OAuth unified status when present;
  # otherwise inspect the API-key *-remaining buckets for the one at 0.
  STATUS_rl_tripped_rail=""
  STATUS_rl_reset_epoch=""
  if [ "$(strip_cr "$STATUS_oauth_status")" = "rate_limited" ]; then
    # OAuth: representative-claim tells us which window is the binding one.
    case "$(strip_cr "$STATUS_oauth_representative")" in
      *7d*|*seven*)
        STATUS_rl_tripped_rail="unified-7d"
        STATUS_rl_reset_epoch="$(strip_cr "$STATUS_oauth_7d_reset_epoch")"
        ;;
      *)
        STATUS_rl_tripped_rail="unified-5h"
        STATUS_rl_reset_epoch="$(strip_cr "$STATUS_oauth_5h_reset_epoch")"
        ;;
    esac
  else
    # API-key family: take the first bucket whose -remaining is exactly 0.
    local _rail _rem _reset_rfc _reset_epoch
    for _rail in requests input-tokens output-tokens tokens; do
      _rem=$(strip_cr "$(_header_value "$f" "anthropic-ratelimit-${_rail}-remaining")")
      [ -z "$_rem" ] && continue
      _rem=$(coerce_int "$_rem" -1)
      if [ "$_rem" = "0" ]; then
        STATUS_rl_tripped_rail="$_rail"
        _reset_rfc=$(strip_cr "$(_header_value "$f" "anthropic-ratelimit-${_rail}-reset")")
        _reset_epoch=$(_rfc3339_to_epoch "$_reset_rfc")
        [ -n "$_reset_epoch" ] && STATUS_rl_reset_epoch="$_reset_epoch"
        break
      fi
    done
  fi

  # ── v0.8.8: detect a 429 straight from the status line in the -D dump ────
  # We do NOT rely on retry-after / unified-* being present (Bryan's box 429s
  # carry neither — the exponential 2/4/8s fallback proves no server retry-after,
  # and a per-minute burst 429 routinely omits the unified-* family). The HTTP
  # status line is the ONE thing every 429 always has. Match `HTTP/1.1 429`,
  # `HTTP/2 429`, etc. tolerant of the CRLF curl writes on Windows/MobaXterm.
  local _is_429=0
  if grep -iqE '^HTTP/[0-9.]+[[:space:]]+429([[:space:]]|$|[^0-9])' "$f" 2>/dev/null; then
    _is_429=1
  fi

  # ── v0.8.10: edge-rejection discrimination ───────────────────────────────
  # Distinguish an EDGE-429 (Anthropic's gateway bouncing the request before
  # rate-limit accounting — an auth/fingerprint reject) from a REAL quota 429.
  # The signature, proved by v0.8.8's header capture on Bryan's box:
  #     HTTP 429 + x-should-retry:true + ZERO anthropic-ratelimit-* headers
  # A genuine quota 429 ALWAYS carries at least one anthropic-ratelimit-* header
  # (the burst rail's -remaining:0, or the unified-* family). Its ABSENCE on a
  # 429 means the request never reached the rate-limit accountant → edge bounce.
  # We reset to 0 every parse so the flag never goes stale across calls; a 200
  # or a real-quota 429 leaves it 0 (→ normal backoff). Only the edge signature
  # sets it 1 (→ agent_turn diverts to the API-key fallback instead of futile
  # backoff against an edge that will never accept the OAuth token).
  STATUS_rl_edge_reject=0
  if [ "$_is_429" = "1" ]; then
    local _has_rl_hdr=0
    if grep -iqE '^anthropic-ratelimit-' "$f" 2>/dev/null; then
      _has_rl_hdr=1
    fi
    local _should_retry; _should_retry=$(strip_cr "$(_header_value "$f" "x-should-retry")")
    # Edge reject = 429 with NO ratelimit headers. x-should-retry:true is the
    # corroborating tell from Bryan's capture but we don't HARD-require it — the
    # load-bearing discriminator is the absence of anthropic-ratelimit-* (a real
    # quota 429 cannot omit them). If ratelimit headers ARE present it's a
    # legitimate quota/burst 429 → leave the flag 0 regardless of x-should-retry.
    if [ "$_has_rl_hdr" = "0" ]; then
      STATUS_rl_edge_reject=1
      # Annotate the rail so _humanize_api_error / the 429-log banner name it.
      [ -z "$STATUS_rl_tripped_rail" ] && STATUS_rl_tripped_rail="edge-reject"
    fi
    : "${_should_retry:=}"   # referenced for clarity; absence is tolerated
  fi

  local log_dir="$LARRY_HOME/log"

  # ── ALWAYS-ON 429 CAPTURE (the whole point of headers.log) ───────────────
  # On ANY 429, write the FULL raw header block — regardless of retry-after,
  # unified-*, auth mode, or the OAuth 200-call cap. The 429 has its own budget
  # (STATUS_429_HEADER_LOG_LIMIT) so a session that exhausted the OAuth cap on
  # successful calls still logs its next rate-limit. A bare 429 with no
  # diagnostic headers STILL logs — that absence is itself the finding.
  if [ "$_is_429" = "1" ] \
     && [ "$STATUS_429_headers_logged" -lt "$STATUS_429_HEADER_LOG_LIMIT" ]; then
    mkdir -p "$log_dir" 2>/dev/null || true
    if [ -d "$log_dir" ]; then
      # Best-effort rail/account hints for the header line + live pointer.
      local _rail_hint="${STATUS_rl_tripped_rail:-unknown}"
      local _ra_hint="${_ra:-none}"
      local _org_hint; _org_hint=$(strip_cr "$(_header_value "$f" "anthropic-organization-id")")
      local _reqid_hint; _reqid_hint=$(strip_cr "$(_header_value "$f" "request-id")")
      # Rail label: which AUTH RAIL authenticated this request. unified-* headers
      # ⇒ OAuth/subscription (Max) rail; their absence on an API-key 429 ⇒ the
      # API-key rail. We surface BOTH the limit-rail and the auth-mode.
      local _auth_rail="$LARRY_AUTH_MODE"
      [ -z "$_auth_rail" ] && _auth_rail="unknown"
      {
        printf '════ %s *** HTTP 429 RATE LIMIT *** ════\n' \
          "$(date -Iseconds 2>/dev/null || date)"
        printf ' auth-mode=%s limit-rail=%s retry-after=%s org=%s request-id=%s model=%s 429#%d\n' \
          "$_auth_rail" "$_rail_hint" "$_ra_hint" \
          "${_org_hint:-—}" "${_reqid_hint:-—}" "$LARRY_MODEL" \
          "$((STATUS_429_headers_logged + 1))"
        # Dump EVERYTHING useful for diagnosis. Order: status line, then the
        # full anthropic-* family (NOT just ratelimit), retry-after, request-id,
        # and every x-* header (account/proxy/edge hints live here). De-dup the
        # request-id line if it also matched x-* (harmless, kept simple).
        grep -iE '^HTTP/' "$f" 2>/dev/null || true
        grep -iE '^anthropic-' "$f" 2>/dev/null || true
        grep -iE '^retry-after:' "$f" 2>/dev/null || true
        grep -iE '^request-id:' "$f" 2>/dev/null || true
        grep -iE '^x-' "$f" 2>/dev/null || true
        printf '\n'
      } >> "$log_dir/headers.log" 2>/dev/null || true
      STATUS_429_headers_logged=$((STATUS_429_headers_logged + 1))
      # ── Surface it LIVE so Bryan knows the log now exists (no hunting) ─────
      # Print the path actually written: LARRY_HOME is overridable, so a
      # hard-coded ~/.larry path would point at the wrong file.
      printf '%sphi/rl>%s 429 headers logged to %s (rail=%s, retry-after=%s) — paste for diagnosis\n' \
        "$C_YELLOW" "$C_RESET" "$log_dir/headers.log" "$_rail_hint" "$_ra_hint" >&2
      if [ "$STATUS_429_headers_logged" -eq "$STATUS_429_HEADER_LOG_LIMIT" ]; then
        printf '%s[v0.8.8 429-log] reached %d 429 captures this session; further 429 capture disabled. See %s%s\n' \
          "$C_DIM" "$STATUS_429_HEADER_LOG_LIMIT" "$log_dir/headers.log" "$C_RESET" >&2
      fi
    fi
  fi

  # ── Safety net: log raw OAuth headers for the first 50 SUCCESSFUL calls ───
  # Unchanged purpose from v0.6.9: sample the OAuth unified-* family on normal
  # (non-429) traffic to verify Pax's spec against Bryan's real account. This
  # arm is now strictly for the 200-path; 429s are handled above with their own
  # budget and never consume this cap. We still skip it on a 429 we already
  # logged (no double-write of the same dump).
  if [ "$_is_429" != "1" ] \
     && { [ "$LARRY_AUTH_MODE" = "oauth" ] \
          && [ -n "$STATUS_oauth_status$STATUS_oauth_5h_utilization$STATUS_oauth_7d_utilization" ]; } \
     && [ "$STATUS_oauth_headers_logged" -lt "$STATUS_OAUTH_HEADER_LOG_LIMIT" ]; then
    mkdir -p "$log_dir" 2>/dev/null || true
    if [ -d "$log_dir" ]; then
      {
        printf '── %s call #%d model=%s ──\n' \
          "$(date -Iseconds 2>/dev/null || date)" \
          "$((STATUS_oauth_headers_logged + 1))" \
          "$LARRY_MODEL"
        grep -i '^anthropic-' "$f" 2>/dev/null || true
        grep -i '^retry-after:' "$f" 2>/dev/null || true
        grep -iE '^(http/|HTTP/)' "$f" 2>/dev/null || true
        printf '\n'
      } >> "$log_dir/headers.log" 2>/dev/null || true
      STATUS_oauth_headers_logged=$((STATUS_oauth_headers_logged + 1))
      if [ "$STATUS_oauth_headers_logged" -eq "$STATUS_OAUTH_HEADER_LOG_LIMIT" ]; then
        printf '%s[v0.6.9 header-log] reached %d OAuth calls; raw header capture disabled. See %s%s\n' \
          "$C_DIM" "$STATUS_OAUTH_HEADER_LOG_LIMIT" "$log_dir/headers.log" "$C_RESET" >&2
      fi
    fi
  fi
}

# render_status_line — print the dim 1-line status footer between turns.
# As of v0.7.1 this renders AFTER the user submits input and BEFORE
# agent_turn begins (was: above the prompt, v0.6.9–v0.7.0). It now reads
# as a "between turns" marker summarising the just-completed turn's cost
# heading into the new request.
#
# Honors LARRY_NO_STATUS=1. Always ends with a trailing newline so the next
# stream lands cleanly below.
#
# v0.8.7 — suppress ONLY on the true first turn (no turn has run yet,
# _LARRY_TURNS==0). After that, ALWAYS render — even if the rate-limit /
# context fields are still empty (the segments self-render "—" placeholders).
# Earlier (v0.6.9–v0.8.6) the OAuth arm gated on ctx/5h/7d ALL being empty,
# which silently hid the entire line for a whole session whenever every turn
# erred (e.g.
# rate_limit): _record_ctx_used (which populates ctx) runs only
# AFTER a successful agent_turn, and pre-v0.8.5 the unified-* utilization
# headers weren't captured on error responses — so all three gate fields
# stayed empty turn after turn and the line never appeared on MobaXterm.
# Root cause of the "status line missing on the work-box" report.
# This is NOT a positioning bug: the line is a plain printf'd dim line printed
# between turns (no DECSTBM scroll-region / cursor-save / absolute-row escape),
# so MobaXterm's terminal emulation has nothing to mis-honor. It is also NOT
# coupled to streaming or mouse mode — the between-turn call site (main_loop)
# invokes it unconditionally regardless of LARRY_NO_STREAM / LARRY_MOUSE.
# Gate on turn count, NOT data presence, so session context (model, turns,
# cost, ctx window) always shows and the reset date fills in once a call
# populates the headers.
render_status_line() {
  [ "${LARRY_NO_STATUS:-0}" = "1" ] && return 0

  # Suppress ONLY before the first turn has run. coerce_int defends against a
  # CR-tainted counter on Cygwin/MobaXterm (v0.7.5 lesson — never feed a raw
  # value to `-eq`).
  local _turns; _turns=$(coerce_int "$_LARRY_TURNS" 0)
  [ "$_turns" -eq 0 ] && return 0

  # Pick template by auth mode. Both arms self-render "—" for any field that
  # has no data yet, so the line is always informative even on a fresh or
  # error-only session.
  case "$LARRY_AUTH_MODE" in
    oauth)
      _render_status_line_oauth
      ;;
    *)
      # apikey, and any unknown auth mode: show the universal context
      # segment + turns so the operator gets feedback rather than a blank line.
      _render_status_line_apikey
      ;;
  esac
}

# _ctx_segment — render "ctx 12% (24K/200K)" or "ctx — (—/—)".
# Reads STATUS_ctx_used_tokens / STATUS_ctx_window; colours the segment
# yellow at >=75% and red at >=90% of the window.
_ctx_segment() {
  local used="$STATUS_ctx_used_tokens"
  local win="$STATUS_ctx_window"
  # Lazy-init the window from the current model if not set.
  if [ -z "$win" ]; then
    win=$(_model_context_window "$LARRY_MODEL")
    STATUS_ctx_window="$win"
  fi
  if [ -z "$used" ]; then
    printf 'ctx — (—/%s)' "$(_humanize_tokens "$win")"
    return
  fi
  local pct
  # awk treats an empty/zero window as 0 and yields the "—" placeholder
  # instead of dividing by zero.
  pct=$(awk -v u="$used" -v w="$win" 'BEGIN{ if(w==0){print "—"} else {printf "%d", (u*100/w)} }')
  local color="$C_DIM"
  if [ "$pct" != "—" ]; then
    if [ "$pct" -ge 90 ]; then
      color="$C_RED"
    elif [ "$pct" -ge 75 ]; then
      color="$C_YELLOW"
    fi
  fi
  printf '%sctx %s%% (%s/%s)%s%s' "$color" "$pct" \
    "$(_humanize_tokens "$used")" "$(_humanize_tokens "$win")" \
    "$C_RESET" "$C_DIM"
}

# _utilization_pct DECIMAL — turn "0.7370692..." into "73" (integer percent).
# Prints "—" for an empty argument.
_utilization_pct() {
  local d="$1"
  [ -z "$d" ] && { printf '—'; return; }
  awk -v d="$d" 'BEGIN{printf "%d", d*100}'
}

# _utilization_pct_one DECIMAL — same but with one decimal place ("73.7").
_utilization_pct_one() {
  local d="$1"
  [ -z "$d" ] && { printf '—'; return; }
  awk -v d="$d" 'BEGIN{printf "%.1f", d*100}'
}

# _render_status_line_oauth — status line for the OAuth rail:
# ctx segment + 5h and 7d utilization, with reset times when the terminal
# is at least 100 columns wide, followed by the origin badge.
_render_status_line_oauth() {
  local ctx; ctx=$(_ctx_segment)
  # v0.7.5: coerce_int on date +%s — Cygwin date.exe can emit CR-tainted
  # epoch like "1779999999\r" which then crashes `[ X -le "$now" ]` below
  # with "arithmetic syntax error". Same defense as lib/oauth.sh.
  local now; now=$(coerce_int "$(date +%s)" 0)

  # 5h segment
  local five_pct five_reset five_color="$C_DIM"
  if [ -n "$STATUS_oauth_5h_utilization" ]; then
    five_pct=$(_utilization_pct_one "$STATUS_oauth_5h_utilization")
    # Color by utilization or status.
    local raw_pct; raw_pct=$(_utilization_pct "$STATUS_oauth_5h_utilization")
    if [ "$raw_pct" -ge 90 ]; then
      five_color="$C_RED"
    elif [ "$raw_pct" -ge 75 ]; then
      five_color="$C_YELLOW"
    fi
  else
    five_pct="—"
  fi
  # v0.8.7: coerce_int the reset epoch before the `-le` test. These values come
  # from _header_value, which strips only the TRAILING CR; an embedded CR (CRLF
  # response on MobaXterm) or a non-numeric token would crash `[ X -le N ]` with
  # an arithmetic error and abort the whole line — same defense as `now` above.
  local _5h_epoch; _5h_epoch=$(coerce_int "$STATUS_oauth_5h_reset_epoch" 0)
  if [ -n "$STATUS_oauth_5h_reset_epoch" ] && [ "$_5h_epoch" -gt 0 ]; then
    if [ "$_5h_epoch" -le "$now" ]; then
      five_reset="— reset"
    else
      five_reset="reset $(_epoch_to_hhmm "$_5h_epoch")"
    fi
  else
    five_reset="reset —"
  fi

  # 7d segment
  local seven_pct seven_reset seven_color="$C_DIM"
  if [ -n "$STATUS_oauth_7d_utilization" ]; then
    seven_pct=$(_utilization_pct_one "$STATUS_oauth_7d_utilization")
    local raw_pct7; raw_pct7=$(_utilization_pct "$STATUS_oauth_7d_utilization")
    if [ "$raw_pct7" -ge 90 ]; then
      seven_color="$C_RED"
    elif [ "$raw_pct7" -ge 75 ]; then
      seven_color="$C_YELLOW"
    fi
  else
    seven_pct="—"
  fi
  # v0.8.7: coerce_int the 7d reset epoch — same CR-taint / arithmetic-crash
  # defense as the 5h segment above.
  local _7d_epoch; _7d_epoch=$(coerce_int "$STATUS_oauth_7d_reset_epoch" 0)
  if [ -n "$STATUS_oauth_7d_reset_epoch" ] && [ "$_7d_epoch" -gt 0 ]; then
    if [ "$_7d_epoch" -le "$now" ]; then
      seven_reset="— reset"
    else
      seven_reset="reset $(_epoch_to_ddd_mmm_d "$_7d_epoch")"
    fi
  else
    seven_reset="reset —"
  fi

  # Status-level color override (warning → yellow, rate_limited → red wins).
  # v0.8.7: strip_cr before the case — STATUS_oauth_status comes from an API
  # header and a CRLF response (MobaXterm) would leave "rate_limited\r", which
  # the literal-glob case arm would never match (silent loss of the red cue).
  local overall_pre=""
  case "$(strip_cr "$STATUS_oauth_status")" in
    rate_limited) overall_pre="$C_RED" ;;
    warning)      overall_pre="$C_YELLOW" ;;
  esac

  # Build the line. Width-aware: if cols < 100, drop the reset times.
  # v0.7.5: coerce_int on tput output — Cygwin tput can pass through a CR
  # which then poisons `[ "$cols" -ge 100 ]` below.
  local cols
  cols=$(coerce_int "$(tput cols 2>/dev/null || echo 100)" 100)
  local line
  if [ "$cols" -ge 100 ]; then
    line=$(printf '%s─ %s ─ %s5h %s%% %s%s ─ %s7d %s%% %s%s ─%s' \
      "$C_DIM" "$ctx" \
      "$five_color" "$five_pct" "$five_reset" "$C_DIM" \
      "$seven_color" "$seven_pct" "$seven_reset" "$C_DIM" \
      "$C_RESET")
  else
    line=$(printf '%s─ %s ─ %s5h %s%%%s ─ %s7d %s%%%s ─%s' \
      "$C_DIM" "$ctx" \
      "$five_color" "$five_pct" "$C_DIM" \
      "$seven_color" "$seven_pct" "$C_DIM" \
      "$C_RESET")
  fi
  local badge; badge=$(_origin_badge)
  if [ -n "$overall_pre" ]; then
    printf '%s%s%s\n' "$overall_pre" "$line" "$badge"
  else
    printf '%s%s\n' "$line" "$badge"
  fi
}

# _render_status_line_apikey — status line for the API-key rail:
# ctx segment + running session cost + turn count, then the origin badge.
_render_status_line_apikey() {
  local ctx; ctx=$(_ctx_segment)
  # Session $ from current cost trackers.
  local dollars; dollars=$(_render_session_cost_dollars)
  local badge; badge=$(_origin_badge)
  # coerce_int before %d: a CR-tainted or empty counter would make printf
  # report "invalid number" — same defense render_status_line applies.
  local turns; turns=$(coerce_int "$_LARRY_TURNS" 0)
  printf '%s─ %s ─ $%s session ─ %d turns ─%s%s\n' \
    "$C_DIM" "$ctx" "$dollars" "$turns" "$C_RESET" "$badge"
}

# _origin_badge — status-line segment (v0.7.4 single-source). Returns a
# leading space + colored fragment when the origin is non-default, otherwise
# empty. Badge values:
#   ""        on the default Gitea origin (no pin, no env override)
#   "custom"  pinned to a user-supplied HTTPS URL via /origin
# The legacy "github" pin and the gitea→github fallback indicator are gone
# in v0.7.4 (the fallback no longer exists).
_origin_badge() {
  local pinned=""
  # Only the pin file is consulted here; all whitespace is removed before
  # matching so a trailing newline/CR does not matter.
  [ -r "$LARRY_HOME/.origin" ] && pinned=$(tr -d '[:space:]' < "$LARRY_HOME/.origin" 2>/dev/null)
  case "$pinned" in
    https://*) printf ' %scustom%s' "$C_DIM" "$C_RESET"; return 0 ;;
  esac
  return 0
}

# _render_session_cost_dollars — reuse the existing pricing logic.
# Returns the running session $ amount to 3 decimals.
_render_session_cost_dollars() {
  # _price_for_model prints "<input-price> <output-price>" (USD per MTok);
  # split on the single space.
  local prices; prices=$(_price_for_model "$LARRY_MODEL")
  local in_price out_price
  in_price="${prices% *}"
  out_price="${prices#* }"
  # Cache reads are billed at 0.1x and cache writes at 1.25x the input price.
  awk -v ti="$_LARRY_INPUT_TOKENS" -v to="$_LARRY_OUTPUT_TOKENS" \
      -v tcr="$_LARRY_CACHE_READ_TOKENS" -v tcw="$_LARRY_CACHE_WRITE_TOKENS" \
      -v pi="$in_price" -v po="$out_price" \
      'BEGIN{
         c = ti*pi/1000000 + to*po/1000000 + tcr*pi*0.1/1000000 + tcw*pi*1.25/1000000;
         printf "%.3f", c
       }'
}

# _record_ctx_used IN_TOK CACHE_READ CACHE_WRITE — update STATUS_ctx_used_tokens
# with the LATEST turn's total context size. Per Pax §5: ctx_used =
# input_tokens + cache_creation_input_tokens + cache_read_input_tokens.
# (NOT the running cumulative sum — context resets per turn from Anthropic's
# perspective.)
_record_ctx_used() {
  local in_t="${1:-0}" cr="${2:-0}" cw="${3:-0}"
  # v0.8.12: these arrive from jq -r on the (possibly CRLF-translated) response
  # on Cygwin/MobaXterm, so coerce_int before arithmetic. `${1:-0}` only fills a
  # MISSING arg, not a "1234\r" one — that CR is what crashed $(( )). See the
  # non-streaming cost-tracking block + Anomaly #4 of the v0.7.5 OAuth fix.
  in_t=$(coerce_int "$in_t" 0); cr=$(coerce_int "$cr" 0); cw=$(coerce_int "$cw" 0)
  STATUS_ctx_used_tokens=$(( in_t + cr + cw ))
  # Lazy-init the window so /status renders correctly even without an API call.
  # Written as `if` so the function returns 0 when the window is already set.
  if [ -z "$STATUS_ctx_window" ]; then
    STATUS_ctx_window=$(_model_context_window "$LARRY_MODEL")
  fi
}

# print_cost_summary — print the /cost report: per-bucket token counts and
# dollar amounts (4 decimals), the total, and the turn count.
print_cost_summary() {
  local prices; prices=$(_price_for_model "$LARRY_MODEL")
  local in_price out_price
  in_price="${prices% *}"
  out_price="${prices#* }"
  # Compute via awk for floating point (bash has no fp).
  local cost_in cost_out cost_read cost_write total
  cost_in=$(awk -v t="$_LARRY_INPUT_TOKENS" -v p="$in_price" 'BEGIN{printf "%.4f", t*p/1000000}')
  cost_out=$(awk -v t="$_LARRY_OUTPUT_TOKENS" -v p="$out_price" 'BEGIN{printf "%.4f", t*p/1000000}')
  cost_read=$(awk -v t="$_LARRY_CACHE_READ_TOKENS" -v p="$in_price" 'BEGIN{printf "%.4f", t*p*0.1/1000000}')
  cost_write=$(awk -v t="$_LARRY_CACHE_WRITE_TOKENS" -v p="$in_price" 'BEGIN{printf "%.4f", t*p*1.25/1000000}')
  total=$(awk -v a="$cost_in" -v b="$cost_out" -v c="$cost_read" -v d="$cost_write" 'BEGIN{printf "%.4f", a+b+c+d}')
  printf '%sSession cost so far:%s\n' "$C_BOLD" "$C_RESET"
  printf ' Model: %s (in $%s/MTok, out $%s/MTok)\n' "$LARRY_MODEL" "$in_price" "$out_price"
  printf ' Input tokens: %s ($%s)\n' "$_LARRY_INPUT_TOKENS" "$cost_in"
  printf ' Output tokens: %s ($%s)\n' "$_LARRY_OUTPUT_TOKENS" "$cost_out"
  printf ' Cache reads: %s ($%s)\n' "$_LARRY_CACHE_READ_TOKENS" "$cost_read"
  printf ' Cache writes: %s ($%s)\n' "$_LARRY_CACHE_WRITE_TOKENS" "$cost_write"
  printf ' Total: $%s\n' "$total"
  printf ' Turns: %s\n' "$_LARRY_TURNS"
}

# Derive a short label from the full model ID for the prompt.
#   claude-sonnet-4-6           → sonnet-4.6
#   claude-opus-4-7             → opus-4.7
#   claude-haiku-4-5            → haiku-4.5
#   claude-sonnet-4-5-20250929  → sonnet-4.5   (snapshot date dropped)
# Arguments: $1 - model ID (defaults to $LARRY_MODEL).
# IDs that do not end in two numeric segments are printed unchanged apart
# from the stripped "claude-" prefix and snapshot date.
model_short_name() {
  local m="${1:-$LARRY_MODEL}"
  # Strip leading "claude-" if present.
  m="${m#claude-}"
  # Drop a trailing "-YYYYMMDD" snapshot date so it is not mistaken for the
  # minor version below (which would render "sonnet-4-5.20250929").
  case "$m" in
    *-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]) m="${m%-*}" ;;
  esac
  # Convert the remaining "-N-M" tail to "-N.M" by replacing the LAST '-'
  # with '.', but only when both trailing segments are purely numeric.
  local last="${m##*-}"
  local rest="${m%-*}"
  case "$rest" in
    *-*)
      local rest_last="${rest##*-}"
      local rest_rest="${rest%-*}"
      if [ -z "$rest_last" ] || [ -z "$last" ]; then
        # An empty segment (e.g. a trailing '-') is not a version pair.
        printf '%s' "$m"
      else
        case "$rest_last$last" in
          *[!0-9]*) printf '%s' "$m" ;;
          *) printf '%s-%s.%s' "$rest_rest" "$rest_last" "$last" ;;
        esac
      fi
      ;;
    *)
      printf '%s' "$m"
      ;;
  esac
}

# Session-scope: last assistant text (for /copy) and last tool call+result (for /show-last-tool).
_LARRY_LAST_ASSISTANT_TEXT=""
_LARRY_LAST_TOOL_NAME=""
_LARRY_LAST_TOOL_INPUT=""
_LARRY_LAST_TOOL_RESULT=""

# Pretty-print a tool-use input JSON one key:value per line, truncating long
# values. Used by both streaming and non-streaming paths.
# Non-string values are rendered as compact JSON; anything longer than 120
# characters is cut to 117 plus "...". Prints nothing if jq rejects the input.
_pretty_tool_input() {
  local input_json="$1"
  printf '%s' "$input_json" | jq -r '
    to_entries
    | map(
        .key as $k
        | (.value | if type=="string" then . else tojson end) as $v
        | " " + $k + ": " + (if ($v|length) > 120 then ($v[0:117] + "...") else $v end)
      )
    | join("\n")
  ' 2>/dev/null
}

# v0.8.1-b: second approval gate for tool results.
# Sets the shell-global $_LARRY_GATE_RESULT to either the original result
# (user accepted) or a refusal sentinel (user declined). Never silently
# replaces — the caller reads $_LARRY_GATE_RESULT explicitly.
#
# Triggers (any of):
#   - tool name is in the "operator-intent" list (bash_exec, ssh_exec,
#     ssh_pull, ssh_pull_smat) AND
#       - result is HL7-shaped, OR
#       - result > LARRY_TOOL_RESULT_REVIEW_THRESHOLD bytes (default 8192)
#   - LARRY_TOOL_RESULT_REVIEW=always (covers every tool result)
#
# Skipped when:
#   - LARRY_AUTO_PHI=off (operator explicitly opted out of PHI prompts)
#   - no controlling TTY (headless / non-interactive scripts must not block)
#
# Arguments: $1 - tool name, $2 - tool result text.
# The operator's answer is read from /dev/tty so the prompt works even when
# stdin carries other data; all prompt text goes to stderr.
_LARRY_GATE_RESULT=""
_maybe_tool_result_review_gate() {
  local name="$1" result="$2"
  _LARRY_GATE_RESULT="$result"

  # Bypasses.
  [ "$AUTO_PHI_MODE" = "off" ] && return 0
  [ -t 0 ] || return 0
  [ -t 2 ] || return 0

  # Always-on env override.
  if [ "${LARRY_TOOL_RESULT_REVIEW:-}" = "always" ]; then
    :  # fall through to prompt
  else
    # Otherwise, only gate the operator-intent tools.
    case "$name" in
      bash_exec|ssh_exec|ssh_pull|ssh_pull_smat) ;;
      *) return 0 ;;
    esac
    # Trigger check.
    local size; size=$(printf '%s' "$result" | wc -c | tr -d ' ')
    local threshold="${LARRY_TOOL_RESULT_REVIEW_THRESHOLD:-8192}"
    # coerce_int defends against CR-tainted env on Cygwin (v0.7.5 lessons).
    if declare -F coerce_int >/dev/null 2>&1; then
      threshold=$(coerce_int "$threshold" 8192)
      size=$(coerce_int "$size" 0)
    fi
    local hl7=0
    _auto_phi_looks_like_hl7 "$result" && hl7=1
    if [ "$hl7" != "1" ] && [ "$size" -le "$threshold" ]; then
      return 0
    fi
  fi

  # Render the prompt. Show a 240-char preview of the result.
  local preview; preview=$(printf '%s' "$result" | head -c 240)
  printf '\n%s══ tool result review ══%s\n' "$C_YELLOW" "$C_RESET" >&2
  printf ' tool: %s\n' "$name" >&2
  printf ' bytes: %s\n' "$(printf '%s' "$result" | wc -c | tr -d ' ')" >&2
  if _auto_phi_looks_like_hl7 "$result"; then
    printf ' shape: HL7-shaped (post-sanitize)\n' >&2
  fi
  printf '%s── preview (first 240 chars) ──%s\n' "$C_DIM" "$C_RESET" >&2
  printf '%s\n' "$preview" >&2
  printf '%sphi> send this output back to the model? [Y/n/i (inspect)]:%s ' \
    "$C_BOLD" "$C_RESET" >&2

  # Read the answer from the terminal. Any read failure counts as an empty
  # answer, which falls through to the default (send).
  local ans=""
  if declare -F read_clean >/dev/null 2>&1; then
    read_clean ans </dev/tty 2>/dev/null || ans=""
  else
    IFS= read -r ans </dev/tty 2>/dev/null || ans=""
    ans="${ans//$'\r'/}"
  fi

  # "i" = inspect: show the full result in the pager, then ask again.
  while [ "$ans" = "i" ] || [ "$ans" = "I" ]; do
    local pager="${PAGER:-less}"
    local _ins_tmp=""
    if _ins_tmp=$(mktemp 2>/dev/null) && [ -n "$_ins_tmp" ]; then
      printf '%s' "$result" > "$_ins_tmp"
      if command -v "$pager" >/dev/null 2>&1; then
        "$pager" "$_ins_tmp" </dev/tty
      else
        cat "$_ins_tmp" >&2
      fi
      # The result may contain PHI — never leave the temp copy behind.
      rm -f -- "$_ins_tmp"
    else
      # No temp file available: show the result directly instead of failing.
      printf '%s\n' "$result" >&2
    fi
    printf '%sphi> send this output back to the model? [Y/n/i (inspect again)]:%s ' \
      "$C_BOLD" "$C_RESET" >&2
    if declare -F read_clean >/dev/null 2>&1; then
      read_clean ans </dev/tty 2>/dev/null || ans=""
    else
      IFS= read -r ans </dev/tty 2>/dev/null || ans=""
      ans="${ans//$'\r'/}"
    fi
  done

  case "$ans" in
    n|N|no|NO|No)
      _LARRY_GATE_RESULT='{"error":"tool result withheld by operator","tool":"'"$name"'","note":"operator reviewed the output locally and declined to send it back to the model"}'
      printf '%sphi>%s tool result WITHHELD from model (operator declined)\n' \
        "$C_DIM" "$C_RESET" >&2
      ;;
    *)
      # Default Y — pass through.
      ;;
  esac
}

# Display a tool call header (cyan + bold name, dim args, optional truncation hint).
display_tool_call() {
  local name="$1" input_json="$2"
  # NOTE(review): the header goes to stdout while the argument lines below go
  # to stderr — kept as-is; confirm against the callers before unifying.
  printf '\n%s%s▶ %s%s\n' "$C_CYAN" "$C_BOLD" "$name" "$C_RESET"
  local pretty; pretty=$(_pretty_tool_input "$input_json")
  if [ -n "$pretty" ]; then
    printf '%s%s%s\n' "$C_DIM" "$pretty" "$C_RESET" >&2
    # Was anything truncated? Apply the same per-value 120-char rule the
    # pretty-printer uses, instead of measuring the whole raw JSON (which
    # flagged any input over 120 chars even when no single value was cut).
    if printf '%s' "$input_json" | jq -e '
         to_entries
         | map(.value | (if type=="string" then . else tojson end) | length > 120)
         | any
       ' >/dev/null 2>&1; then
      printf '%s (use /show-last-tool for full args)%s\n' "$C_DIM" "$C_RESET" >&2
    fi
  fi
}

# Secure SSH tools — password is read from $LARRY_HOME/.ssh-creds/ by
# ssh-helper.sh and never exposed in argv, env, or tool output. The Larry-LLM
# only sees: alias name, command, command output.

# tool_ssh_exec ALIAS COMMAND [MAX_LINES] — run COMMAND on ALIAS through
# ssh-helper.sh and print its combined stdout/stderr followed by a footer
# carrying the exit code. Output beyond MAX_LINES (default 500) is cut and
# the footer says how many lines were shown out of the total.
# Returns 1 (with an ERROR line) if the helper is missing or an argument is
# empty; otherwise 0 — the remote exit code is reported in the footer only.
tool_ssh_exec() {
  local alias="$1" command="$2" max_lines="${3:-500}"
  local helper="$LARRY_LIB_DIR/ssh-helper.sh"
  [ -x "$helper" ] || { echo "ERROR: ssh-helper.sh not installed"; return 1; }
  [ -n "$alias" ] && [ -n "$command" ] || { echo "ERROR: ssh_exec needs alias and command"; return 1; }
  # max_lines is supplied by the model; fall back to the default rather than
  # letting a non-numeric value break the `-gt` test below.
  case "$max_lines" in
    ''|*[!0-9]*) max_lines=500 ;;
  esac
  local out rc
  out=$("$helper" exec "$alias" "$command" 2>&1)
  rc=$?
  # $(...) strips the trailing newline, so re-add it before counting —
  # otherwise the total is one short and the limit is off by one.
  local total_lines=0
  [ -n "$out" ] && total_lines=$(printf '%s\n' "$out" | wc -l | tr -d ' ')
  if [ "$total_lines" -gt "$max_lines" ]; then
    printf '%s\n[ssh_exec: output truncated — showed %s of %s lines. Exit rc=%d]\n' \
      "$(printf '%s\n' "$out" | head -n "$max_lines")" "$max_lines" "$total_lines" "$rc"
  else
    printf '%s\n[ssh_exec: exit rc=%d]\n' "$out" "$rc"
  fi
}

# tool_ssh_status — print ssh-helper.sh's `status` output (stdout + stderr).
tool_ssh_status() {
  local helper="$LARRY_LIB_DIR/ssh-helper.sh"
  [ -x "$helper" ] || { echo "ERROR: ssh-helper.sh not installed"; return 1; }
  "$helper" status 2>&1
}

# ── v0.8.13: first-class "list / count Cloverleaf sites" — works in BOTH modes.
# This is the proactive answer to "how many sites are on qa?" that previously
# stalled on a missing $HCIROOT.
#
# REMOTE: alias given → ssh-helper.sh `discover <alias>` resolves the remote
#         $HCIROOT in a LOGIN shell, then lists sites (hcisitelist fast-path,
#         NetConfig-walk fallback). No path is ever requested from Bryan.
# LOCAL: no alias → enumerate sites under the local $HCIROOT (or hciroot # override): hcisitelist if on PATH, else NetConfig walk. # # Output is human-readable: a count + one site name per line, plus the resolved # HCIROOT so the model can cite it. tool_list_sites() { local alias="${1:-}" hciroot_ovr="${2:-}" if [ -n "$alias" ]; then # ── REMOTE mode ────────────────────────────────────────────────────── local helper="$LARRY_LIB_DIR/ssh-helper.sh" [ -x "$helper" ] || { echo "ERROR: ssh-helper.sh not installed"; return 1; } local out rc out=$("$helper" discover "$alias" 2>&1); rc=$? if [ "$rc" -ne 0 ]; then # v0.8.17: the recovery hint depends on the alias's transport mode. In # DIRECT mode there is no master — a failure is most likely a stale/rotated # password (re-run /ssh-pass then /ssh-setup to re-validate). In master mode # the usual cause is a closed master (re-run /ssh-setup). local _ls_direct="" local _ls_tsv="${LARRY_HOME:-$HOME/.larry}/.ssh-hosts.tsv" [ -f "$_ls_tsv" ] && _ls_direct=$(awk -F'\t' -v a="$alias" 'NR>1 && $1==a && $5=="on"{print "on"; exit}' "$_ls_tsv" 2>/dev/null) if [ "$_ls_direct" = "on" ]; then printf '%s\n[list_sites: discover failed for alias=%s rc=%d (DIRECT mode). Likely a stale/rotated password — tell Bryan to re-run /ssh-pass %s then /ssh-setup %s to re-validate.]\n' \ "$out" "$alias" "$rc" "$alias" "$alias" else printf '%s\n[list_sites: discover failed for alias=%s rc=%d. If the master is closed, tell Bryan to run /ssh-setup %s]\n' \ "$out" "$alias" "$rc" "$alias" fi return 0 fi local rroot; rroot=$(printf '%s\n' "$out" | awk -F'\t' '$1=="HCIROOT"{print $2; exit}') local sites; sites=$(printf '%s\n' "$out" | awk -F'\t' '$1=="SITE"{print $2}' | sort -u) local note; note=$(printf '%s\n' "$out" | awk -F'\t' '$1=="NOTE"{print $2}') # v0.8.15: the discover walk filters out scaffolding/special sites and any # dir named after the host; it reports what it dropped on an EXCLUDED line. 
# Surface it transparently (never silently hide) — the real-site count stays # the headline below. local excluded; excluded=$(printf '%s\n' "$out" | awk -F'\t' '$1=="EXCLUDED"{print $2; exit}') local n=0; [ -n "$sites" ] && n=$(printf '%s\n' "$sites" | grep -c .) # v0.8.15: report the actual resolution mode. If the alias has a pinned # HCIROOT (4th column of the hosts TSV) the discover ran with HCIROOT # exported explicitly and NO login profile; otherwise it used a login shell. local _hosts_tsv="${LARRY_HOME:-$HOME/.larry}/.ssh-hosts.tsv" _pin="" _direct="" _mode="login shell" if [ -f "$_hosts_tsv" ]; then _pin=$(awk -F'\t' -v a="$alias" 'NR>1 && $1==a { print $4; exit }' "$_hosts_tsv" 2>/dev/null) _direct=$(awk -F'\t' -v a="$alias" 'NR>1 && $1==a && $5=="on" { print "on"; exit }' "$_hosts_tsv" 2>/dev/null) fi [ -n "$_pin" ] && _mode="pinned HCIROOT, no login profile" # v0.8.17: note the transport too — direct (per-command sshpass) vs master. if [ "$_direct" = "on" ]; then _mode="$_mode; direct sshpass"; else _mode="$_mode; ControlMaster"; fi printf 'Cloverleaf env on alias "%s" (REMOTE, %s):\n' "$alias" "$_mode" printf ' HCIROOT = %s\n' "${rroot:-}" [ -n "$note" ] && printf ' NOTE: %s\n' "$note" if [ -n "$excluded" ]; then local _exc_csv; _exc_csv=$(printf '%s' "$excluded" | tr ' ' ',' | sed 's/,/, /g') printf ' sites: %d (excluded: %s)\n' "$n" "$_exc_csv" else printf ' sites: %d\n' "$n" fi [ -n "$sites" ] && printf '%s\n' "$sites" | sed 's/^/ - /' return 0 fi # ── LOCAL mode ───────────────────────────────────────────────────────── local root="${hciroot_ovr:-${HCIROOT:-}}" if [ -z "$root" ] || [ ! -d "$root" ]; then echo "ERROR: no local \$HCIROOT (and no hciroot override). This box has no detected Cloverleaf install. If Cloverleaf is on a remote host, pass alias= (run ssh_status to see configured aliases)." 
return 0 fi local sites="" if command -v hcisitelist >/dev/null 2>&1; then sites=$(hcisitelist 2>/dev/null | tr ' ' '\n' | grep -v '^$' | sort -u) fi if [ -z "$sites" ]; then sites=$(find "$root" -mindepth 1 -maxdepth 2 -name NetConfig -type f 2>/dev/null \ | while IFS= read -r nc; do basename "$(dirname "$nc")"; done | sort -u) fi # v0.8.15: apply the SAME exclusion as the REMOTE discover walk — static # scaffolding/special sites plus any dir named after this host. Tunable via # the SITES_EXCLUDE env var (default: helloworld siteProto master). Never # silently hidden: the dropped names are reported alongside the real count. local _sites_exclude="${SITES_EXCLUDE:-helloworld siteProto master}" local _hn_s _hn_f; _hn_s=$(hostname -s 2>/dev/null || true); _hn_f=$(hostname 2>/dev/null || true) local _kept="" _dropped="" if [ -n "$sites" ]; then while IFS= read -r s; do [ -n "$s" ] || continue local _drop="" local x; for x in $_sites_exclude; do [ "$s" = "$x" ] && _drop=1 && break; done [ -z "$_drop" ] && [ -n "$_hn_s" ] && [ "$s" = "$_hn_s" ] && _drop=1 [ -z "$_drop" ] && [ -n "$_hn_f" ] && [ "$s" = "$_hn_f" ] && _drop=1 if [ -n "$_drop" ]; then _dropped="$_dropped $s"; else _kept="$_kept$s "; fi done <&1); rc=$? else out=$("$helper" pull "$alias" "$remote" 2>&1); rc=$? fi printf '%s\n[ssh_pull: exit rc=%d]\n' "$out" "$rc" } tool_ssh_push() { local alias="$1" local_path="$2" remote="$3" local helper="$LARRY_LIB_DIR/ssh-helper.sh" [ -x "$helper" ] || { echo "ERROR: ssh-helper.sh not installed"; return 1; } [ -n "$alias" ] && [ -n "$local_path" ] && [ -n "$remote" ] \ || { echo "ERROR: ssh_push needs alias, local_path, and remote_path"; return 1; } local out rc out=$("$helper" push "$alias" "$local_path" "$remote" 2>&1); rc=$? 
# tool_ssh_pull_smat ALIAS SITE THREAD [DAYS_BACK]
# Runs `ssh-helper.sh pull-smat ALIAS SITE THREAD [DAYS_BACK]` and prints the
# helper's combined stdout/stderr followed by an `[ssh_pull_smat: exit rc=N]`
# footer carrying the helper's exit status.
# Globals:  LARRY_LIB_DIR (read) - directory holding ssh-helper.sh
# Outputs:  helper output (capped at 400 KB) + footer, or an ERROR line
# Returns:  1 if the helper is missing or a required argument is empty;
#           otherwise 0 (the helper's status is reported in the footer only)
tool_ssh_pull_smat() {
  # ${N:-} (not bare $N): the script runs under `set -u`, so a short call must
  # reach the usage error below instead of dying on "unbound variable".
  local alias="${1:-}" site="${2:-}" thread="${3:-}" days_back="${4:-}"
  local helper="$LARRY_LIB_DIR/ssh-helper.sh"
  if [ ! -x "$helper" ]; then
    echo "ERROR: ssh-helper.sh not installed"
    return 1
  fi
  if [ -z "$alias" ] || [ -z "$site" ] || [ -z "$thread" ]; then
    echo "ERROR: ssh_pull_smat needs alias, site, thread"
    return 1
  fi

  # days_back is forwarded only when supplied, so the helper's own default applies.
  local helper_args=(pull-smat "$alias" "$site" "$thread")
  [ -n "$days_back" ] && helper_args+=("$days_back")

  local out rc
  out=$("$helper" "${helper_args[@]}" 2>&1); rc=$?

  # Cap returned bytes — sampled-mode b64 blobs can be sizable. Hard ceiling
  # at ~400 KB so the tool result stays in a reasonable bound; truncation is
  # explicit so the model can react and re-pull with a smaller days_back.
  local max_bytes=409600
  local bytes
  bytes=$(printf '%s' "$out" | wc -c | tr -d ' ')
  if [ "$bytes" -gt "$max_bytes" ]; then
    out=$(printf '%s' "$out" | head -c "$max_bytes")
    printf '%s\n[ssh_pull_smat: output truncated at 400 KB; re-run with smaller days_back. exit rc=%d]\n' "$out" "$rc"
  else
    printf '%s\n[ssh_pull_smat: exit rc=%d]\n' "$out" "$rc"
  fi
}

# tool_lesson_record TEXT [TOPIC] [SITE] [SEVERITY]
# Forwards a lesson to `lessons.sh add TEXT --severity SEVERITY
# [--topic TOPIC] [--site SITE]` and relays its output (stderr merged).
# SITE defaults to $HCISITE when empty; SEVERITY defaults to "info".
# Globals:  LARRY_LIB_DIR (read), HCISITE (read, optional)
# Returns:  non-zero if the lib check fails, TEXT is empty, or lessons.sh is
#           missing; otherwise the exit status of lessons.sh
tool_lesson_record() {
  local text="${1:-}" topic="${2:-}" site="${3:-${HCISITE:-}}" severity="${4:-info}"
  _lib_err_if_missing || return
  # TEXT is the one required argument; refuse an empty lesson up front.
  if [ -z "$text" ]; then
    echo "ERROR: lesson_record needs text"
    return 1
  fi
  local lessons_script="$LARRY_LIB_DIR/lessons.sh"
  if [ ! -x "$lessons_script" ]; then
    echo "ERROR: lessons.sh not installed"
    return 1
  fi
  local args=(add "$text" --severity "$severity")
  [ -n "$topic" ] && args+=(--topic "$topic")
  [ -n "$site" ] && args+=(--site "$site")
  "$lessons_script" "${args[@]}" 2>&1
}

# tool_larry_rollback_list [SESSION]
# Runs `larry-rollback.sh --list [--session SESSION]` and relays its output.
# The script is looked up next to $LARRY_HOME first, then next to
# $LARRY_LIB_DIR; the first executable candidate is run exactly once.
# Globals:  LARRY_HOME (read), LARRY_LIB_DIR (read, optional)
# Returns:  1 if no executable larry-rollback.sh is found; otherwise the
#           script's own exit status
tool_larry_rollback_list() {
  local session_filter="${1:-}"

  # Resolve the script BEFORE running it. Chaining the two invocations with
  # `||` would leak bash's "No such file or directory" into the tool result
  # when the first path is absent, and would run the listing a second time
  # whenever the first copy exists but exits non-zero.
  local candidates=("$LARRY_HOME/../larry-rollback.sh")
  [ -n "${LARRY_LIB_DIR:-}" ] && candidates+=("$LARRY_LIB_DIR/../larry-rollback.sh")
  local candidate script=""
  for candidate in "${candidates[@]}"; do
    if [ -x "$candidate" ]; then
      script="$candidate"
      break
    fi
  done
  if [ -z "$script" ]; then
    echo "ERROR: larry-rollback.sh not installed"
    return 1
  fi

  local args=(--list)
  [ -n "$session_filter" ] && args+=(--session "$session_filter")
  "$script" "${args[@]}" 2>&1
}

# tool_nc_document THREAD NAME SITE [OUT] [HCIROOT] [TITLE] [STATUS]
#                  [POC_INTERNAL] [POC_VENDOR] [ESCALATION] [OPEN_ITEMS]
#                  [NOTES] [ONENOTE_TABLE] [RAW_TCL]
# Builds the option list for nc-document.sh and runs it (stderr merged).
# SINGLE-THREAD mode: pass THREAD (+ optional SITE) — documents ONE interface
# in the Legacy "ADT Messages" template with the full per-delivery breakdown +
# deterministic UPOC-bits extraction. SYSTEM mode: pass NAME (a pattern) — one
# section per matching delivery thread across sites. Callers should give
# exactly one of THREAD/NAME; if both are given THREAD wins and NAME is ignored.
# SITE is forwarded only in single-thread mode. ONENOTE_TABLE / RAW_TCL are
# flags that are on only when equal to "1".
# Globals:  LARRY_LIB_DIR (read), HCIROOT (read, default for HCIROOT arg)
# Returns:  non-zero if the lib check fails or neither THREAD nor NAME is set;
#           otherwise the exit status of nc-document.sh
tool_nc_document() {
  local thread="${1:-}" name="${2:-}" site="${3:-}" out_path="${4:-}" hciroot="${5:-${HCIROOT:-}}"
  local title="${6:-}" status="${7:-}" poc_internal="${8:-}" poc_vendor="${9:-}"
  local escalation="${10:-}" open_items="${11:-}" notes="${12:-}"
  local onenote_table="${13:-0}" raw_tcl="${14:-0}"
  _lib_err_if_missing || return
  if [ -z "$thread" ] && [ -z "$name" ]; then
    echo "ERROR: nc_document needs either thread (single interface) or name (system pattern)"
    return 1
  fi

  local args=()
  if [ -n "$thread" ]; then
    args+=(--thread "$thread")
    [ -n "$site" ] && args+=(--site "$site")
  else
    args+=(--name "$name")
  fi
  # Optional fill-ins: each is passed through only when non-empty.
  [ -n "$hciroot" ]      && args+=(--hciroot "$hciroot")
  [ -n "$out_path" ]     && args+=(--out "$out_path")
  [ -n "$title" ]        && args+=(--title "$title")
  [ -n "$status" ]       && args+=(--status "$status")
  [ -n "$poc_internal" ] && args+=(--poc-internal "$poc_internal")
  [ -n "$poc_vendor" ]   && args+=(--poc-vendor "$poc_vendor")
  [ -n "$escalation" ]   && args+=(--escalation "$escalation")
  [ -n "$open_items" ]   && args+=(--open-items "$open_items")
  [ -n "$notes" ]        && args+=(--notes "$notes")
  [ "$onenote_table" = "1" ] && args+=(--onenote-table)
  [ "$raw_tcl" = "1" ]       && args+=(--raw-tcl)
  "$LARRY_LIB_DIR/nc-document.sh" "${args[@]}" 2>&1
}

# tool_nc_diff_interface INTERFACE LEFT RIGHT [OUT] [INCLUDE_TABLES]
#                        [LEFT_LABEL] [RIGHT_LABEL] [DEPTH]
# Builds the option list for nc-diff-interface.sh and runs it (stderr merged).
# INCLUDE_TABLES is a flag that is on only when equal to "1"; DEPTH defaults
# to 1 and is always forwarded.
# Globals:  LARRY_LIB_DIR (read)
# Returns:  non-zero if the lib check fails or INTERFACE/LEFT/RIGHT is empty;
#           otherwise the exit status of nc-diff-interface.sh
tool_nc_diff_interface() {
  local interface="${1:-}" left="${2:-}" right="${3:-}" out_path="${4:-}" include_tables="${5:-0}"
  local left_label="${6:-}" right_label="${7:-}" depth="${8:-1}"
  _lib_err_if_missing || return
  if [ -z "$interface" ] || [ -z "$left" ] || [ -z "$right" ]; then
    echo "ERROR: nc_diff_interface needs interface, left, right"
    return 1
  fi
  local args=(--interface "$interface" --left "$left" --right "$right" --depth "$depth")
  [ -n "$out_path" ]         && args+=(--out "$out_path")
  [ "$include_tables" = "1" ] && args+=(--include-tables)
  [ -n "$left_label" ]       && args+=(--left-label "$left_label")
  [ -n "$right_label" ]      && args+=(--right-label "$right_label")
  "$LARRY_LIB_DIR/nc-diff-interface.sh" "${args[@]}" 2>&1
}
# execute_tool NAME INPUT_JSON
# Dispatches one model tool call: looks NAME up in the table below, extracts
# each positional argument from INPUT_JSON with jq, and calls the matching
# tool_* function. Whatever the tool prints is the tool result.
# Arguments: $1 - tool name as declared in the tool schema
#            $2 - the call's input object, as a JSON string
# Outputs:   the tool's output, or "ERROR: unknown tool: NAME"
# Returns:   the status of the dispatched tool (0 for an unknown tool)
execute_tool() {
  local name="$1"
  local input_json="$2"

  # J FILTER — evaluate a jq filter against the input and print the raw value.
  # A JSON null (absent key, no `//` default in FILTER) is printed as the
  # EMPTY string: plain `jq -r` would print the literal word "null", which
  # then sails past every tool's `[ -n "$arg" ]` required-argument check.
  J() {
    printf '%s' "$input_json" \
      | jq -r "($1) | if . == null then \"\" else . end"
  }

  # J_flag KEY — read a boolean-ish option as 0/1. Absent/false -> 0 (via the
  # `// 0` default), true -> 1; a numeric 0/1 passes through unchanged.
  J_flag() {
    J "$1 // 0" | sed "s/false/0/;s/true/1/"
  }

  case "$name" in
    read_file)           tool_read_file "$(J '.path')" ;;
    list_dir)            tool_list_dir "$(J '.path // "."')" ;;
    grep_files)          tool_grep_files "$(J '.pattern')" "$(J '.path // "."')" ;;
    glob_files)          tool_glob_files "$(J '.pattern')" "$(J '.path // "."')" ;;
    write_file)          tool_write_file "$(J '.path')" "$(J '.content')" ;;
    bash_exec)           tool_bash_exec "$(J '.command')" ;;
    nc_list_protocols)   tool_nc_list_protocols "$(J '.netconfig')" ;;
    nc_list_processes)   tool_nc_list_processes "$(J '.netconfig')" ;;
    nc_protocol_block)   tool_nc_protocol_block "$(J '.netconfig')" "$(J '.name')" ;;
    nc_protocol_field)   tool_nc_protocol_field "$(J '.netconfig')" "$(J '.name')" "$(J '.field')" ;;
    nc_protocol_nested)  tool_nc_protocol_nested "$(J '.netconfig')" "$(J '.name')" "$(J '.path')" ;;
    nc_protocol_summary) tool_nc_protocol_summary "$(J '.netconfig')" "$(J '.filter // ""')" ;;
    nc_destinations)     tool_nc_destinations "$(J '.netconfig')" "$(J '.name')" ;;
    nc_xlate_refs)       tool_nc_xlate_refs "$(J '.netconfig')" "$(J '.name // ""')" ;;
    nc_find_inbound)     tool_nc_find_inbound "$(J '.netconfig')" "$(J '.mode // "all"')" "$(J '.format // "tsv"')" ;;
    nc_make_jump)
      tool_nc_make_jump "$(J '.netconfig')" "$(J '.inbound')" "$(J '.new_host')" "$(J '.jump_port')" \
        "$(J '.inbound_host // "127.0.0.1"')" "$(J '.process_jump // "server_jump"')" "$(J '.encoding // ""')" ;;
    nc_sources)          tool_nc_sources "$(J '.netconfig')" "$(J '.name')" ;;
    nc_paths)
      tool_nc_paths "$(J '.netconfig // ""')" "$(J '.thread // ""')" "$(J '.site // ""')" \
        "$(J '.direction // "full"')" \
        "$(J_flag '.all')" \
        "$(J_flag '.site_only')" \
        "$(J '.format // "v1"')" "$(J '.hciroot // ""')" ;;
    nc_tclproc_refs)     tool_nc_tclproc_refs "$(J '.netconfig')" "$(J '.name // ""')" ;;
    hl7_field)           tool_hl7_field "$(J '.message')" "$(J '.field_path')" ;;
    nc_msgs)
      tool_nc_msgs "$(J '.thread')" "$(J '.after // ""')" "$(J '.before // ""')" \
        "$(J '.field // ""')" "$(J '.value // ""')" \
        "$(J '.limit // 10')" "$(J '.format // "text"')" \
        "$(J '.sitedir // ""')" "$(J '.db // ""')" ;;
    nc_document)
      tool_nc_document "$(J '.thread // ""')" "$(J '.name // ""')" "$(J '.site // ""')" \
        "$(J '.out // ""')" "$(J '.hciroot // ""')" \
        "$(J '.title // ""')" "$(J '.status // ""')" \
        "$(J '.poc_internal // ""')" "$(J '.poc_vendor // ""')" \
        "$(J '.escalation // ""')" "$(J '.open_items // ""')" \
        "$(J '.notes // ""')" \
        "$(J_flag '.onenote_table')" \
        "$(J_flag '.raw_tcl')" ;;
    nc_find)             tool_nc_find "$(J '.mode')" "$(J '.query')" "$(J '.format // "table"')" "$(J '.hciroot // ""')" ;;
    nc_insert_protocol)  tool_nc_insert_protocol "$(J '.netconfig')" "$(J '.block')" "$(J '.mode // "end"')" "$(J '.anchor // ""')" ;;
    nc_add_route)        tool_nc_add_route "$(J '.netconfig')" "$(J '.protocol_name')" "$(J '.route')" ;;
    hl7_diff)            tool_hl7_diff "$(J '.left')" "$(J '.right')" "$(J '.ignore // "MSH.7"')" "$(J '.include // ""')" "$(J '.format // "text"')" ;;
    nc_diff_interface)
      tool_nc_diff_interface "$(J '.interface')" "$(J '.left')" "$(J '.right')" "$(J '.out // ""')" \
        "$(J_flag '.include_tables')" \
        "$(J '.left_label // ""')" "$(J '.right_label // ""')" \
        "$(J '.depth // 1')" ;;
    nc_regression)
      tool_nc_regression "$(J '.scope')" "$(J '.count // 10')" "$(J '.env_a')" "$(J '.site_a // ""')" \
        "$(J '.env_b')" "$(J '.site_b // ""')" "$(J '.out')" \
        "$(J '.route_test_cmd // ""')" "$(J '.ignore // "MSH.7"')" \
        "$(J '.phase // "all"')" "$(J_flag '.dry_run')" \
        "$(J '.source_ssh_alias // ""')" "$(J '.target_ssh_alias // ""')" ;;
    lesson_record)       tool_lesson_record "$(J '.text')" "$(J '.topic // ""')" "$(J '.site // ""')" "$(J '.severity // "info"')" ;;
    hl7_sanitize)        tool_hl7_sanitize "$(J '.input_path')" "$(J_flag '.strict')" ;;
    ssh_exec)            tool_ssh_exec "$(J '.alias')" "$(J '.command')" "$(J '.max_lines // 500')" ;;
    ssh_status)          tool_ssh_status ;;
    list_sites)          tool_list_sites "$(J '.alias // ""')" "$(J '.hciroot // ""')" ;;
    ssh_pull)            tool_ssh_pull "$(J '.alias')" "$(J '.remote_path')" "$(J '.local_path // ""')" ;;
    ssh_push)            tool_ssh_push "$(J '.alias')" "$(J '.local_path')" "$(J '.remote_path')" ;;
    ssh_pull_smat)       tool_ssh_pull_smat "$(J '.alias')" "$(J '.site')" "$(J '.thread')" "$(J '.days_back // ""')" ;;
    larry_rollback_list) tool_larry_rollback_list "$(J '.session // ""')" ;;
    *)                   echo "ERROR: unknown tool: $name" ;;
  esac
}
Use for finding TCL procs, UPOC declarations, segment references, etc. Returns up to 300 matching lines with file:line:content. To grep remote files, use ssh_exec with grep, or ssh_pull the file first.","input_schema":{"type":"object","properties":{"pattern":{"type":"string","description":"Regex pattern (grep -E style)."},"path":{"type":"string","description":"Starting directory."}},"required":["pattern","path"]}}, {"name":"glob_files","description":"Find files by name pattern. Up to 300 paths.","input_schema":{"type":"object","properties":{"pattern":{"type":"string","description":"Shell glob like *.tcl or *Inbound*"},"path":{"type":"string","description":"Starting directory."}},"required":["pattern","path"]}}, {"name":"write_file","description":"Write content to a path. ALWAYS prompts Bryan for Y/N before writing. Shows a unified diff if file exists, or a preview if new.","input_schema":{"type":"object","properties":{"path":{"type":"string"},"content":{"type":"string"}},"required":["path","content"]}}, {"name":"bash_exec","description":"Run a shell command. ALWAYS prompts Bryan for Y/N before running. Output capped at 500 lines.","input_schema":{"type":"object","properties":{"command":{"type":"string","description":"Single command line, passed to bash -c."}},"required":["command"]}}, {"name":"nc_list_protocols","description":"List every protocol (thread) declared in a Cloverleaf NetConfig file. Native v3 parser — does not invoke v1/v2 wrappers. One name per line.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string","description":"Absolute path to a NetConfig file, e.g. $HCISITEDIR/NetConfig."}},"required":["netconfig"]}}, {"name":"nc_list_processes","description":"List every process declared in a NetConfig. 
One name per line.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"}},"required":["netconfig"]}}, {"name":"nc_protocol_block","description":"Return the full TCL block for one protocol (everything between `protocol NAME {` and the matching `}`). Use to inspect every field of a thread.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string","description":"Protocol name, e.g. IB_ADT_muxS."}},"required":["netconfig","name"]}}, {"name":"nc_protocol_field","description":"Get a top-level field value from a protocol block (e.g. PROCESSNAME, OBWORKASIB, OUTBOUNDONLY, GROUPS, ENCODING, ICLSERVERPORT, AUTOSTART, HOSTDOWN).","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string"},"field":{"type":"string","description":"Field name, e.g. PROCESSNAME"}},"required":["netconfig","name","field"]}}, {"name":"nc_protocol_nested","description":"Drill into a nested block via dotted path. Use PROTOCOL.TYPE / PROTOCOL.HOST / PROTOCOL.PORT / PROTOCOL.ISSERVER for connection details — those live inside the inner PROTOCOL{} block, NOT at top level.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string"},"path":{"type":"string","description":"Dotted path, e.g. PROTOCOL.PORT"}},"required":["netconfig","name","path"]}}, {"name":"nc_protocol_summary","description":"Compact TSV summary of all protocols with direction-relevant fields (name, process, direction, port, host, type, isserver, outonly, obworkasib, iclserverport). Optional --filter regex to narrow.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"filter":{"type":"string","description":"Optional regex to filter protocol names."}},"required":["netconfig"]}}, {"name":"nc_destinations","description":"List every DEST routed to from one protocol’s DATAXLATE block. 
Unique, sorted.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string"}},"required":["netconfig","name"]}}, {"name":"nc_xlate_refs","description":"List every .xlt file referenced in the NetConfig (all of them, or scoped to one protocol if `name` is provided).","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string","description":"Optional. Limits to one protocol."}},"required":["netconfig"]}}, {"name":"nc_find_inbound","description":"Find inbound threads in a NetConfig. mode=tcp-listen (ISSERVER=1, directly fed by upstream client systems), mode=icl-or-file (OBWORKASIB=1, fed by internal Cloverleaf link or file drop), mode=all (default). Output formats: tsv, jsonl, table.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"mode":{"type":"string","enum":["tcp-listen","icl-or-file","all"],"description":"Which class of inbound to return."},"format":{"type":"string","enum":["tsv","jsonl","table"]}},"required":["netconfig"]}}, {"name":"nc_make_jump","description":"Generate the 3-thread jump set for the cross-environment data replay pattern Bryan uses. Emits FOUR artifacts: (1) linux__out for OLD env (outbound tcpip-client to new linux:jump_port), (2) windows__in for NEW env server_jump site (inbound tcpip-server listening on jump_port, routes internally to #3), (3) windows__out for NEW env server_jump site (outbound tcpip-client to 127.0.0.1:, where orig_port is the existing inbound listening port read from the NetConfig), (4) route-add snippet to splice into the OLD inbound DATAXLATE block. Tag = inbound thread name (auto). The NEW env existing inbound is left COMPLETELY UNCHANGED. 
Pure generation; caller uses write_file (Y/N) to persist.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string","description":"NetConfig path containing the inbound thread (OLD env)."},"inbound":{"type":"string","description":"Existing inbound protocol name to mirror. Must be a TCP-listener (ISSERVER=1); read its PROTOCOL.PORT first to confirm."},"new_host":{"type":"string","description":"Hostname/IP of the NEW linux env that OLD will TCP to."},"jump_port":{"type":"string","description":"TCP port for the OLD to NEW hop. linux__out targets it, windows__in listens on it."},"inbound_host":{"type":"string","description":"Host that windows__out connects to on NEW (the existing inbound on NEW). Default 127.0.0.1 (same box, loopback)."},"process_jump":{"type":"string","description":"Process for NEW-side threads on server_jump. Default server_jump."},"encoding":{"type":"string","description":"ENCODING override. Default = same as the existing inbound."}},"required":["netconfig","inbound","new_host","jump_port"]}}, {"name":"nc_sources","description":"List every protocol that has a DATAXLATE DEST routing to the named thread. The inverse of nc_destinations. ONE HOP ONLY — to trace a full multi-hop chain use nc_paths, not repeated nc_sources calls.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string","description":"Target thread name."}},"required":["netconfig","name"]}}, {"name":"nc_paths","description":"Deterministic ROUTE-CHAIN tracer. Enumerates the full root-to-leaf message path(s). WITHIN a site the next hop follows the DATAXLATE DEST routing graph (intra-site routing never walks ICLSERVERPORT). 
USE THIS — DO NOT brute-force with grep_files / read_file / bash_exec / repeated nc_destinations — for ANY of: 'show me the path', 'trace the chain', 'what feeds X', 'where does X go', 'full route', 'end-to-end flow', 'sources and destinations chain', 'how does a message get from A to B', 'map the interface flow'. ONE call answers the whole question. DEFAULT OUTPUT is the v1 chain form, ONE PATH PER LINE: `site/thread --> site/thread ==> site/thread …` where every node is `site/thread`, `-->` is an INTRA-site DATAXLATE route hop, and `==>` is a CROSS-site hop. The FIRST node is the chain ROOT; field 1 (split on whitespace) IS the root node, so the output is pipe-first (`paths X | awk '{print $1}'` → the root). A branch yields multiple lines. For direction=up the root is the feeder ROOT and the queried thread is the chain TERMINUS. MODES: (a) one thread — set `thread` (accepts `thread`+`site` OR a single `site/thread` node, so output feeds back in); default returns every full path containing that thread; direction=down for only downstream, direction=up for only upstream feeders. (b) whole-site / whole-environment inventory — set all=true (optionally scope with `site`); enumerates every chain from every entry point (a thread with no incoming), deduped. CROSS-SITE BY DESTINATION BLOCK (Cloverleaf links sites through named `destination` blocks — the ICL routing table — not by thread name and not by blindly matching ports): a thread's DATAXLATE DEST may name a `destination` block; that block NAME is the LOCAL OUTBOUND SENDER node (shown in the chain, NEVER collapsed) and resolves to { SITE }/{ THREAD } { PORT } — the remote inbound it links to. So at every site boundary the chain reads `…local_inbound --> local_outbound_sender ==> remote_inbound --> …`, e.g. mux/ADTfr_epic_964700 --> mux/OB_ADT_ancS ==> ancout/IB_ADT_muxS --> ancout/ADTto_CodaMetrix. Upstream feeders of an inbound are resolved symmetrically. 
The whole route graph is parsed ONCE per run into memory; cross-site resolution is an in-memory lookup, not a per-site scan. Set site_only=true to stop at the site boundary. Resolves sites under $HCIROOT automatically (or pass hciroot / an explicit netconfig). Cycle-safe across sites; always terminates.","input_schema":{"type":"object","properties":{"thread":{"type":"string","description":"Thread/protocol name to trace, OR a `site/thread` node (the output's root node feeds straight back in). Omit only when all=true."},"site":{"type":"string","description":"Site name (the NetConfig's parent dir). Optional — disambiguates a thread present in multiple sites, or scopes all-mode to one site."},"netconfig":{"type":"string","description":"Optional explicit NetConfig path. If given, the thread's home site is its parent dir; cross-site joins still scan $HCIROOT unless site_only=true."},"direction":{"type":"string","enum":["full","up","down"],"description":"full (default) = every path containing the thread; down = only downstream chains; up = only upstream feeder chains (root = feeder root, queried thread = terminus)."},"all":{"type":"boolean","description":"true = enumerate every chain from every entry point (whole-site/whole-environment inventory). No thread needed."},"site_only":{"type":"boolean","description":"true = do NOT cross site boundaries (scope to one site). Default false = follow the chain across sites via destination blocks."},"format":{"type":"string","enum":["v1","table","tsv","jsonl","nodes"],"description":"Output format. Default v1 = the chain form, one path per line (site/thread nodes, --> intra / ==> cross), pipe-first (field 1 = root). table = aligned SITE/THREAD/HOPS/PATH. tsv/jsonl = data. 
nodes = just the site/thread nodes one per line (no arrows), for re-piping."},"hciroot":{"type":"string","description":"Override $HCIROOT for site discovery / cross-site joins."}},"required":[]}}, {"name":"nc_tclproc_refs","description":"List every TCL proc name referenced from a protocol block (or from the whole NetConfig if name is omitted). Pulls from DATAFORMAT.PROC, PREPROCS.PROCS, POSTPROCS.PROCS, etc. Unique sorted.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"name":{"type":"string","description":"Optional. Scope to one protocol."}},"required":["netconfig"]}}, {"name":"hl7_field","description":"Extract a specific HL7 v2 field from a message. field_path = SEG[.FIELD[.COMPONENT[.SUBCOMPONENT]]]. Examples: PID.3 (MRN), PID.18 (account number), MSH.7 (timestamp), MSH.9.2 (event code, like A08), PID.5 (patient name with components). Multiple repetitions are returned one per line. Native v3, no v1/v2 dependency.","input_schema":{"type":"object","properties":{"message":{"type":"string","description":"Raw HL7 message text. Segments separated by \\r."},"field_path":{"type":"string","description":"Field path like PID.3 or MSH.9.2"}},"required":["message","field_path"]}}, {"name":"nc_msgs","description":"Query Cloverleaf smat (SQLite!) databases for messages from a thread. Filters: time range, exact HL7 field match. Native v3 — reads smatdb directly with sqlite3 -ascii, no hcidbdump/dbExtract needed. Format text shows messages line-by-line with metadata; count returns just the count; json returns structured data. Operates on LOCAL smatdbs; for a remote env's smatdb, use ssh_pull_smat first (sampled mode is cheaper than pulling the whole DB).","input_schema":{"type":"object","properties":{"thread":{"type":"string","description":"Thread name. The .smatdb file under $HCISITEDIR/exec/processes/*/.smatdb is auto-located unless db is given."},"after":{"type":"string","description":"Time-after filter. 
Accepts \"3 days ago\", \"2026-05-20 14:30:00\", \"2026-05-20\", or a unix timestamp."},"before":{"type":"string","description":"Time-before filter, same formats as after."},"field":{"type":"string","description":"HL7 field path for exact-match filter, e.g. PID.18 or MSH.10."},"value":{"type":"string","description":"Value the field must equal. Use with field. Repeatable filters not supported via this single tool call — chain calls if you need multi-field AND."},"limit":{"type":"integer","description":"Max messages to return. Default 10."},"format":{"type":"string","enum":["text","json","count","raw"],"description":"text = human-readable with metadata; count = just the number; json = structured; raw = raw bytes separated by 0x1c."},"sitedir":{"type":"string","description":"Override $HCISITEDIR for thread-to-db location."},"db":{"type":"string","description":"Explicit .smatdb path; overrides auto-locate."}},"required":["thread"]}}, {"name":"nc_document","description":"Document a Cloverleaf INTERFACE end-to-end as a PLAIN-TEXT knowledge entry in Bryan's confirmed Legacy 'ADT Messages' template. OUTPUT IS PLAIN TEXT BY DEFAULT (no markdown) so it pastes cleanly into OneNote, which does NOT render markdown: UPPERCASE headings underlined with dashes, no bold, no backticks, no pipe tables, no '---' rules. Structure: Title; Description prose; Message Flow (one hop per record: Epic-feed → Cloverleaf-routing → Final-Delivery, built from the nc_paths route chain); per-delivery breakdown (inbound PROTOCOL TYPE/HOST/PORT/ISSERVER + inbound TRXID/TPS proc, route TRXID filter + TYPE + PREPROCS/POSTPROCS, XLATE, destination host:port/process). The two tabular sections (Message Flow, Delivery breakdown) render as INDENTED label:value blocks by default (read in any font, zero setup); set onenote_table=true to render them instead as TAB-separated rows (header + one row per record, no leading/trailing pipes) for paste-into-OneNote → Insert > Table. 
EVERYTHING THIS TOOL EMITS IS DETERMINISTIC, PURE BASH, AND API-FREE — it runs identically on an API-blocked host and never calls a model. ★ KEY FEATURE — for every referenced UPOC proc it locates the .tcl under $HCIROOT//tclprocs/ and DETERMINISTICALLY surfaces, INLINE in the Description, a compact bit-line: the proc's COMMENTS (the author's own filter notes), the HL7 FIELDS it references (PID.8, PV1.45, EVN.1…), the MATCHED literal event codes (A01 A02 A03…), TABLE lookups (e.g. PeriCalm_Loc), and the DISPOSITION (pass vs kill). Those inline bits are ALWAYS ON. The raw proc TCL appendix is OPT-IN via raw_tcl=true (off by default). ★ WHEN YOU (the model, running WITH the API) GET THIS TOOL'S OUTPUT: transparently POLISH those surfaced UPOC bits into smoother, human-readable filter descriptions inside the Description prose (e.g. turn 'fields: PV1.45 PID.8 · matches: A01 A02 A03 · table: Pericalm_Loc · disposition: kill non-matching' into 'passes only A01/A02/A03 admit/transfer events for female patients whose location is in the Pericalm_Loc table, killing everything else'). Do NOT invent facts not present in the surfaced bits; just smooth them. On an API-blocked host the deterministic bit-lines are the deliverable. MODES: (a) SINGLE INTERFACE — set thread (the delivery/outbound thread, e.g. 'ADTto_CodaMetrix'; optionally site to disambiguate) → one fully-detailed interface section. (b) SYSTEM/PATTERN — set name (case-insensitive substring/regex, e.g. 'codametrix') → one section per matching delivery thread across ALL sites. Give EXACTLY ONE of thread or name. Returns the doc text and (if out is given) writes it there.","input_schema":{"type":"object","properties":{"thread":{"type":"string","description":"SINGLE-INTERFACE mode: the delivery (outbound) thread to document, e.g. 'ADTto_CodaMetrix' or 'ADTto_periwatch'. Accepts a bare thread name or a 'site/thread' node. 
Give this OR name, not both."},"name":{"type":"string","description":"SYSTEM mode: case-insensitive substring/regex matching delivery thread names across all sites, e.g. 'codametrix', 'periwatch', 'epic_adt'. One section per match. Give this OR thread, not both."},"site":{"type":"string","description":"Home site of the thread (the NetConfig's parent dir). Optional — disambiguates a thread name present in multiple sites."},"out":{"type":"string","description":"Optional output file path. Convention: $LARRY_HOME/knowledge/.txt (plain text)."},"hciroot":{"type":"string","description":"Override $HCIROOT for the NetConfig scan."},"title":{"type":"string","description":"Doc title. Default derived from thread/name."},"onenote_table":{"type":"boolean","description":"Render the tabular sections (Message Flow, Delivery breakdown) as TAB-separated rows (header + one data row per record, real tabs, no leading/trailing pipes) for paste-into-OneNote → Insert > Table. Default false = indented label:value blocks. Non-tabular sections stay plain text either way."},"raw_tcl":{"type":"boolean","description":"Also emit the raw proc-source appendix (verbatim TCL of every referenced UPOC proc). Default false — the readable extracted UPOC bits stay inline in each description regardless; only the verbatim appendix is gated behind this."},"status":{"type":"string","description":"System status fill-in (production/test/decommissioning/...)."},"poc_internal":{"type":"string","description":"Internal owner fill-in."},"poc_vendor":{"type":"string","description":"Vendor POC fill-in."},"escalation":{"type":"string","description":"Escalation path fill-in."},"open_items":{"type":"string","description":"Open items / known issues fill-in. Can be multi-line, will be inserted as-is."},"notes":{"type":"string","description":"Freeform notes fill-in."}},"required":[]}}, {"name":"nc_find","description":"Cross-site thread search. Native v3 replacement for v1 tbn/tbp/tbh/tbpr/where. 
Walks every NetConfig under $HCIROOT and returns matching threads with site, port, host, process, direction, file, line. Modes: name=partial name match (like tbn); port=exact port (like tbp); host=substring on host (like tbh); process=substring on PROCESSNAME (like tbpr); where=exact name match across all sites (like the v1 ` where`); xlate=threads referencing a specific .xlt; tclproc=threads referencing a specific TCL proc.","input_schema":{"type":"object","properties":{"mode":{"type":"string","enum":["name","port","host","process","where","xlate","tclproc"],"description":"Search mode."},"query":{"type":"string","description":"Query value: partial name, port number, host substring, process name, exact thread name, xlate filename, or tclproc name."},"format":{"type":"string","enum":["table","tsv","jsonl"],"description":"Output format. Default table."},"hciroot":{"type":"string","description":"Override $HCIROOT."}},"required":["mode","query"]}}, {"name":"nc_insert_protocol","description":"Insert a new protocol block into a NetConfig file. ALL WRITES GO THROUGH THE JOURNAL — original is snapshotted, diff is saved, the file is atomically replaced. Use larry_rollback_list to view, larry-rollback.sh CLI to undo. mode=end appends; mode=after needs anchor=existing-protocol-name; mode=before needs anchor.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string","description":"Target NetConfig file path."},"block":{"type":"string","description":"The full protocol block text (starting with 'protocol NAME {' and ending with '}'). Get this from nc_make_jump output."},"mode":{"type":"string","enum":["end","after","before"],"description":"Insertion position. Default end."},"anchor":{"type":"string","description":"For mode=after|before: existing protocol name to position relative to."}},"required":["netconfig","block"]}}, {"name":"nc_add_route","description":"Splice a route entry into an existing protocol's DATAXLATE block. 
Used to add a new DEST to an inbound's routing (e.g. wiring the OLD inbound to also route to the new linux__out jump thread). ALL WRITES GO THROUGH THE JOURNAL.","input_schema":{"type":"object","properties":{"netconfig":{"type":"string"},"protocol_name":{"type":"string","description":"The existing protocol to modify."},"route":{"type":"string","description":"The route entry text (an inner `{ ... }` object with CACHEMSG, ROUTE_DETAILS, TRXID, etc.). Get from nc_make_jump's route_add output."}},"required":["netconfig","protocol_name","route"]}}, {"name":"larry_rollback_list","description":"List journal entries — every write that's gone through nc_insert_protocol, nc_add_route, or write_file (once journaled write_file is enabled). Shows session-id, sequence, target, timestamp. Use larry-rollback.sh from the shell to actually roll back.","input_schema":{"type":"object","properties":{"session":{"type":"string","description":"Optional. Limit to one session id."}},"required":[]}}, {"name":"lesson_record","description":"Append a lesson to local capture at $LARRY_HOME/lessons/.md. Use when Bryan teaches you something new (a correction, a pattern, a quirk, a gotcha) so the home-Larry can be updated later. Lessons stay LOCAL; Bryan exports them with `lessons.sh export` and pastes back to home-Larry when he can. CALL THIS WHEN: Bryan corrects a misunderstanding, reveals a site-specific convention, points out a bug, requests a behavior change, or shares a workflow detail you should remember next time.","input_schema":{"type":"object","properties":{"text":{"type":"string","description":"The lesson content. Markdown. Include enough context that home-Larry can act on it without re-deriving."},"topic":{"type":"string","description":"Short topic tag, e.g. \"NetConfig parsing\", \"jump-thread naming\", \"site conventions\"."},"site":{"type":"string","description":"Site this lesson is scoped to, if any. 
Default: current $HCISITE."},"severity":{"type":"string","enum":["info","warn","fix"],"description":"info=general learning, warn=behavior I should change, fix=Bryan called out a bug."}},"required":["text"]}}, {"name":"hl7_sanitize","description":"Tokenize PHI fields in an HL7 message file. Replaces values in patient identifiers, names, DOB, addresses, phones, SSN, account numbers, providers, visit numbers, NK1/GT1/IN1 fields, etc. with deterministic local tokens like [[MRN_0001]]. Same value gets same token across the entire local lookup table, so correlation analysis still works. The token-to-original mapping NEVER leaves the client (stored at $LARRY_HOME/sanitize/lookup.tsv, mode 0600). Use this when Bryan needs you to analyze a file that has real PHI. Returns the sanitized HL7 content with tokens substituted. Bryan can desanitize the final output locally with hl7-desanitize.sh.","input_schema":{"type":"object","properties":{"input_path":{"type":"string","description":"Path to the HL7 message file to sanitize."},"strict":{"type":"integer","description":"1=also tokenize any unknown Z* segments wholesale. Default 0 (safer for legibility but might miss custom PHI in Z segments)."}},"required":["input_path"]}}, {"name":"ssh_exec","description":"Run a shell command on a remote test/dev host via an authenticated SSH ControlMaster session. Bryan must have already configured the alias (via /ssh-add) and opened the master (via /ssh-setup). The password is stored locally and you CANNOT see it — do not ask Bryan for it; if the master is closed, tell him to run the /ssh-setup ALIAS slash command. Use ssh_status first to confirm which aliases are open. Output capped at max_lines (default 500). Tool result includes the remote exit code as a [ssh_exec: exit rc=N] footer.","input_schema":{"type":"object","properties":{"alias":{"type":"string","description":"Host alias Bryan configured. 
Run ssh_status to see the list."},"command":{"type":"string","description":"Shell command to execute on the remote. Quote as needed; will be passed through ssh as a single string."},"max_lines":{"type":"integer","description":"Cap output lines (default 500). Increase for known-large output, but prefer targeted commands."}},"required":["alias","command"]}}, {"name":"ssh_status","description":"List the SSH hosts Bryan has configured and which ones have an open ControlMaster session. Call this BEFORE ssh_exec to confirm an alias exists and the master is open. Each line shows: alias, user@host, port, cred (present/absent), master (open or dash). If the master is not open for an alias you need, ask Bryan to run the /ssh-setup ALIAS slash command. Do NOT attempt to authenticate yourself — you have no access to the password.","input_schema":{"type":"object","properties":{},"required":[]}}, {"name":"list_sites","description":"List and COUNT the Cloverleaf sites in the environment. This is your proactive answer to 'how many sites are on ' / 'what sites exist' — NEVER ask Bryan to export or hand you $HCIROOT first; this tool resolves it for you. Works in BOTH deployment modes. REMOTE mode: pass alias= (a configured SSH alias, e.g. qa); the tool resolves the remote $HCIROOT and enumerates sites via a NetConfig walk (the version-agnostic ground truth; Cloverleaf's hcisitelist is used only if present AND the walk found nothing). If the alias has a PINNED HCIROOT (set via /ssh-set-hciroot), the walk runs with HCIROOT exported explicitly and SKIPS the login profile — this is required on hosts whose login profile is sudo-gated/non-interactive (a plain login shell there returns an EMPTY $HCIROOT). Otherwise it opens a LOGIN shell so the operator profile populates $HCIROOT. 
TRANSPORT: if the alias is in DIRECT mode (set via /ssh-set-direct on — for hosts that reject SSH session multiplexing) the walk runs over a fresh per-command sshpass connection and NO ControlMaster is needed; otherwise the ControlMaster must be open and if it is not, the result tells you to have Bryan run /ssh-setup . If the result shows HCIROOT empty with a NOTE about a sudo-gated profile, tell Bryan to pin it: /ssh-set-hciroot . LOCAL mode: omit alias; the tool enumerates sites under the locally-detected $HCIROOT (or the hciroot override). Returns the resolved HCIROOT, a site count, and the site names.","input_schema":{"type":"object","properties":{"alias":{"type":"string","description":"REMOTE mode: an SSH alias from ssh_status (e.g. 'qa'). Omit for LOCAL mode (sites on this box)."},"hciroot":{"type":"string","description":"LOCAL mode only: override the detected $HCIROOT."}},"required":[]}}, {"name":"hl7_diff","description":"HL7-aware diff between two message files (or multi-message dumps). Compares segment-by-segment, field-by-field, with component and subcomponent precision. Ignores configured fields (default MSH.7 timestamp) so timestamp-only diffs do not show up as noise. Use for regression testing between environments (e.g. test vs prod route-test outputs).","input_schema":{"type":"object","properties":{"left":{"type":"string","description":"Path to left HL7 file."},"right":{"type":"string","description":"Path to right HL7 file."},"ignore":{"type":"string","description":"Comma-separated list of fields to ignore (e.g. MSH.7,MSH.10,EVN.6). Default MSH.7."},"include":{"type":"string","description":"If set, ONLY these fields are compared (overrides ignore for that set)."},"format":{"type":"string","enum":["text","tsv","count"],"description":"text=human-readable diff, tsv=machine-parseable, count=just the difference count."}},"required":["left","right"]}}, {"name":"nc_regression","description":"End-to-end regression testing between two Cloverleaf environments. 
6 phases: discover inbounds in scope, sample N messages per inbound from env-A smatdbs, run route_test on env-A, run route_test on env-B with same inputs, hl7_diff every paired output file, compile summary report. Phases 3/4 require the Cloverleaf route_test command; pass it via route_test_cmd with placeholders {THREAD} {INPUT} {OUTPUT_DIR} {HCIROOT} {HCISITE}. If route_test_cmd is empty, phases 3/4 are skipped and you can run them manually using the generated input files. For cross-env regression testing across SSH-aliased hosts, set source_ssh_alias and target_ssh_alias to existing SSH aliases (run ssh_status to list them first). When set, phases 1–4 run remotely via ssh_exec + ssh_pull/ssh_push; phases 5–6 stay local. env_a / env_b remain the HCIROOT paths AS SEEN ON THE REMOTE for that alias.","input_schema":{"type":"object","properties":{"scope":{"type":"string","description":"thread:NAME | threads:N1,N2 | site (needs site_a) | server (all sites)"},"count":{"type":"integer","description":"Messages to sample per inbound. Default 10."},"env_a":{"type":"string","description":"HCIROOT of env-A (the test/source env). If source_ssh_alias is set, this is the remote-side path."},"site_a":{"type":"string","description":"Site name on env-A. Required if scope=site."},"env_b":{"type":"string","description":"HCIROOT of env-B (the prod/target env). If target_ssh_alias is set, this is the remote-side path."},"site_b":{"type":"string","description":"Site name on env-B."},"out":{"type":"string","description":"LOCAL output root directory for inputs, outputs, diffs, and summary."},"route_test_cmd":{"type":"string","description":"Command template for invoking route_test. Use {THREAD} {INPUT} {OUTPUT_DIR} {HCIROOT} {HCISITE} as placeholders."},"ignore":{"type":"string","description":"hl7_diff ignore list. Default MSH.7."},"phase":{"type":"string","enum":["1","2","3","4","5","6","all"],"description":"Run a specific phase or all. 
Default all."},"dry_run":{"type":"integer","description":"1 = print what would happen, do not execute. Default 0."},"source_ssh_alias":{"type":"string","description":"SSH alias for the env-A (source) host. When set, phases 1–3 run remotely. Master must be open (ssh_status). Default empty = local."},"target_ssh_alias":{"type":"string","description":"SSH alias for the env-B (target) host. When set, phase 4 runs remotely. Master must be open. Default empty = local."}},"required":["scope","env_a","env_b","out"]}}, {"name":"ssh_pull","description":"Pull a file from a remote SSH-aliased host to a local path via the existing ControlMaster (no second auth, no second TCP handshake). Use this BEFORE calling any local tool (read_file, nc_diff_interface, grep_files, hl7_diff, etc.) when the source file lives on a remote host. The local path returned by this tool is stable for re-use within and across turns — pulling the same remote_path again returns the same local_path. If local_path is omitted, a deterministic temp path /tmp/larry-pulls/.. is used. Verifies the master is open first; if not, fails with a clear message ('open the master with /ssh-setup first'). Validates the transferred size matches the remote stat.","input_schema":{"type":"object","properties":{"alias":{"type":"string","description":"SSH alias (see ssh_status). Master must be open."},"remote_path":{"type":"string","description":"Absolute path on the remote host."},"local_path":{"type":"string","description":"Optional explicit local destination. If omitted, a deterministic /tmp/larry-pulls/.. path is used and printed in the tool result."}},"required":["alias","remote_path"]}}, {"name":"ssh_push","description":"Push a local file to a remote SSH-aliased host via the existing ControlMaster. Use for sending small input bundles to a remote env (e.g. regression-test input messages, a sanitized HL7 file to feed into route_test). Same multiplexing + error handling as ssh_pull. 
Validates remote-side size matches local size post-transfer.","input_schema":{"type":"object","properties":{"alias":{"type":"string","description":"SSH alias (see ssh_status). Master must be open."},"local_path":{"type":"string","description":"Absolute local path to the file to send."},"remote_path":{"type":"string","description":"Absolute remote destination path."}},"required":["alias","local_path","remote_path"]}}, {"name":"ssh_pull_smat","description":"Pull a Cloverleaf thread's smat archive (or recent messages from it) from a remote env. Two modes: (1) Full pull — omit days_back; the entire .smatdb file is scp'd locally; returns the local path. Fine for small archives. (2) Sampled — pass days_back=N; runs sqlite3 server-side to pull just messages from the last N days as TSV with base64-encoded blobs (unix_tsdirectiontypesourcedestmessage_blob_b64). Capped at 1000 rows; the trailer line reports truncated=yes/no. Avoids transferring multi-GB smatdbs when only N samples are needed. Uses ssh_exec under the hood to find the .smatdb path (the file lives at $HCISITEDIR/exec/processes/*/.smatdb on the remote, where * is a process name that varies by site).","input_schema":{"type":"object","properties":{"alias":{"type":"string","description":"SSH alias (see ssh_status). Master must be open."},"site":{"type":"string","description":"Cloverleaf HCISITE name on the remote — used to resolve $HCISITEDIR=$HCIROOT/."},"thread":{"type":"string","description":"Thread name (e.g. IB_ADT_muxS). The .smatdb is auto-located via find on the remote."},"days_back":{"type":"integer","description":"Optional. If set, sampled mode: only messages from the last N days are returned, base64-encoded, capped at 1000 rows. Omit for full-file pull."}},"required":["alias","site","thread"]}}, {"name":"nc_diff_interface","description":"Diff one Cloverleaf interface across two NetConfigs. Compares the protocol block plus referenced xlates, tclprocs, and (optionally) tables. 
Operates on LOCAL NetConfig paths. If a NetConfig file is on a remote host, first use ssh_pull to fetch it locally (and the related Xlate/, tclprocs/, tables/ dirs alongside), then pass the local paths here. The site root is dirname(NetConfig); related artifacts (Xlate/, tclprocs/, tables/) must be alongside that file.","input_schema":{"type":"object","properties":{"interface":{"type":"string","description":"Protocol/thread name to diff. e.g. ADTto_3m."},"left":{"type":"string","description":"Local path to the LEFT NetConfig file (e.g. dev)."},"right":{"type":"string","description":"Local path to the RIGHT NetConfig file (e.g. qa)."},"out":{"type":"string","description":"Optional output path for the markdown report. Default stdout."},"include_tables":{"type":"integer","description":"1 = also diff referenced tables. Default 0."},"left_label":{"type":"string","description":"Display label for left side (default A)."},"right_label":{"type":"string","description":"Display label for right side (default B)."},"depth":{"type":"integer","description":"Hops out from the named interface to also diff. Default 1."}},"required":["interface","left","right"]}} ] TOOLS_END ) # ───────────────────────────────────────────────────────────────────────────── # API call # ───────────────────────────────────────────────────────────────────────────── # _curl_config_apikey — emit a curl config snippet carrying the x-api-key # header, to be piped to `curl --config -` on STDIN. This keeps the API key OUT # of curl's argv (and therefore out of the process table / `ps` output) — a # hardening over passing it as `-H "x-api-key: ..."`. curl config syntax: # header = "x-api-key: " # The value is CR-stripped (defense-in-depth; the stored key is already clean). # Nothing here is logged; the snippet exists only on the pipe. 
_curl_config_apikey() {
  # Emit exactly ONE curl-config line carrying the x-api-key header. The
  # caller pipes it to `curl --config -`, so the key never lands in argv.
  local key="${ANTHROPIC_API_KEY:-}"
  local bs='\' dq='"'
  # Drop CR and LF. A stray newline would otherwise start a second config
  # line (e.g. `url = ...`) and let the key value inject curl options.
  key=${key//$'\r'/}
  key=${key//$'\n'/}
  # curl's config parser honours \\ and \" inside a double-quoted value, so
  # escape both to keep the value intact and the quoting balanced.
  key=${key//"$bs"/"$bs$bs"}
  key=${key//"$dq"/"$bs$dq"}
  printf 'header = "x-api-key: %s"\n' "$key"
}

# call_api PAYLOAD_FILE — POST the JSON payload to $LARRY_API_URL.
#   stdout : the response body, untouched (callers pipe it straight into jq)
#   stderr : curl's own diagnostics (also persisted, see below)
#   return : curl's exit status, or 1 when no OAuth token could be obtained
# Side effects: writes $LARRY_HOME/.last-curl-rc, .last-curl-stderr and
# .last-curl-headers so _diagnose_api_block can inspect the call from the
# parent shell (call_api normally runs inside a command substitution).
call_api() {
  local payload_file="$1"
  local auth_args=()

  if [ "$LARRY_AUTH_MODE" = "oauth" ]; then
    local oauth_script="$LARRY_LIB_DIR/oauth.sh"
    local token="" oauth_stderr_file=""
    if [ -x "$oauth_script" ]; then
      # Capture stderr so we can surface WHY ensure failed instead of silently
      # swallowing it. v0.6.4 and earlier piped 2>/dev/null here — that hid
      # the entire diagnostic chain when the file was corrupt, the refresh
      # 401'd, or jq couldn't read the path on MobaXterm. Never again.
      oauth_stderr_file=$(mktemp 2>/dev/null || echo "")
      if [ -n "$oauth_stderr_file" ]; then
        token=$("$oauth_script" ensure 2>"$oauth_stderr_file")
      else
        # mktemp failed: run ensure ONCE and keep only stdout. The previous
        # fallback captured stderr INTO $token, so a failed ensure produced a
        # non-empty bogus "token" that was then sent as the Bearer credential.
        token=$("$oauth_script" ensure 2>/dev/null) || true
      fi
    else
      err "oauth.sh not found at $oauth_script — cannot ensure OAuth token"
    fi

    if [ -z "$token" ]; then
      err "OAuth token unavailable; run 'larry-auth.sh login' to re-authenticate"
      if [ -n "$oauth_stderr_file" ] && [ -s "$oauth_stderr_file" ]; then
        err "oauth.sh ensure said:"
        sed 's/^/ /' "$oauth_stderr_file" >&2
        err "(for full diagnostic, run '/oauth-debug' in this REPL)"
      else
        err "oauth.sh ensure returned no stderr — try '/oauth-debug' for full state dump"
      fi
      [ -n "$oauth_stderr_file" ] && rm -f "$oauth_stderr_file"
      return 1
    fi
    [ -n "$oauth_stderr_file" ] && rm -f "$oauth_stderr_file"

    # OAuth is OPT-IN only. We send the MINIMAL honest OAuth header set — a
    # Bearer token + the oauth beta flag. We DO NOT impersonate the official
    # Claude Code client (no claude-code-* beta flag, no claude-cli UA, no
    # x-app:cli, no "You are Claude Code" system block). That impersonation is
    # exactly what Anthropic fingerprints and blocks, and what flags the user's
    # Max account. The one-time risk warning fires here.
    _warn_oauth_optin_once
    auth_args=(
      -H "Authorization: Bearer $token"
      -H "anthropic-beta: oauth-2025-04-20"
    )
  fi
  # else: DEFAULT / sanctioned rail — a plain programmatic API-key request. No
  # Bearer, no impersonation headers, no Claude-Code system spoof. The
  # x-api-key header is fed to curl via --config on STDIN (see
  # _curl_config_apikey) so the key never appears in argv / the process table.

  # v0.6.9: dump response headers to a tempfile via -D so the status-line
  # tracker can parse anthropic-ratelimit-* fields after the call returns.
  # The body still goes to stdout. We deliberately don't use -i (which would
  # interleave headers into stdout) because that would break the existing
  # callers that pipe the body straight into jq.
  local _hdrs_file; _hdrs_file=$(mktemp 2>/dev/null || echo "")
  # v0.8.14: capture curl's STDERR (cert/DNS/connect-refused diagnostics) so the
  # caller's block-detection (_diagnose_api_block) can tell a corporate block
  # apart from a transient network blip. Body still goes to stdout untouched.
  local _err_file; _err_file=$(mktemp 2>/dev/null || echo "")

  # Assemble the full argv once; only the stdin/stderr plumbing differs below.
  local _curl_args=( -sS --max-time 180 )
  [ -n "$_hdrs_file" ] && _curl_args+=( -D "$_hdrs_file" )
  local _key_on_stdin=0
  if [ "$LARRY_AUTH_MODE" = "apikey" ]; then
    # Key travels in the curl config on stdin, NOT in argv.
    _key_on_stdin=1
    _curl_args+=( --config - )
  fi
  # ${arr[@]+...} keeps an EMPTY auth_args from tripping `set -u` on bash < 4.4.
  _curl_args+=(
    ${auth_args[@]+"${auth_args[@]}"}
    -H "anthropic-version: 2023-06-01"
    -H "content-type: application/json"
    --data-binary "@$payload_file"
    "$LARRY_API_URL"
  )

  local _curl_rc=0
  if [ "$_key_on_stdin" -eq 1 ]; then
    if [ -n "$_err_file" ]; then
      _curl_config_apikey | curl "${_curl_args[@]}" 2>"$_err_file"
    else
      _curl_config_apikey | curl "${_curl_args[@]}"
    fi
  else
    if [ -n "$_err_file" ]; then
      curl "${_curl_args[@]}" 2>"$_err_file"
    else
      curl "${_curl_args[@]}"
    fi
  fi
  _curl_rc=$?

  # Stash rc + the response-header dump + curl's stderr for block-detection.
  # call_api is usually invoked as `resp=$(call_api ...)` — a command-sub
  # SUBSHELL — so in-memory global assignments do NOT reach the parent. We ALSO
  # persist to deterministic files so _diagnose_api_block (running in the
  # parent) can read them. (Same subshell-survival pattern as the stream path.)
  # `2>/dev/null` comes FIRST so a failed open of the target is silenced too.
  LARRY_LAST_CURL_RC="$_curl_rc"
  printf '%s' "$_curl_rc" 2>/dev/null > "$LARRY_HOME/.last-curl-rc" || true

  LARRY_LAST_CURL_STDERR=""
  : 2>/dev/null > "$LARRY_HOME/.last-curl-stderr" || true
  if [ -n "$_err_file" ]; then
    LARRY_LAST_CURL_STDERR=$(cat "$_err_file" 2>/dev/null)
    cp "$_err_file" "$LARRY_HOME/.last-curl-stderr" 2>/dev/null || true
    # Still surface curl's own diagnostic on stderr (preserves prior -sS UX).
    [ -s "$_err_file" ] && cat "$_err_file" >&2
    rm -f "$_err_file"
  fi

  # Reset the persisted header dump on EVERY call: a request that never got a
  # response must not leave an older call's headers behind for block-detection.
  LARRY_LAST_RESP_HEADERS=""
  : 2>/dev/null > "$LARRY_HOME/.last-curl-headers" || true
  if [ -n "$_hdrs_file" ]; then
    # Parse headers regardless of whether the body parse will succeed; headers
    # carry rate-limit info even on 429s.
    if [ -s "$_hdrs_file" ]; then
      LARRY_LAST_RESP_HEADERS=$(cat "$_hdrs_file" 2>/dev/null)
      cp "$_hdrs_file" "$LARRY_HOME/.last-curl-headers" 2>/dev/null || true
      _parse_response_headers "$_hdrs_file" 2>/dev/null || true
    fi
    # Always remove the temp file — previously an EMPTY dump (connection never
    # established) was left behind, leaking one temp file per failed call.
    rm -f "$_hdrs_file"
  fi
  return "$_curl_rc"
}

# call_api_stream — same as call_api but for SSE responses. Writes the raw
# event stream to stdout (one line per SSE field, blank lines between events).
# Caller is responsible for parsing. Returns curl's exit status.
#
# Uses -N (no buffering) so each delta arrives as it ships from the server.
# -s silences the progress meter; -S is added whenever stderr is being captured
# to a file, because plain -s ALSO silences curl's error text and would leave
# the capture file permanently empty.
call_api_stream() {
  local payload_file="$1"
  local auth_args=()

  if [ "$LARRY_AUTH_MODE" = "oauth" ]; then
    local oauth_script="$LARRY_LIB_DIR/oauth.sh"
    local token=""
    if [ -x "$oauth_script" ]; then
      token=$("$oauth_script" ensure 2>/dev/null)
    fi
    if [ -z "$token" ]; then
      err "OAuth token unavailable (streaming); run /login to re-authenticate"
      return 1
    fi
    # OAuth opt-in: minimal honest header set, kept in lockstep with call_api.
    # NO Claude Code impersonation (see call_api for the rationale).
    _warn_oauth_optin_once
    auth_args=(
      -H "Authorization: Bearer $token"
      -H "anthropic-beta: oauth-2025-04-20"
    )
  fi
  # else: API-key header travels via --config on stdin (off argv); see call_api.

  # v0.6.9: dump response headers via -D for status-line tracking. -D writes
  # the header block immediately when the server emits it, BEFORE the SSE body
  # starts flowing — so the body stream on stdout is unaffected. We parse the
  # headers file at the START of the next agent_turn (see _maybe_drain_pending_
  # headers). Why not after curl returns? Because this function is the LEFT
  # side of a pipeline and a `return` here happens in a subshell; the parent
  # process can't see updates to status vars unless we drain the file later.
  #
  # We stash the file path on disk so the next call_api/call_api_stream (or
  # the REPL renderer) can pick it up. Path is deterministic so the picker
  # doesn't need to share a variable across the subshell boundary.
  local _hdrs_file="$LARRY_HOME/.last-stream-headers"
  : 2>/dev/null > "$_hdrs_file" || _hdrs_file=""

  # v0.8.14: persist curl's stderr to a deterministic path so the parent shell
  # can run block-detection after the subshell pipe exits (same pattern as the
  # header file above — the streaming curl runs in a subshell and can't set
  # parent globals directly).
  local _err_file="$LARRY_HOME/.last-stream-curlerr"
  : 2>/dev/null > "$_err_file" || _err_file=""

  local _curl_args=( -sN --max-time 300 )
  [ -n "$_err_file" ] && _curl_args+=( -S )
  [ -n "$_hdrs_file" ] && _curl_args+=( -D "$_hdrs_file" )
  local _key_on_stdin=0
  if [ "$LARRY_AUTH_MODE" = "apikey" ]; then
    _key_on_stdin=1
    _curl_args+=( --config - )
  fi
  _curl_args+=(
    ${auth_args[@]+"${auth_args[@]}"}
    -H "anthropic-version: 2023-06-01"
    -H "content-type: application/json"
    -H "accept: text/event-stream"
    --data-binary "@$payload_file"
    "$LARRY_API_URL"
  )

  # The stderr redirect MUST be written literally. A redirection produced by a
  # parameter expansion (the old `${_err_file:+2>"$_err_file"}`) is never
  # parsed as one: bash handed curl the word `2>/path` as an extra URL, so
  # nothing was captured and curl's exit status reflected that bogus transfer.
  if [ "$_key_on_stdin" -eq 1 ]; then
    if [ -n "$_err_file" ]; then
      _curl_config_apikey | curl "${_curl_args[@]}" 2>"$_err_file"
    else
      _curl_config_apikey | curl "${_curl_args[@]}"
    fi
  else
    if [ -n "$_err_file" ]; then
      curl "${_curl_args[@]}" 2>"$_err_file"
    else
      curl "${_curl_args[@]}"
    fi
  fi
}

# _drain_pending_stream_headers — called by the parent shell after a streaming
# turn completes. The streaming curl runs in a subshell (LHS of a pipe), so
# its in-memory updates to STATUS_* vars don't survive. We persist the header
# block on disk instead and parse it here, in the parent.
_drain_pending_stream_headers() {
  local pending="$LARRY_HOME/.last-stream-headers"
  [ -s "$pending" ] || return 0
  _parse_response_headers "$pending" 2>/dev/null || true
  rm -f "$pending"
}

# build_system_prompt — print the system prompt on stdout: agents/larry.md
# first (sets identity), then every other agents/*.md in glob order, each
# followed by a blank line, then $CLOVERLEAF_CTX.
build_system_prompt() {
  local agents_dir="$LARRY_HOME/agents"
  local sys="" f
  # $(<file) reads without forking `cat` (forks are costly on Cygwin/MobaXterm).
  if [ -f "$agents_dir/larry.md" ]; then
    sys+="$(<"$agents_dir/larry.md")"$'\n\n'
  fi
  for f in "$agents_dir/"*.md; do
    [ -f "$f" ] || continue            # unmatched glob stays literal: skip it
    case "$f" in
      */larry.md) ;;                   # already added
      *) sys+="$(<"$f")"$'\n\n' ;;
    esac
  done
  sys+="${CLOVERLEAF_CTX:-}"
  printf '%s' "$sys"
}

# ─────────────────────────────────────────────────────────────────────────────
# v0.8.14: API-block detection → guide into manual-tools mode (NO bypass).
#
# On a locked-down Cloverleaf/PHI box, corporate security (e.g. Cisco Umbrella)
# blocks api.anthropic.com — Bryan's Gundersen environment returns a 403
# interstitial, or TLS inspection trips "unable to get local issuer
# certificate", or the egress is simply refused. When that happens we must NOT
# dump a raw curl error; we DETECT the situation and GUIDE the operator to run
# the toolkit by hand.
#
# IMPORTANT — this is graceful degradation + honest guidance ONLY. There is
# DELIBERATELY no traffic-masking, no proxy-hiding, no obfuscation, no
# block-circumvention of any kind. Bypassing a corporate security control on a
# PHI box is off the table. We recognize the block and tell the truth.
#
# _diagnose_api_block BODY — inspect the last call's curl rc + stderr + the
# response body/headers for block signatures. If it looks like a block (vs a
# transient blip), print the guidance to STDERR and return 0. Else return 1.
_diagnose_api_block() {
  local body="${1:-}"
  local rc="${LARRY_LAST_CURL_RC:-}"
  local cerr="${LARRY_LAST_CURL_STDERR:-}"
  local hdrs="${LARRY_LAST_RESP_HEADERS:-}"
  local home="$LARRY_HOME"
  local src=""

  # call_api / call_api_stream both run in subshells (a command-sub, or the LHS
  # of a pipe), so their in-memory globals don't reach us here. Both persist
  # their diagnostics to deterministic files; read those as the source of truth.
  # Each writer truncates its files at the start of a call, so the file with
  # the NEWER mtime belongs to the most recent attempt. Picking by recency
  # stops an existing-but-empty (or stale) call_api file from shadowing what a
  # later streaming attempt recorded.
  if [ -z "$rc" ] && [ -f "$home/.last-curl-rc" ]; then
    # Only call_api records an rc; ignore it when a streaming attempt is newer.
    if [ ! "$home/.last-stream-curlerr" -nt "$home/.last-curl-rc" ]; then
      rc=$(cat "$home/.last-curl-rc" 2>/dev/null)
    fi
  fi
  if [ -z "$cerr" ]; then
    src=""
    if [ -f "$home/.last-stream-curlerr" ] &&
       { [ ! -f "$home/.last-curl-stderr" ] ||
         [ "$home/.last-stream-curlerr" -nt "$home/.last-curl-stderr" ]; }; then
      src="$home/.last-stream-curlerr"
    elif [ -f "$home/.last-curl-stderr" ]; then
      src="$home/.last-curl-stderr"
    fi
    [ -n "$src" ] && cerr=$(cat "$src" 2>/dev/null)
  fi
  if [ -z "$hdrs" ]; then
    src=""
    if [ -f "$home/.last-stream-headers" ] &&
       { [ ! -f "$home/.last-curl-headers" ] ||
         [ "$home/.last-stream-headers" -nt "$home/.last-curl-headers" ]; }; then
      src="$home/.last-stream-headers"
    elif [ -f "$home/.last-curl-headers" ]; then
      src="$home/.last-curl-headers"
    fi
    [ -n "$src" ] && hdrs=$(cat "$src" 2>/dev/null)
  fi

  local reason=""

  # 1) TLS interception (corporate MITM proxy presents an untrusted cert).
  case "$cerr" in
    *"unable to get local issuer certificate"*|*"self signed certificate"*|*"self-signed certificate"*|*"certificate verify failed"*|*"SSL certificate problem"*)
      reason="TLS interception (an untrusted/MITM certificate on the egress path — typical of a corporate proxy doing SSL inspection)" ;;
  esac

  # 2) curl exit codes that mean "couldn't get there".
  if [ -z "$reason" ]; then
    case "$rc" in
      6) reason="DNS resolution failed (the host name didn't resolve — often a security DNS filter like Umbrella)" ;;
      7) reason="connection refused/blocked (egress to the API host is being denied)" ;;
      28) reason="the request timed out with no response (egress may be silently dropped)" ;;
      35|53|54|58|59|60|66|77|80|82|83|91)
        reason="a TLS/SSL handshake failure (consistent with a proxy intercepting HTTPS)" ;;
    esac
  fi

  # 3) curl stderr text signatures (when rc didn't already name it).
  if [ -z "$reason" ]; then
    case "$cerr" in
      *"Could not resolve host"*|*"Couldn't resolve host"*)
        reason="DNS resolution failed (the API host name didn't resolve)" ;;
      *"Connection refused"*|*"Failed to connect"*|*"Connection timed out"*)
        reason="egress to the API host is being blocked/refused" ;;
      *[Uu]mbrella*|*"Cisco"*)
        reason="a Cisco Umbrella block (the corporate web-security gateway intercepted the request)" ;;
    esac
  fi

  # 4) Block-page BODY signatures: a 403/interstitial HTML page where we expected
  #    JSON. The API only ever returns JSON, so HTML here means something on the
  #    path answered instead (a proxy/block page).
  if [ -z "$reason" ] && [ -n "$body" ]; then
    case "$body" in
      *[Uu]mbrella*|*"Cisco Umbrella"*|*"This site is blocked"*|*"blocked by"*|*"access has been blocked"*|*"web policy"*|*"content filter"*)
        reason="the egress returned a block/interstitial page instead of the API (a corporate web filter)" ;;
      *403*|*"Access Denied"*)
        # A bare "403" also occurs inside perfectly good API JSON (an error
        # message, a token count). Only call it a block when the body is NOT
        # JSON-shaped; the API's own JSON errors belong to _humanize_api_error.
        local lead="${body#"${body%%[![:space:]]*}"}"   # trim leading whitespace
        case "$lead" in
          '{'*|'['*) : ;;
          *) reason="the egress returned an HTML page (likely a 403/proxy block) instead of the API's JSON" ;;
        esac ;;
    esac
  fi

  # 5) Response-header signatures (proxy fingerprints).
  if [ -z "$reason" ] && [ -n "$hdrs" ]; then
    case "$hdrs" in
      *[Uu]mbrella*|*"Server: Cisco"*|*"X-Cisco"*)
        reason="a Cisco Umbrella / corporate-proxy response (per the response headers)" ;;
    esac
  fi

  [ -z "$reason" ] && return 1

  # Emit the guidance. Honest, actionable, zero-bypass.
  {
    printf '\n%sCan'\''t reach the model API — looks like a corporate network block.%s\n' "$C_YELLOW$C_BOLD" "$C_RESET"
    printf '%sWhat happened:%s %s.\n' "$C_DIM" "$C_RESET" "$reason"
    printf ' Target: %s\n' "$LARRY_API_URL"
    printf '\n'
    printf '%sThe Cloverleaf tools still work — run them by hand (no API/LLM needed):%s\n' "$C_BOLD" "$C_RESET"
    printf ' %slarry tools list%s see every tool + what it does\n' "$C_CYAN" "$C_RESET"
    printf ' %slarry tools --help%s usage for one tool\n' "$C_CYAN" "$C_RESET"
    printf ' %slarry tools nc-parse list-protocols /path/to/NetConfig%s (example)\n' "$C_CYAN" "$C_RESET"
    printf '\n'
    printf '%sTo use the AI brain%s: ask IT to allowlist %s, or run larry from a network that permits it.\n' "$C_BOLD" "$C_RESET" "$LARRY_API_URL"
    printf '%s(This is a corporate security control on a PHI box — larry will not, and must not, try to bypass it.)%s\n\n' "$C_DIM" "$C_RESET"
  } >&2
  return 0
}

# ─────────────────────────────────────────────────────────────────────────────
# Agent turn — loop until stop_reason != tool_use
# ─────────────────────────────────────────────────────────────────────────────

# _humanize_api_error BODY — turn a raw API error body into friendlier prose.
#   $1 = the response body (expected: JSON with .error.type / .error.message)
# Prints the rendered message on stdout; never fails. A body that is not the
# API's JSON error shape is echoed back (CR-stripped) unchanged.
_humanize_api_error() {
  local body="$1"
  local err_type err_msg
  err_type=$(printf '%s' "$body" | jq -r '.error.type // empty' 2>/dev/null)
  err_msg=$(printf '%s' "$body" | jq -r '.error.message // empty' 2>/dev/null)
  # v0.8.5 (PROBLEM 3 — "ErrorPI" fix): on MobaXterm/Cygwin the response body
  # can arrive CRLF-translated, so `jq -r` emits a TRAILING \r on these fields.
  # That \r (a) breaks the `case "$err_type" in rate_limit_error)` match below
  # — the pattern compares against the literal "rate_limit_error\r" and FALLS
  # THROUGH to the default `%s — %s` arm — and (b) when the resulting string
  # reaches the terminal, the bare CR carriage-returns the cursor and the next
  # write overprints "API error", rendering the mangled "ErrorPI". strip_cr on
  # BOTH fields fixes the match AND removes the overprint source. (The v0.7.5
  # CR-sweep covered OAuth/prompt/path surfaces but missed this error-DISPLAY
  # construction path.)
  err_type=$(strip_cr "$err_type")
  err_msg=$(strip_cr "$err_msg")

  case "$err_type" in
    authentication_error|invalid_request_error)
      local hint=""
      # [Oo][Aa]uth so the usual spelling "OAuth" matches (plain [Oo]auth
      # only matched "oauth"/"Oauth").
      case "$err_msg" in
        *[Oo][Aa]uth*|*expired*|*revoked*) hint="oauth" ;;
      esac
      # A bare "token" is only an auth signal on authentication_error. On
      # invalid_request_error it is almost always about token COUNTS
      # ("prompt is too long: N tokens", "max_tokens ...") and must not be
      # reported as an expired login.
      if [ -z "$hint" ] && [ "$err_type" = "authentication_error" ]; then
        case "$err_msg" in
          *[Tt]oken*) hint="oauth" ;;
        esac
      fi
      if [ -z "$hint" ]; then
        case "$err_msg" in
          *[Aa]pi*[Kk]ey*|*x-api-key*) hint="apikey" ;;
        esac
      fi
      case "$hint" in
        oauth)
          printf 'Authentication failed — OAuth token may have expired or been revoked. Run /login to re-authenticate.' ;;
        apikey)
          printf 'Authentication failed — API key invalid or revoked. Set ANTHROPIC_API_KEY or run /login.' ;;
        *)
          printf '%s — %s' "$err_type" "$err_msg" ;;
      esac
      ;;
    rate_limit_error|overloaded_error)
      _humanize_rate_limit "$err_type" "$err_msg"
      ;;
    not_found_error)
      printf 'API said not found — usually a bad model name. Current LARRY_MODEL=%s. (%s)' "$LARRY_MODEL" "$err_msg"
      ;;
    *)
      if [ -n "$err_type" ]; then
        printf '%s — %s' "$err_type" "$err_msg"
      else
        printf '%s' "$(strip_cr "$body")"
      fi
      ;;
  esac
}

# _humanize_rate_limit TYPE MSG — render an ACTIONABLE rate-limit message using
# the rate-limit STATUS_* globals captured by _parse_response_headers from the
# 429's anthropic-ratelimit-* + retry-after headers. Tells Bryan WHICH limit
# tripped and WHEN it resets, vs the old bare "rate_limit_error".
#
# This is PROBLEM 1's user-facing half: with 90% of the 5h quota free, the most
# likely culprit is a short-window BURST rail (requests-per-minute /
# input-tokens-per-minute / output-tokens-per-minute), NOT the unified 5h/7d
# quota. Naming the rail makes that visible at a glance.
_humanize_rate_limit() {
  # $1 = API error type (rate_limit_error | overloaded_error)
  # $2 = API error message (only echoed in the generic fallback)
  # Reads STATUS_rl_tripped_rail, STATUS_rl_reset_epoch and
  # STATUS_retry_after_secs; prints one line on stdout with no trailing newline.
  local err_type="$1" err_msg="$2"
  local rail reset_epoch retry_secs
  # ${VAR:-} so a not-yet-initialised STATUS_* global cannot abort under `set -u`.
  rail=$(strip_cr "${STATUS_rl_tripped_rail:-}")
  reset_epoch=$(strip_cr "${STATUS_rl_reset_epoch:-}")
  retry_secs=$(strip_cr "${STATUS_retry_after_secs:-}")

  # Retry-After may legally be an HTTP-date rather than delta-seconds. Only a
  # plain integer can be rendered as "<n>s"; discard anything else so we fall
  # through to the reset-epoch derivation instead of printing "<date>s".
  case "$retry_secs" in
    ''|*[!0-9]*) retry_secs="" ;;
  esac

  # Friendly rail name + whether it's a burst rail or the unified quota.
  local rail_label="" rail_kind=""
  case "$rail" in
    requests)      rail_label="requests-per-minute";      rail_kind="burst" ;;
    input-tokens)  rail_label="input-tokens-per-minute";  rail_kind="burst" ;;
    output-tokens) rail_label="output-tokens-per-minute"; rail_kind="burst" ;;
    tokens)        rail_label="tokens-per-minute";        rail_kind="burst" ;;
    unified-5h)    rail_label="unified 5-hour quota";     rail_kind="quota" ;;
    unified-7d)    rail_label="unified 7-day quota";      rail_kind="quota" ;;
  esac

  # Compute a "resets in Ns" hint. Prefer retry-after (authoritative); else
  # derive from the rail's reset epoch.
  local resets_in=""
  if [ -n "$retry_secs" ]; then
    resets_in="${retry_secs}s"
  elif [ -n "$reset_epoch" ]; then
    local now
    now=$(coerce_int "$(date +%s 2>/dev/null)" 0)
    reset_epoch=$(coerce_int "$reset_epoch" 0)
    if [ "$now" -gt 0 ] && [ "$reset_epoch" -gt "$now" ]; then
      resets_in="$(( reset_epoch - now ))s"
    fi
  fi

  # Every variant ends the same way; only the optional reset hint differs.
  local tail="retrying with backoff"
  if [ -n "$resets_in" ]; then
    tail="resets in ${resets_in}; ${tail}"
  fi

  if [ -n "$rail_label" ]; then
    if [ "$rail_kind" = "burst" ]; then
      # The headline diagnosis Bryan needs: a per-minute burst rail, not quota.
      printf 'rate limit: %s exhausted (short-window burst, NOT your 5h quota) — %s' "$rail_label" "$tail"
    else
      printf 'rate limit: %s reached — %s' "$rail_label" "$tail"
    fi
    return
  fi

  # Headers did not name a rail (overloaded_error, or a 429 whose headers we
  # could not parse). Fall back to a clear-but-generic message.
  if [ "$err_type" = "overloaded_error" ]; then
    printf 'Anthropic is overloaded (overloaded_error) — retrying with backoff.'
  else
    printf 'rate limited by Anthropic (%s) — %s. (%s)' "$err_type" "$tail" "$err_msg"
  fi
}

# parse_stream_to_response — read SSE from stdin, write events to stdout as
# they arrive (for text deltas) AND assemble the equivalent non-streaming
# response JSON to the file named in $1. Returns 0 on clean stream, 1 on
# parse failure (caller falls back to non-streaming).
#
# v0.8.5 (PROBLEM 2): a SECOND optional arg names an "error-body" file. When the
# streaming request gets an HTTP-error status (429/500/overloaded), the server
# responds with a PLAIN JSON error body — NOT an SSE event stream. Those lines
# don't start with "event: "/"data: ", so the old parser silently dropped them,
# produced zero content blocks, and returned 1. agent_turn then re-SENT the
# whole prompt non-streaming — a SECOND full API call within the same second.
# That stream→non-stream double-send is the BURST amplifier behind the
# rate_limit_error (PROBLEM 1): two back-to-back calls per turn trip a
# per-minute rail even with the 5h quota wide open. We now accumulate the raw
# non-SSE body; if the stream yields no blocks AND the body parses as a JSON
# error, we write it to the error-body file so the caller can surface it
# DIRECTLY (with backoff) instead of blindly re-sending.
#
# Side effects:
#   - prints text deltas to stderr (the visible terminal output) as they arrive
#   - writes a JSON file with {content:[...], stop_reason, usage} on success
#   - on a non-SSE JSON error body, writes that body to $2 (if given)
#   - updates _LARRY_LAST_ASSISTANT_TEXT
#

# v0.8.13 (slowness, dominant residual fix): _json_str_decode — decode a jq
# @json-encoded string ("...") back to raw text, in PURE bash when possible.
#
# WHY this is the hot path: the streaming text delta arrives @json-encoded
# (so embedded newlines/tabs survive the line-oriented `read`). v0.8.12 cut the
# per-delta routing to ONE jq call, but each text_delta and input_json_delta
# STILL forked a SECOND jq (`jq -r '.'`) purely to un-escape that string. A
# normal answer ships dozens-to-hundreds of deltas; on Cygwin/MobaXterm a fork
# is ~50-100ms (Windows fork emulation), so that second fork is the bulk of the
# residual "feels slow" lag — ~N forks per turn, where N≈output tokens.
#
# The overwhelmingly common payload is a short chunk with NO backslash escapes
# (plain words/spaces). For that case we strip the surrounding quotes and use
# the body verbatim — ZERO forks. We only fall back to jq when a backslash is
# actually present (rare: a literal \n, \t, \", \uXXXX in the model's text).
#
# Arguments:
#   $1 - the @json string INCLUDING surrounding double-quotes.
#   $2 - (optional) NAME of a variable to receive the decoded text. When given,
#        the text is assigned with `printf -v` and nothing is printed. This is
#        the only way to get the text back EXACTLY: `$(...)` both forks a
#        subshell and strips trailing newlines, which silently ate the
#        paragraph breaks of any delta that ended in "\n".
# Outputs:
#   Without $2: the decoded text on stdout (no terminating newline is added).
# Returns:
#   0 always; an undecodable escaped string decodes to the empty string.
_json_str_decode() {
  # Locals are prefixed so they cannot shadow the caller's destination variable.
  local _jsd_s="${1-}" _jsd_dest="${2-}" _jsd_out=""
  case "$_jsd_s" in
    '""'|'')
      # Empty (jq emitted "" or nothing at all) → empty text.
      _jsd_out=""
      ;;
    '"'*'"')
      # Well-formed quoted string → strip the surrounding quotes.
      _jsd_s="${_jsd_s#\"}"; _jsd_s="${_jsd_s%\"}"
      case "$_jsd_s" in
        *\\*)
          # Has at least one escape — defer to jq for correct \uXXXX / \" /
          # \\ / \n. `-j` suppresses jq's own terminating newline and the
          # trailing sentinel protects newlines that belong to the text from
          # command-substitution stripping.
          _jsd_out=$(printf '"%s"' "$_jsd_s" | jq -j '. // ""' 2>/dev/null; printf 'x')
          _jsd_out="${_jsd_out%x}"
          ;;
        *)
          # No escapes — verbatim, no fork.
          _jsd_out="$_jsd_s"
          ;;
      esac
      ;;
    *)
      # Defensive: not quoted (a bare value) → pass through unchanged.
      _jsd_out="$_jsd_s"
      ;;
  esac
  if [ -n "$_jsd_dest" ]; then
    printf -v "$_jsd_dest" '%s' "$_jsd_out"
  else
    printf '%s' "$_jsd_out"
  fi
}

# parse_stream_to_response OUT_FILE [ERR_BODY_FILE]
#
# Reads an SSE stream on stdin, echoes text deltas to stderr as they arrive and
# assembles the equivalent non-streaming response JSON into OUT_FILE.
#
# Arguments:
#   $1 - path that receives {content:[...], stop_reason, usage} on success.
#   $2 - (optional) path that receives a plain JSON error body when the server
#        answered with one instead of an event stream.
# Returns:
#   0 - clean stream, OUT_FILE written.
#   1 - no content blocks were produced, or a stream `error` event arrived.
#   2 - a non-SSE JSON body carrying .error.type was captured into $2.
parse_stream_to_response() {
  local out_file="$1"
  local err_body_file="${2:-}"
  # State: ordered content blocks. We use parallel arrays keyed by block index.
  #   block_type[i]: "text" | "tool_use"
  #   block_text[i]: accumulated text (for text blocks)
  #   block_id[i], block_name[i], block_input_buf[i]: for tool_use blocks
  local -a block_type=() block_text=() block_id=() block_name=() block_input_buf=()
  local stop_reason="" out_tokens=0 in_tokens=0 cache_read=0 cache_write=0
  local started_text=0
  local line data event_type
  # v0.8.5: accumulate any non-SSE lines (an HTTP-error JSON body arrives here).
  local raw_nonsse=""

  # `|| [ -n "$line" ]` keeps a final line that has no trailing newline. A JSON
  # error body is frequently sent without one; with a bare `read` loop that
  # body was dropped, the function returned 1 instead of 2, and the caller
  # re-sent the whole prompt.
  while IFS= read -r line || [ -n "$line" ]; do
    # Strip CR (curl on Windows / SSE servers often emit CRLF).
    line="${line%$'\r'}"
    # v0.8.5: capture lines that are NOT SSE framing or blank — a JSON error
    # body (429/overloaded) lands here. Bounded to avoid unbounded growth on a
    # genuinely malformed long stream.
    case "$line" in
      'event: '*|'data: '*|'') : ;;
      *)
        if [ "${#raw_nonsse}" -lt 65536 ]; then
          raw_nonsse+="$line"$'\n'
        fi
        ;;
    esac
    case "$line" in
      'event: '*)
        event_type="${line#event: }"
        continue
        ;;
      'data: '*)
        data="${line#data: }"
        [ -z "$data" ] && continue
        # Parse the event JSON. Each line is one JSON object.
        local etype
        etype=$(printf '%s' "$data" | jq -r '.type // empty' 2>/dev/null)
        etype="${etype%$'\r'}"   # a Windows-native jq may terminate with CRLF
        case "$etype" in
          message_start)
            # Pull initial input tokens from .message.usage
            local u
            u=$(printf '%s' "$data" | jq -r '.message.usage // empty' 2>/dev/null)
            if [ -n "$u" ]; then
              in_tokens=$(printf '%s' "$u" | jq -r '.input_tokens // 0' 2>/dev/null)
              cache_read=$(printf '%s' "$u" | jq -r '.cache_read_input_tokens // 0' 2>/dev/null)
              cache_write=$(printf '%s' "$u" | jq -r '.cache_creation_input_tokens // 0' 2>/dev/null)
            fi
            ;;
          content_block_start)
            local idx btype
            idx=$(printf '%s' "$data" | jq -r '.index' 2>/dev/null)
            idx="${idx%$'\r'}"
            # An empty/"null"/non-numeric index cannot be an array subscript.
            case "$idx" in ''|*[!0-9]*) continue ;; esac
            btype=$(printf '%s' "$data" | jq -r '.content_block.type' 2>/dev/null)
            btype="${btype%$'\r'}"
            block_type[$idx]="$btype"
            block_text[$idx]=""
            block_input_buf[$idx]=""
            if [ "$btype" = "tool_use" ]; then
              block_id[$idx]=$(printf '%s' "$data" | jq -r '.content_block.id' 2>/dev/null)
              block_name[$idx]=$(printf '%s' "$data" | jq -r '.content_block.name' 2>/dev/null)
              # Print the tool-call header EARLY (args still streaming).
              # We re-print final args on content_block_stop.
              printf '\n%s%s▶ %s%s %s(streaming args...)%s\n' \
                "$C_CYAN" "$C_BOLD" "${block_name[$idx]}" "$C_RESET" "$C_DIM" "$C_RESET" >&2
            fi
            ;;
          content_block_delta)
            # v0.8.12 (slowness): content_block_delta is the HOT path — a normal
            # response ships dozens-to-hundreds of these. The old code spawned 3
            # jq processes per event (.index, .delta.type, then .delta.text or
            # .delta.partial_json). On Cygwin/MobaXterm a process fork is
            # ~50-100ms (Windows fork emulation), so 3 forks x N deltas = the
            # multi-second per-turn render lag Bryan saw ("veeery slow").
            #
            # Collapse the per-event routing (index + delta type + the raw
            # payload) into ONE jq call. The payload is emitted with @json so an
            # embedded newline/tab in a streamed text chunk survives the
            # line-oriented `read`. We decode that payload ONLY for the
            # sub-types we actually consume (text_delta / input_json_delta) and
            # skip it entirely for thinking/signature deltas.
            local idx="" dtype="" _dpay=""
            {
              IFS= read -r idx
              IFS= read -r dtype
              IFS= read -r _dpay
            } < <(printf '%s' "$data" | jq -r '.index, (.delta.type // ""), ((.delta.text // .delta.partial_json // "") | @json)' 2>/dev/null)
            idx="${idx%$'\r'}"; dtype="${dtype%$'\r'}"; _dpay="${_dpay%$'\r'}"
            case "$idx" in ''|*[!0-9]*) continue ;; esac
            case "$dtype" in
              text_delta)
                local t=""
                # _dpay is a JSON-encoded string ("..."); decode back to raw.
                # Assigned by name (not via `$(...)`) so a chunk ending in
                # newlines keeps them and the plain-text case forks nothing.
                _json_str_decode "$_dpay" t
                # Stream to stderr so it can't get swallowed by stdout redirect.
                # Color whole stream with magenta (Larry's voice).
                if [ "$started_text" = "0" ]; then
                  printf '%s' "$C_MAGENTA" >&2
                  started_text=1
                fi
                printf '%s' "$t" >&2
                block_text[$idx]+="$t"
                ;;
              input_json_delta)
                local pj=""
                # Tool-call arg fragments are typically escape-free JSON token
                # slices, so this is normally the no-fork path as well.
                _json_str_decode "$_dpay" pj
                block_input_buf[$idx]+="$pj"
                ;;
              thinking_delta|signature_delta) : ;;   # ignore for now
            esac
            ;;
          content_block_stop)
            local idx
            idx=$(printf '%s' "$data" | jq -r '.index' 2>/dev/null)
            idx="${idx%$'\r'}"
            case "$idx" in ''|*[!0-9]*) continue ;; esac
            if [ "${block_type[$idx]:-}" = "tool_use" ]; then
              # Validate accumulated JSON. If empty, treat as {}.
              local buf="${block_input_buf[$idx]:-}"
              [ -z "$buf" ] && buf="{}"
              # Test it parses; if not, store as empty object.
              if ! printf '%s' "$buf" | jq -e . >/dev/null 2>&1; then
                buf="{}"
              fi
              block_input_buf[$idx]="$buf"
              # Pretty-display the final args under the header we printed earlier.
              local pretty; pretty=$(_pretty_tool_input "$buf")
              if [ -n "$pretty" ]; then
                printf '%s%s%s\n' "$C_DIM" "$pretty" "$C_RESET" >&2
                if printf '%s' "$buf" | grep -q '.\{121,\}'; then
                  printf '%s  (use /show-last-tool for full args)%s\n' "$C_DIM" "$C_RESET" >&2
                fi
              fi
            fi
            ;;
          message_delta)
            stop_reason=$(printf '%s' "$data" | jq -r '.delta.stop_reason // empty' 2>/dev/null)
            stop_reason="${stop_reason%$'\r'}"
            local ot
            ot=$(printf '%s' "$data" | jq -r '.usage.output_tokens // empty' 2>/dev/null)
            [ -n "$ot" ] && out_tokens="$ot"
            ;;
          message_stop) : ;;
          ping|error)
            if [ "$etype" = "error" ]; then
              local em; em=$(printf '%s' "$data" | jq -r '.error.message // .error.type // empty' 2>/dev/null)
              # Do not leave the terminal stuck in the streaming colour.
              [ "$started_text" = "1" ] && printf '%s\n' "$C_RESET" >&2
              err "stream error event: $em"
              return 1
            fi
            ;;
        esac
        ;;
      '') continue ;;
    esac
  done

  # Close color if we printed text.
  [ "$started_text" = "1" ] && printf '%s\n' "$C_RESET" >&2

  # If we never got any blocks, treat as failure.
  if [ "${#block_type[@]}" -eq 0 ]; then
    # v0.8.5: was this a non-SSE HTTP-error JSON body (429/overloaded/500)
    # rather than a mid-parse glitch? If the accumulated non-SSE text parses as
    # JSON with an .error.type, hand it to the caller so it can surface the
    # error WITH BACKOFF instead of re-sending the whole prompt (double-send =
    # burst). Return 2 = "server already errored, body captured; do NOT re-send".
    if [ -n "$err_body_file" ] && [ -n "$raw_nonsse" ]; then
      local _et
      _et=$(printf '%s' "$raw_nonsse" | jq -r '.error.type // empty' 2>/dev/null)
      if [ -n "$_et" ]; then
        printf '%s' "$raw_nonsse" > "$err_body_file"
        return 2
      fi
    fi
    return 1
  fi

  # Track cost
  # v0.8.12: coerce_int the per-stream usage counters too. Each SSE line is
  # CR-stripped at read time, but jq -r's OWN stdout (the usage extraction at
  # message_start) can still carry a CR from a Cygwin jq.exe text-mode pipe.
  # Defend the arithmetic the same way as the non-streaming path.
  in_tokens=$(coerce_int "$in_tokens" 0); out_tokens=$(coerce_int "$out_tokens" 0)
  cache_read=$(coerce_int "$cache_read" 0); cache_write=$(coerce_int "$cache_write" 0)
  _LARRY_INPUT_TOKENS=$(( _LARRY_INPUT_TOKENS + in_tokens ))
  _LARRY_OUTPUT_TOKENS=$(( _LARRY_OUTPUT_TOKENS + out_tokens ))
  _LARRY_CACHE_READ_TOKENS=$(( _LARRY_CACHE_READ_TOKENS + cache_read ))
  _LARRY_CACHE_WRITE_TOKENS=$(( _LARRY_CACHE_WRITE_TOKENS + cache_write ))
  # v0.6.9: record per-turn context size for the status line.
  # NB: when this function is the right-hand side of a pipeline it runs in a
  # subshell, so these counter updates (and _LARRY_LAST_ASSISTANT_TEXT below)
  # do not propagate to the parent shell; the caller must re-derive whatever
  # it needs from the synthetic response file.

  # Assemble the synthetic response file. Indexed-array keys expand in
  # ascending order, so content[] is rebuilt in block-index order.
  local content_json="[]"
  local accumulated_text=""
  local i
  for i in "${!block_type[@]}"; do
    local bt="${block_type[$i]:-}"
    [ -z "$bt" ] && continue
    if [ "$bt" = "text" ]; then
      local txt="${block_text[$i]:-}"
      accumulated_text+="$txt"
      # Pass the text through a temp file (--rawfile), not argv.
      local tf; tf=$(mktemp)
      printf '%s' "$txt" > "$tf"
      content_json=$(printf '%s' "$content_json" | jq \
        --rawfile t "$(jqpath "$tf")" \
        '. + [{"type":"text","text":$t}]')
      rm -f "$tf"
    elif [ "$bt" = "tool_use" ]; then
      # NB: don't use ${var:-{}} default — bash treats inner '}' as closing
      # the expansion. Fall back manually instead.
      local id="${block_id[$i]:-}" nm="${block_name[$i]:-}" inp="${block_input_buf[$i]:-}"
      [ -z "$inp" ] && inp="{}"
      local inf; inf=$(mktemp)
      printf '%s' "$inp" > "$inf"
      content_json=$(printf '%s' "$content_json" | jq \
        --arg id "$id" --arg name "$nm" --slurpfile i "$(jqpath "$inf")" \
        '. + [{"type":"tool_use","id":$id,"name":$name,"input":$i[0]}]')
      rm -f "$inf"
    fi
  done
  [ -n "$accumulated_text" ] && _LARRY_LAST_ASSISTANT_TEXT="$accumulated_text"

  # Emit synthetic response JSON. v0.6.9: include cache_* so the parent shell
  # (which doesn't see this subshell's STATUS_* updates) can recompute the
  # per-turn ctx total = input + cache_creation + cache_read.
  jq -n \
    --argjson content "$content_json" \
    --arg stop "$stop_reason" \
    --argjson in_t "$in_tokens" --argjson out_t "$out_tokens" \
    --argjson cr "$cache_read" --argjson cw "$cache_write" \
    '{content:$content, stop_reason:$stop, usage:{input_tokens:$in_t, output_tokens:$out_t, cache_read_input_tokens:$cr, cache_creation_input_tokens:$cw}}' \
    > "$out_file"
  return 0
}

# _is_cygwin_like — true on MobaXterm / Cygwin / MSYS / Git-Bash-for-Windows,
# where the SSE stream carries CRLF line endings and the bundled jq is a
# Windows-native binary — the environment where streaming is most fragile.
# Detection, in order: $OSTYPE (cygwin*/msys*), a non-empty $MSYSTEM (set by
# MSYS2/Git-Bash), `uname -s` (CYGWIN*/MINGW*/MSYS*), then the presence of a
# `cygpath` command on PATH.
# Returns: 0 when any check matches, 1 otherwise.
_is_cygwin_like() {
  case "${OSTYPE:-}" in
    cygwin*|msys*) return 0 ;;
  esac
  [ -n "${MSYSTEM:-}" ] && return 0
  case "$(uname -s 2>/dev/null)" in
    CYGWIN*|MINGW*|MSYS*) return 0 ;;
  esac
  command -v cygpath >/dev/null 2>&1 && return 0
  return 1
}

# Try streaming first; if anything goes wrong, fall back to non-streaming.
# LARRY_NO_STREAM=1 disables streaming entirely.
#
# v0.8.5 (PROBLEM 2): default NO_STREAM=1 on Cygwin-like terminals. Streaming
# SSE parsing is fragile there (CRLF in the event stream, Windows-native jq),
# and even with the v0.8.5 single-send fix a clean non-streaming call is the
# safer default on those hosts. An EXPLICIT `LARRY_NO_STREAM=0` from the user
# still forces streaming on (opt back in); we only change the *default*.
if [ -n "${LARRY_NO_STREAM:-}" ]; then
  :                     # user set it explicitly — honor it as-is
elif _is_cygwin_like; then
  LARRY_NO_STREAM=1     # safer default on MobaXterm/Cygwin
else
  LARRY_NO_STREAM=0
fi

# _rate_limit_backoff_secs ATTEMPT — how long to sleep before retry ATTEMPT
# (1-based).
# Honors the `retry-after` header (authoritative) when present;
# otherwise exponential backoff 2,4,8,… capped at LARRY_RL_BACKOFF_MAX (30s).
# This replaces the old behaviour where a 429 either errored out immediately or
# (via the stream→non-stream double-send) fired a SECOND call with no spacing —
# the per-minute burst that tripped the rate limit even with quota free.
#
# Arguments: $1 - retry attempt number (1-based; defaults to 1).
# Globals:   STATUS_retry_after_secs (read; treated as 0 when unset),
#            LARRY_RL_BACKOFF_MAX (read).
# Outputs:   the number of seconds to sleep, on stdout (no newline).
LARRY_RL_MAX_RETRIES="${LARRY_RL_MAX_RETRIES:-3}"
LARRY_RL_BACKOFF_MAX="${LARRY_RL_BACKOFF_MAX:-30}"
_rate_limit_backoff_secs() {
  local attempt; attempt=$(coerce_int "${1:-1}" 1)
  # `:-0` keeps this safe under `set -u` before any response headers were seen.
  local ra; ra=$(coerce_int "${STATUS_retry_after_secs:-0}" 0)
  if [ "$ra" -gt 0 ]; then
    # Cap an absurd retry-after so we never hang the REPL for minutes.
    [ "$ra" -gt 120 ] && ra=120
    printf '%s' "$ra"
    return 0
  fi
  local cap; cap=$(coerce_int "$LARRY_RL_BACKOFF_MAX" 30)
  # Exponential: 2^attempt, capped. attempt=1→2, 2→4, 3→8.
  # Keep the shift count in a sane range: a negative or very large count
  # yields a meaningless (wrapped or negative) value.
  [ "$attempt" -lt 0 ] && attempt=0
  local secs
  if [ "$attempt" -ge 31 ]; then
    secs="$cap"
  else
    secs=$(( 1 << attempt ))
  fi
  [ "$secs" -gt "$cap" ] && secs="$cap"
  printf '%s' "$secs"
}

# agent_turn SYSTEM_PROMPT — run one user turn against the API, looping while
# the model keeps answering with stop_reason "tool_use".
#
# Each loop iteration builds the request payload from MESSAGES_FILE, TOOLS_JSON
# and the system prompt, sends it (streaming unless LARRY_NO_STREAM=1), records
# the assistant blocks, executes every requested tool, and appends the tool
# results as the next user message.
#
# Arguments: $1 - the system prompt text.
# Returns:   1 on a network/API error or exhausted rate-limit retries;
#            otherwise the status of the final cleanup `rm`.
agent_turn() {
  local system_prompt="$1"
  # Write the large blobs to files ONCE per agent_turn rather than passing
  # them via --arg / --argjson. Combined budget (TOOLS_JSON ~21KB + system
  # prompt ~25KB) easily exceeds Cygwin's ~32KB argv cap → E2BIG.
  local tools_file system_file
  tools_file=$(mktemp); system_file=$(mktemp)
  printf '%s' "$TOOLS_JSON" > "$tools_file"
  printf '%s' "$system_prompt" > "$system_file"
  _LARRY_TURNS=$(( _LARRY_TURNS + 1 ))
  # v0.8.5: per-turn rate-limit retry budget (shared across the tool-use loop).
  local _rl_attempts=0

  while true; do
    local payload_file; payload_file=$(mktemp)
    local stream_flag="false"
    [ "$LARRY_NO_STREAM" != "1" ] && stream_flag="true"

    # The `system` field is larry's own persona prompt, nothing more. We do NOT
    # prepend a "You are Claude Code, ..." identity block: that Claude-Code
    # system spoof is part of the impersonation Anthropic blocks, and the
    # API-key rail (the default) is a sanctioned programmatic request that needs
    # no such block.
    #
    # v0.8.12 PROMPT CACHING (cost): the system prompt (~6K tok of agent .md) and
    # the tools block (~6.7K tok, 35 tools) are STATIC across a session but were
    # re-sent UNCACHED every turn — ~12.7K input tok billed at full $3/MTok each
    # turn (~$0.038/turn just for the prefix). With cache_control breakpoints the
    # first turn pays a 1.25x write ($3.75/MTok) and every subsequent turn reads
    # the prefix at 0.1x ($0.30/MTok) — a ~90% cut on the static prefix.
    #
    # Mechanics (verified against platform.claude.com prompt-caching docs):
    #   - `system` MUST be an ARRAY of {type:text,text,cache_control} blocks for
    #     cache_control to attach (you cannot cache a bare string system field).
    #   - `cache_control` goes on the LAST tool object; everything up to and
    #     including it is cached as one prefix. Cache order is tools→system, so
    #     marking both caches the whole static prefix.
    #   - Sonnet-4.x min cacheable = 1024 tok; our prefix is ~12.7K, well over.
    # Gated by LARRY_PROMPT_CACHE (default ON for the apikey rail). The OAuth rail
    # keeps the legacy string form (minimal-honest-headers invariant; OAuth is
    # being phased out anyway).
    local _cache_on=0
    if [ "${LARRY_PROMPT_CACHE:-1}" = "1" ] && [ "$LARRY_AUTH_MODE" = "apikey" ]; then
      _cache_on=1
    fi
    if [ "$_cache_on" = "1" ]; then
      jq -n \
        --arg model "$LARRY_MODEL" \
        --argjson max_tokens "$LARRY_MAX_TOKENS" \
        --argjson stream "$stream_flag" \
        --rawfile system "$(jqpath "$system_file")" \
        --slurpfile messages "$(jqpath "$MESSAGES_FILE")" \
        --slurpfile tools "$(jqpath "$tools_file")" \
        '{
          model:$model,
          max_tokens:$max_tokens,
          stream:$stream,
          system: [ { "type":"text", "text":$system, "cache_control": {"type":"ephemeral"} } ],
          messages: $messages[0],
          tools: ( $tools[0]
                   | if length > 0
                     then ( .[0:-1] + [ (.[-1] + {"cache_control":{"type":"ephemeral"}}) ] )
                     else .
                     end )
        }' \
        > "$payload_file"
    else
      jq -n \
        --arg model "$LARRY_MODEL" \
        --argjson max_tokens "$LARRY_MAX_TOKENS" \
        --argjson stream "$stream_flag" \
        --rawfile system "$(jqpath "$system_file")" \
        --slurpfile messages "$(jqpath "$MESSAGES_FILE")" \
        --slurpfile tools "$(jqpath "$tools_file")" \
        '{model:$model, max_tokens:$max_tokens, stream:$stream, system:$system, messages:$messages[0], tools:$tools[0]}' \
        > "$payload_file"
    fi

    local resp=""
    local resp_file; resp_file=$(mktemp)
    local used_stream=0
    # Snapshot the session counters. The stream parser is the right-hand side
    # of a pipeline, so it runs in a subshell and its counter updates are lost;
    # comparing against this snapshot afterwards tells us whether the parent
    # still has to account this call's usage itself.
    local _pre_in="${_LARRY_INPUT_TOKENS:-0}" _pre_out="${_LARRY_OUTPUT_TOKENS:-0}"
    if [ "$stream_flag" = "true" ]; then
      # Stream; parse_stream_to_response writes the synthetic response into
      # $resp_file, and ANY non-SSE HTTP-error JSON body into $err_body_file.
      # v0.8.5: capture the parser's exit code so we can tell apart:
      #   rc=0  clean stream → use it
      #   rc=2  server returned a JSON error body (429/overloaded/500) → surface
      #         it WITHOUT re-sending the prompt (re-sending = burst)
      #   rc=1  genuine mid-parse glitch → fall back to ONE non-streaming send
      local err_body_file; err_body_file=$(mktemp)
      call_api_stream "$payload_file" | parse_stream_to_response "$resp_file" "$err_body_file"
      local _ps_rc=${PIPESTATUS[1]}
      # Drain rate-limit headers BEFORE we decide what to do (so the 429
      # diagnosis + backoff sees this call's retry-after / rail).
      _drain_pending_stream_headers
      if [ "$_ps_rc" = "0" ]; then
        used_stream=1
        resp=$(cat "$resp_file")
      elif [ "$_ps_rc" = "2" ]; then
        # Server already errored — use the captured body directly, do NOT
        # re-send. The single-send invariant: one logical attempt per turn.
        resp=$(cat "$err_body_file")
      else
        warn "streaming parse failed — falling back to non-streaming for this turn"
        # Re-build payload without stream:true and call non-streaming ONCE.
        jq 'del(.stream)' < "$payload_file" > "$payload_file.ns" && mv "$payload_file.ns" "$payload_file"
        resp=$(call_api "$payload_file")
      fi
      rm -f "$err_body_file"
    else
      resp=$(call_api "$payload_file")
    fi
    # "$payload_file.ns" only survives when the fallback jq above failed.
    rm -f "$payload_file" "$payload_file.ns" "$resp_file"

    if [ -z "$resp" ]; then
      # v0.8.14: empty body usually means curl never got a response (cert/DNS/
      # connect failure). If that smells like a corporate block, GUIDE into
      # manual-tools mode instead of dumping a bare network error.
      if _diagnose_api_block ""; then
        rm -f "$tools_file" "$system_file"
        return 1
      fi
      err "Network error: empty response from $LARRY_API_URL (timeout, DNS, or connection reset). Check connectivity."
      rm -f "$tools_file" "$system_file"
      return 1
    fi

    # v0.8.14: a non-empty body that isn't our API's JSON (e.g. a 403 block page
    # or proxy interstitial in HTML) means something on the egress answered
    # instead of the API. Detect → guide into manual-tools mode (no bypass).
    if ! printf '%s' "$resp" | jq -e . >/dev/null 2>&1; then
      if _diagnose_api_block "$resp"; then
        rm -f "$tools_file" "$system_file"
        return 1
      fi
      # Not a recognised block page, but still not JSON: nothing below can
      # parse it, so stop here instead of recording a bogus assistant message.
      err "API error: response from $LARRY_API_URL was not valid JSON — first bytes: $(printf '%s' "$resp" | head -c 200)"
      rm -f "$tools_file" "$system_file"
      return 1
    fi

    local err_type; err_type=$(strip_cr "$(printf '%s' "$resp" | jq -r '.error.type // empty' 2>/dev/null)")
    if [ -n "$err_type" ]; then
      # v0.8.5: on a rate_limit/overloaded error, retry with backoff (honoring
      # retry-after) instead of failing the turn outright. This is the fix for
      # PROBLEM 1: a tripped per-minute burst rail clears in seconds, so a
      # single backed-off retry usually succeeds — and crucially does NOT add
      # to the burst the way the old immediate stream→non-stream re-send did.
      case "$err_type" in
        rate_limit_error|overloaded_error)
          # ── 429-discrimination (reused from #13's good work) ─────────────
          # A REAL rate-limit 429 ALWAYS carries anthropic-ratelimit-* headers
          # → STATUS_rl_edge_reject stays 0 → legitimate backoff below. A 429
          # with NO such headers is an edge/auth bounce, not a quota limit. We
          # branch on that distinction for clear, accurate messaging.
          #
          # API-KEY RAIL (the default): an edge-reject 429 here is NOT your
          # quota — backing off won't help. Surface that plainly and stop.
          if [ "$err_type" = "rate_limit_error" ] \
             && [ "$STATUS_rl_edge_reject" = "1" ] \
             && [ "$LARRY_AUTH_MODE" = "apikey" ]; then
            err "429 with NO rate-limit headers on the API-key rail — an edge/transient bounce, not your quota."
            err "If this persists, check console.anthropic.com (key active? billing enabled?) or retry shortly. (full headers captured in \$LARRY_HOME/log/headers.log)"
            rm -f "$tools_file" "$system_file"
            return 1
          fi
          # OAUTH RAIL (opt-in only): an edge-reject 429 means Anthropic's edge
          # bounced the OAuth token (the impersonation block / acceleration
          # throttle). Backing off is futile — the edge will not accept it. If
          # an API key is configured, flip to the sanctioned API-key rail (which
          # works) for the rest of the session and retry immediately. One-shot
          # per session (_LARRY_EDGE_FALLBACK_DONE). Honors LARRY_NO_EDGE_FALLBACK=1.
          if [ "$err_type" = "rate_limit_error" ] \
             && [ "$STATUS_rl_edge_reject" = "1" ] \
             && [ "$LARRY_AUTH_MODE" = "oauth" ] \
             && [ "${LARRY_NO_EDGE_FALLBACK:-0}" != "1" ] \
             && [ "${_LARRY_EDGE_FALLBACK_DONE:-0}" != "1" ] \
             && [ -n "${ANTHROPIC_API_KEY:-}" ]; then
            _LARRY_EDGE_FALLBACK_DONE=1
            LARRY_AUTH_MODE="apikey"
            warn "edge rejected the OAuth token (429 with no rate-limit headers — an edge bounce, NOT your quota)."
            warn "falling back to the sanctioned API-key rail for the rest of this session (set LARRY_NO_EDGE_FALLBACK=1 to disable)."
            continue   # rebuild payload (now API-key shaped) and retry on the key
          fi
          _rl_attempts=$(( _rl_attempts + 1 ))
          local _max; _max=$(coerce_int "$LARRY_RL_MAX_RETRIES" 3)
          if [ "$_rl_attempts" -le "$_max" ]; then
            local _wait; _wait=$(_rate_limit_backoff_secs "$_rl_attempts")
            # Surface the actionable, header-parsed diagnosis (which rail + reset).
            warn "$(_humanize_api_error "$resp")"
            warn "backing off ${_wait}s before retry $_rl_attempts/$_max…"
            sleep "$_wait" 2>/dev/null || true
            continue   # rebuild payload and re-attempt (the ONLY retry path)
          fi
          # v0.8.10: if we exhausted retries on an edge-reject and could NOT flip
          # (no API key), say so explicitly — backoff was never going to help.
          if [ "$STATUS_rl_edge_reject" = "1" ] && [ "$LARRY_AUTH_MODE" = "oauth" ]; then
            err "OAuth edge-reject persisted after $_max retries and no API key is set to fall back to."
            err "This 429 carries NO rate-limit headers — it is an edge bounce, not your quota. Set ANTHROPIC_API_KEY for an automatic fallback, or re-run /login."
            rm -f "$tools_file" "$system_file"
            return 1
          fi
          err "API error: $(_humanize_api_error "$resp") (gave up after $_max retries)"
          rm -f "$tools_file" "$system_file"
          return 1
          ;;
      esac
      err "API error: $(_humanize_api_error "$resp")"
      rm -f "$tools_file" "$system_file"
      return 1
    fi

    local blocks; blocks=$(printf '%s' "$resp" | jq -c '.content')
    add_assistant_blocks "$blocks"

    # Decide whether THIS shell still has to account the call's usage.
    local _account_usage=0
    if [ "$used_stream" = "0" ]; then
      # Print text blocks (only if we did NOT already stream them above).
      local non_stream_text
      non_stream_text=$(printf '%s' "$resp" | jq -r '.content[] | select(.type=="text") | .text')
      if [ -n "$non_stream_text" ]; then
        printf '%s%s%s\n' "$C_MAGENTA" "$non_stream_text" "$C_RESET"
        _LARRY_LAST_ASSISTANT_TEXT="$non_stream_text"
      fi
      _account_usage=1
    elif [ "${_LARRY_INPUT_TOKENS:-0}" = "$_pre_in" ] \
         && [ "${_LARRY_OUTPUT_TOKENS:-0}" = "$_pre_out" ]; then
      # Streamed, and the counters did not move: the parser's bookkeeping was
      # lost with its pipeline subshell. Recover it from the synthetic
      # response so /cost and /copy also work for streamed turns. (If the
      # counters DID move, the parser ran in this shell and already counted.)
      _account_usage=1
      local streamed_text
      streamed_text=$(printf '%s' "$resp" | jq -r '[.content[] | select(.type=="text") | .text] | join("")' 2>/dev/null)
      [ -n "$streamed_text" ] && _LARRY_LAST_ASSISTANT_TEXT="$streamed_text"
    fi

    if [ "$_account_usage" = "1" ]; then
      # Cost tracking.
      local nu_in nu_out nu_cr nu_cw
      nu_in=$(printf '%s' "$resp" | jq -r '.usage.input_tokens // 0' 2>/dev/null)
      nu_out=$(printf '%s' "$resp" | jq -r '.usage.output_tokens // 0' 2>/dev/null)
      nu_cr=$(printf '%s' "$resp" | jq -r '.usage.cache_read_input_tokens // 0' 2>/dev/null)
      nu_cw=$(printf '%s' "$resp" | jq -r '.usage.cache_creation_input_tokens // 0' 2>/dev/null)
      # v0.8.12: coerce_int the jq output BEFORE arithmetic. On Cygwin/MobaXterm
      # curl writes the response body CRLF-translated, so jq -r of a numeric
      # usage field can emit "1234\r"; `// 0` only guards JSON null, NOT the CR.
      # That CR-tainted operand crashed $(( )) AFTER the response rendered, before
      # the next prompt — the v0.8.11 symptom Bryan hit. (Anomaly #4 of the
      # v0.7.5 OAuth fix predicted this recurrence in non-OAuth scripts.)
      nu_in=$(coerce_int "$nu_in" 0); nu_out=$(coerce_int "$nu_out" 0)
      nu_cr=$(coerce_int "$nu_cr" 0); nu_cw=$(coerce_int "$nu_cw" 0)
      _LARRY_INPUT_TOKENS=$(( _LARRY_INPUT_TOKENS + nu_in ))
      _LARRY_OUTPUT_TOKENS=$(( _LARRY_OUTPUT_TOKENS + nu_out ))
      _LARRY_CACHE_READ_TOKENS=$(( _LARRY_CACHE_READ_TOKENS + nu_cr ))
      _LARRY_CACHE_WRITE_TOKENS=$(( _LARRY_CACHE_WRITE_TOKENS + nu_cw ))
    fi

    # v0.6.9: update the per-turn context-window tracker from THIS turn's
    # usage block. Runs in both streaming and non-streaming paths (the
    # synthetic stream JSON includes cache_* per v0.6.9 patch). The status
    # line reads this on the next prompt render.
    local _ctx_in _ctx_cr _ctx_cw
    _ctx_in=$(printf '%s' "$resp" | jq -r '.usage.input_tokens // 0' 2>/dev/null)
    _ctx_cr=$(printf '%s' "$resp" | jq -r '.usage.cache_read_input_tokens // 0' 2>/dev/null)
    _ctx_cw=$(printf '%s' "$resp" | jq -r '.usage.cache_creation_input_tokens // 0' 2>/dev/null)
    _record_ctx_used "$_ctx_in" "$_ctx_cr" "$_ctx_cw"

    # Log assistant text to session log
    {
      log_section "assistant"
      printf '%s' "$resp" | jq -r '.content[] | select(.type=="text") | .text' >> "$LOG_FILE"
    }

    local stop; stop=$(printf '%s' "$resp" | jq -r '.stop_reason // empty')
    if [ "$stop" != "tool_use" ]; then break; fi

    # Process tool uses
    local results='[]'
    while IFS= read -r tool_use; do
      [ -z "$tool_use" ] && continue
      local tu_id name input_json
      tu_id=$(printf '%s' "$tool_use" | jq -r '.id')
      name=$(printf '%s' "$tool_use" | jq -r '.name')
      input_json=$(printf '%s' "$tool_use" | jq -c '.input')
      # Only render the call header if we did NOT stream (streaming already
      # rendered it). Either way, record for /show-last-tool.
      if [ "$used_stream" = "0" ]; then
        display_tool_call "$name" "$input_json"
      fi
      _LARRY_LAST_TOOL_NAME="$name"
      _LARRY_LAST_TOOL_INPUT="$input_json"
      log_section "tool: $name $(printf '%s' "$input_json" | jq -c .)"
      local result
      result=$(execute_tool "$name" "$input_json")
      # Wrap common jq malformed-json errors in tool results.
      case "$result" in
        *"jq: error"*"parse error"*)
          result="Tool returned malformed JSON; raw body: $(printf '%s' "$result" | head -c 200)"
          ;;
      esac

      # v0.7.3 — auto-PHI on tool results.
      # Gating per spec: only HL7-shaped output gets sanitized. We allow-list
      # the tool names that can return HL7 (read_file of an .hl7/.txt, hl7_*
      # tools, nc_msgs). Generic outputs (list_dir, grep_files, bash_exec,
      # glob_files, web search, etc.) are NEVER touched — the spec is
      # explicit: false positives there would break legitimate text.
      #
      # For HL7-shaped results we route through hl7-sanitize.sh (the
      # canonical field-aware pipeline) — NOT auto_detect_phi (which is
      # designed for prose, not pipe-delimited segment data). The two
      # share lookup.tsv so tokens are stable across surfaces.
      if [ "$AUTO_PHI_MODE" != "off" ]; then
        # v0.8.1-a: content-shape gating replaces the v0.7.3 tool-name
        # allow-list. The shape detector (_auto_phi_looks_like_hl7) runs on
        # EVERY tool result regardless of which tool produced it. On hit →
        # route through hl7-sanitize.sh. On miss → pass through unchanged.
        # This closes V2 (bash_exec, ssh_exec, grep_files, and read_file of
        # any extension all get scanned when their output is HL7-shaped).
        # False-positive cost: cheap — sanitizer runs against output that
        # doesn't actually match its rules; mints no tokens; passes through.
        local _ap_eligible=1
        # v0.8.1-c: base64 unwrap pass. Detect candidate base64 (length+
        # charset+modulo, NOT entropy — per Pax §V2-sub: HL7's repetitive
        # prefixes survive base64 with LOW entropy, so entropy is the wrong
        # signal). Speculatively decode each candidate; if the decoded bytes
        # look like HL7, route THOSE through hl7-sanitize.sh and re-encode
        # back into the result. Catches ssh_pull_smat sampled mode TSV.
        local _ap_b64=0
        if printf '%s' "$result" | head -c 65536 | grep -qE '[A-Za-z0-9+/]{200,}={0,2}' 2>/dev/null; then
          _ap_b64=1
        fi
        # v0.8.0-c: strict mode aborts if sanitizer script is missing/non-exec
        # when we have HL7-shaped output. We can't kill the tool-loop iteration
        # without sending SOMETHING back to satisfy the tool_use; substitute
        # a refusal that the model can surface to Bryan, NOT the raw HL7.
        if [ "$AUTO_PHI_MODE" = "strict" ] \
           && [ "$_ap_eligible" = "1" ] \
           && _auto_phi_looks_like_hl7 "$result" \
           && [ ! -x "$LARRY_LIB_DIR/hl7-sanitize.sh" ]; then
          printf '%sphi>%s strict mode: hl7-sanitize.sh unavailable; replacing %s result with refusal sentinel (raw HL7 NOT sent to model)\n' \
            "$C_DIM" "$C_RESET" "$name" >&2
          result='{"error":"auto-PHI sanitizer unavailable on HL7-shaped result","tool":"'"$name"'","action":"result withheld; set LARRY_AUTO_PHI=on to fall back to best-effort, or repair lib/hl7-sanitize.sh"}'
          _ap_eligible=0   # skip the normal sanitize path below
          _ap_b64=0
        fi
        # v0.8.1-c: base64-wrapped HL7 round-trip (decode → sanitize → re-encode).
        # Runs BEFORE the plain HL7-shape branch so a result that's pure b64
        # (no MSH| in cleartext) still gets the field-aware sanitize.
        if [ "$_ap_b64" = "1" ] && [ -x "$LARRY_LIB_DIR/hl7-sanitize.sh" ]; then
          local _b64_changed
          _b64_changed=$(_auto_phi_b64_roundtrip "$result" "$name") || true
          if [ -n "$_b64_changed" ]; then
            result="$_b64_changed"
            printf '%sphi>%s base64-wrapped HL7 detected in %s result; decoded, sanitized, re-encoded\n' \
              "$C_DIM" "$C_RESET" "$name" >&2
            _auto_phi_log "(b64-hl7 roundtrip)" "BATCH" "(b64-decoded-and-sanitized)" "hl7_pipeline" "tool_result" "$name (b64)"
          fi
        fi
        if [ "$_ap_eligible" = "1" ] && _auto_phi_looks_like_hl7 "$result"; then
          local _ap_tmp _ap_sanitized _ap_before _ap_after
          _ap_tmp=$(mktemp)
          printf '%s' "$result" > "$_ap_tmp"
          _ap_before=$(bash "$LARRY_LIB_DIR/hl7-sanitize.sh" count 2>/dev/null || echo 0)
          _ap_sanitized=$(bash "$LARRY_LIB_DIR/hl7-sanitize.sh" "$_ap_tmp" 2>/dev/null)
          if [ -n "$_ap_sanitized" ]; then
            _ap_after=$(bash "$LARRY_LIB_DIR/hl7-sanitize.sh" count 2>/dev/null || echo 0)
            result="$_ap_sanitized"
            local _ap_new=$((_ap_after - _ap_before))
            if [ "$_ap_new" -gt 0 ]; then
              printf '%sphi>%s auto-tokenized %d HL7 field(s) in %s result [tool_result]\n' \
                "$C_DIM" "$C_RESET" "$_ap_new" "$name" >&2
              AUTO_PHI_SESSION_COUNT=$(( AUTO_PHI_SESSION_COUNT + _ap_new ))
              _auto_phi_log "(hl7-sanitize batch)" "BATCH" "(+${_ap_new} tokens)" "hl7_pipeline" "tool_result" "$name"
            fi
          else
            # v0.8.0-c: sanitizer returned empty (failure) on HL7-shaped input.
            # In strict mode, refuse the result. In default/confirm, keep prior
            # fail-open behavior (raw result flows — preserves "don't break tools").
            if [ "$AUTO_PHI_MODE" = "strict" ]; then
              printf '%sphi>%s strict mode: hl7-sanitize.sh returned empty on HL7-shaped %s result; replacing with refusal sentinel (raw HL7 NOT sent to model)\n' \
                "$C_DIM" "$C_RESET" "$name" >&2
              result='{"error":"auto-PHI sanitize returned empty on HL7-shaped result","tool":"'"$name"'","action":"result withheld; set LARRY_AUTO_PHI=on to fall back to best-effort"}'
            fi
          fi
          rm -f "$_ap_tmp"
        fi
      fi

      # v0.8.1-b: second approval gate. After the tool ran and we have the
      # (possibly sanitized) result, prompt the user before passing it back
      # to the model. Triggers:
      #   - tool produced HL7-shaped output (post-sanitize, in case sanitize
      #     missed something or fell open), OR
      #   - output exceeds LARRY_TOOL_RESULT_REVIEW_THRESHOLD bytes
      #     (default 8192), OR
      #   - LARRY_TOOL_RESULT_REVIEW=always
      # Skipped when:
      #   - LARRY_AUTO_PHI=off (user has explicitly opted out of all PHI
      #     safety prompts; consistent with that opt-out)
      #   - non-interactive shell (no TTY — never block headless scripts)
      #   - tool name is read_file/list_dir/grep_files/glob_files (these
      #     already had user intent via the model's tool_use; the model
      #     asked for them. The dominant V12 risk is bash_exec/ssh_exec/
      #     ssh_pull/ssh_pull_smat where the operator's "run + show" intent
      #     may not include "show to model")
      _maybe_tool_result_review_gate "$name" "$result"
      result="$_LARRY_GATE_RESULT"
      _LARRY_LAST_TOOL_RESULT="$result"
      log_append '```'; log_append "$result"; log_append '```'
      # Tool results can be large (read_file up to 250KB, ssh_exec up to
      # 500 lines, etc.) — pass via tempfile, not --arg, to avoid Cygwin
      # argv overflow.
      local result_file; result_file=$(mktemp)
      printf '%s' "$result" > "$result_file"
      results=$(printf '%s' "$results" | jq \
        --arg id "$tu_id" --rawfile c "$(jqpath "$result_file")" \
        '. + [{"type":"tool_result","tool_use_id":$id,"content":$c}]')
      rm -f "$result_file"
    done < <(printf '%s' "$resp" | jq -c '.content[] | select(.type=="tool_use")')
    add_user_tool_results "$results"
  done
  rm -f "$tools_file" "$system_file"
}

# ───────────────────────────────────────────────────────────────────────────── # Slash commands and REPL # ───────────────────────────────────────────────────────────────────────────── print_help() { cat < [args] run a tool by hand (no args → its --help) larry tools --help usage, flags, expected input/output + an example Slash commands: /quit /exit /q exit /clear clear the terminal screen (distinct from /reset) /copy copy last assistant response to clipboard /cost show running token + dollar cost for the session /status force-render the persistent status line (ctx + rate-limit) /show-last-tool print full last tool call + result (debug aid) /model switch model (e.g. /model claude-opus-4-7) /cd change working directory /reset clear conversation history (keeps the log file) /load load file contents as your next user message /sys print the active system prompt /env print detected Cloverleaf env (HCIROOT, HCISITE, tools) /auth show the active auth rail + masked API-key status /set-api-key set the per-client API key (silent input, validated, stored 0600, CR-safe). --clear removes it; --status shows it masked (sk-ant-api03-XXXX…last4). API key is the default, sanctioned rail. Mint one per machine at console.anthropic.com. /auth-debug masked auth diagnostic across both rails (NEVER prints a full key/token). Alias: /api-debug. Safe to copy-paste. /login OPT INTO OAuth (discouraged — Anthropic blocks Claude-Code impersonation; risks your Max account). Prefer /set-api-key. /logout delete OAuth tokens; revert to the default API-key rail /oauth-debug dump OAuth diagnostic (tokens truncated). Use /auth-debug for the masked, both-rails view.
/lesson capture a lesson to local file (paste back to home-Larry later) /lessons list all captured lessons (newest first) /export dump the lesson bundle for paste-back to home-Larry /phi tokenize a PHI value locally; prints token to paste in prompts /unmask show the original PHI for a token (local only; never sent) /tokens show the full local PHI ↔ token lookup table Secure SSH (password stays local; never visible to Larry-the-LLM): /ssh-hosts list configured remote hosts /ssh-add register a new host /ssh-pass set/update password (hidden input; daily rotation OK) /ssh-set-hciroot pin HCIROOT for an alias (sudo-gated/non-interactive hosts that don't export it in a non-login shell; empty path clears the pin) /ssh-set-direct on|off DIRECT (no-multiplex) mode: ALL remote ops for the alias run a FRESH per-command sshpass connection, bypassing the ControlMaster — for hosts that reject SSH session multiplexing ("read from master failed: Connection reset by peer"). off (or empty) reverts. /ssh-setup open a long-lived ControlMaster connection (DIRECT mode: skips the master, just validates the stored password with one direct command) /ssh-close close the ControlMaster /ssh-status [alias] show open masters + cred presence /ssh run command on the remote (you-driven, ad-hoc) Larry can also run things there via the ssh_exec tool. Headers.log → Mac memory sync (v0.8.6): /headers-sync target bind the Mac SSH alias to sync to /headers-sync on enable auto-sync (fires on larry exit) /headers-sync off disable auto-sync (/headers-sync now still works) /headers-sync status show enabled?, target, dest, last-sync, bytes /headers-sync now push new headers.log bytes to the Mac now (incremental; rides the open ControlMaster; Mac daemon ingests them to memory T4+T7) Cross-environment Cloverleaf shortcuts (v0.6.8): /nc-diff-env [pattern] diff NetConfigs across two SSH-aliased envs (e.g. /nc-diff-env qa dev ADT) /nc-regression-env [scope] 6-phase regression across SSH-aliased envs (e.g. 
/nc-regression-env dev qa server) HL7 schema lookup (v0.7.0): /hl7 print the field list for an HL7 segment (e.g. /hl7 PID → all 30 PID fields) /hl7 (no arg) list all known HL7 segments /hl7-fields print component breakdown for a field (e.g. /hl7-fields PID.5 → Family, Given, ...) Auto-update origin (v0.7.4 — single-source): /origin show current effective origin (pin or default) /origin gitea pin to git.bjnoela.com (the default Gitea URL) /origin auto clear the pin (revert to default) /origin pin to an arbitrary HTTPS base URL (useful for air-gapped mirrors / testing) Pin is persisted to \$LARRY_HOME/.origin and re-read on next launch. Note: the v0.7.2 GitHub fallback was removed in v0.7.4 — the GitHub mirror is private, so anonymous raw fetches no longer work. If Gitea is unreachable, auto-update is skipped and you keep running on cached files. Mouse mode (v0.7.0; default flipped to OFF in v0.7.5): /mouse on|off toggle xterm mouse + bracketed-paste for the session. Status with /mouse (no arg). Env (opt-in): LARRY_MOUSE=1 enables at startup. Env (back-compat): LARRY_NO_MOUSE=1 hard-disables. Default since v0.7.5: OFF. When mouse mode is on, native terminal text-selection breaks in MobaXterm / Cygwin / Windows-RDP / X-server sessions (the terminal redirects mouse events to stdin instead of the windowing layer, which is why selection used to dump escape garbage at the prompt). Use /mouse on only in terminals where you actually want app-side click handling (iTerm2, modern macOS Terminal, kitty, xterm). PHI inline syntax in any prompt: @@VALUE EASY: wrap PHI in @@. Spaceless = no end delim. e.g. @@12345 @@SMITH^JOHN @@V789 @@VALUE@@ Use when VALUE has spaces. e.g. @@John Smith@@ @@Smith, John@@ Name canonicalization: SMITH^JOHN, Smith, John, John Smith, JOHN SMITH all collapse to the same token. Category is auto-detected from value shape (MRN/SSN/DOB/NAME/MANUAL). 
{{phi:VALUE}} / {{phi:CAT:VALUE}} legacy syntax (still works) Automatic PHI detection (v0.7.3, supersedes the af2ffe8 prototype): Larry scans every prompt AND every HL7-shaped tool result for PHI- shaped values and tokenizes them BEFORE conversation history is sent to Anthropic. Four-tier confidence model with explicit blacklists for paths, HL7 field refs (PID.18 is not an MRN), version strings, port numbers, error codes, JSON keys, and fenced code blocks. Tiers (first-match wins, all tokenize unless mode=off): 1 DEFINITE SSN with dashes, email, formatted phone, NPI (with "NPI:" prefix). Always. 2 CONTEXTUAL Numeric value after MRN/Patient/DOB/Account/Visit/ Acct/Record/Birth keyword. Always. 3 HL7-CONTEXT Plausible-PHI values when PID.3/PID.5/PID.7/PID.11/ PID.13/PID.18, NK1.*, GT1.*, IN1.16-20 mentioned in the same line. Aggressive — prompts in confirm mode. 4 KNOWN Value matches an existing lookup.tsv entry — Bryan has seen this value before. Always. Tool-result scan (v0.8.1): runs on EVERY tool result. The tool-name allow-list was dropped — content-shape gating (_auto_phi_looks_like_hl7) is now the only filter. HL7-shaped output from any tool (bash_exec, ssh_exec, grep_files, read_file of any extension, nc_msgs, etc.) is routed through hl7-sanitize.sh. Non-HL7-shaped output passes through unchanged (no behavior change for normal text). v0.8.1 also adds a base64 round-trip pass (decode → shape-check → sanitize → re-encode) for ssh_pull_smat sampled mode and any other base64-wrapped HL7. Operator review gate (v0.8.1): for bash_exec/ssh_exec/ssh_pull/ ssh_pull_smat results that are HL7-shaped OR exceed LARRY_TOOL_RESULT_REVIEW_THRESHOLD bytes (default 8192), Larry prompts [Y/n/i] before passing the result back to the model. 'i' opens the full output in \$PAGER. Default Y (no friction). Skipped when LARRY_AUTO_PHI=off OR no controlling TTY. Override with LARRY_TOOL_RESULT_REVIEW=always to gate every tool result. 
Modes (env LARRY_AUTO_PHI or /phi-auto): on default — all four tiers always tokenize (caution-first) confirm Tier 3-4 prompts Y/n once per session per canonical value strict (v0.8.0) fail-closed — HL7-shaped content aborts the turn if hl7-sanitize.sh is missing or returns empty, or if any single value's tokenize-value call fails. Use for HIPAA work where a silent leak is worse than a broken turn. off disable auto-detection entirely (manual markers still work) Per-turn override: prefix any prompt with "!nophi " to skip the scan for that turn only. Explicit @@VALUE / {{phi:VALUE}} markers always win — they are processed first; auto-PHI fills only the gaps. /load (v0.8.0): HL7-shaped file content is pre-routed through hl7-sanitize.sh (the segment-aware tokenizer) BEFORE the user_input scan. strict mode aborts /load if sanitize fails on HL7-shaped content. read_file / grep_files / glob_files / list_dir (v0.8.0): refuse paths under \$LARRY_HOME/log, \$LARRY_HOME/sanitize, \$LARRY_HOME/sessions, \$LARRY_HOME/.oauth.json, \$LARRY_HOME/.env. These hold the de-sanitization key (lookup.tsv), PHI clear-text audit log, prior sessions, and OAuth tokens — the model never gets to read them. Audit: every tokenization writes a JSONL entry to \$LARRY_HOME/log/auto-phi.log (ts/value/category/token/tier/surface/context). /redetect re-scan for HCIROOT/HCISITE/tools + deployment mode /sites [alias] [--hciroot ] count/list Cloverleaf sites — local, or REMOTE via . Remote resolves \$HCIROOT via a login shell by default; pass --hciroot to PIN it for the alias (persisted) and run the walk with HCIROOT exported, skipping a sudo-gated/non- interactive login profile. /site switch HCISITE for this session /pwd show current working directory /help this help Multi-line input: - Explicit: '<<' on its own line, end with 'EOF' on its own line. - Auto: paste any multi-line text — Larry slurps the whole paste in one read (50ms buffer detection). 
- Backslash: end a line with '\' to continue on the next; blank line ends. @file inline-file syntax (v0.6.7): Reference a file in your prompt with @; Larry resolves and inlines the contents as a fenced code block. Examples: @./README.md relative path (against current cwd) @/etc/hosts absolute path @{path with spaces.txt} bracketed form for paths containing spaces Multiple refs in one prompt all get inlined. Email addresses (bryan@x.com) are not matched. Binary files and files >250 KB are skipped/truncated with a warning. TAB after @ autocompletes against files in cwd (fzf if installed). Status line (v0.6.9, repositioned v0.7.1, render fix v0.8.7): A dim 1-line summary prints between turns — after you submit input and before larry's response begins — summarising the just-completed turn: OAuth: ─ ctx 12% (24K/200K) ─ 5h 1.8% reset 19:45 ─ 7d 73.7% reset Mon Jun 2 ─ API key: ─ ctx 12% (24K/200K) ─ \$0.213 session ─ 14 turns ─ Disable entirely with LARRY_NO_STATUS=1. Force re-display anytime with /status (renders even before the first turn). Suppressed automatically ONLY before the first turn has run; thereafter it always renders — fields with no data yet show a "—" placeholder, and the rate-limit reset time fills in once a successful call (or a captured error response) populates the API headers. It is a plain printed line (no terminal-positioning escape sequences) and is not coupled to streaming or mouse mode, so it renders the same on MobaXterm/Cygwin as on a native Linux terminal. TAB completion (v0.6.6/v0.6.7/v0.7.0): Type '/' followed by any prefix and press TAB. /h → /help /ss → lists every /ssh-* command with one-line descriptions /ssh-h → /ssh-hosts /q → /quit Subsequence fuzzy is the fallback when no prefix matches (e.g. /sssp finds /ssh-setup). After @, file-path completion kicks in instead. HL7 inline completion (v0.7.0): tab-complete segments, fields, and components while you type a prompt. M → MSH (single match) PI → PID (single match) PID.
# _slash_args CMD INPUT
# Echo the argument text that follows a leading "/cmd" in INPUT.
#   INPUT == CMD            → prints nothing
#   INPUT == CMD<blanks>ARG → prints ARG (every leading space/tab removed, so
#                             "/cd  /tmp" and "/cd<TAB>/tmp" both yield "/tmp";
#                             TAB completion appends a space after the command,
#                             so a doubled separator is easy to produce)
#   INPUT == CMDARG         → prints ARG (no-space variant, rare)
#   anything else           → prints nothing
# Always returns 0. CMD is matched literally (it is quoted inside the case
# pattern), so glob characters in CMD are not interpreted.
_slash_args() {
  local cmd="$1" input="$2" rest lead
  case "$input" in
    "$cmd"*) rest="${input#"$cmd"}" ;;
    *) return 0 ;;
  esac
  # Peel off the run of leading whitespace: $lead is everything before the
  # first non-blank character (or all of $rest when it is blank-only).
  lead="${rest%%[![:space:]]*}"
  printf '%s' "${rest#"$lead"}"
}

# _run_ssh_helper SUBCMD [ARGS...]
# Invoke lib/ssh-helper.sh with the given arguments. Centralises the
# installed/missing check and shields the main REPL from the helper's exit
# status: this function returns 0 whether the helper is missing, fails, or
# succeeds. LARRY_LIB_DIR is read with a default so an unset variable cannot
# abort the shell under `set -u`; a missing helper is reported through err().
_run_ssh_helper() {
  local helper="${LARRY_LIB_DIR:-}/ssh-helper.sh"
  if [ ! -x "$helper" ]; then
    err "ssh-helper.sh not installed (expected at $helper)"
    return 0
  fi
  "$helper" "$@" || true
}

# ─────────────────────────────────────────────────────────────────────────────
# Slash-command TAB completion (v0.6.6)
# ─────────────────────────────────────────────────────────────────────────────
#
# _LARRY_SLASH_CMDS — canonical list of slash commands. This is the single
# source of truth for the TAB-completion function. The case statement in
# main_loop is the dispatcher; this array is what the user sees when they
# fuzzy-match. Keep them in sync when adding new commands.
#
# Excluded on purpose: command aliases (e.g. /exit and /q both map to
# /quit) — completing to the canonical form is friendlier than offering
# every spelling.
_LARRY_SLASH_CMDS=(
  /help /quit /sys /pwd /env
  /auth /set-api-key /auth-debug /api-debug /login /logout /oauth-debug
  /lesson /lessons /export
  /phi /unmask /tokens
  /ssh /ssh-hosts /ssh-add /ssh-remove /ssh-pass /ssh-set-hciroot
  /ssh-set-direct /ssh-setup /ssh-close /ssh-status
  /redetect /sites /site
  /reset /model /cd /load
  /clear /copy /cost /status /show-last-tool
  /nc-diff-env /nc-regression-env
  /hl7 /hl7-fields
  /mouse /origin /phi-auto /phi-sidecar /headers-sync
)

# _LARRY_SLASH_CMDS_DESC — one-line descriptions for each slash command.
# Used by TAB completion to render multi-match lists with context. Keep in
# sync with _LARRY_SLASH_CMDS above and with the print_help text.
#
# Associative arrays need bash 4+ (as do `bind -x` / READLINE_LINE, which the
# completer relies on anyway). The table is populated ONLY when `declare -A`
# succeeds: on an older bash the `[/help]=…` subscripts would be evaluated as
# arithmetic on an indexed array and raise a syntax error instead of quietly
# yielding an empty lookup, so there the table is left empty.
#
# NOTE(review): several descriptions begin with a blank (and a few contain a
# gap such as "target |now") where an argument placeholder appears to have been
# lost in the copy under review — confirm against the original file before
# relying on the exact text.
if declare -A _LARRY_SLASH_CMDS_DESC 2>/dev/null; then
  _LARRY_SLASH_CMDS_DESC=(
    [/help]="show this help"
    [/quit]="exit"
    [/sys]="print the active system prompt"
    [/pwd]="show current working directory"
    [/env]="print detected Cloverleaf env (HCIROOT, HCISITE, tools)"
    [/auth]="show auth rail + masked API-key status"
    [/set-api-key]="set/clear/show the per-client API key (silent, 0600, validated)"
    [/auth-debug]="masked auth diagnostic across both rails (never prints secrets)"
    [/api-debug]="alias of /auth-debug"
    [/login]="opt into OAuth (discouraged — risks Max account)"
    [/logout]="delete OAuth tokens (revert to the default API-key rail)"
    [/oauth-debug]="dump OAuth diagnostic (tokens truncated)"
    [/lesson]=" capture a lesson for paste-back to home-Larry"
    [/lessons]="list all captured lessons (newest first)"
    [/export]="dump the lesson bundle for paste-back"
    [/phi]=" tokenize a PHI value locally"
    [/unmask]=" show original PHI for a token"
    [/tokens]="show full local PHI <-> token lookup table"
    [/ssh]=" run command on the remote"
    [/ssh-hosts]="list configured remote hosts"
    [/ssh-add]=" register a new host"
    [/ssh-remove]=" remove a host"
    [/ssh-pass]=" set/update password (hidden input)"
    [/ssh-set-hciroot]=" pin HCIROOT for an alias (sudo-gated hosts; empty path clears)"
    [/ssh-set-direct]=" on|off toggle DIRECT no-multiplex SSH (hosts that reject ControlMaster)"
    [/ssh-setup]=" open a long-lived ControlMaster (DIRECT mode: validates password, no master)"
    [/ssh-close]=" close the ControlMaster"
    [/ssh-status]="show open ControlMaster sessions + cred presence"
    [/redetect]="re-scan for HCIROOT/HCISITE/tools"
    [/sites]="count/list Cloverleaf sites — local, or REMOTE via /sites "
    [/site]=" switch HCISITE for this session"
    [/reset]="clear conversation history (keeps log)"
    [/model]=" switch model (e.g. /model claude-opus-4-7)"
    [/cd]=" change working directory"
    [/load]=" load file contents as your next user message"
    [/clear]="clear the terminal screen"
    [/copy]="copy last assistant response to clipboard"
    [/cost]="show running token + dollar cost for the session"
    [/status]="force-render the persistent status line (ctx + rate-limit)"
    [/show-last-tool]="print full last tool call + result for debugging"
    [/nc-diff-env]=" [pattern] diff NetConfigs across two SSH-aliased envs"
    [/nc-regression-env]=" [scope] 6-phase regression across SSH-aliased envs"
    [/hl7]=" print full field list for an HL7 segment (e.g. /hl7 PID)"
    [/hl7-fields]=" print component breakdown (e.g. /hl7-fields PID.5)"
    [/mouse]="on|off toggle xterm mouse mode for this session"
    [/origin]="show/pin auto-update origin (gitea|auto|) — v0.7.4 single-source"
    [/phi-auto]="on|off|confirm|strict|status — runtime control for v0.7.3+v0.8.0 auto PHI detection"
    [/phi-sidecar]="start|stop|status|health|ensure — v0.8.2 Presidio NER sidecar lifecycle"
    [/headers-sync]="on|off|status|target |now — v0.8.6 sync work-box headers.log to the Mac memory daemon"
  )
else
  # No associative arrays available: keep the name defined (so `set -u` is
  # satisfied) but empty.
  _LARRY_SLASH_CMDS_DESC=()
fi

# __larry_complete_slash — bound to TAB via `bind -x` (see _install_readline_tab).
#
# Contract of the TAB handler (__larry_complete_slash):
#
# Reads READLINE_LINE (the current line buffer) and READLINE_POINT (cursor
# position, 0-indexed) and tries three completers in order:
#
#   1. @file — the text before the cursor ends in an "@token" (or a bare "@")
#      whose "@" starts a word → __larry_complete_atfile. An "@" glued to a
#      preceding non-blank character (bryan@example.com) is an e-mail address
#      and is never treated as a file reference, including when the cursor
#      sits directly after the "@".
#   2. HL7 — the word before the cursor looks like SEG, SEG.N or SEG.N.M and
#      _HL7_SCHEMA_LOADED is non-empty.
#   3. Slash command — the whole buffer is one "/word":
#        * exactly one match → replace the line with the match + a trailing
#                              space (ready for an argument, e.g. "/site ")
#        * many matches      → print them under the prompt; the buffer is left
#                              as typed
#        * zero matches      → silent no-op (no literal tab is inserted)
#
# A line that does not start with "/" (or a "/cmd" line that already contains
# a space) gets a literal tab inserted at the cursor, so whitespace alignment
# and indented heredocs still work.
#
# Refs:
#   bash(1) — READLINE Variables: $READLINE_LINE, $READLINE_POINT.
#   bash(1) — `bind -x '"\C-x": shell-function'` binds a key to a shell
#   function that may read/modify $READLINE_LINE in place. Available since
#   bash 4.0.
__larry_complete_slash() {
  local line="$READLINE_LINE"
  local point="${READLINE_POINT:-0}"
  local pre="${line:0:point}"

  # ── 1. @file completion (v0.6.7 item 12) ──────────────────────────────────
  # at_ref records "the cursor is on an eligible @-reference"; at_token is the
  # text typed after that "@" (possibly empty when only "@" was typed).
  local at_token="" at_ref=0
  case "$pre" in
    *@*)
      local tail_at="${pre##*@}"     # text between the last "@" and the cursor
      local before_at="${pre%@*}"    # everything left of that "@"
      local last_char="${before_at: -1}"
      # Eligible only when "@" starts the line or follows whitespace.
      if [ -z "$last_char" ] || [[ "$last_char" =~ [[:space:]] ]]; then
        case "$tail_at" in
          *[[:space:]]*) ;;          # whitespace seen — cursor is past the token
          *) at_token="$tail_at"; at_ref=1 ;;
        esac
      fi
      ;;
  esac
  if [ "$at_ref" -eq 1 ]; then
    __larry_complete_atfile "$at_token"
    return 0
  fi

  # ── 2. HL7-aware completion (v0.7.0) ──────────────────────────────────────
  # Take the trailing whitespace-delimited token of $pre and test it for HL7
  # shapes; anything else falls through to slash-command / literal-tab logic.
  local hl7_token=""
  case "$pre" in
    ''|*[[:space:]]) hl7_token="" ;;
    *) hl7_token="${pre##*[[:space:]]}" ;;
  esac
  # Recognised shapes:
  #   1) 1–3 uppercase letters       partial segment ID   (M, MS, MSH, PI)
  #   2) SEG. followed by digits     field in a segment   (PID., PID.3, MSH.10)
  #   3) SEG.N. followed by digits   component in a field (PID.5., PID.5.1)
  if [ -n "$hl7_token" ] && [ -n "${_HL7_SCHEMA_LOADED:-}" ]; then
    case "$hl7_token" in
      [A-Z]|[A-Z][A-Z]|[A-Z][A-Z][A-Z])
        __larry_complete_hl7_segment "$hl7_token"
        return 0
        ;;
      [A-Z][A-Z][A-Z].*)
        local _hl7_rest="${hl7_token#???.}"   # drop "SEG."
        case "$_hl7_rest" in
          *.*)
            # A second dot — component completion (SEG.N.M*).
            __larry_complete_hl7_component "$hl7_token"
            return 0
            ;;
          ''|[0-9]*)
            # Field completion (SEG.N*): bare "PID." or a leading digit.
            __larry_complete_hl7_field "$hl7_token"
            return 0
            ;;
        esac
        ;;
    esac
  fi

  # ── 3. Slash-command completion ───────────────────────────────────────────
  # Only the command NAME is completed. A non-slash line, or a slash line that
  # already contains a space (the user is typing arguments), gets a literal
  # tab at the cursor instead.
  case "$line" in
    /*)
      case "$line" in
        *' '*)
          READLINE_LINE="${line:0:point}"$'\t'"${line:point}"
          READLINE_POINT=$((point + 1))
          return 0
          ;;
      esac
      ;;
    *)
      READLINE_LINE="${line:0:point}"$'\t'"${line:point}"
      READLINE_POINT=$((point + 1))
      return 0
      ;;
  esac

  # Primary: prefix match against the canonical command list.
  local prefix="$line"
  local matches=() cmd
  for cmd in "${_LARRY_SLASH_CMDS[@]}"; do
    case "$cmd" in
      "$prefix"*) matches+=("$cmd") ;;
    esac
  done

  # Fallback: subsequence fuzzy match — every char of the input (after the
  # leading "/") must appear in the command in order. Only tried when nothing
  # prefix-matched and the input is at least 2 chars. No scoring.
  if [ "${#matches[@]}" -eq 0 ] && [ "${#prefix}" -ge 2 ]; then
    local needle="${prefix#/}"
    for cmd in "${_LARRY_SLASH_CMDS[@]}"; do
      local hay="${cmd#/}"
      local i=0 nlen="${#needle}" ok=1
      while [ "$i" -lt "$nlen" ]; do
        local ch="${needle:i:1}"
        case "$hay" in
          *"$ch"*) hay="${hay#*"$ch"}" ;;   # consume up to and incl. the hit
          *) ok=0; break ;;
        esac
        i=$((i + 1))
      done
      [ "$ok" -eq 1 ] && matches+=("$cmd")
    done
  fi

  if [ "${#matches[@]}" -eq 1 ]; then
    # Replace the buffer with the command plus a space so an argument can be
    # typed immediately (no-arg commands waste one keystroke — acceptable).
    READLINE_LINE="${matches[0]} "
    READLINE_POINT=${#READLINE_LINE}
  elif [ "${#matches[@]}" -gt 1 ]; then
    # Several candidates (v0.6.7 polish): one per line with the description
    # from _LARRY_SLASH_CMDS_DESC. The buffer is left untouched.
    printf '\n'
    local m
    for m in "${matches[@]}"; do
      local desc="${_LARRY_SLASH_CMDS_DESC[$m]:-}"
      if [ -n "$desc" ]; then
        printf '  %s%-20s%s %s%s%s\n' "$C_CYAN" "$m" "$C_RESET" "$C_DIM" "$desc" "$C_RESET"
      else
        printf '  %s%s%s\n' "$C_CYAN" "$m" "$C_RESET"
      fi
    done
  fi
  # Zero matches → silent no-op (the typo stays on screen so it can be fixed).
}

# __larry_complete_hl7_segment PARTIAL
# Complete an HL7 segment ID at the cursor. PARTIAL is 1..3 uppercase letters;
# candidates are the lines printed by hl7_segments.
#   - Exactly one match → replace with the full segment ID (no trailing space
#                         so the user can keep typing ".")
#   - Multiple matches  → if PARTIAL is itself a known segment (hl7_seg_desc
#                         non-empty), complete to "PARTIAL."; otherwise list
#                         the candidates with their descriptions
#   - Zero matches      → if PARTIAL starts with Z, print a Z-segment hint;
#                         else silent no-op
# Always returns 0.
__larry_complete_hl7_segment() {
  local partial="$1"
  local line="$READLINE_LINE"
  local point="${READLINE_POINT:-0}"
  local pre="${line:0:point}"
  local post="${line:point}"
  # Text before the partial, so the replacement can be spliced in place.
  local pre_head="${pre%"$partial"}"

  local matches=() s
  while IFS= read -r s; do
    [ -n "$s" ] && case "$s" in
      "$partial"*) matches+=("$s") ;;
    esac
  done < <(hl7_segments)

  if [ "${#matches[@]}" -eq 1 ]; then
    READLINE_LINE="${pre_head}${matches[0]}${post}"
    READLINE_POINT=$((${#pre_head} + ${#matches[0]}))
    return 0
  fi

  # The partial is an exact segment ID AND prefixes other IDs: take the exact
  # one and add the dot so the field can be typed next.
  # NOTE(review): this needs an ID that is a strict prefix of another ID —
  # presumably impossible if every ID in the schema is 3 letters; confirm
  # against hl7_segments.
  if [ "${#matches[@]}" -gt 1 ] && [ -n "$(hl7_seg_desc "$partial")" ]; then
    READLINE_LINE="${pre_head}${partial}.${post}"
    READLINE_POINT=$((${#pre_head} + ${#partial} + 1))
    return 0
  fi

  if [ "${#matches[@]}" -gt 1 ]; then
    printf '\n'
    local m desc
    for m in "${matches[@]}"; do
      desc=$(hl7_seg_desc "$m")
      printf '  %s%-6s%s %s%s%s\n' "$C_CYAN" "$m" "$C_RESET" "$C_DIM" "$desc" "$C_RESET"
    done
    return 0
  fi

  # No matches. Hint for Z-segments (site-specific, not baked in).
  case "$partial" in
    Z*)
      printf '\n  %s(Z-segments are site-specific; not in the built-in schema)%s\n' "$C_DIM" "$C_RESET"
      ;;
  esac
  return 0
}

# __larry_complete_hl7_field TOKEN
# Complete an HL7 field within a segment. TOKEN looks like:
#   PID.    → list every field hl7_fields_for reports for PID
#   PID.1   → if unique completes to "PID.1 "; if many (1, 10..19) and PID.1
#             is itself a field, still completes to "PID.1 "
#   PID.3   → completes to "PID.3 "
# hl7_fields_for SEG is read as TAB-separated "index<TAB>name" lines.
# Unknown segment (hl7_seg_desc empty) or no matching index → no-op.
# Always returns 0.
__larry_complete_hl7_field() {
  local token="$1"
  local line="$READLINE_LINE"
  local point="${READLINE_POINT:-0}"
  local pre="${line:0:point}"
  local post="${line:point}"
  local pre_head="${pre%"$token"}"

  local seg="${token%%.*}"
  local partial="${token#*.}"   # may be empty

  # Unknown segment — nothing to do.
  [ -z "$(hl7_seg_desc "$seg")" ] && return 0

  # Candidate field indices that start with the partial.
  local matches=() idx name
  while IFS=$'\t' read -r idx name; do
    case "$idx" in
      "$partial"*) matches+=("$idx"$'\t'"$name") ;;
    esac
  done < <(hl7_fields_for "$seg")

  if [ "${#matches[@]}" -eq 1 ]; then
    # Single match: complete to "SEG.N " (trailing space).
    local pair="${matches[0]}"
    local i="${pair%%$'\t'*}"
    local replacement="${seg}.${i} "
    READLINE_LINE="${pre_head}${replacement}${post}"
    READLINE_POINT=$((${#pre_head} + ${#replacement}))
    return 0
  fi

  # The partial is a valid field index on its own but also prefixes others
  # (PID.3 vs PID.30): prefer the exact one — the number was typed in full.
  if [ "${#matches[@]}" -gt 1 ] && [ -n "$partial" ] \
     && [ -n "$(hl7_field_name "${seg}.${partial}")" ]; then
    local replacement="${seg}.${partial} "
    READLINE_LINE="${pre_head}${replacement}${post}"
    READLINE_POINT=$((${#pre_head} + ${#replacement}))
    return 0
  fi

  if [ "${#matches[@]}" -gt 1 ]; then
    printf '\n'
    local pair i n label
    for pair in "${matches[@]}"; do
      i="${pair%%$'\t'*}"
      n="${pair#*$'\t'}"
      label="${seg}.${i}"
      printf '  %s%-12s%s %s%s%s\n' "$C_CYAN" "$label" "$C_RESET" "$C_DIM" "$n" "$C_RESET"
    done
    return 0
  fi
  return 0
}

# __larry_complete_hl7_component TOKEN
# Complete an HL7 component within a field. TOKEN looks like:
#   PID.5.   → list all PID.5 components (Family, Given, ...)
#   PID.5.1  → unique, completes to "PID.5.1 "
# hl7_components_for SEG.N is read as TAB-separated "index<TAB>name" lines.
# Unknown field (hl7_field_name empty) → no-op; a field with no matching
# component prints a one-line note. Always returns 0.
__larry_complete_hl7_component() {
  local token="$1"
  local line="$READLINE_LINE"
  local point="${READLINE_POINT:-0}"
  local pre="${line:0:point}"
  local post="${line:point}"
  local pre_head="${pre%"$token"}"

  # Split SEG.N.M-partial.
  local seg="${token%%.*}"
  local rest="${token#*.}"       # N.M-partial
  local field="${rest%%.*}"
  local partial="${rest#*.}"     # may be empty
  local key="${seg}.${field}"

  # Validate the field actually exists in the schema.
  [ -z "$(hl7_field_name "$key")" ] && return 0

  local matches=() idx name
  while IFS=$'\t' read -r idx name; do
    case "$idx" in
      "$partial"*) matches+=("$idx"$'\t'"$name") ;;
    esac
  done < <(hl7_components_for "$key")

  if [ "${#matches[@]}" -eq 0 ]; then
    # Nothing to offer. Say so, so the user knows tab-complete didn't fail —
    # the data just isn't there.
    printf '\n  %s(no component breakdown for %s in built-in schema)%s\n' "$C_DIM" "$key" "$C_RESET"
    return 0
  fi

  if [ "${#matches[@]}" -eq 1 ]; then
    local pair="${matches[0]}"
    local m="${pair%%$'\t'*}"
    local replacement="${key}.${m} "
    READLINE_LINE="${pre_head}${replacement}${post}"
    READLINE_POINT=$((${#pre_head} + ${#replacement}))
    return 0
  fi

  printf '\n'
  local pair m n label
  for pair in "${matches[@]}"; do
    m="${pair%%$'\t'*}"
    n="${pair#*$'\t'}"
    label="${key}.${m}"
    printf '  %s%-14s%s %s%s%s\n' "$C_CYAN" "$label" "$C_RESET" "$C_DIM" "$n" "$C_RESET"
  done
  return 0
}

# __larry_complete_atfile PARTIAL
# Complete a file path for an @ reference. PARTIAL is the text typed after the
# "@" (may be empty). Candidates are regular files up to 4 levels below the
# current directory whose path contains PARTIAL (case-insensitive substring),
# capped at 200. The .git, node_modules, __pycache__ and .venv directories
# are pruned from the walk.
#   - no candidate   → silent no-op
#   - one candidate  → completed inline as "@path " (trailing space)
#   - several        → fzf picker when fzf is on PATH and stdin/stdout are
#                      TTYs; otherwise the list is printed under the prompt
__larry_complete_atfile() {
  local partial="$1"
  local line="$READLINE_LINE"
  local point="${READLINE_POINT:-0}"
  local pre="${line:0:point}"
  local post="${line:point}"

  # Locate the "@" anchor in $pre so everything from there can be replaced.
  local before_at="${pre%@*}"
  local at_pos="${#before_at}"     # index of the "@" itself

  # Build the candidate list. The prune group must come BEFORE any -type
  # test: the excluded names are directories, so a leading "-type f" would
  # make the group unmatchable and their contents would be listed. Filtering
  # is done in-shell (lower-cased substring test) so PARTIAL is always taken
  # literally.
  local needle="${partial,,}"
  local candidates=() f
  while IFS= read -r f; do
    f="${f#./}"
    [ -n "$f" ] || continue
    if [ -n "$needle" ]; then
      case "${f,,}" in
        *"$needle"*) ;;
        *) continue ;;
      esac
    fi
    candidates+=("$f")
    [ "${#candidates[@]}" -ge 200 ] && break
  done < <(
    find . -maxdepth 4 \
      \( -path '*/.git' -o -path '*/node_modules' -o -path '*/__pycache__' -o -path '*/.venv' \) -prune -o \
      -type f -print 2>/dev/null
  )

  if [ "${#candidates[@]}" -eq 0 ]; then
    # Nothing matched — silent no-op (user can keep typing).
    return 0
  fi

  local chosen=""
  if [ "${#candidates[@]}" -eq 1 ]; then
    chosen="${candidates[0]}"
  elif command -v fzf >/dev/null 2>&1 && [ -t 0 ] && [ -t 1 ]; then
    # Interactive picker via fzf; a cancelled picker leaves $chosen empty.
    chosen=$(printf '%s\n' "${candidates[@]}" | fzf --height=40% --reverse --query="$partial" 2>/dev/null || true)
    # fzf drew over the prompt line — move to a fresh line before returning.
    printf '\n'
  else
    # Print the list, no inline completion.
    printf '\n'
    local c
    for c in "${candidates[@]}"; do
      printf '  %s@%s%s\n' "$C_CYAN" "$c" "$C_RESET"
    done
    return 0
  fi

  if [ -n "$chosen" ]; then
    # Replace "@partial" with "@chosen " and re-append what followed the cursor.
    READLINE_LINE="${pre:0:at_pos}@${chosen} ${post}"
    READLINE_POINT=$((at_pos + 1 + ${#chosen} + 1))
  fi
}

# _install_readline_tab — wire TAB to __larry_complete_slash. Safe to call
# repeatedly (it is re-run before every prompt). Failures are ignored, so it
# is a no-op where `bind` cannot be used (e.g. non-interactive subshells,
# sh-mode invocations).
_install_readline_tab() {
  # `bind -x` is bash 4.0+. The `2>/dev/null` swallows the warning bash
  # emits on non-tty stdin ("bind: warning: line editing not enabled").
  bind -x '"\t": __larry_complete_slash' 2>/dev/null || true
}

# v0.7.0: mouse support in the REPL.
#
# What this *does* enable:
#   - Bracketed-paste mode: terminal wraps pastes in \e[200~ ... \e[201~ so
#     multi-line pastes don't accidentally trigger early Enter. Most modern
#     terminals + readline (bind 'set enable-bracketed-paste on') do this
#     already; we set it explicitly to be safe.
#   - SGR mouse reporting (mode 1006): the terminal emits CSI < b;x;y M / m
#     for clicks. Cooperating terminals (iTerm2, modern macOS Terminal,
#     xterm, kitty, alacritty) will forward these to the foreground process.
#
# What this *does not* attempt (yet):
#   - Click-to-position cursor in the readline input line.
#     Reliable across terminals would require:
#       (a) parsing the CSI escape sequence in real time,
#       (b) mapping (col,row) → buffer offset (which depends on the
#           prompt-line wrap, terminal width, and any preceding output),
#       (c) updating $READLINE_POINT from inside a `bind -x` handler bound
#           to ESC.
#     Bash readline lets you `bind -x '"\e[<": _handler'` but the handler
#     fires *per byte* (no buffering of the rest of the sequence) on most
#     bashes; the implementations that work require term-specific shims.
#     We document the limitation and ship the safer subset.
#
# Opt-in switch (v0.7.5 regression fix): mouse mode is OFF by default. Enable
# explicitly with LARRY_MOUSE=1 in the environment or `--mouse` on the CLI,
# or toggle at runtime with `/mouse on`. LARRY_NO_MOUSE=1 is still honoured
# for back-compat with anyone who exported it under a previous version; at
# startup it wins over LARRY_MOUSE=1.
#
# Why off by default: when mouse tracking modes (?1000/?1002/?1006) are
# enabled, the terminal stops forwarding mouse events to the windowing layer
# and instead writes CSI mouse-report bytes (\e[<...M / \e[M...) into the
# foreground app's stdin. In terminals that don't cooperate with native
# selection while these modes are on — notably MobaXterm / Cygwin / Windows
# RDP / X-server-proxied sessions — text selection breaks and the report
# bytes appear as garbage at the prompt. The safe default is "off, opt-in".
#
# Refs:
#   - xterm Control Sequences (Ctlseqs.txt) — modes 1000/1003/1006/2004.
#     https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
#   - readline 'set enable-bracketed-paste on' (~/.inputrc).
#   - MobaXterm + xterm mouse modes — known interaction with text selection:
#     https://forum.mobatek.net/ (search: "xterm mouse selection")

# 1 while this script believes it has switched mouse reporting on, else 0.
_LARRY_MOUSE_ACTIVE=0

# _install_mouse_mode
# Turn on bracketed paste and SGR mouse reporting when, and only when:
#   * LARRY_NO_MOUSE is not "1"   (legacy kill switch — a hard no), and
#   * LARRY_MOUSE is "1"          (v0.7.5: opt-in), and
#   * stdout is a terminal.
# The first two gates force _LARRY_MOUSE_ACTIVE back to 0 when they refuse;
# the TTY gate leaves the flag as it was. Always returns 0.
_install_mouse_mode() {
  if [[ "${LARRY_NO_MOUSE:-0}" == "1" || "${LARRY_MOUSE:-0}" != "1" ]]; then
    _LARRY_MOUSE_ACTIVE=0
    return 0
  fi

  # Escape sequences are only meaningful on a terminal.
  if [[ ! -t 1 ]]; then
    return 0
  fi

  # Terminal-side bracketed paste, then the readline side so readline strips
  # the wrapper bytes and treats a paste as one chunk rather than typed input.
  printf '\033[?2004h' 2>/dev/null || true
  bind 'set enable-bracketed-paste on' 2>/dev/null || true

  # Button events (1000) with SGR encoding (1006). Any-event tracking (1003)
  # is deliberately left off — motion reports flood the input stream and can
  # interfere with readline.
  printf '\033[?1000h\033[?1006h' 2>/dev/null || true

  _LARRY_MOUSE_ACTIVE=1
}

# _uninstall_mouse_mode
# Emit the disable sequences whenever stdout is a terminal, regardless of
# _LARRY_MOUSE_ACTIVE — cheap insurance against a terminal left in mouse mode
# if the state flag has drifted (e.g. an abnormal exit between enable and
# disable). Without a terminal nothing is written and the flag is untouched.
# Always returns 0.
_uninstall_mouse_mode() {
  if [[ -t 1 ]]; then
    # SGR (1006), the motion variants (1003/1002 — never enabled here, reset
    # defensively in case a prior shim did), button events (1000), then
    # bracketed paste (2004). Order: most-specific first.
    printf '\033[?1006l\033[?1003l\033[?1002l\033[?1000l\033[?2004l' 2>/dev/null || true
    _LARRY_MOUSE_ACTIVE=0
  fi
  return 0
}

# _larry_on_exit — cleanup hook for the EXIT/INT/TERM trap.
# v0.8.6 (tsk-2026-05-27-023): besides restoring the terminal, flush
# headers.log to the Mac by running `$LARRY_LIB_DIR/headers-sync.sh sync` when
# LARRY_LIB_DIR is set and that script is executable. Per the original note
# the lib itself gates on LARRY_HEADERS_SYNC=1 and degrades gracefully (no
# target / master closed → warn + return 0). The call is made synchronously,
# with its output discarded and its exit status ignored, so this hook always
# returns 0.
_larry_on_exit() {
  _uninstall_mouse_mode

  local sync_script="${LARRY_LIB_DIR:-}/headers-sync.sh"
  [[ -n "${LARRY_LIB_DIR:-}" && -x "$sync_script" ]] || return 0

  LARRY_HOME="$LARRY_HOME" "$sync_script" sync >/dev/null 2>&1 || true
}

# Ensure mouse mode is disabled on REPL exit (Ctrl-C, /quit, EOF). Idempotent.
trap '_larry_on_exit' EXIT INT TERM read_user_input() { # Returns user input via global LARRY_INPUT. # If first line is "<<", read until line "EOF" (heredoc-style). # # v0.6.7 additions: # - Prompt includes the model short name: you[sonnet-4.6]> # - Multi-line paste auto-detection: if the first read returns data AND # more is buffered within 50ms, slurp it as continuation. Also triggers # auto-heredoc if first line ends with backslash. # - History: persists across sessions via $HISTFILE (set in main_loop). # # Uses readline editing (-e) so backspace, arrow keys, and history work # correctly across terminals. LARRY_INPUT="" local first local short; short=$(model_short_name) if [ -t 0 ] && _readline_ok; then local prompt; prompt=$(printf '%syou[%s]>%s ' "$C_GREEN" "$short" "$C_RESET") # Clear the prompt the caller already printed, then re-emit via readline. printf '\r\033[K' _install_readline_tab IFS= read -e -r -p "$prompt" first || return 1 [ -n "$first" ] && history -s "$first" # Persist non-sensitive lines to HISTFILE. if [ -n "$first" ] && [ -n "${HISTFILE:-}" ]; then case "$first" in /login*|/ssh-pass*|/ssh-add*) ;; # never persist credential-bearing lines *) history -a 2>/dev/null || true ;; esac fi else IFS= read -r first || return 1 fi # v0.7.5: strip stray \r BEFORE any case-pattern dispatch. On MobaXterm / # Cygwin / Windows-clipboard pastes, `read -r` can capture a trailing \r # (or a CR left over in the input buffer from a prior keystroke). That # contamination caused the v0.7.3 work-box symptom where `/oauth-debug` # returned "unknown command" on the FIRST press and matched cleanly on the # SECOND — because the case pattern `/oauth-debug)` does not match # `/oauth-debug`. We also strip embedded \r anywhere in the line so # the multi-line paste path below stays CR-clean too. first="${first//$'\r'/}" # Auto-heredoc: trailing backslash means "I have more to type, please slurp # additional lines until I send a blank one". 
if [ -n "$first" ] && [ "${first: -1}" = "\\" ]; then LARRY_INPUT="${first%\\}"$'\n' local cont while IFS= read -r cont; do [ -z "$cont" ] && break [ "${cont: -1}" = "\\" ] && cont="${cont%\\}" LARRY_INPUT+="$cont"$'\n' || true done return 0 fi # Multi-line paste auto-detection: bash `read -e` returns ONE line at a time # but if a paste contains newlines, the rest sits in the input buffer. We # check non-blockingly for buffered chars within 50ms. if [ -t 0 ] && [ -n "$first" ]; then local extra="" # Read one char at a time, up to 50ms per char. Bail when no more input. while IFS= read -r -t 0.05 -N 1 ch 2>/dev/null; do extra+="$ch" # Cap at 64KB to avoid runaway buffer hangs. [ "${#extra}" -ge 65536 ] && break done if [ -n "$extra" ]; then LARRY_INPUT="$first"$'\n'"$extra" # Strip trailing newline if any. LARRY_INPUT="${LARRY_INPUT%$'\n'}" # v0.7.5: strip any CRs that came in via the buffered paste tail. LARRY_INPUT="${LARRY_INPUT//$'\r'/}" return 0 fi fi if [ "$first" = "<<" ]; then local line while IFS= read -r line; do # v0.7.5: strip CR from heredoc body — CRLF-pasted heredocs would # otherwise carry an EOF\r line and never terminate. line="${line//$'\r'/}" [ "$line" = "EOF" ] && break LARRY_INPUT+="$line"$'\n' done else LARRY_INPUT="$first" fi } # _readline_ok — true if `read -e` is supported by this bash and stdin is a tty. # Cygwin/MobaXterm bash usually supports it; some stripped-down environments # (busybox, dash) don't. _readline_ok() { local _x ( IFS= read -e -r -t 0 _x /dev/null } main_loop() { local system_prompt; system_prompt=$(build_system_prompt) # ── Persistent command history (v0.6.7) ──────────────────────────────────── # HISTFILE persists across `larry` invocations; HISTSIZE caps in-memory size. # /login and /ssh-pass entries are filtered out in read_user_input before # `history -a` runs. export HISTFILE="${HISTFILE:-$LARRY_HOME/.history}" export HISTSIZE=1000 export HISTFILESIZE=1000 # Avoid duplicate consecutive entries. 
export HISTCONTROL="ignoredups" # Load existing history. -r reads HISTFILE into memory; safe if file missing. history -r 2>/dev/null || true if [ -n "$ARG_DIR" ]; then if [ -d "$ARG_DIR" ]; then cd "$ARG_DIR" larry_say "Working dir: $(pwd)" else warn "arg is not a directory, ignoring: $ARG_DIR" fi fi # ── Startup banner ───────────────────────────────────────────────────────── # Always print the version; print a prominent "JUST UPDATED" badge when the # current launch came from a self-update so Bryan can verify the chain fired. if [ -n "${LARRY_UPDATE_NOTICE:-}" ]; then echo "" printf '%s%s═══════════════════════════════════════════════════════════════%s\n' "$C_GREEN" "$C_BOLD" "$C_RESET" printf '%s%s ✓ LARRY UPDATED%s\n' "$C_GREEN" "$C_BOLD" "$C_RESET" printf '%s%s %s%s\n' "$C_GREEN" "$C_BOLD" "$LARRY_UPDATE_NOTICE" "$C_RESET" printf '%s%s═══════════════════════════════════════════════════════════════%s\n' "$C_GREEN" "$C_BOLD" "$C_RESET" echo "" fi # ── Terminal fixups ──────────────────────────────────────────────────────── # Some terminals (notably MobaXterm/Cygwin and certain SSH setups) ship with # stty erase set to ^H while the keyboard actually sends ^? (DEL) for # backspace, so backspace gets passed through to read() as a literal char. # Force erase=^? if we have a tty; harmless if already correct. if [ -t 0 ] && command -v stty >/dev/null 2>&1; then stty erase '^?' 2>/dev/null || true fi # v0.7.0: enable mouse mode (bracketed-paste + SGR mouse reporting). The # trap installed in _install_mouse_mode tears this down on exit. _install_mouse_mode larry_say "${C_BOLD}Larry-Anywhere v$LARRY_VERSION${C_RESET} ready. Model: $LARRY_MODEL." larry_say "Type your message and press Enter. Use '<<' alone on a line to start multi-line (end with 'EOF'). /help for commands." # v0.8.2: best-effort PHI Presidio sidecar start. Backgrounded so larry # is interactive immediately; tier-5 silently no-ops until the sidecar # is healthy (which takes ~9s for model load). 
Skip entirely if # LARRY_PHI_AUTOSTART=0 or if the sidecar launcher isn't present. if [ "${LARRY_PHI_AUTOSTART:-1}" = "1" ] \ && [ -x "$LARRY_LIB_DIR/phi-sidecar.sh" ]; then ( "$LARRY_LIB_DIR/phi-sidecar.sh" ensure >/dev/null 2>&1 || true ) & disown 2>/dev/null || true fi echo "" while true; do local _short; _short=$(model_short_name) # v0.7.1: status line moved from above-prompt to between-turn # (see render_status_line and the post-input call below). printf '%syou[%s]>%s ' "$C_GREEN" "$_short" "$C_RESET" if ! read_user_input; then echo ""; break fi local input="$LARRY_INPUT" [ -z "$input" ] && continue # v0.8.3 — rtrim the dispatch key before the exact-match `case` below. # Tab completion (__larry_complete_slash) intentionally appends a trailing # space after a unique match, but bash `case` globs are literal: "/quit " # never matches the "/quit)" arm and falls through to "unknown command". # Stripping trailing whitespace here tolerates the completer's space, a # user-typed trailing space, and any CR remnant in one defensive line. # Trailing-only: interior "/load FILE" spacing is preserved, so argument # parsing (${input#/load } etc.) is unaffected. Pure parameter expansion, # no subshell. See lib/cygwin-safe.sh rtrim() for the shared helper. 
input="${input%"${input##*[![:space:]]}"}" case "$input" in /quit|/exit|/q) larry_say "bye."; break ;; /help) print_help; continue ;; /clear) printf '\033[2J\033[H'; continue ;; /copy) if [ -z "$_LARRY_LAST_ASSISTANT_TEXT" ]; then err "no assistant response yet to copy" continue fi local clip; clip=$(detect_clipboard) if [ -z "$clip" ]; then warn "no clipboard tool detected — printing instead" printf '%s\n' "$_LARRY_LAST_ASSISTANT_TEXT" else printf '%s' "$_LARRY_LAST_ASSISTANT_TEXT" | eval "$clip" \ && larry_say "copied last response ($(printf '%s' "$_LARRY_LAST_ASSISTANT_TEXT" | wc -c | tr -d ' ') bytes) via $clip" fi continue ;; /cost) print_cost_summary; continue ;; /status) # v0.6.9: force-render the persistent status line on demand, # e.g. when it has scrolled off-screen mid-conversation. # v0.8.7: ALWAYS render on explicit /status, even before the # first turn. The turn-0 suppression in render_status_line is # only for the AUTOMATIC between-turn render (nothing to report # yet); an explicit /status is a deliberate request, so honor # it and show the context/placeholder segments. This also lets # Bryan verify the line renders on MobaXterm without first # completing a (possibly rate-limited) turn. if [ "${LARRY_NO_STATUS:-0}" = "1" ]; then larry_say "status line disabled (LARRY_NO_STATUS=1)" else # Lazy-init the context window so the ctx segment shows the # right denominator even with zero turns / no API call yet. [ -z "$STATUS_ctx_window" ] && STATUS_ctx_window=$(_model_context_window "$LARRY_MODEL") # Render directly via the auth-mode helper to bypass the # turn-0 gate (which only applies to the automatic call). case "$LARRY_AUTH_MODE" in oauth) _render_status_line_oauth ;; apikey) _render_status_line_apikey ;; *) _render_status_line_apikey ;; esac fi continue ;; # v0.7.0: HL7 schema lookup commands. 
/hl7|/hl7\ *) local _arg; _arg=$(_slash_args "/hl7" "$input") if [ -z "${_HL7_SCHEMA_LOADED:-}" ]; then err "HL7 schema not loaded (lib/hl7-schema.sh missing or bash <4)" continue fi if [ -z "$_arg" ]; then printf '%susage:%s /hl7 e.g. /hl7 PID\n' "$C_YELLOW" "$C_RESET" printf '\n%sknown segments:%s\n' "$C_BOLD" "$C_RESET" local _s _d while IFS= read -r _s; do _d=$(hl7_seg_desc "$_s") printf ' %s%-6s%s %s%s%s\n' "$C_CYAN" "$_s" "$C_RESET" "$C_DIM" "$_d" "$C_RESET" done < <(hl7_segments) continue fi # Normalise to upper, drop a trailing dot if user typed "PID." _arg=$(printf '%s' "$_arg" | tr '[:lower:]' '[:upper:]') _arg="${_arg%.}" if [ -z "$(hl7_seg_desc "$_arg")" ]; then case "$_arg" in Z*) err "$_arg looks like a site-specific Z-segment; not in the built-in schema" ;; *) err "unknown segment: $_arg (try /hl7 with no args to list)" ;; esac continue fi printf '%s%s%s %s%s%s\n' "$C_BOLD$C_CYAN" "$_arg" "$C_RESET" "$C_DIM" "$(hl7_seg_desc "$_arg")" "$C_RESET" local _i _n _label while IFS=$'\t' read -r _i _n; do _label="${_arg}.${_i}" printf ' %s%-12s%s %s%s%s\n' "$C_CYAN" "$_label" "$C_RESET" "$C_DIM" "$_n" "$C_RESET" done < <(hl7_fields_for "$_arg") continue ;; /hl7-fields|/hl7-fields\ *) local _arg; _arg=$(_slash_args "/hl7-fields" "$input") if [ -z "${_HL7_SCHEMA_LOADED:-}" ]; then err "HL7 schema not loaded (lib/hl7-schema.sh missing or bash <4)" continue fi if [ -z "$_arg" ]; then err "usage: /hl7-fields e.g. 
/hl7-fields PID.5" continue fi _arg=$(printf '%s' "$_arg" | tr '[:lower:]' '[:upper:]') _arg="${_arg%.}" case "$_arg" in [A-Z][A-Z][A-Z].[0-9]*) : ;; *) err "expected form SEG.N (3 uppercase letters, dot, number)"; continue ;; esac local _fname; _fname=$(hl7_field_name "$_arg") if [ -z "$_fname" ]; then err "unknown field: $_arg" continue fi printf '%s%s%s %s%s%s\n' "$C_BOLD$C_CYAN" "$_arg" "$C_RESET" "$C_DIM" "$_fname" "$C_RESET" local _has=0 _m _n _label while IFS=$'\t' read -r _m _n; do _has=1 _label="${_arg}.${_m}" printf ' %s%-14s%s %s%s%s\n' "$C_CYAN" "$_label" "$C_RESET" "$C_DIM" "$_n" "$C_RESET" done < <(hl7_components_for "$_arg") if [ "$_has" -eq 0 ]; then printf ' %s(no component breakdown for %s in built-in schema)%s\n' "$C_DIM" "$_arg" "$C_RESET" fi continue ;; # v0.7.0: mouse mode toggle (xterm SGR mouse + bracketed paste). /origin|/origin\ *) # v0.7.4 single-source: show/pin the auto-update origin. # Pin is persisted to $LARRY_HOME/.origin and re-read on the # NEXT launch (the current run keeps using the resolved # $LARRY_BASE_URL). The GitHub fallback was dropped — the # "github" keyword is no longer valid (private mirror). 
local _arg; _arg=$(_slash_args "/origin" "$input") case "${_arg:-status}" in status) printf '%sauto-update origin:%s\n' "$C_BOLD" "$C_RESET" printf ' %seffective:%s %s %s(%s)%s\n' \ "$C_BOLD" "$C_RESET" "$LARRY_BASE_URL" \ "$C_DIM" "$(_origin_label "$LARRY_BASE_URL")" "$C_RESET" if [ -n "$_LARRY_LAST_ORIGIN_URL" ]; then printf ' %slast served by:%s %s\n' \ "$C_BOLD" "$C_RESET" "$(_origin_label "$_LARRY_LAST_ORIGIN_URL")" else printf ' %slast served by:%s %s(self-update did not run this session)%s\n' \ "$C_BOLD" "$C_RESET" "$C_DIM" "$C_RESET" fi if [ -r "$LARRY_HOME/.origin" ]; then printf ' %spin file :%s %s/.origin → %s\n' \ "$C_BOLD" "$C_RESET" "$LARRY_HOME" \ "$(tr -d '[:space:]' < "$LARRY_HOME/.origin" 2>/dev/null)" else printf ' %spin file :%s %s(none — using default)%s\n' \ "$C_BOLD" "$C_RESET" "$C_DIM" "$C_RESET" fi ;; gitea) printf 'gitea\n' > "$LARRY_HOME/.origin" 2>/dev/null \ && larry_say "pinned origin: gitea ($LARRY_ORIGIN_DEFAULT_GITEA). Restart larry to apply." \ || err "could not write $LARRY_HOME/.origin" ;; github) err "/origin github is no longer supported in v0.7.4 — the GitHub mirror is private. Use /origin gitea or /origin ." ;; auto) if [ -f "$LARRY_HOME/.origin" ]; then rm -f "$LARRY_HOME/.origin" \ && larry_say "pin cleared. Restart larry to revert to the default (gitea)." \ || err "could not remove $LARRY_HOME/.origin" else larry_say "no pin in place; already on the default." fi ;; https://*) printf '%s\n' "$_arg" > "$LARRY_HOME/.origin" 2>/dev/null \ && larry_say "pinned origin: $_arg. Restart larry to apply." \ || err "could not write $LARRY_HOME/.origin" ;; *) err "usage: /origin [gitea|auto|] (no arg → status)" ;; esac continue ;; # v0.7.3 — runtime control for automatic PHI detection. 
/phi-auto|/phi-auto\ *) local _arg; _arg=$(_slash_args "/phi-auto" "$input") case "${_arg:-status}" in on) AUTO_PHI_MODE="on" larry_say "auto-PHI: on (default — Tier 1-4 detections tokenized; err on caution)" ;; off) AUTO_PHI_MODE="off" larry_say "auto-PHI: off (explicit markers @@VALUE / {{phi:VALUE}} still work)" ;; confirm) AUTO_PHI_MODE="confirm" larry_say "auto-PHI: confirm (Tier 3-4 matches prompt Y/n; Tier 1-2 still always tokenize)" ;; strict) # v0.8.0-c: fail-closed mode. HL7-shaped content with broken # sanitizer aborts the turn instead of passing through. AUTO_PHI_MODE="strict" larry_say "auto-PHI: strict (fail-closed — HL7-shaped content aborts turn if hl7-sanitize.sh missing or returns empty; tokenize-value failure aborts turn)" ;; status) larry_say "auto-PHI: $AUTO_PHI_MODE (this session tokenized: $AUTO_PHI_SESSION_COUNT) log: $AUTO_PHI_LOG" ;; *) err "usage: /phi-auto on|off|confirm|strict (no arg → status)" ;; esac continue ;; # v0.8.2: PHI Presidio sidecar lifecycle. /phi-sidecar|/phi-sidecar\ *) local _arg; _arg=$(_slash_args "/phi-sidecar" "$input") if [ ! -x "$LARRY_LIB_DIR/phi-sidecar.sh" ]; then err "phi-sidecar.sh not installed (lib/phi-sidecar.sh missing or non-executable)" continue fi case "${_arg:-status}" in start|stop|status|health|ensure) "$LARRY_LIB_DIR/phi-sidecar.sh" "$_arg" ;; *) err "usage: /phi-sidecar start|stop|status|health|ensure (no arg → status)" ;; esac continue ;; # v0.8.6 (tsk-2026-05-27-023): work-box → Mac headers.log sync. Pushes the # rate-limit-header capture to a daemon-watched path on Bryan's Mac so the # memory layer ingests work-box anthropic-ratelimit-* headers. Delegates # entirely to lib/headers-sync.sh (transport rides the existing # ControlMaster; offset-tracked + idempotent; graceful on any failure). /headers-sync|/headers-sync\ *) local _arg; _arg=$(_slash_args "/headers-sync" "$input") if [ ! 
-x "$LARRY_LIB_DIR/headers-sync.sh" ]; then err "headers-sync.sh not installed (lib/headers-sync.sh missing or non-executable)" continue fi # Split the first word as subcommand; remainder as its argument # (e.g. `target bj-mac`). Pass through to the lib as argv. local _sub _rest _sub="${_arg%% *}" _rest="${_arg#"$_sub"}"; _rest="${_rest# }" case "${_sub:-status}" in on|off|status|now) LARRY_HOME="$LARRY_HOME" "$LARRY_LIB_DIR/headers-sync.sh" "${_sub:-status}" ;; target) LARRY_HOME="$LARRY_HOME" "$LARRY_LIB_DIR/headers-sync.sh" target "$_rest" ;; *) err "usage: /headers-sync on|off|status|target |now (no arg → status)" ;; esac continue ;; /mouse|/mouse\ *) local _arg; _arg=$(_slash_args "/mouse" "$input") case "${_arg:-status}" in on) # v0.7.5: opt-in. /mouse on must set BOTH knobs because # the v0.7.5 default is off-unless-LARRY_MOUSE=1. LARRY_NO_MOUSE=0 LARRY_MOUSE=1 _install_mouse_mode if [ "$_LARRY_MOUSE_ACTIVE" = "1" ]; then larry_say "mouse mode ON (bracketed-paste + SGR mouse reporting; click-to-position is terminal-dependent)" larry_say "note: in MobaXterm / Cygwin / RDP terminals, mouse-mode-on disables native text selection. /mouse off to restore." else warn "mouse mode requested but no TTY detected" fi ;; off) _uninstall_mouse_mode LARRY_MOUSE=0 LARRY_NO_MOUSE=1 larry_say "mouse mode OFF (native terminal text selection restored)" ;; status) if [ "${LARRY_NO_MOUSE:-0}" = "1" ]; then larry_say "mouse mode: disabled (LARRY_NO_MOUSE=1). /mouse on to enable." elif [ "$_LARRY_MOUSE_ACTIVE" = "1" ]; then larry_say "mouse mode: active (bracketed-paste + SGR reporting)" else larry_say "mouse mode: inactive (default since v0.7.5). /mouse on or LARRY_MOUSE=1 to enable." 
fi ;; *) err "usage: /mouse on|off (no arg → status)" ;; esac continue ;; /show-last-tool) if [ -z "$_LARRY_LAST_TOOL_NAME" ]; then err "no tool calls yet this session" else printf '%s%s▶ %s%s\n' "$C_CYAN" "$C_BOLD" "$_LARRY_LAST_TOOL_NAME" "$C_RESET" printf '%sinput:%s\n' "$C_BOLD" "$C_RESET" printf '%s' "$_LARRY_LAST_TOOL_INPUT" | jq . 2>/dev/null || printf '%s\n' "$_LARRY_LAST_TOOL_INPUT" printf '\n%sresult:%s\n' "$C_GREEN$C_BOLD" "$C_RESET" printf '%s\n' "$_LARRY_LAST_TOOL_RESULT" fi continue ;; /sys) printf '%s\n' "$system_prompt"; continue ;; /pwd) echo "$(pwd)"; continue ;; /env) printf '%s\n' "$CLOVERLEAF_CTX"; continue ;; /auth) printf '%sauth rail: %s (primary: %s)%s\n' "$C_BOLD" "$LARRY_AUTH_MODE" "$LARRY_PRIMARY_AUTH_MODE" "$C_RESET" show_api_key_status if [ "$LARRY_AUTH_MODE" = "oauth" ] && [ -x "$LARRY_LIB_DIR/oauth.sh" ]; then "$LARRY_LIB_DIR/oauth.sh" status fi continue ;; # /set-api-key [--clear|--status] — the secure per-client key provisioning # entry point. Default action prompts (silent, validated, 0600, CR-safe). /set-api-key*) local _ska; _ska=$(_slash_args "/set-api-key" "$input") _ska="${_ska//$'\r'/}"; _ska="${_ska%"${_ska##*[![:space:]]}"}" case "$_ska" in --clear) clear_api_key ;; --status|"") if [ "$_ska" = "--status" ]; then show_api_key_status else set_api_key; fi ;; --no-validate) set_api_key --no-validate ;; *) err "usage: /set-api-key [--clear|--status|--no-validate]" ;; esac continue ;; /login) if [ -x "$LARRY_LIB_DIR/oauth.sh" ]; then warn "OAuth is opt-in and risks your Max account (Anthropic blocks Claude-Code impersonation). API key is the default rail — prefer /set-api-key." "$LARRY_LIB_DIR/oauth.sh" login && LARRY_AUTH_MODE="oauth" && larry_say "switched to OAuth subscription auth (opt-in)" else err "oauth.sh not installed"; fi; continue ;; /logout) if [ -x "$LARRY_LIB_DIR/oauth.sh" ]; then "$LARRY_LIB_DIR/oauth.sh" logout; fi # Revert to the default API-key rail if a key is available. 
if [ -n "${ANTHROPIC_API_KEY:-}" ] || [ -f "$LARRY_API_KEY_FILE" ]; then _load_api_key_into_env; LARRY_AUTH_MODE="apikey"; larry_say "reverted to the API-key rail" else LARRY_AUTH_MODE=""; warn "no API key set — run /set-api-key" fi continue ;; # /auth-debug — masked auth diagnostic across BOTH rails. NEVER prints a # full key or token. The API-key portion shows sk-ant-api03-XXXX…last4. /auth-debug|/api-debug) printf '%s=== auth diagnostic (secrets masked) ===%s\n' "$C_BOLD" "$C_RESET" printf ' primary auth mode: %s\n' "$LARRY_PRIMARY_AUTH_MODE" printf ' active auth mode: %s\n' "$LARRY_AUTH_MODE" show_api_key_status if [ -f "$LARRY_API_KEY_FILE" ]; then local _akmode; _akmode=$(stat -f '%Lp' "$LARRY_API_KEY_FILE" 2>/dev/null || stat -c '%a' "$LARRY_API_KEY_FILE" 2>/dev/null || echo '?') printf ' api-key file: %s (present, mode %s)\n' "$LARRY_API_KEY_FILE" "$_akmode" else printf ' api-key file: %s (absent)\n' "$LARRY_API_KEY_FILE" fi if [ -x "$LARRY_LIB_DIR/oauth.sh" ] && [ -f "$LARRY_HOME/.oauth.json" ]; then printf '\n [opt-in OAuth state — tokens truncated by oauth.sh]\n' "$LARRY_LIB_DIR/oauth.sh" debug 2>&1 | sed 's/^/ /' else printf ' oauth: (no .oauth.json; OAuth is opt-in/off)\n' fi printf '%s=== end auth diagnostic ===%s\n' "$C_BOLD" "$C_RESET" continue ;; /oauth-debug) # Retained for muscle memory; routes to the masked /auth-debug. 
if [ -x "$LARRY_LIB_DIR/oauth.sh" ]; then "$LARRY_LIB_DIR/oauth.sh" debug else err "oauth.sh not installed at $LARRY_LIB_DIR/oauth.sh" fi continue ;; /lesson\ *) local text="${input#/lesson }" [ -n "$text" ] && tool_lesson_record "$text" "" "${HCISITE:-}" "info" || err "usage: /lesson " continue ;; /lessons) [ -x "$LARRY_LIB_DIR/lessons.sh" ] && "$LARRY_LIB_DIR/lessons.sh" list || err "lessons.sh not installed" continue ;; /export) [ -x "$LARRY_LIB_DIR/lessons.sh" ] && "$LARRY_LIB_DIR/lessons.sh" export || err "lessons.sh not installed" continue ;; /phi\ *) local val="${input#/phi }" if [ -x "$LARRY_LIB_DIR/hl7-sanitize.sh" ]; then local token; token=$("$LARRY_LIB_DIR/hl7-sanitize.sh" tokenize-value "$val" 2>/dev/null) [ -n "$token" ] && printf '%sphi>%s %s → %s (use this in your next prompt)\n' "$C_YELLOW" "$C_RESET" "$val" "$token" || err "phi tokenization failed" else err "hl7-sanitize.sh not installed"; fi continue ;; /unmask\ *) local tok="${input#/unmask }" if [ -x "$LARRY_LIB_DIR/hl7-sanitize.sh" ]; then local val; val=$("$LARRY_LIB_DIR/hl7-sanitize.sh" detokenize-value "$tok" 2>/dev/null) [ -n "$val" ] && printf '%sunmask>%s %s → %s (local only; never sent to API)\n' "$C_YELLOW" "$C_RESET" "$tok" "$val" || err "no such token: $tok" else err "hl7-sanitize.sh not installed"; fi continue ;; /tokens) [ -x "$LARRY_LIB_DIR/hl7-sanitize.sh" ] && "$LARRY_LIB_DIR/hl7-sanitize.sh" show-table \ || err "hl7-sanitize.sh not installed" continue ;; # ── SSH ControlMaster commands (password never visible to Larry-the-LLM) ── # Patterns use /foo* (matches both "/foo" alone and "/foo args") for # robustness across bash versions. Body strips the prefix and validates. 
/ssh-hosts*|/ssh-list*) _run_ssh_helper hosts continue ;; /ssh-add*) local rest; rest=$(_slash_args "/ssh-add" "$input") if [ -z "$rest" ]; then err "usage: /ssh-add "; continue fi # shellcheck disable=SC2086 _run_ssh_helper add $rest continue ;; /ssh-remove*|/ssh-rm*) local rest; rest=$(_slash_args "/ssh-remove" "$input") [ -z "$rest" ] && rest=$(_slash_args "/ssh-rm" "$input") if [ -z "$rest" ]; then err "usage: /ssh-remove "; continue; fi _run_ssh_helper remove "$rest" continue ;; /ssh-pass*) local rest; rest=$(_slash_args "/ssh-pass" "$input") if [ -z "$rest" ]; then err "usage: /ssh-pass "; continue; fi _run_ssh_helper pass "$rest" continue ;; /ssh-set-hciroot*) # v0.8.15: pin/persist HCIROOT for an alias so remote # enumeration/exec exports it explicitly and SKIPS the login # profile (for sudo-gated/non-interactive hosts, e.g. qa). local rest; rest=$(_slash_args "/ssh-set-hciroot" "$input") if [ -z "$rest" ]; then err "usage: /ssh-set-hciroot (empty path clears the pin)"; continue fi # v0.8.16: set-u-safe split. A single-line `local a=… b="…$a…"` # references $_sh_alias before it is bound within the SAME `local` # statement, which aborts under `set -u` on bash 3.2/Cygwin # (MobaXterm) AND modern bash. Declare first, assign sequentially. local _sh_alias _sh_path _sh_alias="${rest%% *}" _sh_path="${rest#"$_sh_alias"}"; _sh_path="${_sh_path# }" if [ -z "$_sh_alias" ]; then err "usage: /ssh-set-hciroot "; continue fi # _sh_path may legitimately be empty (clear the pin). _run_ssh_helper set-hciroot "$_sh_alias" "$_sh_path" continue ;; /ssh-set-direct*) # v0.8.17: toggle DIRECT (no-multiplex) mode for an alias. # When on, ALL remote ops bypass the ControlMaster and run a # fresh per-command sshpass connection (for hosts that reject # SSH session multiplexing, e.g. qa → shdclvf01q). 
local rest; rest=$(_slash_args "/ssh-set-direct" "$input") if [ -z "$rest" ]; then err "usage: /ssh-set-direct on|off"; continue fi # set-u-safe split (same idiom as /ssh-set-hciroot): declare # first, assign the alias, THEN reference it. A single-line # `local a=… b="…$a…"` aborts under set -u (bash 3.2/Cygwin AND # modern bash) — the v0.8.16 bug class. local _sd_alias _sd_mode _sd_alias="${rest%% *}" _sd_mode="${rest#"$_sd_alias"}"; _sd_mode="${_sd_mode# }" if [ -z "$_sd_alias" ]; then err "usage: /ssh-set-direct on|off"; continue fi # _sd_mode empty → treated as "off" (clear) by the helper. _run_ssh_helper set-direct "$_sd_alias" "$_sd_mode" continue ;; /ssh-setup*) local rest; rest=$(_slash_args "/ssh-setup" "$input") if [ -z "$rest" ]; then err "usage: /ssh-setup "; continue; fi _run_ssh_helper setup "$rest" continue ;; /ssh-close*) local rest; rest=$(_slash_args "/ssh-close" "$input") if [ -z "$rest" ]; then err "usage: /ssh-close "; continue; fi _run_ssh_helper close "$rest" continue ;; /ssh-status*) local rest; rest=$(_slash_args "/ssh-status" "$input") if [ -n "$rest" ]; then _run_ssh_helper status "$rest"; else _run_ssh_helper status; fi continue ;; /ssh*) local rest; rest=$(_slash_args "/ssh" "$input") if [ -z "$rest" ]; then err "usage: /ssh "; continue; fi # v0.8.16: same set-u-safe split as /ssh-set-hciroot above — # $alias was referenced before binding in a single-line `local`. local alias rcmd alias="${rest%% *}" rcmd="${rest#"$alias"}"; rcmd="${rcmd# }" if [ -z "$alias" ] || [ -z "$rcmd" ]; then err "usage: /ssh "; continue fi _run_ssh_helper exec "$alias" "$rcmd" continue ;; /paths|/paths\ *) # v0.8.19: deterministic route-chain tracer (muscle-memory entry). # /paths [site] [--up|--down] [--site-only] [--all] [--format v1|table|tsv|jsonl|nodes] # /paths / ... (v1 node form — output feeds back in) # /paths --all [site] [--site-only] # Default format is v1 (the ground-truth chain form), pipe-first. 
local _pa; _pa=$(_slash_args "/paths" "$input") local _p_thread="" _p_site="" _p_dir="full" _p_all=0 _p_siteonly=0 _p_fmt="v1" _ptok _pexpect="" for _ptok in $_pa; do if [ "$_pexpect" = "format" ]; then _p_fmt="$_ptok"; _pexpect=""; continue; fi case "$_ptok" in --up|--upstream) _p_dir="up" ;; --down|--downstream) _p_dir="down" ;; --all) _p_all=1 ;; --site-only) _p_siteonly=1 ;; --format) _pexpect="format" ;; --format=*) _p_fmt="${_ptok#--format=}" ;; --*) err "/paths: unknown flag $_ptok"; continue 2 ;; *) if [ -z "$_p_thread" ] && [ "$_p_all" = "0" ]; then _p_thread="$_ptok" elif [ -z "$_p_site" ]; then _p_site="$_ptok" fi ;; esac done # default site to the current $HCISITE when a thread is given without one if [ "$_p_all" = "0" ] && [ -z "$_p_thread" ]; then err "usage: /paths [site] | / [--up|--down|--site-only|--all|--format v1|table|tsv|jsonl|nodes]" continue fi if [ "$_p_all" = "0" ] && [ -z "$_p_site" ] && [ -n "${HCISITE:-}" ]; then _p_site="$HCISITE" fi tool_nc_paths "" "$_p_thread" "$_p_site" "$_p_dir" "$_p_all" "$_p_siteonly" "$_p_fmt" "" continue ;; /redetect) detect_cloverleaf_env system_prompt=$(build_system_prompt) larry_say "re-detected. /env to view." continue ;; /sites*) # v0.8.13: both-mode site listing. `/sites` → LOCAL; `/sites ` # → REMOTE discover over the open ControlMaster. # v0.8.15: optional `--hciroot ` pass-through. In REMOTE mode # it PINS that HCIROOT for the alias (persisted) before enumerating, # so the remote walk exports HCIROOT explicitly and skips the # sudo-gated login profile. In LOCAL mode it overrides the scan root. 
local _site_args; _site_args=$(_slash_args "/sites" "$input") local _site_alias="" _site_hciroot="" _tok _expect="" for _tok in $_site_args; do if [ "$_expect" = "hciroot" ]; then _site_hciroot="$_tok"; _expect=""; continue; fi case "$_tok" in --hciroot) _expect="hciroot" ;; --hciroot=*) _site_hciroot="${_tok#--hciroot=}" ;; *) [ -z "$_site_alias" ] && _site_alias="$_tok" ;; esac done if [ -n "$_site_alias" ] && [ -n "$_site_hciroot" ]; then # REMOTE + explicit hciroot → persist the pin, then enumerate. _run_ssh_helper set-hciroot "$_site_alias" "$_site_hciroot" tool_list_sites "$_site_alias" "" else # REMOTE (pin/login-shell resolves HCIROOT) or LOCAL (hciroot override). tool_list_sites "${_site_alias:-}" "${_site_hciroot:-}" fi continue ;; /site\ *) HCISITE="${input#/site }"; HCISITEDIR="$HCIROOT/$HCISITE" export HCISITE HCISITEDIR detect_cloverleaf_env system_prompt=$(build_system_prompt) larry_say "HCISITE -> $HCISITE ($HCISITEDIR)"; continue ;; /reset) printf '[]' > "$MESSAGES_FILE"; larry_say "history cleared."; continue ;; /model\ *) LARRY_MODEL="${input#/model }"; larry_say "model -> $LARRY_MODEL"; continue ;; /cd\ *) local target="${input#/cd }" if cd "$target" 2>/dev/null; then larry_say "cd -> $(pwd)"; else err "no such directory: $target"; fi continue ;; /load\ *) local f="${input#/load }" if [ ! -f "$f" ]; then err "no such file: $f"; continue; fi input="$(cat "$f")" # v0.8.0-b: pre-route HL7-shaped /load content through # hl7-sanitize.sh BEFORE it enters the user_input auto-PHI # pipeline. The user_input scan (per-word classifier) is # weaker than hl7-sanitize.sh's segment-aware field tokenizer # for raw HL7 dumps. Closes V3 from Vera's audit. 
# # LARRY_AUTO_PHI semantics: # off — bypass entirely (operator opted out) # strict — abort /load if hl7-sanitize.sh missing OR returns empty # on/default/confirm — best-effort; warn-and-continue on sanitize failure if [ "$AUTO_PHI_MODE" != "off" ] && _auto_phi_looks_like_hl7 "$input"; then local _ld_sanitize="$LARRY_LIB_DIR/hl7-sanitize.sh" if [ ! -x "$_ld_sanitize" ]; then if [ "$AUTO_PHI_MODE" = "strict" ]; then err "/load aborted: HL7-shaped content but hl7-sanitize.sh unavailable (LARRY_AUTO_PHI=strict)" continue else warn "/load: HL7-shaped content but hl7-sanitize.sh unavailable — content passed through best-effort user_input scan only" fi else local _ld_tmp _ld_sanitized _ld_before _ld_after _ld_new _ld_tmp=$(mktemp) printf '%s' "$input" > "$_ld_tmp" _ld_before=$(bash "$_ld_sanitize" count 2>/dev/null || echo 0) _ld_sanitized=$(bash "$_ld_sanitize" "$_ld_tmp" 2>/dev/null) rm -f "$_ld_tmp" if [ -z "$_ld_sanitized" ]; then if [ "$AUTO_PHI_MODE" = "strict" ]; then err "/load aborted: hl7-sanitize.sh returned empty on HL7-shaped content (LARRY_AUTO_PHI=strict)" continue else warn "/load: hl7-sanitize.sh returned empty — content passed through best-effort user_input scan only" fi else input="$_ld_sanitized" _ld_after=$(bash "$_ld_sanitize" count 2>/dev/null || echo 0) _ld_new=$((_ld_after - _ld_before)) if [ "$_ld_new" -gt 0 ]; then printf '%sphi>%s /load: hl7-sanitize.sh tokenized %d HL7 field(s) from %s before passing to auto-PHI\n' \ "$C_DIM" "$C_RESET" "$_ld_new" "$f" >&2 AUTO_PHI_SESSION_COUNT=$(( AUTO_PHI_SESSION_COUNT + _ld_new )) _auto_phi_log "(hl7-sanitize /load)" "BATCH" "(+${_ld_new} tokens)" "hl7_pipeline" "user_input" "/load $f" fi fi fi fi larry_say "loaded $(wc -l < "$f" | tr -d ' ') lines from $f as your next message" ;; # v0.6.8: cross-env convenience commands. These templatize a prompt and # hand it to Larry-the-LLM to execute via the existing tools (no new # control flow). 
The prompt cites the motivating workflow so the model # picks the right tool chain unambiguously. /nc-diff-env*) local rest; rest=$(_slash_args "/nc-diff-env" "$input") if [ -z "$rest" ]; then err "usage: /nc-diff-env [pattern]"; continue fi # Tokenize positional args: env_a, env_b, optional pattern. local _ea _eb _pat _ea="${rest%% *}"; rest="${rest#"$_ea"}"; rest="${rest# }" _eb="${rest%% *}"; rest="${rest#"$_eb"}"; rest="${rest# }" _pat="$rest" if [ -z "$_ea" ] || [ -z "$_eb" ]; then err "usage: /nc-diff-env [pattern]"; continue fi input=$(cat <}. Plan and execute: 1. Run ssh_status to confirm both aliases have an open ControlMaster. If either is closed, stop and tell me to run /ssh-setup . 2. Discover each env's sites with list_sites(alias=...) — it resolves the remote \$HCIROOT in a login shell for you. Then locate NetConfig paths via ssh_exec (e.g. find \$HCIROOT -maxdepth 3 -name NetConfig -type f) — ssh_exec already runs in a login shell, so \$HCIROOT is populated. Do NOT ask me to export \$HCIROOT. 3. ssh_pull each NetConfig locally. Also pull the matching Xlate/, tclprocs/, tables/ directories alongside if you intend to diff referenced artifacts. 4. Use nc_diff_interface with --interface set per protocol, --left and --right pointing at the two local NetConfigs. If a pattern was given, restrict the set of protocols to those matching $_pat (use nc_list_protocols + a filter). 5. Report each difference with file-path references back to the source envs (alias:remote_path so I can copy-paste back into ssh). Be terse. One section per protocol. Aggregate identical diffs. 
EOF ) larry_say "/nc-diff-env: templated prompt prepared for $_ea vs $_eb${_pat:+ pattern=$_pat}" ;; /nc-regression-env*) local rest; rest=$(_slash_args "/nc-regression-env" "$input") if [ -z "$rest" ]; then err "usage: /nc-regression-env [scope]"; continue fi local _src _tgt _scope _src="${rest%% *}"; rest="${rest#"$_src"}"; rest="${rest# }" _tgt="${rest%% *}"; rest="${rest#"$_tgt"}"; rest="${rest# }" _scope="${rest:-server}" if [ -z "$_src" ] || [ -z "$_tgt" ]; then err "usage: /nc-regression-env [scope]"; continue fi local _ts; _ts=$(date +%Y%m%d-%H%M%S) local _out="$LARRY_HOME/regression/$_ts" input=$(cat <. 2. Discover the remote HCIROOT for each alias with list_sites(alias=...) — it resolves \$HCIROOT in a login shell and lists the sites. (Equivalently, ssh_exec 'echo \$HCIROOT' now works because ssh_exec runs a login shell.) Do NOT ask me to export it. Only ask which site if scope=site and it's ambiguous from the discovered list. 3. Call nc_regression with: - scope = "$_scope" - source_ssh_alias = "$_src" - target_ssh_alias = "$_tgt" - env_a = - env_b = - out = "$_out" - count = 10 (messages sampled per inbound) - route_test_cmd = use the existing default if I haven't given you one; otherwise prompt me with a one-liner template I should approve. - phase = "all" 4. After the run, read the compiled report at $_out/regression-summary.md and read $_out/diff/_index.md, then summarize: - threads tested, - pairs compared, - total field differences post-ignore, - any threads where one env had outputs the other didn't. 5. Reference the SSH alias names ($_src and $_tgt) in your summary, not raw user@host strings. EOF ) larry_say "/nc-regression-env: templated prompt prepared for $_src → $_tgt (scope=$_scope, out=$_out)" ;; /*) err "unknown command: $input (try /help)"; continue ;; esac # @file preprocessing (v0.6.7 item 12): inline file contents BEFORE PHI # tokenization so PHI markers inside attached files get caught. 
case "$input" in *@*) maybe_show_atfile_tip "$input" input=$(preprocess_atfile_refs "$input") ;; esac # PHI preprocessing: replace any {{phi:VALUE}} markers with local tokens # BEFORE the input enters conversation history and gets sent to Anthropic. if [[ "$input" == *"{{phi:"* ]] || [[ "$input" == *"@@"* ]]; then input=$(preprocess_phi_markers "$input") fi # v0.7.3 — Automatic PHI detection. Runs AFTER explicit-marker handling so # @@VALUE / {{phi:VALUE}} hits take precedence; auto-PHI fills gaps in # things Bryan didn't manually mark. Per-turn "!nophi " prefix override # is consumed inside auto_detect_phi. Bypassed entirely when mode=off. # Supersedes af2ffe8 (reverted with v0.7.1). # # v0.8.0-c: capture auto_detect_phi's exit code. Code 42 = strict-mode # fail-closed signal; the error message has already been printed to # stderr by auto_detect_phi. We skip add_user_text/agent_turn entirely, # leaving the turn as a no-op so no payload is ever built or sent. local _ap_rc=0 input=$(auto_detect_phi user_input "$input") || _ap_rc=$? if [ "$_ap_rc" = "42" ]; then err "turn aborted by LARRY_AUTO_PHI=strict (see above). Set LARRY_AUTO_PHI=on or /phi-auto on to retry without strict mode." continue fi log_section "user"; log_append "$input" # v0.7.1: render the persistent status line BETWEEN turns — after the # user has submitted real (non-slash, non-empty) input and after all # input preprocessing (@file, PHI) is done, but before agent_turn # begins streaming. Slash commands and empty input `continue` above # and never reach this point, matching the "no status in those paths" # rule. First-turn suppression is enforced inside render_status_line # (returns silently when there is no header data yet). render_status_line add_user_text "$input" agent_turn "$system_prompt" || warn "turn ended with error" echo "" done log_section "session-end" log_append "- end: $(date -Iseconds 2>/dev/null || date)" larry_say "session log: $LOG_FILE" } main_loop