#!/usr/bin/env bash
# ssh-helper.sh — secure SSH command execution via ControlMaster.
#
# Architecture:
#   • Hosts configured in $LARRY_HOME/.ssh-hosts.tsv as
#       alias \t user@host \t port
#   • Passwords stored at $LARRY_HOME/.ssh-creds/ALIAS, mode 0600.
#     The password file is the single point of truth — to rotate (daily-changing
#     passwords) just overwrite the file with the new one and re-run 'setup'.
#   • sshpass reads the password via -f (file), so it never lands in argv or
#     environment where Larry the LLM (or other processes via /proc) could see it.
#   • The first 'setup' call opens a long-lived SSH ControlMaster connection
#     (default ControlPersist=8h). Subsequent 'exec' calls multiplex through
#     the master socket and need no password.
#   • Larry's tool layer only sees: alias, command, command_output.
#     Never the password. Never the user@host (unless added to the alias list).
#
# Subcommands:
#   hosts                           list configured hosts
#   add ALIAS USER@HOST[:PORT]      add a host to the alias list
#   remove ALIAS                    remove an alias (also clears cred + socket)
#   pass ALIAS                      set/update the password (hidden interactive)
#   set-hciroot ALIAS PATH          pin (persist) $HCIROOT for an alias. When
#                                   set, remote enumeration/exec runs with
#                                   HCIROOT=PATH exported EXPLICITLY and
#                                   WITHOUT the `bash -lc` login wrapper — for
#                                   hosts whose login profile is sudo-gated or
#                                   otherwise non-interactive (v0.8.15).
#                                   Pass an empty path to clear the pin.
#   set-direct ALIAS on|off         toggle (persist) DIRECT mode for an alias
#                                   (v0.8.17). When ON, ALL remote ops for the
#                                   alias BYPASS the ControlMaster and run a
#                                   FRESH per-command sshpass connection with
#                                   forced password auth. For hosts that reject
#                                   SSH session multiplexing ("read from master
#                                   failed: Connection reset by peer") — the
#                                   master opens but multiplexed sessions die.
#                                   The pinned HCIROOT (set-hciroot) is still
#                                   honoured: the remote command is shaped by
#                                   the same _remote_cmd_for path, just sent
#                                   over the direct connection. NO traffic
#                                   bypass — plain forced-password ssh, no
#                                   proxy/tunnel/masking, host-key checked
#                                   (accept-new). Persisted as TSV column 5.
#   setup ALIAS                     open ControlMaster (uses stored password ONCE).
#                                   In DIRECT mode, skips the (pointless) master
#                                   and instead VALIDATES the password with one
#                                   trivial direct command, then reports ready.
#   close ALIAS                     close ControlMaster
#   status [alias]                  show open masters / cred presence
#   exec ALIAS CMD...               run command via master (returns output)
#   discover ALIAS                  auto-detect remote Cloverleaf env:
#                                   resolves $HCIROOT (LOGIN shell), then
#                                   enumerates sites (hcisitelist fast-path,
#                                   NetConfig-walk fallback). Prints TSV:
#                                     HCIROOT\tPATH
#                                     SITE\t...          (one per site)
#                                   No nagging for paths — the remote's own
#                                   login profile is the source of truth.
#   pull ALIAS REMOTE [local]       scp remote → local via existing master
#   push ALIAS LOCAL REMOTE         scp local → remote via existing master
#   pull-smat ALIAS ... [days_back]
#                                   pull a thread's smatdb (full) or sample
#                                   recent messages from it (sampled, TSV b64)
#   help                            print this help
#
# NOTE: cmd_help prints lines 4-65 of this file verbatim; keep the header above
# exactly that long (or update the range in cmd_help) when editing it.

set -u
set -o pipefail

# All state lives under $LARRY_HOME (default ~/.larry); every path below is
# derived from it so a single override relocates hosts, creds and sockets.
LARRY_HOME="${LARRY_HOME:-$HOME/.larry}"
SSH_HOSTS_FILE="$LARRY_HOME/.ssh-hosts.tsv"
SSH_CREDS_DIR="$LARRY_HOME/.ssh-creds"
SSH_SOCKETS_DIR="$LARRY_HOME/.ssh-sockets"
SSH_CONTROL_PERSIST="${LARRY_SSH_CONTROL_PERSIST:-8h}"

# die MSG...  — print "ssh-helper: MSG" on stderr and exit 1.
die() {
  printf 'ssh-helper: %s\n' "$*" >&2
  exit 1
}

# warn MSG... — print "ssh-helper: warn: MSG" on stderr; does not exit.
warn() {
  printf 'ssh-helper: warn: %s\n' "$*" >&2
}

# ok MSG...   — print "ssh-helper: MSG" on stdout (progress / success lines).
ok() {
  printf 'ssh-helper: %s\n' "$*"
}

# v0.8.25 — SAFE HIDDEN-PASSWORD READ (terminal-corruption fix).
# Previously the hidden-password prompts did a bare `stty -echo` ... read ...
# `stty echo`. If the user hit Ctrl-C (or the read got an EOF/signal) BETWEEN
# those two stty calls, echo was never re-enabled and the terminal was left
# corrupted (typing invisible) — recoverable only with `stty sane`/`reset`.
# This wrapper SAVES the full prior termios state with `stty -g` and restores
# it via a trap on EXIT/INT/TERM/HUP, so any interrupt path restores the tty.
# Reads from /dev/tty (the real terminal), not stdin.
# Portability note for _read_hidden: `stty -g` / `stty SAVED-STATE` are POSIX
# and work on AIX/Linux/BSD/Cygwin; no GNU-only flags.
#   _read_hidden VARNAME
# Sets the named variable to the line read (no echo). Returns 0 always (empty
# input is the caller's "abort" signal).
# Every stty/read targets /dev/tty first and only falls back to stdin when the
# terminal device cannot be opened. `2>/dev/null` is placed BEFORE the input
# redirection so a failed open of /dev/tty is silent too.
_read_hidden() { # varname
  local _rh_var="$1" _rh_val="" _rh_saved="" _rh_prior_trap=""
  if command -v stty >/dev/null 2>&1 && { [ -t 0 ] || [ -e /dev/tty ]; }; then
    _rh_saved=$(stty -g 2>/dev/null < /dev/tty || stty -g 2>/dev/null || true)
    # v0.8.26 nit (b): capture the caller's PRIOR INT/TERM/HUP trap so we can
    # RESTORE it on the way out instead of blindly resetting to default. `trap -p`
    # prints the re-installable trap commands (empty if none was set).
    _rh_prior_trap=$(trap -p INT TERM HUP 2>/dev/null || true)
    # v0.8.26 nit (a): install a restore trap BEFORE touching echo, on EVERY
    # path. If `stty -g` saved a state, restore exactly that; if the save failed
    # (_rh_saved empty), fall back to `stty echo` so a ^C mid-read can never
    # leave the terminal with echo off.
    if [ -n "$_rh_saved" ]; then
      trap 'stty "$_rh_saved" 2>/dev/null < /dev/tty || stty "$_rh_saved" 2>/dev/null' INT TERM HUP
    else
      trap 'stty echo 2>/dev/null < /dev/tty || stty echo 2>/dev/null' INT TERM HUP
    fi
    stty -echo 2>/dev/null < /dev/tty || stty -echo 2>/dev/null || true
    IFS= read -r _rh_val 2>/dev/null < /dev/tty || IFS= read -r _rh_val || true
    if [ -n "$_rh_saved" ]; then
      stty "$_rh_saved" 2>/dev/null < /dev/tty || stty "$_rh_saved" 2>/dev/null || true
    else
      stty echo 2>/dev/null < /dev/tty || stty echo 2>/dev/null || true
    fi
    # Drop OUR handlers for all three signals first, then re-install whatever
    # the caller had. Evaluating only the prior-trap text would leave our
    # handler on any signal the caller had not trapped.
    trap - INT TERM HUP
    if [ -n "$_rh_prior_trap" ]; then
      eval "$_rh_prior_trap"
    fi
  else
    IFS= read -r _rh_val 2>/dev/null < /dev/tty || IFS= read -r _rh_val || true
  fi
  # Assign back to the caller's variable name without eval-injection risk.
  printf -v "$_rh_var" '%s' "$_rh_val"
}

# v0.7.5: shared CR-safety primitives. pull/push use `wc -c | tr -d ' '` to
# verify byte counts — Cygwin wc.exe can pass through \r and tank the
# `[ "$got" != "$local_size" ]` comparison.
_SSH_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)"
if [ -r "$_SSH_LIB_DIR/cygwin-safe.sh" ]; then
  # shellcheck disable=SC1090,SC1091
  . "$_SSH_LIB_DIR/cygwin-safe.sh"
fi
# Fallback: define coerce_int whenever the library did not provide it (missing
# library OR a library version without the function).
if ! declare -F coerce_int >/dev/null 2>&1; then
  # coerce_int RAW [DEFAULT] — echo RAW reduced to its digits, or DEFAULT
  # (0 when omitted) if no digit remains.
  coerce_int() {
    local r="${1:-}" d="${2:-0}" c
    c=$(printf '%s' "$r" | tr -cd '0-9')
    printf '%s' "${c:-$d}"
  }
fi

# ensure_layout — create $LARRY_HOME, the creds dir and the sockets dir (mode
# 0700, best effort) and seed the hosts TSV with its header row if missing.
# Side effect: sets the process umask to 077 when it has to create the file.
ensure_layout() {
  mkdir -p "$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR" 2>/dev/null
  chmod 700 "$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR" 2>/dev/null || true
  if [ ! -f "$SSH_HOSTS_FILE" ]; then
    umask 077
    # v0.8.15: 4th column = pinned HCIROOT (optional). Older 3-column files stay
    # valid — readers treat a missing $4 as "no pin".
    # v0.8.17: 5th column = direct flag (on|off, optional). Older 3-/4-column
    # files stay valid — readers treat a missing/empty $5 as "off" (master mode).
    printf 'alias\taddr\tport\thciroot\tdirect\n' > "$SSH_HOSTS_FILE"
    chmod 600 "$SSH_HOSTS_FILE"
  fi
}

# read_host_addr ALIAS → echoes "ADDR\tPORT" or empty.
# Returns 1 only when the hosts file itself is missing. A row whose port column
# is empty is reported with port 22 (the same default cmd_hosts displays), so
# callers never hand an empty string to `ssh -p`.
read_host_addr() {
  local alias="$1"
  [ -f "$SSH_HOSTS_FILE" ] || { printf ''; return 1; }
  awk -F'\t' -v a="$alias" \
    'NR>1 && $1==a { print $2 "\t" ($3 == "" ? "22" : $3); exit }' \
    < "$SSH_HOSTS_FILE"
}

# read_host_hciroot ALIAS → echoes the pinned HCIROOT (column 4) or empty.
# v0.8.15: a non-empty value means remote commands for this alias run with
# HCIROOT exported explicitly and WITHOUT the `bash -lc` login wrapper.
read_host_hciroot() {
  local alias="$1"
  [ -f "$SSH_HOSTS_FILE" ] || { printf ''; return 0; }
  awk -F'\t' -v a="$alias" 'NR>1 && $1==a { print $4; exit }' < "$SSH_HOSTS_FILE"
}

# read_host_direct ALIAS → echoes "on" if DIRECT mode is set (column 5 == on),
# else empty.
# read_host_direct (v0.8.17): when on, ALL remote ops for the alias bypass the
# ControlMaster and run a fresh per-command sshpass connection (for hosts that
# reject session multiplexing). Missing/empty/anything-but-"on" → master mode.
read_host_direct() {
  local alias="$1"
  [ -f "$SSH_HOSTS_FILE" ] || { printf ''; return 0; }
  awk -F'\t' -v a="$alias" 'NR>1 && $1==a && $5=="on" { print "on"; exit }' < "$SSH_HOSTS_FILE"
}

# _alias_is_direct ALIAS → returns 0 (true) if the alias is in DIRECT mode.
_alias_is_direct() {
  [ "$(read_host_direct "$1")" = "on" ]
}

# require_sshpass — die with an install hint unless sshpass is on PATH.
require_sshpass() {
  command -v sshpass >/dev/null 2>&1 \
    || die "sshpass not on PATH — install it (apt install sshpass / brew install sshpass) and retry"
}

# cmd_help — print the usage section of this script's header comment
# (lines 4-65 of the file, verbatim).
cmd_help() {
  sed -n '4,65p' "$0"
}

# cmd_hosts — print one table row per configured alias: address, port, whether
# a credential file exists, master-socket state, DIRECT flag and HCIROOT pin.
cmd_hosts() {
  ensure_layout
  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint would tank
  # the `-le 1` integer test below.
  if [ "$(coerce_int "$(wc -l < "$SSH_HOSTS_FILE")" 0)" -le 1 ]; then
    echo "no hosts configured. Add with: ssh-helper.sh add ALIAS USER@HOST[:PORT]"
    return 0
  fi
  # Header and separator use the same column widths as the data rows below
  # (20/52/6/6/8/8), so the table lines up.
  printf '%-20s%-52s%-6s%-6s%-8s%-8s%s\n' \
    'alias' 'user@host' 'port' 'cred' 'master' 'direct' 'hciroot-pin'
  printf '%s%15s%s%43s%s%2s%s%2s%s%2s%s%2s%s\n' \
    '─────' '' '─────────' '' '────' '' '────' '' '──────' '' '──────' '' '───────────'
  local alias addr port hciroot direct
  local cred_state direct_state master_state sock
  # Tab is an IFS *whitespace* character, so `IFS=$'\t' read` collapses runs of
  # tabs and silently drops empty columns (an empty hciroot would shift "on"
  # into the hciroot slot). Re-delimit with the non-whitespace unit separator
  # (octal 037) so empty fields survive.
  awk -F'\t' 'NR>1' "$SSH_HOSTS_FILE" | tr '\t' '\037' |
    while IFS=$'\037' read -r alias addr port hciroot direct; do
      cred_state="–"
      [ -f "$SSH_CREDS_DIR/$alias" ] && cred_state="✓"
      # v0.8.17: in DIRECT mode the master column reads "n/a" — there is no
      # master to probe. The direct column shows on/–.
      direct_state="–"
      master_state="–"
      if [ "$direct" = "on" ]; then
        direct_state="on"
        master_state="n/a"
      else
        sock="$SSH_SOCKETS_DIR/$alias.sock"
        # stdin is detached so the probe can never eat the remaining rows.
        if [ -S "$sock" ] \
          && ssh -S "$sock" -O check -p "${port:-22}" "$addr" 2>/dev/null < /dev/null; then
          master_state="open"
        fi
      fi
      printf '%-20s%-52s%-6s%-6s%-8s%-8s%s\n' \
        "$alias" "$addr" "${port:-22}" "$cred_state" "$master_state" \
        "$direct_state" "${hciroot:-–}"
    done
}

# cmd_add ALIAS USER@HOST[:PORT] — append a new host row (port defaults to 22).
# Dies on a malformed target, an unsafe alias, or a duplicate alias.
cmd_add() {
  local alias="${1:-}" target="${2:-}"
  [ -n "$alias" ] && [ -n "$target" ] || die "usage: add ALIAS USER@HOST[:PORT]"
  # The alias becomes a file name under the creds/sockets dirs and the key
  # column of the TSV: refuse path separators, whitespace/control characters,
  # "."/".." and a leading dash.
  case "$alias" in
    -* | . | .. | */* | *[[:space:]]* | *[[:cntrl:]]*)
      die "invalid alias '$alias' — no '/', whitespace or leading '-' allowed"
      ;;
  esac
  [[ "$target" =~ ^[^@[:space:]]+@[^:[:space:]]+(:[0-9]+)?$ ]] \
    || die "target must look like user@host or user@host:port"
  local addr port
  if [[ "$target" == *:* ]]; then
    addr="${target%:*}"
    port="${target##*:}"
  else
    addr="$target"
    port="22"
  fi
  ensure_layout
  # Reject duplicates (use 'remove' first)
  if awk -F'\t' -v a="$alias" 'NR>1 && $1==a { found=1; exit } END { exit !found }' "$SSH_HOSTS_FILE"; then
    die "alias '$alias' already exists. Use 'remove $alias' first."
  fi
  umask 077
  # v0.8.15: write an empty 4th (hciroot) field so the row layout is uniform.
  # v0.8.17: write an empty 5th (direct) field too — uniform 5-column rows.
  printf '%s\t%s\t%s\t%s\t%s\n' "$alias" "$addr" "$port" "" "" >> "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
  ok "added $alias → $addr (port $port). Next: ssh-helper.sh pass $alias"
}

# cmd_set_hciroot ALIAS [PATH] — pin (or clear) the HCIROOT for an alias.
# Persisted as column 4 of the hosts TSV. An empty/omitted PATH clears the pin.
# When set, cmd_exec/cmd_discover/cmd_pull_smat run remote commands with
# HCIROOT=PATH exported EXPLICITLY and WITHOUT the `bash -lc` login wrapper —
# the v0.8.15 fix for hosts whose login profile is sudo-gated (a non-interactive
# SSH session hits `sudo: a terminal is required` and never exports $HCIROOT).
# _rewrite_hosts_file AWK_ARG... — private helper shared by set-hciroot,
# set-direct and remove. Runs `awk -F'\t' -v OFS='\t' AWK_ARG...` over the hosts
# TSV and atomically replaces the file with the output. The temp file is
# created NEXT TO the hosts file (same filesystem, so `mv` is a rename) and is
# removed again on any failure. Returns 0 on success, 1 otherwise.
_rewrite_hosts_file() {
  local tmp
  tmp=$(mktemp "$SSH_HOSTS_FILE.XXXXXX") || return 1
  if awk -F'\t' -v OFS='\t' "$@" "$SSH_HOSTS_FILE" > "$tmp" \
    && mv -f "$tmp" "$SSH_HOSTS_FILE"; then
    chmod 600 "$SSH_HOSTS_FILE"
    return 0
  fi
  rm -f "$tmp"
  return 1
}

cmd_set_hciroot() {
  local alias="${1:-}" newroot="${2:-}"
  [ -n "$alias" ] || die "usage: set-hciroot ALIAS PATH   (empty path clears the pin)"
  # A tab or newline in the value would corrupt the TSV row layout.
  case "$newroot" in
    *$'\t'* | *$'\n'*) die "HCIROOT path must not contain tabs or newlines" ;;
  esac
  ensure_layout
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias (run 'add' first)"
  # Rewrite the row in place, setting/replacing column 4. awk handles rows that
  # still have only 3 columns (legacy) by assigning $4 directly. v0.8.17: also
  # backfill the column-5 (direct) header so the layout stays uniform; existing
  # column-5 values on data rows are preserved untouched (we only touch $4).
  _rewrite_hosts_file -v a="$alias" -v r="$newroot" '
    NR==1 { if (NF < 4) { $4="hciroot" } if (NF < 5) { $5="direct" } print; next }
    $1==a { $4=r; print; next }
    { print }
  ' || die "could not update $SSH_HOSTS_FILE"
  if [ -n "$newroot" ]; then
    ok "pinned HCIROOT for $alias → $newroot"
    ok " (remote enumeration/exec for $alias will export HCIROOT explicitly and SKIP the login profile)"
  else
    ok "cleared HCIROOT pin for $alias (reverting to login-shell \$HCIROOT resolution)"
  fi
}

# cmd_set_direct ALIAS on|off — toggle (or clear) DIRECT mode for an alias.
# Persisted as column 5 of the hosts TSV. v0.8.17.
#
# When ON, cmd_exec/cmd_discover/cmd_pull_smat run remote commands over a FRESH
# per-command sshpass connection (forced password auth) instead of multiplexing
# through a ControlMaster socket — the fix for hosts (e.g. qa → shdclvf01q) where
# the master opens & authenticates fine but any multiplexed session dies with
# "read from master failed: Connection reset by peer". The pinned HCIROOT (set
# via set-hciroot) is still honoured — the remote command is shaped by the same
# _remote_cmd_for path; only the dispatch (direct vs master socket) changes.
cmd_set_direct() {
  local alias="${1:-}" mode="${2:-}"
  [ -n "$alias" ] || die "usage: set-direct ALIAS on|off"
  # Trim surrounding whitespace so a trailing-space arg from the slash path
  # (e.g. `/ssh-set-direct qa on `) still normalizes cleanly to on|off.
  mode="${mode#"${mode%%[![:space:]]*}"}" # leading
  mode="${mode%"${mode##*[![:space:]]}"}" # trailing
  case "$mode" in
    on | ON | On) mode="on" ;;
    off | OFF | Off | '') mode="" ;; # empty/off → clear the flag (master mode)
    *) die "usage: set-direct ALIAS on|off (got: $mode)" ;;
  esac
  ensure_layout
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias (run 'add' first)"
  # Rewrite the row in place, setting/replacing column 5. awk backfills the
  # column-4 (hciroot) and column-5 (direct) headers for legacy files with
  # fewer than 5 columns, and pads a matching data row to 5 columns before
  # assigning $5 so we never clobber a pinned HCIROOT in column 4.
  _rewrite_hosts_file -v a="$alias" -v m="$mode" '
    NR==1 { if (NF < 4) { $4="hciroot" } if (NF < 5) { $5="direct" } print; next }
    $1==a { if (NF < 4) { $4="" } $5=m; print; next }
    { print }
  ' || die "could not update $SSH_HOSTS_FILE"
  if [ "$mode" = "on" ]; then
    ok "DIRECT mode ON for $alias"
    ok " (all remote ops bypass the ControlMaster — fresh per-command sshpass, forced password auth)"
    ok " next: ssh-helper.sh setup $alias (validates the password; no master opened in direct mode)"
  else
    ok "DIRECT mode OFF for $alias (reverting to ControlMaster multiplexing)"
  fi
}

# cmd_remove ALIAS — close the alias's master, drop its row from the hosts TSV
# and delete its credential and socket files.
cmd_remove() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: remove ALIAS"
  ensure_layout
  # Close the master FIRST, while the row still exists: cmd_close needs the
  # addr/port from the hosts file to send `-O exit`. Doing it after the row is
  # gone would only unlink the socket and leave the authenticated master
  # process running until ControlPersist expires.
  cmd_close "$alias" 2>/dev/null || true
  _rewrite_hosts_file -v a="$alias" 'NR==1 || $1!=a' \
    || die "could not update $SSH_HOSTS_FILE"
  rm -f "$SSH_CREDS_DIR/$alias" "$SSH_SOCKETS_DIR/$alias.sock" 2>/dev/null
  ok "removed $alias (cred + socket cleared)"
}

# cmd_pass ALIAS — prompt (hidden) for the password and store it, mode 0600,
# as the credential file sshpass -f reads.
cmd_pass() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: pass ALIAS"
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias (run 'add' first)"
  ensure_layout
  printf 'Password for %s (input is hidden; press Enter when done): ' "$alias" >&2
  local pw=""
  _read_hidden pw
  echo "" >&2
  [ -n "$pw" ] || die "no password entered"
  umask 077
  # NO trailing newline — sshpass -f expects raw password as full file content
  printf '%s' "$pw" > "$SSH_CREDS_DIR/$alias"
  chmod 600 "$SSH_CREDS_DIR/$alias"
  ok "password saved to $SSH_CREDS_DIR/$alias (mode 0600). Next: ssh-helper.sh setup $alias"
}

# _try_master_open ERRFILE CREDFILE SOCK PORT ADDR — one attempt to open a
# ControlMaster with the stored credential. Returns 0 on a verified-open
# master; non-zero otherwise. Stderr from sshpass/ssh lands in ERRFILE so the
# caller can classify it.
#
# v0.8.15 hardening (banner + rotating-password):
#   • -o PreferredAuthentications=password -o PubkeyAuthentication=no forces the
#     password method so sshpass feeds the password cleanly. Without this, on a
#     box that prints a long pre-auth banner and would otherwise try pubkey
#     first, ssh can consume the password slot on the wrong method and the only
#     thing surfaced is the banner with NO "permission denied" — exactly the
#     symptom seen on shdclvf01q.
#   • -o NumberOfPasswordPrompts=1 so a stale password fails fast (one prompt)
#     instead of hanging, which lets us re-prompt for the rotated one.
_try_master_open() {
  local errfile="$1" credfile="$2" sock="$3" port="$4" addr="$5"
  sshpass -f "$credfile" ssh \
    -o "ControlMaster=yes" \
    -o "ControlPath=$sock" \
    -o "ControlPersist=$SSH_CONTROL_PERSIST" \
    -o "StrictHostKeyChecking=accept-new" \
    -o "PreferredAuthentications=password" \
    -o "PubkeyAuthentication=no" \
    -o "NumberOfPasswordPrompts=1" \
    -o "ConnectTimeout=10" \
    -p "$port" \
    -N -f \
    "$addr" 2>"$errfile" \
    && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null
}

# _looks_like_auth_failure ERRFILE — heuristic: did this fail on auth (vs.
# network/host-key)? Returns 0 for "treat as auth failure", 1 otherwise.
#   1. explicit permission/password/auth keywords        → auth failure
#   2. explicit transport or host-key failure keywords   → NOT auth (so a good
#      stored password is never overwritten because the box was unreachable)
#   3. anything else, incl. empty or banner-only stderr  → auth failure (the
#      banner can mask the textual reason; a rotated password is the prime
#      suspect)
_looks_like_auth_failure() {
  local errfile="$1"
  if grep -qiE 'permission denied|authentication fail|incorrect password|too many authentication|password:' \
    "$errfile" 2>/dev/null; then
    return 0
  fi
  if grep -qiE 'connection refused|timed out|could not resolve hostname|name or service not known|no route to host|network is unreachable|host key verification failed|remote host identification has changed' \
    "$errfile" 2>/dev/null; then
    return 1
  fi
  return 0
}

# cmd_setup ALIAS — make ALIAS ready for remote commands.
#   DIRECT mode: validate the stored password with one trivial direct command.
#   Master mode: open (or confirm) the ControlMaster; on an auth-looking failure
#   re-prompt ONCE for a fresh password and retry.
# Returns 0 when ready, 1 otherwise.
cmd_setup() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: setup ALIAS"
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  ensure_layout
  local credfile="$SSH_CREDS_DIR/$alias" errfile filtered

  # v0.8.17: DIRECT mode — opening a ControlMaster is pointless (the box rejects
  # multiplexing). Instead, VALIDATE that the stored password authenticates by
  # running one trivial direct command, then report ready. No master socket is
  # created. This makes Bryan's flow:
  #   /ssh-pass → /ssh-set-hciroot → /ssh-set-direct on → /sites
  if _alias_is_direct "$alias"; then
    [ -f "$credfile" ] || die "no password set for $alias — run 'pass $alias' first"
    require_sshpass
    ok "DIRECT mode for $alias ($addr:$port) — validating the stored password (no master in direct mode)..."
    errfile=$(mktemp 2>/dev/null || echo "/tmp/larry-ssh-direct-setup.err.$$")
    # A trivial, side-effect-free probe. Forced password auth, host-key checked,
    # no master. STDERR (banner/sudo) is captured for failure diagnosis only.
    # v0.8.18: shared DIRECT options via _direct_ssh_opts (was an inline copy);
    # the unquoted expansion is deliberate — one option token per word.
    # shellcheck disable=SC2046
    sshpass -f "$credfile" ssh \
      $(_direct_ssh_opts) \
      -p "$port" \
      "$addr" 'true' 2>"$errfile"
    local vrc=$?
    if [ "$vrc" -eq 0 ]; then
      rm -f "$errfile"
      ok "✓ direct auth OK: $alias → $addr:$port (no master; ready for /sites $alias)"
      return 0
    fi
    printf 'ssh-helper: direct validation FAILED for %s (rc=%d).\n' "$alias" "$vrc" >&2
    filtered=$(_filter_direct_stderr < "$errfile")
    if [ -n "$filtered" ]; then
      printf 'ssh-helper: remote stderr (benign banner/sudo lines stripped):\n' >&2
      printf '%s\n' "$filtered" >&2
    else
      printf 'ssh-helper: no non-benign stderr — almost certainly the stored password is stale/rotated. Re-run: ssh-helper.sh pass %s\n' "$alias" >&2
    fi
    rm -f "$errfile"
    return 1
  fi

  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    ok "master already open for $alias ($addr:$port)"
    return 0
  fi
  [ -f "$credfile" ] || die "no password set for $alias — run 'pass $alias' first"
  require_sshpass
  # The check above failed, so anything still sitting at $sock is a dead
  # leftover. With ControlMaster=yes ssh will not replace an existing socket
  # path (it disables multiplexing instead), which would then be misread as a
  # stale password — remove it first.
  rm -f "$sock"
  ok "opening ssh master for $alias ($addr:$port) — ControlPersist=$SSH_CONTROL_PERSIST..."

  # Private, unpredictable capture file (was a fixed, world-guessable /tmp name).
  errfile=$(mktemp 2>/dev/null || echo "/tmp/larry-ssh-setup.err.$$")
  : > "$errfile"
  if _try_master_open "$errfile" "$credfile" "$sock" "$port" "$addr"; then
    ok "✓ master open: $alias → $addr:$port (socket: $sock)"
    rm -f "$errfile"
    return 0
  fi

  # First attempt failed. Surface the REAL error (not just the banner) and, if it
  # looks like an auth failure, re-prompt for a fresh password (12h rotation on
  # this box) and retry ONCE. Never silently no-op.
  printf 'ssh-helper: first master-open attempt failed for %s.\n' "$alias" >&2
  if [ -s "$errfile" ]; then
    printf 'ssh-helper: ssh/sshpass stderr (auth error, not just the banner):\n' >&2
    grep -iE 'permission denied|authentication|password|denied|fatal|connection|timed out|refused|host key' "$errfile" >&2 2>/dev/null \
      || cat "$errfile" >&2 2>/dev/null
  else
    printf 'ssh-helper: (no stderr captured — the box likely printed only its pre-auth banner; the stored password is almost certainly stale)\n' >&2
  fi

  if _looks_like_auth_failure "$errfile" && { [ -t 0 ] || [ -e /dev/tty ]; }; then
    printf 'ssh-helper: looks like the stored password is stale (this host rotates ~every 12h).\n' >&2
    printf 'Enter a FRESH password for %s (input hidden; Enter to abort): ' "$alias" >&2
    local pw=""
    _read_hidden pw
    echo "" >&2
    if [ -n "$pw" ]; then
      umask 077
      printf '%s' "$pw" > "$credfile" # NO trailing newline (sshpass -f)
      chmod 600 "$credfile"
      ok "stored the fresh password — retrying master open..."
      : > "$errfile"
      if _try_master_open "$errfile" "$credfile" "$sock" "$port" "$addr"; then
        ok "✓ master open: $alias → $addr:$port (socket: $sock)"
        rm -f "$errfile"
        return 0
      fi
      printf 'ssh-helper: retry with the fresh password ALSO failed. ssh/sshpass stderr:\n' >&2
      cat "$errfile" >&2 2>/dev/null
    else
      printf 'ssh-helper: no password entered — aborting.\n' >&2
    fi
  fi

  printf 'ssh-helper: master NOT open for %s. Next step: re-run `ssh-helper.sh setup %s` (or the /ssh-setup %s slash command) with a current password; if the host changed, re-check `ssh-helper.sh hosts`.\n' \
    "$alias" "$alias" "$alias" >&2
  rm -f "$errfile"
  return 1
}

# cmd_close ALIAS — ask the alias's master to exit (when both the socket and
# the host row exist) and remove the socket file. Always reports "closed".
cmd_close() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: close ALIAS"
  local addr_port
  addr_port=$(read_host_addr "$alias") || addr_port=""
  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  if [ -S "$sock" ] && [ -n "$addr_port" ]; then
    local addr port
    addr=$(printf '%s' "$addr_port" | cut -f1)
    port=$(printf '%s' "$addr_port" | cut -f2)
    ssh -S "$sock" -O exit -p "$port" "$addr" 2>/dev/null || true
  fi
  rm -f "$sock"
  ok "closed master for $alias"
}

# cmd_status [ALIAS] — with an alias, print its address, port, credential
# presence, socket path and master state; without one, fall through to the
# cmd_hosts table.
cmd_status() {
  ensure_layout
  if [ -z "${1:-}" ]; then
    cmd_hosts
    return
  fi
  local alias="$1"
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  local cred="missing"
  if [ -f "$SSH_CREDS_DIR/$alias" ]; then
    cred="present"
  fi
  printf 'alias: %s\naddr: %s\nport: %s\ncred: %s\nsocket: %s\nstatus: ' \
    "$alias" "$addr" "$port" "$cred" "$sock"
  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    echo "master OPEN"
  else
    echo "no master (run setup)"
  fi
  return 0
}

# v0.8.13 (Cloverleaf login-shell fix): exec defaults to a LOGIN shell.
#
# Root cause of the "qa keeps asking me for $HCIROOT" friction: a plain
#   ssh host 'cmd'
# runs a NON-interactive, NON-login shell. On a Cloverleaf host, $HCIROOT (and
# $HCISITE, the hci* binaries on PATH, etc.) are exported by the LOGIN profile
# (/etc/profile.d, the hci user's ~/.profile / ~/.bash_profile, the per-site
# `.profile`). A non-login shell never sources those, so $HCIROOT arrives empty
# and Larry used to give up and nag the user for a path.
# Login-shell wrapper rationale (v0.8.13): wrapping the command in
# `bash -lc` forces a login shell, so the Cloverleaf environment populates
# exactly as it does for an interactive operator login. This is the version-
# agnostic, no-config fix — it works on any Cloverleaf host whose operator login
# sets up the environment (i.e. all of them).
#
# Escape hatch: prefix the command with the literal token NOLOGIN (or set
# LARRY_SSH_NO_LOGIN=1) to run a bare non-login shell — for the rare host where
# the login profile is interactive-only and hangs a non-tty `bash -l`.
_build_login_cmd() {
  # $1 = raw command string. Echoes the command to hand to ssh.
  local raw="$1"
  case "$raw" in
    NOLOGIN\ *)
      printf '%s' "${raw#NOLOGIN }"
      return
      ;;
  esac
  if [ "${LARRY_SSH_NO_LOGIN:-0}" = "1" ]; then
    printf '%s' "$raw"
    return
  fi
  # Single-quote the payload for a robust `bash -lc 'CMD'`. Embedded single
  # quotes become '\'' (close, escaped-quote, reopen) — POSIX-portable. Uses the
  # shared _shq helper so there is exactly one copy of the escaping rule.
  printf "bash -lc '%s'" "$(_shq "$raw")"
}

# v0.8.15 (sudo-gated-profile fix): when an alias has a pinned HCIROOT, the
# remote command must NOT go through the login profile (`bash -lc`). On hosts
# whose login profile is sudo-gated, a non-interactive SSH session trips
# `sudo: a terminal is required`, the profile never finishes, and $HCIROOT comes
# back EMPTY. Instead we export HCIROOT explicitly and run a plain `sh -c` (no
# login profile, no tty needed). This is deterministic and version-agnostic.
#
# _shq STR → echo STR with every single quote rewritten as '\'' so the result
# can be embedded inside another '...' context. It does NOT add the surrounding
# quotes; the caller supplies them.
_shq() {
  printf '%s' "$1" | sed "s/'/'\\\\''/g"
}

# _build_pinned_cmd HCIROOT RAW → a remote command string that exports HCIROOT
# explicitly (and HCISITEDIR-friendly callers can derive from it) then runs RAW
# under a NON-login `sh -c`. No `bash -lc`, so the sudo-gated profile is skipped.
# _build_pinned_cmd HCIROOT RAW → echo `sh -c '...'` where the inner script
# assigns and exports HCIROOT and then runs RAW (no login profile involved).
# The root is quoted TWICE on purpose: once for the inner `sh` (so a path with
# spaces, quotes or shell metacharacters stays one literal value), and once
# more — together with the rest of the inner script — for the outer '...' that
# the remote login shell parses.
_build_pinned_cmd() {
  local root="$1" raw="$2" inner
  inner="HCIROOT='$(_shq "$root")'; export HCIROOT; $raw"
  printf "sh -c '%s'" "$(_shq "$inner")"
}

# _remote_cmd_for ALIAS RAW → echo the exact command string to hand to ssh.
# If ALIAS has a pinned HCIROOT → pinned (explicit-export, no login profile).
# Else → the existing login-shell wrapper (_build_login_cmd). Single chokepoint
# so cmd_exec/cmd_discover/cmd_pull_smat all honour the pin identically.
_remote_cmd_for() {
  local alias="$1" raw="$2"
  local pin
  pin=$(read_host_hciroot "$alias")
  if [ -n "$pin" ]; then
    _build_pinned_cmd "$pin" "$raw"
  else
    _build_login_cmd "$raw"
  fi
}

# ── v0.8.17: DIRECT (no-multiplex) dispatch ──────────────────────────────────
#
# Some Cloverleaf hosts reject SSH ControlMaster session multiplexing: the master
# opens and authenticates, but every session multiplexed over it dies with
# "read from master failed: Connection reset by peer", then ssh falls back to a
# fresh connection that fails auth. Confirmed live on qa (bryjohnx@lhsixfqa →
# shdclvf01q, cis2025.01). The fix is to run each remote command as its OWN
# fresh ssh connection with forced password auth (sshpass -f CREDFILE) — NO
# master socket. This is legitimate password auth, NOT a traffic bypass: no
# proxy, no tunnel, no masking, and host-key checking stays on (accept-new).
#
# _DIRECT_CONNECT_TIMEOUT — seconds for ssh ConnectTimeout (env-overridable via
# LARRY_SSH_DIRECT_TIMEOUT). The value is later splatted UNQUOTED into the ssh
# command line, so anything that is not a plain non-negative integer is
# replaced by the default of 10.
_DIRECT_CONNECT_TIMEOUT="${LARRY_SSH_DIRECT_TIMEOUT:-10}"
case "$_DIRECT_CONNECT_TIMEOUT" in
  '' | *[!0-9]*) _DIRECT_CONNECT_TIMEOUT=10 ;;
esac

# _direct_ssh_opts → emit the shared ssh/scp `-o` option tokens for every DIRECT
# (no-ControlMaster) transport, one token per word, on STDOUT. v0.8.18: extracted
# so the security posture lives in ONE place and a change can't drift across the
# three call sites (cmd_setup probe, _run_direct, _direct_scp).
# _direct_ssh_opts — the five security-critical flags it emits are:
#   PreferredAuthentications=password — force the password method so sshpass
#                                        feeds the password cleanly past a banner
#   PubkeyAuthentication=no           — never silently fall back to a key
#   StrictHostKeyChecking=accept-new  — host-key checking STAYS ON (TOFU). This
#                                        is the no-traffic-bypass guarantee: we
#                                        never disable host verification.
#   ControlMaster=no / ControlPath=none — never multiplex (the box rejects it)
# Plus two shared connection knobs identical across all three callers:
#   NumberOfPasswordPrompts=1         — a stale password fails fast (one prompt)
#   ConnectTimeout=$_DIRECT_CONNECT_TIMEOUT
# ssh `-o` ordering is immaterial (no conflicting duplicate keys), so emitting
# these as one contiguous block is byte-equivalent in BEHAVIOR to the prior
# inline copies. Callers splat the words unquoted: `ssh $(_direct_ssh_opts) ...`.
# Every token here is a single shell word (no spaces inside any -o value), so the
# unquoted expansion is safe. The timeout falls back to 10 if the global is
# unset, so the function also works under `set -u` when called in isolation.
_direct_ssh_opts() {
  printf '%s\n' \
    -o "PreferredAuthentications=password" \
    -o "PubkeyAuthentication=no" \
    -o "NumberOfPasswordPrompts=1" \
    -o "StrictHostKeyChecking=accept-new" \
    -o "ControlMaster=no" \
    -o "ControlPath=none" \
    -o "ConnectTimeout=${_DIRECT_CONNECT_TIMEOUT:-10}"
}

# _direct_creds ALIAS → echoes the credfile path (the file /ssh-pass writes) and
# returns 0, or echoes nothing and returns 1 if the file is absent (the caller
# reports the error). Same file the ControlMaster path uses.
_direct_creds() {
  local alias="$1"
  local credfile="$SSH_CREDS_DIR/$alias"
  if [ -f "$credfile" ]; then
    printf '%s' "$credfile"
    return 0
  fi
  printf ''
  return 1
}

# _filter_direct_stderr — strip known-benign noise from a direct session's
# STDERR so the parsed STDOUT result is presented clean. The qa login profile
# emits a pre-auth banner ("Unauthorized access…/monitored", "WARNING", etc.)
# AND `sudo: a terminal is required` / `sudo: a password is required` /
# `sudo: no tty present` on STDERR for non-interactive sessions.
# _filter_direct_stderr (continued): pre-auth banner lines and sudo no-tty
# complaints are expected and harmless for our read-only enumeration — drop
# them. ANYTHING ELSE that remains is a real signal and is surfaced by the
# caller, but ONLY on an actual non-zero command failure (see _run_direct).
# Reads stderr on stdin; echoes the filtered remainder. The patterns are
# intentionally narrow so we never swallow a genuine error message. Always
# returns 0 (grep's "nothing left" status is deliberately ignored).
_filter_direct_stderr() {
  grep -ivE \
    'unauthorized (access|use)|access is monitored|monitored and recorded|this (system|computer|is a) .*(private|restricted|government|corporate)|by (logging in|accessing|using) .*(you )?(consent|agree)|all activ(ity|ities) .*(may be|are) (monitored|logged|recorded)|disconnect immediately|^[[:space:]]*\*+[[:space:]]*$|^[[:space:]]*WARNING[[:space:]]*[:!]?|sudo: a terminal is required|sudo: a password is required|sudo: no tty present|sudo: sorry, you must have a tty' \
    2>/dev/null || true
}

# _run_direct ALIAS REMOTE_CMD → run REMOTE_CMD on ALIAS over a FRESH per-command
# sshpass connection (no ControlMaster). REMOTE_CMD must already be shaped by
# _remote_cmd_for (so the HCIROOT pin / login-shell wrapper is honoured). STDOUT
# is passed through verbatim (the parsed-clean result). STDERR is captured,
# filtered for the known-benign banner+sudo lines, and surfaced ONLY when the
# remote command exits non-zero. Returns the remote command's exit code.
_run_direct() {
  local alias="$1" remote_cmd="$2"
  require_sshpass
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  local credfile
  credfile=$(_direct_creds "$alias") \
    || die "no password set for $alias — run 'pass $alias' first (direct mode needs the stored credential per command)"
  local errfile
  errfile=$(mktemp 2>/dev/null || echo "/tmp/larry-ssh-direct.err.$$")
  # NO ControlMaster/ControlPath. Forced password method so sshpass feeds the
  # password cleanly past any pre-auth banner (same rationale as the master
  # path's v0.8.15 PreferredAuthentications=password hardening). BatchMode is
  # NOT set — sshpass supplies the password non-interactively via the askpass
  # file descriptor; BatchMode would suppress that path on some builds.
  # v0.8.18: shared DIRECT options via _direct_ssh_opts (was an inline copy);
  # the unquoted expansion is deliberate — one option token per word.
  # shellcheck disable=SC2046
  sshpass -f "$credfile" ssh \
    $(_direct_ssh_opts) \
    -p "$port" \
    "$addr" "$remote_cmd" 2>"$errfile"
  local rc=$?
  # On a real failure, surface the FILTERED stderr (banner + sudo noise removed)
  # so the operator sees the genuine reason without the boilerplate. On success,
  # the benign noise is simply dropped — stdout is already the clean result.
  if [ "$rc" -ne 0 ]; then
    local filtered
    filtered=$(_filter_direct_stderr < "$errfile")
    if [ -n "$filtered" ]; then
      printf 'ssh-helper: direct command failed for %s (rc=%d). Remote stderr (benign banner/sudo lines stripped):\n' "$alias" "$rc" >&2
      printf '%s\n' "$filtered" >&2
    else
      printf 'ssh-helper: direct command failed for %s (rc=%d) with no non-benign stderr — likely an auth failure (stale/rotated password?) or a connection reset. Re-check: ssh-helper.sh setup %s\n' "$alias" "$rc" "$alias" >&2
    fi
  fi
  rm -f "$errfile"
  return "$rc"
}

# _direct_scp ALIAS SRC DST → scp SRC→DST over a FRESH sshpass connection (no
# ControlMaster), forced password auth, host-key checked. Either SRC or DST is a
# remote spec of the form "ADDR:PATH" supplied by the caller (cmd_pull builds
# it). Returns scp's exit code. STDERR (incl. banner/sudo noise) is filtered the
# same way as _run_direct and surfaced only on failure. v0.8.17.
_direct_scp() {
  local alias="$1" src="$2" dst="$3"
  require_sshpass

  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local port; port=$(printf '%s' "$addr_port" | cut -f2)
  local credfile; credfile=$(_direct_creds "$alias") \
    || die "no password set for $alias — run 'pass $alias' first"

  # Fallback lives in the user's own $LARRY_HOME, not a predictable /tmp name
  # (the 2>"$errfile" redirect would follow a pre-planted symlink there).
  local errfile
  errfile=$(mktemp 2>/dev/null \
    || printf '%s' "${LARRY_HOME:-${TMPDIR:-/tmp}}/larry-scp-direct.err.$$")

  # v0.8.18: shared DIRECT options via _direct_ssh_opts (was an inline copy).
  # scp reads the same ssh-style -o options; only the port flag differs (-P).
  # $(_direct_ssh_opts) is deliberately unquoted: it expands to several words.
  sshpass -f "$credfile" scp -q \
    $(_direct_ssh_opts) \
    -P "$port" \
    "$src" "$dst" 2>"$errfile"
  local rc=$?

  if [ "$rc" -ne 0 ]; then
    local filtered; filtered=$(_filter_direct_stderr < "$errfile")
    printf 'ssh-helper: direct scp failed for %s (rc=%d):\n' "$alias" "$rc" >&2
    if [ -n "$filtered" ]; then
      printf '%s\n' "$filtered" >&2
    fi
  fi
  rm -f -- "$errfile"
  return "$rc"
}

# _dispatch_remote ALIAS RAW_CMD → run RAW_CMD on ALIAS, choosing the transport:
#   DIRECT mode (column 5 == on) → fresh per-command sshpass (_run_direct)
#   else                         → existing ControlMaster multiplex
# In BOTH cases the remote command is shaped identically by _remote_cmd_for, so
# the HCIROOT pin and login-shell semantics are unchanged across transports.
# Requires (master mode only) that the master is open — the master-mode branch
# preserves the prior die-on-closed-master behaviour exactly.
# Returns the exit status of the remote command / ssh.
_dispatch_remote() {
  local alias="$1" raw="$2"
  local shaped; shaped=$(_remote_cmd_for "$alias" "$raw")

  if _alias_is_direct "$alias"; then
    _run_direct "$alias" "$shaped"
    return $?
  fi

  # ── ControlMaster path (unchanged for non-direct aliases) ────────────────
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  # The socket file must exist AND the master behind it must answer -O check.
  if [ ! -S "$sock" ] || ! ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    die "no open master for $alias — run 'setup $alias' first"
  fi
  ssh -S "$sock" -p "$port" -o BatchMode=yes "$addr" "$shaped"
}

# cmd_exec ALIAS COMMAND... → run COMMAND (all remaining args joined by spaces)
# on ALIAS and pass its output through. Returns the remote exit status.
cmd_exec() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: exec ALIAS COMMAND..."
  shift
  local cmd="$*"
  [ -n "$cmd" ] || die "no command given"
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  # v0.8.17: transport selection is centralised in _dispatch_remote.
  #   • DIRECT mode → a fresh per-command sshpass connection (no master), with
  #     benign banner/sudo stderr stripped and real errors surfaced on failure.
  #   • else        → the existing ControlMaster multiplex (no password needed).
  # In both cases the remote command is shaped identically: a pinned HCIROOT is
  # exported explicitly + login profile skipped (v0.8.15); otherwise a login
  # shell populates $HCIROOT et al. (see _remote_cmd_for / _build_login_cmd).
  _dispatch_remote "$alias" "$cmd"
}

# cmd_discover ALIAS — proactively detect the remote Cloverleaf environment.
# Resolves $HCIROOT, then enumerates sites two ways:
#   1. NetConfig walk under $HCIROOT (version-agnostic ground truth — the same
#      "a site is a dir with a NetConfig" rule each-site.sh uses)
#   2. hcisitelist (the Cloverleaf-shipped site lister), only as a fallback when
#      it is on the PATH and the walk found nothing
# Emits TSV to stdout the tool layer can parse deterministically:
#   HCIROOT<TAB>PATH      (PATH empty if unresolved)
#   EXCLUDED<TAB>NAMES    (space-separated, only when something was dropped)
#   SITE<TAB>NAME         (zero or more, one line per site)
# Never prompts; on failure it emits what it could resolve + a NOTE line.
cmd_discover() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: discover ALIAS"
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)

  # v0.8.17: in DIRECT mode there is no master to check — _dispatch_remote runs a
  # fresh per-command sshpass connection below. Only validate an open master for
  # the (unchanged) multiplex path.
  if ! _alias_is_direct "$alias"; then
    local sock="$SSH_SOCKETS_DIR/$alias.sock"
    if [ ! -S "$sock" ] || ! ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
      die "no open master for $alias — run 'setup $alias' first"
    fi
  fi

  # A single remote script. It:
  #   - prints HCIROOT\t$HCIROOT
  #   - PRIMARY enumeration = the NetConfig walk under $HCIROOT (depth ≤2),
  #     IDENTICAL to lib/each-site.sh: find NetConfig files → dirname → basename
  #     → sort -u. This is the version-agnostic ground truth and works on a box
  #     with NO `hcisitelist` (v0.8.15 portability fix — confirmed: shdclvf01q
  #     has no hcisitelist).
  #   - `hcisitelist` is used ONLY if it is actually present AND the walk found
  #     nothing (belt-and-suspenders), never as the dependency.
  # Kept POSIX-sh so it runs under whatever /bin/sh spawns it.
  #
  # NOTE on environment: when the alias has a pinned HCIROOT, _remote_cmd_for
  # exports HCIROOT explicitly and runs this under a NON-login `sh -c` (skips the
  # sudo-gated login profile). Otherwise it runs under `bash -lc` so the login
  # profile populates $HCIROOT. Either way the script below only reads
  # ${HCIROOT:-}, so it is agnostic to which path delivered it.

  # v0.8.15 (list-sites exclusion): drop non-real entries from the enumeration so
  # /sites shows only operator-meaningful sites. Two filters, applied at the walk
  # source (so REMOTE pinned, REMOTE login-shell, and LOCAL all behave the same):
  #   1. SITES_EXCLUDE — static scaffolding/special dirs (helloworld, siteProto,
  #      master). A documented, tunable env var: Bryan can override at call time
  #      via `SITES_EXCLUDE='...' discover ALIAS` without a config UI.
  #   2. Host-name match — any site dir whose name == the remote `hostname -s` or
  #      full `hostname` (a dir just named after the box, e.g. shdclvf01q). The
  #      remote hostname is the primary signal; we ALSO pass the alias's configured
  #      SSH host as a secondary candidate (qa's alias host is lhsixfqa) so a dir
  #      matching that is dropped too.
  # NOT silent: every dropped name is reported on an EXCLUDED note so the tool
  # layer surfaces it. The real-site list/count stays the headline.
  local sites_exclude="${SITES_EXCLUDE:-helloworld siteProto master}"
  # bare host from the alias's user@host (strip optional user@); '-' if none.
  local alias_host="${addr#*@}"; [ -n "$alias_host" ] || alias_host="-"

  # The script is one single-quoted literal; the two '\'"$(_shq …)"\'' splices
  # drop out of the quotes to embed the locally-computed values as remote
  # single-quoted strings. The script itself must therefore contain no single
  # quotes.
  # The final emission iterates `for s in $kept` (same word-splitting as the
  # loop that built it) so that (a) each kept site gets its OWN SITE line and
  # (b) the script exits 0 even when every site was excluded — a trailing
  # `read` loop would return the status of a failed `[ -n "" ]` test, which
  # DIRECT mode then reports as a command failure.
  local remote='
SITES_EXCLUDE='\'"$(_shq "$sites_exclude")"\'';
ALIAS_HOST='\'"$(_shq "$alias_host")"\'';
printf "HCIROOT\t%s\n" "${HCIROOT:-}";
if [ -z "${HCIROOT:-}" ]; then
  printf "NOTE\tHCIROOT is empty. If this host has a sudo-gated/non-interactive login profile, pin it: ssh-helper.sh set-hciroot ALIAS PATH\n";
  exit 0;
fi;
if [ ! -d "${HCIROOT}" ]; then
  printf "NOTE\tHCIROOT=%s is not a directory on the remote — check the pinned path\n" "${HCIROOT}";
  exit 0;
fi;
sites=$(find "$HCIROOT" -mindepth 1 -maxdepth 2 -name NetConfig -type f 2>/dev/null \
  | while IFS= read -r nc; do d=$(dirname "$nc"); basename "$d"; done \
  | sort -u);
if [ -z "$sites" ] && command -v hcisitelist >/dev/null 2>&1; then
  printf "NOTE\tNetConfig walk found no sites; falling back to hcisitelist\n";
  sites=$(hcisitelist 2>/dev/null | tr " " "\n" | grep -v "^$" | sort -u);
fi;
if [ -z "$sites" ]; then
  printf "NOTE\tno sites with a NetConfig found under %s\n" "$HCIROOT";
  exit 0;
fi;
HN_S=$(hostname -s 2>/dev/null || true);
HN_F=$(hostname 2>/dev/null || true);
kept=""; dropped="";
for s in $sites; do
  [ -n "$s" ] || continue;
  drop="";
  for x in $SITES_EXCLUDE; do [ "$s" = "$x" ] && drop=1 && break; done;
  [ -z "$drop" ] && [ -n "$HN_S" ] && [ "$s" = "$HN_S" ] && drop=1;
  [ -z "$drop" ] && [ -n "$HN_F" ] && [ "$s" = "$HN_F" ] && drop=1;
  [ -z "$drop" ] && [ "$ALIAS_HOST" != "-" ] && [ "$s" = "$ALIAS_HOST" ] && drop=1;
  if [ -n "$drop" ]; then dropped="$dropped $s"; else kept="$kept $s"; fi;
done;
dropped=$(printf "%s" "$dropped" | sed "s/^ *//");
[ -n "$dropped" ] && printf "EXCLUDED\t%s\n" "$dropped";
for s in $kept; do printf "SITE\t%s\n" "$s"; done'

  # v0.8.17: dispatch over DIRECT sshpass or the ControlMaster, per the alias's
  # flag. The TSV that the tool layer parses is on STDOUT and stays clean; the
  # qa banner/sudo noise on STDERR is stripped by _run_direct's filter (direct
  # mode) and surfaced only on a real non-zero failure.
  _dispatch_remote "$alias" "$remote"
}

# ── v0.6.8: scp helpers that multiplex via the existing ControlMaster ────────
# We use ssh's ControlPath/ControlMaster=no for scp (scp reads ssh-style options
# via -o), so the file transfer rides the open master and needs no second auth.

# Resolve ADDR/PORT/SOCK for an alias; die if master not open. Sets globals:
#   _RH_ADDR _RH_PORT _RH_SOCK
_resolve_open_master() {
  local alias="$1"
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  _RH_ADDR=$(printf '%s' "$addr_port" | cut -f1)
  _RH_PORT=$(printf '%s' "$addr_port" | cut -f2)
  _RH_SOCK="$SSH_SOCKETS_DIR/$alias.sock"
  if [ ! -S "$_RH_SOCK" ] || ! ssh -S "$_RH_SOCK" -O check -p "$_RH_PORT" "$_RH_ADDR" 2>/dev/null; then
    die "no open master for $alias — open it with /ssh-setup $alias first"
  fi
}

# Deterministic local cache path for ssh_pull:
#   /tmp/larry-pulls/ALIAS.BASENAME.HASH8
# Prints the path (no trailing newline) and creates /tmp/larry-pulls if needed.
_pull_cache_path() {
  local alias="$1" remote="$2"
  local base; base=$(basename -- "$remote" 2>/dev/null)
  [ -z "$base" ] && base="file"
  # 8-char hex hash of full remote path. We try the most common hashers in
  # turn; on a stripped box without any, fall back to a length+checksum proxy
  # so the path is still deterministic per (alias, remote path).
  local hash=""
  if command -v shasum >/dev/null 2>&1; then
    hash=$(printf '%s' "$remote" | shasum -a 1 2>/dev/null | cut -c1-8)
  elif command -v sha1sum >/dev/null 2>&1; then
    hash=$(printf '%s' "$remote" | sha1sum 2>/dev/null | cut -c1-8)
  elif command -v md5sum >/dev/null 2>&1; then
    hash=$(printf '%s' "$remote" | md5sum 2>/dev/null | cut -c1-8)
  else
    hash=$(printf '%s' "$remote" | cksum 2>/dev/null | awk '{printf "%08x", $1}' | cut -c1-8)
  fi
  [ -z "$hash" ] && hash="00000000"
  mkdir -p /tmp/larry-pulls 2>/dev/null
  printf '/tmp/larry-pulls/%s.%s.%s' "$alias" "$base" "$hash"
}

# cmd_pull ALIAS REMOTE_PATH [LOCAL_PATH] → copy a remote file to the local box.
# LOCAL_PATH defaults to the deterministic cache path (_pull_cache_path). The
# remote size is probed first and compared with the bytes received; a mismatch
# dies with "partial transfer". On success the LAST stdout line is the local
# path. Returns 0 on success, 1 when scp fails.
cmd_pull() {
  local alias="${1:-}" remote="${2:-}" local_path="${3:-}"
  [ -n "$alias" ] && [ -n "$remote" ] || die "usage: pull ALIAS REMOTE_PATH [local_path]"

  # v0.8.17: DIRECT mode — no ControlMaster. The remote-size probe rides a fresh
  # per-command sshpass connection (_dispatch_remote → _run_direct), and the
  # transfer uses _direct_scp (also fresh sshpass). Everything else (cache path,
  # size verification, the clean final-line local path) is identical.
  if _alias_is_direct "$alias"; then
    local addr_port; addr_port=$(read_host_addr "$alias")
    [ -n "$addr_port" ] || die "no such alias: $alias"
    local _d_addr; _d_addr=$(printf '%s' "$addr_port" | cut -f1)
    [ -z "$local_path" ] && local_path=$(_pull_cache_path "$alias" "$remote")
    mkdir -p "$(dirname "$local_path")" 2>/dev/null
    local remote_size
    remote_size=$(coerce_int "$(_dispatch_remote "$alias" "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" "")
    if [ -z "$remote_size" ] || ! [[ "$remote_size" =~ ^[0-9]+$ ]]; then
      die "remote file not found or not readable: $remote"
    fi
    if _direct_scp "$alias" "$_d_addr:$remote" "$local_path"; then
      local got; got=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
      if [ "$got" != "$remote_size" ]; then
        die "partial transfer: remote=$remote_size bytes, local=$got bytes ($local_path)"
      fi
      ok "pulled $alias:$remote → $local_path ($got bytes, direct)"
      printf '%s\n' "$local_path"
      return 0
    fi
    return 1
  fi

  _resolve_open_master "$alias"
  [ -z "$local_path" ] && local_path=$(_pull_cache_path "$alias" "$remote")
  mkdir -p "$(dirname "$local_path")" 2>/dev/null

  # Get remote file size up-front for a partial-transfer sanity check.
  # v0.7.5: coerce_int on wc output — strips CR + non-digits at the source.
  local remote_size=""
  remote_size=$(coerce_int "$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" "")
  if [ -z "$remote_size" ] || ! [[ "$remote_size" =~ ^[0-9]+$ ]]; then
    die "remote file not found or not readable: $remote"
  fi

  # scp via the existing master: -o ControlPath=... -o ControlMaster=no
  # Fallback temp name lives in $LARRY_HOME, not a predictable /tmp path.
  local scp_err
  scp_err=$(mktemp 2>/dev/null \
    || printf '%s' "${LARRY_HOME:-${TMPDIR:-/tmp}}/larry-scp.err.$$")
  # Capture scp's status directly: `$?` read AFTER an `if … fi` whose condition
  # failed is always 0, which made the failure message report rc=0.
  local rc=0
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$_RH_ADDR:$remote" "$local_path" 2>"$scp_err" || rc=$?

  if [ "$rc" -eq 0 ]; then
    # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
    local got; got=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
    if [ "$got" != "$remote_size" ]; then
      rm -f -- "$scp_err"
      die "partial transfer: remote=$remote_size bytes, local=$got bytes ($local_path)"
    fi
    rm -f -- "$scp_err"
    ok "pulled $alias:$remote → $local_path ($got bytes)"
    # Print only the local path on the final line so callers (tool layer) can
    # capture it deterministically with `tail -1` or similar.
    printf '%s\n' "$local_path"
    return 0
  fi

  printf 'ssh-helper: scp pull failed (rc=%d):\n' "$rc" >&2
  cat "$scp_err" >&2 2>/dev/null
  rm -f -- "$scp_err"
  return 1
}

# cmd_push ALIAS LOCAL_PATH REMOTE_PATH → copy a local file to the remote box,
# then verify the remote byte count against the local one (mismatch dies with
# "partial transfer"). Returns 0 on success, 1 when scp fails.
cmd_push() {
  local alias="${1:-}" local_path="${2:-}" remote="${3:-}"
  [ -n "$alias" ] && [ -n "$local_path" ] && [ -n "$remote" ] \
    || die "usage: push ALIAS LOCAL_PATH REMOTE_PATH"
  [ -f "$local_path" ] || die "local file not found: $local_path"

  # v0.8.18: DIRECT mode — symmetric with cmd_pull's direct branch. No
  # ControlMaster (the host rejects multiplexing); the transfer uses _direct_scp
  # (fresh per-command sshpass), and the post-transfer size verification rides a
  # fresh per-command connection via _dispatch_remote → _run_direct. Without this
  # branch, ssh_push (an exposed tool, used by nc_regression phase 4 to push
  # cross-env input bundles) died "no open master" for any DIRECT-mode alias.
  if _alias_is_direct "$alias"; then
    local addr_port; addr_port=$(read_host_addr "$alias")
    [ -n "$addr_port" ] || die "no such alias: $alias"
    local _d_addr; _d_addr=$(printf '%s' "$addr_port" | cut -f1)
    local local_size; local_size=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
    if _direct_scp "$alias" "$local_path" "$_d_addr:$remote"; then
      local got
      got=$(coerce_int "$(_dispatch_remote "$alias" "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" 0)
      if [ "$got" != "$local_size" ]; then
        die "partial transfer: local=$local_size bytes, remote=$got bytes ($alias:$remote)"
      fi
      ok "pushed $local_path → $alias:$remote ($got bytes, direct)"
      return 0
    fi
    return 1
  fi

  _resolve_open_master "$alias"

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local local_size; local_size=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)

  # Fallback temp name lives in $LARRY_HOME, not a predictable /tmp path.
  local scp_err
  scp_err=$(mktemp 2>/dev/null \
    || printf '%s' "${LARRY_HOME:-${TMPDIR:-/tmp}}/larry-scp.err.$$")
  # Capture scp's status directly (see cmd_pull: `$?` after `fi` is always 0).
  local rc=0
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$local_path" "$_RH_ADDR:$remote" 2>"$scp_err" || rc=$?

  if [ "$rc" -eq 0 ]; then
    # Validate via remote wc -c.
    local got
    # v0.7.5: coerce_int on wc output (Cygwin wc.exe CR-taint defense).
    got=$(coerce_int "$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
      "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" 0)
    if [ "$got" != "$local_size" ]; then
      rm -f -- "$scp_err"
      die "partial transfer: local=$local_size bytes, remote=$got bytes ($alias:$remote)"
    fi
    rm -f -- "$scp_err"
    ok "pushed $local_path → $alias:$remote ($got bytes)"
    return 0
  fi

  printf 'ssh-helper: scp push failed (rc=%d):\n' "$rc" >&2
  cat "$scp_err" >&2 2>/dev/null
  rm -f -- "$scp_err"
  return 1
}

# pull-smat: smart pull for a Cloverleaf thread's .smatdb file.
# Two modes:
#   Full pull:   pull-smat ALIAS SITE THREAD
#                Locates $HCISITEDIR/exec/processes/*/THREAD.smatdb on the
#                remote via find, then scp's the entire .smatdb file.
#   Sampled:     pull-smat ALIAS SITE THREAD DAYS_BACK
#                Runs sqlite3 server-side, extracts up to 1000 most-recent
#                messages from the last DAYS_BACK days, encodes each
#                MessageContent BLOB as base64, returns TSV:
#                  unix_ts TAB direction TAB type TAB source TAB dest TAB message_blob_b64
#                The schema (table=smat_msgs, columns Time/Type/SourceConn/
#                DestConn/MessageContent) is the same one nc-msgs.sh uses.
# DAYS_BACK must be a non-negative integer (it is spliced into remote shell
# arithmetic); anything else dies before any remote call.
# NOTE(review): SITE and THREAD are spliced into the remote command inside
# double quotes without escaping — names containing quotes, `$`, backticks or
# backslashes will break the command. Confirm callers only pass plain names.
cmd_pull_smat() {
  local alias="${1:-}" site="${2:-}" thread="${3:-}" days_back="${4:-}"
  [ -n "$alias" ] && [ -n "$site" ] && [ -n "$thread" ] \
    || die "usage: pull-smat ALIAS SITE THREAD [days_back]"
  if [ -n "$days_back" ] && ! [[ "$days_back" =~ ^[0-9]+$ ]]; then
    die "days_back must be a non-negative integer: $days_back"
  fi

  # v0.8.17: in DIRECT mode every remote op is a fresh per-command sshpass
  # connection — there is no master to resolve. Only require an open master for
  # the (unchanged) multiplex path; both the find/sample command dispatches and
  # the full-file scp below pick the transport via the direct flag.
  if ! _alias_is_direct "$alias"; then
    _resolve_open_master "$alias"
  fi

  # Discover the remote .smatdb path. $HCISITEDIR/$HCIROOT are resolved by the
  # LOGIN shell (see _build_login_cmd) — the v0.8.13 fix — so we no longer
  # depend on a non-login rc happening to export them. SITEDIR falls back to
  # $HCIROOT/SITE if HCISITEDIR isn't set for that site. The find runs
  # remotely to avoid hard-coding process directory names.
  local find_cmd
  find_cmd='set -e; SDIR="${HCISITEDIR:-${HCIROOT:-}/'"$site"'}"; '
  find_cmd+='[ -d "$SDIR" ] || { echo "ERROR: sitedir not found on remote: $SDIR" >&2; exit 2; }; '
  find_cmd+='F=$(find "$SDIR/exec/processes" -maxdepth 2 -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || F=$(find "$SDIR" -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || { echo "ERROR: no smatdb found for thread '"$thread"' under $SDIR" >&2; exit 3; }; '
  # v0.8.13 M1 hardening (Vera Minor #1): emit the resolved path behind an
  # unambiguous sentinel prefix instead of relying on it being the last stdout
  # line. A login shell (`bash -lc`, the v0.8.13 fix) is the case most likely to
  # print a MOTD/banner to stdout, which a blind `tail -1` would mistake for the
  # path. We grep for the sentinel line and strip it; only if no sentinel is
  # present (host somehow stripped it) do we fall back to the prior `tail -1`
  # behaviour, so this can never regress a host that worked before.
  find_cmd+='printf "SMATDB_PATH:%s\n" "$F"'

  local _smat_raw remote_smatdb
  # v0.8.15: honour a pinned HCIROOT (explicit export, no sudo-gated login profile).
  # v0.8.17: dispatch over DIRECT sshpass or the ControlMaster per the alias flag.
  # We capture stdout+stderr together (2>&1) as before — the SMATDB_PATH sentinel
  # / ERROR: parsing already tolerates banner/sudo noise interleaved on stderr,
  # so the direct path needs no extra filtering here.
  _smat_raw=$(_dispatch_remote "$alias" "$find_cmd" 2>&1)
  remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^SMATDB_PATH:' | tail -1)
  if [ -n "$remote_smatdb" ]; then
    remote_smatdb="${remote_smatdb#SMATDB_PATH:}"
  else
    # No sentinel — surface any ERROR: line if present, else fall back to the
    # last line (pre-hardening behaviour) so failure modes stay diagnosable.
    remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^ERROR:' | tail -1)
    [ -n "$remote_smatdb" ] || remote_smatdb=$(printf '%s\n' "$_smat_raw" | tail -1)
  fi
  case "$remote_smatdb" in
    ERROR:*|'') die "remote smatdb lookup failed: $remote_smatdb" ;;
  esac

  if [ -z "$days_back" ]; then
    # Full mode: scp the whole .smatdb file.
    local local_path
    local_path=$(_pull_cache_path "$alias" "$remote_smatdb")
    cmd_pull "$alias" "$remote_smatdb" "$local_path"
    return $?
  fi

  # Sampled mode: run sqlite3 on the remote, return TSV with b64-encoded blobs.
  # base64 -w0 is GNU coreutils; on BSD use plain base64 (no -w). We accept
  # whichever is present; newlines inside the encoded blob are stripped so each
  # message stays on one TSV row.
  #
  # Output line shape (each message):
  #   unix_ts \t direction \t type \t source \t dest \t message_blob_b64
  # `direction` is "in" when DestConn=thread, else "out" (best-effort heuristic).
  local sample_cmd
  sample_cmd='set -e; '
  # `command -v` (POSIX) rather than `which`, which is absent on minimal hosts
  # and would make the probe misreport sqlite3 as missing.
  sample_cmd+='command -v sqlite3 >/dev/null 2>&1 || { echo "ERROR: sqlite3 not on remote PATH" >&2; exit 4; }; '
  sample_cmd+='B64() { if base64 --help 2>&1 | grep -q -- " -w"; then base64 -w0; else base64 | tr -d "\n"; fi; }; '
  # Note: the first sqlite3 call lists rowids + metadata (default "|"-separated
  # output, parsed by the read loop); each blob is then written via writefile()
  # into a per-row temp file and base64'd inline. That avoids any binary
  # mangling of MessageContent on its way through the sqlite3 text output.
  sample_cmd+='TMP=$(mktemp -d); trap "rm -rf $TMP" EXIT; '
  sample_cmd+='CUTOFF_MS=$(( ( $(date +%s) - '"$days_back"' * 86400 ) * 1000 )); '
  sample_cmd+='sqlite3 "'"$remote_smatdb"'" "SELECT rowid, Time, IFNULL(Type,\"\"), IFNULL(SourceConn,\"\"), IFNULL(DestConn,\"\") FROM smat_msgs WHERE Time >= $CUTOFF_MS ORDER BY Time DESC LIMIT 1000" '
  sample_cmd+='| while IFS="|" read -r rid tm typ src dst; do '
  sample_cmd+='  blobfile="$TMP/$rid.bin"; '
  sample_cmd+='  sqlite3 "'"$remote_smatdb"'" "SELECT writefile(\"$blobfile\", MessageContent) FROM smat_msgs WHERE rowid=$rid" >/dev/null 2>&1; '
  sample_cmd+='  if [ "$dst" = "'"$thread"'" ]; then dir="in"; else dir="out"; fi; '
  sample_cmd+='  printf "%s\t%s\t%s\t%s\t%s\t" "$(( tm / 1000 ))" "$dir" "$typ" "$src" "$dst"; '
  sample_cmd+='  B64 < "$blobfile"; '
  sample_cmd+='  printf "\n"; '
  sample_cmd+='done; '
  sample_cmd+='TOTAL=$(sqlite3 "'"$remote_smatdb"'" "SELECT COUNT(*) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='RETURNED=$(sqlite3 "'"$remote_smatdb"'" "SELECT MIN(1000, COUNT(*)) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='echo "# smatdb=$(basename '"$remote_smatdb"') days_back='"$days_back"' total_in_window=$TOTAL returned=$RETURNED truncated=$([ "$TOTAL" -gt 1000 ] && echo yes || echo no)" >&2'

  # Login shell so sqlite3 resolves from the operator's PATH (v0.8.13), unless
  # the alias has a pinned HCIROOT, in which case we export HCIROOT explicitly
  # and skip the sudo-gated login profile (v0.8.15). Note: when pinned, sqlite3
  # must be resolvable on the default non-login PATH; if it is not, the
  # sample_cmd already emits a clear "ERROR: sqlite3 not on remote PATH".
  # v0.8.17: dispatch over DIRECT sshpass or the ControlMaster per the alias flag.
  _dispatch_remote "$alias" "$sample_cmd"
}

# ── Subcommand dispatch ──────────────────────────────────────────────────────
# No argument at all behaves like `help`.
case "${1:-help}" in
  hosts|list)           shift; cmd_hosts ;;
  add)                  shift; cmd_add "$@" ;;
  remove|rm)            shift; cmd_remove "$@" ;;
  pass|passwd)          shift; cmd_pass "$@" ;;
  set-hciroot|hciroot)  shift; cmd_set_hciroot "$@" ;;
  set-direct|direct)    shift; cmd_set_direct "$@" ;;
  setup|open)           shift; cmd_setup "$@" ;;
  close|exit)           shift; cmd_close "$@" ;;
  status)               shift; cmd_status "$@" ;;
  exec|run)             shift; cmd_exec "$@" ;;
  discover)             shift; cmd_discover "$@" ;;
  pull)                 shift; cmd_pull "$@" ;;
  push)                 shift; cmd_push "$@" ;;
  pull-smat)            shift; cmd_pull_smat "$@" ;;
  -h|--help|help)       cmd_help ;;
  *)                    die "unknown subcommand: ${1:-} (run with --help)" ;;
esac