cloverleaf-larry/lib/ssh-helper.sh
Bryan Johnson 111be2c744 v0.8.26: harden control-byte sanitize across the tool suite + ssh-helper traps
Shared _sanitize_ctl (unconditional, nc-document) and _sanitize_ctl_tty
(strips only when stdout is a terminal) now live in cygwin-safe.sh. nc-msgs,
nc-parse, and the hl7-* tools route stdout through the tty-gated variant, so a
terminal is protected from raw HL7/NetConfig control bytes while pipes and
redirects stay byte-exact (the 0x1c framing route_test needs is preserved).
Exit codes propagate via PIPESTATUS. ssh-helper _read_hidden installs its
restore trap before stty -echo on every path and saves/restores the prior trap.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 16:35:06 -07:00

1212 lines
60 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# ssh-helper.sh — secure SSH command execution via ControlMaster.
#
# Architecture:
# • Hosts configured in $LARRY_HOME/.ssh-hosts.tsv as
# alias \t user@host \t port
# • Passwords stored at $LARRY_HOME/.ssh-creds/<alias>, mode 0600.
# The password file is the single point of truth — to rotate (daily-changing
# passwords) just overwrite the file with the new one and re-run 'setup'.
# • sshpass reads the password via -f (file), so it never lands in argv or
# environment where Larry the LLM (or other processes via /proc) could see it.
# • The first 'setup' call opens a long-lived SSH ControlMaster connection
# (default ControlPersist=8h). Subsequent 'exec' calls multiplex through
# the master socket and need no password.
# • Larry's tool layer only sees: alias, command, command_output.
# Never the password. Never the user@host (unless added to the alias list).
#
# Subcommands:
# hosts list configured hosts
# add <alias> <user@host[:port]> add a host to the alias list
# remove <alias> remove an alias (also clears cred + socket)
# pass <alias> set/update the password (hidden interactive)
# set-hciroot <alias> <path> pin (persist) $HCIROOT for an alias. When
# set, remote enumeration/exec runs with
# HCIROOT=<path> exported EXPLICITLY and
# WITHOUT the `bash -lc` login wrapper — for
# hosts whose login profile is sudo-gated or
# otherwise non-interactive (v0.8.15).
# Pass an empty path to clear the pin.
# set-direct <alias> on|off toggle (persist) DIRECT mode for an alias
# (v0.8.17). When ON, ALL remote ops for the
# alias BYPASS the ControlMaster and run a
# FRESH per-command sshpass connection with
# forced password auth. For hosts that reject
# SSH session multiplexing ("read from master
# failed: Connection reset by peer") — the
# master opens but multiplexed sessions die.
# The pinned HCIROOT (set-hciroot) is still
# honoured: the remote command is shaped by
# the same _remote_cmd_for path, just sent
# over the direct connection. NO traffic
# bypass — plain forced-password ssh, no
# proxy/tunnel/masking, host-key checked
# (accept-new). Persisted as TSV column 5.
# setup <alias> open ControlMaster (uses stored password ONCE).
# In DIRECT mode, skips the (pointless) master
# and instead VALIDATES the password with one
# trivial direct command, then reports ready.
# close <alias> close ControlMaster
# status [alias] show open masters / cred presence
# exec <alias> <command...> run command via master (returns output)
# discover <alias> auto-detect remote Cloverleaf env:
# resolves $HCIROOT (LOGIN shell), then
# enumerates sites (hcisitelist fast-path,
# NetConfig-walk fallback). Prints TSV:
# HCIROOT<TAB><path>
# SITE<TAB><name> (one per site)
# No nagging for paths — the remote's own
# login profile is the source of truth.
# pull <alias> <remote> [local] scp remote → local via existing master
# push <alias> <local> <remote> scp local → remote via existing master
# pull-smat <alias> <site> <thread> [days_back]
# pull a thread's smatdb (full) or sample
# recent messages from it (sampled, TSV b64)
# help print this help
# Shell options: referencing an unset variable is an error, and a pipeline
# reports failure when any stage fails (not only the last one).
set -uo pipefail

# Root of all ssh-helper state; export LARRY_HOME to relocate it.
: "${LARRY_HOME:=$HOME/.larry}"

# Paths derived from the state root.
SSH_HOSTS_FILE="${LARRY_HOME}/.ssh-hosts.tsv"  # alias table (TSV)
SSH_CREDS_DIR="${LARRY_HOME}/.ssh-creds"       # one password file per alias
SSH_SOCKETS_DIR="${LARRY_HOME}/.ssh-sockets"   # ControlMaster sockets

# ControlPersist lifetime handed to ssh; override via LARRY_SSH_CONTROL_PERSIST.
SSH_CONTROL_PERSIST="${LARRY_SSH_CONTROL_PERSIST:-8h}"
# die MSG... — print a prefixed fatal error on stderr and exit with status 1.
die() {
  printf >&2 'ssh-helper: %s\n' "$*"
  exit 1
}

# warn MSG... — print a prefixed, non-fatal warning on stderr.
warn() {
  printf >&2 'ssh-helper: warn: %s\n' "$*"
}

# ok MSG... — print a prefixed progress/success line on stdout.
ok() {
  printf 'ssh-helper: %s\n' "$*"
}
# v0.8.25 — SAFE HIDDEN-PASSWORD READ (terminal-corruption fix).
# Previously the hidden-password prompts did a bare `stty -echo` ... read ...
# `stty echo`. If the user hit Ctrl-C (or the read got an EOF/signal) BETWEEN
# those two stty calls, echo was never re-enabled and the terminal was left
# corrupted (typing invisible) — recoverable only with `stty sane`/`reset`.
# This wrapper SAVES the full prior termios state with `stty -g` and restores
# it via a trap on INT/TERM/HUP (plus an explicit restore right after the
# read), so an interrupt during the prompt restores the tty.
# Reads from /dev/tty (the real terminal), not stdin. Portable: `stty -g`/`stty <state>`
# is POSIX and works on AIX/Linux/BSD/Cygwin; no GNU-only flags.
# _read_hidden VARNAME
# Sets the named variable to the line read (no echo). Returns 0 always (empty
# input is the caller's "abort" signal).
_read_hidden() { # varname
# Read one line with terminal echo switched off and store it in the caller's
# variable whose NAME is passed as $1.
#   $1  name of the variable to receive the line (assigned with `printf -v`)
# Input is taken from /dev/tty when that read succeeds, otherwise from stdin.
# When stty is unavailable, or there is neither a tty on stdin nor a /dev/tty,
# the line is read without touching the terminal settings at all.
# NOTE(review): the INT/TERM/HUP handlers installed below only run stty; they
# do not exit or re-raise the signal. Whether the script still terminates on
# Ctrl-C while they are installed is not visible from this block — confirm.
local _rh_var="$1" _rh_val="" _rh_saved="" _rh_prior_trap=""
if command -v stty >/dev/null 2>&1 && { [ -t 0 ] || [ -e /dev/tty ]; }; then
# Snapshot the current termios state; left empty if neither stty call succeeds.
_rh_saved=$(stty -g </dev/tty 2>/dev/null || stty -g 2>/dev/null || true)
# v0.8.26 nit (b): capture the caller's PRIOR INT/TERM/HUP trap so we can
# RESTORE it on the way out instead of blindly resetting to default. `trap -p`
# prints the re-installable trap commands (empty if none was set). Harmless
# today (no caller sets these around the prompt) but correct.
_rh_prior_trap=$(trap -p INT TERM HUP 2>/dev/null || true)
# v0.8.26 nit (a): install a restore trap BEFORE touching echo, on EVERY
# path. If `stty -g` saved a state, restore exactly that; if the save failed
# (_rh_saved empty), fall back to `stty echo` so a ^C mid-read can never
# leave the terminal with echo off. Previously the trap was only installed
# when _rh_saved was non-empty, yet `stty -echo` ran regardless — an
# interrupt in that window left the tty corrupted.
if [ -n "$_rh_saved" ]; then
trap 'stty "$_rh_saved" </dev/tty 2>/dev/null || stty "$_rh_saved" 2>/dev/null' INT TERM HUP
else
trap 'stty echo </dev/tty 2>/dev/null || stty echo 2>/dev/null' INT TERM HUP
fi
# Echo off, then read: /dev/tty first, stdin as the fallback.
stty -echo </dev/tty 2>/dev/null || stty -echo 2>/dev/null || true
IFS= read -r _rh_val </dev/tty 2>/dev/null || IFS= read -r _rh_val || true
# Normal-path restore, mirroring what the trap handler would have done.
if [ -n "$_rh_saved" ]; then
stty "$_rh_saved" </dev/tty 2>/dev/null || stty "$_rh_saved" 2>/dev/null || true
else
stty echo </dev/tty 2>/dev/null || stty echo 2>/dev/null || true
fi
# Restore the caller's prior trap (or clear ours if there was none).
if [ -n "$_rh_prior_trap" ]; then
eval "$_rh_prior_trap"
else
trap - INT TERM HUP
fi
else
# No stty or no terminal: plain read, same /dev/tty-then-stdin order.
IFS= read -r _rh_val </dev/tty 2>/dev/null || IFS= read -r _rh_val || true
fi
# Assign back to the caller's variable name without eval-injection risk.
printf -v "$_rh_var" '%s' "$_rh_val"
}
# v0.7.5: shared CR-safety primitives. pull/push use `wc -c | tr -d ' '` to
# verify byte counts — Cygwin wc.exe can pass through \r and tank the
# `[ "$got" != "$local_size" ]` comparison.
# Directory containing this script; cygwin-safe.sh is expected beside it.
_SSH_LIB_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)

if [ ! -r "$_SSH_LIB_DIR/cygwin-safe.sh" ]; then
  # Shared library not readable: define a local stand-in.
  # coerce_int RAW [DEFAULT] — print only the digits of RAW; print DEFAULT
  # (0 when omitted) if RAW contains no digits at all.
  coerce_int() {
    local raw="${1:-}" fallback="${2:-0}" digits
    digits=$(printf '%s' "$raw" | tr -cd '0-9')
    printf '%s' "${digits:-$fallback}"
  }
else
  # shellcheck disable=SC1090,SC1091
  . "$_SSH_LIB_DIR/cygwin-safe.sh"
fi
ensure_layout() {
  # Create the private state directories (mode 0700) and, on first use, seed
  # the hosts TSV (mode 0600) with its header row.
  #
  # Header columns: alias, addr, port, hciroot, direct.
  #   hciroot (v0.8.15) — optional pinned HCIROOT; readers treat a missing
  #                       column as "no pin", so 3-column files stay valid.
  #   direct  (v0.8.17) — optional on|off flag; missing/empty means "off"
  #                       (master mode), so 3-/4-column files stay valid.
  mkdir -p "$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR" 2>/dev/null
  chmod 700 "$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR" 2>/dev/null || true

  # An existing hosts file is never rewritten.
  if [ -f "$SSH_HOSTS_FILE" ]; then
    return 0
  fi

  umask 077
  printf 'alias\taddr\tport\thciroot\tdirect\n' > "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
}
# read_host_addr ALIAS
#   Print "ADDR<TAB>PORT" (hosts TSV columns 2 and 3) for the first data row
#   whose alias equals ALIAS exactly; print nothing when there is no such row.
#   Returns 1 without output when the hosts file does not exist.
read_host_addr() {
  local wanted="$1"
  if [ ! -f "$SSH_HOSTS_FILE" ]; then
    return 1
  fi
  # NR>1 skips the header row; exit after the first match.
  awk -F'\t' -v a="$wanted" 'NR>1 && $1==a { print $2 "\t" $3; exit }' < "$SSH_HOSTS_FILE"
}
# read_host_hciroot ALIAS
#   Print the HCIROOT pinned for ALIAS (hosts TSV column 4). Prints nothing
#   when the alias has no pin or does not exist; a missing hosts file is not
#   an error here (returns 0 with no output).
#   v0.8.15: a non-empty pin means remote commands for the alias run with
#   HCIROOT exported explicitly and WITHOUT the `bash -lc` login wrapper.
read_host_hciroot() {
  local wanted="$1"
  if [ -f "$SSH_HOSTS_FILE" ]; then
    awk -F'\t' -v a="$wanted" 'NR>1 && $1==a { print $4; exit }' < "$SSH_HOSTS_FILE"
  else
    return 0
  fi
}
# read_host_direct ALIAS
#   Print "on" when hosts TSV column 5 for ALIAS is exactly "on"; otherwise
#   print nothing (missing, empty, or any other value means master mode).
#   A missing hosts file returns 0 with no output.
#   v0.8.17: "on" makes ALL remote ops for the alias bypass the ControlMaster
#   and use a fresh per-command sshpass connection (for hosts that reject
#   session multiplexing).
read_host_direct() {
  local wanted="$1"
  if [ -f "$SSH_HOSTS_FILE" ]; then
    awk -F'\t' -v a="$wanted" 'NR>1 && $1==a && $5=="on" { print "on"; exit }' < "$SSH_HOSTS_FILE"
  else
    return 0
  fi
}
# _alias_is_direct ALIAS
#   Succeeds (status 0) when read_host_direct reports "on" for ALIAS,
#   fails otherwise.
_alias_is_direct() {
  local flag
  flag=$(read_host_direct "$1")
  [ "$flag" = "on" ]
}
require_sshpass() {
  # Abort (via die) with an install hint unless `sshpass` resolves on PATH.
  if ! command -v sshpass >/dev/null 2>&1; then
    die "sshpass not on PATH — install it (apt install sshpass / brew install sshpass) and retry"
  fi
}
cmd_help() {
  # Print the usage text, which is this script's own leading comment block.
  #
  # Output starts at file line 4 (the "# Architecture:" line, i.e. after the
  # shebang and title lines) and stops at the first line that is not a
  # comment. Unlike a fixed line range, this keeps matching the header when
  # subcommand documentation is added or removed.
  awk 'NR < 4 { next } !/^#/ { exit } { print }' "$0"
}
cmd_hosts() {
  # List every configured host as an aligned table:
  #   alias, user@host, port, cred (✓ when a password file exists),
  #   master ("open" when the ControlMaster socket answers, "n/a" in DIRECT
  #   mode where there is no master to probe), direct ("on" or blank), and
  #   the pinned HCIROOT (blank when none).
  ensure_layout
  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint would tank
  # the `-le 1` integer test below.
  if [ "$(coerce_int "$(wc -l < "$SSH_HOSTS_FILE")" 0)" -le 1 ]; then
    echo "no hosts configured. Add with: ssh-helper.sh add <alias> <user@host[:port]>"
    return 0
  fi

  # Header uses the same column widths as the data rows so it lines up.
  printf '%-20s%-52s%-6s%-6s%-8s%-8s%s\n' \
    'alias' 'user@host' 'port' 'cred' 'master' 'direct' 'hciroot-pin'
  # The rule characters are multi-byte and bash pads %-Ns by BYTES, so each
  # rule is followed by an explicit space count (column width minus length).
  printf '%s%15s%s%43s%s%2s%s%2s%s%2s%s%2s%s\n' \
    '─────' '' '─────────' '' '────' '' '────' '' '──────' '' '──────' '' '───────────'

  # Tab is IFS *whitespace*: `IFS=$'\t' read` collapses consecutive tabs, so
  # an empty hciroot column would shift "on" out of the direct column. Have
  # awk re-join the five columns with the unit separator (0x1f, not
  # whitespace) so empty fields survive the read. Blank rows are skipped.
  local sep=$'\037'
  awk -F'\t' -v OFS="$sep" 'NR>1 && $1!="" { print $1, $2, $3, $4, $5 }' "$SSH_HOSTS_FILE" \
    | while IFS="$sep" read -r alias addr port hciroot direct; do
      # Same byte-padding caveat for the ✓ mark: pad the cred cell by hand.
      local cred_state="" cred_pad=6
      if [ -f "$SSH_CREDS_DIR/$alias" ]; then
        cred_state="✓"
        cred_pad=5
      fi
      local direct_state="" master_state=""
      if [ "$direct" = "on" ]; then
        # v0.8.17: DIRECT mode has no master, so there is nothing to probe.
        direct_state="on"
        master_state="n/a"
      else
        local sock="$SSH_SOCKETS_DIR/$alias.sock"
        # </dev/null keeps ssh from consuming the rows still queued on the
        # loop's stdin.
        if [ -S "$sock" ] \
          && ssh -S "$sock" -O check -p "${port:-22}" "$addr" </dev/null 2>/dev/null; then
          master_state="open"
        fi
      fi
      printf '%-20s%-52s%-6s%s%*s%-8s%-8s%s\n' \
        "$alias" "$addr" "${port:-22}" "$cred_state" "$cred_pad" '' \
        "$master_state" "$direct_state" "${hciroot:-}"
    done
}
cmd_add() {
  # add <alias> <user@host[:port]> — append a host row to the hosts TSV.
  #   $1  alias   name used by every other subcommand; it is also a file name
  #               under the creds and sockets directories
  #   $2  target  user@host or user@host:port (port defaults to 22)
  # Dies on bad usage, an unusable alias, a malformed target, or a duplicate
  # alias. The new row carries empty hciroot and direct columns.
  local alias="${1:-}" target="${2:-}"
  [ -n "$alias" ] && [ -n "$target" ] || die "usage: add <alias> <user@host[:port]>"

  # The alias is stored as a TSV field and used as a path component, so
  # whitespace (tab/newline would corrupt the table) and '/' or dot-only
  # names (would escape the creds/sockets directories) are refused.
  case "$alias" in
    *[[:space:]]* | */* | . | ..)
      die "alias must not contain whitespace or '/' (got: $alias)"
      ;;
  esac

  local target_re='^[^@[:space:]]+@[^:[:space:]]+(:[0-9]+)?$'
  [[ "$target" =~ $target_re ]] \
    || die "target must look like user@host or user@host:port"

  local addr port
  if [[ "$target" == *:* ]]; then
    addr="${target%:*}"
    port="${target##*:}"
  else
    addr="$target"
    port="22"
  fi

  ensure_layout
  # Reject duplicates (use 'remove' first)
  if awk -F'\t' -v a="$alias" 'NR>1 && $1==a { found=1; exit } END { exit !found }' "$SSH_HOSTS_FILE"; then
    die "alias '$alias' already exists. Use 'remove $alias' first."
  fi

  umask 077
  # v0.8.15: write an empty 4th (hciroot) field so the row layout is uniform.
  # v0.8.17: write an empty 5th (direct) field too — uniform 5-column rows.
  printf '%s\t%s\t%s\t%s\t%s\n' "$alias" "$addr" "$port" "" "" >> "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
  ok "added $alias → $addr (port $port). Next: ssh-helper.sh pass $alias"
}
# cmd_set_hciroot ALIAS [PATH] — pin (or clear) the HCIROOT for an alias.
# Persisted as column 4 of the hosts TSV. An empty/omitted PATH clears the pin.
# When set, cmd_exec/cmd_discover/cmd_pull_smat run remote commands with
# HCIROOT=<path> exported EXPLICITLY and WITHOUT the `bash -lc` login wrapper —
# the v0.8.15 fix for hosts whose login profile is sudo-gated (a non-interactive
# SSH session hits `sudo: a terminal is required` and never exports $HCIROOT).
cmd_set_hciroot() {
  # set-hciroot <alias> [path] — pin (or, with an empty path, clear) the
  # HCIROOT stored in column 4 of the hosts TSV for ALIAS.
  #   $1  alias    must already exist in the hosts file
  #   $2  newroot  path to pin; empty or omitted clears the pin
  # Dies on bad usage, an unknown alias, a path containing a tab or newline,
  # or when the hosts file cannot be rewritten.
  local alias="${1:-}" newroot="${2:-}"
  [ -n "$alias" ] || die "usage: set-hciroot <alias> <path> (empty path clears the pin)"
  # A tab or newline inside the value would split it across TSV fields/rows.
  case "$newroot" in
    *$'\t'* | *$'\n'*) die "HCIROOT path must not contain a tab or newline" ;;
  esac
  ensure_layout
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias (run 'add' first)"

  # Rewrite into a temp file NEXT TO the hosts file so the final mv is a
  # same-directory rename. awk handles legacy 3-column rows by assigning $4
  # directly, and backfills the column-4/5 header names; column-5 values on
  # data rows are preserved (only $4 is touched). The new value travels via
  # ENVIRON rather than -v so backslashes in a path are not treated as awk
  # escape sequences.
  local tmp
  tmp=$(mktemp "$SSH_HOSTS_FILE.XXXXXX") \
    || die "cannot create a temp file next to $SSH_HOSTS_FILE"
  if LARRY_NEW_HCIROOT="$newroot" awk -F'\t' -v OFS='\t' -v a="$alias" '
      NR==1 { if (NF < 4) { $4="hciroot" } if (NF < 5) { $5="direct" } print; next }
      $1==a { $4=ENVIRON["LARRY_NEW_HCIROOT"]; print; next }
      { print }
    ' "$SSH_HOSTS_FILE" > "$tmp" && mv "$tmp" "$SSH_HOSTS_FILE"; then
    chmod 600 "$SSH_HOSTS_FILE"
  else
    # Never claim success (or leave the temp file behind) on a failed rewrite.
    rm -f "$tmp"
    die "could not update $SSH_HOSTS_FILE"
  fi

  if [ -n "$newroot" ]; then
    ok "pinned HCIROOT for $alias → $newroot"
    ok " (remote enumeration/exec for $alias will export HCIROOT explicitly and SKIP the login profile)"
  else
    ok "cleared HCIROOT pin for $alias (reverting to login-shell \$HCIROOT resolution)"
  fi
}
# cmd_set_direct ALIAS on|off — toggle (or clear) DIRECT mode for an alias.
# Persisted as column 5 of the hosts TSV. v0.8.17.
#
# When ON, cmd_exec/cmd_discover/cmd_pull_smat run remote commands over a FRESH
# per-command sshpass connection (forced password auth) instead of multiplexing
# through a ControlMaster socket — the fix for hosts (e.g. qa → shdclvf01q) where
# the master opens & authenticates fine but any multiplexed session dies with
# "read from master failed: Connection reset by peer". The pinned HCIROOT (set
# via set-hciroot) is still honoured — the remote command is shaped by the same
# _remote_cmd_for path; only the dispatch (direct vs master socket) changes.
cmd_set_direct() {
  # set-direct <alias> on|off — store the DIRECT-mode flag in column 5 of the
  # hosts TSV for ALIAS. "on" (any of on/ON/On) sets it; off/OFF/Off or an
  # empty value clears it. Anything else, a missing alias argument, or an
  # unknown alias is fatal.
  local alias="${1:-}" mode="${2:-}"
  if [ -z "$alias" ]; then
    die "usage: set-direct <alias> on|off"
  fi

  # Strip surrounding whitespace so an argument such as "on " coming from the
  # slash-command path still normalizes to on|off.
  local lead trail
  lead="${mode%%[![:space:]]*}"
  mode="${mode#"$lead"}"
  trail="${mode##*[![:space:]]}"
  mode="${mode%"$trail"}"

  case "$mode" in
    on | ON | On)
      mode="on"
      ;;
    off | OFF | Off | '')
      mode="" # empty/off → clear the flag (master mode)
      ;;
    *)
      die "usage: set-direct <alias> on|off (got: $mode)"
      ;;
  esac

  ensure_layout
  local known
  known=$(read_host_addr "$alias")
  if [ -z "$known" ]; then
    die "no such alias: $alias (run 'add' first)"
  fi

  # Rewrite the table through a scratch file. The header gains the column-4/5
  # names on legacy files; the matching row is padded to four columns before
  # $5 is assigned so an existing HCIROOT pin in column 4 is never clobbered.
  local scratch
  scratch=$(mktemp)
  awk -F'\t' -v OFS='\t' -v a="$alias" -v m="$mode" '
    NR==1 { if (NF < 4) { $4="hciroot" } if (NF < 5) { $5="direct" } print; next }
    $1==a { if (NF < 4) { $4="" } $5=m; print; next }
    { print }
  ' "$SSH_HOSTS_FILE" > "$scratch" && mv "$scratch" "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"

  if [ "$mode" != "on" ]; then
    ok "DIRECT mode OFF for $alias (reverting to ControlMaster multiplexing)"
    return
  fi
  ok "DIRECT mode ON for $alias"
  ok " (all remote ops bypass the ControlMaster — fresh per-command sshpass, forced password auth)"
  ok " next: ssh-helper.sh setup $alias (validates the password; no master opened in direct mode)"
}
cmd_remove() {
  # remove <alias> — drop ALIAS from the hosts TSV, close its master
  # (best effort) and delete its password file and socket.
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: remove <alias>"
  fi
  ensure_layout

  # Copy every row except data rows whose alias matches; the header (row 1)
  # is always kept.
  local scratch
  scratch=$(mktemp)
  awk -F'\t' -v a="$alias" '!(NR>1 && $1==a)' "$SSH_HOSTS_FILE" > "$scratch" \
    && mv "$scratch" "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"

  # Close + clean master socket and cred
  cmd_close "$alias" 2>/dev/null || true
  rm -f "$SSH_CREDS_DIR/$alias" "$SSH_SOCKETS_DIR/$alias.sock" 2>/dev/null
  ok "removed $alias (cred + socket cleared)"
}
cmd_pass() {
  # pass <alias> — prompt (hidden) for ALIAS's password and store it in the
  # per-alias credential file with mode 0600. Dies on bad usage, an unknown
  # alias, or empty input.
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: pass <alias>"
  fi
  local known
  known=$(read_host_addr "$alias")
  if [ -z "$known" ]; then
    die "no such alias: $alias (run 'add' first)"
  fi
  ensure_layout

  # Prompt and the trailing newline go to stderr; stdout carries only the
  # final status line.
  printf 'Password for %s (input is hidden; press Enter when done): ' "$alias" >&2
  local secret=""
  _read_hidden secret
  printf '\n' >&2
  if [ -z "$secret" ]; then
    die "no password entered"
  fi

  local cred_path="$SSH_CREDS_DIR/$alias"
  umask 077
  # NO trailing newline — sshpass -f expects raw password as full file content
  printf '%s' "$secret" > "$cred_path"
  chmod 600 "$cred_path"
  ok "password saved to $SSH_CREDS_DIR/$alias (mode 0600). Next: ssh-helper.sh setup $alias"
}
cmd_setup() {
  # setup <alias> — make ALIAS ready for remote commands.
  #
  # DIRECT mode (v0.8.17): no master is opened (the host rejects
  # multiplexing). The stored password is VALIDATED with one trivial direct
  # command and the result reported.
  # Master mode: reuse an already-open ControlMaster, else open one with the
  # stored password. If that fails and the failure looks like an auth
  # problem, prompt once for a fresh password, store it, and retry once.
  #
  # Returns 0 when the alias is ready, 1 otherwise. Dies on bad usage, an
  # unknown alias, a missing password file, or missing sshpass.
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: setup <alias>"
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  ensure_layout

  local credfile="$SSH_CREDS_DIR/$alias"
  local errfile

  if _alias_is_direct "$alias"; then
    [ -f "$credfile" ] || die "no password set for $alias — run 'pass $alias' first"
    require_sshpass
    ok "DIRECT mode for $alias ($addr:$port) — validating the stored password (no master in direct mode)..."
    # Private scratch file; if mktemp is unavailable fall back to the 0700
    # state directory rather than a guessable name in world-writable /tmp.
    errfile=$(mktemp 2>/dev/null || printf '%s' "$LARRY_HOME/.ssh-direct-setup.err.$$")
    # A trivial, side-effect-free probe. Forced password auth, host-key
    # checked, no master. STDERR (banner/sudo) is kept for diagnosis only.
    # _direct_ssh_opts emits one option token per word, so it is splatted
    # unquoted on purpose.
    # shellcheck disable=SC2046
    sshpass -f "$credfile" ssh \
      $(_direct_ssh_opts) \
      -p "$port" \
      "$addr" 'true' 2>"$errfile"
    local vrc=$?
    if [ "$vrc" -eq 0 ]; then
      rm -f "$errfile"
      ok "✓ direct auth OK: $alias → $addr:$port (no master; ready for /sites $alias)"
      return 0
    fi
    printf 'ssh-helper: direct validation FAILED for %s (rc=%d).\n' "$alias" "$vrc" >&2
    local filtered; filtered=$(_filter_direct_stderr < "$errfile")
    if [ -n "$filtered" ]; then
      printf 'ssh-helper: remote stderr (benign banner/sudo lines stripped):\n' >&2
      printf '%s\n' "$filtered" >&2
    else
      printf 'ssh-helper: no non-benign stderr — almost certainly the stored password is stale/rotated. Re-run: ssh-helper.sh pass %s\n' "$alias" >&2
    fi
    rm -f "$errfile"
    return 1
  fi

  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    ok "master already open for $alias ($addr:$port)"
    return 0
  fi
  [ -f "$credfile" ] || die "no password set for $alias — run 'pass $alias' first"
  require_sshpass
  ok "opening ssh master for $alias ($addr:$port) — ControlPersist=$SSH_CONTROL_PERSIST..."

  # _try_master_open ERRFILE — one attempt with the stored credential.
  # Returns 0 only when ssh succeeded AND the socket answers `-O check`.
  # ssh/sshpass stderr lands in ERRFILE so the caller can classify it.
  # Reads credfile/sock/port/addr from the enclosing call.
  #
  # v0.8.15 hardening (banner + rotating-password):
  #   • PreferredAuthentications=password + PubkeyAuthentication=no force
  #     the password method so sshpass feeds the password cleanly past a
  #     long pre-auth banner instead of ssh spending the slot on pubkey.
  #   • NumberOfPasswordPrompts=1 makes a stale password fail fast (one
  #     prompt) instead of hanging, which lets us re-prompt.
  _try_master_open() {
    local errfile="$1"
    sshpass -f "$credfile" ssh \
      -o "ControlMaster=yes" \
      -o "ControlPath=$sock" \
      -o "ControlPersist=$SSH_CONTROL_PERSIST" \
      -o "StrictHostKeyChecking=accept-new" \
      -o "PreferredAuthentications=password" \
      -o "PubkeyAuthentication=no" \
      -o "NumberOfPasswordPrompts=1" \
      -o "ConnectTimeout=10" \
      -p "$port" \
      -N -f \
      "$addr" 2>"$errfile"
    local rc=$?
    [ "$rc" -eq 0 ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null
  }

  # _looks_like_auth_failure ERRFILE — heuristic: did this fail on auth
  # (vs. network/host-key)?
  #   0  explicit auth keywords, OR nothing recognisable (empty or
  #      banner-only stderr after a password attempt is almost always a
  #      rotated/stale credential)
  #   1  stderr clearly names a transport or host-key failure — a new
  #      password cannot fix that, so the stored one must not be replaced
  _looks_like_auth_failure() {
    local errfile="$1"
    grep -qiE 'permission denied|authentication fail|incorrect password|too many authentication|password:' "$errfile" 2>/dev/null && return 0
    grep -qiE 'connection refused|connection timed out|operation timed out|could not resolve hostname|name or service not known|no route to host|network is unreachable|host key verification failed' "$errfile" 2>/dev/null && return 1
    return 0
  }

  # Private scratch file for ssh stderr (see the direct branch for why the
  # fallback lives under the state directory and not in /tmp).
  errfile=$(mktemp 2>/dev/null || printf '%s' "$LARRY_HOME/.ssh-setup.err.$$")
  : > "$errfile"
  if _try_master_open "$errfile"; then
    ok "✓ master open: $alias → $addr:$port (socket: $sock)"
    rm -f "$errfile"
    return 0
  fi

  # First attempt failed. Surface the REAL error (not just the banner) and,
  # if it looks like an auth failure, re-prompt for a fresh password and
  # retry ONCE. Never silently no-op.
  printf 'ssh-helper: first master-open attempt failed for %s.\n' "$alias" >&2
  if [ -s "$errfile" ]; then
    printf 'ssh-helper: ssh/sshpass stderr (auth error, not just the banner):\n' >&2
    grep -iE 'permission denied|authentication|password|denied|fatal|connection|timed out|refused|host key' "$errfile" >&2 2>/dev/null \
      || cat "$errfile" >&2 2>/dev/null
  else
    printf 'ssh-helper: (no stderr captured — the box likely printed only its pre-auth banner; the stored password is almost certainly stale)\n' >&2
  fi

  # Only prompt when there is a terminal to prompt on.
  if _looks_like_auth_failure "$errfile" && { [ -t 0 ] || [ -e /dev/tty ]; }; then
    printf 'ssh-helper: looks like the stored password is stale (this host rotates ~every 12h).\n' >&2
    printf 'Enter a FRESH password for %s (input hidden; Enter to abort): ' "$alias" >&2
    local pw=""
    _read_hidden pw
    echo "" >&2
    if [ -n "$pw" ]; then
      umask 077
      printf '%s' "$pw" > "$credfile" # NO trailing newline (sshpass -f)
      chmod 600 "$credfile"
      ok "stored the fresh password — retrying master open..."
      : > "$errfile"
      if _try_master_open "$errfile"; then
        ok "✓ master open: $alias → $addr:$port (socket: $sock)"
        rm -f "$errfile"
        return 0
      fi
      printf 'ssh-helper: retry with the fresh password ALSO failed. ssh/sshpass stderr:\n' >&2
      cat "$errfile" >&2 2>/dev/null
    else
      printf 'ssh-helper: no password entered — aborting.\n' >&2
    fi
  fi

  printf 'ssh-helper: master NOT open for %s. Next step: re-run `ssh-helper.sh setup %s` (or the /ssh-setup %s slash command) with a current password; if the host changed, re-check `ssh-helper.sh hosts`.\n' \
    "$alias" "$alias" "$alias" >&2
  rm -f "$errfile"
  return 1
}
cmd_close() {
  # close <alias> — ask the ControlMaster for ALIAS to exit (only when its
  # socket exists and the alias still resolves), then delete the socket path
  # and report. An unknown alias is tolerated: the socket is still removed.
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: close <alias>"
  fi

  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  local addr_port
  if ! addr_port=$(read_host_addr "$alias"); then
    addr_port=""
  fi

  if [ -n "$addr_port" ] && [ -S "$sock" ]; then
    local addr port
    addr=$(printf '%s' "$addr_port" | cut -f1)
    port=$(printf '%s' "$addr_port" | cut -f2)
    # Best effort: a master that is already gone is not an error.
    ssh -S "$sock" -O exit -p "$port" "$addr" 2>/dev/null || true
  fi

  rm -f "$sock"
  ok "closed master for $alias"
}
cmd_status() {
  # status [alias] — with an alias, print its address, port, credential
  # presence, socket path and whether the master answers; without one, fall
  # through to the full host table (cmd_hosts).
  ensure_layout
  if [ -z "${1:-}" ]; then
    cmd_hosts
    return
  fi

  local alias="$1"
  local addr_port
  addr_port=$(read_host_addr "$alias")
  if [ -z "$addr_port" ]; then
    die "no such alias: $alias"
  fi
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  local sock="$SSH_SOCKETS_DIR/$alias.sock"

  local cred_state="missing"
  if [ -f "$SSH_CREDS_DIR/$alias" ]; then
    cred_state="present"
  fi

  # The status line is left open here and completed after the socket probe.
  printf 'alias: %s\naddr: %s\nport: %s\ncred: %s\nsocket: %s\nstatus: ' \
    "$alias" "$addr" "$port" "$cred_state" "$sock"

  local verdict="no master (run setup)"
  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    verdict="master OPEN"
  fi
  echo "$verdict"
  return 0
}
# v0.8.13 (Cloverleaf login-shell fix): exec defaults to a LOGIN shell.
#
# Root cause of the "qa keeps asking me for $HCIROOT" friction: a plain
# ssh host 'cmd'
# runs a NON-interactive, NON-login shell. On a Cloverleaf host, $HCIROOT (and
# $HCISITE, the hci* binaries on PATH, etc.) are exported by the LOGIN profile
# (/etc/profile.d, the hci user's ~/.profile / ~/.bash_profile, the per-site
# `.profile`). A non-login shell never sources those, so $HCIROOT arrives empty
# and Larry used to give up and nag the user for a path. Wrapping the command in
# `bash -lc` forces a login shell, so the Cloverleaf environment populates
# exactly as it does for an interactive operator login. This is the version-
# agnostic, no-config fix — it works on any Cloverleaf host whose operator login
# sets up the environment (i.e. all of them).
#
# Escape hatch: prefix the command with the literal token NOLOGIN<space> (or set
# LARRY_SSH_NO_LOGIN=1) to run a bare non-login shell — for the rare host where
# the login profile is interactive-only and hangs a non-tty `bash -l`.
_build_login_cmd() {
  # _build_login_cmd RAW — print the command string to hand to ssh.
  #   • RAW starting with the literal token "NOLOGIN " → RAW minus that token.
  #   • LARRY_SSH_NO_LOGIN=1                           → RAW unchanged.
  #   • otherwise                                      → bash -lc '<RAW>' with
  #     every embedded single quote rewritten as '\'' (close, escaped quote,
  #     reopen — POSIX-portable).
  local raw="$1"

  if [[ "$raw" == "NOLOGIN "* ]]; then
    printf '%s' "${raw#NOLOGIN }"
    return
  fi

  if [ "${LARRY_SSH_NO_LOGIN:-0}" = "1" ]; then
    printf '%s' "$raw"
    return
  fi

  local quoted
  quoted=$(printf '%s' "$raw" | sed -e "s/'/'\\\\''/g")
  printf "bash -lc '%s'" "$quoted"
}
# v0.8.15 (sudo-gated-profile fix): when an alias has a pinned HCIROOT, the
# remote command must NOT go through the login profile (`bash -lc`). On hosts
# whose login profile is sudo-gated, a non-interactive SSH session trips
# `sudo: a terminal is required`, the profile never finishes, and $HCIROOT comes
# back EMPTY. Instead we export HCIROOT explicitly and run a plain `sh -c` (no
# login profile, no tty needed). This is deterministic and version-agnostic.
#
# _shq STR — print STR with every single quote rewritten as '\'' so the result
# can be embedded inside a surrounding '...' context. The outer quotes are NOT
# added here.
_shq() {
  local text="$1"
  printf '%s' "$text" | sed -e "s/'/'\\\\''/g"
}
# _build_pinned_cmd HCIROOT RAW — print a remote command string that exports
# HCIROOT explicitly and then runs RAW under a NON-login `sh -c`. No
# `bash -lc`, so a sudo-gated login profile is skipped.
#   $1  root  the pinned HCIROOT value
#   $2  raw   the command to run remotely
# Two quoting layers are applied: the root is single-quoted INSIDE the script
# (so spaces or shell metacharacters in the path stay part of the value), and
# the whole script is then escaped for the outer '...' handed to `sh -c`.
_build_pinned_cmd() {
  local root="$1" raw="$2"
  local script
  script="HCIROOT='$(_shq "$root")'; export HCIROOT; $raw"
  printf "sh -c '%s'" "$(_shq "$script")"
}
# _remote_cmd_for ALIAS RAW — print the exact command string to hand to ssh.
#   ALIAS has a pinned HCIROOT → _build_pinned_cmd (explicit export, no login
#                                profile).
#   otherwise                  → _build_login_cmd (login-shell wrapper).
# Single chokepoint so cmd_exec/cmd_discover/cmd_pull_smat all honour the pin
# identically.
_remote_cmd_for() {
  local alias="$1" raw="$2" pinned_root
  pinned_root=$(read_host_hciroot "$alias")
  if [ -z "$pinned_root" ]; then
    _build_login_cmd "$raw"
    return
  fi
  _build_pinned_cmd "$pinned_root" "$raw"
}
# ── v0.8.17: DIRECT (no-multiplex) dispatch ──────────────────────────────────
#
# Some Cloverleaf hosts reject SSH ControlMaster session multiplexing: the master
# opens and authenticates, but every session multiplexed over it dies with
# "read from master failed: Connection reset by peer", then ssh falls back to a
# fresh connection that fails auth. Confirmed live on qa (bryjohnx@lhsixfqa →
# shdclvf01q, cis2025.01). The fix is to run each remote command as its OWN
# fresh ssh connection with forced password auth (sshpass -f <credfile>) — NO
# master socket. This is legitimate password auth, NOT a traffic bypass: no
# proxy, no tunnel, no masking, and host-key checking stays on (accept-new).
#
# _DIRECT_CONNECT_TIMEOUT — seconds for ssh ConnectTimeout (env-overridable).
if [ -n "${LARRY_SSH_DIRECT_TIMEOUT:-}" ]; then
  # Operator override taken from the environment.
  _DIRECT_CONNECT_TIMEOUT="$LARRY_SSH_DIRECT_TIMEOUT"
else
  # Default ConnectTimeout, in seconds.
  _DIRECT_CONNECT_TIMEOUT="10"
fi
# _direct_ssh_opts → emit the shared ssh/scp `-o` option tokens for every DIRECT
# (no-ControlMaster) transport, one token per word, on STDOUT. v0.8.18: extracted
# so the security posture lives in ONE place and a change can't drift across the
# three call sites (cmd_setup probe, _run_direct, _direct_scp). The five
# security-critical flags are:
# PreferredAuthentications=password — force the password method so sshpass
# feeds the password cleanly past a banner
# PubkeyAuthentication=no — never silently fall back to a key
# StrictHostKeyChecking=accept-new — host-key checking STAYS ON (TOFU). This
# is the no-traffic-bypass guarantee: we
# never disable host verification.
# ControlMaster=no / ControlPath=none — never multiplex (the box rejects it)
# Plus two shared connection knobs identical across all three callers:
# NumberOfPasswordPrompts=1 — a stale password fails fast (one prompt)
# ConnectTimeout=$_DIRECT_CONNECT_TIMEOUT
# ssh `-o` ordering is immaterial (no conflicting duplicate keys), so emitting
# these as one contiguous block is byte-equivalent in BEHAVIOR to the prior
# inline copies. Callers splat the words unquoted: `ssh $(_direct_ssh_opts) ...`.
# Every token here is a single shell word (no spaces inside any -o value), so the
# unquoted expansion is safe.
_direct_ssh_opts() {
  # Print the shared ssh/scp option tokens for every DIRECT (no-master)
  # transport, one token per line: seven `-o KEY=VALUE` pairs, 14 words.
  #
  # Callers expand this output UNQUOTED, so every value must be a single
  # word. The only variable part is the connect timeout, which can come from
  # the environment; anything that is not a plain non-negative integer is
  # replaced by the default (10) so it can never split into extra ssh
  # options.
  local timeout="${_DIRECT_CONNECT_TIMEOUT:-10}"
  case "$timeout" in
    '' | *[!0-9]*) timeout=10 ;;
  esac
  printf '%s\n' \
    -o "PreferredAuthentications=password" \
    -o "PubkeyAuthentication=no" \
    -o "NumberOfPasswordPrompts=1" \
    -o "StrictHostKeyChecking=accept-new" \
    -o "ControlMaster=no" \
    -o "ControlPath=none" \
    -o "ConnectTimeout=$timeout"
}
# _direct_creds ALIAS → echoes the credfile path (the file /ssh-pass writes) and
# returns 0; if the file is absent, echoes nothing and returns 1 (the caller
# reports the error). Same file the ControlMaster path uses.
_direct_creds() {
  # _direct_creds ALIAS — print the path of ALIAS's password file and return
  # 0 when that file exists; print nothing and return 1 when it does not.
  local credfile="$SSH_CREDS_DIR/$1"
  if [ ! -f "$credfile" ]; then
    return 1
  fi
  printf '%s' "$credfile"
  return 0
}
# _filter_direct_stderr — strip known-benign noise from a direct session's
# STDERR so the parsed STDOUT result is presented clean. The qa login profile
# emits a pre-auth banner ("Unauthorized access…/monitored", "WARNING", etc.)
# AND `sudo: a terminal is required` / `sudo: a password is required` /
# `sudo: no tty present` on STDERR for non-interactive sessions. Those are
# expected and harmless for our read-only enumeration — drop them. ANYTHING
# ELSE that remains is a real signal and is surfaced by the caller, but ONLY on
# an actual non-zero command failure (see _run_direct). Reads stderr on stdin;
# echoes the filtered remainder. The patterns are intentionally narrow so we
# never swallow a genuine error message.
_filter_direct_stderr() {
  # Read a direct session's stderr on stdin and print only the lines that
  # match none of the known-benign patterns below (case-insensitive).
  # Always returns 0, including when nothing is left to print.
  local -a benign=(
    # pre-auth banner boilerplate
    'unauthorized (access|use)'
    'access is monitored'
    'monitored and recorded'
    'this (system|computer|is a) .*(private|restricted|government|corporate)'
    'by (logging in|accessing|using) .*(you )?(consent|agree)'
    'all activ(ity|ities) .*(may be|are) (monitored|logged|recorded)'
    'disconnect immediately'
    '^[[:space:]]*\*+[[:space:]]*$'
    '^[[:space:]]*WARNING[[:space:]]*[:!]?'
    # sudo noise from a non-interactive session
    'sudo: a terminal is required'
    'sudo: a password is required'
    'sudo: no tty present'
    'sudo: sorry, you must have a tty'
  )
  local -a grep_args=()
  local pattern
  for pattern in "${benign[@]}"; do
    grep_args+=(-e "$pattern")
  done
  # grep exits 1 when every line was filtered out; that is not an error here.
  grep -ivE "${grep_args[@]}" 2>/dev/null || true
}
# _run_direct ALIAS REMOTE_CMD → run REMOTE_CMD on ALIAS over a FRESH per-command
# sshpass connection (no ControlMaster). REMOTE_CMD must already be shaped by
# _remote_cmd_for (so the HCIROOT pin / login-shell wrapper is honoured). STDOUT
# is passed through verbatim (the parsed-clean result). STDERR is captured,
# filtered for the known-benign banner+sudo lines, and surfaced ONLY when the
# remote command exits non-zero. Returns the remote command's exit code.
_run_direct() {
  # _run_direct ALIAS REMOTE_CMD — run REMOTE_CMD on ALIAS over a fresh
  # sshpass connection (no ControlMaster).
  #   stdout  passed through from the remote command
  #   stderr  captured to a scratch file; shown (after
  #           _filter_direct_stderr) only when the command exits non-zero
  #   return  the exit status of the sshpass/ssh invocation
  # Dies when the alias is unknown or has no stored password.
  local alias="$1" remote_cmd="$2"
  require_sshpass

  local addr_port
  addr_port=$(read_host_addr "$alias")
  if [ -z "$addr_port" ]; then
    die "no such alias: $alias"
  fi
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)

  local credfile
  if ! credfile=$(_direct_creds "$alias"); then
    die "no password set for $alias — run 'pass $alias' first (direct mode needs the stored credential per command)"
  fi

  local errfile
  errfile=$(mktemp 2>/dev/null || echo "/tmp/larry-ssh-direct.err.$$")

  # The option tokens from _direct_ssh_opts are deliberately left unquoted so
  # each one becomes its own argument. BatchMode is NOT set: sshpass supplies
  # the password non-interactively, and BatchMode would suppress that path on
  # some builds.
  # shellcheck disable=SC2046
  sshpass -f "$credfile" ssh $(_direct_ssh_opts) -p "$port" "$addr" "$remote_cmd" 2>"$errfile"
  local rc=$?

  # Success: the captured stderr is benign noise — drop it.
  if [ "$rc" -eq 0 ]; then
    rm -f "$errfile"
    return "$rc"
  fi

  # Failure: show what remains after the benign banner/sudo lines are removed.
  local remainder
  remainder=$(_filter_direct_stderr < "$errfile")
  if [ -z "$remainder" ]; then
    printf 'ssh-helper: direct command failed for %s (rc=%d) with no non-benign stderr — likely an auth failure (stale/rotated password?) or a connection reset. Re-check: ssh-helper.sh setup %s\n' "$alias" "$rc" "$alias" >&2
  else
    printf 'ssh-helper: direct command failed for %s (rc=%d). Remote stderr (benign banner/sudo lines stripped):\n' "$alias" "$rc" >&2
    printf '%s\n' "$remainder" >&2
  fi
  rm -f "$errfile"
  return "$rc"
}
# _direct_scp ALIAS SRC DST — copy SRC to DST with scp over a fresh sshpass
# connection (no ControlMaster). v0.8.17.
#
# Exactly one of SRC/DST is expected to be a remote spec "<addr>:<path>" that
# the caller has already built; this function only contributes the port, the
# stored credential and the shared DIRECT option set.
#
#   stderr  silent on success; on failure a header line plus whatever remote
#           stderr is left after the benign banner/sudo lines are filtered out
#   return  scp's exit status (via sshpass)
#
# Dies when ALIAS is unknown or has no stored password.
_direct_scp() {
  local alias="$1" src="$2" dst="$3"
  require_sshpass

  local hostrec
  hostrec=$(read_host_addr "$alias")
  if [ -z "$hostrec" ]; then
    die "no such alias: $alias"
  fi
  # Only the port (tab-separated field 2) is needed; the address is already
  # embedded in SRC or DST.
  local port
  port=$(cut -f2 <<<"$hostrec")

  local credfile
  credfile=$(_direct_creds "$alias") \
    || die "no password set for $alias — run 'pass $alias' first"

  local errfile
  errfile=$(mktemp 2>/dev/null || echo "/tmp/larry-scp-direct.err.$$")

  # scp accepts the same ssh-style -o options as ssh (shared via
  # _direct_ssh_opts since v0.8.18, intentionally word-split); only the port
  # flag differs: -P rather than -p.
  local -a scp_argv=( -q $(_direct_ssh_opts) -P "$port" "$src" "$dst" )
  sshpass -f "$credfile" scp "${scp_argv[@]}" 2>"$errfile"
  local rc=$?

  if [ "$rc" -eq 0 ]; then
    rm -f "$errfile"
    return "$rc"
  fi

  local residue
  residue=$(_filter_direct_stderr < "$errfile")
  printf 'ssh-helper: direct scp failed for %s (rc=%d):\n' "$alias" "$rc" >&2
  if [ -n "$residue" ]; then
    printf '%s\n' "$residue" >&2
  fi
  rm -f "$errfile"
  return "$rc"
}
# _dispatch_remote ALIAS RAW_CMD — run RAW_CMD on ALIAS over the transport the
# alias is configured for, and return that command's exit status.
#
#   _alias_is_direct true  → fresh per-command sshpass connection (_run_direct)
#   otherwise              → multiplex through the existing ControlMaster socket
#
# RAW_CMD is shaped by _remote_cmd_for before either transport sees it, so the
# HCIROOT pin and login-shell semantics are the same on both paths.
#
# Master mode only: dies if the alias is unknown, or if no master socket is
# open for it (the caller is told to run 'setup' first).
_dispatch_remote() {
  local alias="$1" raw="$2"
  local shaped
  shaped=$(_remote_cmd_for "$alias" "$raw")

  if ! _alias_is_direct "$alias"; then
    # ── ControlMaster path ───────────────────────────────────────────────────
    local hostrec
    hostrec=$(read_host_addr "$alias")
    if [ -z "$hostrec" ]; then
      die "no such alias: $alias"
    fi
    # hostrec is tab-separated: field 1 = address, field 2 = port.
    local addr port
    addr=$(cut -f1 <<<"$hostrec")
    port=$(cut -f2 <<<"$hostrec")
    local sock="$SSH_SOCKETS_DIR/$alias.sock"
    # The socket file must exist AND the master behind it must answer a check.
    if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
      :
    else
      die "no open master for $alias — run 'setup $alias' first"
    fi
    ssh -S "$sock" -p "$port" -o BatchMode=yes "$addr" "$shaped"
    return $?
  fi

  # ── DIRECT path ────────────────────────────────────────────────────────────
  _run_direct "$alias" "$shaped"
}
# cmd_exec ALIAS COMMAND... — 'exec' subcommand: run COMMAND on ALIAS.
#
# All words after ALIAS are joined (with "$*") into one command string. The
# alias is validated here so an unknown alias fails before any transport work.
# Transport selection is centralised in _dispatch_remote (v0.8.17):
#   • DIRECT mode → fresh per-command sshpass connection, benign banner/sudo
#     stderr stripped, real errors surfaced on failure
#   • otherwise   → the existing ControlMaster multiplex (no password needed)
# Either way the command is shaped identically by _remote_cmd_for.
# Returns _dispatch_remote's exit status; dies on a usage error.
cmd_exec() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: exec <alias> <command...>"
  fi
  shift

  local cmd="$*"
  if [ -z "$cmd" ]; then
    die "no command given"
  fi

  local hostrec
  hostrec=$(read_host_addr "$alias")
  if [ -z "$hostrec" ]; then
    die "no such alias: $alias"
  fi

  _dispatch_remote "$alias" "$cmd"
}
# cmd_discover ALIAS — proactively detect the remote Cloverleaf environment.
#
# Runs ONE remote script (via _dispatch_remote, so DIRECT sshpass or the
# ControlMaster per the alias's flag) and emits TSV on stdout that the tool
# layer can parse deterministically:
#   HCIROOT<TAB><path>     always first (empty path if unresolved)
#   NOTE<TAB><text>        zero or more diagnostics
#   EXCLUDED<TAB><names>   space-separated site dirs that were filtered out
#   SITE<TAB><name>        zero or more real sites
# Never prompts. Dies on a usage error, an unknown alias, or (master mode only)
# when no master is open. Returns the remote script's exit status.
#
# Site enumeration:
#   - PRIMARY = NetConfig walk under $HCIROOT (depth ≤2): find NetConfig files
#     → dirname → basename → sort -u. Same "a site is a dir with a NetConfig"
#     rule lib/each-site.sh uses; works on a box with NO `hcisitelist`
#     (v0.8.15 portability fix — confirmed: shdclvf01q has no hcisitelist).
#   - `hcisitelist` is consulted ONLY if it is present AND the walk found
#     nothing (belt-and-suspenders), never as the dependency.
#
# Exclusions (v0.8.15), applied at the walk source and always reported on an
# EXCLUDED line rather than dropped silently:
#   1. SITES_EXCLUDE — static scaffolding dirs; defaults to
#      "helloworld siteProto master", overridable from the caller's environment
#      (`SITES_EXCLUDE='...' discover <alias>`).
#   2. Host-name match — a dir named after the remote `hostname -s` / `hostname`,
#      or after the alias's configured SSH host (user@ stripped).
#
# Environment: with a pinned HCIROOT, _remote_cmd_for exports it and runs the
# script under a non-login `sh -c`; otherwise it runs under `bash -lc` so the
# login profile populates $HCIROOT. The script only reads ${HCIROOT:-}, so it is
# agnostic to which path delivered it, and it is kept POSIX-sh for that reason.
cmd_discover() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: discover <alias>"
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  # v0.8.17: in DIRECT mode there is no master to check — _dispatch_remote runs
  # a fresh per-command sshpass connection below. Only validate an open master
  # for the multiplex path.
  if ! _alias_is_direct "$alias"; then
    local sock="$SSH_SOCKETS_DIR/$alias.sock"
    if [ ! -S "$sock" ] || ! ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
      die "no open master for $alias — run 'setup $alias' first"
    fi
  fi
  local sites_exclude="${SITES_EXCLUDE:-helloworld siteProto master}"
  # Bare host from the alias's user@host (strip optional user@); '-' if none.
  local alias_host="${addr#*@}"; [ -n "$alias_host" ] || alias_host="-"
  # The remote script. Two things to keep in mind when editing it:
  #   • `kept` is newline-separated, so the line break inside kept="$kept<NL>$s"
  #     is part of the value — the continuation line must stay at column 0.
  #   • The final loop uses `if … fi`, not `[ -n "$s" ] && printf …`. With the
  #     `&&` form, an empty `kept` (every site excluded) left the loop — and so
  #     the whole script — with exit status 1, which DIRECT mode then reported
  #     as a failed command ("likely an auth failure") although discovery had
  #     succeeded.
  local remote='
SITES_EXCLUDE='\'"$(_shq "$sites_exclude")"\'';
ALIAS_HOST='\'"$(_shq "$alias_host")"\'';
printf "HCIROOT\t%s\n" "${HCIROOT:-}";
if [ -z "${HCIROOT:-}" ]; then
  printf "NOTE\tHCIROOT is empty. If this host has a sudo-gated/non-interactive login profile, pin it: ssh-helper.sh set-hciroot <alias> <path>\n";
  exit 0;
fi;
if [ ! -d "${HCIROOT}" ]; then
  printf "NOTE\tHCIROOT=%s is not a directory on the remote — check the pinned path\n" "${HCIROOT}";
  exit 0;
fi;
sites=$(find "$HCIROOT" -mindepth 1 -maxdepth 2 -name NetConfig -type f 2>/dev/null \
  | while IFS= read -r nc; do d=$(dirname "$nc"); basename "$d"; done \
  | sort -u);
if [ -z "$sites" ] && command -v hcisitelist >/dev/null 2>&1; then
  printf "NOTE\tNetConfig walk found no sites; falling back to hcisitelist\n";
  sites=$(hcisitelist 2>/dev/null | tr " " "\n" | grep -v "^$" | sort -u);
fi;
if [ -z "$sites" ]; then
  printf "NOTE\tno sites with a NetConfig found under %s\n" "$HCIROOT";
  exit 0;
fi;
HN_S=$(hostname -s 2>/dev/null || true);
HN_F=$(hostname 2>/dev/null || true);
kept=""; dropped="";
for s in $sites; do
  [ -n "$s" ] || continue;
  drop="";
  for x in $SITES_EXCLUDE; do [ "$s" = "$x" ] && drop=1 && break; done;
  [ -z "$drop" ] && [ -n "$HN_S" ] && [ "$s" = "$HN_S" ] && drop=1;
  [ -z "$drop" ] && [ -n "$HN_F" ] && [ "$s" = "$HN_F" ] && drop=1;
  [ -z "$drop" ] && [ "$ALIAS_HOST" != "-" ] && [ "$s" = "$ALIAS_HOST" ] && drop=1;
  if [ -n "$drop" ]; then dropped="$dropped $s"; else kept="$kept
$s"; fi;
done;
dropped=$(printf "%s" "$dropped" | sed "s/^ *//");
[ -n "$dropped" ] && printf "EXCLUDED\t%s\n" "$dropped";
printf "%s\n" "$kept" | while IFS= read -r s; do if [ -n "$s" ]; then printf "SITE\t%s\n" "$s"; fi; done'
  # v0.8.17: the TSV the tool layer parses is on STDOUT and stays clean; in
  # direct mode the banner/sudo noise on STDERR is stripped by _run_direct's
  # filter and surfaced only on a real non-zero failure.
  _dispatch_remote "$alias" "$remote"
}
# ── v0.6.8: scp helpers that multiplex via the existing ControlMaster ────────
# scp takes ssh-style options through -o, so passing ControlPath=<socket> with
# ControlMaster=no makes the file transfer ride the already-open master and
# need no second authentication.
#
# _resolve_open_master ALIAS — look up ALIAS and confirm its master is usable.
# On success the following GLOBALS are set for the caller:
#   _RH_ADDR   address (tab-separated field 1 of the host record)
#   _RH_PORT   port    (field 2)
#   _RH_SOCK   $SSH_SOCKETS_DIR/<alias>.sock
# Dies if the alias is unknown, or if the socket is missing / the master does
# not answer `ssh -O check`.
_resolve_open_master() {
  local alias="$1" hostrec
  hostrec=$(read_host_addr "$alias")
  if [ -z "$hostrec" ]; then
    die "no such alias: $alias"
  fi
  _RH_ADDR=$(cut -f1 <<<"$hostrec")
  _RH_PORT=$(cut -f2 <<<"$hostrec")
  _RH_SOCK="$SSH_SOCKETS_DIR/$alias.sock"
  # Socket file present AND master alive → nothing more to do.
  if [ -S "$_RH_SOCK" ] && ssh -S "$_RH_SOCK" -O check -p "$_RH_PORT" "$_RH_ADDR" 2>/dev/null; then
    return 0
  fi
  die "no open master for $alias — open it with /ssh-setup $alias first"
}
# Deterministic local cache path for ssh_pull.
# /tmp/larry-pulls/<alias>.<basename>.<short-hash-of-remote-path>
_pull_cache_path() {
local alias="$1" remote="$2"
local base; base=$(basename -- "$remote" 2>/dev/null)
[ -z "$base" ] && base="file"
# 8-char hex hash of full remote path. We try the most common hashers in
# turn; on a stripped box without any, fall back to a length+checksum proxy
# so the path is still deterministic per <alias,remote_path>.
local hash=""
if command -v shasum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | shasum -a 1 2>/dev/null | cut -c1-8)
elif command -v sha1sum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | sha1sum 2>/dev/null | cut -c1-8)
elif command -v md5sum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | md5sum 2>/dev/null | cut -c1-8)
else
hash=$(printf '%s' "$remote" | cksum 2>/dev/null | awk '{printf "%08x", $1}' | cut -c1-8)
fi
[ -z "$hash" ] && hash="00000000"
mkdir -p /tmp/larry-pulls 2>/dev/null
printf '/tmp/larry-pulls/%s.%s.%s' "$alias" "$base" "$hash"
}
# cmd_pull ALIAS REMOTE_PATH [LOCAL_PATH] — 'pull' subcommand: copy one remote
# file to the local machine and verify its size.
#
# LOCAL_PATH defaults to the deterministic cache path from _pull_cache_path;
# its parent directory is created if needed. The remote size is probed with
# `wc -c` BEFORE the transfer and compared with the local size afterwards, so a
# truncated copy is reported as a partial transfer instead of passing silently.
#
# Transport:
#   • DIRECT mode (v0.8.17) — no ControlMaster: the size probe rides a fresh
#     per-command connection (_dispatch_remote → _run_direct) and the transfer
#     uses _direct_scp.
#   • otherwise — both ride the open master (scp -o ControlPath=… -o
#     ControlMaster=no), so no second authentication is needed.
#
# Output: on success the LAST stdout line is exactly the local path, so callers
# (the tool layer) can capture it with `tail -1`.
# Returns 0 on success, 1 when scp fails. Dies on a usage error, unknown alias,
# closed master, unreadable remote file, or size mismatch.
cmd_pull() {
  local alias="${1:-}" remote="${2:-}" local_path="${3:-}"
  [ -n "$alias" ] && [ -n "$remote" ] || die "usage: pull <alias> <remote_path> [local_path]"

  # ── DIRECT mode ────────────────────────────────────────────────────────────
  if _alias_is_direct "$alias"; then
    local addr_port; addr_port=$(read_host_addr "$alias")
    [ -n "$addr_port" ] || die "no such alias: $alias"
    local _d_addr; _d_addr=$(printf '%s' "$addr_port" | cut -f1)
    [ -z "$local_path" ] && local_path=$(_pull_cache_path "$alias" "$remote")
    mkdir -p "$(dirname "$local_path")" 2>/dev/null
    local remote_size
    remote_size=$(coerce_int "$(_dispatch_remote "$alias" "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" "")
    if [ -z "$remote_size" ] || ! [[ "$remote_size" =~ ^[0-9]+$ ]]; then
      die "remote file not found or not readable: $remote"
    fi
    if _direct_scp "$alias" "$_d_addr:$remote" "$local_path"; then
      local got; got=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
      if [ "$got" != "$remote_size" ]; then
        die "partial transfer: remote=$remote_size bytes, local=$got bytes ($local_path)"
      fi
      # " -> " keeps the remote and local paths from running into each other.
      ok "pulled $alias:$remote -> $local_path ($got bytes, direct)"
      printf '%s\n' "$local_path"
      return 0
    fi
    # _direct_scp has already reported the failure on stderr.
    return 1
  fi

  # ── ControlMaster mode ─────────────────────────────────────────────────────
  _resolve_open_master "$alias"
  [ -z "$local_path" ] && local_path=$(_pull_cache_path "$alias" "$remote")
  mkdir -p "$(dirname "$local_path")" 2>/dev/null
  # Remote size up-front for the partial-transfer sanity check.
  # v0.7.5: coerce_int on wc output — strips CR + non-digits at the source.
  local remote_size=""
  remote_size=$(coerce_int "$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" "")
  if [ -z "$remote_size" ] || ! [[ "$remote_size" =~ ^[0-9]+$ ]]; then
    die "remote file not found or not readable: $remote"
  fi

  local scp_err; scp_err=$(mktemp 2>/dev/null || echo "/tmp/larry-scp.err.$$")
  # Capture scp's status right at the call. Reading $? after the `fi` of an
  # `if scp …; then … fi` yields the status of the `if` itself — 0 when the
  # condition was false — so the failure message used to always say rc=0.
  local rc=0
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$_RH_ADDR:$remote" "$local_path" 2>"$scp_err" || rc=$?
  if [ "$rc" -ne 0 ]; then
    printf 'ssh-helper: scp pull failed (rc=%d):\n' "$rc" >&2
    cat "$scp_err" >&2 2>/dev/null
    rm -f "$scp_err"
    return 1
  fi
  rm -f "$scp_err"

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local got; got=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
  if [ "$got" != "$remote_size" ]; then
    die "partial transfer: remote=$remote_size bytes, local=$got bytes ($local_path)"
  fi
  ok "pulled $alias:$remote -> $local_path ($got bytes)"
  # Print only the local path on the final line so callers (tool layer) can
  # capture it deterministically with `tail -1` or similar.
  printf '%s\n' "$local_path"
  return 0
}
# cmd_push ALIAS LOCAL_PATH REMOTE_PATH — 'push' subcommand: copy one local
# file to the remote host and verify its size.
#
# After the transfer the remote size is read back with `wc -c` and compared
# with the local size, so a truncated copy is reported as a partial transfer.
#
# Transport:
#   • DIRECT mode (v0.8.18, symmetric with cmd_pull) — no ControlMaster: the
#     transfer uses _direct_scp and the size check rides a fresh per-command
#     connection (_dispatch_remote → _run_direct). Without this branch,
#     ssh_push (an exposed tool, used by nc_regression phase 4 to push
#     cross-env input bundles) died "no open master" for any DIRECT-mode alias.
#   • otherwise — both ride the open master (scp -o ControlPath=… -o
#     ControlMaster=no).
#
# Returns 0 on success, 1 when scp fails. Dies on a usage error, missing local
# file, unknown alias, closed master, or size mismatch.
cmd_push() {
  local alias="${1:-}" local_path="${2:-}" remote="${3:-}"
  [ -n "$alias" ] && [ -n "$local_path" ] && [ -n "$remote" ] \
    || die "usage: push <alias> <local_path> <remote_path>"
  [ -f "$local_path" ] || die "local file not found: $local_path"

  # ── DIRECT mode ────────────────────────────────────────────────────────────
  if _alias_is_direct "$alias"; then
    local addr_port; addr_port=$(read_host_addr "$alias")
    [ -n "$addr_port" ] || die "no such alias: $alias"
    local _d_addr; _d_addr=$(printf '%s' "$addr_port" | cut -f1)
    local local_size; local_size=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
    if _direct_scp "$alias" "$local_path" "$_d_addr:$remote"; then
      local got
      got=$(coerce_int "$(_dispatch_remote "$alias" "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" 0)
      if [ "$got" != "$local_size" ]; then
        die "partial transfer: local=$local_size bytes, remote=$got bytes ($alias:$remote)"
      fi
      # " -> " keeps the local and remote paths from running into each other.
      ok "pushed $local_path -> $alias:$remote ($got bytes, direct)"
      return 0
    fi
    # _direct_scp has already reported the failure on stderr.
    return 1
  fi

  # ── ControlMaster mode ─────────────────────────────────────────────────────
  _resolve_open_master "$alias"
  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local local_size; local_size=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
  local scp_err; scp_err=$(mktemp 2>/dev/null || echo "/tmp/larry-scp.err.$$")
  # Capture scp's status right at the call. Reading $? after the `fi` of an
  # `if scp …; then … fi` yields the status of the `if` itself — 0 when the
  # condition was false — so the failure message used to always say rc=0.
  local rc=0
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$local_path" "$_RH_ADDR:$remote" 2>"$scp_err" || rc=$?
  if [ "$rc" -ne 0 ]; then
    printf 'ssh-helper: scp push failed (rc=%d):\n' "$rc" >&2
    cat "$scp_err" >&2 2>/dev/null
    rm -f "$scp_err"
    return 1
  fi
  rm -f "$scp_err"

  # Validate via remote wc -c (coerce_int: Cygwin wc.exe CR-taint defense).
  local got
  got=$(coerce_int "$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" 0)
  if [ "$got" != "$local_size" ]; then
    die "partial transfer: local=$local_size bytes, remote=$got bytes ($alias:$remote)"
  fi
  ok "pushed $local_path -> $alias:$remote ($got bytes)"
  return 0
}
# pull-smat: smart pull for a Cloverleaf thread's .smatdb file.
# Two modes:
#   Full pull:  pull-smat <alias> <site> <thread>
#               Locates $HCISITEDIR/exec/processes/*/<thread>.smatdb on the
#               remote via find, then scp's the entire .smatdb file (cmd_pull).
#   Sampled:    pull-smat <alias> <site> <thread> <days_back>
#               Runs sqlite3 server-side, extracts up to 1000 most-recent
#               messages from the last <days_back> days, encodes each
#               MessageContent BLOB as base64, returns TSV:
#                 unix_ts<TAB>direction<TAB>type<TAB>source<TAB>dest<TAB>message_blob_b64
#               The schema (table=smat_msgs, columns Time/Type/SourceConn/
#               DestConn/MessageContent) is the same one nc-msgs.sh uses.
#               A "# smatdb=… total_in_window=… returned=… truncated=…"
#               summary line goes to the remote STDERR.
#
# <days_back> must be a non-negative decimal integer. It is validated and
# normalised locally before any remote work, because it is spliced into the
# remote script's $(( … )) arithmetic.
# Returns cmd_pull's status (full mode) or the remote sample script's status
# (sampled mode). Dies on a usage error, bad <days_back>, closed master
# (master mode only), or a failed remote smatdb lookup.
cmd_pull_smat() {
  local alias="${1:-}" site="${2:-}" thread="${3:-}" days_back="${4:-}"
  [ -n "$alias" ] && [ -n "$site" ] && [ -n "$thread" ] \
    || die "usage: pull-smat <alias> <site> <thread> [days_back]"
  # Fail fast on a bad window instead of letting the remote shell trip over it:
  # a non-numeric value would be evaluated inside the remote $(( … )), and a
  # zero-padded one such as 08 is rejected there as an invalid octal literal.
  if [ -n "$days_back" ]; then
    case "$days_back" in
      *[!0-9]*) die "days_back must be a non-negative integer (got: $days_back)" ;;
    esac
    days_back=$((10#$days_back))
  fi
  # v0.8.17: in DIRECT mode every remote op is a fresh per-command sshpass
  # connection — there is no master to resolve. Only require an open master for
  # the multiplex path; both the find/sample command dispatches and the
  # full-file scp below pick the transport via the direct flag.
  if ! _alias_is_direct "$alias"; then
    _resolve_open_master "$alias"
  fi
  # Discover the remote .smatdb path. $HCISITEDIR/$HCIROOT are resolved by the
  # LOGIN shell (see _build_login_cmd) — the v0.8.13 fix — so we no longer
  # depend on a non-login rc happening to export them. SITEDIR falls back to
  # <HCIROOT>/<site> if HCISITEDIR isn't set for that site. The find runs
  # remotely to avoid hard-coding process directory names.
  local find_cmd
  find_cmd='set -e; SDIR="${HCISITEDIR:-${HCIROOT:-}/'"$site"'}"; '
  find_cmd+='[ -d "$SDIR" ] || { echo "ERROR: sitedir not found on remote: $SDIR" >&2; exit 2; }; '
  find_cmd+='F=$(find "$SDIR/exec/processes" -maxdepth 2 -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || F=$(find "$SDIR" -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || { echo "ERROR: no smatdb found for thread '"$thread"' under $SDIR" >&2; exit 3; }; '
  # v0.8.13 M1 hardening (Vera Minor #1): emit the resolved path behind an
  # unambiguous sentinel prefix instead of relying on it being the last stdout
  # line. A login shell (`bash -lc`) is the case most likely to print a
  # MOTD/banner to stdout, which a blind `tail -1` would mistake for the path.
  # We grep for the sentinel line and strip it; only if no sentinel is present
  # do we fall back to the prior `tail -1` behaviour, so this can never regress
  # a host that worked before.
  find_cmd+='printf "SMATDB_PATH:%s\n" "$F"'
  local _smat_raw remote_smatdb
  # stdout+stderr are captured together (2>&1): the SMATDB_PATH sentinel /
  # ERROR: parsing already tolerates banner/sudo noise interleaved on stderr,
  # so the direct path needs no extra filtering here.
  _smat_raw=$(_dispatch_remote "$alias" "$find_cmd" 2>&1)
  remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^SMATDB_PATH:' | tail -1)
  if [ -n "$remote_smatdb" ]; then
    remote_smatdb="${remote_smatdb#SMATDB_PATH:}"
  else
    # No sentinel — surface any ERROR: line if present, else fall back to the
    # last line (pre-hardening behaviour) so failure modes stay diagnosable.
    remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^ERROR:' | tail -1)
    [ -n "$remote_smatdb" ] || remote_smatdb=$(printf '%s\n' "$_smat_raw" | tail -1)
  fi
  case "$remote_smatdb" in
    ERROR:*|'') die "remote smatdb lookup failed: $remote_smatdb" ;;
  esac
  if [ -z "$days_back" ]; then
    # Full mode: scp the whole .smatdb file.
    local local_path
    local_path=$(_pull_cache_path "$alias" "$remote_smatdb")
    cmd_pull "$alias" "$remote_smatdb" "$local_path"
    return $?
  fi
  # Sampled mode: run sqlite3 on the remote, return TSV with b64-encoded blobs.
  # base64 -w0 is GNU coreutils; on BSD use plain base64 (no -w). The remote
  # B64 helper uses whichever is present and strips newlines from the plain
  # form so each message stays on one TSV line.
  #
  # Output line shape (each message):
  #   <unix_ts_s>\t<direction>\t<type>\t<source>\t<dest>\t<b64-of-MessageContent>
  # `direction` is "in" when DestConn=thread, else "out" (best-effort heuristic).
  local sample_cmd
  sample_cmd='set -e; '
  sample_cmd+='which sqlite3 >/dev/null 2>&1 || { echo "ERROR: sqlite3 not on remote PATH" >&2; exit 4; }; '
  sample_cmd+='B64() { if base64 --help 2>&1 | grep -q -- " -w"; then base64 -w0; else base64 | tr -d "\n"; fi; }; '
  # Approach: select rowids + metadata first, then for each rowid have sqlite3
  # writefile() the MessageContent blob into a per-row temp file and base64
  # that file inline. This keeps binary content out of sqlite3's text output.
  sample_cmd+='TMP=$(mktemp -d); trap "rm -rf $TMP" EXIT; '
  sample_cmd+='CUTOFF_MS=$(( ( $(date +%s) - '"$days_back"' * 86400 ) * 1000 )); '
  sample_cmd+='sqlite3 "'"$remote_smatdb"'" "SELECT rowid, Time, IFNULL(Type,\"\"), IFNULL(SourceConn,\"\"), IFNULL(DestConn,\"\") FROM smat_msgs WHERE Time >= $CUTOFF_MS ORDER BY Time DESC LIMIT 1000" '
  sample_cmd+='| while IFS="|" read -r rid tm typ src dst; do '
  sample_cmd+=' blobfile="$TMP/$rid.bin"; '
  sample_cmd+=' sqlite3 "'"$remote_smatdb"'" "SELECT writefile(\"$blobfile\", MessageContent) FROM smat_msgs WHERE rowid=$rid" >/dev/null 2>&1; '
  sample_cmd+=' if [ "$dst" = "'"$thread"'" ]; then dir="in"; else dir="out"; fi; '
  sample_cmd+=' printf "%s\t%s\t%s\t%s\t%s\t" "$(( tm / 1000 ))" "$dir" "$typ" "$src" "$dst"; '
  sample_cmd+=' B64 < "$blobfile"; '
  sample_cmd+=' printf "\n"; '
  sample_cmd+='done; '
  sample_cmd+='TOTAL=$(sqlite3 "'"$remote_smatdb"'" "SELECT COUNT(*) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='RETURNED=$(sqlite3 "'"$remote_smatdb"'" "SELECT MIN(1000, COUNT(*)) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='echo "# smatdb=$(basename '"$remote_smatdb"') days_back='"$days_back"' total_in_window=$TOTAL returned=$RETURNED truncated=$([ "$TOTAL" -gt 1000 ] && echo yes || echo no)" >&2'
  # Login shell so sqlite3 resolves from the operator's PATH (v0.8.13), unless
  # the alias has a pinned HCIROOT, in which case HCIROOT is exported explicitly
  # and the sudo-gated login profile is skipped (v0.8.15). Note: when pinned,
  # sqlite3 must be resolvable on the default non-login PATH; if it is not, the
  # sample_cmd already emits a clear "ERROR: sqlite3 not on remote PATH".
  # v0.8.17: dispatch over DIRECT sshpass or the ControlMaster per the alias flag.
  _dispatch_remote "$alias" "$sample_cmd"
}
# ── Subcommand dispatch ──────────────────────────────────────────────────────
# The first positional argument selects the subcommand ("help" when absent or
# empty). It is shifted off so each handler receives only its own arguments;
# cmd_hosts and cmd_help are called without arguments. Several subcommands have
# a short alias (list, rm, passwd, hciroot, direct, open, exit, run).
case "${1:-help}" in
  hosts | list)
    shift
    cmd_hosts
    ;;
  add)
    shift
    cmd_add "$@"
    ;;
  remove | rm)
    shift
    cmd_remove "$@"
    ;;
  pass | passwd)
    shift
    cmd_pass "$@"
    ;;
  set-hciroot | hciroot)
    shift
    cmd_set_hciroot "$@"
    ;;
  set-direct | direct)
    shift
    cmd_set_direct "$@"
    ;;
  setup | open)
    shift
    cmd_setup "$@"
    ;;
  close | exit)
    shift
    cmd_close "$@"
    ;;
  status)
    shift
    cmd_status "$@"
    ;;
  exec | run)
    shift
    cmd_exec "$@"
    ;;
  discover)
    shift
    cmd_discover "$@"
    ;;
  pull)
    shift
    cmd_pull "$@"
    ;;
  push)
    shift
    cmd_push "$@"
    ;;
  pull-smat)
    shift
    cmd_pull_smat "$@"
    ;;
  -h | --help | help)
    cmd_help
    ;;
  *)
    die "unknown subcommand: ${1:-} (run with --help)"
    ;;
esac