cloverleaf-larry/lib/ssh-helper.sh
Bryan Johnson fc667e2451 v0.8.15: legacy/qa remote-enumeration fix — per-alias HCIROOT pin (sudo-gated profile bypass), hcisitelist-free NetConfig walk, ControlMaster banner+rotating-pw hardening; zero traffic-bypass primitives
MAJOR-1: regenerate MANIFEST (larry.sh, lib/ssh-helper.sh, VERSION,
CHANGELOG.md hashes now authoritative for the v0.8.15 bytes).
MINOR-1: print_help /sites line documents the --hciroot <path> pin
convenience and the pinned-vs-login resolution distinction.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 08:58:49 -07:00

792 lines
37 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# ssh-helper.sh — secure SSH command execution via ControlMaster.
#
# Architecture:
# • Hosts configured in $LARRY_HOME/.ssh-hosts.tsv as
# alias \t user@host \t port
# • Passwords stored at $LARRY_HOME/.ssh-creds/<alias>, mode 0600.
# The password file is the single point of truth — to rotate (daily-changing
# passwords) just overwrite the file with the new one and re-run 'setup'.
# • sshpass reads the password via -f (file), so it never lands in argv or
# environment where Larry the LLM (or other processes via /proc) could see it.
# • The first 'setup' call opens a long-lived SSH ControlMaster connection
# (default ControlPersist=8h). Subsequent 'exec' calls multiplex through
# the master socket and need no password.
# • Larry's tool layer only sees: alias, command, command_output.
# Never the password. Never the user@host (unless added to the alias list).
#
# Subcommands:
# hosts list configured hosts
# add <alias> <user@host[:port]> add a host to the alias list
# remove <alias> remove an alias (also clears cred + socket)
# pass <alias> set/update the password (hidden interactive)
# set-hciroot <alias> <path> pin (persist) $HCIROOT for an alias. When
# set, remote enumeration/exec runs with
# HCIROOT=<path> exported EXPLICITLY and
# WITHOUT the `bash -lc` login wrapper — for
# hosts whose login profile is sudo-gated or
# otherwise non-interactive (v0.8.15).
# Pass an empty path to clear the pin.
# setup <alias> open ControlMaster (uses stored password ONCE)
# close <alias> close ControlMaster
# status [alias] show open masters / cred presence
# exec <alias> <command...> run command via master (returns output)
# discover <alias> auto-detect remote Cloverleaf env:
# resolves $HCIROOT (pinned, else LOGIN shell),
# then enumerates sites (NetConfig walk first,
# hcisitelist fallback). Prints TSV:
# HCIROOT<TAB><path>
# SITE<TAB><name> (one per site)
# No nagging for paths — the remote's own
# login profile is the source of truth.
# pull <alias> <remote> [local] scp remote → local via existing master
# push <alias> <local> <remote> scp local → remote via existing master
# pull-smat <alias> <site> <thread> [days_back]
# pull a thread's smatdb (full) or sample
# recent messages from it (sampled, TSV b64)
# help print this help
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any of its stages fails.
set -uo pipefail

# Root of Larry's private state; honours a caller-supplied $LARRY_HOME.
: "${LARRY_HOME:=$HOME/.larry}"

# Everything below lives under $LARRY_HOME:
#   hosts table (TSV), per-alias password files, per-alias ControlMaster sockets.
SSH_HOSTS_FILE="${LARRY_HOME}/.ssh-hosts.tsv"
SSH_CREDS_DIR="${LARRY_HOME}/.ssh-creds"
SSH_SOCKETS_DIR="${LARRY_HOME}/.ssh-sockets"

# Value handed to ssh as ControlPersist; override with LARRY_SSH_CONTROL_PERSIST.
SSH_CONTROL_PERSIST="${LARRY_SSH_CONTROL_PERSIST:-8h}"
# die MSG... — print "ssh-helper: MSG" on stderr and exit the script with 1.
die() {
  printf 'ssh-helper: %s\n' "$*" >&2
  exit 1
}

# warn MSG... — print "ssh-helper: warn: MSG" on stderr; does not exit.
warn() {
  printf 'ssh-helper: warn: %s\n' "$*" >&2
}

# ok MSG... — print "ssh-helper: MSG" on stdout.
ok() {
  printf 'ssh-helper: %s\n' "$*"
}
# v0.7.5: shared CR-safety primitives. Byte counts come from `wc`, and Cygwin's
# wc.exe can leave a trailing \r on its output that breaks string/integer
# comparisons. The helpers are taken from cygwin-safe.sh (next to this script)
# when it is readable; otherwise a minimal local coerce_int is defined.
_SSH_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)"
if [ ! -r "$_SSH_LIB_DIR/cygwin-safe.sh" ]; then
  # coerce_int RAW [DEFAULT] — echo RAW with every non-digit removed; when no
  # digit survives, echo DEFAULT (an empty or missing DEFAULT counts as 0).
  coerce_int() {
    local raw="${1:-}" fallback="${2:-0}" digits
    digits=$(printf '%s' "$raw" | tr -cd '0-9')
    printf '%s' "${digits:-$fallback}"
  }
else
  # shellcheck disable=SC1090,SC1091
  . "$_SSH_LIB_DIR/cygwin-safe.sh"
fi
# ensure_layout — create Larry's private directories (mode 0700) and, when it
# does not exist yet, seed the hosts TSV with its header row (mode 0600).
# Directory creation/permission errors are deliberately ignored (best effort).
# Side effect: sets umask 077 for the rest of the process when it has to
# create the hosts file.
ensure_layout() {
  local -a dirs=("$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR")
  mkdir -p "${dirs[@]}" 2>/dev/null
  chmod 700 "${dirs[@]}" 2>/dev/null || true

  # Nothing more to do once the hosts table exists.
  [ -f "$SSH_HOSTS_FILE" ] && return 0

  umask 077
  # v0.8.15: 4th column = pinned HCIROOT (optional). Older 3-column files stay
  # valid — readers treat a missing $4 as "no pin".
  printf 'alias\taddr\tport\thciroot\n' > "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
}
# read_host_addr ALIAS — look ALIAS up in the hosts TSV (header row skipped)
# and echo "ADDR<TAB>PORT" for the first matching row. Echoes nothing when the
# alias is unknown; returns 1 when the hosts file does not exist.
read_host_addr() {
  local wanted="$1"
  if [ ! -f "$SSH_HOSTS_FILE" ]; then
    printf ''
    return 1
  fi
  awk -F'\t' -v a="$wanted" '
    NR > 1 && $1 == a { printf "%s\t%s\n", $2, $3; exit }
  ' < "$SSH_HOSTS_FILE"
}
# read_host_hciroot ALIAS — echo the pinned HCIROOT (column 4 of the hosts TSV)
# for the first row matching ALIAS, or nothing when there is no pin / no row.
# A missing hosts file is not an error here (returns 0 with empty output).
# v0.8.15: a non-empty value means remote commands for this alias run with
# HCIROOT exported explicitly and WITHOUT the `bash -lc` login wrapper.
read_host_hciroot() {
  local wanted="$1"
  if [ ! -f "$SSH_HOSTS_FILE" ]; then
    printf ''
    return 0
  fi
  awk -F'\t' -v a="$wanted" '
    NR > 1 && $1 == a { printf "%s\n", $4; exit }
  ' < "$SSH_HOSTS_FILE"
}
# require_sshpass — abort (via die) unless an `sshpass` command is resolvable.
require_sshpass() {
  if ! command -v sshpass >/dev/null 2>&1; then
    die "sshpass not on PATH — install it (apt install sshpass / brew install sshpass) and retry"
  fi
}
# cmd_help — print the usage text, i.e. lines 4 through 47 of this script
# (the header comment block), verbatim.
cmd_help() {
  local self="$0"
  sed -n '4,47p' "$self"
}
# cmd_hosts — print a table of configured hosts: alias, user@host, port,
# whether a password file exists, whether a ControlMaster is open, and the
# pinned HCIROOT (if any). Prints a hint and returns 0 when only the header
# row is present in the hosts file.
cmd_hosts() {
  ensure_layout

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint would tank
  # the `-le 1` integer test below.
  local line_count
  line_count=$(coerce_int "$(wc -l < "$SSH_HOSTS_FILE")" 0)
  if [ "$line_count" -le 1 ]; then
    echo "no hosts configured. Add with: ssh-helper.sh add <alias> <user@host[:port]>"
    return 0
  fi

  printf 'alias user@host port cred master hciroot-pin\n'
  printf '%s\n' '───── ───────── ──── ──── ────── ───────────'

  # One table row per data row (header skipped by awk).
  awk -F'\t' 'NR>1' "$SSH_HOSTS_FILE" | while IFS=$'\t' read -r name target portnum pin; do
    local has_cred="" master="" ctl="$SSH_SOCKETS_DIR/$name.sock"
    if [ -f "$SSH_CREDS_DIR/$name" ]; then
      has_cred="✓"
    fi
    # A socket file alone is not proof: ask the master whether it is alive.
    if [ -S "$ctl" ] && ssh -S "$ctl" -O check -p "$portnum" "$target" 2>/dev/null; then
      master="open"
    fi
    printf '%-20s%-52s%-6s%-6s%-8s%s\n' "$name" "$target" "${portnum:-22}" "$has_cred" "$master" "${pin:-}"
  done
}
# cmd_add ALIAS USER@HOST[:PORT] — append a host row to the hosts TSV.
#
# ALIAS   short name used by every other subcommand. It becomes a TSV field
#         AND a file-name component ($SSH_CREDS_DIR/<alias>,
#         $SSH_SOCKETS_DIR/<alias>.sock), so it is restricted to letters,
#         digits, '.', '_' and '-' and must start with a letter or digit.
#         That rules out path traversal ("../x") and row corruption
#         (tabs/newlines).
# TARGET  user@host or user@host:port; the port defaults to 22.
#
# Dies on a malformed argument, on a duplicate alias, or when the row cannot
# be written. Side effect: sets umask 077 for the rest of the process.
cmd_add() {
  local alias="${1:-}" target="${2:-}"
  [ -n "$alias" ] && [ -n "$target" ] || die "usage: add <alias> <user@host[:port]>"
  [[ "$alias" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] \
    || die "alias must start with a letter or digit and contain only letters, digits, '.', '_' or '-'"
  [[ "$target" =~ ^[^@[:space:]]+@[^:[:space:]]+(:[0-9]+)?$ ]] \
    || die "target must look like user@host or user@host:port"

  local addr port
  if [[ "$target" == *:* ]]; then
    addr="${target%:*}"
    port="${target##*:}"
  else
    addr="$target"
    port="22"
  fi

  ensure_layout

  # Reject duplicates (use 'remove' first)
  if awk -F'\t' -v a="$alias" 'NR>1 && $1==a { found=1; exit } END { exit !found }' "$SSH_HOSTS_FILE"; then
    die "alias '$alias' already exists. Use 'remove $alias' first."
  fi

  umask 077
  # v0.8.15: write an empty 4th (hciroot) field so the row layout is uniform.
  printf '%s\t%s\t%s\t%s\n' "$alias" "$addr" "$port" "" >> "$SSH_HOSTS_FILE" \
    || die "cannot write to $SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
  ok "added $alias → $addr (port $port). Next: ssh-helper.sh pass $alias"
}
# cmd_set_hciroot ALIAS [PATH] — pin (or clear) the HCIROOT for an alias.
# Persisted as column 4 of the hosts TSV. An empty/omitted PATH clears the pin.
# When set, cmd_exec/cmd_discover/cmd_pull_smat run remote commands with
# HCIROOT=<path> exported EXPLICITLY and WITHOUT the `bash -lc` login wrapper —
# the v0.8.15 fix for hosts whose login profile is sudo-gated (a non-interactive
# SSH session hits `sudo: a terminal is required` and never exports $HCIROOT).
#
# Dies when ALIAS is missing/unknown, when PATH contains a tab, CR or newline
# (it would corrupt the TSV), or when the hosts file cannot be rewritten.
cmd_set_hciroot() {
  local alias="${1:-}" newroot="${2:-}"
  [ -n "$alias" ] || die "usage: set-hciroot <alias> <path> (empty path clears the pin)"
  case "$newroot" in
    *$'\t'*|*$'\n'*|*$'\r'*)
      die "HCIROOT path must not contain tab or newline characters" ;;
  esac

  ensure_layout
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias (run 'add' first)"

  # Rewrite the table into a temp file NEXT TO the hosts file (same filesystem,
  # so the final mv is a rename) and only replace the original on success.
  local tmp
  tmp=$(mktemp "${SSH_HOSTS_FILE}.XXXXXX") \
    || die "cannot create a temp file next to $SSH_HOSTS_FILE"

  # The new value travels via the environment rather than `awk -v`, because
  # -v would interpret backslash escapes inside the path. awk handles rows
  # that still have only 3 columns (legacy) by assigning $4 directly.
  if LARRY_PIN_HCIROOT="$newroot" awk -F'\t' -v OFS='\t' -v a="$alias" '
        NR==1 { if (NF < 4) { $4="hciroot" } print; next }
        $1==a { $4=ENVIRON["LARRY_PIN_HCIROOT"]; print; next }
        { print }
      ' "$SSH_HOSTS_FILE" > "$tmp" && mv -f "$tmp" "$SSH_HOSTS_FILE"; then
    chmod 600 "$SSH_HOSTS_FILE"
  else
    rm -f "$tmp"
    die "failed to update $SSH_HOSTS_FILE"
  fi

  if [ -n "$newroot" ]; then
    ok "pinned HCIROOT for $alias → $newroot"
    ok " (remote enumeration/exec for $alias will export HCIROOT explicitly and SKIP the login profile)"
  else
    ok "cleared HCIROOT pin for $alias (reverting to login-shell \$HCIROOT resolution)"
  fi
}
# cmd_remove ALIAS — drop ALIAS from the hosts TSV, close its ControlMaster
# (best effort) and delete its stored password and socket file.
#
# ALIAS is used as a file-name component for the `rm -f` below, so a value
# containing '/' (or equal to '.' / '..') is rejected up front — otherwise
# something like "../x" would delete files outside the creds/sockets dirs.
# Dies as well when the hosts file cannot be rewritten.
cmd_remove() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: remove <alias>"
  case "$alias" in
    */*|.|..) die "invalid alias: $alias" ;;
  esac

  ensure_layout

  # Filter into a temp file beside the hosts file; replace only on success.
  local tmp
  tmp=$(mktemp "${SSH_HOSTS_FILE}.XXXXXX") \
    || die "cannot create a temp file next to $SSH_HOSTS_FILE"
  if awk -F'\t' -v a="$alias" 'NR==1 || $1!=a' "$SSH_HOSTS_FILE" > "$tmp" \
      && mv -f "$tmp" "$SSH_HOSTS_FILE"; then
    chmod 600 "$SSH_HOSTS_FILE"
  else
    rm -f "$tmp"
    die "failed to update $SSH_HOSTS_FILE"
  fi

  # Close + clean master socket and cred
  cmd_close "$alias" 2>/dev/null || true
  rm -f "$SSH_CREDS_DIR/$alias" "$SSH_SOCKETS_DIR/$alias.sock" 2>/dev/null
  ok "removed $alias (cred + socket cleared)"
}
# cmd_pass ALIAS — prompt on the terminal (echo disabled) for ALIAS's password
# and store it at $SSH_CREDS_DIR/<alias> with mode 0600.
# Dies when ALIAS is missing or unknown, or when nothing was typed.
# The prompt goes to stderr and the answer is read from /dev/tty.
cmd_pass() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: pass <alias>"
  fi

  local hostinfo
  hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias (run 'add' first)"
  fi

  ensure_layout

  printf 'Password for %s (input is hidden; press Enter when done): ' "$alias" >&2
  local secret=""
  stty -echo 2>/dev/null
  IFS= read -r secret </dev/tty || true
  stty echo 2>/dev/null
  echo "" >&2
  if [ -z "$secret" ]; then
    die "no password entered"
  fi

  local credfile="$SSH_CREDS_DIR/$alias"
  umask 077
  # NO trailing newline — sshpass -f expects raw password as full file content
  printf '%s' "$secret" > "$credfile"
  chmod 600 "$credfile"
  ok "password saved to $SSH_CREDS_DIR/$alias (mode 0600). Next: ssh-helper.sh setup $alias"
}
# _try_master_open ERRFILE CREDFILE SOCK PORT ADDR — one attempt to open a
# ControlMaster with the stored credential. Returns 0 only when ssh started
# AND the master answers `-O check`; non-zero otherwise. Stderr from
# sshpass/ssh lands in ERRFILE so the caller can classify it.
#
# v0.8.15 hardening (banner + rotating-password):
#   • PreferredAuthentications=password + PubkeyAuthentication=no force the
#     password method so sshpass feeds the password cleanly. Without this, on a
#     box that prints a long pre-auth banner and would otherwise try pubkey
#     first, ssh can consume the password slot on the wrong method and the only
#     thing surfaced is the banner with NO "permission denied".
#   • NumberOfPasswordPrompts=1 so a stale password fails fast (one prompt)
#     instead of hanging, which lets the caller re-prompt for the rotated one.
_try_master_open() {
  local errfile="$1" credfile="$2" sock="$3" port="$4" addr="$5"
  sshpass -f "$credfile" ssh \
    -o "ControlMaster=yes" \
    -o "ControlPath=$sock" \
    -o "ControlPersist=$SSH_CONTROL_PERSIST" \
    -o "StrictHostKeyChecking=accept-new" \
    -o "PreferredAuthentications=password" \
    -o "PubkeyAuthentication=no" \
    -o "NumberOfPasswordPrompts=1" \
    -o "ConnectTimeout=10" \
    -p "$port" \
    -N -f \
    "$addr" 2>"$errfile" || return 1
  ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null
}

# _looks_like_auth_failure ERRFILE — heuristic: did the attempt fail on auth
# (return 0) rather than on network/host-key trouble (return 1)?
#   • explicit auth keywords in ERRFILE            → auth failure
#   • explicit network / host-key keywords         → NOT an auth failure, so we
#     never ask for (and overwrite) a password that was never even tried
#   • empty or banner-only stderr                  → treated as auth failure;
#     a rotated/stale credential is the prime suspect in that case
_looks_like_auth_failure() {
  local errfile="$1"
  if grep -qiE 'permission denied|authentication fail|incorrect password|too many authentication|password:' "$errfile" 2>/dev/null; then
    return 0
  fi
  if grep -qiE 'connection refused|connection timed out|operation timed out|could not resolve hostname|no route to host|network is unreachable|host key verification failed|remote host identification has changed' "$errfile" 2>/dev/null; then
    return 1
  fi
  return 0
}

# cmd_setup ALIAS — open the long-lived ControlMaster for ALIAS using the
# stored password. Returns 0 when a master is (already) open, 1 otherwise.
# On an auth-looking failure, and only when a terminal is available, it asks
# once for a fresh password, stores it, and retries once.
# Dies when ALIAS is missing/unknown, no password is stored, sshpass is not
# installed, or the stderr capture file cannot be created.
cmd_setup() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: setup <alias>"
  local addr_port; addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  ensure_layout

  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    ok "master already open for $alias ($addr:$port)"
    return 0
  fi

  local credfile="$SSH_CREDS_DIR/$alias"
  [ -f "$credfile" ] || die "no password set for $alias — run 'pass $alias' first"
  require_sshpass
  ok "opening ssh master for $alias ($addr:$port) — ControlPersist=$SSH_CONTROL_PERSIST..."

  # Private, unpredictable capture file: a fixed name under /tmp can be
  # pre-created/symlinked by another user and collides between parallel runs.
  local errfile
  errfile=$(mktemp "${TMPDIR:-/tmp}/larry-ssh-setup.XXXXXX") \
    || die "cannot create a temp file for ssh stderr"

  if _try_master_open "$errfile" "$credfile" "$sock" "$port" "$addr"; then
    ok "✓ master open: $alias → $addr:$port (socket: $sock)"
    rm -f "$errfile"
    return 0
  fi

  # First attempt failed. Surface the REAL error (not just the banner) and, if it
  # looks like an auth failure, re-prompt for a fresh password and retry ONCE.
  # Never silently no-op.
  printf 'ssh-helper: first master-open attempt failed for %s.\n' "$alias" >&2
  if [ -s "$errfile" ]; then
    printf 'ssh-helper: ssh/sshpass stderr (auth error, not just the banner):\n' >&2
    # Prefer the diagnostic lines; fall back to everything if none match.
    grep -iE 'permission denied|authentication|password|denied|fatal|connection|timed out|refused|host key' "$errfile" >&2 2>/dev/null \
      || cat "$errfile" >&2 2>/dev/null
  else
    printf 'ssh-helper: (no stderr captured — the box likely printed only its pre-auth banner; the stored password is almost certainly stale)\n' >&2
  fi

  if _looks_like_auth_failure "$errfile" && { [ -t 0 ] || [ -e /dev/tty ]; }; then
    printf 'ssh-helper: looks like the stored password is stale (this host rotates ~every 12h).\n' >&2
    printf 'Enter a FRESH password for %s (input hidden; Enter to abort): ' "$alias" >&2
    local pw=""
    stty -echo 2>/dev/null
    IFS= read -r pw </dev/tty || true
    stty echo 2>/dev/null
    echo "" >&2
    if [ -n "$pw" ]; then
      umask 077
      printf '%s' "$pw" > "$credfile" # NO trailing newline (sshpass -f)
      chmod 600 "$credfile"
      ok "stored the fresh password — retrying master open..."
      : > "$errfile"
      if _try_master_open "$errfile" "$credfile" "$sock" "$port" "$addr"; then
        ok "✓ master open: $alias → $addr:$port (socket: $sock)"
        rm -f "$errfile"
        return 0
      fi
      printf 'ssh-helper: retry with the fresh password ALSO failed. ssh/sshpass stderr:\n' >&2
      cat "$errfile" >&2 2>/dev/null
    else
      printf 'ssh-helper: no password entered — aborting.\n' >&2
    fi
  fi

  printf 'ssh-helper: master NOT open for %s. Next step: re-run `ssh-helper.sh setup %s` (or the /ssh-setup %s slash command) with a current password; if the host changed, re-check `ssh-helper.sh hosts`.\n' \
    "$alias" "$alias" "$alias" >&2
  rm -f "$errfile"
  return 1
}
# cmd_close ALIAS — ask the ControlMaster for ALIAS to exit (only when both the
# socket and a hosts row exist; failures ignored), then delete the socket file
# and report. Dies when ALIAS is empty.
cmd_close() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: close <alias>"
  fi

  local hostinfo ctl
  # An unknown alias / missing hosts file simply means "nothing to signal".
  if ! hostinfo=$(read_host_addr "$alias"); then
    hostinfo=""
  fi
  ctl="$SSH_SOCKETS_DIR/$alias.sock"

  if [ -n "$hostinfo" ] && [ -S "$ctl" ]; then
    local target portnum
    target=$(printf '%s' "$hostinfo" | cut -f1)
    portnum=$(printf '%s' "$hostinfo" | cut -f2)
    ssh -S "$ctl" -O exit -p "$portnum" "$target" 2>/dev/null || true
  fi

  rm -f "$ctl"
  ok "closed master for $alias"
}
# cmd_status [ALIAS] — without an argument, print the hosts table (cmd_hosts).
# With ALIAS, print its address, port, whether a password file is present, the
# socket path, and whether the ControlMaster answers a liveness check.
# Dies when ALIAS is given but unknown.
cmd_status() {
  ensure_layout

  if [ -z "${1:-}" ]; then
    cmd_hosts
    return
  fi

  local alias="$1"
  local hostinfo; hostinfo=$(read_host_addr "$alias")
  [ -n "$hostinfo" ] || die "no such alias: $alias"

  local target portnum ctl cred_word
  target=$(printf '%s' "$hostinfo" | cut -f1)
  portnum=$(printf '%s' "$hostinfo" | cut -f2)
  ctl="$SSH_SOCKETS_DIR/$alias.sock"
  if [ -f "$SSH_CREDS_DIR/$alias" ]; then
    cred_word="present"
  else
    cred_word="missing"
  fi

  # The "status: " label is left without a newline; the verdict completes it.
  printf 'alias: %s\naddr: %s\nport: %s\ncred: %s\nsocket: %s\nstatus: ' \
    "$alias" "$target" "$portnum" "$cred_word" "$ctl"
  if [ -S "$ctl" ] && ssh -S "$ctl" -O check -p "$portnum" "$target" 2>/dev/null; then
    echo "master OPEN"
  else
    echo "no master (run setup)"
  fi
  return 0
}
# v0.8.13 (Cloverleaf login-shell fix): exec defaults to a LOGIN shell.
#
# Root cause of the "qa keeps asking me for $HCIROOT" friction: a plain
# ssh host 'cmd'
# runs a NON-interactive, NON-login shell. On a Cloverleaf host, $HCIROOT (and
# $HCISITE, the hci* binaries on PATH, etc.) are exported by the LOGIN profile
# (/etc/profile.d, the hci user's ~/.profile / ~/.bash_profile, the per-site
# `.profile`). A non-login shell never sources those, so $HCIROOT arrives empty
# and Larry used to give up and nag the user for a path. Wrapping the command in
# `bash -lc` forces a login shell, so the Cloverleaf environment populates
# exactly as it does for an interactive operator login. This is the version-
# agnostic, no-config fix — it works on any Cloverleaf host whose operator login
# sets up the environment (i.e. all of them).
#
# Escape hatch: prefix the command with the literal token NOLOGIN<space> (or set
# LARRY_SSH_NO_LOGIN=1) to run a bare non-login shell — for the rare host where
# the login profile is interactive-only and hangs a non-tty `bash -l`.
# _build_login_cmd RAW — echo the command string to hand to ssh.
#   • RAW starting with the literal "NOLOGIN " → RAW minus that prefix, as-is.
#   • LARRY_SSH_NO_LOGIN=1                      → RAW as-is.
#   • otherwise                                 → bash -lc '<RAW>', with RAW
#     single-quoted for the remote shell.
_build_login_cmd() {
  local payload="$1" quoted

  if [[ "$payload" == "NOLOGIN "* ]]; then
    printf '%s' "${payload#NOLOGIN }"
    return
  fi

  if [ "${LARRY_SSH_NO_LOGIN:-0}" = "1" ]; then
    printf '%s' "$payload"
    return
  fi

  # Each embedded single quote becomes '\'' (close, escaped quote, reopen) —
  # POSIX-portable — so the payload survives inside `bash -lc '...'`.
  quoted=$(printf '%s' "$payload" | sed "s/'/'\\\\''/g")
  printf "bash -lc '%s'" "$quoted"
}
# v0.8.15 (sudo-gated-profile fix): when an alias has a pinned HCIROOT, the
# remote command must NOT go through the login profile (`bash -lc`). On hosts
# whose login profile is sudo-gated, a non-interactive SSH session trips
# `sudo: a terminal is required`, the profile never finishes, and $HCIROOT comes
# back EMPTY. Instead we export HCIROOT explicitly and run a plain `sh -c` (no
# login profile, no tty needed). This is deterministic and version-agnostic.
#
# _shq STR — echo STR with every single quote rewritten as '\'' so the result
# can be dropped between a pair of single quotes in a shell command string.
# The surrounding quotes are NOT added here; the caller supplies them.
_shq() {
  local text="$1"
  printf '%s' "$text" | sed -e "s/'/'\\\\''/g"
}
# _build_pinned_cmd HCIROOT RAW — echo a remote command string that assigns and
# exports HCIROOT explicitly, then runs RAW, all under a NON-login `sh -c`.
# No `bash -lc`, so a sudo-gated login profile is skipped.
#
# Quoting is done in two layers so the pinned path is always ONE literal word:
#   inner: HCIROOT='<path>'   (quoted for the `sh -c` script itself)
#   outer: sh -c '<script>'   (the whole script quoted for the remote shell)
# Quoting only the outer layer would leave the path bare inside the script,
# where a space splits it and `;` / `$(...)` would be executed remotely.
_build_pinned_cmd() {
  local root="$1" raw="$2"
  local script
  script="HCIROOT='$(_shq "$root")'; export HCIROOT; $raw"
  printf "sh -c '%s'" "$(_shq "$script")"
}
# _remote_cmd_for ALIAS RAW — echo the exact command string to hand to ssh.
#   • ALIAS has a pinned HCIROOT → _build_pinned_cmd (explicit export, no
#     login profile).
#   • otherwise                  → _build_login_cmd (login-shell wrapper).
# Single chokepoint so cmd_exec/cmd_discover/cmd_pull_smat all honour the pin
# identically.
_remote_cmd_for() {
  local alias="$1" raw="$2"
  local pinned_root
  pinned_root=$(read_host_hciroot "$alias")

  if [ -z "$pinned_root" ]; then
    _build_login_cmd "$raw"
    return
  fi
  _build_pinned_cmd "$pinned_root" "$raw"
}
# cmd_exec ALIAS COMMAND... — run COMMAND on the remote through the already
# open ControlMaster for ALIAS; its output and exit status pass straight
# through. The remaining arguments are joined into one command string.
# Dies when ALIAS is missing/unknown, no command is given, or no master is open.
cmd_exec() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: exec <alias> <command...>"
  fi
  shift

  local remote_cmd="$*"
  if [ -z "$remote_cmd" ]; then
    die "no command given"
  fi

  local hostinfo; hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias"
  fi

  local target portnum ctl
  target=$(printf '%s' "$hostinfo" | cut -f1)
  portnum=$(printf '%s' "$hostinfo" | cut -f2)
  ctl="$SSH_SOCKETS_DIR/$alias.sock"

  # Require a live master: the socket must exist AND answer the check.
  if [ ! -S "$ctl" ] || ! ssh -S "$ctl" -O check -p "$portnum" "$target" 2>/dev/null; then
    die "no open master for $alias — run 'setup $alias' first"
  fi

  # Multiplexed; no password needed. If the alias has a pinned HCIROOT we export
  # it explicitly and skip the login profile (v0.8.15 sudo-gated-profile fix);
  # otherwise we run in a login shell so $HCIROOT et al. populate from the remote
  # Cloverleaf login profile (see _build_login_cmd / _remote_cmd_for).
  local wrapped
  wrapped="$(_remote_cmd_for "$alias" "$remote_cmd")"
  ssh -S "$ctl" -p "$portnum" -o BatchMode=yes "$target" "$wrapped"
}
# cmd_discover ALIAS — detect the remote Cloverleaf environment over the open
# ControlMaster for ALIAS. Dies when ALIAS is missing/unknown or no master is
# open. The remote script is wrapped by _remote_cmd_for, so $HCIROOT is either
# the pinned value (non-login `sh -c`) or whatever the login shell exports.
# Site enumeration, in the order the remote script below performs it:
# 1. NetConfig walk under $HCIROOT (find -maxdepth 2; "a site is a dir with a
#    NetConfig") — always tried first.
# 2. hcisitelist — only when the walk found nothing AND the command exists.
# Emits TSV on stdout that the tool layer can parse deterministically:
# HCIROOT<TAB><path> (always first; path is empty if unresolved)
# NOTE<TAB><text> (diagnostics: empty/invalid HCIROOT, fallback, no sites)
# EXCLUDED<TAB><names> (space-separated names dropped by the filters)
# SITE<TAB><name> (zero or more)
# Never prompts. The function's exit status is that of the final ssh call.
cmd_discover() {
local alias="${1:-}"
[ -n "$alias" ] || die "usage: discover <alias>"
local addr_port; addr_port=$(read_host_addr "$alias")
[ -n "$addr_port" ] || die "no such alias: $alias"
local addr port
addr=$(printf '%s' "$addr_port" | cut -f1)
port=$(printf '%s' "$addr_port" | cut -f2)
local sock="$SSH_SOCKETS_DIR/$alias.sock"
# Require a live master: the socket must exist AND answer `-O check`.
if [ ! -S "$sock" ] || ! ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
die "no open master for $alias — run 'setup $alias' first"
fi
# A single remote script. It:
# - prints HCIROOT\t$HCIROOT
# - PRIMARY enumeration = the NetConfig walk under $HCIROOT (depth ≤2),
# IDENTICAL to lib/each-site.sh: find NetConfig files → dirname → basename
# → sort -u. This is the version-agnostic ground truth and works on a box
# with NO `hcisitelist` (v0.8.15 portability fix — confirmed: shdclvf01q
# has no hcisitelist).
# - `hcisitelist` is used ONLY if it is actually present AND the walk found
# nothing (belt-and-suspenders), never as the dependency.
# Kept POSIX-sh so it runs under whatever /bin/sh spawns it.
#
# NOTE on environment: when the alias has a pinned HCIROOT, _remote_cmd_for
# exports HCIROOT explicitly and runs this under a NON-login `sh -c` (skips the
# sudo-gated login profile). Otherwise it runs under `bash -lc` so the login
# profile populates $HCIROOT. Either way the script below only reads
# ${HCIROOT:-}, so it is agnostic to which path delivered it.
# v0.8.15 (list-sites exclusion): drop non-real entries from the enumeration so
# /sites shows only operator-meaningful sites. Two filters, applied at the walk
# source (so REMOTE pinned, REMOTE login-shell, and LOCAL all behave the same):
# 1. SITES_EXCLUDE — static scaffolding/special dirs (helloworld, siteProto,
# master). A documented, tunable env var: Bryan can override at call time
# via `SITES_EXCLUDE='...' discover <alias>` without a config UI.
# 2. Host-name match — any site dir whose name == the remote `hostname -s` or
# full `hostname` (a dir just named after the box, e.g. shdclvf01q). The
# remote hostname is the primary signal; we ALSO pass the alias's configured
# SSH host as a secondary candidate (qa's alias host is lhsixfqa) so a dir
# matching that is dropped too.
# NOT silent: every dropped name is reported on an EXCLUDED note so the tool
# layer surfaces it. The real-site list/count stays the headline.
local sites_exclude="${SITES_EXCLUDE:-helloworld siteProto master}"
# bare host from the alias's user@host (strip optional user@); '-' if none.
# The '-' placeholder is what the remote script tests to skip this filter.
local alias_host="${addr#*@}"; [ -n "$alias_host" ] || alias_host="-"
# The script is one single-quoted string; the two locally computed values are
# spliced in by closing the quote, emitting a literal quote + _shq-escaped
# value + literal quote, and reopening. No comments may be added inside the
# string: everything between the quotes is sent to the remote verbatim.
local remote='
SITES_EXCLUDE='\'"$(_shq "$sites_exclude")"\'';
ALIAS_HOST='\'"$(_shq "$alias_host")"\'';
printf "HCIROOT\t%s\n" "${HCIROOT:-}";
if [ -z "${HCIROOT:-}" ]; then
printf "NOTE\tHCIROOT is empty. If this host has a sudo-gated/non-interactive login profile, pin it: ssh-helper.sh set-hciroot <alias> <path>\n";
exit 0;
fi;
if [ ! -d "${HCIROOT}" ]; then
printf "NOTE\tHCIROOT=%s is not a directory on the remote — check the pinned path\n" "${HCIROOT}";
exit 0;
fi;
sites=$(find "$HCIROOT" -mindepth 1 -maxdepth 2 -name NetConfig -type f 2>/dev/null \
| while IFS= read -r nc; do d=$(dirname "$nc"); basename "$d"; done \
| sort -u);
if [ -z "$sites" ] && command -v hcisitelist >/dev/null 2>&1; then
printf "NOTE\tNetConfig walk found no sites; falling back to hcisitelist\n";
sites=$(hcisitelist 2>/dev/null | tr " " "\n" | grep -v "^$" | sort -u);
fi;
if [ -z "$sites" ]; then
printf "NOTE\tno sites with a NetConfig found under %s\n" "$HCIROOT";
exit 0;
fi;
HN_S=$(hostname -s 2>/dev/null || true);
HN_F=$(hostname 2>/dev/null || true);
kept=""; dropped="";
for s in $sites; do
[ -n "$s" ] || continue;
drop="";
for x in $SITES_EXCLUDE; do [ "$s" = "$x" ] && drop=1 && break; done;
[ -z "$drop" ] && [ -n "$HN_S" ] && [ "$s" = "$HN_S" ] && drop=1;
[ -z "$drop" ] && [ -n "$HN_F" ] && [ "$s" = "$HN_F" ] && drop=1;
[ -z "$drop" ] && [ "$ALIAS_HOST" != "-" ] && [ "$s" = "$ALIAS_HOST" ] && drop=1;
if [ -n "$drop" ]; then dropped="$dropped $s"; else kept="$kept
$s"; fi;
done;
dropped=$(printf "%s" "$dropped" | sed "s/^ *//");
[ -n "$dropped" ] && printf "EXCLUDED\t%s\n" "$dropped";
printf "%s\n" "$kept" | while IFS= read -r s; do [ -n "$s" ] && printf "SITE\t%s\n" "$s"; done'
# Hand the script to the remote through the pin-aware wrapper.
ssh -S "$sock" -p "$port" -o BatchMode=yes "$addr" "$(_remote_cmd_for "$alias" "$remote")"
}
# ── v0.6.8: scp helpers that multiplex via the existing ControlMaster ────────
# We use ssh's ControlPath/ControlMaster=no for scp (scp reads ssh-style options
# via -o), so the file transfer rides the open master and needs no second auth.
# _resolve_open_master ALIAS — look ALIAS up and verify its ControlMaster is
# alive. On success the connection details are left in these globals for the
# caller:
#   _RH_ADDR  user@host     _RH_PORT  port     _RH_SOCK  control socket path
# Dies when ALIAS is unknown or when no live master answers on the socket.
_resolve_open_master() {
  local alias="$1"
  local hostinfo; hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias"
  fi

  _RH_ADDR=$(printf '%s' "$hostinfo" | cut -f1)
  _RH_PORT=$(printf '%s' "$hostinfo" | cut -f2)
  _RH_SOCK="$SSH_SOCKETS_DIR/$alias.sock"

  # Happy path first: socket exists and the master answers the check.
  if [ -S "$_RH_SOCK" ] && ssh -S "$_RH_SOCK" -O check -p "$_RH_PORT" "$_RH_ADDR" 2>/dev/null; then
    return 0
  fi
  die "no open master for $alias — open it with /ssh-setup $alias first"
}
# Deterministic local cache path for ssh_pull.
# /tmp/larry-pulls/<alias>.<basename>.<short-hash-of-remote-path>
_pull_cache_path() {
local alias="$1" remote="$2"
local base; base=$(basename -- "$remote" 2>/dev/null)
[ -z "$base" ] && base="file"
# 8-char hex hash of full remote path. We try the most common hashers in
# turn; on a stripped box without any, fall back to a length+checksum proxy
# so the path is still deterministic per <alias,remote_path>.
local hash=""
if command -v shasum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | shasum -a 1 2>/dev/null | cut -c1-8)
elif command -v sha1sum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | sha1sum 2>/dev/null | cut -c1-8)
elif command -v md5sum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | md5sum 2>/dev/null | cut -c1-8)
else
hash=$(printf '%s' "$remote" | cksum 2>/dev/null | awk '{printf "%08x", $1}' | cut -c1-8)
fi
[ -z "$hash" ] && hash="00000000"
mkdir -p /tmp/larry-pulls 2>/dev/null
printf '/tmp/larry-pulls/%s.%s.%s' "$alias" "$base" "$hash"
}
# cmd_pull ALIAS REMOTE [LOCAL] — copy REMOTE from the host to LOCAL with scp,
# riding the already-open ControlMaster (no second authentication).
#
# LOCAL defaults to the deterministic cache path from _pull_cache_path.
# On success prints an "ok" line and then, as the FINAL stdout line, the bare
# local path (callers capture it with `tail -1`); returns 0.
# Returns 1 (after echoing scp's real exit status and stderr) when scp fails.
# Dies on bad usage, when no master is open, when the remote file cannot be
# sized (missing/unreadable), or when the local byte count differs from the
# remote one (partial transfer).
cmd_pull() {
  local alias="${1:-}" remote="${2:-}" local_path="${3:-}"
  [ -n "$alias" ] && [ -n "$remote" ] || die "usage: pull <alias> <remote_path> [local_path]"
  _resolve_open_master "$alias"
  [ -z "$local_path" ] && local_path=$(_pull_cache_path "$alias" "$remote")
  mkdir -p "$(dirname "$local_path")" 2>/dev/null

  # Get remote file size up-front for a partial-transfer sanity check.
  # The RAW output is inspected before coercion: coerce_int may turn "no
  # output at all" into 0, which would make a missing file look like an
  # empty one and skip the not-found diagnosis.
  # v0.7.5: coerce_int on wc output — strips CR + non-digits at the source.
  local raw_size remote_size
  raw_size=$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)
  remote_size=$(coerce_int "$raw_size" "")
  if [[ "$raw_size" != *[0-9]* ]] || ! [[ "$remote_size" =~ ^[0-9]+$ ]]; then
    die "remote file not found or not readable: $remote"
  fi

  # scp via the existing master: -o ControlPath=... -o ControlMaster=no
  # The exit status is captured right at the call: `$?` read after a failed
  # `if …; fi` is always 0, which would hide the real scp status.
  local scp_err rc=0
  scp_err=$(mktemp 2>/dev/null || echo "/tmp/larry-scp.err.$$")
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$_RH_ADDR:$remote" "$local_path" 2>"$scp_err" || rc=$?
  if [ "$rc" -ne 0 ]; then
    printf 'ssh-helper: scp pull failed (rc=%d):\n' "$rc" >&2
    cat "$scp_err" >&2 2>/dev/null
    rm -f "$scp_err"
    return 1
  fi
  rm -f "$scp_err"

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local got; got=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
  if [ "$got" != "$remote_size" ]; then
    die "partial transfer: remote=$remote_size bytes, local=$got bytes ($local_path)"
  fi
  ok "pulled $alias:$remote → $local_path ($got bytes)"
  # Print only the local path on the final line so callers (tool layer) can
  # capture it deterministically with `tail -1` or similar.
  printf '%s\n' "$local_path"
  return 0
}
# cmd_push ALIAS LOCAL REMOTE — copy the local file LOCAL to REMOTE on the host
# with scp, riding the already-open ControlMaster, then verify the remote byte
# count against the local one.
#
# Returns 0 on a verified transfer; returns 1 (after echoing scp's real exit
# status and stderr) when scp fails.
# Dies on bad usage, when LOCAL is not a regular file, when no master is open,
# or when the remote size differs from the local size (partial transfer).
cmd_push() {
  local alias="${1:-}" local_path="${2:-}" remote="${3:-}"
  [ -n "$alias" ] && [ -n "$local_path" ] && [ -n "$remote" ] \
    || die "usage: push <alias> <local_path> <remote_path>"
  [ -f "$local_path" ] || die "local file not found: $local_path"
  _resolve_open_master "$alias"

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local local_size; local_size=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)

  # The exit status is captured right at the call: `$?` read after a failed
  # `if …; fi` is always 0, which would hide the real scp status.
  local scp_err rc=0
  scp_err=$(mktemp 2>/dev/null || echo "/tmp/larry-scp.err.$$")
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$local_path" "$_RH_ADDR:$remote" 2>"$scp_err" || rc=$?
  if [ "$rc" -ne 0 ]; then
    printf 'ssh-helper: scp push failed (rc=%d):\n' "$rc" >&2
    cat "$scp_err" >&2 2>/dev/null
    rm -f "$scp_err"
    return 1
  fi
  rm -f "$scp_err"

  # Validate via remote wc -c.
  # v0.7.5: coerce_int on wc output (Cygwin wc.exe CR-taint defense).
  local got
  got=$(coerce_int "$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" 0)
  if [ "$got" != "$local_size" ]; then
    die "partial transfer: local=$local_size bytes, remote=$got bytes ($alias:$remote)"
  fi
  ok "pushed $local_path → $alias:$remote ($got bytes)"
  return 0
}
# pull-smat: smart pull for a Cloverleaf thread's .smatdb file.
# Two modes:
# Full pull: pull-smat <alias> <site> <thread>
# Locates $HCISITEDIR/exec/processes/*/<thread>.smatdb on the
# remote via find, then scp's the entire .smatdb file.
# Sampled: pull-smat <alias> <site> <thread> <days_back>
# Runs sqlite3 server-side, extracts up to 1000 most-recent
# messages from the last <days_back> days, encodes each
# MessageContent BLOB as base64, returns TSV:
# unix_ts<TAB>direction<TAB>type<TAB>source<TAB>dest<TAB>message_blob_b64
# The schema (table=smat_msgs, columns Time/Type/SourceConn/
# DestConn/MessageContent) is the same one nc-msgs.sh uses.
# cmd_pull_smat ALIAS SITE THREAD [DAYS_BACK] — fetch a thread's .smatdb.
#
#   without DAYS_BACK  full mode: locate the file remotely, then scp it into
#                      the local cache via cmd_pull.
#   with DAYS_BACK     sampled mode: run sqlite3 remotely and stream TSV rows
#                      (unix_ts, direction, type, source, dest, base64 blob) to
#                      stdout; a "# smatdb=..." summary line goes to stderr.
#
# SITE, THREAD and DAYS_BACK are spliced into shell/SQL text executed on the
# remote host, so they are validated BEFORE anything is sent:
#   SITE, THREAD  letters, digits, '.', '_', '-'; must not start with '.' or '-'
#   DAYS_BACK     decimal digits only (normalised to base 10, so "08" is 8 and
#                 never parsed as octal by the remote arithmetic)
# Dies on bad usage/arguments, when no master is open, or when the remote
# lookup fails.
cmd_pull_smat() {
  local alias="${1:-}" site="${2:-}" thread="${3:-}" days_back="${4:-}"
  [ -n "$alias" ] && [ -n "$site" ] && [ -n "$thread" ] \
    || die "usage: pull-smat <alias> <site> <thread> [days_back]"
  [[ "$site" =~ ^[A-Za-z0-9_][A-Za-z0-9._-]*$ ]] \
    || die "invalid site name: $site (allowed: letters, digits, '.', '_', '-')"
  [[ "$thread" =~ ^[A-Za-z0-9_][A-Za-z0-9._-]*$ ]] \
    || die "invalid thread name: $thread (allowed: letters, digits, '.', '_', '-')"
  if [ -n "$days_back" ]; then
    [[ "$days_back" =~ ^[0-9]+$ ]] \
      || die "days_back must be a non-negative integer: $days_back"
    days_back=$((10#$days_back))
  fi
  _resolve_open_master "$alias"

  # Discover the remote .smatdb path. $HCISITEDIR/$HCIROOT come from the
  # environment the wrapper provides (login shell, or the pinned HCIROOT).
  # SITEDIR falls back to <HCIROOT>/<site> if HCISITEDIR isn't set. The find
  # runs remotely to avoid hard-coding process directory names.
  # NOTE(review): when the remote environment already exports HCISITEDIR, it
  # wins over <site> — confirm that is intended for multi-site hosts.
  local find_cmd
  find_cmd='set -e; SDIR="${HCISITEDIR:-${HCIROOT:-}/'"$site"'}"; '
  find_cmd+='[ -d "$SDIR" ] || { echo "ERROR: sitedir not found on remote: $SDIR" >&2; exit 2; }; '
  find_cmd+='F=$(find "$SDIR/exec/processes" -maxdepth 2 -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || F=$(find "$SDIR" -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || { echo "ERROR: no smatdb found for thread '"$thread"' under $SDIR" >&2; exit 3; }; '
  # v0.8.13 M1 hardening: emit the resolved path behind an unambiguous sentinel
  # prefix instead of relying on it being the last stdout line. A login shell
  # is the case most likely to print a MOTD/banner to stdout, which a blind
  # `tail -1` would mistake for the path.
  find_cmd+='printf "SMATDB_PATH:%s\n" "$F"'

  local _smat_raw remote_smatdb
  # v0.8.15: honour a pinned HCIROOT (explicit export, no sudo-gated login profile).
  _smat_raw=$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" "$(_remote_cmd_for "$alias" "$find_cmd")" 2>&1)
  remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^SMATDB_PATH:' | tail -1)
  if [ -n "$remote_smatdb" ]; then
    remote_smatdb="${remote_smatdb#SMATDB_PATH:}"
  else
    # No sentinel — surface any ERROR: line if present, else fall back to the
    # last line (pre-hardening behaviour) so failure modes stay diagnosable.
    remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^ERROR:' | tail -1)
    [ -n "$remote_smatdb" ] || remote_smatdb=$(printf '%s\n' "$_smat_raw" | tail -1)
  fi
  case "$remote_smatdb" in
    ERROR:*|'') die "remote smatdb lookup failed: $remote_smatdb" ;;
  esac

  if [ -z "$days_back" ]; then
    # Full mode: scp the whole .smatdb file.
    local local_path
    local_path=$(_pull_cache_path "$alias" "$remote_smatdb")
    cmd_pull "$alias" "$remote_smatdb" "$local_path"
    return $?
  fi

  # Sampled mode embeds the path inside double quotes in the remote script, so
  # a path containing characters that are live inside double quotes is refused.
  case "$remote_smatdb" in
    *[\"\$\`\\]*) die "remote smatdb path contains unsupported characters: $remote_smatdb" ;;
  esac

  # Sampled mode: run sqlite3 on the remote, return TSV with b64-encoded blobs.
  # base64 -w0 is GNU coreutils; on BSD use plain base64 (no -w). We accept
  # whichever is present.
  #
  # Output line shape (each message):
  #   <unix_ts_s>\t<direction>\t<type>\t<source>\t<dest>\t<b64-of-MessageContent>
  # `direction` is "in" when DestConn=thread, else "out" (best-effort heuristic).
  local sample_cmd
  sample_cmd='set -e; '
  # `command -v` is the POSIX way to probe for a program; `which` is optional.
  sample_cmd+='command -v sqlite3 >/dev/null 2>&1 || { echo "ERROR: sqlite3 not on remote PATH" >&2; exit 4; }; '
  sample_cmd+='B64() { if base64 --help 2>&1 | grep -q -- " -w"; then base64 -w0; else base64 | tr -d "\n"; fi; }; '
  # Each blob is written to a per-row temp file with writefile() and base64'd
  # from there, which avoids any binary mangling on sqlite3's text output.
  sample_cmd+='TMP=$(mktemp -d); trap "rm -rf $TMP" EXIT; '
  sample_cmd+='CUTOFF_MS=$(( ( $(date +%s) - '"$days_back"' * 86400 ) * 1000 )); '
  sample_cmd+='sqlite3 "'"$remote_smatdb"'" "SELECT rowid, Time, IFNULL(Type,\"\"), IFNULL(SourceConn,\"\"), IFNULL(DestConn,\"\") FROM smat_msgs WHERE Time >= $CUTOFF_MS ORDER BY Time DESC LIMIT 1000" '
  sample_cmd+='| while IFS="|" read -r rid tm typ src dst; do '
  sample_cmd+=' blobfile="$TMP/$rid.bin"; '
  sample_cmd+=' sqlite3 "'"$remote_smatdb"'" "SELECT writefile(\"$blobfile\", MessageContent) FROM smat_msgs WHERE rowid=$rid" >/dev/null 2>&1; '
  sample_cmd+=' if [ "$dst" = "'"$thread"'" ]; then dir="in"; else dir="out"; fi; '
  sample_cmd+=' printf "%s\t%s\t%s\t%s\t%s\t" "$(( tm / 1000 ))" "$dir" "$typ" "$src" "$dst"; '
  sample_cmd+=' B64 < "$blobfile"; '
  sample_cmd+=' printf "\n"; '
  sample_cmd+='done; '
  sample_cmd+='TOTAL=$(sqlite3 "'"$remote_smatdb"'" "SELECT COUNT(*) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='RETURNED=$(sqlite3 "'"$remote_smatdb"'" "SELECT MIN(1000, COUNT(*)) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='echo "# smatdb=$(basename '"$remote_smatdb"') days_back='"$days_back"' total_in_window=$TOTAL returned=$RETURNED truncated=$([ "$TOTAL" -gt 1000 ] && echo yes || echo no)" >&2'

  # Login shell so sqlite3 resolves from the operator's PATH (v0.8.13), unless
  # the alias has a pinned HCIROOT, in which case we export HCIROOT explicitly
  # and skip the sudo-gated login profile (v0.8.15). Note: when pinned, sqlite3
  # must be resolvable on the default non-login PATH; if it is not, the
  # sample_cmd already emits a clear "ERROR: sqlite3 not on remote PATH".
  ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" "$(_remote_cmd_for "$alias" "$sample_cmd")"
}
# Entry point: the first argument selects the subcommand (default: help) and
# every remaining argument is forwarded to its handler untouched.
_subcommand="${1:-help}"
if [ "$#" -gt 0 ]; then
  shift
fi

case "$_subcommand" in
  hosts|list)
    cmd_hosts
    ;;
  add)
    cmd_add "$@"
    ;;
  remove|rm)
    cmd_remove "$@"
    ;;
  pass|passwd)
    cmd_pass "$@"
    ;;
  set-hciroot|hciroot)
    cmd_set_hciroot "$@"
    ;;
  setup|open)
    cmd_setup "$@"
    ;;
  close|exit)
    cmd_close "$@"
    ;;
  status)
    cmd_status "$@"
    ;;
  exec|run)
    cmd_exec "$@"
    ;;
  discover)
    cmd_discover "$@"
    ;;
  pull)
    cmd_pull "$@"
    ;;
  push)
    cmd_push "$@"
    ;;
  pull-smat)
    cmd_pull_smat "$@"
    ;;
  -h|--help|help)
    cmd_help
    ;;
  *)
    die "unknown subcommand: $_subcommand (run with --help)"
    ;;
esac