cloverleaf-larry/lib/ssh-helper.sh
Bryan Johnson fe2f67a1aa v0.8.13: $HCIROOT login-shell fix + both-mode detection + list_sites/sites + per-delta jq-fork slowness fix
Root-cause fix for the live-session friction where "how many sites are on
qa?" stalled on repeated `export $HCIROOT` nags despite a working `qa` SSH
alias:

1. $HCIROOT login-shell fix: ssh-helper.sh `exec` now wraps remote commands in
   `bash -lc` so the Cloverleaf login profile sources and $HCIROOT/$HCISITE/PATH
   populate as for an interactive operator login. Escape hatch: NOLOGIN prefix
   or LARRY_SSH_NO_LOGIN=1. pull-smat find/sample use the same wrapper.
2. Both-mode detection: startup surfaces a MODE= line (LOCAL / REMOTE / UNKNOWN)
   and leads with what it found instead of asking for paths.
3. First-class list_sites tool + /sites [alias]: enumerates sites in both modes
   (hcisitelist fast-path, NetConfig-walk fallback) via new ssh-helper discover.
4. System-prompt de-nagging: agents/larry.md + env-diff/regression prompts no
   longer tell Larry to ask Bryan to export $HCIROOT for a reachable host.
5. Streaming slowness (dominant residual): new pure-bash _json_str_decode
   un-escapes the common escape-free delta with zero forks, halving per-turn
   jq forks on top of v0.8.12. Round-trip verified.
6. pull-smat path capture hardened (Vera Minor #1): resolved path now emitted
   behind a SMATDB_PATH: sentinel and selected by pattern not position, so a
   login-shell MOTD/banner on stdout can't be mistaken for the path; falls back
   to prior tail -1 when no sentinel present. Selection logic unit-verified.

Vera gate: PASS-WITH-NOTES (v0.8.13). bash -n clean on larry.sh + ssh-helper.sh;
MANIFEST regenerated (48 entries) and --check clean.

Co-Authored-By: Clover (Claude Opus 4.7) <noreply@anthropic.com>
2026-05-28 07:40:53 -07:00

579 lines
26 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# ssh-helper.sh — secure SSH command execution via ControlMaster.
#
# Architecture:
# • Hosts configured in $LARRY_HOME/.ssh-hosts.tsv as
# alias \t user@host \t port
# • Passwords stored at $LARRY_HOME/.ssh-creds/<alias>, mode 0600.
# The password file is the single point of truth — to rotate (daily-changing
# passwords) just overwrite the file with the new one and re-run 'setup'.
# • sshpass reads the password via -f (file), so it never lands in argv or
# environment where Larry the LLM (or other processes via /proc) could see it.
# • The first 'setup' call opens a long-lived SSH ControlMaster connection
# (default ControlPersist=8h). Subsequent 'exec' calls multiplex through
# the master socket and need no password.
# • Larry's tool layer only sees: alias, command, command_output.
# Never the password. Never the user@host (unless added to the alias list).
#
# Subcommands:
# hosts list configured hosts
# add <alias> <user@host[:port]> add a host to the alias list
# remove <alias> remove an alias (also clears cred + socket)
# pass <alias> set/update the password (hidden interactive)
# setup <alias> open ControlMaster (uses stored password ONCE)
# close <alias> close ControlMaster
# status [alias] show open masters / cred presence
# exec <alias> <command...> run command via master (returns output)
# discover <alias> auto-detect remote Cloverleaf env:
# resolves $HCIROOT (LOGIN shell), then
# enumerates sites (hcisitelist fast-path,
# NetConfig-walk fallback). Prints TSV:
# HCIROOT<TAB><path>
# SITE<TAB><name> (one per site)
# No nagging for paths — the remote's own
# login profile is the source of truth.
# pull <alias> <remote> [local] scp remote → local via existing master
# push <alias> <local> <remote> scp local → remote via existing master
# pull-smat <alias> <site> <thread> [days_back]
# pull a thread's smatdb (full) or sample
# recent messages from it (sampled, TSV b64)
# help print this help
# Shell options: referencing an unset variable is an error, and a pipeline
# reports failure if any of its stages fails.
set -uo pipefail

# Everything the helper persists lives under $LARRY_HOME (default: ~/.larry).
: "${LARRY_HOME:=$HOME/.larry}"
SSH_HOSTS_FILE="${LARRY_HOME}/.ssh-hosts.tsv"   # alias<TAB>addr<TAB>port table
SSH_CREDS_DIR="${LARRY_HOME}/.ssh-creds"        # one password file per alias
SSH_SOCKETS_DIR="${LARRY_HOME}/.ssh-sockets"    # one ControlMaster socket per alias
# Lifetime handed to ssh's ControlPersist; override with LARRY_SSH_CONTROL_PERSIST.
SSH_CONTROL_PERSIST="${LARRY_SSH_CONTROL_PERSIST:-8h}"
# Message helpers. Every line is prefixed with "ssh-helper:".

# die MSG... — report MSG on stderr and terminate the script with status 1.
die() {
  printf >&2 'ssh-helper: %s\n' "$*"
  exit 1
}

# warn MSG... — report a warning on stderr; execution continues.
warn() {
  printf >&2 'ssh-helper: warn: %s\n' "$*"
}

# ok MSG... — report an informational message on stdout.
ok() {
  printf 'ssh-helper: %s\n' "$*"
}
# v0.7.5: CR-safety primitives. pull/push compare byte counts taken from `wc`,
# and Cygwin's wc.exe can let a \r through, which would break those
# comparisons. The shared implementation is sourced from cygwin-safe.sh when it
# sits next to this script; otherwise a minimal coerce_int is defined here.
_SSH_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)"
if [ ! -r "$_SSH_LIB_DIR/cygwin-safe.sh" ]; then
  # coerce_int RAW [DEFAULT] — print RAW with every non-digit byte removed;
  # print DEFAULT (0 when omitted or empty) if nothing is left.
  coerce_int() {
    local raw="${1:-}"
    local fallback="${2:-0}"
    local digits
    digits=$(printf '%s' "$raw" | tr -cd '0-9')
    printf '%s' "${digits:-$fallback}"
  }
else
  # shellcheck disable=SC1090,SC1091
  . "$_SSH_LIB_DIR/cygwin-safe.sh"
fi
# ensure_layout — create the on-disk layout if it is missing: the home, creds
# and sockets directories (mode 700) and a hosts table containing only the
# header row (mode 600). An existing hosts table is left untouched.
# Side effect: when the hosts table has to be created, the process umask is
# switched to 077 and stays that way.
ensure_layout() {
  mkdir -p "$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR" 2>/dev/null
  # Best effort: tightening permissions must not abort the caller.
  chmod 700 "$LARRY_HOME" "$SSH_CREDS_DIR" "$SSH_SOCKETS_DIR" 2>/dev/null || true

  if [ -f "$SSH_HOSTS_FILE" ]; then
    return 0
  fi

  umask 077
  printf 'alias\taddr\tport\n' > "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
}
# read_host_addr ALIAS — look ALIAS up in $SSH_HOSTS_FILE.
# Outputs: "ADDR<TAB>PORT" for the first matching row (header row skipped),
#          nothing when the alias is unknown.
# Returns: 1 when the hosts file does not exist, otherwise awk's status.
read_host_addr() {
  local alias="$1"
  [ -f "$SSH_HOSTS_FILE" ] || return 1
  # Appending "" forces a STRING comparison. A bare `$1 == a` is numeric in awk
  # whenever both sides look like numbers, so alias "01" would wrongly match a
  # row whose alias is "1" (or "1.0").
  awk -F'\t' -v a="$alias" \
    'NR > 1 && ($1 "") == (a "") { print $2 "\t" $3; exit }' < "$SSH_HOSTS_FILE"
}
# require_sshpass — abort via die unless an `sshpass` command can be resolved.
require_sshpass() {
  if ! command -v sshpass >/dev/null 2>&1; then
    die "sshpass not on PATH — install it (apt install sshpass / brew install sshpass) and retry"
  fi
}
# cmd_help — print lines 4 through 30 of this script file.
# NOTE(review): presumably that range is the header comment block; the range is
# positional, so confirm it still matches whenever the header is edited.
cmd_help() {
  # Delete every line outside 4..30; what remains is printed.
  sed '4,30!d' "$0"
}
# cmd_hosts — print one table row per configured host: alias, address, port,
# whether a password file exists ("✓"), and whether a ControlMaster answers on
# the alias's socket ("open").
# Outputs: the table on stdout, or a hint when only the header row exists.
# Returns: 0.
cmd_hosts() {
  ensure_layout
  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint would tank the
  # integer test below.
  local row_count
  row_count=$(coerce_int "$(wc -l < "$SSH_HOSTS_FILE")" 0)
  if [ "$row_count" -le 1 ]; then
    echo "no hosts configured. Add with: ssh-helper.sh add <alias> <user@host[:port]>"
    return 0
  fi

  printf 'alias user@host port cred master\n'
  printf '%s\n' '───── ───────── ──── ──── ──────'

  local alias addr port cred_state master_state sock
  while IFS=$'\t' read -r alias addr port; do
    # A row without a port means 22. Apply the default BEFORE the master
    # check, not only when printing, so ssh is never handed an empty -p value.
    port="${port:-22}"

    cred_state=""
    [ -f "$SSH_CREDS_DIR/$alias" ] && cred_state="✓"

    master_state=""
    sock="$SSH_SOCKETS_DIR/$alias.sock"
    # stdin is detached so ssh can never consume the rows the loop is reading.
    if [ -S "$sock" ] \
        && ssh -S "$sock" -O check -p "$port" "$addr" </dev/null 2>/dev/null; then
      master_state="open"
    fi

    printf '%-20s%-52s%-6s%-6s%s\n' "$alias" "$addr" "$port" "$cred_state" "$master_state"
  done < <(awk -F'\t' 'NR>1' "$SSH_HOSTS_FILE")
}
# cmd_add ALIAS USER@HOST[:PORT] — append a host row to $SSH_HOSTS_FILE.
# Dies when an argument is missing or malformed, or when ALIAS already exists.
# The port defaults to 22 when TARGET carries none.
cmd_add() {
  local alias="${1:-}" target="${2:-}"
  [ -n "$alias" ] && [ -n "$target" ] || die "usage: add <alias> <user@host[:port]>"

  # The alias becomes a TSV field and a file name under the creds/sockets
  # directories, so tabs, whitespace, '/' and leading '-' or '.' are refused.
  local alias_re='^[A-Za-z0-9][A-Za-z0-9._-]*$'
  [[ "$alias" =~ $alias_re ]] \
    || die "alias must start with a letter or digit and contain only letters, digits, '.', '_' or '-'"

  [[ "$target" =~ ^[^@[:space:]]+@[^:[:space:]]+(:[0-9]+)?$ ]] \
    || die "target must look like user@host or user@host:port"

  local addr port
  if [[ "$target" == *:* ]]; then
    addr="${target%:*}"
    port="${target##*:}"
  else
    addr="$target"
    port="22"
  fi

  ensure_layout

  # Reject duplicates (use 'remove' first). Appending "" forces a string
  # comparison in awk; otherwise alias "1" would be treated as a duplicate of
  # "01" because both look numeric.
  if awk -F'\t' -v a="$alias" \
      'NR > 1 && ($1 "") == (a "") { found = 1; exit } END { exit !found }' \
      "$SSH_HOSTS_FILE"; then
    die "alias '$alias' already exists. Use 'remove $alias' first."
  fi

  umask 077
  printf '%s\t%s\t%s\n' "$alias" "$addr" "$port" >> "$SSH_HOSTS_FILE"
  chmod 600 "$SSH_HOSTS_FILE"
  ok "added $alias → $addr (port $port). Next: ssh-helper.sh pass $alias"
}
# cmd_remove ALIAS — close the alias's master, drop its row from the hosts
# table, and delete its password file and socket.
# Dies on a missing/invalid alias or when the hosts table cannot be rewritten.
cmd_remove() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: remove <alias>"
  # The alias is used as a file name below; never let it walk out of the
  # creds/sockets directories.
  case "$alias" in
    */*) die "invalid alias: $alias" ;;
  esac
  ensure_layout

  # Close the master FIRST: cmd_close needs the row to resolve addr/port, and
  # without them it only deletes the socket and leaves the master running.
  cmd_close "$alias" 2>/dev/null || true

  # Rewrite next to the target so the final mv is a same-filesystem rename.
  local tmp
  tmp=$(mktemp "${SSH_HOSTS_FILE}.XXXXXX") \
    || die "cannot create temp file next to $SSH_HOSTS_FILE"
  # Appending "" forces a string comparison in awk; a bare `$1 != a` is numeric
  # for number-like values, so removing "1" would also delete "01".
  if awk -F'\t' -v a="$alias" 'NR == 1 || ($1 "") != (a "")' "$SSH_HOSTS_FILE" > "$tmp" \
      && mv -f -- "$tmp" "$SSH_HOSTS_FILE"; then
    chmod 600 "$SSH_HOSTS_FILE"
  else
    rm -f -- "$tmp"
    die "failed to rewrite $SSH_HOSTS_FILE"
  fi

  rm -f -- "$SSH_CREDS_DIR/$alias" "$SSH_SOCKETS_DIR/$alias.sock" 2>/dev/null
  ok "removed $alias (cred + socket cleared)"
}
# cmd_pass ALIAS — prompt on the controlling terminal for ALIAS's password and
# store it, without a trailing newline, in $SSH_CREDS_DIR/ALIAS (mode 0600).
# Dies when the alias is missing/unknown, nothing was entered, or the file
# cannot be written.
cmd_pass() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: pass <alias>"
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias (run 'add' first)"
  ensure_layout

  printf 'Password for %s (input is hidden; press Enter when done): ' "$alias" >&2
  local pw=""
  # `read -s` turns echo off on the descriptor actually being read (/dev/tty).
  # The previous `stty -echo` acted on stdin, which can be a different device
  # (or not a terminal at all), and was not undone on Ctrl-C.
  IFS= read -rs pw </dev/tty || true
  echo "" >&2
  [ -n "$pw" ] || die "no password entered"

  umask 077
  # NO trailing newline — sshpass -f expects raw password as full file content
  printf '%s' "$pw" > "$SSH_CREDS_DIR/$alias" \
    || die "cannot write password file: $SSH_CREDS_DIR/$alias"
  chmod 600 "$SSH_CREDS_DIR/$alias"
  ok "password saved to $SSH_CREDS_DIR/$alias (mode 0600). Next: ssh-helper.sh setup $alias"
}
# cmd_setup ALIAS — open a background ControlMaster for ALIAS, authenticating
# once through `sshpass -f` with the stored password file.
# Returns: 0 when a master is already open or was opened and verified;
#          1 when opening failed (the captured ssh/sshpass stderr is replayed).
# Dies on a missing/unknown alias, a missing password file, or missing sshpass.
cmd_setup() {
  local alias="${1:-}"
  [ -n "$alias" ] || die "usage: setup <alias>"
  local addr_port
  addr_port=$(read_host_addr "$alias")
  [ -n "$addr_port" ] || die "no such alias: $alias"
  local addr port
  addr=$(printf '%s' "$addr_port" | cut -f1)
  port=$(printf '%s' "$addr_port" | cut -f2)
  ensure_layout

  local sock="$SSH_SOCKETS_DIR/$alias.sock"
  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    ok "master already open for $alias ($addr:$port)"
    return 0
  fi

  local credfile="$SSH_CREDS_DIR/$alias"
  [ -f "$credfile" ] || die "no password set for $alias — run 'pass $alias' first"
  require_sshpass

  # Capture stderr in a private, unpredictable file. A fixed name in /tmp can
  # be pre-created as a symlink by another user and is shared by concurrent
  # setup runs.
  local errfile
  errfile=$(mktemp "${TMPDIR:-/tmp}/larry-ssh-setup.XXXXXX") \
    || die "cannot create temp file for ssh stderr"

  ok "opening ssh master for $alias ($addr:$port) — ControlPersist=$SSH_CONTROL_PERSIST..."
  # -N -f: no remote command, go to the background after authentication.
  # The master only counts as open once `-O check` confirms it.
  if sshpass -f "$credfile" ssh \
      -o "ControlMaster=yes" \
      -o "ControlPath=$sock" \
      -o "ControlPersist=$SSH_CONTROL_PERSIST" \
      -o "StrictHostKeyChecking=accept-new" \
      -o "ConnectTimeout=10" \
      -p "$port" \
      -N -f \
      "$addr" 2>"$errfile" \
      && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    rm -f -- "$errfile"
    ok "✓ master open: $alias → $addr:$port (socket: $sock)"
    return 0
  fi

  printf 'ssh-helper: setup failed. sshpass/ssh stderr:\n' >&2
  cat -- "$errfile" >&2
  rm -f -- "$errfile"
  return 1
}
# cmd_close ALIAS — ask the master on ALIAS's socket to exit (only when the
# socket exists and the alias still resolves to an address), then delete the
# socket file and report on stdout. Dies when ALIAS is empty.
cmd_close() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: close <alias>"
  fi

  local hostinfo
  hostinfo=$(read_host_addr "$alias") || hostinfo=""
  local sock="$SSH_SOCKETS_DIR/$alias.sock"

  if [ -n "$hostinfo" ] && [ -S "$sock" ]; then
    local addr port
    addr=$(cut -f1 <<<"$hostinfo")
    port=$(cut -f2 <<<"$hostinfo")
    # Best effort: a master that is already gone is not an error.
    ssh -S "$sock" -O exit -p "$port" "$addr" 2>/dev/null || true
  fi

  rm -f "$sock"
  ok "closed master for $alias"
}
# cmd_status [ALIAS] — with an alias, print its address, port, whether a
# password file exists, the socket path, and whether a master answers on that
# socket. Without an alias, defer to cmd_hosts. Dies on an unknown alias.
cmd_status() {
  ensure_layout

  if [ -z "${1:-}" ]; then
    cmd_hosts
    return
  fi

  local alias="$1"
  local hostinfo
  hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias"
  fi

  local addr port
  addr=$(cut -f1 <<<"$hostinfo")
  port=$(cut -f2 <<<"$hostinfo")
  local sock="$SSH_SOCKETS_DIR/$alias.sock"

  local cred_state="missing"
  if [ -f "$SSH_CREDS_DIR/$alias" ]; then
    cred_state="present"
  fi

  printf 'alias: %s\naddr: %s\nport: %s\n' "$alias" "$addr" "$port"
  printf 'cred: %s\nsocket: %s\nstatus: ' "$cred_state" "$sock"

  if [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; then
    echo "master OPEN"
  else
    echo "no master (run setup)"
  fi
  return 0
}
# v0.8.13 (Cloverleaf login-shell fix): exec defaults to a LOGIN shell.
#
# Root cause of the "qa keeps asking me for $HCIROOT" friction: a plain
# ssh host 'cmd'
# runs a NON-interactive, NON-login shell. On a Cloverleaf host, $HCIROOT (and
# $HCISITE, the hci* binaries on PATH, etc.) are exported by the LOGIN profile
# (/etc/profile.d, the hci user's ~/.profile / ~/.bash_profile, the per-site
# `.profile`). A non-login shell never sources those, so $HCIROOT arrives empty
# and Larry used to give up and nag the user for a path. Wrapping the command in
# `bash -lc` forces a login shell, so the Cloverleaf environment populates
# exactly as it does for an interactive operator login. This is the version-
# agnostic, no-config fix — it works on any Cloverleaf host whose operator login
# sets up the environment (i.e. all of them).
#
# Escape hatch: prefix the command with the literal token NOLOGIN<space> (or set
# LARRY_SSH_NO_LOGIN=1) to run a bare non-login shell — for the rare host where
# the login profile is interactive-only and hangs a non-tty `bash -l`.
# _build_login_cmd RAW — print the command string to hand to ssh for RAW.
#   * RAW starting with "NOLOGIN " → RAW without that prefix, unwrapped.
#   * LARRY_SSH_NO_LOGIN=1          → RAW unchanged.
#   * otherwise                     → bash -lc '<RAW>', with RAW single-quoted.
_build_login_cmd() {
  local raw="$1"

  if [[ "$raw" == "NOLOGIN "* ]]; then
    printf '%s' "${raw#NOLOGIN }"
    return
  fi

  if [ "${LARRY_SSH_NO_LOGIN:-0}" = "1" ]; then
    printf '%s' "$raw"
    return
  fi

  # Inside single quotes only the single quote itself needs care: each one is
  # rewritten as '\'' (close the quote, emit an escaped quote, reopen).
  local sq="'"
  local quoted
  quoted=$(printf '%s' "$raw" | sed "s/${sq}/${sq}\\\\${sq}${sq}/g")
  printf "bash -lc '%s'" "$quoted"
}
# cmd_exec ALIAS COMMAND... — run COMMAND (all remaining arguments joined into
# one string) on ALIAS through its open master; the string is first passed
# through _build_login_cmd. Dies when the alias or command is missing, the
# alias is unknown, or no master answers on the socket.
cmd_exec() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: exec <alias> <command...>"
  fi

  # Everything after the alias, joined into a single command string.
  local cmd="${*:2}"
  if [ -z "$cmd" ]; then
    die "no command given"
  fi

  local hostinfo
  hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias"
  fi

  local addr port
  addr=$(cut -f1 <<<"$hostinfo")
  port=$(cut -f2 <<<"$hostinfo")
  local sock="$SSH_SOCKETS_DIR/$alias.sock"

  if ! { [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; }; then
    die "no open master for $alias — run 'setup $alias' first"
  fi

  # Multiplexed over the master, so no password is involved here.
  local wrapped
  wrapped=$(_build_login_cmd "$cmd")
  ssh -S "$sock" -p "$port" -o BatchMode=yes "$addr" "$wrapped"
}
# cmd_discover ALIAS — proactively detect the remote Cloverleaf environment.
# Resolves $HCIROOT in a LOGIN shell, then enumerates sites two ways:
# 1. hcisitelist (the Cloverleaf-shipped site lister) if it's on the login PATH
# 2. NetConfig walk under $HCIROOT (version-agnostic ground truth — the same
# "a site is a dir with a NetConfig" rule each-site.sh uses)
# Emits TSV to stdout the tool layer can parse deterministically:
# HCIROOT<TAB><path> (or HCIROOT<TAB> if unresolved)
# SITE<TAB><name> (zero or more)
# Never prompts; on failure it emits what it could resolve + a NOTE line.
# cmd_discover ALIAS — run one remote script (wrapped by _build_login_cmd)
# through ALIAS's open master and pass its stdout through. The script prints
# an HCIROOT line, then SITE lines taken from `hcisitelist` when that command
# exists and yields anything, otherwise from NetConfig files found at most two
# levels below $HCIROOT; with an empty $HCIROOT it prints a NOTE line instead.
# Dies when the alias is missing/unknown or no master answers on the socket.
cmd_discover() {
  local alias="${1:-}"
  if [ -z "$alias" ]; then
    die "usage: discover <alias>"
  fi

  local hostinfo
  hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias"
  fi

  local addr port
  addr=$(cut -f1 <<<"$hostinfo")
  port=$(cut -f2 <<<"$hostinfo")
  local sock="$SSH_SOCKETS_DIR/$alias.sock"

  if ! { [ -S "$sock" ] && ssh -S "$sock" -O check -p "$port" "$addr" 2>/dev/null; }; then
    die "no open master for $alias — run 'setup $alias' first"
  fi

  # The remote script is a single-quoted literal: nothing in it is expanded
  # locally, every $VAR below is evaluated on the remote host.
  local remote='
printf "HCIROOT\t%s\n" "${HCIROOT:-}";
if [ -z "${HCIROOT:-}" ]; then
printf "NOTE\tHCIROOT empty even in a login shell — operator profile may not export it\n";
exit 0;
fi
got=0;
if command -v hcisitelist >/dev/null 2>&1; then
for s in $(hcisitelist 2>/dev/null); do
[ -n "$s" ] && { printf "SITE\t%s\n" "$s"; got=1; }
done;
fi;
if [ "$got" = "0" ]; then
find "$HCIROOT" -mindepth 1 -maxdepth 2 -name NetConfig -type f 2>/dev/null \
| while IFS= read -r nc; do d=$(dirname "$nc"); printf "SITE\t%s\n" "$(basename "$d")"; done \
| sort -u;
fi'

  local wrapped
  wrapped=$(_build_login_cmd "$remote")
  ssh -S "$sock" -p "$port" -o BatchMode=yes "$addr" "$wrapped"
}
# ── v0.6.8: scp helpers that multiplex via the existing ControlMaster ────────
# We use ssh's ControlPath/ControlMaster=no for scp (scp reads ssh-style options
# via -o), so the file transfer rides the open master and needs no second auth.
# Resolve ADDR/PORT/SOCK for an alias; die if master not open. Sets globals:
# _RH_ADDR _RH_PORT _RH_SOCK
# _resolve_open_master ALIAS — look ALIAS up and publish its connection
# details in the globals _RH_ADDR, _RH_PORT and _RH_SOCK. Dies when the alias
# is unknown or when no master answers on its socket.
_resolve_open_master() {
  local alias="$1"
  local hostinfo
  hostinfo=$(read_host_addr "$alias")
  if [ -z "$hostinfo" ]; then
    die "no such alias: $alias"
  fi

  _RH_ADDR=$(cut -f1 <<<"$hostinfo")
  _RH_PORT=$(cut -f2 <<<"$hostinfo")
  _RH_SOCK="$SSH_SOCKETS_DIR/$alias.sock"

  if [ -S "$_RH_SOCK" ] \
      && ssh -S "$_RH_SOCK" -O check -p "$_RH_PORT" "$_RH_ADDR" 2>/dev/null; then
    return 0
  fi
  die "no open master for $alias — open it with /ssh-setup $alias first"
}
# Deterministic local cache path for ssh_pull.
# /tmp/larry-pulls/<alias>.<basename>.<short-hash-of-remote-path>
_pull_cache_path() {
local alias="$1" remote="$2"
local base; base=$(basename -- "$remote" 2>/dev/null)
[ -z "$base" ] && base="file"
# 8-char hex hash of full remote path. We try the most common hashers in
# turn; on a stripped box without any, fall back to a length+checksum proxy
# so the path is still deterministic per <alias,remote_path>.
local hash=""
if command -v shasum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | shasum -a 1 2>/dev/null | cut -c1-8)
elif command -v sha1sum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | sha1sum 2>/dev/null | cut -c1-8)
elif command -v md5sum >/dev/null 2>&1; then
hash=$(printf '%s' "$remote" | md5sum 2>/dev/null | cut -c1-8)
else
hash=$(printf '%s' "$remote" | cksum 2>/dev/null | awk '{printf "%08x", $1}' | cut -c1-8)
fi
[ -z "$hash" ] && hash="00000000"
mkdir -p /tmp/larry-pulls 2>/dev/null
printf '/tmp/larry-pulls/%s.%s.%s' "$alias" "$base" "$hash"
}
# cmd_pull ALIAS REMOTE [LOCAL] — copy REMOTE from the host to LOCAL (default:
# the deterministic cache path from _pull_cache_path) over the open master,
# then verify the local byte count against the remote one.
# Outputs: a status line, then LOCAL alone on the final stdout line so callers
#          can capture it with `tail -1`.
# Returns: 0 on success, 1 when scp fails (its stderr and real exit status are
#          reported). Dies on bad usage, an unreadable remote file, or a size
#          mismatch.
cmd_pull() {
  local alias="${1:-}" remote="${2:-}" local_path="${3:-}"
  [ -n "$alias" ] && [ -n "$remote" ] || die "usage: pull <alias> <remote_path> [local_path]"
  _resolve_open_master "$alias"
  if [ -z "$local_path" ]; then
    local_path=$(_pull_cache_path "$alias" "$remote")
  fi
  mkdir -p "$(dirname "$local_path")" 2>/dev/null

  # Remote size up-front, for the partial-transfer check. The raw output is
  # tested for a digit BEFORE coerce_int runs, so an empty answer (missing or
  # unreadable file) can never be mistaken for a size of 0.
  local size_raw remote_size
  size_raw=$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)
  case "$size_raw" in
    *[0-9]*) remote_size=$(coerce_int "$size_raw" 0) ;;  # strips CR/non-digits
    *) die "remote file not found or not readable: $remote" ;;
  esac

  # scp via the existing master: -o ControlPath=... -o ControlMaster=no
  local scp_err rc=0
  scp_err=$(mktemp 2>/dev/null || echo "/tmp/larry-scp.err.$$")
  # Capture scp's status right here; `$?` read after a finished `if` is always 0.
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$_RH_ADDR:$remote" "$local_path" 2>"$scp_err" || rc=$?

  if [ "$rc" -ne 0 ]; then
    printf 'ssh-helper: scp pull failed (rc=%d):\n' "$rc" >&2
    cat "$scp_err" >&2 2>/dev/null
    rm -f "$scp_err"
    return 1
  fi
  rm -f "$scp_err"

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local got
  got=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)
  if [ "$got" != "$remote_size" ]; then
    die "partial transfer: remote=$remote_size bytes, local=$got bytes ($local_path)"
  fi

  ok "pulled $alias:$remote → $local_path ($got bytes)"
  # Only the local path on the final line (see Outputs above).
  printf '%s\n' "$local_path"
}
# cmd_push ALIAS LOCAL REMOTE — copy LOCAL to REMOTE on the host over the open
# master, then verify the remote byte count against the local one.
# Returns: 0 on success, 1 when scp fails (its stderr and real exit status are
#          reported). Dies on bad usage, a missing local file, or a size
#          mismatch.
cmd_push() {
  local alias="${1:-}" local_path="${2:-}" remote="${3:-}"
  [ -n "$alias" ] && [ -n "$local_path" ] && [ -n "$remote" ] \
    || die "usage: push <alias> <local_path> <remote_path>"
  [ -f "$local_path" ] || die "local file not found: $local_path"
  _resolve_open_master "$alias"

  # v0.7.5: coerce_int on wc output — Cygwin wc.exe CR-taint defense.
  local local_size
  local_size=$(coerce_int "$(wc -c < "$local_path" 2>/dev/null)" 0)

  local scp_err rc=0
  scp_err=$(mktemp 2>/dev/null || echo "/tmp/larry-scp.err.$$")
  # Capture scp's status right here; `$?` read after a finished `if` is always 0.
  scp -q \
    -o "ControlPath=$_RH_SOCK" \
    -o "ControlMaster=no" \
    -o "BatchMode=yes" \
    -P "$_RH_PORT" \
    "$local_path" "$_RH_ADDR:$remote" 2>"$scp_err" || rc=$?

  if [ "$rc" -ne 0 ]; then
    printf 'ssh-helper: scp push failed (rc=%d):\n' "$rc" >&2
    cat "$scp_err" >&2 2>/dev/null
    rm -f "$scp_err"
    return 1
  fi
  rm -f "$scp_err"

  # Validate via remote wc -c (coerce_int strips CR/non-digits).
  local got
  got=$(coerce_int "$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" \
    "wc -c < $(printf '%q' "$remote") 2>/dev/null" 2>/dev/null)" 0)
  if [ "$got" != "$local_size" ]; then
    die "partial transfer: local=$local_size bytes, remote=$got bytes ($alias:$remote)"
  fi

  ok "pushed $local_path → $alias:$remote ($got bytes)"
  return 0
}
# pull-smat: smart pull for a Cloverleaf thread's .smatdb file.
# Two modes:
# Full pull: pull-smat <alias> <site> <thread>
# Locates $HCISITEDIR/exec/processes/*/<thread>.smatdb on the
# remote via find, then scp's the entire .smatdb file.
# Sampled: pull-smat <alias> <site> <thread> <days_back>
# Runs sqlite3 server-side, extracts up to 1000 most-recent
# messages from the last <days_back> days, encodes each
# MessageContent BLOB as base64, returns TSV:
# unix_ts<TAB>direction<TAB>type<TAB>source<TAB>dest<TAB>message_blob_b64
# The schema (table=smat_msgs, columns Time/Type/SourceConn/
# DestConn/MessageContent) is the same one nc-msgs.sh uses.
# cmd_pull_smat ALIAS SITE THREAD [DAYS_BACK]
#   Locates <THREAD>.smatdb on the remote, then either
#     - DAYS_BACK omitted: pulls the whole file via cmd_pull, or
#     - DAYS_BACK given:   runs sqlite3 remotely and prints up to 1000 of the
#       most recent rows in that window as TSV
#         unix_ts<TAB>direction<TAB>type<TAB>source<TAB>dest<TAB>base64(MessageContent)
#       followed by a "# smatdb=..." summary line on stderr.
#   Dies on bad usage, unsafe SITE/THREAD/DAYS_BACK values, or a failed lookup.
cmd_pull_smat() {
  local alias="${1:-}" site="${2:-}" thread="${3:-}" days_back="${4:-}"
  [ -n "$alias" ] && [ -n "$site" ] && [ -n "$thread" ] \
    || die "usage: pull-smat <alias> <site> <thread> [days_back]"

  # SITE, THREAD and DAYS_BACK are spliced verbatim into shell text that runs
  # on the remote host, so anything that could break out of the surrounding
  # quotes (quotes, $, ;, spaces, globs, slashes) is refused before any remote
  # call is made.
  # NOTE(review): assumes site/thread names only use letters, digits, '_', '.'
  # and '-' — confirm against the naming rules in use.
  local name_re='^[A-Za-z0-9_.-]+$'
  [[ "$site" =~ $name_re ]] || die "invalid site name: $site"
  [[ "$thread" =~ $name_re ]] || die "invalid thread name: $thread"
  if [ -n "$days_back" ]; then
    [[ "$days_back" =~ ^[0-9]+$ ]] \
      || die "days_back must be a non-negative integer: $days_back"
  fi

  _resolve_open_master "$alias"

  # Discover the remote .smatdb path. SDIR is $HCISITEDIR when the remote shell
  # has it set, else $HCIROOT/<site>. The find runs remotely so no process
  # directory names are hard-coded here.
  # NOTE(review): if the login profile exports HCISITEDIR for a default site,
  # SDIR ignores the <site> argument — confirm this is intended.
  local find_cmd
  find_cmd='set -e; SDIR="${HCISITEDIR:-${HCIROOT:-}/'"$site"'}"; '
  find_cmd+='[ -d "$SDIR" ] || { echo "ERROR: sitedir not found on remote: $SDIR" >&2; exit 2; }; '
  find_cmd+='F=$(find "$SDIR/exec/processes" -maxdepth 2 -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || F=$(find "$SDIR" -type f -name "'"$thread"'.smatdb" 2>/dev/null | head -1); '
  find_cmd+='[ -n "$F" ] || { echo "ERROR: no smatdb found for thread '"$thread"' under $SDIR" >&2; exit 3; }; '
  # The resolved path is emitted behind a sentinel prefix and selected by
  # pattern, so a MOTD/banner printed on stdout cannot be mistaken for it.
  find_cmd+='printf "SMATDB_PATH:%s\n" "$F"'

  # stderr is merged in on purpose: the remote "ERROR:" lines must be visible.
  local _smat_raw remote_smatdb
  _smat_raw=$(ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" "$(_build_login_cmd "$find_cmd")" 2>&1)
  remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^SMATDB_PATH:' | tail -1)
  if [ -n "$remote_smatdb" ]; then
    remote_smatdb="${remote_smatdb#SMATDB_PATH:}"
  else
    # No sentinel — surface any ERROR: line if present, else fall back to the
    # last line so failure modes stay diagnosable.
    remote_smatdb=$(printf '%s\n' "$_smat_raw" | grep '^ERROR:' | tail -1)
    [ -n "$remote_smatdb" ] || remote_smatdb=$(printf '%s\n' "$_smat_raw" | tail -1)
  fi
  case "$remote_smatdb" in
    ERROR:* | '') die "remote smatdb lookup failed: $remote_smatdb" ;;
  esac

  if [ -z "$days_back" ]; then
    # Full mode: scp the whole .smatdb file.
    local local_path
    local_path=$(_pull_cache_path "$alias" "$remote_smatdb")
    cmd_pull "$alias" "$remote_smatdb" "$local_path"
    return $?
  fi

  # Sampled mode: run sqlite3 on the remote, return TSV with b64-encoded blobs.
  # B64 uses `base64 -w0` when the remote base64 advertises -w, otherwise plain
  # base64 with newlines stripped. `direction` is "in" when DestConn equals the
  # thread name, else "out" (best-effort heuristic).
  local sample_cmd
  sample_cmd='set -e; '
  sample_cmd+='which sqlite3 >/dev/null 2>&1 || { echo "ERROR: sqlite3 not on remote PATH" >&2; exit 4; }; '
  sample_cmd+='B64() { if base64 --help 2>&1 | grep -q -- " -w"; then base64 -w0; else base64 | tr -d "\n"; fi; }; '
  # Each row's MessageContent is written to its own temp file via writefile()
  # and base64-encoded from there, so no binary data passes through sqlite3's
  # text output.
  sample_cmd+='TMP=$(mktemp -d); trap "rm -rf $TMP" EXIT; '
  sample_cmd+='CUTOFF_MS=$(( ( $(date +%s) - '"$days_back"' * 86400 ) * 1000 )); '
  sample_cmd+='sqlite3 "'"$remote_smatdb"'" "SELECT rowid, Time, IFNULL(Type,\"\"), IFNULL(SourceConn,\"\"), IFNULL(DestConn,\"\") FROM smat_msgs WHERE Time >= $CUTOFF_MS ORDER BY Time DESC LIMIT 1000" '
  sample_cmd+='| while IFS="|" read -r rid tm typ src dst; do '
  sample_cmd+=' blobfile="$TMP/$rid.bin"; '
  sample_cmd+=' sqlite3 "'"$remote_smatdb"'" "SELECT writefile(\"$blobfile\", MessageContent) FROM smat_msgs WHERE rowid=$rid" >/dev/null 2>&1; '
  sample_cmd+=' if [ "$dst" = "'"$thread"'" ]; then dir="in"; else dir="out"; fi; '
  sample_cmd+=' printf "%s\t%s\t%s\t%s\t%s\t" "$(( tm / 1000 ))" "$dir" "$typ" "$src" "$dst"; '
  sample_cmd+=' B64 < "$blobfile"; '
  sample_cmd+=' printf "\n"; '
  sample_cmd+='done; '
  sample_cmd+='TOTAL=$(sqlite3 "'"$remote_smatdb"'" "SELECT COUNT(*) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='RETURNED=$(sqlite3 "'"$remote_smatdb"'" "SELECT MIN(1000, COUNT(*)) FROM smat_msgs WHERE Time >= $CUTOFF_MS"); '
  sample_cmd+='echo "# smatdb=$(basename '"$remote_smatdb"') days_back='"$days_back"' total_in_window=$TOTAL returned=$RETURNED truncated=$([ "$TOTAL" -gt 1000 ] && echo yes || echo no)" >&2'

  # Wrapped by _build_login_cmd so sqlite3 resolves from the operator's PATH.
  ssh -S "$_RH_SOCK" -p "$_RH_PORT" -o BatchMode=yes "$_RH_ADDR" "$(_build_login_cmd "$sample_cmd")"
}
# Entry point: the first argument selects the subcommand ("help" when absent
# or empty); every remaining argument is handed to the handler unchanged.
subcommand="${1:-help}"
if [ "$#" -gt 0 ]; then
  shift
fi

case "$subcommand" in
  hosts | list)
    cmd_hosts
    ;;
  add)
    cmd_add "$@"
    ;;
  remove | rm)
    cmd_remove "$@"
    ;;
  pass | passwd)
    cmd_pass "$@"
    ;;
  setup | open)
    cmd_setup "$@"
    ;;
  close | exit)
    cmd_close "$@"
    ;;
  status)
    cmd_status "$@"
    ;;
  exec | run)
    cmd_exec "$@"
    ;;
  discover)
    cmd_discover "$@"
    ;;
  pull)
    cmd_pull "$@"
    ;;
  push)
    cmd_push "$@"
    ;;
  pull-smat)
    cmd_pull_smat "$@"
    ;;
  -h | --help | help)
    cmd_help
    ;;
  *)
    die "unknown subcommand: $subcommand (run with --help)"
    ;;
esac