#!/usr/bin/env bash
# nc-engine.sh — Cloverleaf engine process control. Native v3 wrapper
# around the shipped Cloverleaf binaries — modelled on v1 `bounce`,
# `bounce_processes`, `pstop`, `start`, etc.
#
# Every action goes through the journal so it's reversible. Bounces are
# journaled as paired stop+start records; the rollback executes them in
# reverse to restore prior state (best-effort — engine state can drift).
#
# Subcommands:
#   stop <name> [more...]        stop one or more processes/threads
#   start <name> [more...]       start one or more
#   bounce <name> [more...]      stop then start (atomic-ish)
#   restart <name> [more...]     alias of bounce
#   status                       quick site status via tstat (if available)
#   resend-ob <thread> <file>    resend a file outbound (post-xlate)
#   resend-ib <thread> <file>    resend a file inbound (pre-xlate)
#   route-test <thread> <file>   run Cloverleaf route_test for a thread
#   testxlate <xlate> <file>     test an xlate against an xlt file
#   tpstest <msgfile> [procs...] run a TPS test
#
# Options for stop/start/bounce:
#   --site SITE      override $HCISITE for this call
#   --confirm yes    skip Y/N prompt (still journaled)
#   --dry-run        show the binary command but do not execute
#
# Cloverleaf binaries used (auto-discovered under $HCIROOT/bin/):
#   hcienginestop hcienginerun hcienginerestart hcienginestat tstat
#   hciengineroutetest hciengineenginesend ...

set -o pipefail

NC_SELF="$0"
LIB_DIR="$(cd "$(dirname "$NC_SELF")" && pwd)"
JOURNAL="$LIB_DIR/journal.sh"

# Print a prefixed message on stderr and terminate the script with status 1.
die() {
  printf 'nc-engine: %s\n' "$*" >&2
  exit 1
}

# Print a prefixed message on stderr and keep going.
warn() {
  printf 'nc-engine: %s\n' "$*" >&2
}

# Source journal so journaled actions can call journal_write.
# A missing file and a failing source are both reported the same way.
if [[ ! -f "$JOURNAL" ]] || ! . "$JOURNAL"; then
  warn "journal.sh not available — actions will not be reversible"
fi

#######################################
# Read a one-line answer from the controlling terminal.
# Reading from /dev/tty (not stdin) means piped input cannot answer the
# prompt; when no terminal can be opened the answer is empty, i.e. "no".
# The caller prints the prompt text beforehand.
# Returns: 0 only when the answer is exactly "y" or "Y".
#######################################
ask_proceed() {
  local ans=""
  # The group redirect also hides the shell's own "cannot open /dev/tty"
  # message, which a per-command 2>/dev/null would not catch.
  { IFS= read -r ans </dev/tty; } 2>/dev/null || ans=""
  [[ "$ans" =~ ^[Yy]$ ]]
}

#######################################
# Locate an executable by name.
# Lookup order: whatever `command -v` finds, then $HCIROOT/bin, then
# $HCIROOT/server/bin (the HCIROOT locations only when HCIROOT is set).
# Arguments: $1 - command name
# Outputs:   the resolved command/path on stdout
# Returns:   0 when found, 1 otherwise
#######################################
resolve_binary() {
  local name="$1" dir
  if command -v "$name" >/dev/null 2>&1; then
    command -v "$name"
    return
  fi
  # Without HCIROOT the candidates would degrade to /bin and /server/bin.
  [[ -n "${HCIROOT:-}" ]] || return 1
  for dir in "$HCIROOT/bin" "$HCIROOT/server/bin"; do
    if [[ -f "$dir/$name" && -x "$dir/$name" ]]; then
      printf '%s\n' "$dir/$name"
      return 0
    fi
  done
  return 1
}

#######################################
# Record an engine action in the journal as a synthetic "command" entry.
# We don't snapshot files (these are runtime ops, not file edits) but we
# write a manifest-style entry so larry-rollback.sh --list shows them.
# Journaling is best-effort: failures are reported with warn and the
# callers in this file do not check the return status.
# Globals:   LARRY_HOME, LARRY_SESSION_ID, HCIROOT, HCISITE (all read)
# Arguments: $1 - action name, $2 - target, $3 - free-form detail (optional)
# Outputs:   <session dir>/NNN_<action>_<target>.engine and one row
#            appended to $LARRY_HOME/journal/engine-actions.tsv
# Returns:   0 on success, 1 when nothing could be journaled
#######################################
journal_action() {
  local action="$1" target="$2" detail="${3:-}"

  # An unset LARRY_HOME would otherwise turn every path below into /journal/...
  if [[ -z "${LARRY_HOME:-}" ]]; then
    warn "LARRY_HOME is not set — $action $target not journaled"
    return 1
  fi

  local now host
  now=$(date -Iseconds 2>/dev/null || date)
  host=$(hostname 2>/dev/null || echo unknown)

  local sessdir="$LARRY_HOME/journal/${LARRY_SESSION_ID:-engine-$(date +%Y-%m-%d-%H%M%S)-$$}"
  if ! mkdir -p "$sessdir" 2>/dev/null; then
    warn "cannot create journal directory $sessdir — $action $target not journaled"
    return 1
  fi

  # Sequence number = existing .engine entries in this session + 1.
  local count idx
  count=$(find "$sessdir" -name '[0-9]*.engine' 2>/dev/null | wc -l)
  printf -v idx '%03d' "$((count + 1))"

  # Slashes in the target are flattened so it stays a single file name.
  local entry="$sessdir/${idx}_${action}_${target//\//_}.engine"
  printf 'action: %s\ntarget: %s\nwhen: %s\nhost: %s\nhciroot: %s\nhcisite: %s\ndetail: %s\n' \
    "$action" "$target" "$now" "$host" "${HCIROOT:-?}" "${HCISITE:-?}" "$detail" \
    > "$entry" || warn "could not write journal entry $entry"

  # Also append to a flat engine log for quick listing
  local elog="$LARRY_HOME/journal/engine-actions.tsv"
  [[ -f "$elog" ]] || printf 'when\tsession\taction\ttarget\thciroot\thcisite\n' > "$elog"
  printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$now" \
    "${LARRY_SESSION_ID:-?}" "$action" "$target" "${HCIROOT:-?}" "${HCISITE:-?}" >> "$elog"
}

#######################################
# Stop, start or bounce a single process/thread.
# Shows the command, honours --dry-run, asks for confirmation unless
# "--confirm yes" was given, journals the action, then runs the binary
# with HCISITE set to the effective site.
# NOTE(review): the file header says bounces are journaled as paired
# stop+start records; this function writes one record per call, named
# after the requested action.
# Arguments: $1 - stop|start|bounce|restart
#            $2 - process/thread name
#            rest - [--site SITE] [--confirm yes] [--dry-run]; anything
#                   else is ignored
# Returns:   0 on success or dry-run, 1 when the user declines, otherwise
#            the exit status of the engine binary. Exits via die when a
#            required binary is missing or the action is unknown.
#######################################
run_action() {
  local action="$1" target="$2"
  shift 2
  local site="${HCISITE:-}" confirm="" dry=0

  while (($# > 0)); do
    case "$1" in
      --site)
        shift
        site="${1:-}"
        ;;
      --confirm)
        shift
        confirm="${1:-}"
        ;;
      --dry-run)
        dry=1
        ;;
    esac
    # A value-taking flag given last leaves nothing to shift.
    shift || break
  done

  # Commands are kept as argv arrays so the target name is always passed
  # as one literal argument and never re-parsed by the shell.
  local -a first=() second=()
  local bin label
  case "$action" in
    stop)
      bin=$(resolve_binary hcienginestop) || die "hcienginestop not found"
      first=("$bin" -p "$target")
      label="STOP"
      ;;
    start)
      bin=$(resolve_binary hcienginerun) || die "hcienginerun not found"
      first=("$bin" -p "$target")
      label="START"
      ;;
    bounce | restart)
      label="BOUNCE"
      if bin=$(resolve_binary hcienginerestart); then
        first=("$bin" -p "$target")
      else
        # Fallback to stop + start
        local sbin rbin
        sbin=$(resolve_binary hcienginestop) || die "hcienginestop+hcienginerestart both missing"
        rbin=$(resolve_binary hcienginerun) || die "hcienginerun missing"
        first=("$sbin" -p "$target")
        second=("$rbin" -p "$target")
      fi
      ;;
    *)
      die "unknown action: $action"
      ;;
  esac

  # Human-readable form for the screen and the journal only.
  local shown="${first[*]}"
  if ((${#second[@]} > 0)); then
    shown+=" && ${second[*]}"
  fi

  printf '\n%s%s%s thread/process=%s site=%s\n' "${C_YELLOW:-}" "$label" "${C_RESET:-}" "$target" "${site:-?}"
  printf ' $ %s\n' "$shown"

  if [[ "$dry" == "1" ]]; then
    printf ' [dry-run] not executed\n'
    return 0
  fi

  if [[ "$confirm" != "yes" ]]; then
    printf ' proceed? [y/N]: '
    ask_proceed || {
      echo " DENIED by user"
      return 1
    }
  fi

  # Journal with the effective site so the record matches what is run.
  HCISITE="$site" journal_action "$action" "$target" "$shown"

  local rc=0
  HCISITE="$site" "${first[@]}" || rc=$?
  # Start half of the fallback bounce runs only after a clean stop.
  if ((rc == 0 && ${#second[@]} > 0)); then
    HCISITE="$site" "${second[@]}" || rc=$?
  fi

  if ((rc == 0)); then
    echo " ✓ ok"
  else
    warn " exit $rc"
  fi
  return "$rc"
}

#######################################
# Show engine status using hcienginestat, or tstat when that is missing.
# Arguments: passed through unchanged to the status binary
# Returns:   the status binary's exit status; exits via die if neither
#            binary can be resolved
#######################################
cmd_status() {
  local site="${HCISITE:-}" binary
  binary=$(resolve_binary hcienginestat) \
    || binary=$(resolve_binary tstat) \
    || die "no engine-status binary on PATH (looked for hcienginestat, tstat)"
  HCISITE="$site" "$binary" "$@"
}

#######################################
# Resend a message file through a thread after terminal confirmation.
# NOTE(review): the thread name itself is invoked as the command
# (`<thread> resend_ob <file>`) — confirm this is the intended Cloverleaf
# invocation.
# Arguments: $1 - ob|ib, $2 - thread, $3 - existing message file
# Returns:   1 when declined, otherwise the command's exit status
#######################################
cmd_resend() {
  local kind="${1:-}" thread="${2:-}" file="${3:-}"
  [[ -n "$thread" && -f "$file" ]] || die "usage: resend-{ib,ob} <thread> <file>"

  local -a cmd
  case "$kind" in
    ob) cmd=("$thread" resend_ob "$file") ;;
    ib) cmd=("$thread" resend_ib "$file") ;;
    *) die "bad resend kind: $kind" ;;
  esac

  printf '\nRESEND-%s thread=%s file=%s\n $ %s\n proceed? [y/N]: ' \
    "${kind^^}" "$thread" "$file" "${cmd[*]}"
  ask_proceed || {
    echo " DENIED"
    return 1
  }
  journal_action "resend-$kind" "$thread" "file=$file"
  "${cmd[@]}"
}

#######################################
# Run a route test for a thread against an input file, after confirmation.
# NOTE(review): as with resend, the thread name is invoked as the command
# — confirm against the Cloverleaf tooling.
# Arguments: $1 - thread, $2 - existing input file
# Returns:   1 when declined, otherwise the command's exit status
#######################################
cmd_route_test() {
  local thread="${1:-}" file="${2:-}"
  [[ -n "$thread" && -f "$file" ]] || die "usage: route-test <thread> <file>"

  local -a cmd=("$thread" route_test "$file")
  printf '\nROUTE-TEST thread=%s input=%s\n $ %s\n proceed? [y/N]: ' \
    "$thread" "$file" "${cmd[*]}"
  ask_proceed || {
    echo " DENIED"
    return 1
  }
  journal_action "route-test" "$thread" "file=$file"
  "${cmd[@]}"
}

#######################################
# Run `testxlate <xlate> <file>` after confirmation.
# Arguments: $1 - xlate name, $2 - existing xlt input file
# Returns:   1 when declined, otherwise testxlate's exit status
#######################################
cmd_testxlate() {
  local xlate="${1:-}" xltfile="${2:-}"
  [[ -n "$xlate" && -f "$xltfile" ]] || die "usage: testxlate <xlate> <xlt-file>"

  local -a cmd=(testxlate "$xlate" "$xltfile")
  printf '\nTESTXLATE xlate=%s file=%s\n $ %s\n proceed? [y/N]: ' \
    "$xlate" "$xltfile" "${cmd[*]}"
  ask_proceed || {
    echo " DENIED"
    return 1
  }
  journal_action "testxlate" "$xlate" "file=$xltfile"
  "${cmd[@]}"
}

#######################################
# Run `tpstest <msgfile> [procs...]` after confirmation.
# Arguments: $1 - existing message file, rest - proc names passed through
# Returns:   1 when declined, otherwise tpstest's exit status
#######################################
cmd_tpstest() {
  local msgfile="${1:-}"
  [[ -f "$msgfile" ]] || die "usage: tpstest <msgfile> [procs...]"
  shift

  local procs="$*"
  local -a cmd=(tpstest "$msgfile" "$@")
  printf '\nTPSTEST msgfile=%s procs=%s\n $ %s\n proceed? [y/N]: ' \
    "$msgfile" "$procs" "${cmd[*]}"
  ask_proceed || {
    echo " DENIED"
    return 1
  }
  journal_action "tpstest" "$msgfile" "procs=$procs"
  "${cmd[@]}"
}

#######################################
# Entry point: dispatch on the subcommand (default: help).
# For stop/start/bounce/restart, targets and options may be interleaved;
# every target is attempted and the last non-zero status is returned.
# Arguments: the script's command line
# Returns:   status of the dispatched subcommand
#######################################
main() {
  local sub="${1:-help}"
  if (($# > 0)); then
    shift
  fi

  case "$sub" in
    stop | start | bounce | restart)
      local usage="usage: $sub <name> [more...] [--site SITE] [--confirm yes] [--dry-run]"
      (($# >= 1)) || die "$usage"

      # Separate targets from flags. Only --site/--confirm take a value;
      # consuming two words for every flag would eat a target after
      # --dry-run and could never finish on a trailing flag.
      local -a targets=() flags=()
      while (($# > 0)); do
        case "$1" in
          --dry-run)
            flags+=("$1")
            shift
            ;;
          --site | --confirm)
            (($# >= 2)) || die "option $1 requires a value"
            flags+=("$1" "$2")
            shift 2
            ;;
          --*)
            die "unknown option: $1"
            ;;
          *)
            targets+=("$1")
            shift
            ;;
        esac
      done
      ((${#targets[@]} > 0)) || die "$usage"

      local t rc=0
      for t in "${targets[@]}"; do
        run_action "$sub" "$t" "${flags[@]}" || rc=$?
      done
      return "$rc"
      ;;
    status) cmd_status "$@" ;;
    resend-ob) cmd_resend ob "$@" ;;
    resend-ib) cmd_resend ib "$@" ;;
    route-test) cmd_route_test "$@" ;;
    testxlate) cmd_testxlate "$@" ;;
    tpstest) cmd_tpstest "$@" ;;
    help | -h | --help)
      # Print the leading comment block (everything after the shebang up
      # to the first non-comment line), whatever its length.
      awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$NC_SELF"
      ;;
    *) die "unknown subcommand: $sub" ;;
  esac
}

main "$@"