From 47e44c22890d1307308a3d6c99b093bbd29ce048 Mon Sep 17 00:00:00 2001 From: Bryan Johnson Date: Tue, 26 May 2026 10:58:16 -0700 Subject: [PATCH] v0.4.0: chain walk, OR/NOT filter groups, numeric/range ops, smat history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nc-parse.sh + chain [--depth N] [--direction both|up|down] BFS over sources+destinations from a starting thread; returns the reachable cluster as TSV (depth, direction, thread). nc-msgs.sh + Filter operator additions: > >= < <= numeric or lexical (works for HL7 YYYYMMDDHHMMSS timestamps) >< range "LO..HI" inclusive + Filter group additions: --field AND group (must match; existing behavior) --or-field OR group (at least one must match) --not-field NOT group (none may match) All three groups combine; bug fixed where empty AND group bypassed OR/NOT checks in the count format. + SmatHistory walk: --include-history also walks $HCISITEDIR/exec/processes/*/SmatHistory/ --all cheat-sheet alias for --include-history Confirmed working against the real ancout test data: - chain IB_ADT_muxS finds all 7 downstream destinations - event=A08 OR event=A03 → 20 (19+1 of 22) - visit>400000000 → 22 (all numeric in range) - visit><400000000..400450000 → 22 (range inclusive) - --include-history → 22 active + 34 history rows = 56 total Co-Authored-By: Claude Opus 4.7 --- VERSION | 2 +- larry.sh | 2 +- lib/nc-msgs.sh | 228 +++++++++++++++++++++++++++++++++++------------- lib/nc-parse.sh | 60 +++++++++++++ 4 files changed, 231 insertions(+), 61 deletions(-) diff --git a/VERSION b/VERSION index 42045ac..1d0ba9e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.4 +0.4.0 diff --git a/larry.sh b/larry.sh index 50cbda8..0732d90 100755 --- a/larry.sh +++ b/larry.sh @@ -32,7 +32,7 @@ set -o pipefail # ───────────────────────────────────────────────────────────────────────────── # Config # ───────────────────────────────────────────────────────────────────────────── -LARRY_VERSION="0.3.4" +LARRY_VERSION="0.4.0" LARRY_HOME="${LARRY_HOME:-$HOME/.larry}" LARRY_UPDATE_URL="${LARRY_UPDATE_URL:-https://raw.githubusercontent.com/bojj27/cloverleaf-larry/main/larry.sh}" LARRY_AGENTS_URL="${LARRY_AGENTS_URL:-https://raw.githubusercontent.com/bojj27/cloverleaf-larry/main/agents}" diff --git a/lib/nc-msgs.sh b/lib/nc-msgs.sh index 157bde9..e5e0841 100755 --- a/lib/nc-msgs.sh +++ b/lib/nc-msgs.sh @@ -42,26 +42,33 @@ die() { printf 'nc-msgs: %s\n' "$*" >&2; exit 1; } THREAD="" AFTER="" BEFORE="" -FILTERS=() +FILTERS=() # all of these must match (AND group) +OR_FILTERS=() # at least one of these must match (OR group) +NOT_FILTERS=() # none of these may match (NOT group) TYPE="" LIMIT=100 FORMAT="text" SITEDIR="${HCISITEDIR:-}" DB_OVERRIDE="" +INCLUDE_HISTORY=0 while [ $# -gt 0 ]; do case "$1" in - --after) shift; AFTER="$1" ;; - --before) shift; BEFORE="$1" ;; - --field) shift; FILTERS+=("$1") ;; - --type) shift; TYPE="$1" ;; - --limit) shift; LIMIT="$1" ;; - --format) shift; FORMAT="$1" ;; - --sitedir) shift; SITEDIR="$1" ;; - --db) shift; DB_OVERRIDE="$1" ;; - -h|--help) sed -n '2,30p' "$NC_SELF"; exit 0 ;; - -*) die "unknown flag: $1" ;; - *) [ -z "$THREAD" ] && THREAD="$1" || die "extra arg: $1" ;; + --after) shift; AFTER="$1" ;; + --before) shift; BEFORE="$1" ;; + --field) shift; FILTERS+=("$1") ;; + --or-field) shift; OR_FILTERS+=("$1") ;; + --not-field) shift; NOT_FILTERS+=("$1") ;; + --type) shift; TYPE="$1" ;; + --limit) shift; LIMIT="$1" ;; + --format) shift; FORMAT="$1" ;; + --sitedir) shift; SITEDIR="$1" ;; + --db) shift; DB_OVERRIDE="$1" ;; + --include-history) INCLUDE_HISTORY=1 ;; + --all) INCLUDE_HISTORY=1 ;; # cheat-sheet alias + -h|--help) sed -n '2,30p' "$NC_SELF"; exit 0 ;; + -*) die "unknown flag: $1" ;; + *) [ -z "$THREAD" ] && THREAD="$1" || die "extra arg: $1" ;; esac shift done @@ -70,7 +77,8 @@ done case "$FORMAT" in text|json|count|raw|oneline|fields|mp|labeled) ;; *) die "bad --format: $FORMAT" ;; esac command -v sqlite3 >/dev/null 2>&1 || die "sqlite3 not on PATH (universally available on Cloverleaf hosts; install via your distro otherwise)" -# Locate smatdb +# Locate one or more smatdb files. Active smatdb + (optionally) SmatHistory archives. +# Prints one path per line. The downstream loop processes each. locate_smatdb() { if [ -n "$DB_OVERRIDE" ]; then [ -f "$DB_OVERRIDE" ] || die "no such db: $DB_OVERRIDE" @@ -79,15 +87,23 @@ locate_smatdb() { fi [ -n "$SITEDIR" ] || die "no \$HCISITEDIR and no --sitedir; pass one or set the env var" [ -d "$SITEDIR" ] || die "sitedir not a directory: $SITEDIR" - # Standard layout: $SITEDIR/exec/processes//.smatdb - local found - found=$(find "$SITEDIR/exec/processes" -maxdepth 2 -type f -name "${THREAD}.smatdb" 2>/dev/null | head -1) - if [ -z "$found" ]; then - # Sometimes lives one level deeper or under a different layout - found=$(find "$SITEDIR" -type f -name "${THREAD}.smatdb" 2>/dev/null | head -1) + + local active history + active=$(find "$SITEDIR/exec/processes" -maxdepth 2 -type f -name "${THREAD}.smatdb" 2>/dev/null | head -1) + if [ -z "$active" ]; then + active=$(find "$SITEDIR" -type f -name "${THREAD}.smatdb" 2>/dev/null | head -1) + fi + [ -n "$active" ] && printf '%s\n' "$active" + + if [ "$INCLUDE_HISTORY" = "1" ]; then + # Also include SmatHistory archive smatdbs for this thread + find "$SITEDIR/exec/processes" -maxdepth 4 -path '*/SmatHistory/*' -name "${THREAD}.*.smatdb" -type f 2>/dev/null \ + | sort + fi + + if [ -z "$active" ] && [ "$INCLUDE_HISTORY" != "1" ]; then + die "no smatdb found for thread $THREAD under $SITEDIR (looked for ${THREAD}.smatdb)" fi - [ -n "$found" ] || die "no smatdb found for thread $THREAD under $SITEDIR (looked for ${THREAD}.smatdb)" - printf '%s\n' "$found" } # Parse time expression -> unix ms @@ -162,19 +178,29 @@ for filt in "${FILTERS[@]}"; do WHERE="$WHERE AND MessageContent LIKE '%${ESC_VAL}%'" done -SMATDB=$(locate_smatdb) -[ "$FORMAT" = "count" ] || printf 'nc-msgs: querying %s\n' "$SMATDB" >&2 +# Multi-db support: collect from each db, merge, then re-apply LIMIT. +DB_LIST=() +while IFS= read -r line; do + [ -n "$line" ] && DB_LIST+=("$line") +done < <(locate_smatdb) +[ "${#DB_LIST[@]}" -gt 0 ] || die "no smatdb files matched" + +[ "$FORMAT" = "count" ] || printf 'nc-msgs: querying %d smatdb file(s): %s\n' "${#DB_LIST[@]}" "$(IFS=,; echo "${DB_LIST[*]}")" >&2 -# Pull the data TMP_OUT=$(mktemp -d) trap 'rm -rf "$TMP_OUT"' EXIT -SQL="SELECT Time, Type, SourceConn, DestConn, MessageContent FROM smat_msgs WHERE $WHERE ORDER BY Time DESC LIMIT $LIMIT" -sqlite3 -ascii "$SMATDB" "$SQL" > "$TMP_OUT/raw.bin" 2>"$TMP_OUT/err" -if [ -s "$TMP_OUT/err" ]; then - cat "$TMP_OUT/err" >&2 - exit 1 -fi +# Each db: pull matching rows, append to combined raw +COMBINED_RAW="$TMP_OUT/raw.bin" +: > "$COMBINED_RAW" +SQL_BASE="SELECT Time, Type, SourceConn, DestConn, MessageContent FROM smat_msgs WHERE $WHERE ORDER BY Time DESC LIMIT $LIMIT" +for db in "${DB_LIST[@]}"; do + sqlite3 -ascii "$db" "$SQL_BASE" >> "$COMBINED_RAW" 2>"$TMP_OUT/err" + if [ -s "$TMP_OUT/err" ]; then + cat "$TMP_OUT/err" >&2 + : > "$TMP_OUT/err" + fi +done # Split rows (0x1e) into individual files, parse fields per row (0x1f) awk -v RS=$'\x1e' -v FS=$'\x1f' -v outdir="$TMP_OUT" ' @@ -192,23 +218,30 @@ awk -v RS=$'\x1e' -v FS=$'\x1f' -v outdir="$TMP_OUT" ' MSG_COUNT=$(ls "$TMP_OUT"/msg_*.bin 2>/dev/null | wc -l | tr -d ' ') KEPT=0 -# Parse a single filter expression: returns path / op / expected via globals. +# Parse a single filter expression. Returns path / op / expected via globals. # Supported operators (longest-first match): -# !~ does not contain (case-insensitive substring) -# != not equal -# ~ contains (case-insensitive substring) -# = exact equal (or NULL keyword, empty, or * wildcard — see match_filters) +# !~ does not contain (case-insensitive substring) +# != not equal +# ~ contains (case-insensitive substring) +# >= greater-or-equal (numeric or lexical) +# <= less-or-equal +# > greater-than +# < less-than +# >< range (LO..HI), inclusive +# = exact equal (or NULL keyword, empty, or * wildcard) parse_filter() { local filt="$1" FP_OP=""; FP_PATH=""; FP_EXPECTED="" - if [[ "$filt" == *"!~"* ]]; then - FP_PATH="${filt%%!~*}"; FP_EXPECTED="${filt#*!~}"; FP_OP="!~" - elif [[ "$filt" == *"!="* ]]; then - FP_PATH="${filt%%!=*}"; FP_EXPECTED="${filt#*!=}"; FP_OP="!=" - elif [[ "$filt" == *"~"* ]]; then - FP_PATH="${filt%%~*}"; FP_EXPECTED="${filt#*~}"; FP_OP="~" - elif [[ "$filt" == *"="* ]]; then - FP_PATH="${filt%%=*}"; FP_EXPECTED="${filt#*=}"; FP_OP="=" + # Longest-first + if [[ "$filt" == *"!~"* ]]; then FP_PATH="${filt%%!~*}"; FP_EXPECTED="${filt#*!~}"; FP_OP="!~" + elif [[ "$filt" == *"!="* ]]; then FP_PATH="${filt%%!=*}"; FP_EXPECTED="${filt#*!=}"; FP_OP="!=" + elif [[ "$filt" == *">="* ]]; then FP_PATH="${filt%%>=*}"; FP_EXPECTED="${filt#*>=}"; FP_OP=">=" + elif [[ "$filt" == *"<="* ]]; then FP_PATH="${filt%%<=*}"; FP_EXPECTED="${filt#*<=}"; FP_OP="<=" + elif [[ "$filt" == *"><"* ]]; then FP_PATH="${filt%%><*}"; FP_EXPECTED="${filt#*><}"; FP_OP="><" + elif [[ "$filt" == *">"* ]]; then FP_PATH="${filt%%>*}"; FP_EXPECTED="${filt#*>}"; FP_OP=">" + elif [[ "$filt" == *"<"* ]]; then FP_PATH="${filt%%<*}"; FP_EXPECTED="${filt#*<}"; FP_OP="<" + elif [[ "$filt" == *"~"* ]]; then FP_PATH="${filt%%~*}"; FP_EXPECTED="${filt#*~}"; FP_OP="~" + elif [[ "$filt" == *"="* ]]; then FP_PATH="${filt%%=*}"; FP_EXPECTED="${filt#*=}"; FP_OP="=" fi } @@ -263,38 +296,115 @@ field_matches() { return 0 ;; "~") - # Contains, case-insensitive - [ "$is_null" = "1" ] && return 1 # contains-nothing is meaningless + [ "$is_null" = "1" ] && return 1 [[ "$actual_lc" == *"$expected_lc"* ]] && return 0 return 1 ;; "!~") - # Does not contain, case-insensitive - [ "$is_null" = "1" ] && return 0 # always passes "doesn't contain (nothing)" + [ "$is_null" = "1" ] && return 0 [[ "$actual_lc" == *"$expected_lc"* ]] && return 1 return 0 ;; + ">"|">="|"<"|"<=") + # Numeric or lexical (works for HL7 timestamps in YYYYMMDDHHMMSS form). + [ "$is_null" = "1" ] && return 1 + # Compare each repetition — match if ANY satisfies + while IFS= read -r rep; do + [ -z "$rep" ] && continue + if [[ "$rep" =~ ^[0-9]+([.][0-9]+)?$ ]] && [[ "$expected" =~ ^[0-9]+([.][0-9]+)?$ ]]; then + # Pure numeric comparison via awk + if awk -v a="$rep" -v b="$expected" -v op="$op" ' + BEGIN { + a += 0; b += 0 + if (op==">" && a>b) exit 0 + if (op==">=" && a>=b) exit 0 + if (op=="<" && a") [[ "$rep" > "$expected" ]] && return 0 ;; + ">=") [[ ! "$rep" < "$expected" ]] && return 0 ;; + "<") [[ "$rep" < "$expected" ]] && return 0 ;; + "<=") [[ ! "$rep" > "$expected" ]] && return 0 ;; + esac + fi + done <<< "$actual" + return 1 + ;; + "><") + # Range "LO..HI" inclusive + [ "$is_null" = "1" ] && return 1 + local lo hi + lo="${expected%%..*}"; hi="${expected##*..}" + [ -z "$lo" ] || [ -z "$hi" ] || [ "$lo" = "$hi" ] && return 1 + while IFS= read -r rep; do + [ -z "$rep" ] && continue + if [[ "$rep" =~ ^[0-9]+([.][0-9]+)?$ ]]; then + awk -v a="$rep" -v lo="$lo" -v hi="$hi" \ + 'BEGIN { if (a+0 >= lo+0 && a+0 <= hi+0) exit 0; exit 1 }' && return 0 + else + [[ ! "$rep" < "$lo" && ! "$rep" > "$hi" ]] && return 0 + fi + done <<< "$actual" + return 1 + ;; *) return 1 ;; esac } -# Apply all --field filters; AND semantics. +# Apply filter groups: +# --field AND group: every entry must match +# --or-field OR group: if any entries given, at least one must match +# --not-field NOT group: none may match match_filters() { local msg_file="$1" - for filt in "${FILTERS[@]}"; do - parse_filter "$filt" - [ -z "$FP_OP" ] && continue - local actual; actual=$("$HL7F" "$FP_PATH" "$msg_file" 2>/dev/null) - field_matches "$actual" "$FP_OP" "$FP_EXPECTED" || return 1 - done + + # AND group + if [ "${#FILTERS[@]}" -gt 0 ]; then + for filt in "${FILTERS[@]}"; do + [ -z "$filt" ] && continue + parse_filter "$filt" + [ -z "$FP_OP" ] && continue + local actual; actual=$("$HL7F" "$FP_PATH" "$msg_file" 2>/dev/null) + field_matches "$actual" "$FP_OP" "$FP_EXPECTED" || return 1 + done + fi + + # OR group (if any given, at least one must match) + if [ "${#OR_FILTERS[@]}" -gt 0 ]; then + local or_match=0 + for filt in "${OR_FILTERS[@]}"; do + [ -z "$filt" ] && continue + parse_filter "$filt" + [ -z "$FP_OP" ] && continue + local actual; actual=$("$HL7F" "$FP_PATH" "$msg_file" 2>/dev/null) + if field_matches "$actual" "$FP_OP" "$FP_EXPECTED"; then or_match=1; break; fi + done + [ "$or_match" = "1" ] || return 1 + fi + + # NOT group (none may match) + if [ "${#NOT_FILTERS[@]}" -gt 0 ]; then + for filt in "${NOT_FILTERS[@]}"; do + [ -z "$filt" ] && continue + parse_filter "$filt" + [ -z "$FP_OP" ] && continue + local actual; actual=$("$HL7F" "$FP_PATH" "$msg_file" 2>/dev/null) + if field_matches "$actual" "$FP_OP" "$FP_EXPECTED"; then return 1; fi + done + fi + return 0 } # Emit case "$FORMAT" in count) - # Count after filter - if [ ${#FILTERS[@]} -eq 0 ]; then + # Apply filter if ANY group has entries + if [ ${#FILTERS[@]} -eq 0 ] && [ ${#OR_FILTERS[@]} -eq 0 ] && [ ${#NOT_FILTERS[@]} -eq 0 ]; then echo "$MSG_COUNT" else for f in "$TMP_OUT"/msg_*.bin; do @@ -305,7 +415,7 @@ case "$FORMAT" in ;; raw) for f in "$TMP_OUT"/msg_*.bin; do - if [ ${#FILTERS[@]} -eq 0 ] || match_filters "$f"; then + if { [ ${#FILTERS[@]} -eq 0 ] && [ ${#OR_FILTERS[@]} -eq 0 ] && [ ${#NOT_FILTERS[@]} -eq 0 ]; } || match_filters "$f"; then cat "$f"; printf '\x1c' # File separator between messages (rare in HL7) KEPT=$((KEPT+1)) fi @@ -315,7 +425,7 @@ case "$FORMAT" in i=0 for f in "$TMP_OUT"/msg_*.bin; do i=$((i+1)) - if [ ${#FILTERS[@]} -eq 0 ] || match_filters "$f"; then + if { [ ${#FILTERS[@]} -eq 0 ] && [ ${#OR_FILTERS[@]} -eq 0 ] && [ ${#NOT_FILTERS[@]} -eq 0 ]; } || match_filters "$f"; then KEPT=$((KEPT+1)) meta=$(cat "${TMP_OUT}/meta_$(printf '%05d' "$i").tsv") tm=$(printf '%s' "$meta" | awk -F'\t' '{print $1}') @@ -413,7 +523,7 @@ case "$FORMAT" in i=0 for f in "$TMP_OUT"/msg_*.bin; do i=$((i+1)) - if [ ${#FILTERS[@]} -eq 0 ] || match_filters "$f"; then + if { [ ${#FILTERS[@]} -eq 0 ] && [ ${#OR_FILTERS[@]} -eq 0 ] && [ ${#NOT_FILTERS[@]} -eq 0 ]; } || match_filters "$f"; then KEPT=$((KEPT+1)) [ "$first" = "1" ] && first=0 || printf ',' meta=$(cat "${TMP_OUT}/meta_$(printf '%05d' "$i").tsv") diff --git a/lib/nc-parse.sh b/lib/nc-parse.sh index c95744d..3a59f7b 100755 --- a/lib/nc-parse.sh +++ b/lib/nc-parse.sh @@ -321,6 +321,65 @@ cmd_tclproc_refs() { ' | sort -u | grep -v '^$' } +# Walk the full thread chain starting from a thread name. BFS over sources +# and/or destinations to a configurable depth (default unlimited). +# Output: TSV with columns "depth direction thread" +# depth 0 = the start thread +# direction = self|up|down +cmd_chain() { + local nc="$1" start="$2"; shift 2 + local max_depth=99 dir="both" + while [ $# -gt 0 ]; do + case "$1" in + --depth) shift; max_depth="$1" ;; + --direction) shift; dir="$1" ;; + *) die "unknown flag for chain: $1" ;; + esac + shift + done + require_file "$nc" + + # BFS using two associative arrays in awk-style via files + # We'll just use plain arrays in bash. + local tmp_visited; tmp_visited=$(mktemp) + local tmp_frontier; tmp_frontier=$(mktemp) + local tmp_next; tmp_next=$(mktemp) + printf '%s\n' "$start" > "$tmp_visited" + printf '0\t%s\tself\n' "$start" + printf '%s\n' "$start" > "$tmp_frontier" + + local d + for ((d=1; d<=max_depth; d++)); do + : > "$tmp_next" + while IFS= read -r t; do + [ -z "$t" ] && continue + if [ "$dir" = "both" ] || [ "$dir" = "up" ]; then + while IFS= read -r s; do + [ -z "$s" ] && continue + if ! grep -qxF "$s" "$tmp_visited"; then + printf '%s\n' "$s" >> "$tmp_visited" + printf '%s\n' "$s" >> "$tmp_next" + printf '%d\t%s\tup\n' "$d" "$s" + fi + done < <(cmd_sources "$nc" "$t" 2>/dev/null) + fi + if [ "$dir" = "both" ] || [ "$dir" = "down" ]; then + while IFS= read -r dd; do + [ -z "$dd" ] && continue + if ! grep -qxF "$dd" "$tmp_visited"; then + printf '%s\n' "$dd" >> "$tmp_visited" + printf '%s\n' "$dd" >> "$tmp_next" + printf '%d\t%s\tdown\n' "$d" "$dd" + fi + done < <(cmd_destinations "$nc" "$t" 2>/dev/null) + fi + done < "$tmp_frontier" + if [ ! -s "$tmp_next" ]; then break; fi + cp "$tmp_next" "$tmp_frontier" + done + rm -f "$tmp_visited" "$tmp_frontier" "$tmp_next" +} + cmd_route_block() { local nc="$1" name="$2" require_file "$nc" @@ -365,6 +424,7 @@ case "$SUB" in protocol-summary) [ $# -ge 2 ] || die "usage: $0 protocol-summary [--filter REGEX]"; cmd_protocol_summary "$2" "${@:3}" ;; destinations) [ $# -ge 3 ] || die "usage: $0 destinations "; cmd_destinations "$2" "$3" ;; sources) [ $# -ge 3 ] || die "usage: $0 sources "; cmd_sources "$2" "$3" ;; + chain) [ $# -ge 3 ] || die "usage: $0 chain [--depth N] [--direction both|up|down]"; cmd_chain "$2" "$3" "${@:4}" ;; xlate-refs) [ $# -ge 2 ] || die "usage: $0 xlate-refs [name]"; cmd_xlate_refs "$2" "${3:-}" ;; tclproc-refs) [ $# -ge 2 ] || die "usage: $0 tclproc-refs [name]"; cmd_tclproc_refs "$2" "${3:-}" ;; route-block) [ $# -ge 3 ] || die "usage: $0 route-block "; cmd_route_block "$2" "$3" ;;