cloverleaf-larry/lib/nc-parse.sh
Bryan Johnson 47e44c2289 v0.4.0: chain walk, OR/NOT filter groups, numeric/range ops, smat history
nc-parse.sh
  + chain <name> [--depth N] [--direction both|up|down]
    BFS over sources+destinations from a starting thread; returns the
    reachable cluster as TSV (depth, direction, thread).

nc-msgs.sh
  + Filter operator additions:
      >  >=  <  <=    numeric or lexical (works for HL7 YYYYMMDDHHMMSS timestamps)
      ><              range "LO..HI" inclusive
  + Filter group additions:
      --field         AND group (must match; existing behavior)
      --or-field      OR group  (at least one must match)
      --not-field     NOT group (none may match)
    All three groups combine; bug fixed where empty AND group bypassed
    OR/NOT checks in the count format.
  + SmatHistory walk:
      --include-history    also walks $HCISITEDIR/exec/processes/*/SmatHistory/
      --all                cheat-sheet alias for --include-history

Confirmed working against the real ancout test data:
  - chain IB_ADT_muxS finds all 7 downstream destinations
  - event=A08 OR event=A03 → 20 (19+1 of 22)
  - visit>400000000 → 22 (all numeric in range)
  - visit><400000000..400450000 → 22 (range inclusive)
  - --include-history → 22 active + 34 history rows = 56 total

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 10:58:16 -07:00

434 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
# nc-parse.sh — first-class native Cloverleaf NetConfig parser for Larry-Anywhere v3.
# Pure bash + awk. No external tools. No v1/v2 dependencies.
#
# The NetConfig is a TCL-style nested-block file with two top-level declarations:
# - process <name> { ... } — process containers
# - protocol <name> { ... } — threads (the operational unit)
#
# This parser exposes structured access to those blocks.
#
# Usage:
# nc-parse.sh <subcommand> <netconfig_path> [args...]
#
# Subcommands:
# list-protocols — one protocol name per line
# list-processes — one process name per line
# protocol-line <NAME> — line number where `protocol NAME {` appears
# protocol-block <NAME> — emit the full TCL block for NAME
# protocol-field <NAME> <FIELD> — emit value of top-level field for NAME
# (e.g. PROCESSNAME, OUTBOUNDONLY, OBWORKASIB)
# protocol-nested <NAME> <PATH> — drill into nested block, e.g. "PROTOCOL.PORT"
# protocol-summary [--all|--filter R] — TSV summary of all protocols with key fields
# destinations <NAME> — list DEST values from DATAXLATE routing block
# xlate-refs [<NAME>] — list xlate .xlt files referenced
# route-block <NAME> — emit the DATAXLATE block (the routing config)
# help — this help
#
# Exit codes: 0 OK, 1 usage error, 2 not found, 3 parse error.
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any of its stages fails. errexit is not enabled.
set -u -o pipefail
# Path this script was invoked as; cmd_help reads its text from this file.
NC_SELF=$0
# Print all arguments (joined by spaces) to stderr with the "nc-parse: "
# prefix, then terminate the shell with status 1.
die() {
  printf '%s\n' "nc-parse: $*" >&2
  exit 1
}
# Guard: exit the shell with status 2 (and a message on stderr) unless $1 is
# an existing regular file. Returns 0 when the file is present.
require_file() {
  if [ ! -f "$1" ]; then
    printf 'nc-parse: not a file: %s\n' "$1" >&2
    exit 2
  fi
}
# ─────────────────────────────────────────────────────────────────────────────
# Core: emit each top-level block as record `TYPE\tNAME\tSTART_LINE\tEND_LINE`
# Robust to braces nested arbitrarily.
# ─────────────────────────────────────────────────────────────────────────────
_blocks() {
  # Scan the NetConfig file at $1 and print one TSV record per top-level block:
  #   TYPE<TAB>NAME<TAB>START_LINE<TAB>END_LINE
  # A block starts on a line of the exact form `process NAME {` or
  # `protocol NAME {` (NAME is [A-Za-z0-9_]+) and ends on the line where the
  # brace depth falls back to zero.
  local nc="$1"
  awk '
    BEGIN { depth = 0; in_block = 0; type = ""; name = ""; start = 0 }
    {
      if (!in_block) {
        if ($0 ~ /^(process|protocol) [A-Za-z0-9_]+ \{$/) {
          split($0, hdr, " ")
          type = hdr[1]
          name = hdr[2]
          start = NR
          depth = 1
          in_block = 1
        }
        next
      }

      # Count only unescaped braces: drop every backslash-escaped character
      # from a scratch copy first, so `\{` and `\}` inside a value do not
      # shift the nesting depth.
      probe = $0
      gsub(/\\./, "", probe)
      opened = gsub(/\{/, "", probe)
      closed = gsub(/\}/, "", probe)
      depth += opened - closed

      # `<= 0` rather than `== 0`: one stray extra closing brace must not
      # leave the scanner inside this block for the rest of the file.
      if (depth <= 0) {
        printf "%s\t%s\t%d\t%d\n", type, name, start, NR
        in_block = 0; type = ""; name = ""; start = 0
      }
    }
  ' "$nc"
}
# list-protocols: print the name of every `protocol` block, one per line,
# in the order _blocks reports them.
cmd_list_protocols() {
  local netconfig=$1
  require_file "$netconfig"
  _blocks "$netconfig" \
    | awk 'BEGIN { FS = "\t" } { if ($1 == "protocol") print $2 }'
}
# list-processes: print the name of every `process` block, one per line,
# in the order _blocks reports them.
cmd_list_processes() {
  local netconfig=$1
  require_file "$netconfig"
  _blocks "$netconfig" \
    | awk 'BEGIN { FS = "\t" } { if ($1 == "process") print $2 }'
}
# protocol-line: print the start line number of the `protocol` block whose
# name equals $2. Prints nothing when no such protocol is reported by _blocks.
cmd_protocol_line() {
  local netconfig=$1 wanted=$2
  require_file "$netconfig"
  _blocks "$netconfig" \
    | awk -v wanted="$wanted" '
        BEGIN { FS = "\t" }
        { if ($1 == "protocol" && $2 == wanted) print $3 }
      '
}
# protocol-block: print the raw lines of protocol $2 from NetConfig $1, from
# its `protocol NAME {` line through its closing line (as located by _blocks).
# Exits the shell with status 2 and a message on stderr when _blocks reports
# no protocol with that name.
cmd_protocol_block() {
  local nc="$1" name="$2"
  require_file "$nc"

  # "START,END" line numbers of the requested block; empty when not found.
  local span
  span=$(
    _blocks "$nc" \
      | awk -F'\t' -v want="$name" '$1 == "protocol" && $2 == want { print $3 "," $4 }'
  )
  if [ -z "$span" ]; then
    printf 'nc-parse: no such protocol: %s\n' "$name" >&2
    exit 2
  fi

  awk -v span="$span" '
    BEGIN { split(span, edge, ",") }
    { if (NR >= edge[1] && NR <= edge[2]) print }
  ' "$nc"
}
# Top-level fields are lines like: ` { FIELD value }` at depth 1 inside the protocol block.
# Strip surrounding `{ ... }` and emit value(s).
cmd_protocol_field() {
  # protocol-field: print the value of every top-level field named $3 inside
  # protocol $2 of NetConfig $1.
  #
  # A top-level field is a single line `<indent>{ FIELD value }` that starts
  # while the brace depth is exactly 1 (directly inside `protocol NAME {`).
  # The leading `{ FIELD ` and the trailing ` }` are removed from the output.
  # Prints nothing when the field is absent; a missing protocol is reported by
  # cmd_protocol_block.
  local nc="$1" name="$2" field="$3"
  require_file "$nc"
  # The field name travels through the environment and is compared as a plain
  # string, so regex metacharacters and backslashes in it are never
  # interpreted.
  cmd_protocol_block "$nc" "$name" \
    | NC_FIELD="$field" awk '
        BEGIN { depth = 0; lead = "{ " ENVIRON["NC_FIELD"] " " }
        {
          # Count braces on a scratch copy with backslash-escaped characters
          # removed, so `\{` / `\}` inside a value do not change the depth.
          probe = $0
          gsub(/\\./, "", probe)
          opened = gsub(/\{/, "", probe)
          closed = gsub(/\}/, "", probe)

          # The depth BEFORE this line decides whether it is a top-level field.
          prev = depth
          depth += opened - closed
          if (prev != 1) next

          body = $0
          if (!sub(/^[[:space:]]+/, "", body)) next   # indentation is required
          if (index(body, lead) != 1) next            # must start with "{ FIELD "

          value = substr(body, length(lead) + 1)
          sub(/ \}$/, "", value)
          print value
        }
      '
}
# Drill into nested blocks. e.g. protocol-nested NAME PROTOCOL.PORT
# Walks the nested { KEY { ... } } structure.
cmd_protocol_nested() {
  # protocol-nested: resolve a dotted path such as PROTOCOL.PORT inside
  # protocol $2 of NetConfig $1 and print the scalar value(s) found.
  #
  # Every path component except the last selects a nested block opened by a
  # line `<indent>{ KEY {`; the last component is read as a one-line field
  # `<indent>{ KEY value }` at the top level of the block reached so far.
  #
  # Status: whatever cmd_protocol_block reports when the protocol cannot be
  # read (2 = no such protocol); exits 2 when an intermediate key has no body.
  # A missing final key prints nothing.
  local nc="$1" name="$2" path="$3"
  require_file "$nc"

  local -a parts
  IFS='.' read -ra parts <<< "$path"

  local block
  # Bare `return` keeps the failing status instead of flattening it to 1.
  block=$(cmd_protocol_block "$nc" "$name") || return

  local current="$block" key baseline i
  local last=$(( ${#parts[@]} - 1 ))
  for ((i = 0; i <= last; i++)); do
    key="${parts[$i]}"
    if [ "$i" -eq "$last" ]; then
      # Final component: extract the scalar value.
      # At the protocol level the text still carries the `protocol NAME {`
      # wrapper, so fields start at depth 1; a drilled body has no wrapper,
      # so its fields start at depth 0.
      baseline=1
      [ "$i" -gt 0 ] && baseline=0
      printf '%s\n' "$current" | NC_KEY="$key" awk -v BASE="$baseline" '
        BEGIN { depth = 0; lead = "{ " ENVIRON["NC_KEY"] " " }
        {
          # Escaped braces (`\{`, `\}`) are not structural: drop them first.
          probe = $0
          gsub(/\\./, "", probe)
          opened = gsub(/\{/, "", probe)
          closed = gsub(/\}/, "", probe)
          prev = depth
          depth += opened - closed
          if (prev != BASE) next

          body = $0
          if (!sub(/^[[:space:]]+/, "", body)) next
          if (index(body, lead) != 1) next   # literal "{ KEY " prefix
          value = substr(body, length(lead) + 1)
          sub(/ \}$/, "", value)
          print value
        }
      '
    else
      # Intermediate component: keep only the lines strictly between the
      # `{ KEY {` opener and the line that closes it.
      current=$(printf '%s\n' "$current" | NC_KEY="$key" awk '
        BEGIN {
          depth = 0; capturing = 0; cap_depth = 0
          opener = "{ " ENVIRON["NC_KEY"] " {"
        }
        {
          probe = $0
          gsub(/\\./, "", probe)
          opened = gsub(/\{/, "", probe)
          closed = gsub(/\}/, "", probe)
          delta = opened - closed

          if (!capturing) {
            body = $0
            if (sub(/^[[:space:]]+/, "", body) && body == opener) {
              capturing = 1
              cap_depth = depth + 1   # depth of the first brace just opened
            }
            depth += delta
            next
          }

          depth += delta
          if (depth < cap_depth) exit   # closing line reached; not emitted
          print
        }
      ')
      [ -z "$current" ] && { printf 'nc-parse: no nested key %s under %s\n' "$key" "$name" >&2; exit 2; }
    fi
  done
}
# Compact one-line summary per protocol. TSV.
cmd_protocol_summary() {
  # protocol-summary: print a TSV table (header row first) with one row per
  # protocol in NetConfig $1.
  #
  # Flags (after the NetConfig path):
  #   --all             no name filter (the default)
  #   --filter REGEX    keep only protocols whose name matches REGEX (grep -E)
  #
  # Columns: name, process, direction, port, host, type, isserver, outonly,
  # obworkasib, iclserverport. `direction` is inferred, in this order, from
  # PROTOCOL.ISSERVER=1, OBWORKASIB=1, OUTBOUNDONLY=1; otherwise "unknown".
  local nc="$1"; shift
  local filter=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --all) filter="" ;;
      --filter)
        # Without this guard a trailing `--filter` dereferences an unset
        # positional parameter, which aborts under `set -u`.
        [ $# -ge 2 ] || die "--filter requires a REGEX argument"
        filter="$2"
        shift
        ;;
      *) die "unknown summary flag: $1" ;;
    esac
    shift
  done
  require_file "$nc"

  # Print TSV header
  printf "name\tprocess\tdirection\tport\thost\ttype\tisserver\toutonly\tobworkasib\ticlserverport\n"

  local names n
  local pname obib outonly iclserv ptype phost pport isserver direction
  names=$(cmd_list_protocols "$nc")
  # Word-splitting is intentional: _blocks only reports names made of
  # [A-Za-z0-9_], so they contain neither whitespace nor glob characters.
  for n in $names; do
    if [ -n "$filter" ] && ! grep -Eq -- "$filter" <<< "$n"; then
      continue
    fi

    pname=$(cmd_protocol_field "$nc" "$n" PROCESSNAME | head -1)
    obib=$(cmd_protocol_field "$nc" "$n" OBWORKASIB | head -1)
    outonly=$(cmd_protocol_field "$nc" "$n" OUTBOUNDONLY | head -1)
    iclserv=$(cmd_protocol_field "$nc" "$n" ICLSERVERPORT | head -1)
    # A protocol without a PROTOCOL sub-block is not an error here: the
    # diagnostic is dropped and the column stays empty.
    ptype=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.TYPE 2>/dev/null | head -1)
    phost=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.HOST 2>/dev/null | head -1)
    pport=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.PORT 2>/dev/null | head -1)
    isserver=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.ISSERVER 2>/dev/null | head -1)

    # Direction inference
    if [ "$isserver" = "1" ]; then
      direction="inbound-tcp-listen"
    elif [ "$obib" = "1" ]; then
      direction="inbound-icl-or-file"
    elif [ "$outonly" = "1" ]; then
      direction="outbound"
    else
      direction="unknown"
    fi

    # Strip one leading `{` and one trailing `}` from host and port
    # (`{}` becomes empty, `{6000}` becomes 6000).
    phost=${phost#\{}; phost=${phost%\}}
    pport=${pport#\{}; pport=${pport%\}}

    printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
      "$n" "${pname:-}" "$direction" "${pport:-}" "${phost:-}" \
      "${ptype:-}" "${isserver:-}" "${outonly:-}" "${obib:-}" "${iclserv:-}"
  done
}
# Destinations: walk DATAXLATE > ROUTE_DETAILS > { DEST <name> }
# destinations: print the distinct, sorted thread names that appear as
# `{ DEST name }` (name = [A-Za-z0-9_]+) anywhere in protocol $2's block.
# For each matching line, the text after the last `{ DEST ` up to the first
# following ` }` is emitted.
cmd_destinations() {
  local nc="$1" name="$2"
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" \
    | awk '
        {
          if ($0 !~ /\{ DEST [A-Za-z0-9_]+ \}/) next
          dest = $0
          sub(/.*\{ DEST /, "", dest)
          sub(/ \}.*$/, "", dest)
          print dest
        }
      ' \
    | sort -u
}
# Xlate refs: every X.xlt name appearing in the protocol's block (or all if no name)
# xlate-refs: print the distinct, sorted `<word>.xlt` names (word =
# [A-Za-z0-9_]+) found in protocol $2's block, or in the whole NetConfig $1
# when no protocol name is given.
cmd_xlate_refs() {
  local nc="$1" name="${2:-}"
  local xlt_re='[A-Za-z0-9_]+\.xlt'
  require_file "$nc"

  # No protocol requested: scan the entire file.
  if [ -z "$name" ]; then
    grep -oE "$xlt_re" "$nc" | sort -u
    return
  fi

  cmd_protocol_block "$nc" "$name" | grep -oE "$xlt_re" | sort -u
}
# Sources: every protocol that has `{ DEST <target> }` in its body.
# Slower than _blocks because it scans each protocol's body, but for a 48-thread
# site it's still sub-second.
cmd_sources() {
  # sources: print, one per line and in NetConfig order, every protocol other
  # than $2 whose block contains the literal text `{ DEST <$2> }`.
  local nc="$1" target="$2"
  require_file "$nc"

  local names n block
  local needle="{ DEST $target }"
  names=$(cmd_list_protocols "$nc")
  for n in $names; do
    [ "$n" = "$target" ] && continue
    # Capture the block first rather than piping it into `grep -q`: this
    # script enables pipefail, and a grep that exits on its first match can
    # kill the producer with SIGPIPE, which would make the pipeline report
    # failure and silently drop a real source.
    block=$(cmd_protocol_block "$nc" "$n" 2>/dev/null) || continue
    # Quoted pattern = plain substring test; the target name is never
    # interpreted as a regular expression.
    case "$block" in
      *"$needle"*) printf '%s\n' "$n" ;;
    esac
  done
}
# Tclproc references — extract every TCL proc name referenced in this protocol's
# block (DATAFORMAT.PROC singletons + PROCS clauses with one or more names).
# Excludes empty {} and the bare keyword PROCSCONTROL.
cmd_tclproc_refs() {
  # tclproc-refs: print the distinct, sorted TCL proc names referenced in
  # protocol $2's block, or in the whole NetConfig $1 when no name is given.
  #
  # Recognised forms (first occurrence of each form per line):
  #   { PROC name            single proc
  #   { PROCS name           single proc
  #   { PROCS { n1 n2 ... }  list of procs
  # Empty `{}` values and keywords such as PROCSCONTROL do not match.
  #
  # Status: 0 even when nothing is referenced; the failing status of
  # cmd_protocol_block (2 = no such protocol) when the block cannot be read.
  local nc="$1" name="${2:-}"
  require_file "$nc"

  local body
  if [ -n "$name" ]; then
    # Let the "no such protocol" diagnostic and status through instead of
    # answering an unknown name with empty output.
    body=$(cmd_protocol_block "$nc" "$name") || return
  else
    body=$(cat -- "$nc") || return
  fi

  # awk never prints an empty name, so no blank-line filter is needed after
  # the sort (a trailing `grep -v` would turn "no refs" into exit status 1).
  printf '%s\n' "$body" | awk '
    {
      # PROC <name>: "{ PROC " is 7 characters
      if (match($0, /\{ PROC [A-Za-z_][A-Za-z0-9_]*/))
        print substr($0, RSTART + 7, RLENGTH - 7)

      # PROCS <name>: "{ PROCS " is 8 characters
      if (match($0, /\{ PROCS [A-Za-z_][A-Za-z0-9_]*/))
        print substr($0, RSTART + 8, RLENGTH - 8)

      # PROCS { name1 name2 ... }: "{ PROCS { " is 10 characters
      if (match($0, /\{ PROCS \{ [^}]+\}/)) {
        list = substr($0, RSTART + 10, RLENGTH - 10)
        sub(/ *\}$/, "", list)
        count = split(list, item, /[ \t]+/)
        for (i = 1; i <= count; i++)
          if (item[i] != "") print item[i]
      }
    }
  ' | sort -u
}
# Walk the thread chain starting from a thread name: breadth-first search over
# sources (upstream) and/or destinations (downstream).
#   cmd_chain <netconfig> <start> [--depth N] [--direction both|up|down]
#   --depth N       maximum number of hops to follow (default 99)
#   --direction D   both (default), up (sources only) or down (destinations only)
# Output: TSV, one row per thread the first time it is reached, with columns
#   depth <TAB> thread <TAB> direction
#   depth 0 = the start thread
#   direction = self|up|down
# Invalid or incomplete flags end the script through die (usage error).
cmd_chain() {
  local nc="$1" start="$2"; shift 2
  local max_depth=99 dir="both"
  while [ $# -gt 0 ]; do
    case "$1" in
      --depth|--direction)
        # A flag given last has no value to read; report that instead of
        # dereferencing an unset positional parameter.
        [ $# -ge 2 ] || die "$1 requires a value"
        if [ "$1" = "--depth" ]; then max_depth="$2"; else dir="$2"; fi
        shift
        ;;
      *) die "unknown flag for chain: $1" ;;
    esac
    shift
  done

  # The depth is used in arithmetic below, so accept plain digits only.
  case "$max_depth" in
    ''|*[!0-9]*) die "--depth must be a non-negative integer: $max_depth" ;;
  esac
  max_depth=$((10#$max_depth))   # force base 10 so "08" is not read as octal
  case "$dir" in
    both|up|down) ;;
    *) die "--direction must be both, up or down: $dir" ;;
  esac
  require_file "$nc"

  # All BFS state is kept in memory, so there are no temp files to clean up.
  # `visited` is a newline-delimited set: "\nname1\nname2\n".
  local visited=$'\n'"$start"$'\n'
  local -a frontier next
  frontier=("$start")
  local d t way lister peer

  printf '0\t%s\tself\n' "$start"
  for ((d = 1; d <= max_depth; d++)); do
    next=()
    for t in "${frontier[@]}"; do
      [ -z "$t" ] && continue
      # For each frontier thread: upstream neighbours first, then downstream.
      for way in up down; do
        [ "$dir" = "both" ] || [ "$dir" = "$way" ] || continue
        if [ "$way" = "up" ]; then lister=cmd_sources; else lister=cmd_destinations; fi
        while IFS= read -r peer; do
          [ -z "$peer" ] && continue
          case "$visited" in
            *$'\n'"$peer"$'\n'*) continue ;;   # already reported
          esac
          visited+="$peer"$'\n'
          next+=("$peer")
          printf '%d\t%s\t%s\n' "$d" "$peer" "$way"
        done < <("$lister" "$nc" "$t" 2>/dev/null)
      done
    done
    # Nothing new at this depth: the reachable cluster is complete.
    [ "${#next[@]}" -gt 0 ] || break
    frontier=("${next[@]}")
  done
  return 0
}
cmd_route_block() {
  # route-block: print the DATAXLATE block of protocol $2 from NetConfig $1,
  # from its `<indent>{ DATAXLATE {` line through the line that closes it.
  # Prints nothing when the protocol has no such block; a missing protocol is
  # reported by cmd_protocol_block.
  local nc="$1" name="$2"
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" \
    | awk '
        BEGIN { depth = 0; capturing = 0; cap_depth = 0; finished = 0 }
        # After the block has been printed, keep draining input rather than
        # exiting: an early exit can kill the upstream stage with SIGPIPE,
        # which fails the whole pipeline under pipefail.
        finished { next }
        {
          # Escaped braces (`\{`, `\}`) are not structural: count on a scratch
          # copy with backslash-escaped characters removed.
          probe = $0
          gsub(/\\./, "", probe)
          opened = gsub(/\{/, "", probe)
          closed = gsub(/\}/, "", probe)

          if (!capturing) {
            if ($0 ~ /^[[:space:]]+\{ DATAXLATE \{$/) {
              capturing = 1
              cap_depth = depth + 1   # depth of the first brace just opened
              print
            }
            depth += opened - closed
            next
          }

          print
          depth += opened - closed
          if (depth < cap_depth) finished = 1   # closing line was just printed
        }
      '
}
# help: print the leading comment block of this script (the file named by
# NC_SELF), `#` prefixes included.
# The header is found by content rather than by fixed line numbers: an
# optional `#!` first line and any blank lines before the header are skipped,
# and output stops at the first line after it that is not a comment. Editing
# the header therefore can neither truncate the help nor leak code lines.
cmd_help() {
  awk '
    NR == 1 && /^#!/ { next }
    /^#/ { in_header = 1; print; next }
    in_header || !/^[[:space:]]*$/ { exit }
  ' "$NC_SELF"
}
# ─────────────────────────────────────────────────────────────────────────────
# Dispatch
# ─────────────────────────────────────────────────────────────────────────────
# Entry point: the first argument selects the subcommand (default: help).
# Each arm checks the minimum argument count, ending through die with a usage
# line when it is not met, then forwards the arguments to its cmd_* function.
SUB="${1:-help}"
case "$SUB" in
  list-protocols)
    if [ $# -lt 2 ]; then die "usage: $0 list-protocols <netconfig>"; fi
    cmd_list_protocols "$2"
    ;;
  list-processes)
    if [ $# -lt 2 ]; then die "usage: $0 list-processes <netconfig>"; fi
    cmd_list_processes "$2"
    ;;
  protocol-line)
    if [ $# -lt 3 ]; then die "usage: $0 protocol-line <netconfig> <name>"; fi
    cmd_protocol_line "$2" "$3"
    ;;
  protocol-block)
    if [ $# -lt 3 ]; then die "usage: $0 protocol-block <netconfig> <name>"; fi
    cmd_protocol_block "$2" "$3"
    ;;
  protocol-field)
    if [ $# -lt 4 ]; then die "usage: $0 protocol-field <netconfig> <name> <field>"; fi
    cmd_protocol_field "$2" "$3" "$4"
    ;;
  protocol-nested)
    if [ $# -lt 4 ]; then die "usage: $0 protocol-nested <netconfig> <name> <dotted.path>"; fi
    cmd_protocol_nested "$2" "$3" "$4"
    ;;
  protocol-summary)
    if [ $# -lt 2 ]; then die "usage: $0 protocol-summary <netconfig> [--filter REGEX]"; fi
    cmd_protocol_summary "$2" "${@:3}"
    ;;
  destinations)
    if [ $# -lt 3 ]; then die "usage: $0 destinations <netconfig> <name>"; fi
    cmd_destinations "$2" "$3"
    ;;
  sources)
    if [ $# -lt 3 ]; then die "usage: $0 sources <netconfig> <name>"; fi
    cmd_sources "$2" "$3"
    ;;
  chain)
    if [ $# -lt 3 ]; then die "usage: $0 chain <netconfig> <name> [--depth N] [--direction both|up|down]"; fi
    cmd_chain "$2" "$3" "${@:4}"
    ;;
  xlate-refs)
    if [ $# -lt 2 ]; then die "usage: $0 xlate-refs <netconfig> [name]"; fi
    cmd_xlate_refs "$2" "${3:-}"
    ;;
  tclproc-refs)
    if [ $# -lt 2 ]; then die "usage: $0 tclproc-refs <netconfig> [name]"; fi
    cmd_tclproc_refs "$2" "${3:-}"
    ;;
  route-block)
    if [ $# -lt 3 ]; then die "usage: $0 route-block <netconfig> <name>"; fi
    cmd_route_block "$2" "$3"
    ;;
  help|-h|--help)
    cmd_help
    ;;
  *)
    die "unknown subcommand: $SUB (try '$0 help')"
    ;;
esac