#!/usr/bin/env bash
# nc-parse.sh — first-class native Cloverleaf NetConfig parser for Larry-Anywhere v3.
# Pure bash + awk. No external tools. No v1/v2 dependencies.
#
# The NetConfig is a TCL-style nested-block file with two top-level declarations:
#   - process <name> { ... }   — process containers
#   - protocol <name> { ... }  — threads (the operational unit)
#
# This parser exposes structured access to those blocks.
#
# Usage:
#   nc-parse.sh <subcommand> <NetConfig> [args...]
#
# Subcommands:
#   list-protocols <nc>                — one protocol name per line
#   list-processes <nc>                — one process name per line
#   protocol-line <nc> <name>          — line number where `protocol NAME {` appears
#   protocol-block <nc> <name>         — emit the full TCL block for NAME
#   protocol-field <nc> <name> <field> — emit value of top-level field for NAME
#                                        (e.g. PROCESSNAME, OUTBOUNDONLY, OBWORKASIB)
#   protocol-nested <nc> <name> <path> — drill into nested block, e.g. "PROTOCOL.PORT"
#   protocol-summary <nc> [--all|--filter R] — TSV summary of all protocols with key fields
#   destinations <nc> <name>           — list DEST values from DATAXLATE routing block
#   sources <nc> <name>                — inverse: protocols that DEST to NAME
#   xlate-refs <nc> [<name>]           — list xlate .xlt files referenced
#   tclproc-refs <nc> [<name>]         — list TCL proc names referenced
#   route-block <nc> <name>            — emit the DATAXLATE block (the routing config)
#   index <nc>                         — single-pass route INDEX (P/D/L/O/X records) for the
#                                        in-memory path walker; see cmd_index. Parses ONCE.
#   help                               — this help
#
# Route-chain PATH enumeration (root-to-leaf chains, all-mode, cross-site) lives
# in lib/nc-paths.sh — it is the single walker backend built on the one-hop
# destinations/sources primitives here. The old `chain` subcommand was removed.
#
# Exit codes: 0 OK, 1 usage error, 2 not found, 3 parse error.

set -u
set -o pipefail

NC_SELF="$0"
NC_LIB_DIR="$(cd "$(dirname "$NC_SELF")" && pwd)"

# v0.8.26: shared control-byte sanitizer. This tool dumps NetConfig and raw .tcl
# proc bodies to stdout; that content can carry C0 control bytes (ESC etc.) that
# corrupt a terminal when viewed un-redirected. _sanitize_ctl_tty strips them
# ONLY when stdout is a tty — on a pipe/redirect the bytes pass through raw so
# downstream tooling sees byte-identical content. See lib/cygwin-safe.sh.
if [ -r "$NC_LIB_DIR/cygwin-safe.sh" ]; then
  # shellcheck disable=SC1090,SC1091
  . "$NC_LIB_DIR/cygwin-safe.sh"
else
  _sanitize_ctl_tty() { cat; }  # degrade safe: raw passthrough if lib missing
fi

# die MESSAGE... — print `nc-parse: MESSAGE` on stderr and exit 1 (usage error).
die() { printf 'nc-parse: %s\n' "$*" >&2; exit 1; }

# require_file PATH — exit 2 with a diagnostic unless PATH is a regular file.
require_file() {
  [ -f "$1" ] || { printf 'nc-parse: not a file: %s\n' "$1" >&2; exit 2; }
}

# ─────────────────────────────────────────────────────────────────────────────
# Core: emit each top-level block as record `TYPE\tNAME\tSTART_LINE\tEND_LINE`
# Robust to braces nested arbitrarily.
#
# Arguments: $1 - path to the NetConfig file
# Outputs:   one TSV record per complete `process NAME {` / `protocol NAME {`
#            block, in file order
# Returns:   0 normally; 3 (parse error) when the file ends while a block is
#            still open. Records for the complete blocks before it are still
#            emitted, and a diagnostic naming the open block goes to stderr.
# ─────────────────────────────────────────────────────────────────────────────
_blocks() {
  local nc="$1"
  awk '
    BEGIN { depth=0; in_block=0; type=""; name=""; start=0 }
    {
      line = $0
      if (!in_block && line ~ /^(process|protocol) [A-Za-z0-9_]+ \{$/) {
        split(line, a, " ")
        type = a[1]
        name = a[2]
        start = NR
        depth = 1
        in_block = 1
        next
      }
      if (in_block) {
        # gsub replaces each brace with itself, so it only COUNTS them.
        # Every brace on the line is counted (no escape or quote awareness).
        n_open  = gsub(/\{/, "{", line)
        n_close = gsub(/\}/, "}", line)
        depth += n_open - n_close
        if (depth == 0) {
          printf "%s\t%s\t%d\t%d\n", type, name, start, NR
          in_block = 0; type=""; name=""; start=0
        }
      }
    }
    END {
      # A block still open at EOF means truncated or unbalanced input. Report
      # it instead of silently dropping the block.
      if (in_block) {
        printf "nc-parse: unterminated %s block \"%s\" opened at line %d\n", type, name, start > "/dev/stderr"
        exit 3
      }
    }
  ' "$nc"
}

# ─────────────────────────────────────────────────────────────────────────────
# SINGLE-PASS INDEX (v0.8.20 perf rearchitecture).
#
# Emits, in ONE awk pass over the NetConfig, every fact the path WALKER needs so
# it never has to re-invoke a subprocess or re-parse the file per hop. The old
# walker called nc-parse.sh (destinations/sources/protocol-nested/...) once PER
# HOP PER CANDIDATE, and each of those re-ran _blocks + cmd_protocol_block (two
# full awk passes over a 16K-line file). On the real 24-site integrator that was
# O(threads x parse-cost) = minutes. This subcommand replaces all of it: parse
# ONCE, walk in memory.
#
# Output is a flat TAB-separated record stream (one record per line). The leading
# single-char tag identifies the record kind:
#   P <thread>                        protocol declared in this NetConfig
#   D <thread> <dest>                 a DATAXLATE DEST edge thread->dest
#                                     (handles BOTH `{ DEST name }` and the
#                                     list form `{ DEST {a b c} }`)
#   L <thread> <port>                 a LISTEN port for <thread>:
#                                     server PROTOCOL.PORT (ISSERVER=1) and/or
#                                     the guarded top-level ICLSERVERPORT.
#   O <thread> <port>                 an OUTBOUND/tcpip-client dest port for
#                                     <thread> (PROTOCOL.PORT with ISSERVER!=1).
#   X <name> <site> <thread> <port>   a top-level `destination` block: the
#                                     AUTHORITATIVE cross-site link. A DEST that
#                                     names <name> hops to <thread> in <site>
#                                     (PORT is the connecting port). This is how
#                                     Cloverleaf actually links sites (ICL) — by
#                                     named destination, resolved to SITE+THREAD,
#                                     NOT by blindly matching ports.
#
# Robust to arbitrary brace nesting (same depth bookkeeping as _blocks). DEST,
# ISSERVER, PORT and ICLSERVERPORT are recognised by their canonical one-line
# `{ KEY value }` rendering; absent/`{}` values are simply never emitted (the
# guard that the old paths.tcl lacked).
#
# Arguments: $1 - path to the NetConfig file
# Returns:   exits 2 via require_file when $1 is not a regular file.
# ─────────────────────────────────────────────────────────────────────────────
cmd_index() {
  local nc="$1"
  require_file "$nc"
  awk '
    BEGIN {
      depth=0; in_block=0; btype=""; bname=""
      in_proto=0; proto_depth=0; pport=""; isserver=""; iclport=""
      dsite=""; dthread=""; dport=""
    }

    # ---- enter a top-level block -------------------------------------------
    !in_block && $0 ~ /^(process|protocol|destination) [A-Za-z0-9_]+ \{$/ {
      split($0, a, " ")
      btype = a[1]; bname = a[2]
      depth = 1; in_block = 1
      # reset all per-block accumulators so nothing leaks between blocks
      in_proto = 0; proto_depth = 0; pport=""; isserver=""; iclport=""
      dsite=""; dthread=""; dport=""
      if (btype == "protocol") print "P\t" bname
      next
    }

    in_block {
      line = $0

      # --- field extraction BEFORE we mutate depth (fields are depth-1 inside
      #     their parent block; the value is the whole `{ KEY v }` on one line) ---
      if (btype == "protocol") {
        # DEST single: { DEST name }
        if (match(line, /\{ DEST [A-Za-z0-9_]+ \}/)) {
          v = substr(line, RSTART+7, RLENGTH-9)   # strip "{ DEST " .. " }"
          print "D\t" bname "\t" v
        }
        # DEST list: { DEST {a b c} }
        else if (match(line, /\{ DEST \{[^}]*\}/)) {
          v = substr(line, RSTART+8, RLENGTH-9)   # strip "{ DEST {" .. "}"
          m = split(v, dd, /[ \t]+/)
          for (i=1; i<=m; i++) if (dd[i] != "") print "D\t" bname "\t" dd[i]
        }

        # top-level ICLSERVERPORT (a listen port, guarded numeric)
        if (match(line, /^[[:space:]]+\{ ICLSERVERPORT [0-9]+ \}[[:space:]]*$/)) {
          v = line; sub(/^[[:space:]]+\{ ICLSERVERPORT /, "", v); sub(/ \}[[:space:]]*$/, "", v)
          iclport = v
        }

        # enter the nested { PROTOCOL { ... } } sub-block
        if (!in_proto && line ~ /^[[:space:]]+\{ PROTOCOL \{$/) {
          in_proto = 1; proto_depth = depth + 1
        } else if (in_proto) {
          if (match(line, /^[[:space:]]+\{ PORT [0-9]+ \}[[:space:]]*$/)) {
            v = line; sub(/^[[:space:]]+\{ PORT /, "", v); sub(/ \}[[:space:]]*$/, "", v)
            pport = v
          }
          if (match(line, /^[[:space:]]+\{ ISSERVER [0-9]+ \}[[:space:]]*$/)) {
            v = line; sub(/^[[:space:]]+\{ ISSERVER /, "", v); sub(/ \}[[:space:]]*$/, "", v)
            isserver = v
          }
        }
      } else if (btype == "destination") {
        if (match(line, /^[[:space:]]+\{ SITE [A-Za-z0-9_]+ \}[[:space:]]*$/)) {
          v = line; sub(/^[[:space:]]+\{ SITE /, "", v); sub(/ \}[[:space:]]*$/, "", v); dsite = v
        }
        if (match(line, /^[[:space:]]+\{ THREAD [A-Za-z0-9_]+ \}[[:space:]]*$/)) {
          v = line; sub(/^[[:space:]]+\{ THREAD /, "", v); sub(/ \}[[:space:]]*$/, "", v); dthread = v
        }
        if (match(line, /^[[:space:]]+\{ PORT [0-9]+ \}[[:space:]]*$/)) {
          v = line; sub(/^[[:space:]]+\{ PORT /, "", v); sub(/ \}[[:space:]]*$/, "", v); dport = v
        }
      }

      # --- depth bookkeeping (gsub replaces a brace with itself: count only) ---
      n_open  = gsub(/\{/, "{", line)
      n_close = gsub(/\}/, "}", line)
      depth += n_open - n_close
      if (in_proto && depth < proto_depth) in_proto = 0

      # --- close the top-level block: emit its aggregate records ---
      if (depth == 0) {
        if (btype == "protocol") {
          # listen ports: server PROTOCOL.PORT (ISSERVER=1) and/or ICL port
          if (isserver == "1" && pport != "") print "L\t" bname "\t" pport
          if (iclport != "")                  print "L\t" bname "\t" iclport
          # outbound/tcpip-client dest port
          if (isserver != "1" && pport != "") print "O\t" bname "\t" pport
        } else if (btype == "destination") {
          if (dsite != "" && dthread != "")
            print "X\t" bname "\t" dsite "\t" dthread "\t" dport
        }
        in_block = 0; btype=""; bname=""
      }
    }
  ' "$nc"
}

# list-protocols: print the NAME of every `protocol` record from _blocks.
cmd_list_protocols() {
  local nc="$1"
  require_file "$nc"
  _blocks "$nc" | awk -F'\t' '$1=="protocol"{print $2}'
}

# list-processes: print the NAME of every `process` record from _blocks.
cmd_list_processes() {
  local nc="$1"
  require_file "$nc"
  _blocks "$nc" | awk -F'\t' '$1=="process"{print $2}'
}

# protocol-line: print the start line of each protocol record named $2.
cmd_protocol_line() {
  local nc="$1" name="$2"
  require_file "$nc"
  _blocks "$nc" | awk -F'\t' -v n="$name" '$1=="protocol" && $2==n {print $3}'
}

# protocol-block: print the raw lines of protocol $2, from its `protocol NAME {`
# line through its closing brace.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
# Returns:   exits 2 with a diagnostic when no protocol has that name.
cmd_protocol_block() {
  local nc="$1" name="$2"
  require_file "$nc"
  local range
  # Take the FIRST matching record only. If the name matched more than one
  # record, `range` would hold several lines and the START,END split below
  # would no longer yield two plain numbers.
  range=$(_blocks "$nc" | awk -F'\t' -v n="$name" '
    $1 == "protocol" && $2 == n && !seen { print $3 "," $4; seen = 1 }
  ')
  if [ -z "$range" ]; then
    printf 'nc-parse: no such protocol: %s\n' "$name" >&2
    exit 2
  fi
  awk -v range="$range" 'BEGIN{split(range,r,",")} NR>=r[1] && NR<=r[2]' "$nc"
}

# Top-level fields are lines like: `    { FIELD value }` at depth 1 inside the protocol block.
# Strip surrounding `{ ... }` and emit value(s).
#
# Arguments: $1 - NetConfig path, $2 - protocol name, $3 - field name
#            ($3 is spliced into an awk regex, so it is treated as a pattern)
# Returns:   the pipeline status; under `pipefail` a missing protocol yields 2.
cmd_protocol_field() {
  local nc="$1" name="$2" field="$3"
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" \
    | awk -v F="$field" '
      BEGIN { depth = 0 }
      {
        line = $0
        n_open  = gsub(/\{/, "{", line)
        n_close = gsub(/\}/, "}", line)
        # before applying deltas, the previous depth is what we use to test
        # for "this line is a depth-1 field-statement"
        prev = depth
        depth += n_open - n_close
        # A top-level field is at indent depth==1 BEFORE this line opens any
        # further blocks. It looks like:    { FIELD value }
        # (entire content on one line). We match exactly that.
        if (prev == 1 && line ~ "^[[:space:]]+\\{ " F " ") {
          # strip leading "    { F " and trailing " }"
          sub("^[[:space:]]+\\{ " F " ", "", line)
          sub(" \\}$", "", line)
          print line
        }
      }
    '
}

# Drill into nested blocks. e.g. protocol-nested NAME PROTOCOL.PORT
# Walks the nested { KEY { ... } } structure.
# protocol-nested: resolve a dotted PATH (e.g. PROTOCOL.PORT) inside protocol
# NAME and print the scalar value(s) of its last component.
#
# Every component except the last is "drilled": the `{ KEY {` opening line is
# located and the body up to its matching close is captured. The last component
# is read as a one-line `{ KEY value }` statement from the current body.
#
# Arguments: $1 - NetConfig path
#            $2 - protocol name
#            $3 - dotted key path (each key is spliced into an awk regex)
# Returns:   the status of cmd_protocol_block when the protocol cannot be read
#            (2 = not found, matching the documented exit codes);
#            exits 2 when an intermediate key has no (non-empty) nested block.
cmd_protocol_nested() {
  local nc="$1" name="$2" path="$3"
  require_file "$nc"

  local -a parts   # kept local so the split path does not leak into the caller
  IFS='.' read -ra parts <<< "$path"

  local block
  # Propagate the real failure status instead of collapsing it to 1.
  block=$(cmd_protocol_block "$nc" "$name") || return "$?"

  local current="$block"
  local i key baseline
  for ((i = 0; i < ${#parts[@]}; i++)); do
    key="${parts[$i]}"
    if (( i + 1 == ${#parts[@]} )); then
      # Last part: extract scalar value.
      # Baseline depth depends on whether we drilled (body has no wrapper, prev=0)
      # or we're at the protocol-block level (has `protocol NAME {` wrapper, prev=1).
      baseline=1
      if (( i > 0 )); then
        baseline=0
      fi
      printf '%s\n' "$current" | awk -v K="$key" -v BASE="$baseline" '
        BEGIN { depth = 0 }
        {
          line = $0
          n_open  = gsub(/\{/, "{", line)
          n_close = gsub(/\}/, "}", line)
          prev = depth
          depth += n_open - n_close
          if (prev == BASE && line ~ "^[[:space:]]+\\{ " K " ") {
            sub("^[[:space:]]+\\{ " K " ", "", line)
            sub(" \\}$", "", line)
            print line
          }
        }
      '
    else
      # Drill: find `{ KEY {` opening, capture body until matching `} }`
      current=$(printf '%s\n' "$current" | awk -v K="$key" '
        BEGIN { depth=0; capturing=0; cap_depth=0; finished=0 }
        # Once the block has closed, swallow the remaining input rather than
        # exiting early, so the producer never writes into a closed pipe.
        finished { next }
        {
          line = $0
          n_open  = gsub(/\{/, "{", line)
          n_close = gsub(/\}/, "}", line)
          if (!capturing && line ~ "^[[:space:]]+\\{ " K " \\{$") {
            capturing = 1
            cap_depth = depth + 1   # the new opening { just hit
            depth += n_open - n_close
            next
          }
          if (capturing) {
            depth += n_open - n_close
            if (depth < cap_depth) {
              capturing = 0
              finished = 1
              next
            }
            print
          } else {
            depth += n_open - n_close
          }
        }
      ')
      if [ -z "$current" ]; then
        printf 'nc-parse: no nested key %s under %s\n' "$key" "$name" >&2
        exit 2
      fi
    fi
  done
}

# Compact one-line summary per protocol. TSV.
# protocol-summary: print a TSV table (header + one row per protocol) with the
# columns name, process, direction, port, host, type, isserver, outonly,
# obworkasib, iclserverport.
#
# Arguments: $1 - NetConfig path
#            remaining - flags:
#              --all            no filtering (the default)
#              --filter REGEX   keep only protocols whose name matches the
#                               extended regex REGEX (grep -E)
# Returns:   exits 1 via die on an unknown flag or a --filter without REGEX.
#
# `direction` is inferred, in priority order, from PROTOCOL.ISSERVER=1,
# OBWORKASIB=1, OUTBOUNDONLY=1; anything else is reported as "unknown".
cmd_protocol_summary() {
  local nc="$1"; shift
  local filter=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --all)
        filter=""
        ;;
      --filter)
        # Without this guard a trailing --filter reads an unset $1, which is
        # a hard "unbound variable" abort under `set -u`.
        [ $# -ge 2 ] || die "--filter requires a REGEX argument"
        shift
        filter="$1"
        ;;
      *)
        die "unknown summary flag: $1"
        ;;
    esac
    shift
  done
  require_file "$nc"

  # Print TSV header
  printf "name\tprocess\tdirection\tport\thost\ttype\tisserver\toutonly\tobworkasib\ticlserverport\n"

  local names n
  names=$(cmd_list_protocols "$nc")
  local pname obib outonly iclserv ptype phost pport isserver direction
  # Word-splitting is intentional: one protocol name per whitespace-separated token.
  # shellcheck disable=SC2086
  for n in $names; do
    if [ -n "$filter" ] && ! printf '%s' "$n" | grep -Eq -- "$filter"; then
      continue
    fi

    pname=$(cmd_protocol_field "$nc" "$n" PROCESSNAME | head -1)
    obib=$(cmd_protocol_field "$nc" "$n" OBWORKASIB | head -1)
    outonly=$(cmd_protocol_field "$nc" "$n" OUTBOUNDONLY | head -1)
    iclserv=$(cmd_protocol_field "$nc" "$n" ICLSERVERPORT | head -1)
    # A protocol without a PROTOCOL sub-block is normal here; its columns just
    # stay empty, so the "no nested key" diagnostics are deliberately dropped.
    ptype=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.TYPE 2>/dev/null | head -1)
    phost=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.HOST 2>/dev/null | head -1)
    pport=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.PORT 2>/dev/null | head -1)
    isserver=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.ISSERVER 2>/dev/null | head -1)

    # Direction inference
    if [ "$isserver" = "1" ]; then
      direction="inbound-tcp-listen"
    elif [ "$obib" = "1" ]; then
      direction="inbound-icl-or-file"
    elif [ "$outonly" = "1" ]; then
      direction="outbound"
    else
      direction="unknown"
    fi

    # Clean braces from values (`{}` becomes empty, `{x}` becomes `x`)
    phost=$(printf '%s' "$phost" | sed 's/^{}$//; s/^{//; s/}$//')
    pport=$(printf '%s' "$pport" | sed 's/^{}$//; s/^{//; s/}$//')

    printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
      "$n" "${pname:-}" "$direction" "${pport:-}" "${phost:-}" \
      "${ptype:-}" "${isserver:-}" "${outonly:-}" "${obib:-}" "${iclserv:-}"
  done
}

# Destinations: walk DATAXLATE > ROUTE_DETAILS > { DEST <name> }
#
# Prints the sorted, de-duplicated DEST names found in protocol $2. Both
# renderings are recognised: the single form `{ DEST name }` and the list form
# `{ DEST {a b c} }`, the same two forms cmd_index extracts.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
# Returns:   the pipeline status; under `pipefail` a missing protocol yields 2.
cmd_destinations() {
  local nc="$1" name="$2"
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" \
    | awk '
      # single form: { DEST name }
      match($0, /\{ DEST [A-Za-z0-9_]+ \}/) {
        print substr($0, RSTART + 7, RLENGTH - 9)   # strip "{ DEST " .. " }"
        next
      }
      # list form: { DEST {a b c} }
      match($0, /\{ DEST \{[^}]*\}/) {
        cnt = split(substr($0, RSTART + 8, RLENGTH - 9), dests, /[ \t]+/)
        for (i = 1; i <= cnt; i++) if (dests[i] != "") print dests[i]
      }
    ' | sort -u
}

# Xlate refs: every X.xlt name appearing in
# the protocol's block (or all if no name)
#
# Arguments: $1 - NetConfig path, $2 - optional protocol name
# Outputs:   sorted unique `<word>.xlt` tokens
# Returns:   the pipeline status. Because `pipefail` is set, grep finding no
#            .xlt token at all makes the function return 1.
cmd_xlate_refs() {
  local nc="$1" name="${2:-}"
  require_file "$nc"
  if [ -n "$name" ]; then
    cmd_protocol_block "$nc" "$name" | grep -oE '[A-Za-z0-9_]+\.xlt' | sort -u
  else
    grep -oE '[A-Za-z0-9_]+\.xlt' "$nc" | sort -u
  fi
}

# Sources: every protocol that has `{ DEST <target> }` in its body, in either
# the single form `{ DEST name }` or the list form `{ DEST {a b c} }`.
# Slower than _blocks because it scans each protocol's body, but for a 48-thread
# site it's still sub-second.
#
# Arguments: $1 - NetConfig path, $2 - target protocol name
# Outputs:   one source protocol name per line, in NetConfig order
#            (the target itself is never listed)
cmd_sources() {
  local nc="$1" target="$2"
  require_file "$nc"
  local names n
  names=$(cmd_list_protocols "$nc")
  # Word-splitting is intentional: one protocol name per whitespace-separated token.
  # shellcheck disable=SC2086
  for n in $names; do
    [ "$n" = "$target" ] && continue
    # The awk consumer reads the WHOLE block and reports through its exit
    # status. An early-exiting reader (grep -q) can SIGPIPE the producer, which
    # under `pipefail` turns a real match into a failed pipeline. The target is
    # compared as a plain string, never spliced into a regex.
    if cmd_protocol_block "$nc" "$n" 2>/dev/null | awk -v t="$target" '
        match($0, /\{ DEST [A-Za-z0-9_]+ \}/) {
          if (substr($0, RSTART + 7, RLENGTH - 9) == t) hit = 1
          next
        }
        match($0, /\{ DEST \{[^}]*\}/) {
          cnt = split(substr($0, RSTART + 8, RLENGTH - 9), dests, /[ \t]+/)
          for (i = 1; i <= cnt; i++) if (dests[i] == t) hit = 1
        }
        END { exit(hit ? 0 : 1) }
      '; then
      printf '%s\n' "$n"
    fi
  done
}

# Tclproc references — extract every TCL proc name referenced in this protocol's
# block (DATAFORMAT.PROC singletons + PROCS clauses with one or more names).
# Excludes empty {} and the bare keyword PROCSCONTROL.
#
# Arguments: $1 - NetConfig path, $2 - optional protocol name (whole file if omitted)
# Outputs:   sorted unique proc names
# Returns:   the pipeline status. Because `pipefail` is set, the final
#            `grep -v` printing nothing makes the function return 1.
cmd_tclproc_refs() {
  local nc="$1" name="${2:-}"
  require_file "$nc"
  local body
  if [ -n "$name" ]; then
    # best-effort: an unknown protocol simply yields an empty body
    body=$(cmd_protocol_block "$nc" "$name" 2>/dev/null)
  else
    body=$(cat "$nc")
  fi
  printf '%s\n' "$body" | awk '
    {
      line = $0
      # PROC <name>   (singleton, e.g. DATAFORMAT.PROC)
      # Allow a LEADING DIGIT: Cloverleaf proc names like 3M_check_ack are valid
      # TCL proc names. The old [A-Za-z_]-first class silently dropped them.
      if (match(line, /\{ PROC [A-Za-z0-9_]+/)) {
        v = substr(line, RSTART + 7, RLENGTH - 7)
        print v
      }
      # PROCS <name>  (singleton)
      if (match(line, /\{ PROCS [A-Za-z0-9_]+/)) {
        v = substr(line, RSTART + 8, RLENGTH - 8)
        print v
      }
      # PROCS { name1 name2 ... }  (list — rare but possible)
      if (match(line, /\{ PROCS \{ [^}]+\}/)) {
        v = substr(line, RSTART + 9, RLENGTH - 9)
        sub(/ *\}$/, "", v)
        n = split(v, arr, /[ \t]+/)
        for (i=1; i<=n; i++) if (arr[i] != "") print arr[i]
      }
    }
  ' | sort -u | grep -v '^$'
}

# NOTE (v0.8.19): the old `cmd_chain` BFS-node-set walker was removed and
# CONSOLIDATED into lib/nc-paths.sh, which is now the SINGLE route-chain backend.
# cmd_chain only emitted a flat set of reachable nodes (depth/direction/thread),
# never enumerated root-to-leaf PATHS, was never wired into the LLM, and would
# have left two competing walkers. nc-paths.sh ports the v2 `paths` DFS
# enumerator (SITE/THREAD/HOPS/PATH output, all-mode, PORT-based cross-site links)
# and reuses the one-hop DEST primitives (cmd_destinations / cmd_sources) in this
# file for intra-site routing. Do not reintroduce a second walker here — extend
# nc-paths.sh.

# route-block: print the `{ DATAXLATE {` sub-block of protocol $2, from its
# opening line through the line that closes it. Prints nothing when the
# protocol has no DATAXLATE block.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
# Returns:   the pipeline status; under `pipefail` a missing protocol yields 2.
cmd_route_block() {
  local nc="$1" name="$2"
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" \
    | awk '
      BEGIN { depth=0; capturing=0; cap_depth=0; finished=0 }
      # After the block has closed, keep draining input instead of exiting.
      # An early exit can SIGPIPE the producer, and with `pipefail` that would
      # turn a successful extraction into a non-zero status.
      finished { next }
      {
        line = $0
        n_open  = gsub(/\{/, "{", line)
        n_close = gsub(/\}/, "}", line)
        if (!capturing && line ~ /^[[:space:]]+\{ DATAXLATE \{$/) {
          capturing = 1
          cap_depth = depth + 1
          print
          depth += n_open - n_close
          next
        }
        if (capturing) {
          print
          depth += n_open - n_close
          if (depth < cap_depth) finished = 1
        } else {
          depth += n_open - n_close
        }
      }
    '
}

# help: print the leading comment header of this script (everything after the
# shebang up to the first line that is not a comment). Following the header
# dynamically keeps the help complete when subcommand lines are added, where a
# fixed line range would silently truncate it.
cmd_help() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$NC_SELF"
}

# ─────────────────────────────────────────────────────────────────────────────
# Dispatch
# ─────────────────────────────────────────────────────────────────────────────
SUB="${1:-help}"

# v0.8.26: route the whole dispatch's stdout through the tty-gated sanitizer in
# a brace group, then propagate ${PIPESTATUS[0]} so the subcommand's exit code
# (incl. die's exit 1 and not-found 2/3) survives the pipe. On a pipe/redirect
# the gate is a no-op (raw passthrough); only an interactive tty gets stripped.
# Each arm checks the argument count first and fails with a usage line that
# names the arguments the subcommand expects. $# counts the subcommand itself,
# so "needs NetConfig + name" is `$# -ge 3`.
{
  case "$SUB" in
    list-protocols)
      [ $# -ge 2 ] || die "usage: $0 list-protocols <NetConfig>"
      cmd_list_protocols "$2"
      ;;
    list-processes)
      [ $# -ge 2 ] || die "usage: $0 list-processes <NetConfig>"
      cmd_list_processes "$2"
      ;;
    protocol-line)
      [ $# -ge 3 ] || die "usage: $0 protocol-line <NetConfig> <name>"
      cmd_protocol_line "$2" "$3"
      ;;
    protocol-block)
      [ $# -ge 3 ] || die "usage: $0 protocol-block <NetConfig> <name>"
      cmd_protocol_block "$2" "$3"
      ;;
    protocol-field)
      [ $# -ge 4 ] || die "usage: $0 protocol-field <NetConfig> <name> <field>"
      cmd_protocol_field "$2" "$3" "$4"
      ;;
    protocol-nested)
      [ $# -ge 4 ] || die "usage: $0 protocol-nested <NetConfig> <name> <path>"
      cmd_protocol_nested "$2" "$3" "$4"
      ;;
    protocol-summary)
      [ $# -ge 2 ] || die "usage: $0 protocol-summary <NetConfig> [--all|--filter REGEX]"
      cmd_protocol_summary "$2" "${@:3}"
      ;;
    destinations)
      [ $# -ge 3 ] || die "usage: $0 destinations <NetConfig> <name>"
      cmd_destinations "$2" "$3"
      ;;
    sources)
      [ $# -ge 3 ] || die "usage: $0 sources <NetConfig> <name>"
      cmd_sources "$2" "$3"
      ;;
    chain)
      die "the 'chain' subcommand was removed in v0.8.19 — use nc-paths.sh (route-chain path enumerator) instead"
      ;;
    xlate-refs)
      [ $# -ge 2 ] || die "usage: $0 xlate-refs <NetConfig> [name]"
      cmd_xlate_refs "$2" "${3:-}"
      ;;
    tclproc-refs)
      [ $# -ge 2 ] || die "usage: $0 tclproc-refs <NetConfig> [name]"
      cmd_tclproc_refs "$2" "${3:-}"
      ;;
    route-block)
      [ $# -ge 3 ] || die "usage: $0 route-block <NetConfig> <name>"
      cmd_route_block "$2" "$3"
      ;;
    index)
      [ $# -ge 2 ] || die "usage: $0 index <NetConfig>"
      cmd_index "$2"
      ;;
    help|-h|--help)
      cmd_help
      ;;
    *)
      die "unknown subcommand: $SUB (try '$0 help')"
      ;;
  esac
} | _sanitize_ctl_tty
# The brace group runs as the first pipeline stage, so its status (including
# any `exit` from die/require_file inside it) is PIPESTATUS[0].
exit "${PIPESTATUS[0]}"