390 lines
15 KiB
Bash
Executable File
390 lines
15 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# nc-parse.sh — first-class native Cloverleaf NetConfig parser for Larry-Anywhere v3.
|
|
# Pure bash + awk, plus standard POSIX text utilities (grep, sed, sort, head). No v1/v2 dependencies.
|
|
#
|
|
# The NetConfig is a TCL-style nested-block file with two top-level declarations:
|
|
# - process <name> { ... } — process containers
|
|
# - protocol <name> { ... } — threads (the operational unit)
|
|
#
|
|
# This parser exposes structured access to those blocks.
|
|
#
|
|
# Usage:
|
|
# nc-parse.sh <subcommand> <netconfig_path> [args...]
|
|
#
|
|
# Subcommands:
|
|
# list-protocols — one protocol name per line
|
|
# list-processes — one process name per line
|
|
# protocol-line <NAME> — line number where `protocol NAME {` appears
|
|
# protocol-block <NAME> — emit the full TCL block for NAME
|
|
# protocol-field <NAME> <FIELD> — emit value of top-level field for NAME
|
|
# (e.g. PROCESSNAME, OUTBOUNDONLY, OBWORKASIB)
|
|
# protocol-nested <NAME> <PATH> — drill into nested block, e.g. "PROTOCOL.PORT"
|
|
# protocol-summary [--all|--filter R] — TSV summary of all protocols with key fields
|
|
# destinations <NAME> — list DEST values from DATAXLATE routing block
|
|
# sources <NAME> — inverse: protocols that DEST to NAME
|
|
# xlate-refs [<NAME>] — list xlate .xlt files referenced
|
|
# tclproc-refs [<NAME>] — list TCL proc names referenced
|
|
# route-block <NAME> — emit the DATAXLATE block (the routing config)
|
|
# help — this help
|
|
#
|
|
# Route-chain PATH enumeration (root-to-leaf chains, all-mode, cross-site) lives
|
|
# in lib/nc-paths.sh — it is the single walker backend built on the one-hop
|
|
# destinations/sources primitives here. The old `chain` subcommand was removed.
|
|
#
|
|
# Exit codes: 0 OK, 1 usage error, 2 not found, 3 parse error.
|
|
# Shell options: referencing an unset variable is an error, and a pipeline
# reports failure when any of its stages fails. (errexit is not enabled.)
set -u -o pipefail

# Path this script was invoked as; cmd_help reads the usage header from it.
NC_SELF=$0
|
|
|
|
# Print "nc-parse: <message>" on stderr and terminate with exit status 1.
# All arguments are joined into a single message.
die() {
  printf 'nc-parse: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
# Guard: terminate with exit status 2 (and a message on stderr) unless $1
# names an existing regular file.
require_file() {
  if [ ! -f "$1" ]; then
    printf 'nc-parse: not a file: %s\n' "$1" >&2
    exit 2
  fi
}
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
# Core block scanner.
#
# Usage:   _blocks <netconfig>
# Output:  one TSV record per top-level block: TYPE\tNAME\tSTART_LINE\tEND_LINE
#          (TYPE is "process" or "protocol"; line numbers are 1-based, inclusive)
# Returns: 0 on success; 3 (parse error, message on stderr) when the file ends
#          while a block is still open.
#
# A block opens on a line that is exactly `process NAME {` or `protocol NAME {`
# (NAME matching [A-Za-z0-9_]+) and closes on the line where the brace depth
# returns to 0. Braces are counted per line at any nesting depth. Following
# Tcl's brace-matching rule, backslash-escaped braces (\{ and \}) are ignored.
# ─────────────────────────────────────────────────────────────────────────────
_blocks() {
  local nc="$1"
  awk '
    BEGIN { depth = 0; in_block = 0; type = ""; name = ""; start = 0 }
    {
      line = $0
      if (!in_block && line ~ /^(process|protocol) [A-Za-z0-9_]+ \{$/) {
        split(line, a, " ")
        type = a[1]
        name = a[2]
        start = NR
        depth = 1
        in_block = 1
        next
      }
      if (in_block) {
        # Drop backslash-escaped characters first so that \{ and \} do not
        # take part in brace matching.
        gsub(/\\./, "", line)
        n_open = gsub(/\{/, "{", line)
        n_close = gsub(/\}/, "}", line)
        depth += n_open - n_close
        if (depth == 0) {
          printf "%s\t%s\t%d\t%d\n", type, name, start, NR
          in_block = 0; type = ""; name = ""; start = 0
        }
      }
    }
    END {
      # Reaching end of file inside a block means the braces never balanced.
      if (in_block) {
        printf "nc-parse: parse error: unterminated %s block %s (opened at line %d)\n", type, name, start > "/dev/stderr"
        exit 3
      }
    }
  ' "$nc"
}
|
|
|
|
# list-protocols: print the name of every top-level `protocol` block found by
# _blocks, one per line, in file order.
#
# Arguments: $1 - NetConfig path
cmd_list_protocols() {
  local netconfig=$1
  require_file "$netconfig"
  _blocks "$netconfig" \
    | awk -F'\t' '{ if ($1 == "protocol") print $2 }'
}
|
|
|
|
# list-processes: print the name of every top-level `process` block found by
# _blocks, one per line, in file order.
#
# Arguments: $1 - NetConfig path
cmd_list_processes() {
  local netconfig=$1
  require_file "$netconfig"
  _blocks "$netconfig" \
    | awk -F'\t' '{ if ($1 == "process") print $2 }'
}
|
|
|
|
# protocol-line: print the line number on which `protocol NAME {` opens.
# Prints nothing when no protocol block carries that name.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
cmd_protocol_line() {
  local netconfig=$1 wanted=$2
  require_file "$netconfig"
  _blocks "$netconfig" \
    | awk -F'\t' -v n="$wanted" '{ if ($1 == "protocol" && $2 == n) print $3 }'
}
|
|
|
|
# protocol-block: print the complete `protocol NAME { ... }` block verbatim.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
# Outputs:   the lines of the block, from its opening line through the line
#            that closes it, on stdout
# Exits:     2 when the file is missing or no protocol has that name
#
# When NAME is declared more than once, the first declaration is used. Only
# one START/END pair is ever handed to the extraction step, as two separate
# numeric variables, so the bounds are always compared as numbers.
cmd_protocol_block() {
  local nc="$1" name="$2"
  require_file "$nc"

  local range first last
  # The filter keeps reading after the first hit (no early exit) so _blocks
  # is never cut off by a closed pipe while pipefail is on.
  range=$(_blocks "$nc" | awk -F'\t' -v n="$name" '
    $1 == "protocol" && $2 == n && !seen { print $3, $4; seen = 1 }
  ')
  if [ -z "$range" ]; then
    printf 'nc-parse: no such protocol: %s\n' "$name" >&2
    exit 2
  fi
  read -r first last <<< "$range"

  # Stop reading as soon as the block has been printed.
  awk -v first="$first" -v last="$last" '
    NR > last { exit }
    NR >= first
  ' "$nc"
}
|
|
|
|
# protocol-field: print the value of a top-level field of a protocol.
#
# A top-level field is a one-line statement of the form `  { FIELD value }`
# located directly inside the protocol block, i.e. the brace depth is 1 when
# the line begins. The leading `{ FIELD ` and the trailing ` }` are removed and
# the remainder is printed unchanged, one output line per matching statement.
#
# Arguments: $1 - NetConfig path, $2 - protocol name, $3 - field name
#            (the field name is spliced into an awk regular expression)
cmd_protocol_field() {
  local nc=$1 name=$2 field=$3
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" | awk -v F="$field" '
    BEGIN { depth = 0 }
    {
      text = $0
      opens = gsub(/\{/, "{", text)
      closes = gsub(/\}/, "}", text)

      # The test uses the depth in effect when the line starts; the running
      # depth is then advanced by the braces found on the line.
      before = depth
      depth = before + opens - closes
      if (before != 1) next

      lead = "^[[:space:]]+\\{ " F " "
      if (text !~ lead) next

      sub(lead, "", text)
      sub(" \\}$", "", text)
      print text
    }
  '
}
|
|
|
|
# protocol-nested: read a value from inside nested blocks of a protocol.
#
# Arguments: $1 - NetConfig path
#            $2 - protocol name
#            $3 - dotted path, e.g. "PROTOCOL.PORT"; every segment but the last
#                 names a `{ KEY {` ... `} }` block to descend into, the last
#                 names a one-line `{ KEY value }` statement
# Outputs:   the value(s) of the final key, one per line
# Returns:   the status of cmd_protocol_block when the protocol cannot be read
#            (2 = not found); exits 2 when an intermediate key yields no body
#
# Path segments are spliced into awk regular expressions.
cmd_protocol_nested() {
  local nc="$1" name="$2" path="$3"
  require_file "$nc"

  # Keep the split path local so it does not leak into the caller's scope.
  local -a parts
  IFS='.' read -ra parts <<< "$path"

  # Propagate the lookup status (2 = not found) instead of flattening it to 1.
  local current
  current=$(cmd_protocol_block "$nc" "$name") || return

  local i key baseline
  for ((i = 0; i < ${#parts[@]}; i++)); do
    key="${parts[$i]}"
    if [ $((i + 1)) -eq ${#parts[@]} ]; then
      # Last segment: extract the scalar value.
      # The text still carries the `protocol NAME {` wrapper when nothing was
      # drilled (statements sit at depth 1); a drilled body has no wrapper, so
      # its statements sit at depth 0.
      baseline=1
      [ "$i" -gt 0 ] && baseline=0
      printf '%s\n' "$current" | awk -v K="$key" -v BASE="$baseline" '
        BEGIN { depth = 0 }
        {
          line = $0
          n_open = gsub(/\{/, "{", line)
          n_close = gsub(/\}/, "}", line)
          prev = depth
          depth += n_open - n_close
          if (prev == BASE && line ~ "^[[:space:]]+\\{ " K " ") {
            sub("^[[:space:]]+\\{ " K " ", "", line)
            sub(" \\}$", "", line)
            print line
          }
        }
      '
    else
      # Drill: find the `{ KEY {` opening line and keep the lines after it
      # until the depth drops below the level of the opening.
      current=$(printf '%s\n' "$current" | awk -v K="$key" '
        BEGIN { depth = 0; capturing = 0; cap_depth = 0 }
        {
          line = $0
          n_open = gsub(/\{/, "{", line)
          n_close = gsub(/\}/, "}", line)
          if (!capturing && line ~ "^[[:space:]]+\\{ " K " \\{$") {
            capturing = 1
            cap_depth = depth + 1
            depth += n_open - n_close
            next
          }
          depth += n_open - n_close
          if (capturing) {
            if (depth < cap_depth) exit
            print
          }
        }
      ')
      if [ -z "$current" ]; then
        printf 'nc-parse: no nested key %s under %s\n' "$key" "$name" >&2
        exit 2
      fi
    fi
  done
}
|
|
|
|
# protocol-summary: compact one-line-per-protocol overview, as TSV.
#
# Arguments: $1 - NetConfig path
#            remaining - flags:
#              --all            no filtering (the default)
#              --filter REGEX   keep only protocols whose name matches the
#                               extended regular expression REGEX
# Outputs:   a header row, then one row per protocol with the columns
#            name, process, direction, port, host, type, isserver, outonly,
#            obworkasib, iclserverport
# Exits:     1 on an unknown flag or when --filter has no argument;
#            2 when the file does not exist
#
# `direction` is inferred from the fields, first match wins:
#   PROTOCOL.ISSERVER = 1 -> inbound-tcp-listen
#   OBWORKASIB        = 1 -> inbound-icl-or-file
#   OUTBOUNDONLY      = 1 -> outbound
#   otherwise             -> unknown
cmd_protocol_summary() {
  local nc="$1"; shift
  local filter=""
  while [ $# -gt 0 ]; do
    case "$1" in
      --all) filter="" ;;
      --filter)
        # Check before reading the value: under `set -u` a missing argument
        # would otherwise abort with a bare "unbound variable" error.
        [ $# -ge 2 ] || die "--filter requires a REGEX argument"
        filter="$2"
        shift
        ;;
      *) die "unknown summary flag: $1" ;;
    esac
    shift
  done
  require_file "$nc"

  # Print TSV header
  printf "name\tprocess\tdirection\tport\thost\ttype\tisserver\toutonly\tobworkasib\ticlserverport\n"

  local names n
  local pname obib outonly iclserv ptype phost pport isserver direction
  names=$(cmd_list_protocols "$nc")

  # Names are read line by line on fd 3, leaving stdin alone for the body.
  while IFS= read -r n <&3; do
    [ -n "$n" ] || continue
    if [ -n "$filter" ] && ! grep -Eq -- "$filter" <<< "$n"; then
      continue
    fi

    pname=$(cmd_protocol_field "$nc" "$n" PROCESSNAME | head -1)
    obib=$(cmd_protocol_field "$nc" "$n" OBWORKASIB | head -1)
    outonly=$(cmd_protocol_field "$nc" "$n" OUTBOUNDONLY | head -1)
    iclserv=$(cmd_protocol_field "$nc" "$n" ICLSERVERPORT | head -1)
    # A protocol without a PROTOCOL sub-block is not an error here; the
    # lookup message is suppressed and the column is left empty.
    ptype=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.TYPE 2>/dev/null | head -1)
    phost=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.HOST 2>/dev/null | head -1)
    pport=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.PORT 2>/dev/null | head -1)
    isserver=$(cmd_protocol_nested "$nc" "$n" PROTOCOL.ISSERVER 2>/dev/null | head -1)

    # Direction inference
    if [ "$isserver" = "1" ]; then
      direction="inbound-tcp-listen"
    elif [ "$obib" = "1" ]; then
      direction="inbound-icl-or-file"
    elif [ "$outonly" = "1" ]; then
      direction="outbound"
    else
      direction="unknown"
    fi

    # Clean braces from values: `{}` becomes empty, `{x}` becomes `x`.
    phost=$(printf '%s' "$phost" | sed 's/^{}$//; s/^{//; s/}$//')
    pport=$(printf '%s' "$pport" | sed 's/^{}$//; s/^{//; s/}$//')

    printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
      "$n" "${pname:-}" "$direction" "${pport:-}" "${phost:-}" \
      "${ptype:-}" "${isserver:-}" "${outonly:-}" "${obib:-}" "${iclserv:-}"
  done 3<<< "$names"
}
|
|
|
|
# destinations: list the routing targets of a protocol.
#
# Every line of the protocol block containing `{ DEST <name> }` (name made of
# letters, digits and underscores) contributes the name that follows the last
# `{ DEST ` on that line. The result is sorted and de-duplicated.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
cmd_destinations() {
  local nc=$1 name=$2
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" | awk '
    {
      dest = $0
      if (dest !~ /\{ DEST [A-Za-z0-9_]+ \}/) next
      sub(/.*\{ DEST /, "", dest)   # drop everything up to the name
      sub(/ \}.*$/, "", dest)       # drop the closing brace and what follows
      print dest
    }
  ' | sort -u
}
|
|
|
|
# xlate-refs: list the distinct `<name>.xlt` translation files referenced,
# sorted. With a protocol name only that protocol's block is searched;
# without one the whole NetConfig is searched.
#
# Arguments: $1 - NetConfig path, $2 - protocol name (optional)
cmd_xlate_refs() {
  local nc=$1 name=${2:-}
  local xlt_re='[A-Za-z0-9_]+\.xlt'
  require_file "$nc"
  if [ -z "$name" ]; then
    grep -oE "$xlt_re" "$nc" | sort -u
    return
  fi
  cmd_protocol_block "$nc" "$name" | grep -oE "$xlt_re" | sort -u
}
|
|
|
|
# sources: list every protocol whose block contains `{ DEST <target> }`,
# i.e. the inverse of `destinations`. The target itself is never listed.
#
# Arguments: $1 - NetConfig path, $2 - target protocol name
# Outputs:   matching protocol names, one per line, in file order
#
# Each protocol's block is fetched separately, so the cost grows with the
# number of protocols.
#
# The target is matched as a literal substring, never as a regular expression.
# The block is captured into a variable before matching: with `pipefail` on, a
# `... | grep -q` pipeline reports failure when grep exits before the producer
# has finished writing, which would hide genuine matches in large blocks.
cmd_sources() {
  local nc="$1" target="$2"
  require_file "$nc"

  local names n block
  local needle="{ DEST $target }"
  names=$(cmd_list_protocols "$nc")

  # Names are read line by line on fd 3, leaving stdin alone for the body.
  while IFS= read -r n <&3; do
    [ -n "$n" ] || continue
    [ "$n" = "$target" ] && continue
    # A protocol whose block cannot be read is skipped.
    block=$(cmd_protocol_block "$nc" "$n" 2>/dev/null) || continue
    case "$block" in
      *"$needle"*) printf '%s\n' "$n" ;;
    esac
  done 3<<< "$names"
}
|
|
|
|
# tclproc-refs: list the distinct TCL proc names referenced, sorted.
# With a protocol name only that protocol's block is scanned; without one the
# whole NetConfig is scanned.
#
# Three spellings are recognised, at most one match of each per line:
#   { PROC name             single proc
#   { PROCS name            single proc
#   { PROCS { n1 n2 ... }   brace-wrapped list of procs
# An empty `{}` value and the keyword PROCSCONTROL match none of them.
#
# Arguments: $1 - NetConfig path, $2 - protocol name (optional)
cmd_tclproc_refs() {
  local nc=$1 name=${2:-}
  require_file "$nc"

  local text
  if [ -z "$name" ]; then
    text=$(<"$nc")
  else
    text=$(cmd_protocol_block "$nc" "$name" 2>/dev/null)
  fi

  awk '
    {
      # "{ PROC " is 7 characters long; the name follows it.
      if (match($0, /\{ PROC [A-Za-z_][A-Za-z0-9_]*/))
        print substr($0, RSTART + 7, RLENGTH - 7)

      # "{ PROCS " is 8 characters long; the name follows it.
      if (match($0, /\{ PROCS [A-Za-z_][A-Za-z0-9_]*/))
        print substr($0, RSTART + 8, RLENGTH - 8)

      # List form: keep what sits between the inner braces, then split it.
      if (match($0, /\{ PROCS \{ [^}]+\}/)) {
        names = substr($0, RSTART + 9, RLENGTH - 9)
        sub(/ *\}$/, "", names)
        count = split(names, word, /[ \t]+/)
        for (k = 1; k <= count; k++)
          if (word[k] != "") print word[k]
      }
    }
  ' <<< "$text" | sort -u | grep -v '^$'
}
|
|
|
|
# NOTE (v0.8.19): the old `cmd_chain` BFS-node-set walker was removed and
|
|
# CONSOLIDATED into lib/nc-paths.sh, which is now the SINGLE route-chain backend.
|
|
# cmd_chain only emitted a flat set of reachable nodes (depth/direction/thread),
|
|
# never enumerated root-to-leaf PATHS, was never wired into the LLM, and would
|
|
# have left two competing walkers. nc-paths.sh ports the v2 `paths` DFS
|
|
# enumerator (SITE/THREAD/HOPS/PATH output, all-mode, cross-site joins) and reuses
|
|
# the one-hop DEST primitives (cmd_destinations / cmd_sources) above. Do not
|
|
# reintroduce a second walker here — extend nc-paths.sh.
|
|
|
|
# route-block: print the DATAXLATE block (the routing configuration) of a
# protocol, from its `{ DATAXLATE {` opening line through the line on which
# the brace depth falls below the level of that opening. Prints nothing when
# the protocol has no such block.
#
# Arguments: $1 - NetConfig path, $2 - protocol name
cmd_route_block() {
  local nc=$1 name=$2
  require_file "$nc"
  cmd_protocol_block "$nc" "$name" | awk '
    BEGIN { depth = 0; capturing = 0; stop_below = 0 }
    {
      text = $0
      opens = gsub(/\{/, "{", text)
      closes = gsub(/\}/, "}", text)
      delta = opens - closes

      if (!capturing && text ~ /^[[:space:]]+\{ DATAXLATE \{$/) {
        capturing = 1
        stop_below = depth + 1
        print
        depth += delta
        next
      }

      depth += delta
      if (!capturing) next

      # The closing line itself is part of the output.
      print
      if (depth < stop_below) exit
    }
  '
}
|
|
|
|
# help: print the usage header found at the top of this script.
#
# The header is the contiguous run of `#` comment lines that follows the first
# line (the shebang). It is located by content rather than by a fixed line
# range, so the help text stays complete when the header grows or shrinks.
# Lines are printed as they appear in the file, `#` prefix included.
cmd_help() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$NC_SELF"
}
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
# Dispatch
#
# $1 selects the subcommand (default: help), $2 is the NetConfig path and the
# remaining arguments belong to the subcommand. Each arm checks the argument
# count first and dies with a usage line (exit 1) when too few were given.
# ─────────────────────────────────────────────────────────────────────────────
SUB="${1:-help}"
case "$SUB" in
  list-protocols)
    if [ $# -lt 2 ]; then die "usage: $0 list-protocols <netconfig>"; fi
    cmd_list_protocols "$2"
    ;;
  list-processes)
    if [ $# -lt 2 ]; then die "usage: $0 list-processes <netconfig>"; fi
    cmd_list_processes "$2"
    ;;
  protocol-line)
    if [ $# -lt 3 ]; then die "usage: $0 protocol-line <netconfig> <name>"; fi
    cmd_protocol_line "$2" "$3"
    ;;
  protocol-block)
    if [ $# -lt 3 ]; then die "usage: $0 protocol-block <netconfig> <name>"; fi
    cmd_protocol_block "$2" "$3"
    ;;
  protocol-field)
    if [ $# -lt 4 ]; then die "usage: $0 protocol-field <netconfig> <name> <field>"; fi
    cmd_protocol_field "$2" "$3" "$4"
    ;;
  protocol-nested)
    if [ $# -lt 4 ]; then die "usage: $0 protocol-nested <netconfig> <name> <dotted.path>"; fi
    cmd_protocol_nested "$2" "$3" "$4"
    ;;
  protocol-summary)
    if [ $# -lt 2 ]; then die "usage: $0 protocol-summary <netconfig> [--filter REGEX]"; fi
    cmd_protocol_summary "$2" "${@:3}"
    ;;
  destinations)
    if [ $# -lt 3 ]; then die "usage: $0 destinations <netconfig> <name>"; fi
    cmd_destinations "$2" "$3"
    ;;
  sources)
    if [ $# -lt 3 ]; then die "usage: $0 sources <netconfig> <name>"; fi
    cmd_sources "$2" "$3"
    ;;
  chain)
    # Kept only to point users of the removed subcommand at its replacement.
    die "the 'chain' subcommand was removed in v0.8.19 — use nc-paths.sh (route-chain path enumerator) instead"
    ;;
  xlate-refs)
    if [ $# -lt 2 ]; then die "usage: $0 xlate-refs <netconfig> [name]"; fi
    cmd_xlate_refs "$2" "${3:-}"
    ;;
  tclproc-refs)
    if [ $# -lt 2 ]; then die "usage: $0 tclproc-refs <netconfig> [name]"; fi
    cmd_tclproc_refs "$2" "${3:-}"
    ;;
  route-block)
    if [ $# -lt 3 ]; then die "usage: $0 route-block <netconfig> <name>"; fi
    cmd_route_block "$2" "$3"
    ;;
  help|-h|--help)
    cmd_help
    ;;
  *)
    die "unknown subcommand: $SUB (try '$0 help')"
    ;;
esac
|