Portable AI agent for Cloverleaf integration work. Pure bash + curl + jq. Zero dependency on v1 wrapper scripts or v2 cloverleaf-tools.pyz. 27 native Anthropic tools: NetConfig parsing (read) nc_list_protocols, nc_list_processes, nc_protocol_block, nc_protocol_field, nc_protocol_nested, nc_protocol_summary, nc_destinations, nc_sources, nc_xlate_refs, nc_tclproc_refs NetConfig modification (journal-backed writes with rollback) nc_insert_protocol, nc_add_route, larry_rollback_list Workflows nc_find_inbound, nc_make_jump (3-thread jump pattern), nc_find (tbn/tbp/tbh/tbpr/where replacements), nc_document, nc_diff_interface, nc_regression Messages hl7_field, nc_msgs (smat is SQLite!), hl7_diff (with --ignore MSH.7) File system read_file, list_dir, grep_files, glob_files, write_file, bash_exec Validated against a 22-site real Cloverleaf test install. Five worked examples end-to-end: jump-thread generation, smat MRN search, system documentation, interface+connected diff, HL7-aware regression diff. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
249 lines
8.2 KiB
Bash
Executable File
249 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# hl7-diff.sh — HL7-aware diff with field-level normalization.
|
|
# Compares two HL7 message files (or multi-message files), segment-by-segment,
|
|
# field-by-field. Lets you ignore fields that always change (default: MSH.7
|
|
# timestamp) without losing meaningful differences.
|
|
#
|
|
# Usage:
|
|
# hl7-diff.sh [--ignore "FIELD,FIELD,..."] [--include-fields "FIELD,..."]
|
|
# [--format text|tsv|count]
|
|
# <left_file> <right_file>
|
|
#
|
|
# Multi-message handling:
|
|
# Files may contain multiple HL7 messages separated by either:
|
|
# - the file separator byte 0x1c (the nc_msgs --format raw default), OR
|
|
# - blank lines between MSH-starting blocks (legacy).
|
|
# The tool auto-detects.
|
|
#
|
|
# Defaults:
|
|
# --ignore MSH.7
|
|
#
|
|
# Output (text format):
|
|
# Per-message header, then one line per differing field:
|
|
# SEG.FIELD[.COMP[.SUB]] LEFT_VALUE RIGHT_VALUE
|
|
#
|
|
# Exit codes: 0 identical (post-ignore), 1 differences found, 2 input error
|
|
# A pipeline fails when any stage fails, not only the last one.
set -o pipefail

# Path this script was invoked as.
NC_SELF="$0"

# Absolute directory that contains the script.
# CDPATH is cleared for the cd because, with CDPATH set, cd prints the
# directory it resolved to and that output would be captured in front of the
# pwd result. The "--" guards keep a path starting with "-" from being read
# as an option.
LIB_DIR="$(CDPATH='' cd -- "$(dirname -- "$NC_SELF")" && pwd)"

# die MESSAGE...
# Print "hl7-diff: MESSAGE" on stderr and terminate the script with status 2.
die() {
  printf 'hl7-diff: %s\n' "$*" >&2
  exit 2
}
|
|
|
|
# Option defaults; parse_args overrides them from the command line.
IGNORE="MSH.7"   # comma-separated field paths that are never reported
INCLUDE=""       # when non-empty, only these field paths are compared
FORMAT="text"    # text | tsv | count
LEFT=""          # first positional argument
RIGHT=""         # second positional argument

# parse_args ARG...
# Fill IGNORE / INCLUDE / FORMAT / LEFT / RIGHT from the command line.
#   --ignore LIST, --include-fields LIST, --format FMT   take one value
#   -h | --help    print the usage header of this script and exit 0
#   --             end of options; later words are file names even if they
#                  start with "-"
# Any other word starting with "-" is rejected, as is a third positional.
# Errors go through die (exit status 2).
parse_args() {
  local opts_done=0
  while [ $# -gt 0 ]; do
    if [ "$opts_done" -eq 0 ]; then
      case "$1" in
        --ignore|--include-fields|--format)
          # Without this check a flag given as the last word silently set
          # its option to the empty string.
          [ $# -ge 2 ] || die "missing value for $1"
          case "$1" in
            --ignore) IGNORE="$2" ;;
            --include-fields) INCLUDE="$2" ;;
            --format) FORMAT="$2" ;;
          esac
          shift 2
          continue
          ;;
        -h|--help)
          sed -n '2,25p' "$NC_SELF"
          exit 0
          ;;
        --)
          opts_done=1
          shift
          continue
          ;;
        -*)
          die "unknown flag: $1"
          ;;
      esac
    fi
    if [ -z "$LEFT" ]; then
      LEFT="$1"
    elif [ -z "$RIGHT" ]; then
      RIGHT="$1"
    else
      die "extra arg: $1"
    fi
    shift
  done
}

parse_args "$@"

# Both inputs are mandatory regular files; FORMAT must be a known renderer.
[ -n "$LEFT" ] || die "missing <left_file> argument (see --help)"
[ -n "$RIGHT" ] || die "missing <right_file> argument (see --help)"
[ -f "$LEFT" ] || die "no such left file: $LEFT"
[ -f "$RIGHT" ] || die "no such right file: $RIGHT"
case "$FORMAT" in
  text|tsv|count) ;;
  *) die "bad --format" ;;
esac
|
|
|
|
# split_messages INFILE OUTFILE
# Split an HL7 file into individual messages and write them to OUTFILE
# (overwritten).
#
# Output format: each message is its non-empty segments joined by \r (0x0d),
# followed by the record separator 0x1e. Nothing is written for a message
# that has no segments.
#
# Message boundaries:
#   - If INFILE contains the byte 0x1c, every 0x1c-delimited chunk is one
#     message (raw nc_msgs format).
#   - Otherwise every segment that starts with "MSH|" opens a new message;
#     segments seen before the first MSH form a message of their own.
#
# In both modes LF, CRLF and bare CR are accepted as segment terminators and
# empty segments (blank lines, the newline that follows a 0x1c) are dropped,
# so both inputs of a comparison reach the diff stage in the same shape.
#
# Returns the exit status of awk.
split_messages() {
  local infile="$1" outfile="$2"
  local mode="msh" rs='\n'

  if grep -q $'\x1c' -- "$infile" 2>/dev/null; then
    mode="fs"
    rs=$'\x1c'
  fi

  # The input is redirected rather than passed as an operand so that a file
  # name containing "=" cannot be mistaken for an awk variable assignment.
  awk -v RS="$rs" -v MODE="$mode" -v OUT_SEP=$'\x1e' '
    # Write the message collected so far (if any) and start a new one.
    function emit_msg() {
      if (msg != "") printf "%s%s", msg, OUT_SEP
      msg = ""
    }
    {
      rec = $0
      gsub(/\n/, "\r", rec)                 # one terminator style: \r
      nseg = split(rec, segs, "\r")
      for (s = 1; s <= nseg; s++) {
        if (segs[s] == "") continue         # blank line / doubled terminator
        if (MODE == "msh" && segs[s] ~ /^MSH\|/) emit_msg()
        msg = (msg == "") ? segs[s] : msg "\r" segs[s]
      }
      if (MODE == "fs") emit_msg()          # one record == one message
    }
    END { emit_msg() }
  ' < "$infile" > "$outfile"
}
|
|
|
|
# compare_split_messages LEFT_LIST RIGHT_LIST
# Diff two message lists (messages terminated by 0x1e, segments joined by \r,
# as written by split_messages) message-by-message, segment-by-segment and
# field-by-field with a single awk program.
#
# Reads the globals IGNORE, INCLUDE, FORMAT (text | tsv | count) and
# LEFT / RIGHT (only shown in the text header).
# Output:
#   text   header, then for each message that has differences a
#          "----- message N -----" line followed by one line per difference,
#          then a total
#   tsv    msg_index <TAB> path <TAB> left <TAB> right, one line per difference
#   count  the number of differences only
# Returns 0 when no differences were found, 1 otherwise.
compare_split_messages() {
  local left_list="$1" right_list="$2"

  # SIDE=L / SIDE=R are awk operand assignments: each takes effect just before
  # the file that follows it is read, so records are attributed to the correct
  # side even when the left list is empty.
  awk -v IGNORE="$IGNORE" -v INCLUDE="$INCLUDE" -v FMT="$FORMAT" \
      -v LFILE="$LEFT" -v RFILE="$RIGHT" '
  # Trim blanks around one entry of a comma-separated list.
  function trim(s) {
    gsub(/^[ \t]+|[ \t]+$/, "", s)
    return s
  }

  # Return 1 when SEG.FIELD[.COMP[.SUB]] must be skipped, 0 when it must be
  # reported. A list entry matches the path itself and everything below it:
  # "PID.3" covers PID.3.1 and PID.3.1.2, "PID.3.1" covers PID.3.1.2.
  # With INCLUDE set only listed paths are reported and IGNORE is not consulted.
  # All working variables are locals; the function is reached from inside the
  # END loop and must not touch its counter.
  function ignored(seg, field, comp, subc,    p_field, p_comp, p_sub) {
    p_field = seg "." field
    p_comp = (comp != "") ? p_field "." comp : ""
    p_sub = (p_comp != "" && subc != "") ? p_comp "." subc : ""
    if (INCLUDE != "") {
      if (p_field in INC_SET) return 0
      if (p_comp != "" && (p_comp in INC_SET)) return 0
      if (p_sub != "" && (p_sub in INC_SET)) return 0
      return 1
    }
    if (p_field in IGN_SET) return 1
    if (p_comp != "" && (p_comp in IGN_SET)) return 1
    if (p_sub != "" && (p_sub in IGN_SET)) return 1
    return 0
  }

  # Split one message into its non-empty segments, stored densely as
  # out_segs[1..count]. Returns count. Dense numbering keeps the two sides
  # aligned when only one of them carries empty segments.
  function parse_msg(msg, out_segs,    n, i, cnt, raw_segs) {
    split("", out_segs)
    cnt = 0
    n = split(msg, raw_segs, "\r")
    for (i = 1; i <= n; i++)
      if (raw_segs[i] != "") out_segs[++cnt] = raw_segs[i]
    return cnt
  }

  # Record one difference and render it for the tsv and text formats.
  # In text format the message header is printed once, right before the first
  # difference of that message.
  function emit(msg_idx, path, lv, rv) {
    if (FMT == "tsv") {
      printf "%d\t%s\t%s\t%s\n", msg_idx, path, lv, rv
    } else if (FMT == "text") {
      if (msg_idx != HDR_MSG) {
        printf "----- message %d -----\n", msg_idx
        HDR_MSG = msg_idx
      }
      printf " %-20s %-30s %s\n", path, lv, rv
    }
    DIFF_COUNT++
  }

  # Compare one field and report at the finest level present: subcomponents
  # when a component contains "&", components when the field contains "^",
  # otherwise the whole field. Returns the number of differences emitted.
  function compare_field(seg, fidx, lv, rv, msg_idx,    n_lc, n_rc, lcomp, rcomp, lc, rc, j, nc, n_ls, n_rs, lsub, rsub, ls, rs, k, ns, diffs) {
    # Values produced by split() that look numeric would be compared as
    # numbers ("00123" == "123"); appending "" forces string comparison.
    lv = lv ""
    rv = rv ""
    if (lv == rv) return 0

    # MSH.2 holds the encoding characters themselves and is compared whole.
    if (!(seg == "MSH" && fidx == 2) && (lv ~ /\^/ || rv ~ /\^/)) {
      n_lc = split(lv, lcomp, "^")
      n_rc = split(rv, rcomp, "^")
      nc = (n_lc > n_rc) ? n_lc : n_rc
      diffs = 0
      for (j = 1; j <= nc; j++) {
        lc = (j <= n_lc) ? lcomp[j] "" : ""
        rc = (j <= n_rc) ? rcomp[j] "" : ""
        if (lc == rc) continue
        if (lc ~ /&/ || rc ~ /&/) {
          n_ls = split(lc, lsub, "&")
          n_rs = split(rc, rsub, "&")
          ns = (n_ls > n_rs) ? n_ls : n_rs
          for (k = 1; k <= ns; k++) {
            ls = (k <= n_ls) ? lsub[k] "" : ""
            rs = (k <= n_rs) ? rsub[k] "" : ""
            if (ls != rs && !ignored(seg, fidx, j, k)) {
              emit(msg_idx, seg "." fidx "." j "." k, ls, rs)
              diffs++
            }
          }
        } else if (!ignored(seg, fidx, j, "")) {
          emit(msg_idx, seg "." fidx "." j, lc, rc)
          diffs++
        }
      }
      return diffs
    }

    if (!ignored(seg, fidx, "", "")) {
      emit(msg_idx, seg "." fidx, lv, rv)
      return 1
    }
    return 0
  }

  # Compare two segments of the same type field by field.
  # Piece 1 of the "|" split is the segment name. For MSH, piece i is MSH.i
  # because MSH.1 is the separator itself; for other segments piece i is
  # SEG.(i-1).
  function diff_segment(seg_name, lseg, rseg, msg_idx,    lf, rf, nl, nr, nmax, i, lv, rv, field_num) {
    nl = split(lseg, lf, "|")
    nr = split(rseg, rf, "|")
    nmax = (nl > nr) ? nl : nr
    for (i = 2; i <= nmax; i++) {
      lv = (i <= nl) ? lf[i] : ""
      rv = (i <= nr) ? rf[i] : ""
      field_num = (seg_name == "MSH") ? i : i - 1
      compare_field(seg_name, field_num, lv, rv, msg_idx)
    }
  }

  # Compare two messages positionally. When the segment types at a position
  # differ (or one side has run out) a SEGMENT_ORDER difference is reported
  # and the fields at that position are not compared.
  function diff_message(left_msg, right_msg, msg_idx,    l_segs, r_segs, ln, rn, mx, i, l, r, l_name, r_name) {
    ln = parse_msg(left_msg, l_segs)
    rn = parse_msg(right_msg, r_segs)
    mx = (ln > rn) ? ln : rn
    for (i = 1; i <= mx; i++) {
      l = (i <= ln) ? l_segs[i] : ""
      r = (i <= rn) ? r_segs[i] : ""
      l_name = (l != "") ? substr(l, 1, 3) : "(none)"
      r_name = (r != "") ? substr(r, 1, 3) : "(none)"
      if (l_name != r_name) {
        emit(msg_idx, "SEGMENT_ORDER", l_name, r_name)
        continue
      }
      diff_segment(l_name, l, r, msg_idx)
    }
  }

  BEGIN {
    DIFF_COUNT = 0; n_l = 0; n_r = 0; HDR_MSG = 0
    # Turn the comma-separated lists into lookup sets once.
    cnt_ign = split(IGNORE, list_tmp, ",")
    for (q = 1; q <= cnt_ign; q++) {
      entry = trim(list_tmp[q])
      if (entry != "") IGN_SET[entry] = 1
    }
    cnt_inc = split(INCLUDE, list_tmp, ",")
    for (q = 1; q <= cnt_inc; q++) {
      entry = trim(list_tmp[q])
      if (entry != "") INC_SET[entry] = 1
    }
  }

  SIDE == "L" { L_MSGS[++n_l] = $0; next }
  { R_MSGS[++n_r] = $0 }

  END {
    total = (n_l > n_r) ? n_l : n_r
    if (FMT == "text") {
      printf "HL7 diff:\n left: %s (%d messages)\n right: %s (%d messages)\n ignore: %s\n", LFILE, n_l, RFILE, n_r, IGNORE
      if (INCLUDE != "") printf " include-only: %s\n", INCLUDE
      printf "\n"
    }
    # A differing number of messages counts as one difference of its own.
    if (n_l != n_r) {
      if (FMT == "tsv") printf "0\tMESSAGE_COUNT\t%d\t%d\n", n_l, n_r
      else if (FMT == "text") printf " MESSAGE COUNT mismatch: %d vs %d\n", n_l, n_r
      DIFF_COUNT++
    }
    for (m = 1; m <= total; m++) {
      lm = (m <= n_l) ? L_MSGS[m] : ""
      rm = (m <= n_r) ? R_MSGS[m] : ""
      if (lm == "" || rm == "") {
        emit(m, "MESSAGE_PRESENCE", (lm != "" ? "present" : "missing"), (rm != "" ? "present" : "missing"))
        continue
      }
      diff_message(lm, rm, m)
    }
    # The count is only known after every message has been compared.
    if (FMT == "text") printf "\n%d total field difference(s)\n", DIFF_COUNT
    else if (FMT == "count") print DIFF_COUNT
    exit (DIFF_COUNT > 0 ? 1 : 0)
  }
  ' RS=$'\x1e' SIDE=L "$left_list" SIDE=R "$right_list"
}

# Temp files hold the split message lists; they are removed on every exit path.
TMP_L=$(mktemp) || die "mktemp failed"
TMP_R=$(mktemp) || { rm -f -- "$TMP_L"; die "mktemp failed"; }
trap 'rm -f -- "$TMP_L" "$TMP_R"' EXIT

split_messages "$LEFT" "$TMP_L" || die "cannot split left file: $LEFT"
split_messages "$RIGHT" "$TMP_R" || die "cannot split right file: $RIGHT"

# Last command of the script: its status (0 identical, 1 differences) becomes
# the exit status.
compare_split_messages "$TMP_L" "$TMP_R"
|