F-1 (HIGH — blocks regression): hl7-diff --format count always returned 0
because the early-exit in END fired before the diff loop ran. Fix: remove
the early exit; suppress per-diff printf in emit() for count mode; emit
DIFF_COUNT after the loop. count/text/tsv all agree (13 diffs on fixture,
0 on identical pair, exit codes correct). Ref: lib/hl7-diff.sh.
F-5 (MEDIUM — PHI leak): hl7-sanitize silently passed LF-delimited HL7
through as cleartext (awk RS="\r" never split on LF). Fix: detect CR
absence via python3 binary read; normalise LF/CRLF→CR via `tr` before
the awk pass. Both file and stdin paths handled. CR path is a zero-overhead
passthrough. Before: 0 tokens, cleartext PHI. After: 6 tokens, all PID
fields replaced with [[MRN_0001]] etc. Ref: lib/hl7-sanitize.sh.
F-2 (MEDIUM): nc-make-jump emitted { PORT {} } for file/ICL inbounds
because the guard only tested for empty ORIG_PORT; protocol-nested returns
the literal "{}" for empty blocks. Fix: case guard rejects empty, "{}", and
any non-numeric value with a clear "is it a TCP listener?" error (exit 1).
TCP inbounds (numeric PORT) still generate correctly. Ref: lib/nc-make-jump.sh.
F-3 (MEDIUM — manual marquee example): nc-msgs mrn=<bare> returned 0 on
real Epic MRNs stored as "5720501458^^^MRN". Fix: in field_matches "="
operator, when expected has no ^ and the stored repetition does, compare
component-1 (text before first ^). Full-componented and mrn.1= paths
unchanged. Fixture: bare mrn=5720501458 now matches 2/3 messages correctly.
Ref: lib/nc-msgs.sh.
All four files pass bash -n. MANIFEST regenerated (54 entries, --check=0).
Tested against synthetic fixtures on .135 (no live engine required for these
logic bugs). Work-box re-verify commands in audit §4-B.
Co-Authored-By: Clover (claude-sonnet-4-6) <noreply@anthropic.com>
587 lines
23 KiB
Bash
Executable File
587 lines
23 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# nc-msgs.sh — native v3 smat query. No v1/v2 dependency, no hcidbdump.
|
|
#
|
|
# Cloverleaf smat databases are SQLite 3. v3 reads them directly via `sqlite3`
|
|
# in -ascii mode to preserve raw `\r` segment separators.
|
|
#
|
|
# Schema (smat_msgs columns we care about):
|
|
# Time INTEGER — milliseconds since epoch
|
|
# MessageContent BLOB — raw HL7 (segments separated by \r)
|
|
# SourceConn VARCHAR — source thread name
|
|
# DestConn VARCHAR — destination thread name
|
|
# Type VARCHAR — DATA, ACK, etc.
|
|
# MidDomain/Hub/Num INTEGER — message ID triple
|
|
#
|
|
# Usage:
#   nc-msgs.sh <thread_name> [--after EXPR] [--before EXPR] [--type DATA|ACK]
#       [--field F] [--or-field F] [--not-field F]  # repeatable: all / any / none must match
#       [--limit N]                                 # default 100
#       [--format text|json|count|raw|oneline|fields|mp|labeled]
#       [--include-history|--all]                   # also search SmatHistory archives
#       [--sitedir DIR]                             # default $HCISITEDIR
#       [--db PATH]                                 # explicit smatdb path (overrides locate)
|
|
#
|
|
# Time expressions (--after, --before):
|
|
# "3 days ago", "12 hours ago", "30 minutes ago"
|
|
# "2026-05-20", "2026-05-20 14:30:00"
|
|
# unix epoch in seconds (e.g. 1772100000)
|
|
#
|
|
# Examples:
|
|
# nc-msgs.sh to_3m --after "3 days ago" --field PID.18=623000286
|
|
# nc-msgs.sh ADTto_3m --field MSH.9.2=A08 --limit 5
|
|
# nc-msgs.sh ADTto_3m --format count
|
|
# Shell options: unset variables are errors, and a pipeline reports failure
# when any stage fails. errexit is not enabled.
set -u -o pipefail

# Resolve the directory this script lives in; sibling helpers are loaded from it.
NC_SELF=$0
LIB_DIR=$(cd "$(dirname "$NC_SELF")" && pwd)
HL7F=$LIB_DIR/hl7-field.sh

# v0.7.5: shared CR-safety primitives (Cygwin/MobaXterm date.exe and wc.exe
# emit CR-tainted output that crashes the arithmetic in parse_time_ms).
# When the shared library is not readable, minimal local fallbacks are defined.
if [ ! -r "$LIB_DIR/cygwin-safe.sh" ]; then
  # coerce_int RAW [DEFAULT]
  #   Prints RAW with every non-digit removed; prints DEFAULT (0 when omitted)
  #   if nothing is left.
  coerce_int() {
    local raw="${1:-}" fallback="${2:-0}" digits
    digits=$(printf '%s' "$raw" | tr -cd '0-9')
    printf '%s' "${digits:-$fallback}"
  }

  # v0.8.26 fallback: raw passthrough if the shared lib is missing. Never strips
  # — the gated stripping is a terminal-protection nicety, not a correctness need.
  _sanitize_ctl_tty() {
    cat
  }
else
  # shellcheck disable=SC1090,SC1091
  . "$LIB_DIR/cygwin-safe.sh"
fi

# die MESSAGE...
#   Writes "nc-msgs: MESSAGE" to stderr and exits with status 1.
die() {
  printf 'nc-msgs: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
# ---- Option defaults --------------------------------------------------------
THREAD=""
AFTER=""
BEFORE=""
FILTERS=()         # all of these must match (AND group)
OR_FILTERS=()      # at least one of these must match (OR group)
NOT_FILTERS=()     # none of these may match (NOT group)
TYPE=""
LIMIT=100
FORMAT="text"
SITEDIR="${HCISITEDIR:-}"
DB_OVERRIDE=""
INCLUDE_HISTORY=0

# _nc_need_arg FLAG [VALUE ...]
#   Called with the remaining command line ("$@"). Dies with a usage error when
#   FLAG is the last word, i.e. its value is missing. Without this guard the
#   value lookup would trip `set -u` ("unbound variable") instead of producing
#   a readable message.
_nc_need_arg() {
  [ "$#" -ge 2 ] || die "flag $1 requires a value"
}

# ---- Command-line parsing ---------------------------------------------------
# Value flags consume two words (one explicit shift plus the shared shift at
# the bottom of the loop); boolean flags consume one.
while [ $# -gt 0 ]; do
  case "$1" in
    --after)      _nc_need_arg "$@"; AFTER="$2"; shift ;;
    --before)     _nc_need_arg "$@"; BEFORE="$2"; shift ;;
    --field)      _nc_need_arg "$@"; FILTERS+=("$2"); shift ;;
    --or-field)   _nc_need_arg "$@"; OR_FILTERS+=("$2"); shift ;;
    --not-field)  _nc_need_arg "$@"; NOT_FILTERS+=("$2"); shift ;;
    --type)       _nc_need_arg "$@"; TYPE="$2"; shift ;;
    --limit)      _nc_need_arg "$@"; LIMIT="$2"; shift ;;
    --format)     _nc_need_arg "$@"; FORMAT="$2"; shift ;;
    --sitedir)    _nc_need_arg "$@"; SITEDIR="$2"; shift ;;
    --db)         _nc_need_arg "$@"; DB_OVERRIDE="$2"; shift ;;
    --include-history) INCLUDE_HISTORY=1 ;;
    --all)             INCLUDE_HISTORY=1 ;;   # cheat-sheet alias
    -h|--help)
      # Print the leading comment header (everything after line 1 up to the
      # first non-comment line) so the help text cannot be truncated when the
      # header grows or shrinks.
      awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$NC_SELF"
      exit 0
      ;;
    -*)
      die "unknown flag: $1"
      ;;
    *)
      # First bare word is the thread name; any further bare word is an error.
      if [ -z "$THREAD" ]; then
        THREAD="$1"
      else
        die "extra arg: $1"
      fi
      ;;
  esac
  shift
done

# ---- Validation -------------------------------------------------------------
[ -n "$THREAD" ] || die "usage: nc-msgs.sh <thread> [...flags]"
case "$FORMAT" in
  text|json|count|raw|oneline|fields|mp|labeled) ;;
  *) die "bad --format: $FORMAT" ;;
esac
# LIMIT is interpolated verbatim into the SQL text, so it must be an integer.
# A leading '-' is still accepted (SQLite treats a negative LIMIT as "no limit").
[[ "$LIMIT" =~ ^-?[0-9]+$ ]] || die "bad --limit: $LIMIT (expected an integer)"
command -v sqlite3 >/dev/null 2>&1 || die "sqlite3 not on PATH (universally available on Cloverleaf hosts; install via your distro otherwise)"
|
|
|
|
# locate_smatdb — print the smatdb file(s) to query, one path per line.
#
# Globals read: DB_OVERRIDE, SITEDIR, THREAD, INCLUDE_HISTORY.
# Output:       the explicit --db path if given; otherwise the thread's active
#               smatdb (if found), followed — when INCLUDE_HISTORY is 1 — by
#               the sorted SmatHistory archives for the thread.
# Errors:       calls die when the override file is missing, when no usable
#               site directory is configured, or when no active smatdb exists
#               and history was not requested.
locate_smatdb() {
  local live_db=""

  # An explicit --db path short-circuits all searching.
  if [ -n "$DB_OVERRIDE" ]; then
    if [ ! -f "$DB_OVERRIDE" ]; then
      die "no such db: $DB_OVERRIDE"
    fi
    printf '%s\n' "$DB_OVERRIDE"
    return
  fi

  if [ -z "$SITEDIR" ]; then
    die "no \$HCISITEDIR and no --sitedir; pass one or set the env var"
  fi
  if [ ! -d "$SITEDIR" ]; then
    die "sitedir not a directory: $SITEDIR"
  fi

  # Look in the usual place first, then anywhere under the site directory.
  live_db=$(find "$SITEDIR/exec/processes" -maxdepth 2 -type f -name "${THREAD}.smatdb" 2>/dev/null | head -1)
  if [ -z "$live_db" ]; then
    live_db=$(find "$SITEDIR" -type f -name "${THREAD}.smatdb" 2>/dev/null | head -1)
  fi
  if [ -n "$live_db" ]; then
    printf '%s\n' "$live_db"
  fi

  case "$INCLUDE_HISTORY" in
    1)
      # Also include SmatHistory archive smatdbs for this thread.
      find "$SITEDIR/exec/processes" -maxdepth 4 -path '*/SmatHistory/*' -name "${THREAD}.*.smatdb" -type f 2>/dev/null \
        | sort
      ;;
    *)
      if [ -z "$live_db" ]; then
        die "no smatdb found for thread $THREAD under $SITEDIR (looked for ${THREAD}.smatdb)"
      fi
      ;;
  esac
  return 0
}
|
|
|
|
# _nc_epoch_to_ms SECONDS
#   Prints SECONDS * 1000. The input is routed through coerce_int first
#   (v0.7.5: Cygwin date.exe can emit a CR-tainted epoch that would crash the
#   arithmetic) and forced to base 10 so leading zeros are never read as octal.
_nc_epoch_to_ms() {
  local secs
  secs=$(coerce_int "${1:-}" 0)
  printf '%s' "$((10#$secs * 1000))"
}

# parse_time_ms EXPR
#   Converts a time expression to unix milliseconds on stdout.
#   - empty EXPR            → prints nothing, returns 0
#   - all digits, >= 12     → taken as milliseconds and printed unchanged
#   - all digits, <  12     → taken as epoch seconds (base 10) and scaled
#   - anything else         → GNU `date -d`, then BSD `date -v` for
#                             "<N> <unit>[s] ago", then BSD `date -jf` with
#                             "%Y-%m-%d %H:%M:%S" and "%Y-%m-%d"
#   Calls die when nothing can parse EXPR. NOTE: when invoked inside $(...),
#   die only terminates that subshell — the caller must check the status.
parse_time_ms() {
  local expr="$1"
  [ -z "$expr" ] && return 0

  if [[ "$expr" =~ ^[0-9]+$ ]]; then
    if [ "${#expr}" -ge 12 ]; then
      printf '%s' "$expr"
    else
      # 10# prevents "08"/"010" from being parsed as (invalid) octal.
      printf '%s' "$((10#$expr * 1000))"
    fi
    return 0
  fi

  # GNU date and BSD date differ. Try GNU first (-d EXPR).
  local ts=""
  if ts=$(date -d "$expr" +%s 2>/dev/null); then
    _nc_epoch_to_ms "$ts"
    return 0
  fi

  # BSD date — `-v` shorthand for relative times.
  local rel_re='^([0-9]+) (second|minute|hour|day|week|month|year)s? ago$'
  if [[ "$expr" =~ $rel_re ]]; then
    local n="${BASH_REMATCH[1]}" unit="${BASH_REMATCH[2]}" flag=""
    n=$((10#$n))
    case "$unit" in
      second) flag="S" ;;
      minute) flag="M" ;;
      hour)   flag="H" ;;
      day)    flag="d" ;;
      week)   flag="d"; n=$((n * 7)) ;;   # -v has no week unit; use days
      month)  flag="m" ;;
      year)   flag="y" ;;
    esac
    if ts=$(date -v "-${n}${flag}" +%s 2>/dev/null); then
      _nc_epoch_to_ms "$ts"
      return 0
    fi
  fi

  # BSD date with explicit input formats.
  local fmt
  for fmt in "%Y-%m-%d %H:%M:%S" "%Y-%m-%d"; do
    if ts=$(date -jf "$fmt" "$expr" +%s 2>/dev/null); then
      _nc_epoch_to_ms "$ts"
      return 0
    fi
  done

  die "could not parse time expression: $expr"
}
|
|
|
|
# Resolve the time bounds. parse_time_ms reports bad input via die, but inside
# $(...) that only ends the subshell; the status must be checked here or an
# unparseable --after/--before would be silently ignored and the query would
# run without that bound.
AFTER_MS=$(parse_time_ms "$AFTER") || exit 1
BEFORE_MS=$(parse_time_ms "$BEFORE") || exit 1

# Build WHERE clause
WHERE="1=1"
if [ -n "$AFTER_MS" ]; then
  WHERE="$WHERE AND Time >= $AFTER_MS"
fi
if [ -n "$BEFORE_MS" ]; then
  WHERE="$WHERE AND Time <= $BEFORE_MS"
fi
if [ -n "$TYPE" ]; then
  # Double any single quote so the value is a valid SQL string literal.
  ESC_TYPE=$(printf '%s' "$TYPE" | sed "s/'/''/g")
  WHERE="$WHERE AND Type = '$ESC_TYPE'"
fi

# Coarse LIKE pre-filter — only safe for the positive operators "=" and "~",
# where the expected text must literally occur in the message. Negations,
# comparisons (> >= < <=), ranges (><), NULL / empty / "*" expectations all
# need every message inspected by the post-filter, so they add nothing here.
#
# The operator is the FIRST operator character after the path. (Testing for
# "=" anywhere in the filter would also fire on ">=" / "<=" and wrongly demand
# that the bound itself appear in the message.)
prefilter_op_path=""; prefilter_op_kind=""; prefilter_op_val=""
# ${arr[@]+...} keeps an empty FILTERS array from tripping `set -u` on
# bash releases older than 4.4.
for filt in ${FILTERS[@]+"${FILTERS[@]}"}; do
  pf_path="${filt%%[~=<>!]*}"        # text before the first operator character
  pf_rest="${filt:${#pf_path}}"      # operator + expected value
  case "$pf_rest" in
    '!~'*|'!='*|'>'*|'<'*) continue ;;     # negation / comparison / range
    '~'*|'='*)             val="${pf_rest#?}" ;;
    *)                     continue ;;      # no operator at all
  esac
  norm_val=$(printf '%s' "$val" | tr '[:upper:]' '[:lower:]')
  if [ "$norm_val" = "null" ] || [ -z "$val" ] || [ "$val" = "*" ]; then
    continue
  fi
  ESC_VAL=$(printf '%s' "$val" | sed "s/'/''/g")
  WHERE="$WHERE AND MessageContent LIKE '%${ESC_VAL}%'"
done
|
|
|
|
# Multi-db support: run the same query against every located smatdb and
# concatenate the results. NOTE: LIMIT is applied per database by the SQL
# below; the concatenated set is neither re-sorted nor re-limited here.
DB_LIST=()
while IFS= read -r line; do
  [ -n "$line" ] && DB_LIST+=("$line")
done < <(locate_smatdb)
[ "${#DB_LIST[@]}" -gt 0 ] || die "no smatdb files matched"

# Progress note on stderr; suppressed for count mode.
if [ "$FORMAT" != "count" ]; then
  printf 'nc-msgs: querying %d smatdb file(s): %s\n' "${#DB_LIST[@]}" "$(IFS=,; echo "${DB_LIST[*]}")" >&2
fi

# Scratch directory for the raw dump and the per-message files; removed on exit.
TMP_OUT=$(mktemp -d) || die "could not create a temporary directory"
[ -d "$TMP_OUT" ] || die "could not create a temporary directory"
trap 'rm -rf -- "$TMP_OUT"' EXIT

# Each db: pull matching rows, append to combined raw
COMBINED_RAW="$TMP_OUT/raw.bin"
: > "$COMBINED_RAW"
SQL_BASE="SELECT Time, Type, SourceConn, DestConn, MessageContent FROM smat_msgs WHERE $WHERE ORDER BY Time DESC LIMIT $LIMIT"
for db in "${DB_LIST[@]}"; do
  # -ascii: fields are separated by 0x1f and rows by 0x1e, so the raw \r
  # segment separators inside MessageContent survive.
  sqlite3 -ascii "$db" "$SQL_BASE" >> "$COMBINED_RAW" 2>"$TMP_OUT/err"
  # Best effort: relay sqlite3's diagnostics and carry on with the next db.
  if [ -s "$TMP_OUT/err" ]; then
    cat "$TMP_OUT/err" >&2
    : > "$TMP_OUT/err"
  fi
done

# Split rows (0x1e) into individual files, parse fields per row (0x1f).
# Row N becomes msg_NNNNN.bin (column 5) plus meta_NNNNN.tsv (columns 1-4).
awk -v RS=$'\x1e' -v FS=$'\x1f' -v outdir="$TMP_OUT" '
  NF >= 5 {
    n++
    fpath = outdir "/msg_" sprintf("%05d", n) ".bin"
    print $5 > fpath
    close(fpath)
    metafpath = outdir "/meta_" sprintf("%05d", n) ".tsv"
    printf "%s\t%s\t%s\t%s\n", $1, $2, $3, $4 > metafpath
    close(metafpath)
  }
' "$TMP_OUT/raw.bin"

# Count the message files with a glob instead of parsing `ls | wc -l`; this
# also sidesteps the CR-tainted wc.exe output seen on Cygwin. An unmatched
# glob yields the literal pattern, which the -e test filters out.
MSG_COUNT=0
for _nc_msg in "$TMP_OUT"/msg_*.bin; do
  if [ -e "$_nc_msg" ]; then
    MSG_COUNT=$((MSG_COUNT + 1))
  fi
done
KEPT=0
|
|
|
|
# Parse a single filter expression. Returns path / op / expected via the
# globals FP_PATH, FP_OP and FP_EXPECTED (all empty when no operator is found).
#
# Supported operators:
#   !~   does not contain (case-insensitive substring)
#   !=   not equal
#   ~    contains (case-insensitive substring)
#   >=   greater-or-equal (numeric or lexical)
#   <=   less-or-equal
#   >    greater-than
#   <    less-than
#   ><   range (LO..HI), inclusive
#   =    exact equal (or NULL keyword, empty, or * wildcard)
#
# The operator is the one that starts at the FIRST operator character in the
# expression (two-character operators are tried before their one-character
# prefixes). Everything after it is the expected value verbatim, so values may
# themselves contain operator characters: "PID.5=SMITH~JOHN" is PID.5 = "SMITH~JOHN",
# "OBX.5=<5" is OBX.5 = "<5".
parse_filter() {
  local filt="$1" rest
  FP_OP=""; FP_PATH=""; FP_EXPECTED=""

  FP_PATH="${filt%%[~=<>!]*}"      # text before the first operator character
  rest="${filt:${#FP_PATH}}"       # operator + expected value ("" if none)

  case "$rest" in
    '!~'*) FP_OP='!~' ;;
    '!='*) FP_OP='!=' ;;
    '>='*) FP_OP='>=' ;;
    '<='*) FP_OP='<=' ;;
    '><'*) FP_OP='><' ;;
    '>'*)  FP_OP='>'  ;;
    '<'*)  FP_OP='<'  ;;
    '~'*)  FP_OP='~'  ;;
    '='*)  FP_OP='='  ;;
    *)
      # No recognisable operator: report "nothing parsed".
      FP_PATH=""
      return 0
      ;;
  esac
  FP_EXPECTED="${rest:${#FP_OP}}"
}
|
|
|
|
# _fm_has_value ACTUAL
#   Returns 0 when at least one repetition (one per line of ACTUAL) is
#   non-empty and is not the two-character HL7 null marker `""`.
_fm_has_value() {
  local rep
  while IFS= read -r rep; do
    if [ -n "$rep" ] && [ "$rep" != "\"\"" ]; then
      return 0
    fi
  done <<< "$1"
  return 1
}

# _fm_rep_equals REP EXPECTED
#   Exact comparison, plus the F-3 rule (2026-06-08): when EXPECTED has no ^
#   and REP carries HL7 components, compare only component-1 (the text before
#   the first ^), so mrn=5720501458 matches a stored 5720501458^^^MRN.
_fm_rep_equals() {
  local rep="$1" expected="$2"
  [ "$rep" = "$expected" ] && return 0
  if [[ "$rep" == *"^"* ]] && [[ "$expected" != *"^"* ]]; then
    [ "${rep%%^*}" = "$expected" ] && return 0
  fi
  return 1
}

# _fm_icontains HAYSTACK NEEDLE
#   Case-insensitive substring test.
_fm_icontains() {
  local hay needle
  hay=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  needle=$(printf '%s' "$2" | tr '[:upper:]' '[:lower:]')
  [[ "$hay" == *"$needle"* ]]
}

# Returns 0 if the (op, expected) check matches the actual field value(s).
#
# Arguments:
#   $1 actual    field value; repetitions are separated by newlines
#   $2 op        one of = != ~ !~ > >= < <= ><
#   $3 expected  value to compare against. Empty or the keyword NULL (any
#                case) means "no value"; "*" means "any value" (= and != only);
#                for >< it is "LO..HI".
# Returns: 0 on match, 1 otherwise (including an unknown operator).
field_matches() {
  local actual="$1" op="$2" expected="$3"
  local rep lo hi
  local num_re='^[0-9]+([.][0-9]+)?$'
  local is_null=0
  case "$expected" in
    ''|[Nn][Uu][Ll][Ll]) is_null=1 ;;
  esac

  case "$op" in
    "=")
      if [ "$is_null" = "1" ]; then
        # Null match: actual empty or all-empty reps
        if _fm_has_value "$actual"; then return 1; fi
        return 0
      fi
      if [ "$expected" = "*" ]; then
        # Wildcard — any non-empty rep
        if _fm_has_value "$actual"; then return 0; fi
        return 1
      fi
      while IFS= read -r rep; do
        if _fm_rep_equals "$rep" "$expected"; then return 0; fi
      done <<< "$actual"
      return 1
      ;;
    "!=")
      if [ "$is_null" = "1" ]; then
        # Not-null: at least one rep is non-empty
        if _fm_has_value "$actual"; then return 0; fi
        return 1
      fi
      if [ "$expected" = "*" ]; then
        # Not-any-value = null
        if _fm_has_value "$actual"; then return 1; fi
        return 0
      fi
      # Not-equal: NO repetition equals expected. Uses the same equality rule
      # as "=" (including component-1 matching) so that "=" and "!=" can never
      # both match the same field.
      while IFS= read -r rep; do
        if _fm_rep_equals "$rep" "$expected"; then return 1; fi
      done <<< "$actual"
      return 0
      ;;
    "~")
      [ "$is_null" = "1" ] && return 1
      if _fm_icontains "$actual" "$expected"; then return 0; fi
      return 1
      ;;
    "!~")
      [ "$is_null" = "1" ] && return 0
      if _fm_icontains "$actual" "$expected"; then return 1; fi
      return 0
      ;;
    ">"|">="|"<"|"<=")
      # Numeric or lexical (works for HL7 timestamps in YYYYMMDDHHMMSS form).
      [ "$is_null" = "1" ] && return 1
      # Compare each repetition — match if ANY satisfies
      while IFS= read -r rep; do
        [ -z "$rep" ] && continue
        if [[ "$rep" =~ $num_re ]] && [[ "$expected" =~ $num_re ]]; then
          # Pure numeric comparison via awk (handles decimals)
          if awk -v a="$rep" -v b="$expected" -v op="$op" '
            BEGIN {
              a += 0; b += 0
              if (op == ">"  && a >  b) exit 0
              if (op == ">=" && a >= b) exit 0
              if (op == "<"  && a <  b) exit 0
              if (op == "<=" && a <= b) exit 0
              exit 1
            }'; then
            return 0
          fi
        else
          # Lexical compare (works for YYYYMMDD timestamps)
          case "$op" in
            ">")  [[ "$rep" > "$expected" ]] && return 0 ;;
            ">=") [[ ! "$rep" < "$expected" ]] && return 0 ;;
            "<")  [[ "$rep" < "$expected" ]] && return 0 ;;
            "<=") [[ ! "$rep" > "$expected" ]] && return 0 ;;
          esac
        fi
      done <<< "$actual"
      return 1
      ;;
    "><")
      # Range "LO..HI" inclusive
      [ "$is_null" = "1" ] && return 1
      # Require the ".." separator explicitly; a degenerate range such as
      # "5..5" is valid and matches exactly 5.
      [[ "$expected" == *..* ]] || return 1
      lo="${expected%%..*}"; hi="${expected##*..}"
      if [ -z "$lo" ] || [ -z "$hi" ]; then
        return 1
      fi
      while IFS= read -r rep; do
        [ -z "$rep" ] && continue
        # Numeric only when the value AND both bounds are numeric; otherwise
        # fall back to lexical ordering, as the comparison operators do.
        if [[ "$rep" =~ $num_re ]] && [[ "$lo" =~ $num_re ]] && [[ "$hi" =~ $num_re ]]; then
          if awk -v a="$rep" -v lo="$lo" -v hi="$hi" \
            'BEGIN { if (a+0 >= lo+0 && a+0 <= hi+0) exit 0; exit 1 }'; then
            return 0
          fi
        else
          [[ ! "$rep" < "$lo" && ! "$rep" > "$hi" ]] && return 0
        fi
      done <<< "$actual"
      return 1
      ;;
    *)
      return 1
      ;;
  esac
}
|
|
|
|
# _mf_check FILTER MSG_FILE
#   Evaluates one filter expression against one message file.
#   Returns 0 = filter matches, 1 = filter does not match,
#           2 = filter is empty or has no recognisable operator (caller skips).
#   The field value is fetched with the helper at $HL7F; its stderr is
#   discarded, so a failing lookup is treated as an empty value.
_mf_check() {
  local expr="$1" msg_file="$2" actual
  [ -n "$expr" ] || return 2
  parse_filter "$expr"
  [ -n "$FP_OP" ] || return 2
  actual=$("$HL7F" "$FP_PATH" "$msg_file" 2>/dev/null)
  if field_matches "$actual" "$FP_OP" "$FP_EXPECTED"; then
    return 0
  fi
  return 1
}

# Apply filter groups:
#   --field      AND group: every entry must match
#   --or-field   OR group: if any entries given, at least one must match
#   --not-field  NOT group: none may match
#
# Arguments: $1 - path of the message file to test
# Globals:   FILTERS, OR_FILTERS, NOT_FILTERS (read)
# Returns:   0 when the message passes all three groups, 1 otherwise
match_filters() {
  local msg_file="$1" filt rc or_match

  # AND group — skipped entries (rc 2) neither pass nor fail the message.
  if [ "${#FILTERS[@]}" -gt 0 ]; then
    for filt in "${FILTERS[@]}"; do
      _mf_check "$filt" "$msg_file"
      rc=$?
      if [ "$rc" -eq 1 ]; then
        return 1
      fi
    done
  fi

  # OR group (if any given, at least one must match)
  if [ "${#OR_FILTERS[@]}" -gt 0 ]; then
    or_match=0
    for filt in "${OR_FILTERS[@]}"; do
      if _mf_check "$filt" "$msg_file"; then
        or_match=1
        break
      fi
    done
    [ "$or_match" = "1" ] || return 1
  fi

  # NOT group (none may match)
  if [ "${#NOT_FILTERS[@]}" -gt 0 ]; then
    for filt in "${NOT_FILTERS[@]}"; do
      if _mf_check "$filt" "$msg_file"; then
        return 1
      fi
    done
  fi

  return 0
}
|
|
|
|
# ---- Emit helpers -----------------------------------------------------------

# 1 when any of the three filter groups has entries; computed once.
_nc_has_filters=0
if [ "${#FILTERS[@]}" -gt 0 ] || [ "${#OR_FILTERS[@]}" -gt 0 ] || [ "${#NOT_FILTERS[@]}" -gt 0 ]; then
  _nc_has_filters=1
fi

# _nc_keep MSG_FILE
#   Returns 0 when MSG_FILE exists and passes the filters (or no filters were
#   given). The existence test matters: when the query produced no rows, the
#   msg_*.bin glob stays a literal pattern and must not be treated as a message.
_nc_keep() {
  [ -e "$1" ] || return 1
  [ "$_nc_has_filters" = "0" ] && return 0
  match_filters "$1"
}

# _nc_read_meta MSG_FILE
#   Loads tm / typ / src / dst from the meta_NNNNN.tsv that shares MSG_FILE's
#   number (derived from the file name, not from a loop counter). awk prints
#   one column per line so that empty columns keep their position.
_nc_read_meta() {
  local num="${1##*/msg_}"
  local meta_file="${1%/*}/meta_${num%.bin}.tsv"
  tm=""; typ=""; src=""; dst=""
  [ -r "$meta_file" ] || return 1
  {
    IFS= read -r tm
    IFS= read -r typ
    IFS= read -r src
    IFS= read -r dst
  } < <(awk -F'\t' 'NR == 1 { print $1; print $2; print $3; print $4 }' "$meta_file")
  return 0
}

# Field dump shared by the fields/mp and labeled formats. Prints one
# "SEG.N: value" line per non-empty field; with -v labeled=1 a known field is
# printed as "SEG.N (alias): value". For MSH the loop index is used as the
# field number unchanged (the first "|" itself counts as MSH.1); for every
# other segment it is index - 1.
_NC_FIELDS_AWK='
BEGIN {
  if (labeled) {
    # Reverse alias lookup table (alias for SEG.N -> label)
    a["PID.3"]="mrn";            a["PID.4"]="alt_id"
    a["PID.5"]="name";           a["PID.6"]="mothers_maiden"
    a["PID.7"]="dob";            a["PID.8"]="sex"
    a["PID.11"]="address";       a["PID.13"]="phone"
    a["PID.14"]="work_phone";    a["PID.18"]="account"
    a["PID.19"]="ssn";           a["PID.20"]="license"
    a["PID.29"]="dod"
    a["PV1.2"]="patient_class";  a["PV1.3"]="location"
    a["PV1.7"]="attending";      a["PV1.8"]="referring"
    a["PV1.9"]="consulting";     a["PV1.17"]="admitting"
    a["PV1.18"]="patient_type";  a["PV1.19"]="visit"
    a["PV1.50"]="alt_visit"
    a["MSH.3"]="sending_app";    a["MSH.4"]="sending_facility"
    a["MSH.5"]="receiving_app";  a["MSH.6"]="receiving_facility"
    a["MSH.7"]="timestamp";      a["MSH.9"]="msg_type"
    a["MSH.10"]="control_id";    a["MSH.11"]="processing_id"
    a["MSH.12"]="hl7_version"
    a["EVN.1"]="trigger_event";  a["EVN.2"]="event_dt"
    a["EVN.4"]="evn_reason";     a["EVN.5"]="operator"
    a["NK1.2"]="next_of_kin";    a["NK1.3"]="relationship"
    a["NK1.4"]="nk_address";     a["NK1.5"]="nk_phone"
    a["GT1.4"]="guarantor";      a["GT1.5"]="gt_address"
    a["GT1.6"]="gt_phone";       a["GT1.12"]="gt_ssn"
    a["IN1.2"]="insurance";      a["IN1.16"]="insured"
    a["IN1.17"]="insured_dob";   a["IN1.36"]="policy"
    a["DG1.3"]="diagnosis";      a["DG1.4"]="dx_desc"
    a["OBR.2"]="placer_order";   a["OBR.3"]="filler_order"
    a["OBR.4"]="test_code";      a["OBR.16"]="ordering"
    a["OBX.5"]="value";          a["OBX.11"]="status"
  }
}
NF > 0 {
  seg = substr($1, 1, 3)
  if (seg == "") next
  is_msh = (seg == "MSH")
  for (k = 2; k <= NF; k++) {
    val = $k
    if (val == "" || val == "\"\"") continue
    key = seg "." (is_msh ? k : k - 1)
    if (labeled && (key in a)) printf "%s (%s): %s\n", key, a[key], val
    else printf "%s: %s\n", key, val
  }
}'

# ---- Emit -------------------------------------------------------------------
# v0.8.26: route ALL stdout from the emit block through the tty-gated sanitizer.
# CRITICAL: the `raw` format intentionally emits raw HL7 plus 0x1c framing for
# downstream tooling (`nc-msgs ... --format raw > input.msgs` → route_test). On a
# pipe/redirect those bytes are load-bearing and MUST pass through untouched, so
# the gate sanitizes ONLY when stdout is an interactive tty (protect the human's
# terminal). Stderr lines (the `>&2` scan summary) bypass this pipe by design.
{
  case "$FORMAT" in
    count)
      # Without filters the count is simply the number of rows fetched.
      if [ "$_nc_has_filters" = "0" ]; then
        echo "$MSG_COUNT"
      else
        for f in "$TMP_OUT"/msg_*.bin; do
          if _nc_keep "$f"; then
            KEPT=$((KEPT + 1))
          fi
        done
        echo "$KEPT"
      fi
      ;;

    raw)
      for f in "$TMP_OUT"/msg_*.bin; do
        _nc_keep "$f" || continue
        cat "$f"
        printf '\x1c'   # File separator between messages (rare in HL7)
        KEPT=$((KEPT + 1))
      done
      ;;

    text|oneline|fields|mp|labeled)
      for f in "$TMP_OUT"/msg_*.bin; do
        _nc_keep "$f" || continue
        KEPT=$((KEPT + 1))
        _nc_read_meta "$f"
        # v0.7.5: coerce_int on the time column before any arithmetic / integer
        # compare. The meta TSV is written by awk; on Cygwin a Windows-native
        # awk could emit CRLF rows and the column then carries a CR.
        tm_i=$(coerce_int "$tm" 0)
        if [ "$tm_i" -gt 100000000000 ]; then
          # Looks like milliseconds: render as a date (BSD `-r SECS` first,
          # then GNU `-d @SECS`), falling back to the raw number.
          tm_h=$(date -r "$((tm_i / 1000))" 2>/dev/null || date -d "@$((tm_i / 1000))" 2>/dev/null || echo "$tm_i")
        else
          tm_h="$tm_i"
        fi
        printf '===== msg %d  time=%s  type=%s  src=%s  dst=%s =====\n' "$KEPT" "$tm_h" "$typ" "$src" "$dst"
        case "$FORMAT" in
          text)
            tr '\r' '\n' < "$f"
            ;;
          oneline)
            # Compact: single line, segments separated by visible '⏎' marker.
            # The 0x1f byte is passed to sed literally ($'...') because the
            # \x1f escape inside a sed script is a GNU extension.
            tr '\r' '\037' < "$f" | sed $'s/\x1f/ ⏎ /g'
            printf '\n'
            ;;
          fields|mp)
            # Each field on its own line: SEG.N: value (skips empty)
            tr '\r' '\n' < "$f" | awk -F'|' -v labeled=0 "$_NC_FIELDS_AWK"
            ;;
          labeled)
            # Same as fields but adds the friendly alias when known.
            tr '\r' '\n' < "$f" | awk -F'|' -v labeled=1 "$_NC_FIELDS_AWK"
            ;;
        esac
        printf '\n'
      done
      printf 'nc-msgs: %d msgs scanned, %d match filters\n' "$MSG_COUNT" "$KEPT" >&2
      ;;

    json)
      # Fail up front with a clear message rather than emitting a broken array.
      command -v jq >/dev/null 2>&1 || die "jq not on PATH (required for --format json)"
      printf '['
      first=1
      for f in "$TMP_OUT"/msg_*.bin; do
        _nc_keep "$f" || continue
        KEPT=$((KEPT + 1))
        if [ "$first" = "1" ]; then
          first=0
        else
          printf ','
        fi
        _nc_read_meta "$f"
        tm_i=$(coerce_int "$tm" 0)
        # Replace \r with \n in message content, then let jq build the whole
        # object so every string (not just the content) is JSON-escaped.
        # --join-output suppresses the newline jq would print after the object.
        tr '\r' '\n' < "$f" | jq -Rsc --join-output \
          --argjson time_ms "$((10#$tm_i))" \
          --arg type "$typ" --arg source "$src" --arg dest "$dst" \
          '{time_ms: $time_ms, type: $type, source: $source, dest: $dest, content: .}'
      done
      printf ']\n'
      ;;
  esac
} | _sanitize_ctl_tty
# Preserve the emit block's exit status across the gating pipe. The EXIT trap
# (rm -rf "$TMP_OUT") still fires on this parent exit.
exit "${PIPESTATUS[0]}"
|