#!/usr/bin/env bash
# nc-msgs.sh — native v3 smat query. No v1/v2 dependency, no hcidbdump.
#
# Cloverleaf smat databases are SQLite 3. v3 reads them directly via `sqlite3`
# in -ascii mode to preserve raw `\r` segment separators.
#
# Schema (smat_msgs columns we care about):
#   Time              INTEGER — milliseconds since epoch
#   MessageContent    BLOB    — raw HL7 (segments separated by \r)
#   SourceConn        VARCHAR — source thread name
#   DestConn          VARCHAR — destination thread name
#   Type              VARCHAR — DATA, ACK, etc.
#   MidDomain/Hub/Num INTEGER — message ID triple
#
# Usage:
#   nc-msgs.sh <thread> [--after EXPR] [--before EXPR]
#              [--field PATH=VALUE]    # repeatable filter, AND semantics
#              [--type DATA|ACK]
#              [--limit N]             # default 100
#              [--format text|json|count|raw|oneline|fields|mp|labeled]
#              [--sitedir DIR]         # default $HCISITEDIR
#              [--db PATH]             # explicit smatdb path (overrides locate)
#
# Time expressions (--after, --before):
#   "3 days ago", "12 hours ago", "30 minutes ago"
#   "2026-05-20", "2026-05-20 14:30:00"
#   unix epoch in seconds (e.g. 1772100000) or milliseconds (11+ digits)
#
# Examples:
#   nc-msgs.sh to_3m --after "3 days ago" --field PID.18=623000286
#   nc-msgs.sh ADTto_3m --field MSH.9.2=A08 --limit 5
#   nc-msgs.sh ADTto_3m --format count

set -u
set -o pipefail

NC_SELF="$0"
LIB_DIR="$(cd "$(dirname "$NC_SELF")" && pwd)"
HL7F="$LIB_DIR/hl7-field.sh"

# Print a prefixed message on stderr and abort with status 1.
die() { printf 'nc-msgs: %s\n' "$*" >&2; exit 1; }

# Print the leading comment block of this script (everything after the
# shebang up to the first non-comment line), so the help text cannot drift
# out of sync with a hard-coded line range.
show_help() {
  awk 'NR > 1 && !/^#/ { exit } NR > 1 { print }' "$NC_SELF"
}

# Abort with a usage error when a flag that takes a value is the last word.
# Called as `need_value "$@"` while $1 is the flag itself.
need_value() { [ "$#" -ge 2 ] || die "missing value for $1"; }

THREAD=""
AFTER=""
BEFORE=""
FILTERS=()
TYPE=""
LIMIT=100
FORMAT="text"
SITEDIR="${HCISITEDIR:-}"
DB_OVERRIDE=""

while [ $# -gt 0 ]; do
  case "$1" in
    --after)   need_value "$@"; AFTER="$2"; shift ;;
    --before)  need_value "$@"; BEFORE="$2"; shift ;;
    --field)   need_value "$@"; FILTERS+=("$2"); shift ;;
    --type)    need_value "$@"; TYPE="$2"; shift ;;
    --limit)   need_value "$@"; LIMIT="$2"; shift ;;
    --format)  need_value "$@"; FORMAT="$2"; shift ;;
    --sitedir) need_value "$@"; SITEDIR="$2"; shift ;;
    --db)      need_value "$@"; DB_OVERRIDE="$2"; shift ;;
    -h|--help) show_help; exit 0 ;;
    -*)        die "unknown flag: $1" ;;
    *)
      if [ -z "$THREAD" ]; then
        THREAD="$1"
      else
        die "extra arg: $1"
      fi
      ;;
  esac
  shift
done

[ -n "$THREAD" ] || die "usage: nc-msgs.sh <thread> [...flags]"

# LIMIT is interpolated into the SQL text, so it must be a plain integer.
# A negative value is allowed: SQLite treats it as "no limit".
[[ "$LIMIT" =~ ^-?[0-9]+$ ]] || die "bad --limit: $LIMIT (expected an integer)"

case "$FORMAT" in
  text|json|count|raw|oneline|fields|mp|labeled) ;;
  *) die "bad --format: $FORMAT" ;;
esac

command -v sqlite3 >/dev/null 2>&1 || die "sqlite3 not on PATH (universally available on Cloverleaf hosts; install via your distro otherwise)"

# Locate smatdb.
# Prints the path of the database to query on stdout. Honors --db when given,
# otherwise searches $SITEDIR for "<thread>.smatdb". Calls die on failure;
# because callers capture the output with $(...), they must check the exit
# status themselves (die only terminates the subshell).
locate_smatdb() {
  if [ -n "$DB_OVERRIDE" ]; then
    [ -f "$DB_OVERRIDE" ] || die "no such db: $DB_OVERRIDE"
    printf '%s\n' "$DB_OVERRIDE"
    return
  fi
  [ -n "$SITEDIR" ] || die "no \$HCISITEDIR and no --sitedir; pass one or set the env var"
  [ -d "$SITEDIR" ] || die "sitedir not a directory: $SITEDIR"

  # Standard layout: $SITEDIR/exec/processes/<process>/<thread>.smatdb
  local found
  found=$(find "$SITEDIR/exec/processes" -maxdepth 2 -type f -name "${THREAD}.smatdb" 2>/dev/null | head -n 1)
  if [ -z "$found" ]; then
    # Sometimes lives one level deeper or under a different layout
    found=$(find "$SITEDIR" -type f -name "${THREAD}.smatdb" 2>/dev/null | head -n 1)
  fi
  [ -n "$found" ] || die "no smatdb found for thread $THREAD under $SITEDIR (looked for ${THREAD}.smatdb)"
  printf '%s\n' "$found"
}

# Parse time expression -> unix ms.
# $1 is the expression; an empty expression prints nothing and succeeds.
# Prints the timestamp in milliseconds on stdout (no trailing newline).
# Calls die when the expression cannot be parsed; callers using $(...) must
# check the exit status.
parse_time_ms() {
  local expr="$1"
  [ -z "$expr" ] && return 0

  # Purely numeric: up to 10 digits is epoch seconds, 11 or more is already
  # milliseconds. 10# forces base 10 so a leading zero is not read as octal.
  if [[ "$expr" =~ ^[0-9]+$ ]]; then
    if [ "${#expr}" -ge 11 ]; then
      printf '%s' "$expr"
    else
      printf '%s' "$((10#$expr * 1000))"
    fi
    return
  fi

  # GNU date and BSD date differ. Try GNU first (-d EXPR), fall back to BSD
  # (-v for relative times, -jf for absolute ones).
  local ts=""
  if ts=$(date -d "$expr" +%s 2>/dev/null); then
    printf '%s' "$((ts * 1000))"
    return
  fi

  # BSD date — `-v` shorthand for "<N> <unit>s ago".
  # The regex lives in a variable so it behaves the same on old bash releases.
  local rel_re='^([0-9]+) (second|minute|hour|day|week|month|year)s? ago$'
  if [[ "$expr" =~ $rel_re ]]; then
    local n=$((10#${BASH_REMATCH[1]}))
    local unit="${BASH_REMATCH[2]}"
    local flag=""
    case "$unit" in
      second) flag="S" ;;
      minute) flag="M" ;;
      hour)   flag="H" ;;
      day)    flag="d" ;;
      week)   flag="d"; n=$((n * 7)) ;;   # expressed as N*7 days
      month)  flag="m" ;;
      year)   flag="y" ;;
    esac
    if ts=$(date -v "-${n}${flag}" +%s 2>/dev/null); then
      printf '%s' "$((ts * 1000))"
      return
    fi
  fi

  # BSD date with -jf, most specific format first.
  local fmt
  for fmt in "%Y-%m-%d %H:%M:%S" "%Y-%m-%d"; do
    if ts=$(date -jf "$fmt" "$expr" +%s 2>/dev/null); then
      printf '%s' "$((ts * 1000))"
      return
    fi
  done

  die "could not parse time expression: $expr"
}

# die inside $(...) only exits the subshell; propagate the failure so a bad
# time expression aborts instead of silently dropping the time filter.
AFTER_MS=$(parse_time_ms "$AFTER") || exit 1
BEFORE_MS=$(parse_time_ms "$BEFORE") || exit 1

# Build WHERE clause
WHERE="1=1"
if [ -n "$AFTER_MS" ]; then
  WHERE="$WHERE AND Time >= $AFTER_MS"
fi
if [ -n "$BEFORE_MS" ]; then
  WHERE="$WHERE AND Time <= $BEFORE_MS"
fi
if [ -n "$TYPE" ]; then
  # Escape single quotes
  ESC_TYPE=$(printf '%s' "$TYPE" | sed "s/'/''/g")
  WHERE="$WHERE AND Type = '$ESC_TYPE'"
fi

# Coarse LIKE pre-filter — only safe for positive exact/contains operators.
# Negation ops, null/wildcard, and absences require post-filter.
# ${FILTERS[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
for filt in ${FILTERS[@]+"${FILTERS[@]}"}; do
  # Skip negation — those need every message to be inspected
  if [[ "$filt" == *"!="* ]] || [[ "$filt" == *"!~"* ]]; then
    continue
  fi
  # Extract value portion regardless of op
  if [[ "$filt" == *"~"* ]]; then
    val="${filt#*~}"
  elif [[ "$filt" == *"="* ]]; then
    val="${filt#*=}"
  else
    continue
  fi
  # null / empty / wildcard values say nothing about the message bytes
  norm_val=$(printf '%s' "$val" | tr '[:upper:]' '[:lower:]')
  if [ "$norm_val" = "null" ] || [ -z "$val" ] || [ "$val" = "*" ]; then
    continue
  fi
  ESC_VAL=$(printf '%s' "$val" | sed "s/'/''/g")
  WHERE="$WHERE AND MessageContent LIKE '%${ESC_VAL}%'"
done

# locate_smatdb reports its own error via die, but die inside $(...) only
# leaves the subshell — stop here rather than querying an empty path.
SMATDB=$(locate_smatdb) || exit 1
[ "$FORMAT" = "count" ] || printf 'nc-msgs: querying %s\n' "$SMATDB" >&2

# Pull the data
TMP_OUT=$(mktemp -d) || die "could not create temp directory"
trap 'rm -rf "$TMP_OUT"' EXIT

SQL="SELECT Time, Type, SourceConn, DestConn, MessageContent
     FROM smat_msgs
     WHERE $WHERE
     ORDER BY Time DESC
     LIMIT $LIMIT"

# Fail on a non-zero exit status as well as on anything written to stderr.
sqlite_rc=0
sqlite3 -ascii "$SMATDB" "$SQL" > "$TMP_OUT/raw.bin" 2> "$TMP_OUT/err" || sqlite_rc=$?
if [ "$sqlite_rc" -ne 0 ] || [ -s "$TMP_OUT/err" ]; then
  cat "$TMP_OUT/err" >&2
  exit 1
fi

# Split rows (0x1e) into individual files, parse fields per row (0x1f).
# Each row becomes msg_NNNNN.bin (content) plus meta_NNNNN.tsv (the four
# leading columns). awk reports how many rows it wrote, which is the count.
MSG_COUNT=$(awk -v RS=$'\x1e' -v FS=$'\x1f' -v outdir="$TMP_OUT" '
  NF >= 5 {
    n++
    id = sprintf("%05d", n)
    fpath = outdir "/msg_" id ".bin"
    print $5 > fpath
    close(fpath)
    metafpath = outdir "/meta_" id ".tsv"
    printf "%s\t%s\t%s\t%s\n", $1, $2, $3, $4 > metafpath
    close(metafpath)
  }
  END { print n + 0 }
' "$TMP_OUT/raw.bin") || die "failed to split query output"
KEPT=0

# Parse a single filter expression: returns path / op / expected via globals.
# Supported operators — the FIRST `=` or `~` in the expression is the
# operator, and a `!` immediately before it negates it. Everything after the
# operator is the expected value, so values may themselves contain `~` or `=`
# (e.g. PID.3=A~B):
#   !~   does not contain (case-insensitive substring)
#   !=   not equal
#   ~    contains (case-insensitive substring)
#   =    exact equal (or NULL keyword, empty, or * wildcard — see field_matches)
# Sets FP_PATH, FP_OP and FP_EXPECTED; FP_OP is left empty when the expression
# contains no operator.
parse_filter() {
  local filt="$1" head
  FP_OP=""; FP_PATH=""; FP_EXPECTED=""

  head="${filt%%[=~]*}"                  # text before the first = or ~
  [ "$head" != "$filt" ] || return 0     # no operator at all

  FP_OP="${filt:${#head}:1}"
  FP_EXPECTED="${filt:$(( ${#head} + 1 ))}"
  case "$head" in
    *'!') FP_OP="!${FP_OP}"; FP_PATH="${head%?}" ;;
    *)    FP_PATH="$head" ;;
  esac
}

# Returns 0 if at least one line (repetition) of $1 is neither empty nor the
# literal two-character string "".
_fm_has_value() {
  local rep
  while IFS= read -r rep; do
    if [ -n "$rep" ] && [ "$rep" != '""' ]; then
      return 0
    fi
  done <<< "$1"
  return 1
}

# Returns 0 if any line (repetition) of $1 is exactly equal to $2.
_fm_has_rep() {
  local rep
  while IFS= read -r rep; do
    if [ "$rep" = "$2" ]; then
      return 0
    fi
  done <<< "$1"
  return 1
}

# Returns 0 if the (op, expected) check matches the actual field value(s).
#   $1 actual   — field value; multiple repetitions are one per line
#   $2 op       — one of = != ~ !~ (anything else never matches)
#   $3 expected — value to compare against; empty or "null" (any case) means
#                 "no value", and "*" means "any value" for = and !=
field_matches() {
  local actual="$1" op="$2" expected="$3"
  local is_null=0
  case "$expected" in
    ''|[Nn][Uu][Ll][Ll]) is_null=1 ;;
  esac

  case "$op" in
    "="|"!=")
      # Work out whether "=" would match, then invert the answer for "!=".
      local eq_hit=1
      if [ "$is_null" = "1" ]; then
        # Null match: actual empty or all-empty reps
        if ! _fm_has_value "$actual"; then eq_hit=0; fi
      elif [ "$expected" = "*" ]; then
        # Wildcard — any non-empty rep
        if _fm_has_value "$actual"; then eq_hit=0; fi
      else
        # Exact: some repetition equals expected
        if _fm_has_rep "$actual" "$expected"; then eq_hit=0; fi
      fi
      if [ "$op" = "=" ]; then
        return "$eq_hit"
      fi
      return $(( 1 - eq_hit ))
      ;;
    "~"|"!~")
      # An empty/null needle: "contains nothing" is meaningless (never
      # matches), while "doesn't contain (nothing)" always passes.
      if [ "$is_null" = "1" ]; then
        [ "$op" = "!~" ]
        return
      fi
      # Case-insensitive substring test over the whole value.
      local expected_lc actual_lc found=1
      expected_lc=$(printf '%s' "$expected" | tr '[:upper:]' '[:lower:]')
      actual_lc=$(printf '%s' "$actual" | tr '[:upper:]' '[:lower:]')
      if [[ "$actual_lc" == *"$expected_lc"* ]]; then found=0; fi
      if [ "$op" = "~" ]; then
        return "$found"
      fi
      return $(( 1 - found ))
      ;;
    *)
      return 1
      ;;
  esac
}

# Apply all --field filters to the message file $1; AND semantics.
# Filters without a recognizable operator are ignored. Field values are
# obtained by running "$HL7F" PATH FILE.
match_filters() {
  local msg_file="$1" filt actual
  for filt in "${FILTERS[@]}"; do
    parse_filter "$filt"
    [ -z "$FP_OP" ] && continue
    actual=$("$HL7F" "$FP_PATH" "$msg_file" 2>/dev/null)
    field_matches "$actual" "$FP_OP" "$FP_EXPECTED" || return 1
  done
  return 0
}

# Prints the path of the meta_NNNNN.tsv that belongs to msg_NNNNN.bin ($1).
# Pairing by file name (not by a loop counter) stays correct whatever order
# the glob returns the message files in.
meta_file_for() {
  local msg_file="$1" id
  id="${msg_file##*/}"
  id="${id#msg_}"
  id="${id%.bin}"
  printf '%s/meta_%s.tsv' "${msg_file%/*}" "$id"
}

# Loads the metadata row for message file $1 into the globals tm, typ, src
# and dst (tab-separated columns 1-4). Empty columns are preserved.
load_meta() {
  local line="" tab=$'\t'
  IFS= read -r line < "$(meta_file_for "$1")" || true
  tm="${line%%"$tab"*}";  line="${line#*"$tab"}"
  typ="${line%%"$tab"*}"; line="${line#*"$tab"}"
  src="${line%%"$tab"*}"; line="${line#*"$tab"}"
  dst="${line%%"$tab"*}"
}

# Prints every non-empty field of message file $1 on its own line as
# "SEG.N: value". When $2 is 1, fields with a known friendly alias are printed
# as "SEG.N (alias): value" instead.
emit_fields() {
  tr '\r' '\n' < "$1" | awk -F'|' -v labeled="$2" '
    BEGIN {
      # Reverse alias lookup table (alias for SEG.N → label)
      a["PID.3"]="mrn"; a["PID.4"]="alt_id"
      a["PID.5"]="name"; a["PID.6"]="mothers_maiden"
      a["PID.7"]="dob"; a["PID.8"]="sex"
      a["PID.11"]="address"; a["PID.13"]="phone"
      a["PID.14"]="work_phone"; a["PID.18"]="account"
      a["PID.19"]="ssn"; a["PID.20"]="license"
      a["PID.29"]="dod"
      a["PV1.2"]="patient_class"; a["PV1.3"]="location"
      a["PV1.7"]="attending"; a["PV1.8"]="referring"
      a["PV1.9"]="consulting"; a["PV1.17"]="admitting"
      a["PV1.18"]="patient_type"; a["PV1.19"]="visit"
      a["PV1.50"]="alt_visit"
      a["MSH.3"]="sending_app"; a["MSH.4"]="sending_facility"
      a["MSH.5"]="receiving_app"; a["MSH.6"]="receiving_facility"
      a["MSH.7"]="timestamp"; a["MSH.9"]="msg_type"
      a["MSH.10"]="control_id"; a["MSH.11"]="processing_id"
      a["MSH.12"]="hl7_version"
      a["EVN.1"]="trigger_event"; a["EVN.2"]="event_dt"
      a["EVN.4"]="evn_reason"; a["EVN.5"]="operator"
      a["NK1.2"]="next_of_kin"; a["NK1.3"]="relationship"
      a["NK1.4"]="nk_address"; a["NK1.5"]="nk_phone"
      a["GT1.4"]="guarantor"; a["GT1.5"]="gt_address"
      a["GT1.6"]="gt_phone"; a["GT1.12"]="gt_ssn"
      a["IN1.2"]="insurance"; a["IN1.16"]="insured"
      a["IN1.17"]="insured_dob"; a["IN1.36"]="policy"
      a["DG1.3"]="diagnosis"; a["DG1.4"]="dx_desc"
      a["OBR.2"]="placer_order"; a["OBR.3"]="filler_order"
      a["OBR.4"]="test_code"; a["OBR.16"]="ordering"
      a["OBX.5"]="value"; a["OBX.11"]="status"
    }
    NF > 0 {
      seg = substr($1, 1, 3)
      if (seg == "") next
      # For MSH the awk column index is used as the field number directly;
      # for every other segment the number is one less than the column.
      is_msh = (seg == "MSH")
      for (k = 2; k <= NF; k++) {
        val = $k
        if (val == "" || val == "\"\"") continue
        fnum = is_msh ? k : (k - 1)
        key = seg "." fnum
        if (labeled && (key in a)) printf "%s (%s): %s\n", key, a[key], val
        else printf "%s: %s\n", key, val
      }
    }'
}

# Emit
case "$FORMAT" in
  count)
    # Count after filter
    if [ ${#FILTERS[@]} -eq 0 ]; then
      echo "$MSG_COUNT"
    else
      for f in "$TMP_OUT"/msg_*.bin; do
        [ -e "$f" ] || continue   # unmatched glob: no rows came back
        if match_filters "$f"; then
          KEPT=$((KEPT+1))
        fi
      done
      echo "$KEPT"
    fi
    ;;
  raw)
    for f in "$TMP_OUT"/msg_*.bin; do
      [ -e "$f" ] || continue
      if [ ${#FILTERS[@]} -eq 0 ] || match_filters "$f"; then
        cat "$f"
        printf '\034'   # File separator (0x1c) between messages (rare in HL7)
        KEPT=$((KEPT+1))
      fi
    done
    ;;
  text|oneline|fields|mp|labeled)
    for f in "$TMP_OUT"/msg_*.bin; do
      [ -e "$f" ] || continue
      if [ ${#FILTERS[@]} -gt 0 ] && ! match_filters "$f"; then
        continue
      fi
      KEPT=$((KEPT+1))
      load_meta "$f"
      # Values this large are taken to be epoch milliseconds; render them in
      # human-readable form (BSD `date -r` first, then GNU `date -d @`).
      if [ "$tm" -gt 100000000000 ] 2>/dev/null; then
        tm_h=$(date -r $((tm/1000)) 2>/dev/null || date -d "@$((tm/1000))" 2>/dev/null || echo "$tm")
      else
        tm_h="$tm"
      fi
      printf '===== msg %d  time=%s  type=%s  src=%s  dst=%s =====\n' "$KEPT" "$tm_h" "$typ" "$src" "$dst"
      case "$FORMAT" in
        text)
          tr '\r' '\n' < "$f"
          ;;
        oneline)
          # Compact: single line, segments separated by visible '⏎' marker.
          # The 0x1f byte is passed to sed literally because \x escapes in
          # sed patterns are a GNU extension.
          tr '\r' '\037' < "$f" | sed $'s/\037/ ⏎ /g'
          printf '\n'
          ;;
        fields|mp)
          # Each field on its own line: SEG.N: value (skips empty)
          emit_fields "$f" 0
          ;;
        labeled)
          # Same as fields but adds the friendly alias when known.
          emit_fields "$f" 1
          ;;
      esac
      printf '\n'
    done
    printf 'nc-msgs: %d msgs scanned, %d match filters\n' "$MSG_COUNT" "$KEPT" >&2
    ;;
  json)
    command -v jq >/dev/null 2>&1 || die "jq not on PATH (required for --format json)"
    printf '['
    for f in "$TMP_OUT"/msg_*.bin; do
      [ -e "$f" ] || continue
      if [ ${#FILTERS[@]} -gt 0 ] && ! match_filters "$f"; then
        continue
      fi
      KEPT=$((KEPT+1))
      [ "$KEPT" -eq 1 ] || printf ','
      load_meta "$f"
      # A missing or non-numeric timestamp must not break the JSON document.
      [[ "$tm" =~ ^-?[0-9]+$ ]] || tm="null"
      # Replace \r with \n in message content, then let jq build the whole
      # object so every string (content, type, source, dest) is JSON-escaped.
      msg_json=$(tr '\r' '\n' < "$f" | jq -Rsc \
        --argjson time_ms "$tm" \
        --arg type "$typ" \
        --arg source "$src" \
        --arg dest "$dst" \
        '{time_ms: $time_ms, type: $type, source: $source, dest: $dest, content: .}')
      printf '%s' "$msg_json"
    done
    printf ']\n'
    ;;
esac