cloverleaf-larry/lib/hl7-field.sh
Bryan Johnson e08f030df5 v0.3.0: initial release of Larry-Anywhere
Portable AI agent for Cloverleaf integration work. Pure bash + curl + jq.
Zero dependency on v1 wrapper scripts or v2 cloverleaf-tools.pyz.

27 native Anthropic tools:

NetConfig parsing (read)
  nc_list_protocols, nc_list_processes, nc_protocol_block,
  nc_protocol_field, nc_protocol_nested, nc_protocol_summary,
  nc_destinations, nc_sources, nc_xlate_refs, nc_tclproc_refs

NetConfig modification (journal-backed writes with rollback)
  nc_insert_protocol, nc_add_route, larry_rollback_list

Workflows
  nc_find_inbound, nc_make_jump (3-thread jump pattern), nc_find
  (tbn/tbp/tbh/tbpr/where replacements), nc_document, nc_diff_interface,
  nc_regression

Messages
  hl7_field, nc_msgs (smat is SQLite!), hl7_diff (with --ignore MSH.7)

File system
  read_file, list_dir, grep_files, glob_files, write_file, bash_exec

Validated against a 22-site real Cloverleaf test install. Five worked
examples end-to-end: jump-thread generation, smat MRN search, system
documentation, interface+connected diff, HL7-aware regression diff.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-26 09:46:20 -07:00

123 lines
4.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# hl7-field.sh — extract a specific field from an HL7 v2 message. Native v3.
#
# Field path: SEG[.FIELD[.COMPONENT[.SUBCOMPONENT]]]
# PID — return the whole PID segment
# PID.3 — return PID field 3
# PID.3.1 — return PID field 3, component 1
# PID.3.1.1 — return PID field 3, component 1, subcomponent 1
# MSH.10 — special: MSH numbering accounts for the encoding chars
# (MSH.1 = field separator char, MSH.2 = encoding chars,
# MSH.3+ = subsequent fields).
#
# Repetitions (~ separator) are returned one per line.
#
# Usage:
# hl7-field.sh <path> [message_file] # read message from file or stdin
# echo "$msg" | hl7-field.sh PID.18
# hl7-field.sh PID.18 /tmp/sample.hl7
#
# Exit codes: 0 = found (any number of values printed), 2 = bad path, 3 = not found.
set -u
# usage [STATUS] — print the header comment (lines 2-20 of this file) and
# exit with STATUS (default 0). The function runs in the current shell, so
# its `exit` terminates the script; the status therefore has to be passed in
# rather than relying on a command placed after the call.
usage() { sed -n '2,20p' "$0"; exit "${1:-0}"; }
PATH_SPEC="${1:-}"
MSG_FILE="${2:-}"
# A missing path is a usage error: help text goes to stderr, exit status 2.
[ -n "$PATH_SPEC" ] || usage 2 >&2
# Explicit help request: help text on stdout, exit status 0.
case "$PATH_SPEC" in -h|--help) usage ;; esac
# Load the message text into MSG: from MSG_FILE when one was given on the
# command line, otherwise from standard input.
if [ -z "$MSG_FILE" ]; then
  MSG=$(cat)
elif [ -f "$MSG_FILE" ]; then
  MSG=$(cat "$MSG_FILE")
else
  # A named file that is not an existing regular file is reported as status 2.
  echo "hl7-field: no such file: $MSG_FILE" >&2
  exit 2
fi
# Nothing to search in: report and exit with the "not found" status.
if [ -z "$MSG" ]; then
  echo "hl7-field: empty message" >&2
  exit 3
fi
# Parse path: SEG, optional .FIELD, .COMPONENT, .SUBCOMPONENT
# `read` assigns every named variable; levels that are absent become empty.
# Anything beyond the fourth level ends up inside SCNUM and is rejected by
# the numeric check below.
IFS='.' read -r SEG FNUM CNUM SCNUM <<< "$PATH_SPEC"
# bad_path — report the offending path on stderr and exit with status 2.
bad_path() { echo "hl7-field: bad path: $PATH_SPEC" >&2; exit 2; }
# The segment name is later compared against message text; restrict it to
# alphanumerics so it can never act as a pattern. This also rejects an
# empty segment name.
[[ "$SEG" =~ ^[A-Za-z0-9]+$ ]] || bad_path
# Field/component/subcomponent numbers are 1-based positive integers.
# Without this check a non-numeric or zero index is coerced by awk and
# silently selects the wrong text (e.g. the segment name).
for path_index in "$FNUM" "$CNUM" "$SCNUM"; do
  [[ -z "$path_index" || "$path_index" =~ ^[1-9][0-9]*$ ]] || bad_path
done
# A deeper level is only meaningful when the level above it is present
# (rejects paths such as "PID..1" or "PID.3..1").
if [ -z "$FNUM" ] && [ -n "$CNUM$SCNUM" ]; then bad_path; fi
if [ -z "$CNUM" ] && [ -n "$SCNUM" ]; then bad_path; fi
# Detect encoding characters from MSH
# Standard layout: MSH<F>^~\&<F>... where F is the field-separator (usually |)
#
# detect_separators MESSAGE
#   Sets the globals FSEP (field), ECH (raw encoding characters), CSEP
#   (component), RSEP (repetition), ESC (escape) and SCSEP (subcomponent).
#   The separators are read from the first line that starts with "MSH"; lines
#   may be delimited by CR and/or LF, and a leading 0x0B byte is treated as a
#   line break so it cannot shift the header. When the input has no MSH line,
#   the 4th character of the first non-empty line is used as the field
#   separator. Anything still undetermined falls back to the defaults | ^ ~ &.
detect_separators() {
  local msg="$1" header rest
  header=$(printf '%s' "$msg" | tr '\r\013' '\n\n' | awk '
    /^MSH./ { print; seen = 1; exit }
    first == "" && length($0) > 0 { first = $0 }
    END { if (!seen) print first }
  ')
  FSEP="${header:3:1}"   # 4th char of the header line = field separator
  ECH=""
  case "$header" in
    MSH?*)
      # Encoding characters = everything between the first and the second
      # field separator. FSEP is quoted so it is matched literally.
      rest="${header:4}"
      ECH="${rest%%"$FSEP"*}"
      ;;
  esac
  CSEP="${ECH:0:1}"    # ^ — component separator
  RSEP="${ECH:1:1}"    # ~ — repetition separator
  ESC="${ECH:2:1}"     # \ — escape character (unused in lookup)
  SCSEP="${ECH:3:1}"   # & — subcomponent separator
  [ -n "$FSEP" ] || FSEP='|'
  [ -n "$CSEP" ] || CSEP='^'
  [ -n "$RSEP" ] || RSEP='~'
  [ -n "$SCSEP" ] || SCSEP='&'
}
detect_separators "${MSG:-}"
# Find the requested segment.
#
# find_segment MESSAGE SEG FIELD_SEP
#   Prints the first segment whose name is exactly SEG: either the segment is
#   just "SEG", or it starts with SEG immediately followed by FIELD_SEP.
#   Prints nothing when there is no such segment.
#   Segments may be terminated by CR, LF or CRLF; the bytes 0x0B and 0x1C are
#   treated as line breaks as well so they never stick to a segment name.
#   SEG and FIELD_SEP are compared as literal strings (never as regular
#   expressions), so a separator such as "^" or a name containing pattern
#   characters cannot change what is matched.
find_segment() {
  local msg="$1" seg="$2" sep="$3"
  printf '%s' "$msg" | tr '\r\013\034' '\n\n\n' |
    awk -v seg="$seg" -v sep="$sep" '
      BEGIN { n = length(seg) }
      # Concatenating "" forces string (not numeric) comparison.
      ($0 "") == (seg "") || substr($0, 1, n + 1) == (seg sep) { print; exit }
    '
}
SEGMENT=$(find_segment "${MSG:-}" "${SEG:-}" "${FSEP:-|}")
# No matching segment in the message: exit with the "not found" status.
if [ -z "$SEGMENT" ]; then
  exit 3
fi
# A path without a field number asks for the whole segment: print it and stop.
[ -n "${FNUM:-}" ] || { printf '%s\n' "$SEGMENT"; exit 0; }
# get_field SEGMENT_TEXT FIELD_NUMBER
#   Prints one field of SEGMENT_TEXT. Reads the globals SEG (segment name
#   from the path) and FSEP (field separator).
#
#   awk splits the segment on FSEP, so $1 is always the segment name:
#     - MSH: the separator right after "MSH" is itself MSH.1, so MSH.1 is
#       answered with FSEP directly and MSH.N (N >= 2) is awk field $N
#       (MSH.2 = encoding characters = $2, MSH.3 = $3, ...).
#     - any other segment: SEG.N is awk field $(N+1).
get_field() {
  local segment_text="$1" field_no="$2"
  local shift_by=1   # offset from HL7 field number to awk field index
  case "$SEG" in
    MSH)
      if [ "$field_no" = "1" ]; then
        printf '%s' "$FSEP"
        return
      fi
      shift_by=0
      ;;
  esac
  printf '%s' "$segment_text" \
    | awk -v FS="$FSEP" -v N="$field_no" -v OFF="$shift_by" '{ print $(N + OFF) }'
}
FIELD_VAL=$(get_field "$SEGMENT" "$FNUM")
# Split repetitions
#
# emit_field_values VALUE [COMPONENT [SUBCOMPONENT [VERBATIM]]]
#   Prints the requested value(s) of one field, one repetition per line.
#   Reads the globals RSEP, CSEP and SCSEP (repetition, component and
#   subcomponent separators).
#     COMPONENT / SUBCOMPONENT  1-based positions; empty = do not descend.
#     VERBATIM                  non-empty = VALUE is delimiter data (MSH.1 /
#                               MSH.2) and is printed as-is. Splitting it on
#                               the repetition separator would break "^~\&"
#                               into two lines. Such a value has no
#                               components, so asking for one is "not found".
#   Returns 0 when at least one non-empty value was printed, 3 when the field
#   (or the requested component/subcomponent in every repetition) is empty or
#   absent; nothing is printed in that case. When some repetitions have the
#   value and others do not, every repetition still gets its own (possibly
#   empty) line so positions stay aligned.
emit_field_values() {
  local value="$1" comp="${2:-}" sub="${3:-}" verbatim="${4:-}"
  [ -n "$value" ] || return 3
  if [ -n "$verbatim" ]; then
    [ -z "$comp" ] || return 3
    printf '%s\n' "$value"
    return 0
  fi
  # awk is the last command of the pipeline, so its exit status (0 or 3)
  # is the function's return status.
  printf '%s' "$value" | awk -v R="$RSEP" -v C="$CSEP" -v S="$SCSEP" \
    -v CN="$comp" -v SCN="$sub" '
    { value = $0 }
    END {
      n = split(value, reps, R)
      found = 0
      for (i = 1; i <= n; i++) {
        v = reps[i]
        if (CN != "") {
          split(v, comps, C)
          v = comps[CN + 0]      # + 0: index by number, not by spelling
          if (SCN != "") {
            split(v, subs, S)
            v = subs[SCN + 0]
          }
        }
        out[i] = v
        if (v != "") found = 1
      }
      if (!found) exit 3
      for (i = 1; i <= n; i++) print out[i]
    }
  '
}
# MSH.1 and MSH.2 hold the delimiters themselves and must not be split.
VERBATIM=""
if [ "${SEG:-}" = "MSH" ]; then
  case "${FNUM:-}" in 1|2) VERBATIM=1 ;; esac
fi
# Keep this as the script's final command: its return status (0 = found,
# 3 = not found) becomes the script's exit status.
emit_field_values "${FIELD_VAL:-}" "${CNUM:-}" "${SCNUM:-}" "$VERBATIM"