#!/usr/bin/env bash
# hl7-field.sh — extract a specific field from an HL7 v2 message. Native v3.
#
# Field path: SEG[.FIELD[.COMPONENT[.SUBCOMPONENT]]]
# Both `.` and `-` are accepted as separators (cheat-sheet flexibility):
#   PID        — return the whole PID segment
#   PID.3      — return PID field 3
#   PID-3      — same as PID.3
#   PID.3.1    — return PID field 3, component 1
#   PID-3.1    — same as PID.3.1
#   PV1-3-4    — PV1 segment, field 3, component 4
#   MSH.10     — special: MSH numbering accounts for the encoding chars
#                (MSH.1 = field separator char, MSH.2 = encoding chars,
#                 MSH.3+ = subsequent fields).
#
# The first token may also be a field-name alias (case-insensitive), e.g.
#   MRN → PID.3      NAME.2 → PID.5.2      account_no.1 → PID.18.1
#
# Repetitions (~ separator) are returned one per line.
# Only the first occurrence of the requested segment is examined.
#
# Usage:
#   hl7-field.sh <path> [message_file]   # read message from file or stdin
#   echo "$msg" | hl7-field.sh PID.18
#   hl7-field.sh PID.18 /tmp/sample.hl7
#
# Exit codes: 0 = found (any number of values printed), 2 = bad path, 3 = not found.

set -u

#######################################
# Print "hl7-field: MESSAGE" on stderr and terminate the script.
# Arguments: $1 - exit status, $2 - message
#######################################
die() {
  printf 'hl7-field: %s\n' "$2" >&2
  exit "$1"
}

#######################################
# Print the header comment block of this script: every line after the shebang
# up to the first line that is not a comment, with the leading "# " removed.
# Scanning for the end of the block (instead of a fixed line range) keeps the
# help text complete when the header grows or shrinks.
#######################################
print_usage() {
  awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
}

# Print the help text on stdout and exit 0 (used for -h / --help).
usage() {
  print_usage
  exit 0
}

#######################################
# Translate a field-name alias into its SEG.FIELD[.COMPONENT] form.
# The lookup is case-insensitive and treats spaces as underscores.
# Arguments: $1 - candidate alias (first token of the path)
# Outputs:   the expansion on stdout, or nothing when $1 is not an alias
#######################################
resolve_hl7_alias() {
  local norm
  norm=$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]' | tr ' ' '_')
  case "$norm" in
    # --- PID: patient identification ---
    MRN | PATIENT_ID | PT_ID | PATIENTID) printf '%s\n' "PID.3" ;;
    ALT_ID | ALTID | ALT_PATIENT_ID | ALT_PT_ID) printf '%s\n' "PID.4" ;;
    NAME | PATIENT_NAME | PT_NAME) printf '%s\n' "PID.5" ;;
    MAIDEN | MOTHER_MAIDEN) printf '%s\n' "PID.6" ;;
    DOB | BIRTHDATE | BIRTH_DATE | BIRTHDAY) printf '%s\n' "PID.7" ;;
    SEX | GENDER) printf '%s\n' "PID.8" ;;
    ALIAS) printf '%s\n' "PID.9" ;;
    ADDR | ADDRESS | PT_ADDRESS) printf '%s\n' "PID.11" ;;
    PHONE | HOME_PHONE) printf '%s\n' "PID.13" ;;
    WORK_PHONE | BUSINESS_PHONE) printf '%s\n' "PID.14" ;;
    ACCT | ACCOUNT | ACCOUNT_NUMBER | ACCOUNTNUM | ACCT_NUM | ACCOUNT_NO) printf '%s\n' "PID.18" ;;
    SSN) printf '%s\n' "PID.19" ;;
    LIC | LICENSE | DRIVER_LICENSE | DL) printf '%s\n' "PID.20" ;;
    DOD | DEATH_DATE | DATE_OF_DEATH) printf '%s\n' "PID.29" ;;
    # --- PV1: patient visit ---
    PATIENT_CLASS | PT_CLASS) printf '%s\n' "PV1.2" ;;
    LOCATION | ASSIGNED_LOCATION | PT_LOCATION | BED_LOCATION) printf '%s\n' "PV1.3" ;;
    ATTENDING | ATTENDING_DR | ATTENDING_DOCTOR | ATTENDING_PROVIDER) printf '%s\n' "PV1.7" ;;
    REFERRING | REFERRING_DR | REFERRING_DOCTOR | REFERRING_PROVIDER) printf '%s\n' "PV1.8" ;;
    CONSULTING | CONSULTING_DR | CONSULTING_DOCTOR) printf '%s\n' "PV1.9" ;;
    ADMITTING | ADMITTING_DR | ADMITTING_DOCTOR | ADMITTING_PROVIDER) printf '%s\n' "PV1.17" ;;
    PT_TYPE | PATIENT_TYPE) printf '%s\n' "PV1.18" ;;
    VISIT | VISIT_NUMBER | VISIT_NO | ENCOUNTER | CSN | ENC | ENC_NUM) printf '%s\n' "PV1.19" ;;
    ALT_VISIT | ALT_VISIT_ID) printf '%s\n' "PV1.50" ;;
    # --- EVN: event type ---
    EVENT_DT | EVN_DATE) printf '%s\n' "EVN.2" ;;
    REASON_FOR_EVENT | EVN_REASON) printf '%s\n' "EVN.4" ;;
    OPERATOR | EVN_OPERATOR | RESPONSIBLE_OPERATOR) printf '%s\n' "EVN.5" ;;
    # --- MSH: message header ---
    CONTROL_ID | MSG_CONTROL_ID | MSG_CTL_ID | CTLID | MSGID | MESSAGE_ID) printf '%s\n' "MSH.10" ;;
    MSG_TYPE | MESSAGE_TYPE) printf '%s\n' "MSH.9" ;;
    EVENT | EVENT_CODE | TRIGGER_EVENT | TRIGGER) printf '%s\n' "MSH.9.2" ;;
    TIMESTAMP | MSG_TIMESTAMP | MSG_TIME | SENT_TIME) printf '%s\n' "MSH.7" ;;
    SENDING_APP | SENDING_APPLICATION) printf '%s\n' "MSH.3" ;;
    SENDING_FACILITY | SENDING_FAC) printf '%s\n' "MSH.4" ;;
    RECEIVING_APP | RECEIVING_APPLICATION) printf '%s\n' "MSH.5" ;;
    RECEIVING_FACILITY | RECEIVING_FAC) printf '%s\n' "MSH.6" ;;
    PROCESSING_ID | PROC_ID) printf '%s\n' "MSH.11" ;;
    VERSION | HL7_VERSION | VER) printf '%s\n' "MSH.12" ;;
    # --- NK1: next of kin ---
    NK | NK_NAME | NEXT_OF_KIN) printf '%s\n' "NK1.2" ;;
    NK_RELATIONSHIP | RELATIONSHIP) printf '%s\n' "NK1.3" ;;
    NK_ADDRESS) printf '%s\n' "NK1.4" ;;
    NK_PHONE) printf '%s\n' "NK1.5" ;;
    # --- GT1: guarantor ---
    # GT1-3 is Guarantor Name; GT1-4 is Guarantor Spouse Name.
    GUARANTOR | GUARANTOR_NAME | GT_NAME) printf '%s\n' "GT1.3" ;;
    GUARANTOR_ADDRESS | GT_ADDRESS) printf '%s\n' "GT1.5" ;;
    GUARANTOR_PHONE | GT_PHONE) printf '%s\n' "GT1.6" ;;
    GUARANTOR_SSN | GT_SSN) printf '%s\n' "GT1.12" ;;
    # --- IN1: insurance ---
    INSURANCE | INSURANCE_PLAN | INS_PLAN) printf '%s\n' "IN1.2" ;;
    INSURED_NAME | POLICY_HOLDER | INSURED) printf '%s\n' "IN1.16" ;;
    # IN1-18 is Insured's Date of Birth; IN1-17 is the relationship to patient.
    INSURED_DOB) printf '%s\n' "IN1.18" ;;
    POLICY | POLICY_NUMBER | INS_POLICY) printf '%s\n' "IN1.36" ;;
    # --- DG1: diagnosis ---
    DIAGNOSIS | DX | DIAGNOSIS_CODE) printf '%s\n' "DG1.3" ;;
    DIAGNOSIS_DESC | DX_DESC | DIAGNOSIS_DESCRIPTION) printf '%s\n' "DG1.4" ;;
    # --- OBR / OBX: orders and results ---
    PLACER | PLACER_ORDER | ORDER_NUMBER | ORDER_NO) printf '%s\n' "OBR.2" ;;
    FILLER | FILLER_ORDER) printf '%s\n' "OBR.3" ;;
    TEST_CODE | UNIVERSAL_SERVICE_ID | SERVICE_CODE) printf '%s\n' "OBR.4" ;;
    SPECIMEN | SPECIMEN_SOURCE) printf '%s\n' "OBR.15" ;;
    ORDERING | ORDERING_PROVIDER | ORDERING_DR) printf '%s\n' "OBR.16" ;;
    OBS_VALUE | RESULT_VALUE | OBX_VALUE) printf '%s\n' "OBX.5" ;;
    OBS_STATUS | RESULT_STATUS | OBX_STATUS) printf '%s\n' "OBX.11" ;;
    *) ;; # not an alias: print nothing
  esac
}

# Succeed when $1 is a positive decimal integer (leading zeros tolerated).
is_index() {
  local re='^0*[1-9][0-9]*$'
  [[ $1 =~ $re ]]
}

#######################################
# Print the N-th (1-based) piece of STRING split on the literal separator SEP.
# Splitting uses parameter expansion with a quoted separator, so characters
# such as ^ | \ & are never treated as pattern or regex syntax.
# Arguments: $1 - string, $2 - separator, $3 - piece number
# Outputs:   the piece on stdout; nothing when the string has fewer pieces
#######################################
nth_piece() {
  local rest=$1 sep=$2 n=$3 i
  for ((i = 1; i < n; i++)); do
    [[ $rest == *"$sep"* ]] || return 0
    rest=${rest#*"$sep"}
  done
  printf '%s' "${rest%%"$sep"*}"
}

#######################################
# Load the message into MSG (raw) and MSG_LINES (carriage returns turned into
# newlines so segments can be walked line by line).
# Globals:   MSG_FILE (read), MSG, MSG_LINES (written)
# Exits:     2 when MSG_FILE is given but unusable, 3 when the message is empty
#######################################
read_message() {
  local cr=$'\r' nl=$'\n'
  if [[ -n $MSG_FILE ]]; then
    # Accept anything readable as a stream (regular file, FIFO, /dev/stdin),
    # but not a directory.
    if [[ ! -e $MSG_FILE || -d $MSG_FILE ]]; then
      die 2 "no such file: $MSG_FILE"
    fi
    MSG=$(cat -- "$MSG_FILE")
  else
    MSG=$(cat)
  fi
  [[ -n $MSG ]] || die 3 "empty message"
  MSG_LINES=${MSG//$cr/$nl}
}

#######################################
# Turn PATH_SPEC into SEG / FNUM / CNUM / SCNUM.
# `-` and `.` are interchangeable separators; a leading alias is expanded and
# any component/subcomponent suffix the user passed is kept after it.
# Globals:   PATH_SPEC (read); NORMALIZED, SEG, FNUM, CNUM, SCNUM (written)
# Exits:     2 when the path has no segment, a non-numeric/zero index,
#            a gap (e.g. PID..1) or more than four levels
#######################################
parse_path() {
  local first expansion idx
  NORMALIZED=${PATH_SPEC//-/.}

  first=${NORMALIZED%%.*}
  expansion=$(resolve_hl7_alias "$first")
  if [[ -n $expansion ]]; then
    if [[ $NORMALIZED == *.* ]]; then
      NORMALIZED="$expansion.${NORMALIZED#*.}"
    else
      NORMALIZED=$expansion
    fi
  fi

  # Anything beyond the fourth level lands in SCNUM and fails validation below.
  IFS='.' read -r SEG FNUM CNUM SCNUM <<< "$NORMALIZED"
  [[ -n $SEG ]] || die 2 "bad path: $PATH_SPEC"
  # Segment IDs are upper-case in the message; accept pid.3 like PID.3.
  SEG=$(printf '%s' "$SEG" | tr '[:lower:]' '[:upper:]')

  if [[ -z $FNUM && -n $CNUM ]] || [[ -z $CNUM && -n $SCNUM ]]; then
    die 2 "bad path: $PATH_SPEC"
  fi
  for idx in "$FNUM" "$CNUM" "$SCNUM"; do
    [[ -z $idx ]] || is_index "$idx" || die 2 "bad path: $PATH_SPEC"
  done
  # Force base 10 so an index like 08 is not read as invalid octal.
  [[ -z $FNUM ]] || FNUM=$((10#$FNUM))
  [[ -z $CNUM ]] || CNUM=$((10#$CNUM))
  [[ -z $SCNUM ]] || SCNUM=$((10#$SCNUM))
}

#######################################
# Determine the delimiters from the MSH segment (layout: MSH|^~\&|...).
# The character right after "MSH" is the field separator; the field that
# follows holds component, repetition, escape and subcomponent characters in
# that order (the escape character is not needed for lookups).
# Without an MSH segment the 4th character of the message is used as the field
# separator, which works for fragments that begin with a 3-letter segment ID.
# Missing or implausible values fall back to the defaults | ^ ~ &.
# Globals:   MSG, MSG_LINES (read); FSEP, CSEP, RSEP, SCSEP (written)
#######################################
detect_separators() {
  local line msh='' ech=''
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ $line == MSH* ]]; then
      msh=$line
      break
    fi
  done <<< "$MSG_LINES"

  if [[ -n $msh ]]; then
    FSEP=${msh:3:1}
    ech=${msh:4}
    ech=${ech%%"$FSEP"*}
  else
    FSEP=${MSG:3:1}
  fi
  # A letter, digit or whitespace here means the message did not start with a
  # segment header; do not use it as a delimiter.
  if [[ $FSEP == [[:alnum:]] || $FSEP == [[:space:]] ]]; then
    FSEP=''
  fi

  CSEP=${ech:0:1}
  RSEP=${ech:1:1}
  SCSEP=${ech:3:1}
  [[ -n $FSEP ]] || FSEP='|'
  [[ -n $CSEP ]] || CSEP='^'
  [[ -n $RSEP ]] || RSEP='~'
  [[ -n $SCSEP ]] || SCSEP='&'
}

#######################################
# Print the first segment whose ID is exactly $1, i.e. a line that is either
# the bare ID or the ID immediately followed by the field separator. The
# comparison is literal, so neither the ID nor the separator can act as a
# pattern.
# Globals:   MSG_LINES, FSEP (read)
# Arguments: $1 - segment ID
# Returns:   0 and the segment on stdout when found, 1 otherwise
#######################################
find_segment() {
  local want=$1 line
  while IFS= read -r line || [[ -n $line ]]; do
    if [[ $line == "$want" || $line == "$want$FSEP"* ]]; then
      printf '%s' "$line"
      return 0
    fi
  done <<< "$MSG_LINES"
  return 1
}

#######################################
# Print field number $2 of segment text $1.
# MSH is special: MSH.1 is the field separator character itself and MSH.2 the
# encoding characters, so MSH.N (N >= 2) is the N-th separator-delimited
# piece. For every other segment piece 1 is the segment ID, so SEG.N is
# piece N+1.
# Globals:   SEG, FSEP (read)
# Arguments: $1 - segment text, $2 - field number
# Outputs:   the field on stdout (nothing when the segment is too short)
#######################################
get_field() {
  local seg="$1" fnum="$2"
  if [[ $SEG == "MSH" ]]; then
    if [[ $fnum == "1" ]]; then
      printf '%s' "$FSEP"
      return
    fi
    nth_piece "$seg" "$FSEP" "$fnum"
  else
    nth_piece "$seg" "$FSEP" "$((fnum + 1))"
  fi
}

#######################################
# Print one line per repetition of field value $1, narrowed to component CNUM
# and subcomponent SCNUM when those are set. A repetition that lacks the
# requested (sub)component yields an empty line.
# Globals:   RSEP, CSEP, SCSEP, CNUM, SCNUM (read)
# Arguments: $1 - field value
#######################################
emit_values() {
  local rest=$1 rep val
  while :; do
    rep=${rest%%"$RSEP"*}
    val=$rep
    if [[ -n $CNUM ]]; then
      val=$(nth_piece "$rep" "$CSEP" "$CNUM")
      if [[ -n $SCNUM ]]; then
        val=$(nth_piece "$val" "$SCSEP" "$SCNUM")
      fi
    fi
    printf '%s\n' "$val"
    [[ $rest == *"$RSEP"* ]] || break
    rest=${rest#*"$RSEP"}
  done
}

#######################################
# Entry point.
# Arguments: $1 - field path or alias, $2 - optional message file
# Exits:     0 found, 2 bad path / missing argument / missing file,
#            3 empty message or segment not present.
#            An empty or absent field prints nothing and still exits 0.
#######################################
main() {
  PATH_SPEC="${1:-}"
  MSG_FILE="${2:-}"

  # A missing path is a usage error: help goes to stderr, status is 2.
  if [[ -z $PATH_SPEC ]]; then
    print_usage >&2
    exit 2
  fi
  case "$PATH_SPEC" in
    -h | --help) usage ;;
  esac

  read_message
  parse_path
  detect_separators

  SEGMENT=$(find_segment "$SEG") || exit 3
  [[ -n $SEGMENT ]] || exit 3

  # Only the segment was requested.
  if [[ -z $FNUM ]]; then
    printf '%s\n' "$SEGMENT"
    exit 0
  fi

  FIELD_VAL=$(get_field "$SEGMENT" "$FNUM")
  if [[ -n $FIELD_VAL ]]; then
    if [[ $SEG == "MSH" && -z $CNUM ]] && ((FNUM <= 2)); then
      # MSH.1 / MSH.2 define the delimiters; their value consists of delimiter
      # characters and must be returned verbatim, not split on them.
      printf '%s\n' "$FIELD_VAL"
    else
      emit_values "$FIELD_VAL"
    fi
  fi
  exit 0
}

main "$@"