cloverleaf-larry/lib/hl7-desanitize.sh
Bryan Johnson 111be2c744 v0.8.26: harden control-byte sanitize across the tool suite + ssh-helper traps
Shared _sanitize_ctl (unconditional, nc-document) and _sanitize_ctl_tty
(strips only when stdout is a terminal) now live in cygwin-safe.sh. nc-msgs,
nc-parse, and the hl7-* tools route stdout through the tty-gated variant, so a
terminal is protected from raw HL7/NetConfig control bytes while pipes and
redirects stay byte-exact (the 0x1c framing route_test needs is preserved).
Exit codes propagate via PIPESTATUS. ssh-helper _read_hidden installs its
restore trap before stty -echo on every path and saves/restores the prior trap.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 16:35:06 -07:00

102 lines
3.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# hl7-desanitize.sh — reverse hl7-sanitize: replace [[CATEGORY_NNNN]] tokens
# with original values from $LARRY_HOME/sanitize/lookup.tsv.
#
# Use this LOCALLY ONLY — at view time, in your terminal. Never feed
# desanitized output back into Larry; that defeats the whole point.
#
# Usage:
# hl7-desanitize.sh [FILE] # read file or stdin
# hl7-desanitize.sh --table PATH # alternate table
# hl7-desanitize.sh --token [[NAME_0001]] # single token lookup
#
# Examples:
# # View Larry's sanitized output unmasked, in less:
# cat larry-output.txt | hl7-desanitize.sh | less
#
# # Quick single-token lookup:
# hl7-desanitize.sh --token "[[MRN_0001]]"
# A pipeline reports failure if any stage fails, not just the last one.
set -o pipefail

# Root of Larry's per-user state; honours a pre-set LARRY_HOME.
LARRY_HOME="${LARRY_HOME:-$HOME/.larry}"
# Lookup table used when --table is not given.
DEFAULT_TABLE="$LARRY_HOME/sanitize/lookup.tsv"

# v0.8.26: shared control-byte sanitizer. Desanitized HL7 can carry C0 control
# bytes that corrupt a terminal when viewed un-redirected; strip them ONLY when
# stdout is a tty. Piping to `less` (a documented use) is NOT a tty, so the
# content passes through raw and less handles the control bytes itself. See
# lib/cygwin-safe.sh.
_HL7D_LIB_DIR="$(cd "$(dirname "$0")" && pwd)"
if [ -r "$_HL7D_LIB_DIR/cygwin-safe.sh" ]; then
  # shellcheck disable=SC1090,SC1091
  . "$_HL7D_LIB_DIR/cygwin-safe.sh"
fi

# Degrade safe: raw passthrough whenever the sanitizer is still undefined at
# this point. That covers a missing library AND a readable-but-older library
# that does not define _sanitize_ctl_tty; without this, every output pipeline
# below would end in "command not found" and drop its output.
if ! declare -F _sanitize_ctl_tty >/dev/null 2>&1; then
  _sanitize_ctl_tty() { cat; }
fi
# Report a fatal error on stderr, prefixed with the tool name, and exit 1.
# Arguments: the message words (joined with single spaces).
die() {
  local message="$*"
  printf 'hl7-desanitize: %s\n' "$message" >&2
  exit 1
}
# Option defaults: use the default lookup table, stream mode, read stdin.
table="$DEFAULT_TABLE"
single_token=""
input_file=""

# Parse the command line into the globals table / single_token / input_file.
#   --table PATH    use PATH instead of the default lookup table
#   --token TOKEN   look up one token instead of rewriting a stream
#   -h | --help     print the header comment of this script and exit 0
#   FILE            input to desanitize (the last one given wins)
# A value-taking flag given without its value is a fatal error; previously it
# silently produced an empty value (a bare --token then fell through to stream
# mode and waited on stdin).
_hl7d_parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --table)
        [ $# -ge 2 ] || die "--table requires a PATH argument"
        table="$2"
        shift
        ;;
      --token)
        [ $# -ge 2 ] || die "--token requires a TOKEN argument"
        single_token="$2"
        shift
        ;;
      -h|--help)
        # Help text is lines 2-20 of this script.
        # NOTE(review): confirm that range still matches the header comment.
        sed -n '2,20p' "$0"
        exit 0
        ;;
      -*)
        die "unknown flag: $1"
        ;;
      *)
        input_file="$1"
        ;;
    esac
    shift
  done
}
_hl7d_parse_args "$@"
# Both modes need the lookup table; stop early when it is not a regular file.
if [ ! -f "$table" ]; then
  die "no lookup table at $table (sanitize first?)"
fi

# --token mode: print column 3 of the first non-header row whose first column
# equals the requested token. When no row matches, awk reports the problem on
# stderr and exits 2; that status becomes the exit status of this script.
if [ -n "$single_token" ]; then
  awk -F'\t' -v t="$single_token" '
    NR > 1 && $1 == t { print $3; found = 1; exit }
    END {
      if (!found) {
        print "no such token: " t > "/dev/stderr"
        exit 2
      }
    }
  ' "$table" | _sanitize_ctl_tty
  # Report the status of awk, not of the sanitizer stage.
  exit "${PIPESTATUS[0]}"
fi
# Token substitution program for stream mode (FILE or stdin).
#
# awk is given two inputs: the lookup table first, then the text to
# desanitize. Table rows are tab-separated: token \t category \t original.
# Every [[CATEGORY_NNNN]] token in the text is replaced by its original value;
# a token with no table entry is emitted as <<<unmapped:[[...]]>>>.
#
# Each line is consumed strictly left to right and already-emitted text is
# never rescanned. Rescanning the whole line looped forever on unknown tokens
# (the placeholder contains the token itself) and on original values that
# themselves look like a token.
awk_script='
BEGIN { RS = "\n" }

# First input (the lookup table): remember token -> original value.
# FILENAME is compared with ARGV[1] instead of testing NR == FNR, because
# NR == FNR is also true for the text input when the table has no records.
FILENAME == ARGV[1] {
  # Skip the header row and rows with an empty token column.
  if ($1 == "token" || $1 == "") next
  # cols: 1=token, 2=category, 3=original
  tokens[$1] = $3
  next
}

# Remaining input: rewrite the tokens of one line.
{
  done_part = ""   # text already processed, never scanned again
  rest = $0        # text still to be scanned
  while (match(rest, /\[\[[A-Z_]+_[0-9]+\]\]/)) {
    tok = substr(rest, RSTART, RLENGTH)
    if (tok in tokens) {
      repl = tokens[tok]
    } else {
      # Unknown token: keep it visible, wrapped in an unmapped marker.
      repl = "<<<unmapped:" tok ">>>"
    }
    done_part = done_part substr(rest, 1, RSTART - 1) repl
    rest = substr(rest, RSTART + RLENGTH)
  }
  print done_part rest
}
'
# Stream mode: desanitize the named file, or stdin when no file was given.
# The table is always the first awk input and the text the second.
stream_source="${input_file:-/dev/stdin}"
awk -F'\t' "$awk_script" "$table" "$stream_source" | _sanitize_ctl_tty
# Exit with the status of awk rather than that of the sanitizer stage.
exit "${PIPESTATUS[0]}"