v0.7.1: status line moves from above-prompt to between-turn (post-input, pre-response)
render_status_line is no longer called before printf 'you[model]>'. It is
now invoked after read_user_input returns and after @file/PHI preprocessing
completes, immediately before add_user_text/agent_turn. The visual effect is
that the dim status divider sits BETWEEN turns — summarising the cost of
the just-completed turn as the user heads into the next one.
The slash-command and empty-input paths all 'continue' before the new call
site, so no status line renders on /help, /status, /clear, /quit, etc.
First-turn suppression continues to live inside render_status_line (it
returns silently while STATUS_* globals are empty and _LARRY_TURNS=0), so
the very first prompt of a session still has nothing above the response.
The on-demand /status command is unchanged; LARRY_NO_STATUS=1 still disables
the status line entirely. Comments updated at render_status_line, the STATUS_* globals
header, the help block, and the LARRY_NO_STATUS env doc.
Supersedes the earlier combined v0.7.1 (af2ffe8). PHI auto-detection and
session-artifact upload are intentionally NOT in this build — this is the
narrow status-line-only v0.7.1 Bryan requested. lib/hl7-sanitize.sh
returns to its v0.7.0 shape (PHONE/EMAIL normalize-value cases + the
normalize-value subcommand are removed because nothing in larry.sh now
calls them).
LARRY_VERSION + VERSION -> 0.7.1.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
af2ffe883c
commit
0927238dcd
543
larry.sh
543
larry.sh
@ -37,7 +37,7 @@
|
|||||||
# /help this help
|
# /help this help
|
||||||
#
|
#
|
||||||
# Env knobs (v0.6.9):
|
# Env knobs (v0.6.9):
|
||||||
# LARRY_NO_STATUS=1 disable the status line above the prompt
|
# LARRY_NO_STATUS=1 disable the between-turn status line
|
||||||
#
|
#
|
||||||
# Inline file syntax: @<path> in any prompt inlines the file's contents
|
# Inline file syntax: @<path> in any prompt inlines the file's contents
|
||||||
# (TAB to autocomplete). See /help for details.
|
# (TAB to autocomplete). See /help for details.
|
||||||
@ -852,369 +852,6 @@ preprocess_phi_markers() {
|
|||||||
printf '%s' "$input"
|
printf '%s' "$input"
|
||||||
}
|
}
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# v0.7.1 — Automatic PHI detection
|
|
||||||
#
|
|
||||||
# Runs BEFORE preprocess_phi_markers (so explicit markers still take precedence)
|
|
||||||
# and BEFORE @file inline expansion has already been done (so file contents
|
|
||||||
# don't get token-walked here — they're tokenized by hl7_sanitize when needed).
|
|
||||||
#
|
|
||||||
# Strategy: walk every whitespace-delimited token and decide one of:
|
|
||||||
# * leave alone (path / URL / already-token / already-marker / timestamp)
|
|
||||||
# * tokenize via hl7-sanitize.sh tokenize-value (same pipeline as manual)
|
|
||||||
#
|
|
||||||
# Bryan's directive: err on the side of caution. We tokenize anything that
|
|
||||||
# *looks* like PHI as long as it doesn't interfere with required canonical
|
|
||||||
# matching. The same tokenize-value pipeline handles normalization, so
|
|
||||||
# different surface forms of the same value share one token across the session
|
|
||||||
# and across sanitized files.
|
|
||||||
#
|
|
||||||
# Modes (env LARRY_AUTO_PHI or /auto-phi slash):
|
|
||||||
# confirm (default) — prompt Y/n on first sighting of a name-like value
|
|
||||||
# aggressive — tokenize every match silently
|
|
||||||
# off — disable auto-detection entirely
|
|
||||||
#
|
|
||||||
# Per-turn override: prepend "!nophi " to skip auto-detection for that turn.
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
# Mode (default confirm). Promoted to AUTO_PHI_MODE so /auto-phi can mutate it.
|
|
||||||
AUTO_PHI_MODE="${LARRY_AUTO_PHI:-confirm}"
|
|
||||||
|
|
||||||
# Per-session memory: declined values (user said "n" to confirm prompt) and
|
|
||||||
# accepted values (cached so we don't re-prompt). Keys are normalized canonical
|
|
||||||
# strings. For bash<4, fall back to two pipe-delimited strings.
|
|
||||||
if (( BASH_VERSINFO[0] >= 4 )); then
|
|
||||||
declare -A AUTO_PHI_ACCEPTED 2>/dev/null
|
|
||||||
declare -A AUTO_PHI_DECLINED 2>/dev/null
|
|
||||||
else
|
|
||||||
AUTO_PHI_ACCEPTED_LIST=""
|
|
||||||
AUTO_PHI_DECLINED_LIST=""
|
|
||||||
fi
|
|
||||||
AUTO_PHI_SESSION_COUNT=0
|
|
||||||
|
|
||||||
# Built-in allowlist of common non-PHI two-word phrases that match the loose
|
|
||||||
# "Title Case Title Case" name pattern. Lowercased + sorted on lookup.
|
|
||||||
# This is intentionally small — confirm-mode catches the rest interactively.
|
|
||||||
_AUTO_PHI_NAME_ALLOWLIST=$(cat <<'EOF'
|
|
||||||
home assistant
|
|
||||||
mac studio
|
|
||||||
mac mini
|
|
||||||
mac pro
|
|
||||||
mac book
|
|
||||||
apple watch
|
|
||||||
apple tv
|
|
||||||
new york
|
|
||||||
los angeles
|
|
||||||
san francisco
|
|
||||||
san diego
|
|
||||||
las vegas
|
|
||||||
united states
|
|
||||||
united kingdom
|
|
||||||
north america
|
|
||||||
south america
|
|
||||||
microsoft office
|
|
||||||
google cloud
|
|
||||||
amazon web
|
|
||||||
visual studio
|
|
||||||
sublime text
|
|
||||||
android studio
|
|
||||||
docker desktop
|
|
||||||
node red
|
|
||||||
linux mint
|
|
||||||
windows server
|
|
||||||
ubuntu server
|
|
||||||
debian linux
|
|
||||||
red hat
|
|
||||||
oracle linux
|
|
||||||
EOF
|
|
||||||
)
|
|
||||||
|
|
||||||
_auto_phi_in_allowlist() {
|
|
||||||
local v_lower
|
|
||||||
v_lower=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
|
|
||||||
grep -Fxq -- "$v_lower" <<< "$_AUTO_PHI_NAME_ALLOWLIST"
|
|
||||||
}
|
|
||||||
|
|
||||||
_auto_phi_seen_accepted() {
|
|
||||||
local key="$1"
|
|
||||||
if (( BASH_VERSINFO[0] >= 4 )); then
|
|
||||||
[ -n "${AUTO_PHI_ACCEPTED[$key]:-}" ]
|
|
||||||
else
|
|
||||||
[[ "|$AUTO_PHI_ACCEPTED_LIST|" == *"|$key|"* ]]
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
_auto_phi_seen_declined() {
|
|
||||||
local key="$1"
|
|
||||||
if (( BASH_VERSINFO[0] >= 4 )); then
|
|
||||||
[ -n "${AUTO_PHI_DECLINED[$key]:-}" ]
|
|
||||||
else
|
|
||||||
[[ "|$AUTO_PHI_DECLINED_LIST|" == *"|$key|"* ]]
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
_auto_phi_mark_accepted() {
|
|
||||||
local key="$1"
|
|
||||||
if (( BASH_VERSINFO[0] >= 4 )); then
|
|
||||||
AUTO_PHI_ACCEPTED[$key]=1
|
|
||||||
else
|
|
||||||
AUTO_PHI_ACCEPTED_LIST="${AUTO_PHI_ACCEPTED_LIST}|$key"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
_auto_phi_mark_declined() {
|
|
||||||
local key="$1"
|
|
||||||
if (( BASH_VERSINFO[0] >= 4 )); then
|
|
||||||
AUTO_PHI_DECLINED[$key]=1
|
|
||||||
else
|
|
||||||
AUTO_PHI_DECLINED_LIST="${AUTO_PHI_DECLINED_LIST}|$key"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# _auto_phi_classify VALUE → echoes a category (EMAIL/SSN/PHONE/DOB/MRN/NAME/NAME_LOOSE)
|
|
||||||
# or empty string if the value is not a tokenization candidate.
|
|
||||||
_auto_phi_classify() {
|
|
||||||
local v="$1"
|
|
||||||
[ -z "$v" ] && return 0
|
|
||||||
|
|
||||||
# Already-token format: [[CATEGORY_NNNN]] — leave alone.
|
|
||||||
[[ "$v" =~ ^\[\[[A-Z][A-Z0-9_]*_[0-9]+\]\]$ ]] && return 0
|
|
||||||
|
|
||||||
# Already-marker formats: @@VALUE@@, @@VALUE, {{phi:...}} — manual handles.
|
|
||||||
[[ "$v" == @@* ]] && return 0
|
|
||||||
[[ "$v" == *@@ ]] && return 0
|
|
||||||
[[ "$v" == \{\{phi:* ]] && return 0
|
|
||||||
|
|
||||||
# Path-like — leave alone.
|
|
||||||
case "$v" in
|
|
||||||
/*|./*|../*|~/*) return 0 ;;
|
|
||||||
[A-Z]:\\*) return 0 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# URL-like — leave alone.
|
|
||||||
case "$v" in
|
|
||||||
http://*|https://*|ssh://*|ftp://*|sftp://*|file://*|ws://*|wss://*) return 0 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Strip a single trailing punctuation that is sentence-grammar, not part
|
|
||||||
# of the value. Re-evaluate the cleaned form.
|
|
||||||
local trimmed="$v"
|
|
||||||
case "$trimmed" in
|
|
||||||
*[.,\;:\!\?\)]) trimmed="${trimmed%?}" ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Email-like. Must have exactly one @, dotted domain.
|
|
||||||
if [[ "$trimmed" =~ ^[^@[:space:]]+@[^@[:space:]]+\.[^@[:space:]]+$ ]]; then
|
|
||||||
printf 'EMAIL'; return
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Long-digit timestamp guard FIRST (before phone/SSN/MRN checks). Pure
|
|
||||||
# digits 13+ chars OR 10 chars starting with '1' (epoch seconds / millis).
|
|
||||||
# These would otherwise match the bare-phone or MRN patterns. Leave alone.
|
|
||||||
if [[ "$trimmed" =~ ^[0-9]+$ ]]; then
|
|
||||||
local n="${#trimmed}"
|
|
||||||
if [ "$n" -ge 13 ]; then return 0; fi
|
|
||||||
if [ "$n" -eq 10 ] && [[ "$trimmed" == 1* ]]; then return 0; fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# SSN-like. 9 digits with optional dashes (must total exactly 9 digits).
|
|
||||||
if [[ "$trimmed" =~ ^[0-9]{3}-?[0-9]{2}-?[0-9]{4}$ ]]; then
|
|
||||||
local d="${trimmed//-/}"
|
|
||||||
[ "${#d}" -eq 9 ] && { printf 'SSN'; return; }
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Phone-like. The regex needs to match the FULL token, including a "(212)"
|
|
||||||
# prefix when the next token is "555-1234". We can't see across token
|
|
||||||
# boundaries here, so we accept the most-common single-token forms:
|
|
||||||
# 555-123-4567 5551234567 555.123.4567
|
|
||||||
# 555 123 4567 (212)555-1234 (212)5551234
|
|
||||||
# Multi-token "(212) 555-1234" is reconstructed by the two-token-PHONE
|
|
||||||
# pass below in auto_detect_phi (caller side).
|
|
||||||
if [[ "$trimmed" =~ ^\(?[0-9]{3}\)?[-\.\ ]?[0-9]{3}[-\.\ ]?[0-9]{4}$ ]]; then
|
|
||||||
# Distinguish from pure-digit MRN: 10-digit all-numeric reaches here
|
|
||||||
# too. If trimmed is 10 pure digits starting with '1' we already
|
|
||||||
# returned above (timestamp). Otherwise treat as PHONE.
|
|
||||||
printf 'PHONE'; return
|
|
||||||
fi
|
|
||||||
|
|
||||||
# DOB / date-like.
|
|
||||||
if [[ "$trimmed" =~ ^[0-9]{1,4}[/-][0-9]{1,2}[/-][0-9]{1,4}$ ]]; then
|
|
||||||
printf 'DOB'; return
|
|
||||||
fi
|
|
||||||
|
|
||||||
# MRN-like: pure digits, 6-12 chars (conservative — see spec rule #9).
|
|
||||||
if [[ "$trimmed" =~ ^[0-9]+$ ]]; then
|
|
||||||
local n2="${#trimmed}"
|
|
||||||
if [ "$n2" -ge 6 ] && [ "$n2" -le 12 ]; then
|
|
||||||
printf 'MRN'; return
|
|
||||||
fi
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Name-like (HL7 carat).
|
|
||||||
if [[ "$trimmed" =~ ^[A-Za-z]+\^[A-Za-z]+ ]]; then
|
|
||||||
printf 'NAME'; return
|
|
||||||
fi
|
|
||||||
|
|
||||||
# The loose "Title Case Title Case" pattern is handled across two whitespace
|
|
||||||
# tokens at the caller level — not classified per-token here.
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# auto_detect_phi INPUT — main entrypoint. Echoes the rewritten input.
|
|
||||||
# Per-turn override: input starting with "!nophi " causes the function to
|
|
||||||
# strip the prefix and return without scanning.
|
|
||||||
auto_detect_phi() {
|
|
||||||
local input="$1"
|
|
||||||
local sanitize_script="$LARRY_LIB_DIR/hl7-sanitize.sh"
|
|
||||||
[ -x "$sanitize_script" ] || { printf '%s' "$input"; return; }
|
|
||||||
|
|
||||||
# Per-turn override.
|
|
||||||
if [[ "$input" == '!nophi '* ]]; then
|
|
||||||
printf '%s' "${input#!nophi }"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
if [ "$AUTO_PHI_MODE" = "off" ]; then
|
|
||||||
printf '%s' "$input"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Build a list of replacements (orig\tcategory\token) so we don't mutate
|
|
||||||
# the string mid-scan (which would invalidate offsets).
|
|
||||||
local -a hits=()
|
|
||||||
|
|
||||||
# Pass A: per-whitespace-token classification.
|
|
||||||
local IFS=$' \t\n' tok
|
|
||||||
local -a tokens
|
|
||||||
read -r -a tokens <<< "$input"
|
|
||||||
local t cat key strip_trailing
|
|
||||||
for t in "${tokens[@]}"; do
|
|
||||||
[ -z "$t" ] && continue
|
|
||||||
# Also split comma-delimited sub-tokens (e.g. "a@b.com,c@d.com").
|
|
||||||
local sub
|
|
||||||
for sub in ${t//,/ }; do
|
|
||||||
[ -z "$sub" ] && continue
|
|
||||||
cat=$(_auto_phi_classify "$sub")
|
|
||||||
[ -z "$cat" ] && continue
|
|
||||||
# Strip trailing sentence-grammar punct for the actual replace string,
|
|
||||||
# but only one char to match classify's behaviour.
|
|
||||||
strip_trailing="$sub"
|
|
||||||
case "$strip_trailing" in
|
|
||||||
*[.,\;:\!\?\)]) strip_trailing="${strip_trailing%?}" ;;
|
|
||||||
esac
|
|
||||||
hits+=("$strip_trailing|$cat")
|
|
||||||
done
|
|
||||||
done
|
|
||||||
|
|
||||||
# Pass B: loose "Title Case Title Case" two-word names. Detect using a
|
|
||||||
# tolerant regex over the prose; per Bryan's confirm-first default, every
|
|
||||||
# hit goes through confirm unless mode=aggressive.
|
|
||||||
local i name_pair
|
|
||||||
for ((i=0; i<${#tokens[@]}-1; i++)); do
|
|
||||||
local left="${tokens[$i]}" right="${tokens[$i+1]}"
|
|
||||||
# Strip one trailing punct from right for the test.
|
|
||||||
local right_clean="$right"
|
|
||||||
case "$right_clean" in
|
|
||||||
*[.,\;:\!\?\)]) right_clean="${right_clean%?}" ;;
|
|
||||||
esac
|
|
||||||
if [[ "$left" =~ ^[A-Z][a-z]+$ ]] && [[ "$right_clean" =~ ^[A-Z][a-z]+$ ]]; then
|
|
||||||
name_pair="$left $right_clean"
|
|
||||||
# Allowlist check (case-insensitive).
|
|
||||||
if _auto_phi_in_allowlist "$name_pair"; then
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
hits+=("$name_pair|NAME_LOOSE")
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Pass C: two-token phone "(212) 555-1234" or "(212) 5551234" etc. The
|
|
||||||
# single-token classifier can't see across whitespace.
|
|
||||||
local phone_pair
|
|
||||||
for ((i=0; i<${#tokens[@]}-1; i++)); do
|
|
||||||
local p_left="${tokens[$i]}" p_right="${tokens[$i+1]}"
|
|
||||||
# Strip one trailing punct from p_right.
|
|
||||||
case "$p_right" in
|
|
||||||
*[.,\;:\!\?\)]) p_right="${p_right%?}" ;;
|
|
||||||
esac
|
|
||||||
if [[ "$p_left" =~ ^\(?[0-9]{3}\)?$ ]] \
|
|
||||||
&& [[ "$p_right" =~ ^[0-9]{3}[-\.]?[0-9]{4}$ ]]; then
|
|
||||||
phone_pair="$p_left $p_right"
|
|
||||||
hits+=("$phone_pair|PHONE")
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# No hits — fast path.
|
|
||||||
[ ${#hits[@]} -eq 0 ] && { printf '%s' "$input"; return 0; }
|
|
||||||
|
|
||||||
# Dedupe hits while preserving order.
|
|
||||||
local -A seen_hits=()
|
|
||||||
local -a uhits=()
|
|
||||||
local h
|
|
||||||
for h in "${hits[@]}"; do
|
|
||||||
if [ -z "${seen_hits[$h]:-}" ]; then
|
|
||||||
seen_hits[$h]=1
|
|
||||||
uhits+=("$h")
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Apply each hit: confirm where needed, then tokenize + substitute.
|
|
||||||
local summary=""
|
|
||||||
local mode="$AUTO_PHI_MODE"
|
|
||||||
for h in "${uhits[@]}"; do
|
|
||||||
local orig="${h%|*}"
|
|
||||||
local cat="${h##*|}"
|
|
||||||
local actual_cat="$cat"
|
|
||||||
[ "$cat" = "NAME_LOOSE" ] && actual_cat="NAME"
|
|
||||||
|
|
||||||
# Use canonical normalize for memory key (so "John Smith" / "JOHN SMITH"
|
|
||||||
# share one decision).
|
|
||||||
local mem_key
|
|
||||||
mem_key=$("$sanitize_script" normalize-value "$orig" "$actual_cat" 2>/dev/null) || mem_key="$orig"
|
|
||||||
[ -z "$mem_key" ] && mem_key="$orig"
|
|
||||||
|
|
||||||
# User previously declined this value this session.
|
|
||||||
if _auto_phi_seen_declined "$mem_key"; then continue; fi
|
|
||||||
|
|
||||||
# Confirm-first prompting only for NAME_LOOSE (the high-FP-rate detector).
|
|
||||||
# Strict-format hits (EMAIL/SSN/PHONE/DOB/MRN/NAME-with-caret) are always
|
|
||||||
# tokenized. This matches Bryan's "err on the side of caution" while
|
|
||||||
# keeping confirms rare and high-signal.
|
|
||||||
if [ "$cat" = "NAME_LOOSE" ] && [ "$mode" = "confirm" ] \
|
|
||||||
&& ! _auto_phi_seen_accepted "$mem_key"; then
|
|
||||||
local ans
|
|
||||||
printf '%sphi auto>%s possible PHI detected: "%s". Tokenize? [Y/n] ' \
|
|
||||||
"$C_YELLOW" "$C_RESET" "$orig" >&2
|
|
||||||
IFS= read -r ans </dev/tty 2>/dev/null || ans=""
|
|
||||||
case "$ans" in
|
|
||||||
n|N|no|NO|No) _auto_phi_mark_declined "$mem_key"; continue ;;
|
|
||||||
*) _auto_phi_mark_accepted "$mem_key" ;;
|
|
||||||
esac
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Tokenize.
|
|
||||||
local token
|
|
||||||
token=$("$sanitize_script" tokenize-value --category "$actual_cat" "$orig" 2>/dev/null)
|
|
||||||
[ -z "$token" ] && continue
|
|
||||||
|
|
||||||
# Substitute. Use literal string replacement (all occurrences).
|
|
||||||
input="${input//"$orig"/"$token"}"
|
|
||||||
|
|
||||||
# Build summary line.
|
|
||||||
if [ -z "$summary" ]; then
|
|
||||||
summary="${orig}→${token}"
|
|
||||||
else
|
|
||||||
summary="${summary}, ${orig}→${token}"
|
|
||||||
fi
|
|
||||||
AUTO_PHI_SESSION_COUNT=$((AUTO_PHI_SESSION_COUNT + 1))
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ -n "$summary" ]; then
|
|
||||||
local count
|
|
||||||
count=$(awk -F', ' '{print NF}' <<< "$summary")
|
|
||||||
printf '%sphi auto>%s tokenized %d value(s): %s\n' \
|
|
||||||
"$C_YELLOW" "$C_RESET" "$count" "$summary" >&2
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf '%s' "$input"
|
|
||||||
}
|
|
||||||
|
|
||||||
tool_hl7_sanitize() {
|
tool_hl7_sanitize() {
|
||||||
local input_path="$1" strict="${2:-0}"
|
local input_path="$1" strict="${2:-0}"
|
||||||
_lib_err_if_missing || return
|
_lib_err_if_missing || return
|
||||||
@ -1479,7 +1116,8 @@ _LARRY_TURNS=0
|
|||||||
# Two DIFFERENT parsers needed (easy footgun called out by Pax).
|
# Two DIFFERENT parsers needed (easy footgun called out by Pax).
|
||||||
#
|
#
|
||||||
# STATUS_* globals are updated by _parse_response_headers after every API
|
# STATUS_* globals are updated by _parse_response_headers after every API
|
||||||
# call, then read by render_status_line which is invoked before each prompt.
|
# call, then read by render_status_line which (as of v0.7.1) is invoked
|
||||||
|
# between turns — after the user submits input and before agent_turn runs.
|
||||||
# Empty string = "unknown" — render as "—", never as "0%".
|
# Empty string = "unknown" — render as "—", never as "0%".
|
||||||
STATUS_ctx_used_tokens="" # input + cache_creation + cache_read for LAST turn
|
STATUS_ctx_used_tokens="" # input + cache_creation + cache_read for LAST turn
|
||||||
STATUS_ctx_window="" # from MODEL_CONTEXT_WINDOWS lookup
|
STATUS_ctx_window="" # from MODEL_CONTEXT_WINDOWS lookup
|
||||||
@ -1652,10 +1290,14 @@ _parse_response_headers() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# render_status_line — print the dim status line above the prompt.
|
# render_status_line — print the dim 1-line status footer between turns.
|
||||||
|
# As of v0.7.1 this renders AFTER the user submits input and BEFORE
|
||||||
|
# agent_turn begins (was: above the prompt, v0.6.9–v0.7.0). It now reads
|
||||||
|
# as a "between turns" marker summarising the just-completed turn's cost
|
||||||
|
# heading into the new request.
|
||||||
# Honors LARRY_NO_STATUS=1. Prints nothing if we have no data yet (first
|
# Honors LARRY_NO_STATUS=1. Prints nothing if we have no data yet (first
|
||||||
# turn of a session). Always ends with a trailing newline so the prompt
|
# turn of a session). Always ends with a trailing newline so the next
|
||||||
# lands cleanly below.
|
# stream lands cleanly below.
|
||||||
render_status_line() {
|
render_status_line() {
|
||||||
[ "${LARRY_NO_STATUS:-0}" = "1" ] && return 0
|
[ "${LARRY_NO_STATUS:-0}" = "1" ] && return 0
|
||||||
|
|
||||||
@ -2794,23 +2436,6 @@ Slash commands:
|
|||||||
all collapse to the same token.
|
all collapse to the same token.
|
||||||
Category is auto-detected from value shape (MRN/SSN/DOB/NAME/MANUAL).
|
Category is auto-detected from value shape (MRN/SSN/DOB/NAME/MANUAL).
|
||||||
{{phi:VALUE}} / {{phi:CAT:VALUE}} legacy syntax (still works)
|
{{phi:VALUE}} / {{phi:CAT:VALUE}} legacy syntax (still works)
|
||||||
|
|
||||||
Automatic PHI detection (v0.7.1):
|
|
||||||
Larry now scans every prompt for PHI-shaped values and tokenizes them
|
|
||||||
BEFORE sending to Anthropic. Detects emails, SSNs, phones, dates,
|
|
||||||
MRNs (6-12 pure digits), HL7 caret-names, "Last, First" names, and
|
|
||||||
title-case "John Smith" patterns. Paths, URLs, timestamps, and a small
|
|
||||||
allowlist (Home Assistant, Mac Studio, etc.) are skipped.
|
|
||||||
|
|
||||||
Modes (env LARRY_AUTO_PHI or /auto-phi):
|
|
||||||
confirm default — prompts Y/n on loose name-like matches once per
|
|
||||||
session; explicit-format hits (email/SSN/phone/etc.) are
|
|
||||||
always tokenized
|
|
||||||
aggressive tokenize every match silently
|
|
||||||
off disable auto-detection entirely (manual markers still work)
|
|
||||||
|
|
||||||
Per-turn override: prefix any prompt with "!nophi " to skip the scan
|
|
||||||
for that turn only. Manual @@VALUE / {{phi:VALUE}} markers always win.
|
|
||||||
/redetect re-scan for HCIROOT/HCISITE/tools
|
/redetect re-scan for HCIROOT/HCISITE/tools
|
||||||
/sites list site dirs under HCIROOT
|
/sites list site dirs under HCIROOT
|
||||||
/site <name> switch HCISITE for this session
|
/site <name> switch HCISITE for this session
|
||||||
@ -2834,22 +2459,13 @@ Multi-line input:
|
|||||||
a warning. TAB after @ autocompletes against files in cwd (fzf if installed).
|
a warning. TAB after @ autocompletes against files in cwd (fzf if installed).
|
||||||
|
|
||||||
Status line (v0.6.9, repositioned v0.7.1):
|
Status line (v0.6.9, repositioned v0.7.1):
|
||||||
A dim 1-line summary now prints BELOW each just-completed turn (after the
|
A dim 1-line summary prints between turns — after you submit input and
|
||||||
Larry response, before the next you[...]> prompt) so it stays adjacent
|
before larry's response begins — summarising the just-completed turn:
|
||||||
to the conversation flow:
|
|
||||||
OAuth: ─ ctx 12% (24K/200K) ─ 5h 1.8% reset 19:45 ─ 7d 73.7% reset Mon Jun 2 ─
|
OAuth: ─ ctx 12% (24K/200K) ─ 5h 1.8% reset 19:45 ─ 7d 73.7% reset Mon Jun 2 ─
|
||||||
API key: ─ ctx 12% (24K/200K) ─ $0.213 session ─ 14 turns ─
|
API key: ─ ctx 12% (24K/200K) ─ $0.213 session ─ 14 turns ─
|
||||||
Disable entirely with LARRY_NO_STATUS=1. Force re-display with /status.
|
Disable entirely with LARRY_NO_STATUS=1. Force re-display with /status.
|
||||||
Suppressed automatically on the first turn (no data yet).
|
Suppressed automatically on the first turn (no data yet).
|
||||||
|
|
||||||
Memory upload at session close (v0.7.1):
|
|
||||||
When LARRY_MEMORY_UPLOAD_URL is set, on clean exit Larry POSTs three
|
|
||||||
artifacts to the configured endpoint: $LARRY_HOME/log/headers.log
|
|
||||||
(header-log), $LARRY_HOME/sessions/<id>.log.md (session-log), and
|
|
||||||
<id>.messages.json (session-messages). Each request carries
|
|
||||||
X-Larry-Source, X-Larry-Version, and X-Session-Id headers.
|
|
||||||
Unset = silent skip with a one-line warn at exit.
|
|
||||||
|
|
||||||
TAB completion (v0.6.6/v0.6.7/v0.7.0):
|
TAB completion (v0.6.6/v0.6.7/v0.7.0):
|
||||||
Type '/' followed by any prefix and press TAB.
|
Type '/' followed by any prefix and press TAB.
|
||||||
/h<TAB> → /help
|
/h<TAB> → /help
|
||||||
@ -3004,8 +2620,6 @@ _LARRY_SLASH_CMDS_DESC=(
|
|||||||
[/hl7]="<SEGMENT> print full field list for an HL7 segment (e.g. /hl7 PID)"
|
[/hl7]="<SEGMENT> print full field list for an HL7 segment (e.g. /hl7 PID)"
|
||||||
[/hl7-fields]="<SEG.FIELD> print component breakdown (e.g. /hl7-fields PID.5)"
|
[/hl7-fields]="<SEG.FIELD> print component breakdown (e.g. /hl7-fields PID.5)"
|
||||||
[/mouse]="on|off toggle xterm mouse mode for this session"
|
[/mouse]="on|off toggle xterm mouse mode for this session"
|
||||||
[/auto-phi]="on|off|aggressive|confirm — runtime control for v0.7.1 auto PHI detection"
|
|
||||||
[/auto-phi-status]="show current auto-PHI mode + session tokenization count"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# __larry_complete_slash — bound to TAB via `bind -x` (see _install_readline_tab).
|
# __larry_complete_slash — bound to TAB via `bind -x` (see _install_readline_tab).
|
||||||
@ -3492,83 +3106,8 @@ _uninstall_mouse_mode() {
|
|||||||
printf '\033[?1006l\033[?1000l' 2>/dev/null || true
|
printf '\033[?1006l\033[?1000l' 2>/dev/null || true
|
||||||
_LARRY_MOUSE_ACTIVE=0
|
_LARRY_MOUSE_ACTIVE=0
|
||||||
}
|
}
|
||||||
# Ensure mouse mode is disabled on REPL exit (Ctrl-C, /quit, EOF). The trap
|
# Ensure mouse mode is disabled on REPL exit (Ctrl-C, /quit, EOF). Idempotent.
|
||||||
# itself is registered AFTER the v0.7.1 upload function below, so we can
|
trap '_uninstall_mouse_mode' EXIT INT TERM
|
||||||
# chain mouse-mode teardown after the memory upload in a single trap.
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
# v0.7.1 — session-artifact upload at session close.
|
|
||||||
#
|
|
||||||
# When LARRY_MEMORY_UPLOAD_URL is set, on clean exit we POST the headers.log,
|
|
||||||
# the session log.md, and the messages.json file to the configured endpoint.
|
|
||||||
# Each artifact goes as its own request with distinguishing headers so the
|
|
||||||
# ingest side can route appropriately.
|
|
||||||
#
|
|
||||||
# Bryan's memory pipeline (fswatch + ingest daemon) only sees files on his
|
|
||||||
# Mac; the WORK BOX (MobaXterm/Cygwin) where larry.sh runs is isolated, so
|
|
||||||
# we upload over the existing tailscale/network path.
|
|
||||||
#
|
|
||||||
# Safety:
|
|
||||||
# - headers.log filters to ^anthropic-* / ^retry-after: response headers
|
|
||||||
# only — request auth headers (Authorization / x-api-key) are NEVER
|
|
||||||
# captured into the log at write time (see _parse_response_headers).
|
|
||||||
# - session log.md contains conversation content. By design Bryan uses
|
|
||||||
# PHI markers / auto-PHI, so PHI is already tokenized before reaching
|
|
||||||
# the log. Auth tokens never enter the conversation stream.
|
|
||||||
# - messages.json contains the same token-substituted conversation
|
|
||||||
# content as the log.
|
|
||||||
#
|
|
||||||
# Set LARRY_MEMORY_UPLOAD_URL=<endpoint> (e.g. on proxy.bjnoela.com) to
|
|
||||||
# enable. Unset = silent skip with a one-line warn at session close.
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
|
||||||
_LARRY_UPLOAD_FIRED=0
|
|
||||||
upload_session_artifacts() {
|
|
||||||
# Run once per session (in case both clean exit and EXIT trap fire).
|
|
||||||
[ "$_LARRY_UPLOAD_FIRED" = "1" ] && return 0
|
|
||||||
_LARRY_UPLOAD_FIRED=1
|
|
||||||
|
|
||||||
local url="${LARRY_MEMORY_UPLOAD_URL:-}"
|
|
||||||
if [ -z "$url" ]; then
|
|
||||||
warn "(memory upload skipped: LARRY_MEMORY_UPLOAD_URL not configured)"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
command -v curl >/dev/null 2>&1 || { warn "(memory upload skipped: curl missing)"; return 0; }
|
|
||||||
|
|
||||||
local artifacts=(
|
|
||||||
"$LARRY_HOME/log/headers.log|headers-log|text/plain"
|
|
||||||
"$LOG_FILE|session-log|text/markdown"
|
|
||||||
"$MESSAGES_FILE|session-messages|application/json"
|
|
||||||
)
|
|
||||||
local entry path kind ctype http_code uploaded=0
|
|
||||||
for entry in "${artifacts[@]}"; do
|
|
||||||
path="${entry%%|*}"
|
|
||||||
kind="${entry#*|}"; kind="${kind%%|*}"
|
|
||||||
ctype="${entry##*|}"
|
|
||||||
[ -f "$path" ] || continue
|
|
||||||
[ -s "$path" ] || continue
|
|
||||||
http_code=$(curl -fsS --max-time 15 \
|
|
||||||
-o /dev/null -w '%{http_code}' \
|
|
||||||
-X POST "$url" \
|
|
||||||
-H "Content-Type: $ctype" \
|
|
||||||
-H "X-Larry-Source: $kind" \
|
|
||||||
-H "X-Larry-Version: $LARRY_VERSION" \
|
|
||||||
-H "X-Session-Id: $SESSION_ID" \
|
|
||||||
--data-binary "@$path" 2>/dev/null) || http_code="000"
|
|
||||||
if [ "$http_code" = "200" ] || [ "$http_code" = "201" ] || [ "$http_code" = "202" ] || [ "$http_code" = "204" ]; then
|
|
||||||
uploaded=$((uploaded + 1))
|
|
||||||
else
|
|
||||||
warn "(memory upload: $kind → HTTP $http_code)"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
if [ "$uploaded" -gt 0 ]; then
|
|
||||||
larry_say "memory upload: posted $uploaded artifact(s) to $url"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Fire upload on EXIT trap too (covers Ctrl-C / EOF / kill). The function
|
|
||||||
# is idempotent (_LARRY_UPLOAD_FIRED guard) so the clean-exit call from
|
|
||||||
# main_loop won't double-post.
|
|
||||||
trap 'upload_session_artifacts || true; _uninstall_mouse_mode' EXIT INT TERM
|
|
||||||
|
|
||||||
read_user_input() {
|
read_user_input() {
|
||||||
# Returns user input via global LARRY_INPUT.
|
# Returns user input via global LARRY_INPUT.
|
||||||
@ -3709,14 +3248,8 @@ main_loop() {
|
|||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
local _short; _short=$(model_short_name)
|
local _short; _short=$(model_short_name)
|
||||||
# v0.7.1: status line is rendered AFTER the previous agent_turn (see end
|
# v0.7.1: status line moved from above-prompt to between-turn
|
||||||
# of loop), so it sits BELOW the just-completed prompt cycle / agent
|
# (see render_status_line and the post-input call below).
|
||||||
# response and ABOVE the next prompt. Net visual effect: status reads as
|
|
||||||
# a footer to the most-recent turn. This is "Option B" from the v0.7.1
|
|
||||||
# spec — chosen over cursor-manipulation Option A because `read -e`
|
|
||||||
# (readline) takes exclusive control of the cursor and inserting a
|
|
||||||
# repositioned footer below an active prompt is fragile on MobaXterm /
|
|
||||||
# Cygwin (readline redisplay clobbers manual cursor moves).
|
|
||||||
printf '%syou[%s]>%s ' "$C_GREEN" "$_short" "$C_RESET"
|
printf '%syou[%s]>%s ' "$C_GREEN" "$_short" "$C_RESET"
|
||||||
if ! read_user_input; then
|
if ! read_user_input; then
|
||||||
echo ""; break
|
echo ""; break
|
||||||
@ -3858,33 +3391,6 @@ main_loop() {
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
continue ;;
|
continue ;;
|
||||||
# v0.7.1: auto-PHI runtime control.
|
|
||||||
/auto-phi|/auto-phi\ *)
|
|
||||||
local _arg; _arg=$(_slash_args "/auto-phi" "$input")
|
|
||||||
case "${_arg:-status}" in
|
|
||||||
on|confirm)
|
|
||||||
AUTO_PHI_MODE="confirm"
|
|
||||||
larry_say "auto-phi: confirm (default — prompts on loose name-like matches)"
|
|
||||||
;;
|
|
||||||
aggressive)
|
|
||||||
AUTO_PHI_MODE="aggressive"
|
|
||||||
larry_say "auto-phi: aggressive (tokenizes all candidates silently)"
|
|
||||||
;;
|
|
||||||
off)
|
|
||||||
AUTO_PHI_MODE="off"
|
|
||||||
larry_say "auto-phi: off (explicit markers @@VALUE / {{phi:VALUE}} still work)"
|
|
||||||
;;
|
|
||||||
status)
|
|
||||||
larry_say "auto-phi mode: $AUTO_PHI_MODE (tokenized this session: $AUTO_PHI_SESSION_COUNT)"
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
err "usage: /auto-phi on|off|aggressive|confirm (no arg → status)"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
continue ;;
|
|
||||||
/auto-phi-status)
|
|
||||||
larry_say "auto-phi mode: $AUTO_PHI_MODE (tokenized this session: $AUTO_PHI_SESSION_COUNT)"
|
|
||||||
continue ;;
|
|
||||||
/show-last-tool)
|
/show-last-tool)
|
||||||
if [ -z "$_LARRY_LAST_TOOL_NAME" ]; then
|
if [ -z "$_LARRY_LAST_TOOL_NAME" ]; then
|
||||||
err "no tool calls yet this session"
|
err "no tool calls yet this session"
|
||||||
@ -4106,11 +3612,6 @@ EOF
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
# v0.7.1: auto-PHI detection runs BEFORE explicit markers, but the function
|
|
||||||
# itself defers to existing markers (it leaves anything inside @@...@@ or
|
|
||||||
# {{phi:...}} alone). Manual markers still win.
|
|
||||||
input=$(auto_detect_phi "$input")
|
|
||||||
|
|
||||||
# PHI preprocessing: replace any {{phi:VALUE}} markers with local tokens
|
# PHI preprocessing: replace any {{phi:VALUE}} markers with local tokens
|
||||||
# BEFORE the input enters conversation history and gets sent to Anthropic.
|
# BEFORE the input enters conversation history and gets sent to Anthropic.
|
||||||
if [[ "$input" == *"{{phi:"* ]] || [[ "$input" == *"@@"* ]]; then
|
if [[ "$input" == *"{{phi:"* ]] || [[ "$input" == *"@@"* ]]; then
|
||||||
@ -4118,17 +3619,21 @@ EOF
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log_section "user"; log_append "$input"
|
log_section "user"; log_append "$input"
|
||||||
|
# v0.7.1: render the persistent status line BETWEEN turns — after the
|
||||||
|
# user has submitted real (non-slash, non-empty) input and after all
|
||||||
|
# input preprocessing (@file, PHI) is done, but before agent_turn
|
||||||
|
# begins streaming. Slash commands and empty input `continue` above
|
||||||
|
# and never reach this point, matching the "no status in those paths"
|
||||||
|
# rule. First-turn suppression is enforced inside render_status_line
|
||||||
|
# (returns silently when there is no header data yet).
|
||||||
|
render_status_line
|
||||||
add_user_text "$input"
|
add_user_text "$input"
|
||||||
agent_turn "$system_prompt" || warn "turn ended with error"
|
agent_turn "$system_prompt" || warn "turn ended with error"
|
||||||
echo ""
|
echo ""
|
||||||
# v0.7.1: status line below the just-completed prompt cycle. Lives between
|
|
||||||
# turns, immediately above the next prompt. /status forces a re-render.
|
|
||||||
render_status_line
|
|
||||||
done
|
done
|
||||||
|
|
||||||
log_section "session-end"
|
log_section "session-end"
|
||||||
log_append "- end: $(date -Iseconds 2>/dev/null || date)"
|
log_append "- end: $(date -Iseconds 2>/dev/null || date)"
|
||||||
upload_session_artifacts || true
|
|
||||||
larry_say "session log: $LOG_FILE"
|
larry_say "session log: $LOG_FILE"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -202,17 +202,6 @@ normalize_value() {
|
|||||||
# it as an option flag. Same caveat applies for any future tr -d call.
|
# it as an option flag. Same caveat applies for any future tr -d call.
|
||||||
printf '%s' "$value" | tr -d '[:space:]-'
|
printf '%s' "$value" | tr -d '[:space:]-'
|
||||||
;;
|
;;
|
||||||
PHONE)
|
|
||||||
# Strip all non-digits so "(555) 123-4567" and "5551234567" share one
|
|
||||||
# token. Keep digits only.
|
|
||||||
printf '%s' "$value" | tr -cd '[:digit:]'
|
|
||||||
;;
|
|
||||||
EMAIL)
|
|
||||||
# Lowercase + trim. Emails are case-insensitive in the local part per
|
|
||||||
# most providers' practice (RFC technically allows local-part case
|
|
||||||
# sensitivity, but tokenizing as one value is fine for PHI).
|
|
||||||
printf '%s' "$value" | tr '[:upper:]' '[:lower:]' | awk '{$1=$1; print}'
|
|
||||||
;;
|
|
||||||
*)
|
*)
|
||||||
printf '%s' "$value" | awk '{$1=$1; print}'
|
printf '%s' "$value" | awk '{$1=$1; print}'
|
||||||
;;
|
;;
|
||||||
@ -448,17 +437,6 @@ case "$SUB" in
|
|||||||
count) shift; cmd_count "$@" ;;
|
count) shift; cmd_count "$@" ;;
|
||||||
tokenize-value) shift; cmd_tokenize_value "$@" ;;
|
tokenize-value) shift; cmd_tokenize_value "$@" ;;
|
||||||
detokenize-value) shift; cmd_detokenize_value "$@" ;;
|
detokenize-value) shift; cmd_detokenize_value "$@" ;;
|
||||||
normalize-value)
|
|
||||||
# normalize-value VALUE [CATEGORY] — emit canonical form without
|
|
||||||
# tokenizing or touching the table. Used by larry.sh's auto-PHI to
|
|
||||||
# build per-session memory keys.
|
|
||||||
shift
|
|
||||||
nv_val="${1:-}"; nv_cat="${2:-}"
|
|
||||||
[ -n "$nv_val" ] || die "normalize-value needs a VALUE"
|
|
||||||
[ -n "$nv_cat" ] || nv_cat=$(detect_category "$nv_val")
|
|
||||||
normalize_value "$nv_val" "$nv_cat"
|
|
||||||
printf '\n'
|
|
||||||
;;
|
|
||||||
-h|--help) sed -n '2,30p' "$NC_SELF"; exit 0 ;;
|
-h|--help) sed -n '2,30p' "$NC_SELF"; exit 0 ;;
|
||||||
*)
|
*)
|
||||||
# Default = sanitize mode
|
# Default = sanitize mode
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user