cloverleaf-larry/lib/phi-presidio-sidecar.py

#!/usr/bin/env python3
"""
larry-anywhere v0.8.2: Microsoft Presidio sidecar for free-text NER.

Closes V1 from Vera's PHI-leak audit (the dominant real-world failure mode —
patient names / addresses / un-keyworded dates in prose chat). Free-text PHI
flows past the v0.7.3 tier-1/2/3/4 classifier because that classifier is
HL7-segment-aware and keyword-driven, not a general entity recognizer.

This sidecar runs Microsoft Presidio (spaCy backend + custom recognizers)
as a persistent FastAPI service on 127.0.0.1:$LARRY_PHI_PORT (default 41189).
Larry's main loop hits it via curl as the LAST tier of auto-PHI detection.

Wire-up:
- lib/phi-sidecar.sh         — bash launcher / health-check / lifecycle
- lib/phi-client.sh          — bash client (phi_redact_text wrapper)
- larry.sh:auto_detect_phi   — calls phi_redact_text as tier-5 (post-explicit-marker,
                               post-tier-1-to-4, before sending input to model)
- install-larry.sh           — offers to pip-install presidio + spacy + en_core_web_sm

Benchmarks (Bryan's Mac, Apple Silicon, en_core_web_sm):
  cold start (model load):  ~9 seconds
  warm latency (P50/P95):   20ms / 22ms (analyzer only)
  HTTP round-trip warm:     ~57ms (curl --unix-socket via TCP fallback)
The first request after startup pays a ~150ms tokenizer-warmup tax; after
that, requests stay within the 200ms-per-turn REPL budget Bryan specified.

Failure mode: if Presidio fails to load (model missing, package broken),
the process exits non-zero. The bash launcher detects this and tells the
user. Larry's tier-5 silently no-ops when the sidecar is unreachable,
preserving v0.8.1 behavior on hosts where Presidio isn't installed.

Compatibility: requires Python 3.9+ (3.14 tested). MobaXterm/Cygwin
compatibility is gated by spaCy's C-extension wheels; if pip install
presidio_analyzer fails on Cygwin, this host stays on v0.8.1 + nudges
per Bryan's accepted tradeoff.
"""
from __future__ import annotations

import os
import sys
import time
import logging

# Root logging is configured once, at import time. Only WARNING and above are
# emitted; main() logs its startup messages at WARNING so they appear at this
# level.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.WARNING,
)
log = logging.getLogger("phi-sidecar")

# Third-party imports are guarded: if any of them cannot be imported, the
# ImportError is turned into an install hint on stderr and a dedicated exit
# status instead of an uncaught traceback.
try:
    from fastapi import FastAPI, Body
    from pydantic import BaseModel
    from presidio_analyzer import AnalyzerEngine, Pattern, PatternRecognizer
    from presidio_analyzer.nlp_engine import NlpEngineProvider
    from presidio_anonymizer import AnonymizerEngine
    import uvicorn
except ImportError as e:
    # Show which import failed plus the commands that install the stack,
    # then exit with status 3 (non-zero, so the caller can detect that the
    # sidecar never started).
    sys.stderr.write(f"phi-sidecar: missing dependency ({e}); install with:\n")
    sys.stderr.write("  pip install presidio_analyzer presidio_anonymizer fastapi uvicorn\n")
    sys.stderr.write("  python -m spacy download en_core_web_sm\n")
    sys.exit(3)

# Runtime configuration, read once from the environment at import time.
def _env_or_default(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* if it is unset or blank.

    A variable that is exported but empty (or whitespace-only) is treated the
    same as an unset one. Without this, an empty LARRY_PHI_PORT crashes on
    int(''), and an empty LARRY_PHI_HOST would be handed to the server as-is;
    an empty host string can be interpreted as "bind all interfaces", which
    must never happen silently for a service that receives PHI.
    """
    value = os.environ.get(name, "").strip()
    return value if value else default


try:
    LARRY_PHI_PORT = int(_env_or_default("LARRY_PHI_PORT", "41189"))
except ValueError:
    # Fail fast with a readable message (exit status 1) rather than a raw
    # traceback; this runs before the slow model load.
    sys.exit(
        "phi-sidecar: LARRY_PHI_PORT must be an integer TCP port, got "
        f"{os.environ.get('LARRY_PHI_PORT')!r}"
    )
LARRY_PHI_HOST = _env_or_default("LARRY_PHI_HOST", "127.0.0.1")
LARRY_PHI_MODEL = _env_or_default("LARRY_PHI_MODEL", "en_core_web_sm")


# Module-scope request model. It MUST stay at module level, not be defined
# inside a function: pydantic v2 + FastAPI introspection treats a
# closure-defined model as query params (the symptom: 'Field required' on a
# "query" location for the body param), which breaks the /redact endpoint
# silently.
class RedactReq(BaseModel):
    """JSON request body accepted by the POST /redact endpoint."""

    # Raw text to analyze and return in redacted form.
    text: str
    # Minimum confidence score; detections scoring below this are ignored.
    score_threshold: float = 0.3


# Regexes used by the custom recognizers in build_analyzer(). They live at
# module level so they can be exercised with plain `re`, without loading
# Presidio or a spaCy model.
#
# HL7_MRN: any standalone run of 6-12 digits (looser than NPI's strict
# 10-digit rule).
_HL7_MRN_REGEX = r"\b\d{6,12}\b"
# HL7_CARET_NAME: FAMILY^GIVEN with an optional third component. Family and
# given parts need at least two characters; the third part may be a single
# letter so a middle initial ("SMITH^JOHN^Q") is covered by the match instead
# of being left behind as an unredacted "^Q".
_HL7_CARET_NAME_REGEX = r"\b[A-Z][A-Z\-']+\^[A-Z][A-Z\-']+(\^[A-Z][A-Z\-']*)?\b"
# HL7_PHONE_BARE: 10 unformatted digits whose 1st and 4th digits are 2-9,
# e.g. "5552345678". Note that "5551234567" does NOT match (4th digit is 1).
_HL7_PHONE_BARE_REGEX = r"\b[2-9]\d{2}[2-9]\d{6}\b"


def build_analyzer() -> AnalyzerEngine:
    """Build the Presidio analyzer used by the /redact endpoint.

    Creates a spaCy-backed NLP engine for English using the model named by
    LARRY_PHI_MODEL (en_core_web_sm unless overridden), wraps it in an
    AnalyzerEngine, and registers three HL7-specific pattern recognizers on
    top of the analyzer's registry.

    Returns:
        The configured AnalyzerEngine.

    Raises:
        Whatever Presidio/spaCy raise when the engine or model cannot be
        created; nothing is caught here, so a load failure propagates to the
        caller.
    """
    config = {
        "nlp_engine_name": "spacy",
        "models": [{"lang_code": "en", "model_name": LARRY_PHI_MODEL}],
    }
    nlp_engine = NlpEngineProvider(nlp_configuration=config).create_engine()
    analyzer = AnalyzerEngine(nlp_engine=nlp_engine, supported_languages=["en"])

    # Custom recognizers tuned for HL7/Cloverleaf operator chat. These run
    # IN ADDITION TO Presidio's built-in PII recognizers (PERSON, LOCATION,
    # DATE_TIME, PHONE_NUMBER, US_SSN, EMAIL_ADDRESS, etc.).
    #
    # NOTE(review): Presidio applies its own default regex flags to pattern
    # recognizers (reportedly including IGNORECASE), so the [A-Z] classes
    # may also match lowercase text — confirm against the installed version.

    # HL7_MRN: catches prose such as "check 623000286" that the keyword-based
    # tier-2 classifier missed. The context words are passed to Presidio as
    # hints for this entity.
    analyzer.registry.add_recognizer(
        PatternRecognizer(
            supported_entity="HL7_MRN",
            patterns=[Pattern("hl7_mrn_6_12", _HL7_MRN_REGEX, 0.30)],
            context=["mrn", "patient", "record", "account", "acct", "visit", "encounter", "csn"],
        )
    )
    # HL7_CARET_NAME: "SMITH^JOHN" / "SMITH^JOHN^Q" outside Tier-3 context.
    # The v0.7.3 Tier-3 only fires when PID.3/PID.5/etc. is in the same line;
    # this recognizer catches the caret-name itself.
    analyzer.registry.add_recognizer(
        PatternRecognizer(
            supported_entity="HL7_CARET_NAME",
            patterns=[Pattern("caret_name", _HL7_CARET_NAME_REGEX, 0.85)],
        )
    )
    # HL7_PHONE_BARE: plain 10-digit numbers with no dashes/parens (Tier 1
    # requires formatting). The base score is kept low (0.20, below
    # RedactReq's default score_threshold of 0.3) so plain numbers in code
    # stay safe; presumably only a context-word boost lifts a match over the
    # threshold — verify against Presidio's context enhancer.
    analyzer.registry.add_recognizer(
        PatternRecognizer(
            supported_entity="HL7_PHONE_BARE",
            patterns=[Pattern("phone_10_bare", _HL7_PHONE_BARE_REGEX, 0.20)],
            context=["phone", "tel", "telephone", "contact", "cell", "mobile"],
        )
    )
    return analyzer


def main() -> None:
    """Load Presidio, define the HTTP API, and serve it until interrupted.

    Builds the analyzer and anonymizer once up front (the slow part), then
    exposes two routes on LARRY_PHI_HOST:LARRY_PHI_PORT via uvicorn:

    - POST /redact: analyze and anonymize the submitted text.
    - GET /health: report status, model name, and port.

    uvicorn.run() blocks, so this function only returns when the server
    stops. Exceptions raised while loading Presidio are not caught here.
    """
    log.warning("loading presidio (this takes ~5-10 seconds the first time)...")
    # perf_counter() is monotonic; time.time() is wall-clock and can jump
    # (NTP step, manual change), which would corrupt the measured durations.
    load_started = time.perf_counter()
    analyzer = build_analyzer()
    anonymizer = AnonymizerEngine()
    log.warning(
        "presidio ready in %.0f ms; listening on %s:%s",
        (time.perf_counter() - load_started) * 1000,
        LARRY_PHI_HOST,
        LARRY_PHI_PORT,
    )

    app = FastAPI(title="larry-phi-sidecar", version="0.8.2")

    # No return annotation on the handlers on purpose: FastAPI would treat
    # one as a response model and change how the response is processed.
    @app.post("/redact")
    def redact(req: RedactReq = Body(...)):
        """Redact detected entities in req.text and describe what was found.

        The response carries the redacted text, one record per detection
        (entity type, character offsets, score — never the matched text
        itself), and the handler's own processing time in milliseconds.
        """
        request_started = time.perf_counter()
        results = analyzer.analyze(
            text=req.text,
            language="en",
            score_threshold=req.score_threshold,
        )
        anon = anonymizer.anonymize(text=req.text, analyzer_results=results)
        return {
            "redacted": anon.text,
            "entities": [
                {"type": r.entity_type, "start": r.start, "end": r.end, "score": r.score}
                for r in results
            ],
            "latency_ms": (time.perf_counter() - request_started) * 1000,
        }

    @app.get("/health")
    def health():
        """Report that the service is up, with the configured model and port."""
        return {"status": "ok", "model": LARRY_PHI_MODEL, "port": LARRY_PHI_PORT}

    uvicorn.run(app, host=LARRY_PHI_HOST, port=LARRY_PHI_PORT, log_level="warning")


# Script entry point: run the service, and treat Ctrl-C as a clean shutdown
# (exit status 0) rather than an error.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        raise SystemExit(0)