#!/usr/bin/env bash
# fetch-safe.sh — content-validating remote fetch for the Larry-Anywhere
# installer + auto-updater.
#
# WHY THIS EXISTS (root cause — see
# Deliverables/2026-05-27-cloverleaf-larry-stuck-update-and-tab-bug.md,
# Clover #5's diagnosis, "Problem 1"):
#
#   `curl -fsSL` against a Gitea raw-file URL, when the Gitea instance
#   requires sign-in (or the repo is private), returns the HTML *Sign-In
#   page* with **HTTP 200** (Gitea answers an unauthenticated raw read with
#   303 -> /user/login, and `curl -L` follows it to a 200 HTML page).
#   `curl -fsSL` only fails on HTTP 4xx/5xx, so it treats this 200-HTML as
#   SUCCESS. The installer/updater then parses the HTML as VERSION/MANIFEST/
#   larry.sh content, finds no valid version, and either silently aborts OR
#   (worse) overwrites real on-disk files with the HTML soup.
#
# That exact trap stranded Bryan's work-box at v0.7.3 until the Gitea
# `REQUIRE_SIGNIN_VIEW=false` flip. The flip fixed the symptom; this file
# fixes the *fragility* — any future private-repo install, Gitea
# re-privatization, or auth-gated mirror would hit the same silent trap.
#
# DESIGN: fail LOUD, never silently corrupt. After every fetch, before the
# caller trusts the bytes, we (a) detect the HTML-login-page trap and (b)
# validate the content shape per file type. On any failure we print an
# actionable error and return non-zero WITHOUT leaving a poisoned file in
# place.
#
# OPTIONAL AUTH: if LARRY_GITEA_TOKEN (or GITEA_TOKEN) is set, fetches add an
# `Authorization: token <PAT>` header so the updater works against a
# private repo without the public-flip. The token value is NEVER logged.
#
# SOURCING NOTE: this file is the canonical, version-controlled home of these
# validators and is listed in MANIFEST so it propagates + stays auditable.
# BUT both install-larry.sh (the curl|bash bootstrap, which runs before any
# lib/ file exists on disk) and larry.sh's self_update() (which runs before
# lib/ is sourced) carry an INLINE, byte-identical copy of these functions so
# they work pre-source. When you change a validator here, mirror it in those
# two inline blocks (each is fenced with `# >>> fetch-safe inline (keep in
# sync with lib/fetch-safe.sh) >>>`).
#
# Defines functions only; runs no code on source; touches no set -e/-u/-o
# pipefail (the caller owns those). Re-sourcing is harmless.

# _fs_curl_auth_args — emit the optional Authorization header args on stdout,
# one per line, IF a Gitea PAT is present in the environment
# (LARRY_GITEA_TOKEN wins over GITEA_TOKEN). Emits nothing when neither is
# set. Never echoes the token to a log; the caller splices the lines straight
# into curl's argv.
_fs_curl_auth_args() {
  local _tok="${LARRY_GITEA_TOKEN:-${GITEA_TOKEN:-}}"
  # Strip CR (Cygwin/MobaXterm paste can taint an env var with a trailing \r,
  # which would corrupt the HTTP header line and get the request rejected).
  _tok="${_tok//$'\r'/}"
  # Strip LF as well: the output protocol is one argv entry per line, so an
  # embedded newline would otherwise be spliced into curl's argv as an extra,
  # unintended argument.
  _tok="${_tok//$'\n'/}"
  if [ -n "$_tok" ]; then
    printf '%s\n' '-H' "Authorization: token $_tok"
  fi
}

# fetch_validate URL DEST KIND [MAX_TIME]
#   URL      — fully-qualified remote URL to fetch
#   DEST     — local path to write on success (left ABSENT/untouched on failure)
#   KIND     — content-shape contract, one of:
#                version  -> first line must match ^[0-9]+\.[0-9]+\.[0-9]+
#                manifest -> newline list of plausible paths, no HTML chars
#                script   -> first line must be `#!/usr/bin/env bash`
#                sh       -> shebang OR at least non-HTML (lib helper files)
#                text     -> just "not the HTML sign-in trap" (default)
#   MAX_TIME — curl --max-time seconds (default 15)
#
# Returns 0 and writes DEST only when BOTH the HTML-trap check AND the
# content-shape check pass. Returns non-zero (and prints an actionable error)
# otherwise, leaving DEST untouched so the caller never overwrites a real file
# with garbage.
fetch_validate() {
  # Fetch URL ($1) with curl into a temp file and move it to DEST ($2) only
  # after it passes the HTML-sign-in-trap check, the HTTP status check and the
  # per-KIND ($3, default "text") content-shape check. $4 is curl's --max-time
  # in seconds (default 15). Returns 0 on success; on any failure prints an
  # error to stderr, removes the temp files, leaves DEST untouched and
  # returns 1.
  local url="$1" dest="$2" kind="${3:-text}" mt="${4:-15}"
  local tmp hdr code ctype first line1 ver empty snippet
  local rc=0 trapped=0
  local semver_re='^[0-9]+\.[0-9]+\.[0-9]+'

  tmp="$(mktemp 2>/dev/null || echo "${dest}.fs.$$")"
  hdr="$(mktemp 2>/dev/null || echo "${dest}.fsh.$$")"

  # Build curl argv. -D dumps response headers so we can inspect Content-Type;
  # -w prints the final HTTP status on stdout (the body goes to -o, so stdout
  # carries only the code). We deliberately DO follow redirects (-L) so we can
  # still reach a CDN/mirror that legitimately 301s; the post-fetch checks
  # below catch the /user/login HTML landing that the redirect produces.
  local _args=( -sSL --max-time "$mt" -o "$tmp" -D "$hdr" -w '%{http_code}' )

  # Splice optional auth header (read line-by-line to preserve spaces).
  local _auth_line
  while IFS= read -r _auth_line; do
    if [ -n "$_auth_line" ]; then
      _args+=( "$_auth_line" )
    fi
  done < <(_fs_curl_auth_args)

  # `|| rc=$?` keeps a caller's `set -e` from aborting on a curl failure
  # before we can clean up and report.
  code="$(curl "${_args[@]}" "$url" 2>/dev/null)" || rc=$?
  code="${code//$'\r'/}"

  # Hard transport failure (curl non-zero, or empty body).
  if [ "$rc" -ne 0 ] || [ ! -s "$tmp" ]; then
    # Evaluate emptiness BEFORE deleting the temp file, otherwise the message
    # would always claim empty=yes.
    empty=yes
    if [ -s "$tmp" ]; then
      empty=no
    fi
    rm -f "$tmp" "$hdr"
    printf 'error: %s — fetch failed (curl rc=%s, empty=%s). Origin unreachable or timed out.\n' \
      "$url" "$rc" "$empty" >&2
    return 1
  fi

  # ── HTML-login-page trap detection (ANY one of these is a hard fail) ──────
  # `|| true`: a missing Content-Type header makes grep exit non-zero, which
  # must not trip a caller's `set -e -o pipefail`.
  ctype="$(grep -i '^content-type:' "$hdr" 2>/dev/null | tail -1 | tr -d '\r' | tr 'A-Z' 'a-z')" || true
  first="$(head -c 4096 "$tmp" 2>/dev/null | tr -d '\r')" || true
  if grep -qi 'sign in' <<<"$first"; then
    trapped=1
  fi
  case "$ctype" in
    *text/html*) trapped=1 ;;
  esac
  if [ "$trapped" -eq 1 ]; then
    rm -f "$tmp" "$hdr"
    _fs_html_trap_error "$url"
    return 1
  fi
  rm -f "$hdr"

  # ── HTTP status check ─────────────────────────────────────────────────────
  # curl runs without -f, so a 4xx/5xx answer with a non-HTML body (plain-text
  # "Not found", a JSON error, ...) reaches this point with rc=0. Such a body
  # is never file content. Non-HTTP transfers report 000 and are not affected.
  case "$code" in
    [45][0-9][0-9])
      rm -f "$tmp"
      printf 'error: %s — origin answered HTTP %s; refusing to treat the error body as file content.\n' \
        "$url" "$code" >&2
      return 1
      ;;
  esac

  # ── Content-shape validation per KIND ─────────────────────────────────────
  line1="$(head -n 1 "$tmp" 2>/dev/null | tr -d '\r')" || true
  case "$kind" in
    version)
      # Whitespace-stripped leading bytes must begin with MAJOR.MINOR.PATCH.
      ver="${first//[[:space:]]/}"
      if ! [[ "$ver" =~ $semver_re ]]; then
        # Take the preview while the temp file still exists.
        snippet="$(_fs_snippet "$tmp" "$first")"
        rm -f "$tmp"
        printf 'error: %s — expected a semver VERSION (e.g. 0.8.4), got %s. Not valid file content.\n' \
          "$url" "$snippet" >&2
        return 1
      fi
      ;;
    manifest)
      # Must contain at least one plausible path line and NO HTML angle bracket.
      case "$first" in
        *'<'*)
          rm -f "$tmp"
          printf 'error: %s — MANIFEST contains HTML markup ("<"), not a path list.\n' "$url" >&2
          return 1
          ;;
      esac
      # v0.8.11: a plausible line is a path token, OPTIONALLY followed by
      # whitespace + a 64-hex sha256. The legacy paths-only form still matches
      # (the hash group is optional). The '<' guard above remains the real
      # HTML-trap defense; this just confirms the body looks like a manifest,
      # not random text.
      if ! grep -Eq '^[A-Za-z0-9_][A-Za-z0-9_./-]*([[:space:]]+[0-9a-fA-F]{64})?[[:space:]]*$' "$tmp"; then
        rm -f "$tmp"
        printf 'error: %s — MANIFEST has no plausible path line.\n' "$url" >&2
        return 1
      fi
      ;;
    script)
      if [ "$line1" != '#!/usr/bin/env bash' ]; then
        # Take the preview while the temp file still exists.
        snippet="$(_fs_snippet "$tmp" "$first")"
        rm -f "$tmp"
        printf 'error: %s — larry.sh must start with `#!/usr/bin/env bash`, got %s.\n' \
          "$url" "$snippet" >&2
        return 1
      fi
      ;;
    *)
      # sh / text / anything else: a shebang is ideal for sh, but the only
      # hard requirement is "not the HTML trap", which was enforced above.
      :
      ;;
  esac

  # All checks passed — move the validated bytes into place.
  mkdir -p "$(dirname "$dest")" 2>/dev/null || true
  if ! mv "$tmp" "$dest"; then
    rm -f "$tmp"
    printf 'error: cannot write %s\n' "$dest" >&2
    return 1
  fi
  return 0
}

# _fs_html_trap_error URL — print the canonical, actionable HTML-trap error
# on stderr.
_fs_html_trap_error() {
  printf 'error: %s returned an HTML sign-in page, not file content. %s %s\n' \
    "$1" \
    'The Gitea repo is private or the instance requires sign-in.' \
    'Either (a) make the repo public + set REQUIRE_SIGNIN_VIEW=false, or (b) set LARRY_GITEA_TOKEN=<PAT> for authenticated fetch.' >&2
}

# _fs_snippet TMPFILE FALLBACK — a short, single-line, log-safe preview of what
# we actually received (first 60 chars), so errors are diagnosable without
# dumping a full HTML page. FALLBACK is used when TMPFILE is missing or empty
# and is flattened and truncated the same way.
_fs_snippet() {
  local f="$1" fb="${2:-}" s
  s="$(head -c 60 "$f" 2>/dev/null | tr -d '\r\n')" || true
  if [ -z "$s" ]; then
    s="${fb//$'\r'/}"
    s="${s//$'\n'/}"
    s="${s:0:60}"
  fi
  printf '"%s..."' "$s"
}