#!/usr/bin/env bash
# fetch-safe.sh — content-validating remote fetch for the Larry-Anywhere
# installer + auto-updater.
#
# WHY THIS EXISTS (root cause — see
# Deliverables/2026-05-27-cloverleaf-larry-stuck-update-and-tab-bug.md,
# Clover #5's diagnosis, "Problem 1"):
#
# `curl -fsSL` against a Gitea raw-file URL, when the Gitea instance
# requires sign-in (or the repo is private), returns the HTML *Sign-In
# page* with **HTTP 200** (Gitea answers an unauthenticated raw read with
# 303 -> /user/login, and `curl -L` follows it to a 200 HTML page).
# `curl -fsSL` only fails on HTTP 4xx/5xx, so it treats this 200-HTML as
# SUCCESS. The installer/updater then parses the HTML as VERSION/MANIFEST/
# larry.sh content, finds no valid version, and either silently aborts OR
# (worse) overwrites real on-disk files with the HTML soup.
#
# That exact trap stranded Bryan's work-box at v0.7.3 until the Gitea
# `REQUIRE_SIGNIN_VIEW=false` flip. The flip fixed the symptom; this file
# fixes the *fragility* — any future private-repo install, Gitea
# re-privatization, or auth-gated mirror would hit the same silent trap.
#
# DESIGN: fail LOUD, never silently corrupt. After every fetch, before the
# caller trusts the bytes, we (a) detect the HTML-login-page trap and (b)
# validate the content shape per file type. On any failure we print an
# actionable error and return non-zero WITHOUT leaving a poisoned file in
# place.
#
# OPTIONAL AUTH: if LARRY_GITEA_TOKEN (or GITEA_TOKEN) is set, fetches add an
# `Authorization: token <PAT>` header so the updater works against a
# private repo without the public-flip. The token value is NEVER logged.
#
# SOURCING NOTE: this file is the canonical, version-controlled home of these
# validators and is listed in MANIFEST so it propagates + stays auditable.
# BUT both install-larry.sh (the curl|bash bootstrap, which runs before any
# lib/ file exists on disk) and larry.sh's self_update() (which runs before
# lib/ is sourced) carry an INLINE, byte-identical copy of these functions so
# they work pre-source. When you change a validator here, mirror it in those
# two inline blocks (each is fenced with `# >>> fetch-safe inline (keep in
# sync with lib/fetch-safe.sh) >>>`).
#
# Defines functions only; runs no code on source; touches no set -e/-u/-o
# pipefail (the caller owns those). Re-sourcing is harmless.

# _fs_curl_auth_args — emit the optional Authorization header args on stdout,
# one per line, IF a Gitea PAT is present in the environment.
#
# Globals:   LARRY_GITEA_TOKEN (read; preferred)
#            GITEA_TOKEN       (read; used when LARRY_GITEA_TOKEN is unset
#                               or empty)
# Arguments: none
# Outputs:   nothing when no token is available; otherwise exactly two lines
#            on stdout: `-H` and `Authorization: token <PAT>`. The caller
#            splices those lines straight into curl's argv, so this function
#            never writes the token to a log.
_fs_curl_auth_args() {
  local _tok="${LARRY_GITEA_TOKEN:-${GITEA_TOKEN:-}}"

  # Strip CR (Cygwin/MobaXterm paste can taint an env var with a trailing \r,
  # which would corrupt the HTTP header line and get the request rejected).
  _tok="${_tok//$'\r'/}"
  # Strip LF too: the output protocol is "one curl argument per line", so a
  # newline inside the token would otherwise become extra curl arguments.
  _tok="${_tok//$'\n'/}"

  if [ -n "$_tok" ]; then
    printf '%s\n' '-H' "Authorization: token $_tok"
  fi
}

# fetch_validate URL DEST KIND [MAX_TIME]
#   URL      — fully-qualified remote URL to fetch
#   DEST     — local path to write on success (left ABSENT/untouched on failure)
#   KIND     — content-shape contract, one of:
#                version  -> body, with all whitespace removed, must start
#                            with N.N.N (^[0-9]+\.[0-9]+\.[0-9]+)
#                manifest -> newline list of plausible paths (optionally
#                            followed by a 64-hex sha256), no "<" in the
#                            first 4 KiB
#                script   -> first line must be `#!/usr/bin/env bash`
#                sh       -> only the markup-trap check (lib helper files)
#                text     -> only the markup-trap check (default; any
#                            unrecognised KIND is treated the same way)
#   MAX_TIME — curl --max-time seconds (default 15)
#
# Returns 0 and writes DEST only when the transport succeeded, the response is
# not the login-page trap, the final HTTP status is not 4xx/5xx, AND the
# content-shape check passes. Otherwise prints one actionable error line on
# stderr and returns 1, leaving DEST untouched so the caller never overwrites
# a real file with garbage.
#
# Relies on _fs_curl_auth_args, _fs_html_trap_error and _fs_snippet being
# defined by the time it is called.
fetch_validate() {
  local url="$1" dest="$2" kind="${3:-text}" mt="${4:-15}"
  local tmp hdr code ctype first line1 empty
  local rc=0 trapped=no problem=""

  # Fall back to a DEST-adjacent name only if mktemp itself is unavailable.
  tmp="$(mktemp 2>/dev/null || echo "${dest}.fs.$$")"
  hdr="$(mktemp 2>/dev/null || echo "${dest}.fsh.$$")"

  # Build curl argv. -D dumps response headers so Content-Type can be
  # inspected; -w prints the final HTTP status on stdout (the body goes to
  # -o, so stdout carries only the status). Redirects ARE followed (-L) so a
  # CDN/mirror that legitimately 301s still works; the post-fetch checks
  # below catch the /user/login landing page such a redirect can produce.
  local _args=( -sSL --max-time "$mt" -o "$tmp" -D "$hdr" -w '%{http_code}' )

  # Splice the optional auth header (read line-by-line to preserve spaces).
  local _auth_line
  while IFS= read -r _auth_line; do
    if [ -n "$_auth_line" ]; then
      _args+=( "$_auth_line" )
    fi
  done < <(_fs_curl_auth_args)

  # `|| rc=$?` keeps a caller's `set -e` from aborting before we can report.
  code="$(curl "${_args[@]}" "$url" 2>/dev/null)" || rc=$?
  code="${code//$'\r'/}"

  # Hard transport failure (curl non-zero, or empty body). The emptiness flag
  # is sampled BEFORE the temp file is deleted so the diagnostic is truthful.
  if [ "$rc" -ne 0 ] || [ ! -s "$tmp" ]; then
    empty=yes
    if [ -s "$tmp" ]; then
      empty=no
    fi
    rm -f "$tmp" "$hdr"
    printf 'error: %s — fetch failed (curl rc=%s, empty=%s). Origin unreachable or timed out.\n' \
      "$url" "$rc" "$empty" >&2
    return 1
  fi

  # ── Login-page trap detection (ANY one of these is a hard fail) ───────────
  # Last Content-Type wins: with -L the header dump holds one block per hop.
  # `|| true`: a missing header must not trip a caller's errexit/pipefail.
  ctype="$(grep -i '^content-type:' "$hdr" 2>/dev/null | tail -n 1 \
    | tr -d '\r' | tr '[:upper:]' '[:lower:]')" || true
  first="$(head -c 4096 "$tmp" 2>/dev/null | tr -d '\r')" || true

  # ERE alternation (-E) rather than the GNU-only `\|` BRE extension.
  if grep -Eqi '<!doctype html|<html|sign in - gitea|<title>sign in' <<<"$first"; then
    trapped=yes
  fi
  case "$ctype" in
    *text/html*) trapped=yes ;;
  esac
  if [ "$trapped" = yes ]; then
    rm -f "$tmp" "$hdr"
    _fs_html_trap_error "$url"
    return 1
  fi

  rm -f "$hdr"

  # ── HTTP status ───────────────────────────────────────────────────────────
  # curl runs without -f, so an error response with a plain-text body (e.g. a
  # 404 "Not found.") reaches this point looking like content. Reject 4xx/5xx
  # explicitly. Any other value passes, including the 000 that curl reports
  # when the transfer had no HTTP status at all.
  case "$code" in
    [45][0-9][0-9])
      rm -f "$tmp"
      printf 'error: %s — server answered HTTP %s; refusing to treat the response body as file content.\n' \
        "$url" "$code" >&2
      return 1
      ;;
  esac

  # ── Content-shape validation per KIND ─────────────────────────────────────
  # Each failing arm only records WHY; the preview is taken while the temp
  # file still exists, and cleanup + reporting happen once, after the case.
  line1="$(head -n 1 "$tmp" 2>/dev/null | tr -d '\r')" || true
  case "$kind" in
    version)
      local ver
      ver="$(printf '%s' "$first" | tr -d '[:space:]')"
      if ! grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+' <<<"$ver"; then
        problem="$(printf 'expected a semver VERSION (e.g. 0.8.4), got %s. Not valid file content.' \
          "$(_fs_snippet "$tmp" "$first")")"
      fi
      ;;
    manifest)
      # Must contain at least one plausible path line and NO angle bracket.
      # v0.8.11: a plausible line is a path token, OPTIONALLY followed by
      # whitespace + a 64-hex sha256 (the "path<TAB>sha256" format). The
      # legacy paths-only form still matches (the hash group is optional).
      # The "<" guard is the real markup defense; the regex just confirms the
      # body looks like a manifest, not random text.
      if grep -q '<' <<<"$first"; then
        problem='MANIFEST contains HTML markup ("<"), not a path list.'
      elif ! grep -Eq '^[A-Za-z0-9_][A-Za-z0-9_./-]*([[:space:]]+[0-9a-fA-F]{64})?[[:space:]]*$' "$tmp"; then
        problem='MANIFEST has no plausible path line.'
      fi
      ;;
    script)
      if [ "$line1" != '#!/usr/bin/env bash' ]; then
        problem="$(printf 'larry.sh must start with `#!/usr/bin/env bash`, got %s.' \
          "$(_fs_snippet "$tmp" "$first")")"
      fi
      ;;
    sh | text | *)
      # A shebang is ideal for `sh`, but a non-shebang helper is accepted too;
      # for these kinds the trap check above is the whole contract.
      :
      ;;
  esac

  if [ -n "$problem" ]; then
    rm -f "$tmp"
    printf 'error: %s — %s\n' "$url" "$problem" >&2
    return 1
  fi

  # All checks passed — move the validated bytes into place.
  # NOTE(review): DEST ends up with the temp file's permissions; presumably
  # callers chmod scripts afterwards — confirm.
  mkdir -p -- "$(dirname -- "$dest")" 2>/dev/null || true
  if ! mv -- "$tmp" "$dest"; then
    rm -f "$tmp"
    printf 'error: cannot write %s\n' "$dest" >&2
    return 1
  fi
  return 0
}

# _fs_html_trap_error URL — print the canonical, actionable HTML-trap error.
#
# Arguments: $1 - the URL whose response turned out to be a login page
# Outputs:   one line on stderr naming the URL and the two remedies (make the
#            repo public, or supply LARRY_GITEA_TOKEN); nothing on stdout
_fs_html_trap_error() {
  # The URL is passed as a printf argument, never as part of the format, so a
  # "%" inside it is printed literally.
  printf 'error: %s returned an HTML sign-in page, not file content. The Gitea repo is private or the instance requires sign-in. Either (a) make the repo public + set REQUIRE_SIGNIN_VIEW=false, or (b) set LARRY_GITEA_TOKEN=<PAT> for authenticated fetch.\n' \
    "$1" >&2
}

# _fs_snippet TMPFILE FALLBACK — a short, single-line, log-safe preview of what
# we actually received, so errors are diagnosable without dumping a full HTML
# page.
#
# Arguments: $1 - file to preview (its first 60 bytes, CR/LF removed)
#            $2 - text used when the file is missing, unreadable or yields
#                 nothing; it is flattened (CR/LF removed) and cut to 60
#                 characters as well, so the preview is bounded whichever
#                 source it comes from
# Outputs:   the preview on stdout, wrapped as "<preview>..." (double quotes
#            and trailing dots included), with no trailing newline
_fs_snippet() {
  local f="$1" fb="$2" s

  # `|| true`: an unreadable file must not trip a caller's errexit/pipefail.
  s="$(head -c 60 "$f" 2>/dev/null | tr -d '\r\n')" || true

  if [ -z "$s" ]; then
    fb="${fb//$'\r'/}"
    fb="${fb//$'\n'/}"
    s="${fb:0:60}"
  fi

  printf '"%s..."' "$s"
}