-- Migration: 010_spec016_enrollment.sql
-- Purpose: SPEC-016 zero-touch per-site agent enrollment — server-side data model.
--
-- Adds the per-site enrollment-key table, a minimal sites table to anchor it,
-- and the machine-side columns the collision-gated self-registration flow needs.
--
-- Two-tier credential model (SPEC-016 §Security): a low-sensitivity, rotatable,
-- per-site ENROLLMENT KEY (the `cek_` secret stored hashed here) gates "may this
-- machine register at all", while the high-sensitivity per-machine `cak_`
-- operating credential (connect_agent_keys, migration 004) is minted on a
-- successful enroll. Compromise of an enrollment key is recovered by rotating one
-- site, not a fleet-wide re-key.
--
-- DEVIATION FROM SPEC (documented): SPEC-016 §DB-migration describes
-- `site_enrollment_keys.site_id` as `fk -> sites`, assuming a sites table already
-- exists. It does NOT — in the current schema "site" and "company/organization" are
-- free-text columns on connect_machines (migration 005); there is no relational
-- sites entity. This migration therefore CREATES a minimal `connect_sites` table
-- (the relational anchor the enrollment-key FK and the dashboard per-site key
-- display both require) keyed by a natural `site_code` and scoped per-tenant. It is
-- intentionally minimal (site code + display name + company label + tenant, plus a
-- reserved, not-yet-enforced enrollment_policy column); richer site/company
-- modeling is left to future work. The free-text connect_machines.site /
-- .organization columns are untouched and continue to carry agent-reported labels.
--
-- Idempotent: CREATE TABLE/INDEX IF NOT EXISTS, ADD COLUMN IF NOT EXISTS, and a
-- backfill UPDATE guarded by WHERE tenant_id IS NULL. Applied on server startup by
-- sqlx::migrate!(); never pre-applied via psql. Ordered after 009.
-- See .claude/standards/gururmm/sqlx-migrations.md.

-- pgcrypto provides gen_random_uuid(); enabled in 001/004 but re-asserted for safety.
CREATE EXTENSION IF NOT EXISTS "pgcrypto";

-- ============================================================================
-- connect_sites: relational anchor for per-site enrollment
-- (rationale: the DEVIATION FROM SPEC note in this file's header)
-- ============================================================================
-- One row per site, where a site is the unit a single signed installer targets.
-- `site_code` is the non-secret, operator-facing identifier the installer carries
-- and the agent sends at /api/enroll (e.g. "ACME-PHX"). It is unique per tenant,
-- not globally: the same human-chosen code may legitimately exist in two tenants.
-- tenant_id mirrors the nullable, default-tenant-backfilled tenancy column used on
-- every other scoped table (migration 004); db::tenancy::current_tenant_id()
-- resolves it for now.
CREATE TABLE IF NOT EXISTS connect_sites (
    id                UUID        DEFAULT gen_random_uuid(),

    -- Non-secret, operator-facing site identifier carried by the installer.
    site_code         TEXT        NOT NULL,

    -- Human-readable site / company display name for the dashboard.
    display_name      TEXT,

    -- Default company label applied to machines enrolled at this site (mirrors
    -- the free-text connect_machines.organization the agent otherwise
    -- self-reports).
    company           TEXT,

    -- Tenancy-ready (Phase 4); nullable for now. Backfilled to the default
    -- tenant below.
    tenant_id         UUID,

    -- RESERVED for future per-site enrollment POLICY work (SPEC-016
    -- §out-of-scope). Defaults to 'auto-approve'; a future 'pending-approval'
    -- value will gate new enrollments. NOT enforced in Phase A — the column
    -- exists only so the policy SPEC needs no schema change. Do not branch on
    -- this column yet.
    enrollment_policy TEXT        DEFAULT 'auto-approve',

    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    PRIMARY KEY (id)
);

-- Per-tenant uniqueness of the natural site_code: /api/enroll can resolve a site
-- deterministically within a tenant, while the same code may still exist in
-- another tenant. The COALESCE folds a NULL tenant_id onto the default tenant so
-- the index stays usable while tenant_id is still nullable (Phase 1).
CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_sites_tenant_code
    ON connect_sites (
        COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid),
        site_code
    );

-- Backfill connect_sites.tenant_id to the default tenant. The table is empty on a
-- fresh DB, so this is a no-op there; it is kept so the migration stays
-- self-consistent. The WHERE clause limits it to rows that have no tenant yet.
UPDATE connect_sites
   SET tenant_id = '00000000-0000-0000-0000-000000000001'
 WHERE tenant_id IS NULL;

-- ============================================================================
-- site_enrollment_keys: rotatable, hashed per-site enrollment secret + fingerprint
-- ============================================================================
-- Holds ONLY the Argon2id hash of the `cek_` secret; the plaintext is shown once at
-- issue/rotate and is never recoverable.
--   * `version`     — monotonic rotation counter.
--   * `fingerprint` — non-secret short hex shown as `vN (XXXX)` in the dashboard
--                     and baked into the installer filename.
--   * `active`      — marks the current key. Rotation flips the old key to
--                     active=false (blocking NEW enrollments from old installers)
--                     and inserts a new active row; already-enrolled agents
--                     holding their own `cak_` are unaffected.
-- Any number of inactive (historical) rows may coexist per site; at most one
-- active row is intended, which the partial unique index below enforces.
CREATE TABLE IF NOT EXISTS site_enrollment_keys (
    id          UUID        DEFAULT gen_random_uuid(),

    -- Owning site; keys are removed together with their site (ON DELETE CASCADE).
    site_id     UUID        NOT NULL,

    -- Argon2id hash of the `cek_` enrollment secret. Never the plaintext.
    key_hash    TEXT        NOT NULL,

    -- Monotonic rotation version (1, 2, 3, ...).
    version     INTEGER     NOT NULL,

    -- Non-secret short hex fingerprint code (the XXXX in `vN (XXXX)`), derived
    -- from the secret. Stored so the dashboard / GET endpoint can show it
    -- without the secret.
    fingerprint TEXT        NOT NULL,

    active      BOOLEAN     NOT NULL DEFAULT true,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Set when this key is rotated out (active flipped to false).
    rotated_at  TIMESTAMPTZ,

    PRIMARY KEY (id),
    FOREIGN KEY (site_id) REFERENCES connect_sites (id) ON DELETE CASCADE
);

-- Lookup index for the enroll hot path: resolve the active key for a site.
CREATE INDEX IF NOT EXISTS idx_site_enrollment_keys_site_active
    ON site_enrollment_keys (site_id, active);

-- At most one ACTIVE enrollment key per site (the "current" installer key).
-- The index is partial, so any number of inactive historical rows may coexist.
CREATE UNIQUE INDEX IF NOT EXISTS idx_site_enrollment_keys_one_active
    ON site_enrollment_keys (site_id)
    WHERE active;

-- ============================================================================
-- connect_machines — site binding + enrollment-state collision gate
-- ============================================================================
-- machine_uid already exists (migration 008) with a partial UNIQUE index on
-- (machine_uid) WHERE machine_uid IS NOT NULL. SPEC-016 §item-1 / resolved-decision
-- #4 call for the dedup key to be PER-TENANT — (tenant_id, machine_uid) — so the
-- same hardware legitimately present in two tenants stays two rows. tenant_id is
-- the scoping column that exists on connect_machines (migration 004); machines have
-- no direct site_id today, so site is tracked separately (site_id below) and
-- tenancy is the uniqueness scope, exactly as the spec states.
--
-- CRITICAL CONSTRAINT (why we ADD rather than REPLACE the 008 index here):
-- db::machines::upsert_machine (the live connect-path upsert) uses
-- `ON CONFLICT (machine_uid) WHERE machine_uid IS NOT NULL` as its conflict
-- arbiter. Postgres matches that arbiter to the EXACT index from migration 008.
-- Dropping that index would make the live upsert fail to find an arbiter and error
-- at runtime — breaking every un-keyed agent reconnect. So migration 008's global
-- index is LEFT IN PLACE (the connect path keeps working unchanged) and the
-- per-tenant index is added ALONGSIDE it.
-- In single-tenant Phase 1 the two are equivalent (every row's tenant_id is the
-- default tenant), so the per-tenant index adds the SPEC-016 dedup semantics
-- without a redundant-uniqueness conflict: a (tenant, uid) pair that is unique is
-- also globally unique today. When multi-tenancy activates AND upsert_machine's
-- ON CONFLICT is updated to name (tenant_id, machine_uid), a future migration
-- drops the global 008 index. Documented as deferred; do not drop it now.

-- site_id: optional FK to the site a machine enrolled under. NULL for legacy /
-- support-code machines that never enrolled through /api/enroll. A site change on
-- re-enroll is the "site move" SPEC-016 audits. ON DELETE SET NULL means deleting
-- a site detaches its machines instead of deleting them.
ALTER TABLE connect_machines
    ADD COLUMN IF NOT EXISTS site_id UUID
        REFERENCES connect_sites(id) ON DELETE SET NULL;

-- Index the referencing column of the FK above. PostgreSQL does not create one
-- automatically, and without it every DELETE on connect_sites (which must find
-- the machines to SET NULL) and every "machines at this site" lookup has to scan
-- all of connect_machines.
-- NOTE(review): if this migration has already been applied to any database, ship
-- this index in a follow-up migration instead of editing this file — confirm how
-- sqlx::migrate!() treats changes to an already-applied migration.
CREATE INDEX IF NOT EXISTS idx_connect_machines_site_id
    ON connect_machines (site_id);

-- enrollment_state: the collision gate (SPEC-016 §item-1/6).
--   'active'  = live and controllable (auto-approve posture).
--   'pending' = a machine_uid collision was detected at enroll and an operator
--               must confirm in the dashboard before the endpoint may be
--               controlled.
-- NOT NULL DEFAULT 'active' leaves every legacy / connect-path row unaffected;
-- the CHECK rejects any value other than the two states above.
ALTER TABLE connect_machines
    ADD COLUMN IF NOT EXISTS enrollment_state TEXT NOT NULL DEFAULT 'active'
        CHECK (enrollment_state IN ('active', 'pending'));

-- Per-tenant machine_uid uniqueness (SPEC-016). Added ALONGSIDE migration 008's
-- global (machine_uid) index (see CRITICAL CONSTRAINT above — the connect-path
-- upsert's ON CONFLICT arbiter binds to the 008 index, which must survive).
-- COALESCE folds a NULL tenant_id to the default tenant so the index is
-- well-defined while tenancy is single-tenant (Phase 1); the WHERE clause excludes
-- NULL machine_uid so legacy un-keyed rows coexist freely.
CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_machines_tenant_machine_uid
    ON connect_machines (
        COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid),
        machine_uid
    )
    WHERE machine_uid IS NOT NULL;