diff --git a/Cargo.lock b/Cargo.lock index 58d8190..b352724 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1407,7 +1407,7 @@ dependencies = [ [[package]] name = "guruconnect" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "bytes", @@ -1447,7 +1447,7 @@ dependencies = [ [[package]] name = "guruconnect-server" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "argon2", diff --git a/server/migrations/010_spec016_enrollment.sql b/server/migrations/010_spec016_enrollment.sql new file mode 100644 index 0000000..db95ba9 --- /dev/null +++ b/server/migrations/010_spec016_enrollment.sql @@ -0,0 +1,159 @@ +-- Migration: 010_spec016_enrollment.sql +-- Purpose: SPEC-016 zero-touch per-site agent enrollment — server-side data model. +-- +-- Adds the per-site enrollment-key table, a minimal sites table to anchor it, +-- and the machine-side columns the collision-gated self-registration flow needs. +-- +-- Two-tier credential model (SPEC-016 §Security): a low-sensitivity, rotatable, +-- per-site ENROLLMENT KEY (the `cek_` secret stored hashed here) gates "may this +-- machine register at all", while the high-sensitivity per-machine `cak_` +-- operating credential (connect_agent_keys, migration 004) is minted on a +-- successful enroll. Compromise of an enrollment key is recovered by rotating one +-- site, not a fleet-wide re-key. +-- +-- DEVIATION FROM SPEC (documented): SPEC-016 §DB-migration describes +-- `site_enrollment_keys.site_id` as `fk -> sites`, assuming a sites table already +-- exists. It does NOT — in the current schema "site" and "company/organization" are +-- free-text columns on connect_machines (migration 005), there is no relational +-- sites entity. This migration therefore CREATES a minimal `connect_sites` table +-- (the relational anchor the enrollment-key FK and the dashboard per-site key +-- display both require) keyed by a natural `site_code` and scoped per-tenant. It is +-- intentionally minimal (code + display name + tenant); richer site/company +-- modeling is left to future work. The free-text connect_machines.site / +-- .organization columns are untouched and continue to carry agent-reported labels. +-- +-- Idempotent: CREATE TABLE/INDEX IF NOT EXISTS, ADD COLUMN IF NOT EXISTS. Applied on +-- server startup by sqlx::migrate!(); never pre-applied via psql. Ordered after 009. +-- See .claude/standards/gururmm/sqlx-migrations.md. + +-- pgcrypto provides gen_random_uuid(); enabled in 001/004 but re-asserted for safety. +CREATE EXTENSION IF NOT EXISTS "pgcrypto"; + +-- ============================================================================ +-- connect_sites — relational anchor for per-site enrollment (see DEVIATION above) +-- ============================================================================ +-- A site is the unit a single signed installer targets. `site_code` is the +-- non-secret, operator-facing identifier the installer carries and the agent sends +-- at /api/enroll (e.g. "ACME-PHX"). Uniqueness is per-tenant: the same human-chosen +-- code may legitimately exist in two tenants. tenant_id mirrors the nullable, +-- default-tenant-backfilled tenancy column used on every other scoped table +-- (migration 004); db::tenancy::current_tenant_id() resolves it for now. + +CREATE TABLE IF NOT EXISTS connect_sites ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + -- Operator-facing site identifier the installer carries. Non-secret. + site_code TEXT NOT NULL, + -- Human-readable site / company display name for the dashboard. + display_name TEXT, + -- Default company label applied to machines enrolled at this site (mirrors the + -- free-text connect_machines.organization the agent otherwise self-reports). + company TEXT, + -- Tenancy-ready (Phase 4). Backfilled to the default tenant below. + tenant_id UUID, + -- RESERVED for future per-site enrollment POLICY work (SPEC-016 §out-of-scope): + -- default 'auto-approve'; a future 'pending-approval' value will gate new + -- enrollments. NOT enforced in Phase A — present so the policy SPEC needs no + -- schema change. Do not branch on this column yet. + enrollment_policy TEXT DEFAULT 'auto-approve', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- Per-tenant uniqueness of the natural site_code so /api/enroll can resolve a site +-- deterministically within a tenant while the same code may exist across tenants. +-- COALESCE keeps the index usable while tenant_id is still nullable (Phase 1). +CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_sites_tenant_code + ON connect_sites (COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid), site_code); + +-- Backfill the sites tenant_id to the default tenant (table is empty on a fresh DB; +-- no-op there, but keeps the migration self-consistent). +UPDATE connect_sites +SET tenant_id = '00000000-0000-0000-0000-000000000001' +WHERE tenant_id IS NULL; + +-- ============================================================================ +-- site_enrollment_keys — rotatable, hashed per-site enrollment secret + fingerprint +-- ============================================================================ +-- Stores ONLY the Argon2id hash of the `cek_` secret; the plaintext is shown once +-- at issue/rotate and never recoverable. `version` is the monotonic rotation +-- counter; `fingerprint` is the non-secret short hex shown as `vN (XXXX)` in the +-- dashboard and baked into the installer filename. `active` marks the current key — +-- rotation flips the old key to active=false (blocking NEW enrollments from old +-- installers) and inserts a new active row; already-enrolled agents holding their +-- own `cak_` are unaffected. Multiple inactive (historical) rows may coexist per +-- site; at most one active row is intended (enforced by a partial unique index). + +CREATE TABLE IF NOT EXISTS site_enrollment_keys ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + site_id UUID NOT NULL REFERENCES connect_sites(id) ON DELETE CASCADE, + -- Argon2id hash of the `cek_` enrollment secret. Never the plaintext. + key_hash TEXT NOT NULL, + -- Monotonic rotation version (1, 2, 3, ...). + version INTEGER NOT NULL, + -- Non-secret short hex fingerprint code (the XXXX in `vN (XXXX)`), derived from + -- the secret. Stored so the dashboard / GET endpoint can show it without the + -- secret. + fingerprint TEXT NOT NULL, + active BOOLEAN NOT NULL DEFAULT true, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + -- Set when this key is rotated out (active flipped to false). + rotated_at TIMESTAMPTZ +); + +-- Lookup index for the enroll hot path: resolve the active key for a site. +CREATE INDEX IF NOT EXISTS idx_site_enrollment_keys_site_active + ON site_enrollment_keys (site_id, active); + +-- At most one ACTIVE enrollment key per site (the "current" installer key). +-- Partial unique index so any number of inactive historical rows may coexist. +CREATE UNIQUE INDEX IF NOT EXISTS idx_site_enrollment_keys_one_active + ON site_enrollment_keys (site_id) + WHERE active; + +-- ============================================================================ +-- connect_machines — site binding + enrollment-state collision gate +-- ============================================================================ +-- machine_uid already exists (migration 008) with a partial UNIQUE index on +-- (machine_uid) WHERE machine_uid IS NOT NULL. SPEC-016 §item-1 / resolved-decision #4 +-- call for the dedup key to be PER-TENANT — (tenant_id, machine_uid) — so the same +-- hardware legitimately present in two tenants stays two rows. tenant_id is the +-- scoping column that exists on connect_machines (migration 004); machines have no +-- direct site_id today, so site is tracked separately (site_id below) and tenancy is +-- the uniqueness scope, exactly as the spec states. +-- +-- CRITICAL CONSTRAINT (why we ADD rather than REPLACE the 008 index here): +-- db::machines::upsert_machine (the live connect-path upsert) uses +-- `ON CONFLICT (machine_uid) WHERE machine_uid IS NOT NULL` as its conflict arbiter. +-- Postgres matches that arbiter to the EXACT index from migration 008. Dropping that +-- index would make the live upsert fail to find an arbiter and error at runtime — +-- breaking every un-keyed agent reconnect. So migration 008's global index is LEFT +-- IN PLACE (the connect path keeps working unchanged) and the per-tenant index is +-- added ALONGSIDE it. In single-tenant Phase 1 the two are equivalent (every row's +-- tenant_id is the default tenant), so the per-tenant index adds the SPEC-016 dedup +-- semantics without a redundant-uniqueness conflict: a (tenant, uid) pair that is +-- unique is also globally unique today. When multi-tenancy activates AND +-- upsert_machine's ON CONFLICT is updated to name (tenant_id, machine_uid), a future +-- migration drops the global 008 index. Documented as deferred; do not drop it now. + +-- Optional FK to the site a machine enrolled under (NULL for legacy / support-code +-- machines that never enrolled through /api/enroll). A site change on re-enroll is +-- the "site move" SPEC-016 audits. +ALTER TABLE connect_machines ADD COLUMN IF NOT EXISTS site_id UUID REFERENCES connect_sites(id) ON DELETE SET NULL; + +-- enrollment_state: the collision gate (SPEC-016 §item-1/6). 'active' = live and +-- controllable (auto-approve posture); 'pending' = a machine_uid collision was +-- detected at enroll and an operator must confirm in the dashboard before the +-- endpoint may be controlled. Default 'active' so every legacy/connect-path row is +-- unaffected. +ALTER TABLE connect_machines + ADD COLUMN IF NOT EXISTS enrollment_state TEXT NOT NULL DEFAULT 'active' + CHECK (enrollment_state IN ('active', 'pending')); + +-- Per-tenant machine_uid uniqueness (SPEC-016). Added ALONGSIDE migration 008's +-- global (machine_uid) index (see CRITICAL CONSTRAINT above — the connect-path +-- upsert's ON CONFLICT arbiter binds to the 008 index, which must survive). COALESCE +-- folds a NULL tenant_id to the default tenant so the index is well-defined while +-- tenancy is single-tenant (Phase 1); the WHERE clause excludes NULL machine_uid so +-- legacy un-keyed rows coexist freely. +CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_machines_tenant_machine_uid + ON connect_machines (COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid), machine_uid) + WHERE machine_uid IS NOT NULL; diff --git a/server/src/api/enroll.rs b/server/src/api/enroll.rs new file mode 100644 index 0000000..b961e0f --- /dev/null +++ b/server/src/api/enroll.rs @@ -0,0 +1,562 @@ +//! Zero-touch per-site self-registration endpoint (SPEC-016, Phase A). +//! +//! `POST /api/enroll` is the PUBLIC (no-JWT) door a managed agent walks through on +//! first run: it presents its site's `site_code` + the long per-site enrollment +//! key (`cek_`) and its machine-derived `machine_uid`, and the server — if the key +//! verifies — dedups on `(tenant, machine_uid)`, creates or reuses the machine row, +//! and mints the per-machine `cak_` operating credential, returning the plaintext +//! `cak_` exactly once. +//! +//! Two-tier credential model (SPEC-016 §Security): the enrollment key is the +//! low-sensitivity, rotatable, per-site GATE ("may register"); the minted `cak_` is +//! the high-sensitivity, per-machine, independently-revocable OPERATING credential +//! the relay (`relay::validate_agent_api_key`) already accepts. This handler only +//! MINTS a `cak_` in the exact stored form `verify_agent_key` expects (SHA-256 hash +//! in `connect_agent_keys`) — it does not touch the relay auth path. +//! +//! AUTH POSTURE: auto-approve (ScreenConnect parity) — a clean enroll is live and +//! controllable immediately, with the new-enrollment alert as the tripwire. The one +//! exception is a detected `machine_uid` collision, which gates the machine to +//! `enrollment_state = 'pending'` and withholds a usable `cak_` until an operator +//! confirms it in the dashboard. +//! +//! SECURITY: never log the enrollment key, the minted `cak_`, or any hash. The +//! plaintext `cak_` appears only in the success response body, once. + +use std::net::IpAddr; +use std::net::SocketAddr; + +use axum::{ + extract::{ConnectInfo, State}, + http::{HeaderMap, StatusCode}, + Json, +}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use uuid::Uuid; + +use crate::auth::{agent_keys, enrollment_keys}; +use crate::db; +use crate::AppState; + +/// Standard error envelope (see `.claude/standards/api/response-format.md`), +/// matching `api::machine_keys::ApiError`. +#[derive(Debug, Serialize)] +pub struct ApiError { + pub detail: String, + pub error_code: String, + pub status_code: u16, +} + +impl ApiError { + fn new(status: StatusCode, code: &str, detail: &str) -> (StatusCode, Json) { + ( + status, + Json(ApiError { + detail: detail.to_string(), + error_code: code.to_string(), + status_code: status.as_u16(), + }), + ) + } +} + +type ApiResult = Result)>; + +/// Labels an installer carries for the machines it enrolls (SPEC-016 §3). +/// +/// All optional: a thin installer may carry only company/site. `company` -> +/// `connect_machines.organization`; `site` -> `connect_machines.site` (the +/// free-text label, distinct from the relational site binding resolved from +/// `site_code`). `department` / `device_type` are reserved label fields (SPEC-007 +/// AgentStatus parity) — accepted and folded into `tags` for now (no dedicated +/// columns yet), so they are not silently dropped. +#[derive(Debug, Default, Deserialize)] +pub struct EnrollLabels { + #[serde(default)] + pub company: Option, + #[serde(default)] + pub site: Option, + #[serde(default)] + pub department: Option, + #[serde(default)] + pub device_type: Option, + #[serde(default)] + pub tags: Vec, +} + +/// `POST /api/enroll` request body (SPEC-016 §3). +#[derive(Debug, Deserialize)] +pub struct EnrollRequest { + pub site_code: String, + /// The per-site enrollment secret (`cek_`). Verified against the site's active + /// hashed key; never logged. + pub enrollment_key: String, + /// Opaque caller-supplied stable machine identity. Phase A treats this as an + /// opaque string; the hardware-salted derivation is Phase B (agent-side). + pub machine_uid: String, + pub hostname: String, + #[serde(default)] + pub labels: EnrollLabels, +} + +/// `POST /api/enroll` success response. +/// +/// On a clean (active) enroll, `key` carries the plaintext `cak_` ONCE. On a +/// collision-gated `pending` enroll, `key` is `None` and `enrollment_state` is +/// `"pending"` — no usable operating credential is issued until an operator +/// confirms the endpoint in the dashboard. +#[derive(Debug, Serialize)] +pub struct EnrollResponse { + /// `connect_machines.id` for the enrolled machine. + pub machine_id: Uuid, + /// The minted plaintext `cak_`, present ONLY for an active enroll, ONLY here. + #[serde(skip_serializing_if = "Option::is_none")] + pub key: Option, + /// `"active"` (live, controllable, key issued) or `"pending"` (collision-gated; + /// awaiting operator confirmation; no key issued). + pub enrollment_state: String, + /// Disposition: `"new"` | `"reuse"` | `"site_move"` | `"collision_pending"`. + pub disposition: String, +} + +fn require_db(state: &AppState) -> ApiResult<&db::Database> { + state.db.as_ref().ok_or_else(|| { + ApiError::new( + StatusCode::SERVICE_UNAVAILABLE, + "DATABASE_UNAVAILABLE", + "Database not available", + ) + }) +} + +/// Collision-gate heuristic (PROVISIONAL — SPEC-016 §item-1/6). +/// +/// The residual collision case is template-cloned VMs that share a hardware UUID +/// (some hypervisors clone the SMBIOS UUID), so a *different* physical endpoint +/// resolves to an existing `machine_uid`. We cannot distinguish that from a benign +/// re-image purely from a client-asserted uid, so the gate is intentionally +/// CONSERVATIVE and the heuristic is provisional, to be tightened in planning +/// (which durable hardware signals feed the uid, and the hypervisor behavior +/// matrix — see SPEC-016 §Remaining-for-planning). +/// +/// PROVISIONAL heuristic chosen for Phase A: treat it as a collision when the +/// matched existing machine +/// (a) is currently considered ONLINE (status == "online"), AND +/// (b) reports a DIFFERENT hostname than the incoming request, +/// i.e. an apparently-live box already owns this uid yet a second box with a +/// different name is enrolling against it concurrently — the clone signature. A +/// case-insensitive hostname compare avoids false positives from case drift. An +/// OFFLINE matched row (the common re-image / re-install case) is NOT treated as a +/// collision — that is the legitimate reuse path. A same-hostname match is reuse, +/// never a collision. +/// +/// Rationale for conservatism: a false POSITIVE merely sends a real machine to +/// `pending` (an operator clicks confirm — annoying, recoverable); a false NEGATIVE +/// would auto-activate a cloned endpoint (worse). When the salt set is finalized in +/// planning this should become "uid is stable hardware, so a genuine clone is +/// expected to be rare; gate on the salt's distinguishing component instead." +fn is_collision(existing: &db::machines::Machine, incoming_hostname: &str) -> bool { + let online = existing.status.eq_ignore_ascii_case("online"); + let different_host = !existing + .hostname + .eq_ignore_ascii_case(incoming_hostname.trim()); + online && different_host +} + +/// Mint a `cak_`, store its hash bound to `machine_id` + tenant, and return the +/// plaintext. Shared by the new/reuse/move active paths. +async fn mint_cak( + db: &db::Database, + machine_id: Uuid, + tenant_id: Uuid, +) -> ApiResult { + let plaintext = agent_keys::generate_agent_key(); + let key_hash = agent_keys::hash_agent_key(&plaintext); + db::agent_keys::insert_agent_key(db.pool(), machine_id, &key_hash, Some(tenant_id)) + .await + .map_err(|e| { + tracing::error!("[ENROLL] DB error minting agent key: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to mint machine credential", + ) + })?; + Ok(plaintext) +} + +/// `POST /api/enroll` — public self-registration (SPEC-016 §3). +pub async fn enroll( + State(state): State, + ConnectInfo(addr): ConnectInfo, + headers: HeaderMap, + Json(req): Json, +) -> ApiResult<(StatusCode, Json)> { + let db = require_db(&state)?; + let tenant_id = db::tenancy::current_tenant_id(); + + // Real client IP via the shared trusted-proxy-aware extractor (same source the + // relay / rate limiter / audit log use, so the buckets never drift). + let ip: IpAddr = crate::utils::ip_extract::client_ip(&addr, &headers, &state.trusted_proxies); + + // Basic input hygiene before any DB/KDF work. + let site_code = req.site_code.trim(); + let hostname = req.hostname.trim(); + let machine_uid = req.machine_uid.trim(); + if site_code.is_empty() || hostname.is_empty() || machine_uid.is_empty() { + return Err(ApiError::new( + StatusCode::BAD_REQUEST, + "INVALID_REQUEST", + "site_code, hostname, and machine_uid are required", + )); + } + + // Defense-in-depth rate limit / lockout per (site_code, IP). The 256-bit + // enrollment key is the load-bearing gate; this throttles brute-force/abuse. + if !state.rate_limits.enroll.check(site_code, ip) { + tracing::warn!( + "[ENROLL] rate-limited/locked-out enroll for site_code={} from {}", + site_code, + ip + ); + return Err(ApiError::new( + StatusCode::TOO_MANY_REQUESTS, + "RATE_LIMITED", + "Too many enrollment attempts. Please try again later.", + )); + } + + // Resolve the site by code (per-tenant). + let site = match db::sites::get_site_by_code(db.pool(), site_code, tenant_id).await { + Ok(Some(s)) => s, + Ok(None) => { + state.rate_limits.enroll.record_failure(site_code, ip); + audit(db, db::events::EventTypes::ENROLL_REJECTED, ip, json!({ + "reason": "unknown_site_code", + "site_code": site_code, + "machine_uid": machine_uid, + })) + .await; + tracing::warn!("[ENROLL] unknown site_code={} from {}", site_code, ip); + // Same opaque rejection shape as a bad key — do not reveal which of the + // two failed (avoids a site_code enumeration oracle). + return Err(ApiError::new( + StatusCode::UNAUTHORIZED, + "ENROLL_REJECTED", + "Invalid site code or enrollment key", + )); + } + Err(e) => { + tracing::error!("[ENROLL] DB error resolving site: {}", e); + return Err(ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + )); + } + }; + + // Verify the enrollment key against the site's ACTIVE key. A rotated-out (old) + // installer's key is inactive and rejected here — old installers cannot enroll + // NEW machines after rotation (SPEC-016 success-criterion #3). + let active_key = match db::enrollment_keys::get_active_for_site(db.pool(), site.id).await { + Ok(Some(k)) => k, + Ok(None) => { + state.rate_limits.enroll.record_failure(site_code, ip); + audit(db, db::events::EventTypes::ENROLL_REJECTED, ip, json!({ + "reason": "no_active_key", + "site_code": site_code, + "machine_uid": machine_uid, + })) + .await; + tracing::warn!("[ENROLL] no active enrollment key for site_code={}", site_code); + return Err(ApiError::new( + StatusCode::UNAUTHORIZED, + "ENROLL_REJECTED", + "Invalid site code or enrollment key", + )); + } + Err(e) => { + tracing::error!("[ENROLL] DB error loading active enrollment key: {}", e); + return Err(ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + )); + } + }; + + if !enrollment_keys::verify_enrollment_key(&req.enrollment_key, &active_key.key_hash) { + state.rate_limits.enroll.record_failure(site_code, ip); + audit(db, db::events::EventTypes::ENROLL_REJECTED, ip, json!({ + "reason": "bad_enrollment_key", + "site_code": site_code, + "machine_uid": machine_uid, + })) + .await; + tracing::warn!("[ENROLL] bad enrollment key for site_code={} from {}", site_code, ip); + return Err(ApiError::new( + StatusCode::UNAUTHORIZED, + "ENROLL_REJECTED", + "Invalid site code or enrollment key", + )); + } + + // Key verified — this is a legitimate attempt; reset the failure streak. + state.rate_limits.enroll.record_success(site_code, ip); + + // Build the label/identity params shared by the create/update paths. + let tags = effective_tags(&req.labels); + let company = req.labels.company.as_deref().map(str::trim).filter(|s| !s.is_empty()); + let site_label = req + .labels + .site + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()); + + // Dedup on (tenant, machine_uid). + let existing = + match db::machines::get_machine_by_tenant_uid(db.pool(), tenant_id, machine_uid).await { + Ok(e) => e, + Err(e) => { + tracing::error!("[ENROLL] DB error on dedup lookup: {}", e); + return Err(ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + )); + } + }; + + match existing { + // -- Reuse / site-move / collision path ------------------------------- + Some(existing) => { + // Collision gate: a seemingly-different live endpoint resolving to an + // existing uid -> pending, alert, NO usable cak_ minted. + if is_collision(&existing, hostname) { + let params = enroll_params( + &existing.agent_id, + hostname, + machine_uid, + tenant_id, + site.id, + company, + site_label, + &tags, + "pending", + ); + let machine = db::machines::update_enrolled_machine(db.pool(), existing.id, ¶ms) + .await + .map_err(map_update_err)?; + + audit(db, db::events::EventTypes::ENROLL_COLLISION_PENDING, ip, json!({ + "machine_id": machine.id, + "machine_uid": machine_uid, + "site_code": site_code, + "existing_hostname": existing.hostname, + "incoming_hostname": hostname, + "heuristic": "online_existing_with_different_hostname (PROVISIONAL)", + })) + .await; + // TODO(SPEC-016): wire to #dev-alerts — collision requires operator + // confirmation in the dashboard before this endpoint may activate. + tracing::warn!( + "[ENROLL] machine_uid collision -> PENDING: machine_id={} site_code={} \ + existing_host={} incoming_host={} from {}", + machine.id, site_code, existing.hostname, hostname, ip + ); + + return Ok(( + StatusCode::ACCEPTED, + Json(EnrollResponse { + machine_id: machine.id, + key: None, + enrollment_state: "pending".to_string(), + disposition: "collision_pending".to_string(), + }), + )); + } + + // Legitimate reuse (re-image / re-install) or a site move. + let is_move = existing.site_id.map(|s| s != site.id).unwrap_or(true); + let params = enroll_params( + &existing.agent_id, + hostname, + machine_uid, + tenant_id, + site.id, + company, + site_label, + &tags, + "active", + ); + let machine = db::machines::update_enrolled_machine(db.pool(), existing.id, ¶ms) + .await + .map_err(map_update_err)?; + + let cak = mint_cak(db, machine.id, tenant_id).await?; + + if is_move { + audit(db, db::events::EventTypes::ENROLL_SITE_MOVE, ip, json!({ + "machine_id": machine.id, + "machine_uid": machine_uid, + "from_site_id": existing.site_id, + "to_site_code": site_code, + "to_site_id": site.id, + })) + .await; + // TODO(SPEC-016): wire to #dev-alerts — a machine moved between sites. + tracing::info!( + "[ENROLL] site-move: machine_id={} machine_uid present, to site_code={} from {}", + machine.id, site_code, ip + ); + } else { + audit(db, db::events::EventTypes::ENROLL_REUSE, ip, json!({ + "machine_id": machine.id, + "machine_uid": machine_uid, + "site_code": site_code, + })) + .await; + tracing::info!( + "[ENROLL] reuse: machine_id={} re-enrolled at site_code={} from {}", + machine.id, site_code, ip + ); + } + + Ok(( + StatusCode::OK, + Json(EnrollResponse { + machine_id: machine.id, + key: Some(cak), + enrollment_state: "active".to_string(), + disposition: if is_move { "site_move" } else { "reuse" }.to_string(), + }), + )) + } + + // -- New enrollment ---------------------------------------------------- + None => { + // Fresh opaque agent_id for the new row's `agent_id UNIQUE` column. The + // agent's own config-UUID story is Phase B; the server only needs a + // unique non-null value here, and the authoritative identity is the + // minted cak_ -> machine binding. + let agent_id = format!("enroll-{}", Uuid::new_v4()); + let params = enroll_params( + &agent_id, + hostname, + machine_uid, + tenant_id, + site.id, + company, + site_label, + &tags, + "active", + ); + let machine = db::machines::insert_enrolled_machine(db.pool(), ¶ms) + .await + .map_err(|e| { + tracing::error!("[ENROLL] DB error inserting enrolled machine: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to register machine", + ) + })?; + + let cak = mint_cak(db, machine.id, tenant_id).await?; + + audit(db, db::events::EventTypes::ENROLL_NEW, ip, json!({ + "machine_id": machine.id, + "machine_uid": machine_uid, + "site_code": site_code, + "hostname": hostname, + })) + .await; + // TODO(SPEC-016): wire to #dev-alerts — new-enrollment tripwire. + tracing::info!( + "[ENROLL] new: machine_id={} hostname={} site_code={} from {}", + machine.id, hostname, site_code, ip + ); + + Ok(( + StatusCode::CREATED, + Json(EnrollResponse { + machine_id: machine.id, + key: Some(cak), + enrollment_state: "active".to_string(), + disposition: "new".to_string(), + }), + )) + } + } +} + +/// Fold `department` / `device_type` (no dedicated columns yet — SPEC-007) into the +/// tag set as `department:` / `device_type:` so they are preserved rather than +/// dropped, alongside any explicit tags. Empty/whitespace values are skipped. +fn effective_tags(labels: &EnrollLabels) -> Vec { + let mut tags: Vec = labels + .tags + .iter() + .map(|t| t.trim().to_string()) + .filter(|t| !t.is_empty()) + .collect(); + if let Some(d) = labels.department.as_deref().map(str::trim).filter(|s| !s.is_empty()) { + tags.push(format!("department:{}", d)); + } + if let Some(d) = labels + .device_type + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + tags.push(format!("device_type:{}", d)); + } + tags +} + +/// Assemble [`db::machines::EnrollMachineParams`] from the resolved pieces. +#[allow(clippy::too_many_arguments)] +fn enroll_params<'a>( + agent_id: &'a str, + hostname: &'a str, + machine_uid: &'a str, + tenant_id: Uuid, + site_id: Uuid, + company: Option<&'a str>, + site_label: Option<&'a str>, + tags: &'a [String], + enrollment_state: &'a str, +) -> db::machines::EnrollMachineParams<'a> { + db::machines::EnrollMachineParams { + agent_id, + hostname, + machine_uid, + tenant_id, + site_id, + company, + site_label, + tags, + enrollment_state, + } +} + +/// Best-effort enrollment audit write — a failure here never fails the enroll. +async fn audit(db: &db::Database, event_type: &str, ip: IpAddr, details: serde_json::Value) { + if let Err(e) = db::events::log_enrollment_event(db.pool(), event_type, details, Some(ip)).await + { + tracing::warn!("[ENROLL] failed to write {} audit event: {}", event_type, e); + } +} + +/// Map a DB error from the existing-row update to the standard 500 envelope. +fn map_update_err(e: sqlx::Error) -> (StatusCode, Json) { + tracing::error!("[ENROLL] DB error updating enrolled machine: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to update machine", + ) +} diff --git a/server/src/api/mod.rs b/server/src/api/mod.rs index 5d99dbe..3c28950 100644 --- a/server/src/api/mod.rs +++ b/server/src/api/mod.rs @@ -4,10 +4,12 @@ pub mod auth; pub mod auth_logout; pub mod changelog; pub mod downloads; +pub mod enroll; pub mod machine_keys; pub mod releases; pub mod removal; pub mod sessions; +pub mod sites; pub mod users; use axum::{ diff --git a/server/src/api/sites.rs b/server/src/api/sites.rs new file mode 100644 index 0000000..01bdb36 --- /dev/null +++ b/server/src/api/sites.rs @@ -0,0 +1,217 @@ +//! Site enrollment-key administration (SPEC-016, admin plane). +//! +//! Admin (dashboard JWT + admin role) endpoints for the per-site enrollment key +//! the dashboard surfaces and rotates: +//! +//! - `POST /api/sites/:id/enrollment-key/rotate` — regenerate the `cek_` secret, +//! bump the monotonic version, derive a new fingerprint, deactivate the prior +//! active key, and return the plaintext + fingerprint ONCE. Old installers can no +//! longer enroll NEW machines after this; already-enrolled agents (holding their +//! own `cak_`) are unaffected (SPEC-016 success-criterion #3). Doubles as +//! first-issue when a site has no key yet. +//! - `GET /api/sites/:id/enrollment-key` — read the CURRENT non-secret fingerprint +//! + version (never the secret). 404 if the site has no active key yet. +//! +//! Auth mirrors `api::machine_keys`: the [`crate::auth::AdminUser`] extractor gates +//! both routes, and they are mounted behind the JWT `auth_layer`. +//! +//! SECURITY: the plaintext `cek_` is returned exactly once (rotate response), +//! never persisted in plaintext and never logged. Read responses expose only the +//! version + fingerprint. + +use axum::{ + extract::{Path, State}, + http::StatusCode, + Json, +}; +use serde::Serialize; +use uuid::Uuid; + +use crate::auth::{enrollment_keys, AdminUser}; +use crate::db; +use crate::AppState; + +/// Standard error envelope (matches `api::machine_keys::ApiError`). +#[derive(Debug, Serialize)] +pub struct ApiError { + pub detail: String, + pub error_code: String, + pub status_code: u16, +} + +impl ApiError { + fn new(status: StatusCode, code: &str, detail: &str) -> (StatusCode, Json) { + ( + status, + Json(ApiError { + detail: detail.to_string(), + error_code: code.to_string(), + status_code: status.as_u16(), + }), + ) + } +} + +type ApiResult = Result)>; + +/// Response for a freshly rotated/issued enrollment key. `key` is present ONLY +/// here, once. +#[derive(Debug, Serialize)] +pub struct RotatedEnrollmentKey { + pub site_id: Uuid, + /// The plaintext `cek_` enrollment key. Shown exactly once — bake it into the + /// site installer now; the server keeps only its hash. + pub key: String, + /// Monotonic rotation version. + pub version: i32, + /// The non-secret short hex code (the `XXXX` in `vN (XXXX)`). + pub fingerprint: String, + /// Fully rendered operator-facing fingerprint, e.g. `v3 (7F2A)`. + pub fingerprint_label: String, +} + +/// Non-secret current-key view for the GET endpoint. +#[derive(Debug, Serialize)] +pub struct EnrollmentKeyView { + pub site_id: Uuid, + pub version: i32, + pub fingerprint: String, + pub fingerprint_label: String, + pub active: bool, +} + +fn require_db(state: &AppState) -> ApiResult<&db::Database> { + state.db.as_ref().ok_or_else(|| { + ApiError::new( + StatusCode::SERVICE_UNAVAILABLE, + "DATABASE_UNAVAILABLE", + "Database not available", + ) + }) +} + +/// Resolve a site by its UUID path segment, or a 404 envelope. +async fn resolve_site(db: &db::Database, site_id: Uuid) -> ApiResult { + db::sites::get_site_by_id(db.pool(), site_id) + .await + .map_err(|e| { + tracing::error!("DB error resolving site: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + ) + })? + .ok_or_else(|| ApiError::new(StatusCode::NOT_FOUND, "SITE_NOT_FOUND", "Site not found")) +} + +/// POST /api/sites/:id/enrollment-key/rotate — rotate (or first-issue) a site's +/// enrollment key. Returns the plaintext `cek_` + fingerprint once. +pub async fn rotate_enrollment_key( + AdminUser(admin): AdminUser, + State(state): State, + Path(site_id): Path, +) -> ApiResult<(StatusCode, Json)> { + let db = require_db(&state)?; + let site = resolve_site(db, site_id).await?; + + // Mint plaintext + Argon2id hash + fingerprint. Only the hash + fingerprint + // are persisted; the plaintext is surfaced once below. + let plaintext = enrollment_keys::generate_enrollment_key(); + let key_hash = enrollment_keys::hash_enrollment_key(&plaintext).map_err(|e| { + tracing::error!("Failed to hash enrollment key: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to hash enrollment key", + ) + })?; + let fingerprint = enrollment_keys::compute_fingerprint(&plaintext); + + let new_key = db::enrollment_keys::rotate_key(db.pool(), site.id, &key_hash, &fingerprint) + .await + .map_err(|e| { + tracing::error!("DB error rotating enrollment key: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to rotate enrollment key", + ) + })?; + + let fingerprint_label = + enrollment_keys::render_fingerprint(new_key.version, &new_key.fingerprint); + + // Audit WITHOUT key material (no plaintext, no hash). + if let Err(e) = db::events::log_enrollment_event( + db.pool(), + db::events::EventTypes::ENROLLMENT_KEY_ROTATED, + serde_json::json!({ + "site_id": site.id, + "site_code": site.site_code, + "version": new_key.version, + "fingerprint": new_key.fingerprint, + "rotated_by": admin.username, + }), + None, + ) + .await + { + tracing::warn!("[ENROLL] failed to write key-rotate audit event: {}", e); + } + tracing::info!( + "Admin {} rotated enrollment key for site {} to {}", + admin.username, + site.site_code, + fingerprint_label + ); + + Ok(( + StatusCode::CREATED, + Json(RotatedEnrollmentKey { + site_id: site.id, + key: plaintext, + version: new_key.version, + fingerprint: new_key.fingerprint, + fingerprint_label, + }), + )) +} + +/// GET /api/sites/:id/enrollment-key — current non-secret fingerprint + version. +pub async fn get_enrollment_key( + AdminUser(_admin): AdminUser, + State(state): State, + Path(site_id): Path, +) -> ApiResult> { + let db = require_db(&state)?; + let site = resolve_site(db, site_id).await?; + + let key = db::enrollment_keys::get_active_for_site(db.pool(), site.id) + .await + .map_err(|e| { + tracing::error!("DB error loading enrollment key: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + ) + })? + .ok_or_else(|| { + ApiError::new( + StatusCode::NOT_FOUND, + "NO_ENROLLMENT_KEY", + "Site has no active enrollment key", + ) + })?; + + let fingerprint_label = enrollment_keys::render_fingerprint(key.version, &key.fingerprint); + + Ok(Json(EnrollmentKeyView { + site_id: site.id, + version: key.version, + fingerprint: key.fingerprint, + fingerprint_label, + active: key.active, + })) +} diff --git a/server/src/auth/enrollment_keys.rs b/server/src/auth/enrollment_keys.rs new file mode 100644 index 0000000..2b63b38 --- /dev/null +++ b/server/src/auth/enrollment_keys.rs @@ -0,0 +1,188 @@ +//! Per-site enrollment key minting, hashing, verification, and fingerprinting +//! (SPEC-016 zero-touch enrollment, auth layer). +//! +//! This is the low-sensitivity, rotatable side of the two-tier credential model +//! (SPEC-016 §Security). A per-site ENROLLMENT key (`cek_` prefix) gates "may +//! this machine register at all" at `POST /api/enroll`; a successful enroll mints +//! the high-sensitivity per-machine `cak_` operating credential +//! ([`crate::auth::agent_keys`]). Compromise of an enrollment key is contained to +//! one site and recovered by rotating it. +//! +//! Lifecycle owned here (the secret side): +//! +//! - [`generate_enrollment_key`] mints a high-entropy, `cek_`-prefixed plaintext +//! secret. Mirrors [`crate::auth::agent_keys::generate_agent_key`]'s entropy +//! approach (32 random bytes from the OS CSPRNG, hex-encoded) with a DISTINCT +//! prefix so the two key kinds are never confused in logs or storage. The +//! plaintext is shown to the operator exactly once at issue/rotate and is NEVER +//! persisted or logged. +//! - [`hash_enrollment_key`] / [`verify_enrollment_key`] use **Argon2id** (via +//! [`crate::auth::password`]). This DIFFERS from `cak_` (which uses SHA-256 for +//! a constant-shape equality lookup): SPEC-016 §2 explicitly requires the +//! enrollment key be "stored hashed (Argon2id, same as `cak_`/passwords)". The +//! trade-off is deliberate — enrollment keys are looked up by `(site, active)` +//! first (a small candidate set, usually one row) and only then verified, so the +//! per-verify KDF cost is bounded and not on a high-QPS path, while Argon2id +//! gives salted, GPU-resistant storage matching the password posture. +//! - [`compute_fingerprint`] derives the non-secret short HEX code shown as +//! `vN (XXXX)` (SPEC-016 resolved-decision #3 — hex, deliberately NOT the +//! GuruRMM word-style code, so the two products' artifacts are never visually +//! conflated). +//! +//! SECURITY: never log a plaintext key or its hash. Functions here return the +//! plaintext to the caller (issue/rotate endpoint) but emit no `tracing` output +//! containing key material. + +use anyhow::Result; +use rand::RngCore; +use ring::digest; + +/// Prefix marking a GuruConnect per-site enrollment key. Distinct from the +/// per-agent `cak_` prefix so the two key kinds are never confused. +pub const ENROLLMENT_KEY_PREFIX: &str = "cek_"; + +/// Number of random bytes behind an enrollment key (256 bits of entropy), matching +/// [`crate::auth::agent_keys`]. SPEC-016 §2 requires ≥256-bit. +const ENROLLMENT_KEY_RANDOM_BYTES: usize = 32; + +/// Number of hex characters in the fingerprint code (the `XXXX` in `vN (XXXX)`). +/// Four hex chars = 16 bits — ample to let an operator tell two installers apart at +/// a glance; it is a non-secret display aid, not a security control. +const FINGERPRINT_HEX_LEN: usize = 4; + +/// Generate a new high-entropy, `cek_`-prefixed per-site enrollment key (plaintext). +/// +/// The returned string is the ONLY time the plaintext exists; the caller must +/// surface it to the operator once and store only [`hash_enrollment_key`] of it. +/// Uses the OS CSPRNG via `rand::rngs::OsRng`. +pub fn generate_enrollment_key() -> String { + let mut bytes = [0u8; ENROLLMENT_KEY_RANDOM_BYTES]; + rand::rngs::OsRng.fill_bytes(&mut bytes); + format!("{}{}", ENROLLMENT_KEY_PREFIX, hex_encode(&bytes)) +} + +/// Hash an enrollment key for storage using Argon2id (SPEC-016 §2). +/// +/// Delegates to [`crate::auth::password::hash_password`] so the KDF parameters and +/// salt generation match the password posture exactly. Returns the PHC-format +/// string Postgres stores in `site_enrollment_keys.key_hash`. +pub fn hash_enrollment_key(plaintext: &str) -> Result { + crate::auth::password::hash_password(plaintext) +} + +/// Verify a presented enrollment key against a stored Argon2id hash. +/// +/// Returns `Ok(true)` on a match. A malformed stored hash or a mismatch yields +/// `Ok(false)` / an `Err` from the underlying verifier; the caller treats any +/// non-`Ok(true)` as a rejection. A cheap structural reject (`cek_` prefix) runs +/// first to skip the KDF on obviously-bogus input. +/// +/// SECURITY: only compares; never logs the presented key or the hash. +pub fn verify_enrollment_key(presented: &str, stored_hash: &str) -> bool { + if !presented.starts_with(ENROLLMENT_KEY_PREFIX) { + return false; + } + crate::auth::password::verify_password(presented, stored_hash).unwrap_or(false) +} + +/// Compute the non-secret short HEX fingerprint code for an enrollment key. +/// +/// Derived as the first [`FINGERPRINT_HEX_LEN`] hex chars of the SHA-256 of the +/// plaintext secret, uppercased. This is a stable, non-reversible tag of the secret +/// (knowing the code does not reveal the key) used purely for display. Pair it with +/// the monotonic version via [`render_fingerprint`]. +pub fn compute_fingerprint(plaintext: &str) -> String { + let d = digest::digest(&digest::SHA256, plaintext.as_bytes()); + let hex = hex_encode(d.as_ref()); + hex[..FINGERPRINT_HEX_LEN].to_ascii_uppercase() +} + +/// Render the operator-facing fingerprint string `vN (XXXX)` (SPEC-016 §2). +/// +/// `version` is the monotonic rotation counter; `code` is [`compute_fingerprint`]. +/// Example: `render_fingerprint(3, "7F2A")` -> `"v3 (7F2A)"`. +pub fn render_fingerprint(version: i32, code: &str) -> String { + format!("v{} ({})", version, code) +} + +/// Lowercase hex encoding without pulling in the `hex` crate (mirrors +/// [`crate::auth::agent_keys`]). +fn hex_encode(bytes: &[u8]) -> String { + use std::fmt::Write; + let mut s = String::with_capacity(bytes.len() * 2); + for b in bytes { + let _ = write!(s, "{:02x}", b); + } + s +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn generated_key_is_prefixed_and_high_entropy() { + let key = generate_enrollment_key(); + assert!(key.starts_with(ENROLLMENT_KEY_PREFIX)); + assert_eq!( + key.len(), + ENROLLMENT_KEY_PREFIX.len() + ENROLLMENT_KEY_RANDOM_BYTES * 2 + ); + } + + #[test] + fn generated_keys_are_unique() { + assert_ne!(generate_enrollment_key(), generate_enrollment_key()); + } + + #[test] + fn hash_and_verify_roundtrip() { + let key = generate_enrollment_key(); + let hash = hash_enrollment_key(&key).expect("hash"); + assert!(verify_enrollment_key(&key, &hash)); + } + + #[test] + fn verify_rejects_wrong_key() { + let key = generate_enrollment_key(); + let other = generate_enrollment_key(); + let hash = hash_enrollment_key(&key).expect("hash"); + assert!(!verify_enrollment_key(&other, &hash)); + } + + #[test] + fn verify_rejects_unprefixed_input_without_touching_kdf() { + let key = generate_enrollment_key(); + let hash = hash_enrollment_key(&key).expect("hash"); + // A value lacking the cek_ prefix is structurally rejected before the KDF. + assert!(!verify_enrollment_key("not-a-key", &hash)); + } + + #[test] + fn verify_rejects_malformed_stored_hash() { + let key = generate_enrollment_key(); + // A garbage stored hash must not panic and must reject. + assert!(!verify_enrollment_key(&key, "not-a-phc-hash")); + } + + #[test] + fn fingerprint_is_stable_uppercase_hex_of_expected_len() { + let key = "cek_deadbeef"; + let f1 = compute_fingerprint(key); + let f2 = compute_fingerprint(key); + assert_eq!(f1, f2); + assert_eq!(f1.len(), FINGERPRINT_HEX_LEN); + assert!(f1.chars().all(|c| c.is_ascii_hexdigit())); + assert_eq!(f1, f1.to_ascii_uppercase()); + } + + #[test] + fn fingerprint_differs_per_key() { + assert_ne!(compute_fingerprint("cek_aaa"), compute_fingerprint("cek_bbb")); + } + + #[test] + fn render_fingerprint_matches_spec_shape() { + assert_eq!(render_fingerprint(3, "7F2A"), "v3 (7F2A)"); + } +} diff --git a/server/src/auth/mod.rs b/server/src/auth/mod.rs index 8cebc3d..28da8ae 100644 --- a/server/src/auth/mod.rs +++ b/server/src/auth/mod.rs @@ -4,6 +4,7 @@ //! validation for agents. pub mod agent_keys; +pub mod enrollment_keys; pub mod jwt; pub mod password; pub mod token_blacklist; diff --git a/server/src/db/enrollment_keys.rs b/server/src/db/enrollment_keys.rs new file mode 100644 index 0000000..96c27bd --- /dev/null +++ b/server/src/db/enrollment_keys.rs @@ -0,0 +1,142 @@ +//! Per-site enrollment key database operations (SPEC-016 zero-touch enrollment). +//! +//! Backs the `site_enrollment_keys` table (migration 010). Stores ONLY the +//! Argon2id hash of the `cek_` secret plus the non-secret rotation metadata +//! (version, fingerprint, active flag). Computing the hash and minting the +//! plaintext is [`crate::auth::enrollment_keys`]'s job; this module is +//! hash-agnostic persistence and takes already-hashed values. +//! +//! Rotation invariant: at most one `active` row per site (enforced by a partial +//! unique index in migration 010). [`rotate_key`] deactivates the current active +//! row and inserts a new active one inside a single transaction so the invariant +//! is never transiently violated. +//! +//! All queries use runtime `sqlx::query()` / `sqlx::query_as()` per the codebase +//! convention (no compile-time `query!` macros, no `.sqlx` offline cache). + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use sqlx::PgPool; +use uuid::Uuid; + +/// Per-site enrollment key record. +/// +/// `key_hash` is the only representation of the secret the server stores; the +/// plaintext is shown once at issue/rotate and never persisted. +#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)] +pub struct EnrollmentKey { + pub id: Uuid, + pub site_id: Uuid, + pub key_hash: String, + pub version: i32, + pub fingerprint: String, + pub active: bool, + pub created_at: DateTime, + pub rotated_at: Option>, +} + +/// Fetch the active enrollment key for a site, if any. +/// +/// This is the `/api/enroll` hot path: resolve the one active key whose hash the +/// presented `cek_` is verified against. The partial unique index guarantees at +/// most one active row, so `fetch_optional` is correct. +pub async fn get_active_for_site( + pool: &PgPool, + site_id: Uuid, +) -> Result, sqlx::Error> { + sqlx::query_as::<_, EnrollmentKey>( + r#" + SELECT id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at + FROM site_enrollment_keys + WHERE site_id = $1 AND active + "#, + ) + .bind(site_id) + .fetch_optional(pool) + .await +} + +/// Insert the FIRST enrollment key for a site at version 1 (initial issue). +/// +/// Use [`rotate_key`] for subsequent rotations. Errors with a unique violation if +/// the site already has an active key (the caller should rotate instead). +#[allow(dead_code)] // Wired by site-admin issue flow; Phase A exposes rotation (which also covers first issue when none exists). +pub async fn insert_initial_key( + pool: &PgPool, + site_id: Uuid, + key_hash: &str, + fingerprint: &str, +) -> Result { + sqlx::query_as::<_, EnrollmentKey>( + r#" + INSERT INTO site_enrollment_keys (site_id, key_hash, version, fingerprint, active) + VALUES ($1, $2, 1, $3, true) + RETURNING id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at + "#, + ) + .bind(site_id) + .bind(key_hash) + .bind(fingerprint) + .fetch_one(pool) + .await +} + +/// Rotate a site's enrollment key (SPEC-016 §2): deactivate the current active key +/// (if any) and insert a new active key at the next monotonic version, all in one +/// transaction. +/// +/// Returns the newly-created active key. If the site has no key yet, this issues +/// version 1 (so rotation also serves as first-issue). The caller passes the +/// already-hashed new secret and its fingerprint; the plaintext is surfaced once by +/// the caller and never reaches this layer. +/// +/// The transaction is what keeps the "at most one active key per site" invariant +/// (partial unique index) from being transiently violated between the UPDATE and +/// the INSERT. +pub async fn rotate_key( + pool: &PgPool, + site_id: Uuid, + new_key_hash: &str, + new_fingerprint: &str, +) -> Result { + let mut tx = pool.begin().await?; + + // Highest existing version for this site (NULL -> 0 so the first key is v1). + let current_max: Option = sqlx::query_scalar( + "SELECT MAX(version) FROM site_enrollment_keys WHERE site_id = $1", + ) + .bind(site_id) + .fetch_one(&mut *tx) + .await?; + let next_version = current_max.unwrap_or(0) + 1; + + // Deactivate the current active key (if any), stamping rotated_at. + sqlx::query( + r#" + UPDATE site_enrollment_keys + SET active = false, rotated_at = NOW() + WHERE site_id = $1 AND active + "#, + ) + .bind(site_id) + .execute(&mut *tx) + .await?; + + // Insert the new active key at the next version. + let new_key = sqlx::query_as::<_, EnrollmentKey>( + r#" + INSERT INTO site_enrollment_keys (site_id, key_hash, version, fingerprint, active) + VALUES ($1, $2, $3, $4, true) + RETURNING id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at + "#, + ) + .bind(site_id) + .bind(new_key_hash) + .bind(next_version) + .bind(new_fingerprint) + .fetch_one(&mut *tx) + .await?; + + tx.commit().await?; + Ok(new_key) +} diff --git a/server/src/db/events.rs b/server/src/db/events.rs index 8d14aff..f42fb39 100644 --- a/server/src/db/events.rs +++ b/server/src/db/events.rs @@ -69,6 +69,29 @@ impl EventTypes { pub const MACHINE_REMOVED: &'static str = "machine_removed"; /// An administrator soft-deleted (purged) a session and dropped it in-memory. pub const SESSION_REMOVED: &'static str = "session_removed"; + + // Zero-touch enrollment events (SPEC-016). Written by POST /api/enroll and the + // site enrollment-key rotation endpoint. These carry no session, so they are + // logged via `log_enrollment_event` with `session_id = NULL`; the structured + // detail (machine_uid, site_code, fingerprint, etc.) goes in `details` and the + // source IP in `ip_address`. + /// A new machine self-registered at a site and was minted its first `cak_`. + pub const ENROLL_NEW: &'static str = "enroll_new"; + /// An existing machine_uid re-enrolled at the SAME site — the row was reused and + /// a fresh `cak_` minted (re-image / re-install). + pub const ENROLL_REUSE: &'static str = "enroll_reuse"; + /// An existing machine_uid enrolled under a DIFFERENT site — the machine's site + /// binding was updated (a "site move"). Fires an alert. + pub const ENROLL_SITE_MOVE: &'static str = "enroll_site_move"; + /// A machine_uid collision was detected at enroll — the endpoint dropped to + /// `pending` and awaits operator confirmation in the dashboard. Fires an alert. + pub const ENROLL_COLLISION_PENDING: &'static str = "enroll_collision_pending"; + /// An enroll attempt failed enrollment-key verification (wrong/inactive key or + /// unknown site_code). Security audit trail for the open-registration surface. + pub const ENROLL_REJECTED: &'static str = "enroll_rejected"; + /// An administrator rotated a site's enrollment key (new version + fingerprint; + /// old installers can no longer enroll NEW machines). + pub const ENROLLMENT_KEY_ROTATED: &'static str = "enrollment_key_rotated"; } /// Log a session event @@ -154,6 +177,42 @@ pub async fn log_admin_removal( Ok(result) } +/// Log a zero-touch enrollment audit event (SPEC-016). +/// +/// Shares the `connect_session_events` audit table but carries no session +/// (`session_id = NULL`, the FK column is nullable) and no viewer — enrollment is +/// an unauthenticated agent action, not a viewer/session event. The structured +/// detail (machine_uid, site_code, fingerprint version, decision, etc.) goes in +/// `details` and the agent's source IP in `ip_address`. +/// +/// Best-effort: a failure to write the audit row must NOT fail the enroll (the +/// machine row and `cak_` already exist); the caller logs the error and proceeds, +/// matching how the relay and Task-5 removal treat audit writes. +pub async fn log_enrollment_event( + pool: &PgPool, + event_type: &str, + details: JsonValue, + ip_address: Option, +) -> Result { + let ip_str = ip_address.map(|ip| ip.to_string()); + + let result = sqlx::query_scalar::<_, i64>( + r#" + INSERT INTO connect_session_events + (session_id, event_type, viewer_id, viewer_name, details, ip_address) + VALUES (NULL, $1, NULL, NULL, $2, $3::inet) + RETURNING id + "#, + ) + .bind(event_type) + .bind(details) + .bind(ip_str) + .fetch_one(pool) + .await?; + + Ok(result) +} + /// Get events for a session #[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/ pub async fn get_session_events( diff --git a/server/src/db/machines.rs b/server/src/db/machines.rs index 901260b..51c3609 100644 --- a/server/src/db/machines.rs +++ b/server/src/db/machines.rs @@ -64,6 +64,16 @@ pub struct Machine { /// history) is retained. NULL = live. Nullable, so it is read NULL-tolerantly /// in the manual `FromRow` below. pub deleted_at: Option>, + /// Relational site binding for a machine enrolled via `/api/enroll` (SPEC-016, + /// migration 010). NULL for legacy / support-code / connect-path machines that + /// never enrolled through the zero-touch flow. A change of this on re-enroll is + /// the "site move" the enroll path audits. + pub site_id: Option, + /// Collision-gate state (SPEC-016, migration 010): `'active'` (live, auto-approve) + /// or `'pending'` (a machine_uid collision was detected at enroll; awaiting + /// operator confirmation before the endpoint may be controlled). Non-null with a + /// default of `'active'`; read NULL-tolerantly below for defense in depth. + pub enrollment_state: String, } impl<'r> FromRow<'r, PgRow> for Machine { @@ -83,6 +93,13 @@ impl<'r> FromRow<'r, PgRow> for Machine { machine_uid: row.try_get("machine_uid")?, // Schema-nullable (migration 009); decode directly as Option. deleted_at: row.try_get("deleted_at")?, + // Schema-nullable (migration 010); decode directly as Option. + site_id: row.try_get("site_id")?, + // Non-null with default 'active' (migration 010); read NULL-tolerantly + // (older snapshots / partial rows) and fall back to 'active'. + enrollment_state: row + .try_get::, _>("enrollment_state")? + .unwrap_or_else(|| "active".to_string()), // Nullable-with-default columns mapped to non-`Option` Rust types: read as // `Option` and fall back to the type default so a NULL cell never errors. is_elevated: row @@ -207,6 +224,131 @@ pub async fn upsert_machine( } } +/// Find a machine by the SPEC-016 per-tenant dedup key `(tenant_id, machine_uid)`. +/// +/// This is the enroll-time dedup lookup: the same hardware re-enrolling (re-image / +/// re-install) resolves to its existing row within the tenant, while the same +/// hardware in a DIFFERENT tenant is a distinct row (resolved-decision #4). Tenant +/// scoping uses the same default-tenant fold as the unique index so the lookup +/// matches the uniqueness guarantee. +/// +/// Unlike `get_machine_by_agent_id`, this deliberately does NOT filter +/// `deleted_at IS NULL`: a previously operator-purged machine that legitimately +/// re-enrolls must be found so the enroll path can revive it (clearing +/// `deleted_at`), mirroring the connect-path revive in `upsert_machine`. +pub async fn get_machine_by_tenant_uid( + pool: &PgPool, + tenant_id: Uuid, + machine_uid: &str, +) -> Result, sqlx::Error> { + sqlx::query_as::<_, Machine>( + r#" + SELECT * FROM connect_machines + WHERE machine_uid = $1 + AND COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid) = $2 + "#, + ) + .bind(machine_uid) + .bind(tenant_id) + .fetch_optional(pool) + .await +} + +/// Parameters for an enroll-time machine create/update (SPEC-016 `/api/enroll`). +/// +/// `agent_id` is a freshly minted opaque id for a NEW enrollment (the agent's +/// config UUID story is Phase B; the server only needs a unique non-null value for +/// the `agent_id UNIQUE` column). On REUSE/MOVE the existing row's `agent_id` is +/// preserved (the FK target of any already-minted `cak_`), so the update path does +/// not touch it. +pub struct EnrollMachineParams<'a> { + pub agent_id: &'a str, + pub hostname: &'a str, + pub machine_uid: &'a str, + pub tenant_id: Uuid, + pub site_id: Uuid, + /// Company label (-> connect_machines.organization). + pub company: Option<&'a str>, + /// Site label (-> connect_machines.site) — the free-text label, distinct from + /// the relational site_id binding. + pub site_label: Option<&'a str>, + pub tags: &'a [String], + /// 'active' (auto-approve) or 'pending' (collision-gated). + pub enrollment_state: &'a str, +} + +/// Insert a NEW machine row for a first-time enrollment (SPEC-016). +/// +/// Carries the labels, the relational `site_id`, the per-tenant `machine_uid`, and +/// the collision-gate `enrollment_state`. Persistent + online. Returns the created +/// row (its `id` is the FK target for the `cak_` the caller mints next). +pub async fn insert_enrolled_machine( + pool: &PgPool, + p: &EnrollMachineParams<'_>, +) -> Result { + sqlx::query_as::<_, Machine>( + r#" + INSERT INTO connect_machines + (agent_id, hostname, is_persistent, status, last_seen, machine_uid, + tenant_id, site_id, organization, site, tags, enrollment_state) + VALUES ($1, $2, true, 'online', NOW(), $3, $4, $5, $6, $7, $8, $9) + RETURNING * + "#, + ) + .bind(p.agent_id) + .bind(p.hostname) + .bind(p.machine_uid) + .bind(p.tenant_id) + .bind(p.site_id) + .bind(p.company) + .bind(p.site_label) + .bind(p.tags) + .bind(p.enrollment_state) + .fetch_one(pool) + .await +} + +/// Update an EXISTING machine row on re-enroll / reuse / site-move (SPEC-016). +/// +/// Refreshes hostname, site binding (`site_id`), labels, and `enrollment_state`, +/// and revives a soft-deleted row (`deleted_at = NULL`) — a re-enroll of a purged +/// host means it is live again, mirroring `upsert_machine`'s revive. Deliberately +/// does NOT change `agent_id`: the existing id is the FK target of any prior `cak_`. +/// Labels are COALESCE-merged so an enroll that omits a label does not wipe an +/// existing value; `tags` is overwritten only when a non-empty set is supplied +/// (matching `update_machine_metadata`'s convention). +pub async fn update_enrolled_machine( + pool: &PgPool, + machine_id: Uuid, + p: &EnrollMachineParams<'_>, +) -> Result { + sqlx::query_as::<_, Machine>( + r#" + UPDATE connect_machines SET + hostname = $2, + site_id = $3, + organization = COALESCE($4, organization), + site = COALESCE($5, site), + tags = CASE WHEN $6::text[] = '{}' THEN tags ELSE $6 END, + enrollment_state = $7, + status = 'online', + last_seen = NOW(), + deleted_at = NULL + WHERE id = $1 + RETURNING * + "#, + ) + .bind(machine_id) + .bind(p.hostname) + .bind(p.site_id) + .bind(p.company) + .bind(p.site_label) + .bind(p.tags) + .bind(p.enrollment_state) + .fetch_one(pool) + .await +} + /// Update machine status and info #[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/ pub async fn update_machine_status( diff --git a/server/src/db/mod.rs b/server/src/db/mod.rs index 655ef81..97afb9e 100644 --- a/server/src/db/mod.rs +++ b/server/src/db/mod.rs @@ -4,8 +4,10 @@ //! Optional - server works without database if DATABASE_URL not set. pub mod agent_keys; +pub mod enrollment_keys; pub mod events; pub mod machines; +pub mod sites; pub mod releases; pub mod sessions; pub mod support_codes; diff --git a/server/src/db/sites.rs b/server/src/db/sites.rs new file mode 100644 index 0000000..01784cc --- /dev/null +++ b/server/src/db/sites.rs @@ -0,0 +1,94 @@ +//! Site database operations (SPEC-016 zero-touch enrollment). +//! +//! Backs the `connect_sites` table (migration 010): the relational anchor a +//! per-site enrollment key hangs off and the `/api/enroll` flow resolves by +//! `site_code`. See the migration header for why this table exists (the prior +//! schema modeled "site" only as a free-text column on `connect_machines`). +//! +//! All queries use runtime `sqlx::query()` / `sqlx::query_as()` per the codebase +//! convention (no compile-time `query!` macros, no `.sqlx` offline cache). + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use sqlx::PgPool; +use uuid::Uuid; + +/// Site record from the database. +#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)] +pub struct Site { + pub id: Uuid, + pub site_code: String, + pub display_name: Option, + pub company: Option, + pub tenant_id: Option, + /// RESERVED for future per-site enrollment POLICY work (SPEC-016 §out-of-scope). + /// Not enforced in Phase A. + pub enrollment_policy: Option, + pub created_at: DateTime, +} + +/// Resolve a site by its operator-facing `site_code`, scoped to the given tenant. +/// +/// Tenant scoping uses the same default-tenant fold as the unique index so the +/// lookup matches the uniqueness guarantee: `(COALESCE(tenant_id, default), +/// site_code)`. Returns `None` if no site with that code exists in the tenant. +pub async fn get_site_by_code( + pool: &PgPool, + site_code: &str, + tenant_id: Uuid, +) -> Result, sqlx::Error> { + sqlx::query_as::<_, Site>( + r#" + SELECT id, site_code, display_name, company, tenant_id, enrollment_policy, created_at + FROM connect_sites + WHERE site_code = $1 + AND COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid) = $2 + "#, + ) + .bind(site_code) + .bind(tenant_id) + .fetch_optional(pool) + .await +} + +/// Fetch a site by its primary-key UUID. +pub async fn get_site_by_id(pool: &PgPool, id: Uuid) -> Result, sqlx::Error> { + sqlx::query_as::<_, Site>( + r#" + SELECT id, site_code, display_name, company, tenant_id, enrollment_policy, created_at + FROM connect_sites + WHERE id = $1 + "#, + ) + .bind(id) + .fetch_optional(pool) + .await +} + +/// Insert a new site, returning the created row. +/// +/// `tenant_id` is `None`-tolerant and resolved via `db::tenancy::current_tenant_id()` +/// at the call site. Errors with a unique-violation if `(tenant, site_code)` already +/// exists (the caller maps that to a 409). +#[allow(dead_code)] // Wired by the site-admin API (dashboard site CRUD); Phase A exposes key rotation, not site CRUD. +pub async fn insert_site( + pool: &PgPool, + site_code: &str, + display_name: Option<&str>, + company: Option<&str>, + tenant_id: Option, +) -> Result { + sqlx::query_as::<_, Site>( + r#" + INSERT INTO connect_sites (site_code, display_name, company, tenant_id) + VALUES ($1, $2, $3, $4) + RETURNING id, site_code, display_name, company, tenant_id, enrollment_policy, created_at + "#, + ) + .bind(site_code) + .bind(display_name) + .bind(company) + .bind(tenant_id) + .fetch_one(pool) + .await +} diff --git a/server/src/main.rs b/server/src/main.rs index e94e7e6..ed2eca2 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -448,6 +448,11 @@ async fn main() -> Result<()> { )), ) .route("/api/codes/:code/cancel", post(cancel_code)) + // Zero-touch enrollment (SPEC-016). PUBLIC: no JWT — the per-site enrollment + // key in the body is the gate, and the handler applies its own + // per-(site_code, IP) rate limit / lockout (defense-in-depth). Mounted with + // the other public API routes. + .route("/api/enroll", post(api::enroll::enroll)) // WebSocket endpoints .route("/ws/agent", get(relay::agent_ws_handler)) .route("/ws/viewer", get(relay::viewer_ws_handler)) @@ -498,6 +503,18 @@ async fn main() -> Result<()> { "/api/machines/:agent_id/keys/:key_id", delete(api::machine_keys::revoke_key), ) + // Per-site enrollment key administration (SPEC-016, admin-only / JWT). + // Rotate regenerates the cek_ secret + fingerprint (old installers can no + // longer enroll new machines); GET returns the current non-secret + // fingerprint/version. Both gated by the AdminUser extractor. + .route( + "/api/sites/:id/enrollment-key", + get(api::sites::get_enrollment_key), + ) + .route( + "/api/sites/:id/enrollment-key/rotate", + post(api::sites::rotate_enrollment_key), + ) // REST API - Releases and Version .route("/api/version", get(api::releases::get_version)) // No auth - for agent polling .route("/api/releases", get(api::releases::list_releases)) diff --git a/server/src/middleware/rate_limit.rs b/server/src/middleware/rate_limit.rs index cf3433f..72f21e7 100644 --- a/server/src/middleware/rate_limit.rs +++ b/server/src/middleware/rate_limit.rs @@ -77,6 +77,19 @@ pub const CODE_VALIDATE_MAX_FAILURES: u32 = 10; /// Support-code validate: how long an IP stays locked out once tripped. pub const CODE_VALIDATE_LOCKOUT: Duration = Duration::from_secs(15 * 60); +/// Enroll (`POST /api/enroll`, SPEC-016): window length. +pub const ENROLL_WINDOW: Duration = Duration::from_secs(60); +/// Enroll: max requests per window per `(site_code, IP)`. A zero-touch site push +/// drives N machines through enroll near-simultaneously, so this is generous +/// (mass-deploy friendly) while still capping a runaway loop. Defense-in-depth: the +/// 256-bit enrollment key is the load-bearing gate, not this cap. +pub const ENROLL_MAX_PER_WINDOW: u32 = 60; +/// Enroll: consecutive FAILED enroll attempts (bad/inactive key, unknown site) from +/// one `(site_code, IP)` that trip the lockout. +pub const ENROLL_MAX_FAILURES: u32 = 20; +/// Enroll: how long a `(site_code, IP)` stays locked out once tripped. +pub const ENROLL_LOCKOUT: Duration = Duration::from_secs(15 * 60); + /// Hard cap on the number of distinct IPs tracked by any single limiter map. /// Prevents an IP-rotating attacker from growing memory without bound. When the /// cap is hit, the oldest-windowed entries are pruned. Generous for a real MSP @@ -260,6 +273,150 @@ impl FailureLockout { } } +// ============================================================================ +// Composite-key limiter for enrollment (keyed by (site_code, IP)) — SPEC-016 +// ============================================================================ +// +// The login / change-password / code-validate limiters above key purely on IP. +// SPEC-016 §3 wants the enroll defense keyed on `(site_code, source-IP)` so a noisy +// site push from one office IP cannot lock out a different site enrolling from the +// same egress IP. Rather than overload the IP-only maps, this is a small dedicated +// composite-key limiter + lockout. It is invoked from the enroll HANDLER (not a +// `from_fn` layer) because the `site_code` lives in the JSON body, which a +// pre-handler middleware cannot read without consuming it. Documented as +// defense-in-depth: the 256-bit enrollment key is the real gate. + +/// Composite limiter key: the site_code and the real client IP. +type EnrollKey = (String, IpAddr); + +/// Per-`(site_code, IP)` fixed-window limiter + consecutive-failure lockout. +/// +/// Combines both protections behind one lock-guarded map so the enroll handler +/// makes a single allow/deny decision and reports success/failure into the same +/// structure. Self-pruning and size-capped, like the IP-only limiters. +#[derive(Clone)] +pub struct EnrollLimiter { + inner: std::sync::Arc>>, + max_per_window: u32, + window: Duration, + max_failures: u32, + cooldown: Duration, +} + +#[derive(Debug, Clone, Copy)] +struct EnrollEntry { + window_started: Instant, + count: u32, + failures: u32, + locked_until: Option, + last_seen: Instant, +} + +impl EnrollLimiter { + pub fn new( + max_per_window: u32, + window: Duration, + max_failures: u32, + cooldown: Duration, + ) -> Self { + Self { + inner: std::sync::Arc::new(Mutex::new(HashMap::new())), + max_per_window, + window, + max_failures, + cooldown, + } + } + + fn entry_now() -> EnrollEntry { + let now = Instant::now(); + EnrollEntry { + window_started: now, + count: 0, + failures: 0, + locked_until: None, + last_seen: now, + } + } + + /// Admit one enroll attempt for `(site_code, ip)`. Returns `true` if allowed + /// (and counts it). Returns `false` if the key is currently locked out OR over + /// the per-window request cap. Clock injected for tests. + fn check_at(&self, site_code: &str, ip: IpAddr, now: Instant) -> bool { + let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner()); + + if map.len() >= MAX_TRACKED_IPS { + let window = self.window; + let cooldown = self.cooldown; + map.retain(|_, e| { + e.locked_until.map(|u| now < u).unwrap_or(false) + || now.duration_since(e.window_started) < window + || now.duration_since(e.last_seen) < cooldown + }); + } + + let key = (site_code.to_string(), ip); + let e = map.entry(key).or_insert_with(Self::entry_now); + e.last_seen = now; + + // Lockout takes precedence. + if let Some(until) = e.locked_until { + if now < until { + return false; + } + // Cooldown elapsed — clear it for a fresh start. + e.locked_until = None; + e.failures = 0; + } + + // Roll the fixed window forward if elapsed. + if now.duration_since(e.window_started) >= self.window { + e.window_started = now; + e.count = 0; + } + + if e.count >= self.max_per_window { + false + } else { + e.count += 1; + true + } + } + + /// Admit one enroll attempt (real clock). + pub fn check(&self, site_code: &str, ip: IpAddr) -> bool { + self.check_at(site_code, ip, Instant::now()) + } + + fn record_failure_at(&self, site_code: &str, ip: IpAddr, now: Instant) { + let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner()); + let key = (site_code.to_string(), ip); + let e = map.entry(key).or_insert_with(Self::entry_now); + e.last_seen = now; + e.failures = e.failures.saturating_add(1); + if e.failures >= self.max_failures { + e.locked_until = Some(now + self.cooldown); + } + } + + /// Record a FAILED enroll attempt (bad key / unknown site) for the key, + /// tripping the lockout once the streak reaches `max_failures`. + pub fn record_failure(&self, site_code: &str, ip: IpAddr) { + self.record_failure_at(site_code, ip, Instant::now()); + } + + /// Record a SUCCESSFUL enroll for the key, resetting its failure streak. + pub fn record_success(&self, site_code: &str, ip: IpAddr) { + let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner()); + let key = (site_code.to_string(), ip); + if let Some(e) = map.get_mut(&key) { + e.failures = 0; + e.locked_until = None; + e.last_seen = Instant::now(); + } + } +} + // ============================================================================ // Shared rate-limit state (lives in AppState) // ============================================================================ @@ -275,6 +432,9 @@ pub struct RateLimitState { pub code_validate: RateLimiter, /// Per-IP lockout on repeated failed code validations (brute-force defense). pub code_validate_lockout: FailureLockout, + /// `POST /api/enroll` (SPEC-016): per-`(site_code, IP)` request cap + + /// consecutive-failure lockout. Invoked from the enroll handler. + pub enroll: EnrollLimiter, } impl RateLimitState { @@ -290,6 +450,12 @@ impl RateLimitState { CODE_VALIDATE_MAX_FAILURES, CODE_VALIDATE_LOCKOUT, ), + enroll: EnrollLimiter::new( + ENROLL_MAX_PER_WINDOW, + ENROLL_WINDOW, + ENROLL_MAX_FAILURES, + ENROLL_LOCKOUT, + ), } } } @@ -524,4 +690,51 @@ mod tests { assert!(lockout.is_locked_at(ip(8), t0)); assert!(!lockout.is_locked_at(ip(9), t0)); // ip9 unaffected } + + // -- EnrollLimiter (composite (site_code, IP) key) -------------------------- + + #[test] + fn enroll_window_allows_up_to_cap_then_blocks() { + let lim = EnrollLimiter::new(2, Duration::from_secs(60), 100, Duration::from_secs(600)); + let t0 = Instant::now(); + assert!(lim.check_at("SITE-A", ip(1), t0)); // 1 + assert!(lim.check_at("SITE-A", ip(1), t0)); // 2 + assert!(!lim.check_at("SITE-A", ip(1), t0)); // over cap + } + + #[test] + fn enroll_is_keyed_by_site_and_ip() { + let lim = EnrollLimiter::new(1, Duration::from_secs(60), 100, Duration::from_secs(600)); + let t0 = Instant::now(); + assert!(lim.check_at("SITE-A", ip(1), t0)); + assert!(!lim.check_at("SITE-A", ip(1), t0)); // same key over cap + // Different site, same IP -> independent bucket. + assert!(lim.check_at("SITE-B", ip(1), t0)); + // Same site, different IP -> independent bucket. + assert!(lim.check_at("SITE-A", ip(2), t0)); + } + + #[test] + fn enroll_lockout_trips_after_failures_and_blocks_check() { + let lim = EnrollLimiter::new(100, Duration::from_secs(60), 3, Duration::from_secs(600)); + let t0 = Instant::now(); + lim.record_failure_at("SITE-A", ip(1), t0); + lim.record_failure_at("SITE-A", ip(1), t0); + // Not yet tripped: a check still admits. + assert!(lim.check_at("SITE-A", ip(1), t0)); + lim.record_failure_at("SITE-A", ip(1), t0); // 3rd -> trips + // Now locked out: check denies even though under the request cap. + assert!(!lim.check_at("SITE-A", ip(1), t0)); + } + + #[test] + fn enroll_success_resets_failure_streak() { + let lim = EnrollLimiter::new(100, Duration::from_secs(60), 2, Duration::from_secs(600)); + let t0 = Instant::now(); + lim.record_failure_at("SITE-A", ip(1), t0); + lim.record_success("SITE-A", ip(1)); // reset + lim.record_failure_at("SITE-A", ip(1), t0); + // Only one failure since reset -> not locked. + assert!(lim.check_at("SITE-A", ip(1), t0)); + } }