Merge pull request 'SPEC-016 Phase A: zero-touch enrollment backend + migration' (#5) from feat/spec-016-enrollment into main
All checks were successful
All checks were successful
This commit was merged in pull request #5.
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -1407,7 +1407,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "guruconnect"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -1447,7 +1447,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "guruconnect-server"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
|
||||
159
server/migrations/010_spec016_enrollment.sql
Normal file
159
server/migrations/010_spec016_enrollment.sql
Normal file
@@ -0,0 +1,159 @@
|
||||
-- Migration: 010_spec016_enrollment.sql
|
||||
-- Purpose: SPEC-016 zero-touch per-site agent enrollment — server-side data model.
|
||||
--
|
||||
-- Adds the per-site enrollment-key table, a minimal sites table to anchor it,
|
||||
-- and the machine-side columns the collision-gated self-registration flow needs.
|
||||
--
|
||||
-- Two-tier credential model (SPEC-016 §Security): a low-sensitivity, rotatable,
|
||||
-- per-site ENROLLMENT KEY (the `cek_` secret stored hashed here) gates "may this
|
||||
-- machine register at all", while the high-sensitivity per-machine `cak_`
|
||||
-- operating credential (connect_agent_keys, migration 004) is minted on a
|
||||
-- successful enroll. Compromise of an enrollment key is recovered by rotating one
|
||||
-- site, not a fleet-wide re-key.
|
||||
--
|
||||
-- DEVIATION FROM SPEC (documented): SPEC-016 §DB-migration describes
|
||||
-- `site_enrollment_keys.site_id` as `fk -> sites`, assuming a sites table already
|
||||
-- exists. It does NOT — in the current schema "site" and "company/organization" are
|
||||
-- free-text columns on connect_machines (migration 005), there is no relational
|
||||
-- sites entity. This migration therefore CREATES a minimal `connect_sites` table
|
||||
-- (the relational anchor the enrollment-key FK and the dashboard per-site key
|
||||
-- display both require) keyed by a natural `site_code` and scoped per-tenant. It is
|
||||
-- intentionally minimal (code + display name + tenant); richer site/company
|
||||
-- modeling is left to future work. The free-text connect_machines.site /
|
||||
-- .organization columns are untouched and continue to carry agent-reported labels.
|
||||
--
|
||||
-- Idempotent: CREATE TABLE/INDEX IF NOT EXISTS, ADD COLUMN IF NOT EXISTS. Applied on
|
||||
-- server startup by sqlx::migrate!(); never pre-applied via psql. Ordered after 009.
|
||||
-- See .claude/standards/gururmm/sqlx-migrations.md.
|
||||
|
||||
-- pgcrypto provides gen_random_uuid(); enabled in 001/004 but re-asserted for safety.
|
||||
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||
|
||||
-- ============================================================================
|
||||
-- connect_sites — relational anchor for per-site enrollment (see DEVIATION above)
|
||||
-- ============================================================================
|
||||
-- A site is the unit a single signed installer targets. `site_code` is the
|
||||
-- non-secret, operator-facing identifier the installer carries and the agent sends
|
||||
-- at /api/enroll (e.g. "ACME-PHX"). Uniqueness is per-tenant: the same human-chosen
|
||||
-- code may legitimately exist in two tenants. tenant_id mirrors the nullable,
|
||||
-- default-tenant-backfilled tenancy column used on every other scoped table
|
||||
-- (migration 004); db::tenancy::current_tenant_id() resolves it for now.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS connect_sites (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
-- Operator-facing site identifier the installer carries. Non-secret.
|
||||
site_code TEXT NOT NULL,
|
||||
-- Human-readable site / company display name for the dashboard.
|
||||
display_name TEXT,
|
||||
-- Default company label applied to machines enrolled at this site (mirrors the
|
||||
-- free-text connect_machines.organization the agent otherwise self-reports).
|
||||
company TEXT,
|
||||
-- Tenancy-ready (Phase 4). Backfilled to the default tenant below.
|
||||
tenant_id UUID,
|
||||
-- RESERVED for future per-site enrollment POLICY work (SPEC-016 §out-of-scope):
|
||||
-- default 'auto-approve'; a future 'pending-approval' value will gate new
|
||||
-- enrollments. NOT enforced in Phase A — present so the policy SPEC needs no
|
||||
-- schema change. Do not branch on this column yet.
|
||||
enrollment_policy TEXT DEFAULT 'auto-approve',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Per-tenant uniqueness of the natural site_code so /api/enroll can resolve a site
|
||||
-- deterministically within a tenant while the same code may exist across tenants.
|
||||
-- COALESCE keeps the index usable while tenant_id is still nullable (Phase 1).
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_sites_tenant_code
|
||||
ON connect_sites (COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid), site_code);
|
||||
|
||||
-- Backfill the sites tenant_id to the default tenant (table is empty on a fresh DB;
|
||||
-- no-op there, but keeps the migration self-consistent).
|
||||
UPDATE connect_sites
|
||||
SET tenant_id = '00000000-0000-0000-0000-000000000001'
|
||||
WHERE tenant_id IS NULL;
|
||||
|
||||
-- ============================================================================
|
||||
-- site_enrollment_keys — rotatable, hashed per-site enrollment secret + fingerprint
|
||||
-- ============================================================================
|
||||
-- Stores ONLY the Argon2id hash of the `cek_` secret; the plaintext is shown once
|
||||
-- at issue/rotate and never recoverable. `version` is the monotonic rotation
|
||||
-- counter; `fingerprint` is the non-secret short hex shown as `vN (XXXX)` in the
|
||||
-- dashboard and baked into the installer filename. `active` marks the current key —
|
||||
-- rotation flips the old key to active=false (blocking NEW enrollments from old
|
||||
-- installers) and inserts a new active row; already-enrolled agents holding their
|
||||
-- own `cak_` are unaffected. Multiple inactive (historical) rows may coexist per
|
||||
-- site; at most one active row is intended (enforced by a partial unique index).
|
||||
|
||||
CREATE TABLE IF NOT EXISTS site_enrollment_keys (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
site_id UUID NOT NULL REFERENCES connect_sites(id) ON DELETE CASCADE,
|
||||
-- Argon2id hash of the `cek_` enrollment secret. Never the plaintext.
|
||||
key_hash TEXT NOT NULL,
|
||||
-- Monotonic rotation version (1, 2, 3, ...).
|
||||
version INTEGER NOT NULL,
|
||||
-- Non-secret short hex fingerprint code (the XXXX in `vN (XXXX)`), derived from
|
||||
-- the secret. Stored so the dashboard / GET endpoint can show it without the
|
||||
-- secret.
|
||||
fingerprint TEXT NOT NULL,
|
||||
active BOOLEAN NOT NULL DEFAULT true,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
-- Set when this key is rotated out (active flipped to false).
|
||||
rotated_at TIMESTAMPTZ
|
||||
);
|
||||
|
||||
-- Lookup index for the enroll hot path: resolve the active key for a site.
|
||||
CREATE INDEX IF NOT EXISTS idx_site_enrollment_keys_site_active
|
||||
ON site_enrollment_keys (site_id, active);
|
||||
|
||||
-- At most one ACTIVE enrollment key per site (the "current" installer key).
|
||||
-- Partial unique index so any number of inactive historical rows may coexist.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_site_enrollment_keys_one_active
|
||||
ON site_enrollment_keys (site_id)
|
||||
WHERE active;
|
||||
|
||||
-- ============================================================================
|
||||
-- connect_machines — site binding + enrollment-state collision gate
|
||||
-- ============================================================================
|
||||
-- machine_uid already exists (migration 008) with a partial UNIQUE index on
|
||||
-- (machine_uid) WHERE machine_uid IS NOT NULL. SPEC-016 §item-1 / resolved-decision #4
|
||||
-- call for the dedup key to be PER-TENANT — (tenant_id, machine_uid) — so the same
|
||||
-- hardware legitimately present in two tenants stays two rows. tenant_id is the
|
||||
-- scoping column that exists on connect_machines (migration 004); machines have no
|
||||
-- direct site_id today, so site is tracked separately (site_id below) and tenancy is
|
||||
-- the uniqueness scope, exactly as the spec states.
|
||||
--
|
||||
-- CRITICAL CONSTRAINT (why we ADD rather than REPLACE the 008 index here):
|
||||
-- db::machines::upsert_machine (the live connect-path upsert) uses
|
||||
-- `ON CONFLICT (machine_uid) WHERE machine_uid IS NOT NULL` as its conflict arbiter.
|
||||
-- Postgres matches that arbiter to the EXACT index from migration 008. Dropping that
|
||||
-- index would make the live upsert fail to find an arbiter and error at runtime —
|
||||
-- breaking every un-keyed agent reconnect. So migration 008's global index is LEFT
|
||||
-- IN PLACE (the connect path keeps working unchanged) and the per-tenant index is
|
||||
-- added ALONGSIDE it. In single-tenant Phase 1 the two are equivalent (every row's
|
||||
-- tenant_id is the default tenant), so the per-tenant index adds the SPEC-016 dedup
|
||||
-- semantics without a redundant-uniqueness conflict: a (tenant, uid) pair that is
|
||||
-- unique is also globally unique today. When multi-tenancy activates AND
|
||||
-- upsert_machine's ON CONFLICT is updated to name (tenant_id, machine_uid), a future
|
||||
-- migration drops the global 008 index. Documented as deferred; do not drop it now.
|
||||
|
||||
-- Optional FK to the site a machine enrolled under (NULL for legacy / support-code
|
||||
-- machines that never enrolled through /api/enroll). A site change on re-enroll is
|
||||
-- the "site move" SPEC-016 audits.
|
||||
ALTER TABLE connect_machines ADD COLUMN IF NOT EXISTS site_id UUID REFERENCES connect_sites(id) ON DELETE SET NULL;
|
||||
|
||||
-- enrollment_state: the collision gate (SPEC-016 §item-1/6). 'active' = live and
|
||||
-- controllable (auto-approve posture); 'pending' = a machine_uid collision was
|
||||
-- detected at enroll and an operator must confirm in the dashboard before the
|
||||
-- endpoint may be controlled. Default 'active' so every legacy/connect-path row is
|
||||
-- unaffected.
|
||||
ALTER TABLE connect_machines
|
||||
ADD COLUMN IF NOT EXISTS enrollment_state TEXT NOT NULL DEFAULT 'active'
|
||||
CHECK (enrollment_state IN ('active', 'pending'));
|
||||
|
||||
-- Per-tenant machine_uid uniqueness (SPEC-016). Added ALONGSIDE migration 008's
|
||||
-- global (machine_uid) index (see CRITICAL CONSTRAINT above — the connect-path
|
||||
-- upsert's ON CONFLICT arbiter binds to the 008 index, which must survive). COALESCE
|
||||
-- folds a NULL tenant_id to the default tenant so the index is well-defined while
|
||||
-- tenancy is single-tenant (Phase 1); the WHERE clause excludes NULL machine_uid so
|
||||
-- legacy un-keyed rows coexist freely.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_machines_tenant_machine_uid
|
||||
ON connect_machines (COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid), machine_uid)
|
||||
WHERE machine_uid IS NOT NULL;
|
||||
1008
server/src/api/enroll.rs
Normal file
1008
server/src/api/enroll.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,10 +4,12 @@ pub mod auth;
|
||||
pub mod auth_logout;
|
||||
pub mod changelog;
|
||||
pub mod downloads;
|
||||
pub mod enroll;
|
||||
pub mod machine_keys;
|
||||
pub mod releases;
|
||||
pub mod removal;
|
||||
pub mod sessions;
|
||||
pub mod sites;
|
||||
pub mod users;
|
||||
|
||||
use axum::{
|
||||
|
||||
217
server/src/api/sites.rs
Normal file
217
server/src/api/sites.rs
Normal file
@@ -0,0 +1,217 @@
|
||||
//! Site enrollment-key administration (SPEC-016, admin plane).
|
||||
//!
|
||||
//! Admin (dashboard JWT + admin role) endpoints for the per-site enrollment key
|
||||
//! the dashboard surfaces and rotates:
|
||||
//!
|
||||
//! - `POST /api/sites/:id/enrollment-key/rotate` — regenerate the `cek_` secret,
|
||||
//! bump the monotonic version, derive a new fingerprint, deactivate the prior
|
||||
//! active key, and return the plaintext + fingerprint ONCE. Old installers can no
|
||||
//! longer enroll NEW machines after this; already-enrolled agents (holding their
|
||||
//! own `cak_`) are unaffected (SPEC-016 success-criterion #3). Doubles as
|
||||
//! first-issue when a site has no key yet.
|
||||
//! - `GET /api/sites/:id/enrollment-key` — read the CURRENT non-secret fingerprint
|
||||
//! + version (never the secret). 404 if the site has no active key yet.
|
||||
//!
|
||||
//! Auth mirrors `api::machine_keys`: the [`crate::auth::AdminUser`] extractor gates
|
||||
//! both routes, and they are mounted behind the JWT `auth_layer`.
|
||||
//!
|
||||
//! SECURITY: the plaintext `cek_` is returned exactly once (rotate response),
|
||||
//! never persisted in plaintext and never logged. Read responses expose only the
|
||||
//! version + fingerprint.
|
||||
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
http::StatusCode,
|
||||
Json,
|
||||
};
|
||||
use serde::Serialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::auth::{enrollment_keys, AdminUser};
|
||||
use crate::db;
|
||||
use crate::AppState;
|
||||
|
||||
/// Standard error envelope (matches `api::machine_keys::ApiError`).
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ApiError {
|
||||
pub detail: String,
|
||||
pub error_code: String,
|
||||
pub status_code: u16,
|
||||
}
|
||||
|
||||
impl ApiError {
|
||||
fn new(status: StatusCode, code: &str, detail: &str) -> (StatusCode, Json<ApiError>) {
|
||||
(
|
||||
status,
|
||||
Json(ApiError {
|
||||
detail: detail.to_string(),
|
||||
error_code: code.to_string(),
|
||||
status_code: status.as_u16(),
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
type ApiResult<T> = Result<T, (StatusCode, Json<ApiError>)>;
|
||||
|
||||
/// Response for a freshly rotated/issued enrollment key. `key` is present ONLY
|
||||
/// here, once.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct RotatedEnrollmentKey {
|
||||
pub site_id: Uuid,
|
||||
/// The plaintext `cek_` enrollment key. Shown exactly once — bake it into the
|
||||
/// site installer now; the server keeps only its hash.
|
||||
pub key: String,
|
||||
/// Monotonic rotation version.
|
||||
pub version: i32,
|
||||
/// The non-secret short hex code (the `XXXX` in `vN (XXXX)`).
|
||||
pub fingerprint: String,
|
||||
/// Fully rendered operator-facing fingerprint, e.g. `v3 (7F2A)`.
|
||||
pub fingerprint_label: String,
|
||||
}
|
||||
|
||||
/// Non-secret current-key view for the GET endpoint.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct EnrollmentKeyView {
|
||||
pub site_id: Uuid,
|
||||
pub version: i32,
|
||||
pub fingerprint: String,
|
||||
pub fingerprint_label: String,
|
||||
pub active: bool,
|
||||
}
|
||||
|
||||
fn require_db(state: &AppState) -> ApiResult<&db::Database> {
|
||||
state.db.as_ref().ok_or_else(|| {
|
||||
ApiError::new(
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"DATABASE_UNAVAILABLE",
|
||||
"Database not available",
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Resolve a site by its UUID path segment, or a 404 envelope.
|
||||
async fn resolve_site(db: &db::Database, site_id: Uuid) -> ApiResult<db::sites::Site> {
|
||||
db::sites::get_site_by_id(db.pool(), site_id)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error resolving site: {}", e);
|
||||
ApiError::new(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"INTERNAL_ERROR",
|
||||
"Internal server error",
|
||||
)
|
||||
})?
|
||||
.ok_or_else(|| ApiError::new(StatusCode::NOT_FOUND, "SITE_NOT_FOUND", "Site not found"))
|
||||
}
|
||||
|
||||
/// POST /api/sites/:id/enrollment-key/rotate — rotate (or first-issue) a site's
|
||||
/// enrollment key. Returns the plaintext `cek_` + fingerprint once.
|
||||
pub async fn rotate_enrollment_key(
|
||||
AdminUser(admin): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(site_id): Path<Uuid>,
|
||||
) -> ApiResult<(StatusCode, Json<RotatedEnrollmentKey>)> {
|
||||
let db = require_db(&state)?;
|
||||
let site = resolve_site(db, site_id).await?;
|
||||
|
||||
// Mint plaintext + Argon2id hash + fingerprint. Only the hash + fingerprint
|
||||
// are persisted; the plaintext is surfaced once below.
|
||||
let plaintext = enrollment_keys::generate_enrollment_key();
|
||||
let key_hash = enrollment_keys::hash_enrollment_key(&plaintext).map_err(|e| {
|
||||
tracing::error!("Failed to hash enrollment key: {}", e);
|
||||
ApiError::new(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"INTERNAL_ERROR",
|
||||
"Failed to hash enrollment key",
|
||||
)
|
||||
})?;
|
||||
let fingerprint = enrollment_keys::compute_fingerprint(&plaintext);
|
||||
|
||||
let new_key = db::enrollment_keys::rotate_key(db.pool(), site.id, &key_hash, &fingerprint)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error rotating enrollment key: {}", e);
|
||||
ApiError::new(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"INTERNAL_ERROR",
|
||||
"Failed to rotate enrollment key",
|
||||
)
|
||||
})?;
|
||||
|
||||
let fingerprint_label =
|
||||
enrollment_keys::render_fingerprint(new_key.version, &new_key.fingerprint);
|
||||
|
||||
// Audit WITHOUT key material (no plaintext, no hash).
|
||||
if let Err(e) = db::events::log_enrollment_event(
|
||||
db.pool(),
|
||||
db::events::EventTypes::ENROLLMENT_KEY_ROTATED,
|
||||
serde_json::json!({
|
||||
"site_id": site.id,
|
||||
"site_code": site.site_code,
|
||||
"version": new_key.version,
|
||||
"fingerprint": new_key.fingerprint,
|
||||
"rotated_by": admin.username,
|
||||
}),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[ENROLL] failed to write key-rotate audit event: {}", e);
|
||||
}
|
||||
tracing::info!(
|
||||
"Admin {} rotated enrollment key for site {} to {}",
|
||||
admin.username,
|
||||
site.site_code,
|
||||
fingerprint_label
|
||||
);
|
||||
|
||||
Ok((
|
||||
StatusCode::CREATED,
|
||||
Json(RotatedEnrollmentKey {
|
||||
site_id: site.id,
|
||||
key: plaintext,
|
||||
version: new_key.version,
|
||||
fingerprint: new_key.fingerprint,
|
||||
fingerprint_label,
|
||||
}),
|
||||
))
|
||||
}
|
||||
|
||||
/// GET /api/sites/:id/enrollment-key — current non-secret fingerprint + version.
|
||||
pub async fn get_enrollment_key(
|
||||
AdminUser(_admin): AdminUser,
|
||||
State(state): State<AppState>,
|
||||
Path(site_id): Path<Uuid>,
|
||||
) -> ApiResult<Json<EnrollmentKeyView>> {
|
||||
let db = require_db(&state)?;
|
||||
let site = resolve_site(db, site_id).await?;
|
||||
|
||||
let key = db::enrollment_keys::get_active_for_site(db.pool(), site.id)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error loading enrollment key: {}", e);
|
||||
ApiError::new(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
"INTERNAL_ERROR",
|
||||
"Internal server error",
|
||||
)
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
ApiError::new(
|
||||
StatusCode::NOT_FOUND,
|
||||
"NO_ENROLLMENT_KEY",
|
||||
"Site has no active enrollment key",
|
||||
)
|
||||
})?;
|
||||
|
||||
let fingerprint_label = enrollment_keys::render_fingerprint(key.version, &key.fingerprint);
|
||||
|
||||
Ok(Json(EnrollmentKeyView {
|
||||
site_id: site.id,
|
||||
version: key.version,
|
||||
fingerprint: key.fingerprint,
|
||||
fingerprint_label,
|
||||
active: key.active,
|
||||
}))
|
||||
}
|
||||
191
server/src/auth/enrollment_keys.rs
Normal file
191
server/src/auth/enrollment_keys.rs
Normal file
@@ -0,0 +1,191 @@
|
||||
//! Per-site enrollment key minting, hashing, verification, and fingerprinting
|
||||
//! (SPEC-016 zero-touch enrollment, auth layer).
|
||||
//!
|
||||
//! This is the low-sensitivity, rotatable side of the two-tier credential model
|
||||
//! (SPEC-016 §Security). A per-site ENROLLMENT key (`cek_` prefix) gates "may
|
||||
//! this machine register at all" at `POST /api/enroll`; a successful enroll mints
|
||||
//! the high-sensitivity per-machine `cak_` operating credential
|
||||
//! ([`crate::auth::agent_keys`]). Compromise of an enrollment key is contained to
|
||||
//! one site and recovered by rotating it.
|
||||
//!
|
||||
//! Lifecycle owned here (the secret side):
|
||||
//!
|
||||
//! - [`generate_enrollment_key`] mints a high-entropy, `cek_`-prefixed plaintext
|
||||
//! secret. Mirrors [`crate::auth::agent_keys::generate_agent_key`]'s entropy
|
||||
//! approach (32 random bytes from the OS CSPRNG, hex-encoded) with a DISTINCT
|
||||
//! prefix so the two key kinds are never confused in logs or storage. The
|
||||
//! plaintext is shown to the operator exactly once at issue/rotate and is NEVER
|
||||
//! persisted or logged.
|
||||
//! - [`hash_enrollment_key`] / [`verify_enrollment_key`] use **Argon2id** (via
|
||||
//! [`crate::auth::password`]). This DIFFERS from `cak_` (which uses SHA-256 for
|
||||
//! a constant-shape equality lookup): SPEC-016 §2 explicitly requires the
|
||||
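//! enrollment key be "stored hashed (Argon2id, same as `cak_`/passwords)". (The
//! spec's "same as `cak_`" wording appears inaccurate, since `cak_` is SHA-256 as
//! noted above; the Argon2id requirement is what this module follows.) The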
|
||||
//! trade-off is deliberate — enrollment keys are looked up by `(site, active)`
|
||||
//! first (a small candidate set, usually one row) and only then verified, so the
|
||||
//! per-verify KDF cost is bounded and not on a high-QPS path, while Argon2id
|
||||
//! gives salted, GPU-resistant storage matching the password posture.
|
||||
//! - [`compute_fingerprint`] derives the non-secret short HEX code shown as
|
||||
//! `vN (XXXX)` (SPEC-016 resolved-decision #3 — hex, deliberately NOT the
|
||||
//! GuruRMM word-style code, so the two products' artifacts are never visually
|
||||
//! conflated).
|
||||
//!
|
||||
//! SECURITY: never log a plaintext key or its hash. Functions here return the
|
||||
//! plaintext to the caller (issue/rotate endpoint) but emit no `tracing` output
|
||||
//! containing key material.
|
||||
|
||||
use anyhow::Result;
|
||||
use rand::RngCore;
|
||||
use ring::digest;
|
||||
|
||||
/// Prefix marking a GuruConnect per-site enrollment key. Distinct from the
|
||||
/// per-agent `cak_` prefix so the two key kinds are never confused.
|
||||
pub const ENROLLMENT_KEY_PREFIX: &str = "cek_";
|
||||
|
||||
/// Number of random bytes behind an enrollment key (256 bits of entropy), matching
|
||||
/// [`crate::auth::agent_keys`]. SPEC-016 §2 requires ≥256-bit.
|
||||
const ENROLLMENT_KEY_RANDOM_BYTES: usize = 32;
|
||||
|
||||
/// Number of hex characters in the fingerprint code (the `XXXX` in `vN (XXXX)`).
|
||||
/// Four hex chars = 16 bits — ample to let an operator tell two installers apart at
|
||||
/// a glance; it is a non-secret display aid, not a security control.
|
||||
const FINGERPRINT_HEX_LEN: usize = 4;
|
||||
|
||||
/// Generate a new high-entropy, `cek_`-prefixed per-site enrollment key (plaintext).
|
||||
///
|
||||
/// The returned string is the ONLY time the plaintext exists; the caller must
|
||||
/// surface it to the operator once and store only [`hash_enrollment_key`] of it.
|
||||
/// Uses the OS CSPRNG via `rand::rngs::OsRng`.
|
||||
pub fn generate_enrollment_key() -> String {
|
||||
let mut bytes = [0u8; ENROLLMENT_KEY_RANDOM_BYTES];
|
||||
rand::rngs::OsRng.fill_bytes(&mut bytes);
|
||||
format!("{}{}", ENROLLMENT_KEY_PREFIX, hex_encode(&bytes))
|
||||
}
|
||||
|
||||
/// Hash an enrollment key for storage using Argon2id (SPEC-016 §2).
|
||||
///
|
||||
/// Delegates to [`crate::auth::password::hash_password`] so the KDF parameters and
|
||||
/// salt generation match the password posture exactly. Returns the PHC-format
|
||||
/// string Postgres stores in `site_enrollment_keys.key_hash`.
|
||||
pub fn hash_enrollment_key(plaintext: &str) -> Result<String> {
|
||||
crate::auth::password::hash_password(plaintext)
|
||||
}
|
||||
|
||||
/// Verify a presented enrollment key against a stored Argon2id hash.
|
||||
///
|
||||
/// Returns `Ok(true)` on a match. A malformed stored hash or a mismatch yields
|
||||
/// `Ok(false)` / an `Err` from the underlying verifier; the caller treats any
|
||||
/// non-`Ok(true)` as a rejection. A cheap structural reject (`cek_` prefix) runs
|
||||
/// first to skip the KDF on obviously-bogus input.
|
||||
///
|
||||
/// SECURITY: only compares; never logs the presented key or the hash.
|
||||
pub fn verify_enrollment_key(presented: &str, stored_hash: &str) -> bool {
|
||||
if !presented.starts_with(ENROLLMENT_KEY_PREFIX) {
|
||||
return false;
|
||||
}
|
||||
crate::auth::password::verify_password(presented, stored_hash).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Compute the non-secret short HEX fingerprint code for an enrollment key.
|
||||
///
|
||||
/// Derived as the first [`FINGERPRINT_HEX_LEN`] hex chars of the SHA-256 of the
|
||||
/// plaintext secret, uppercased. This is a stable, non-reversible tag of the secret
|
||||
/// (knowing the code does not reveal the key) used purely for display. Pair it with
|
||||
/// the monotonic version via [`render_fingerprint`].
|
||||
pub fn compute_fingerprint(plaintext: &str) -> String {
|
||||
let d = digest::digest(&digest::SHA256, plaintext.as_bytes());
|
||||
let hex = hex_encode(d.as_ref());
|
||||
hex[..FINGERPRINT_HEX_LEN].to_ascii_uppercase()
|
||||
}
|
||||
|
||||
/// Format the operator-facing fingerprint label `vN (XXXX)` (SPEC-016 §2).
///
/// `version` is the rotation counter and `code` is the short code produced by
/// [`compute_fingerprint`]; both are inserted verbatim.
///
/// Example: `render_fingerprint(3, "7F2A")` yields `"v3 (7F2A)"`.
pub fn render_fingerprint(version: i32, code: &str) -> String {
    let mut label = String::from("v");
    label.push_str(&version.to_string());
    label.push_str(" (");
    label.push_str(code);
    label.push(')');
    label
}
|
||||
|
||||
/// Encode `bytes` as lowercase hexadecimal, two characters per byte, without
/// depending on the `hex` crate (mirrors [`crate::auth::agent_keys`]).
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Mint a key and hash it, for the verification tests below.
    fn fresh_key_and_hash() -> (String, String) {
        let key = generate_enrollment_key();
        let hash = hash_enrollment_key(&key).expect("hash");
        (key, hash)
    }

    #[test]
    fn generated_key_is_prefixed_and_high_entropy() {
        let key = generate_enrollment_key();
        // Prefix plus two hex characters for every random byte.
        let expected_len = ENROLLMENT_KEY_PREFIX.len() + ENROLLMENT_KEY_RANDOM_BYTES * 2;
        assert!(key.starts_with(ENROLLMENT_KEY_PREFIX));
        assert_eq!(key.len(), expected_len);
    }

    #[test]
    fn generated_keys_are_unique() {
        let first = generate_enrollment_key();
        let second = generate_enrollment_key();
        assert_ne!(first, second);
    }

    #[test]
    fn hash_and_verify_roundtrip() {
        let (key, hash) = fresh_key_and_hash();
        assert!(verify_enrollment_key(&key, &hash));
    }

    #[test]
    fn verify_rejects_wrong_key() {
        let (_key, hash) = fresh_key_and_hash();
        let other = generate_enrollment_key();
        assert!(!verify_enrollment_key(&other, &hash));
    }

    #[test]
    fn verify_rejects_unprefixed_input_without_touching_kdf() {
        let (_key, hash) = fresh_key_and_hash();
        // Input without the cek_ prefix fails the structural check up front.
        assert!(!verify_enrollment_key("not-a-key", &hash));
    }

    #[test]
    fn verify_rejects_malformed_stored_hash() {
        let key = generate_enrollment_key();
        // Garbage in the stored-hash slot must be rejected, not panic.
        assert!(!verify_enrollment_key(&key, "not-a-phc-hash"));
    }

    #[test]
    fn fingerprint_is_stable_uppercase_hex_of_expected_len() {
        let key = "cek_deadbeef";
        let first = compute_fingerprint(key);
        let second = compute_fingerprint(key);
        assert_eq!(first, second);
        assert_eq!(first.len(), FINGERPRINT_HEX_LEN);
        assert!(first.chars().all(|c| c.is_ascii_hexdigit()));
        assert_eq!(first, first.to_ascii_uppercase());
    }

    #[test]
    fn fingerprint_differs_per_key() {
        let a = compute_fingerprint("cek_aaa");
        let b = compute_fingerprint("cek_bbb");
        assert_ne!(a, b);
    }

    #[test]
    fn render_fingerprint_matches_spec_shape() {
        assert_eq!(render_fingerprint(3, "7F2A"), "v3 (7F2A)");
    }
}
|
||||
@@ -4,6 +4,7 @@
|
||||
//! validation for agents.
|
||||
|
||||
pub mod agent_keys;
|
||||
pub mod enrollment_keys;
|
||||
pub mod jwt;
|
||||
pub mod password;
|
||||
pub mod token_blacklist;
|
||||
|
||||
141
server/src/db/enrollment_keys.rs
Normal file
141
server/src/db/enrollment_keys.rs
Normal file
@@ -0,0 +1,141 @@
|
||||
//! Per-site enrollment key database operations (SPEC-016 zero-touch enrollment).
|
||||
//!
|
||||
//! Backs the `site_enrollment_keys` table (migration 010). Stores ONLY the
|
||||
//! Argon2id hash of the `cek_` secret plus the non-secret rotation metadata
|
||||
//! (version, fingerprint, active flag). Computing the hash and minting the
|
||||
//! plaintext is [`crate::auth::enrollment_keys`]'s job; this module is
|
||||
//! hash-agnostic persistence and takes already-hashed values.
|
||||
//!
|
||||
//! Rotation invariant: at most one `active` row per site (enforced by a partial
|
||||
//! unique index in migration 010). [`rotate_key`] deactivates the current active
|
||||
//! row and inserts a new active one inside a single transaction so the invariant
|
||||
//! is never transiently violated.
|
||||
//!
|
||||
//! All queries use runtime `sqlx::query()` / `sqlx::query_as()` per the codebase
|
||||
//! convention (no compile-time `query!` macros, no `.sqlx` offline cache).
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Per-site enrollment key record.
|
||||
///
|
||||
/// `key_hash` is the only representation of the secret the server stores; the
|
||||
/// plaintext is shown once at issue/rotate and never persisted.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||
pub struct EnrollmentKey {
|
||||
pub id: Uuid,
|
||||
pub site_id: Uuid,
|
||||
pub key_hash: String,
|
||||
pub version: i32,
|
||||
pub fingerprint: String,
|
||||
pub active: bool,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub rotated_at: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
/// Fetch the active enrollment key for a site, if any.
|
||||
///
|
||||
/// This is the `/api/enroll` hot path: resolve the one active key whose hash the
|
||||
/// presented `cek_` is verified against. The partial unique index guarantees at
|
||||
/// most one active row, so `fetch_optional` is correct.
|
||||
pub async fn get_active_for_site(
|
||||
pool: &PgPool,
|
||||
site_id: Uuid,
|
||||
) -> Result<Option<EnrollmentKey>, sqlx::Error> {
|
||||
sqlx::query_as::<_, EnrollmentKey>(
|
||||
r#"
|
||||
SELECT id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at
|
||||
FROM site_enrollment_keys
|
||||
WHERE site_id = $1 AND active
|
||||
"#,
|
||||
)
|
||||
.bind(site_id)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Insert the FIRST enrollment key for a site at version 1 (initial issue).
|
||||
///
|
||||
/// Use [`rotate_key`] for subsequent rotations. Errors with a unique violation if
|
||||
/// the site already has an active key (the caller should rotate instead).
|
||||
#[allow(dead_code)] // Wired by site-admin issue flow; Phase A exposes rotation (which also covers first issue when none exists).
|
||||
pub async fn insert_initial_key(
|
||||
pool: &PgPool,
|
||||
site_id: Uuid,
|
||||
key_hash: &str,
|
||||
fingerprint: &str,
|
||||
) -> Result<EnrollmentKey, sqlx::Error> {
|
||||
sqlx::query_as::<_, EnrollmentKey>(
|
||||
r#"
|
||||
INSERT INTO site_enrollment_keys (site_id, key_hash, version, fingerprint, active)
|
||||
VALUES ($1, $2, 1, $3, true)
|
||||
RETURNING id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at
|
||||
"#,
|
||||
)
|
||||
.bind(site_id)
|
||||
.bind(key_hash)
|
||||
.bind(fingerprint)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Rotate a site's enrollment key (SPEC-016 §2): deactivate the current active key
|
||||
/// (if any) and insert a new active key at the next monotonic version, all in one
|
||||
/// transaction.
|
||||
///
|
||||
/// Returns the newly-created active key. If the site has no key yet, this issues
|
||||
/// version 1 (so rotation also serves as first-issue). The caller passes the
|
||||
/// already-hashed new secret and its fingerprint; the plaintext is surfaced once by
|
||||
/// the caller and never reaches this layer.
|
||||
///
|
||||
/// The transaction is what keeps the "at most one active key per site" invariant
|
||||
/// (partial unique index) from being transiently violated between the UPDATE and
|
||||
/// the INSERT.
|
||||
pub async fn rotate_key(
|
||||
pool: &PgPool,
|
||||
site_id: Uuid,
|
||||
new_key_hash: &str,
|
||||
new_fingerprint: &str,
|
||||
) -> Result<EnrollmentKey, sqlx::Error> {
|
||||
let mut tx = pool.begin().await?;
|
||||
|
||||
// Highest existing version for this site (NULL -> 0 so the first key is v1).
|
||||
let current_max: Option<i32> =
|
||||
sqlx::query_scalar("SELECT MAX(version) FROM site_enrollment_keys WHERE site_id = $1")
|
||||
.bind(site_id)
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
let next_version = current_max.unwrap_or(0) + 1;
|
||||
|
||||
// Deactivate the current active key (if any), stamping rotated_at.
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE site_enrollment_keys
|
||||
SET active = false, rotated_at = NOW()
|
||||
WHERE site_id = $1 AND active
|
||||
"#,
|
||||
)
|
||||
.bind(site_id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
// Insert the new active key at the next version.
|
||||
let new_key = sqlx::query_as::<_, EnrollmentKey>(
|
||||
r#"
|
||||
INSERT INTO site_enrollment_keys (site_id, key_hash, version, fingerprint, active)
|
||||
VALUES ($1, $2, $3, $4, true)
|
||||
RETURNING id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at
|
||||
"#,
|
||||
)
|
||||
.bind(site_id)
|
||||
.bind(new_key_hash)
|
||||
.bind(next_version)
|
||||
.bind(new_fingerprint)
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
|
||||
tx.commit().await?;
|
||||
Ok(new_key)
|
||||
}
|
||||
@@ -69,6 +69,40 @@ impl EventTypes {
|
||||
pub const MACHINE_REMOVED: &'static str = "machine_removed";
|
||||
/// An administrator soft-deleted (purged) a session and dropped it in-memory.
|
||||
pub const SESSION_REMOVED: &'static str = "session_removed";
|
||||
|
||||
// --- Zero-touch enrollment events (SPEC-016) --------------------------------
// Emitted by POST /api/enroll and by the site enrollment-key rotation endpoint.
// There is no session behind these events, so they are written through
// `log_enrollment_event` with `session_id = NULL`. Structured context
// (machine_uid, site_code, fingerprint, etc.) is carried in `details`, and the
// source IP in `ip_address`.

/// First-time self-registration: a new machine enrolled at a site and received
/// its first `cak_`.
pub const ENROLL_NEW: &'static str = "enroll_new";

/// A known machine_uid enrolled again at the SAME site (re-image / re-install):
/// the existing row was reused and a fresh `cak_` minted.
pub const ENROLL_REUSE: &'static str = "enroll_reuse";

/// A known machine_uid enrolled under a DIFFERENT site and its site binding was
/// updated (a "site move"). Fires an alert.
///
/// NOTE (SPEC-016 Phase A): the unauthenticated enroll path never performs this
/// move. A cross-site enroll is REFUSED (`ENROLL_SITE_CONFLICT`) instead of
/// silently repointing the machine. The event is reserved for the deliberate
/// Phase-B `--reassign` flow (and the dashboard move action) that supersede it.
#[allow(dead_code)] // reserved for Phase-B --reassign; not emitted by Phase A enroll
pub const ENROLL_SITE_MOVE: &'static str = "enroll_site_move";

/// A known machine_uid presented a valid key for a site other than the one it is
/// currently bound to. Phase A REFUSES this (no move, no key minted) as the
/// accidental-move / cross-site-hijack guard; the deliberate move arrives with
/// the Phase-B `--reassign` flow + dashboard. Fires an alert.
pub const ENROLL_SITE_CONFLICT: &'static str = "enroll_site_conflict";

/// A machine_uid collision was detected at enroll: the endpoint was put in
/// `pending` and awaits operator confirmation in the dashboard. Fires an alert.
pub const ENROLL_COLLISION_PENDING: &'static str = "enroll_collision_pending";

/// Enrollment-key verification failed (wrong/inactive key or unknown site_code).
/// Security audit trail for the open-registration surface.
pub const ENROLL_REJECTED: &'static str = "enroll_rejected";

/// An administrator rotated a site's enrollment key (new version + fingerprint;
/// old installers can no longer enroll NEW machines).
pub const ENROLLMENT_KEY_ROTATED: &'static str = "enrollment_key_rotated";
|
||||
}
|
||||
|
||||
/// Log a session event
|
||||
@@ -154,6 +188,42 @@ pub async fn log_admin_removal(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Log a zero-touch enrollment audit event (SPEC-016).
|
||||
///
|
||||
/// Shares the `connect_session_events` audit table but carries no session
|
||||
/// (`session_id = NULL`, the FK column is nullable) and no viewer — enrollment is
|
||||
/// an unauthenticated agent action, not a viewer/session event. The structured
|
||||
/// detail (machine_uid, site_code, fingerprint version, decision, etc.) goes in
|
||||
/// `details` and the agent's source IP in `ip_address`.
|
||||
///
|
||||
/// Best-effort: a failure to write the audit row must NOT fail the enroll (the
|
||||
/// machine row and `cak_` already exist); the caller logs the error and proceeds,
|
||||
/// matching how the relay and Task-5 removal treat audit writes.
|
||||
pub async fn log_enrollment_event(
|
||||
pool: &PgPool,
|
||||
event_type: &str,
|
||||
details: JsonValue,
|
||||
ip_address: Option<IpAddr>,
|
||||
) -> Result<i64, sqlx::Error> {
|
||||
let ip_str = ip_address.map(|ip| ip.to_string());
|
||||
|
||||
let result = sqlx::query_scalar::<_, i64>(
|
||||
r#"
|
||||
INSERT INTO connect_session_events
|
||||
(session_id, event_type, viewer_id, viewer_name, details, ip_address)
|
||||
VALUES (NULL, $1, NULL, NULL, $2, $3::inet)
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(event_type)
|
||||
.bind(details)
|
||||
.bind(ip_str)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Get events for a session
|
||||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||||
pub async fn get_session_events(
|
||||
|
||||
@@ -64,6 +64,16 @@ pub struct Machine {
|
||||
/// history) is retained. NULL = live. Nullable, so it is read NULL-tolerantly
|
||||
/// in the manual `FromRow` below.
|
||||
pub deleted_at: Option<DateTime<Utc>>,
|
||||
/// Relational site binding for a machine enrolled via `/api/enroll` (SPEC-016,
|
||||
/// migration 010). NULL for legacy / support-code / connect-path machines that
|
||||
/// never enrolled through the zero-touch flow. A change of this on re-enroll is
|
||||
/// the "site move" the enroll path audits.
|
||||
pub site_id: Option<Uuid>,
|
||||
/// Collision-gate state (SPEC-016, migration 010): `'active'` (live, auto-approve)
|
||||
/// or `'pending'` (a machine_uid collision was detected at enroll; awaiting
|
||||
/// operator confirmation before the endpoint may be controlled). Non-null with a
|
||||
/// default of `'active'`; read NULL-tolerantly below for defense in depth.
|
||||
pub enrollment_state: String,
|
||||
}
|
||||
|
||||
impl<'r> FromRow<'r, PgRow> for Machine {
|
||||
@@ -83,6 +93,13 @@ impl<'r> FromRow<'r, PgRow> for Machine {
|
||||
machine_uid: row.try_get("machine_uid")?,
|
||||
// Schema-nullable (migration 009); decode directly as Option.
|
||||
deleted_at: row.try_get("deleted_at")?,
|
||||
// Schema-nullable (migration 010); decode directly as Option.
|
||||
site_id: row.try_get("site_id")?,
|
||||
// Non-null with default 'active' (migration 010); read NULL-tolerantly
|
||||
// (older snapshots / partial rows) and fall back to 'active'.
|
||||
enrollment_state: row
|
||||
.try_get::<Option<String>, _>("enrollment_state")?
|
||||
.unwrap_or_else(|| "active".to_string()),
|
||||
// Nullable-with-default columns mapped to non-`Option` Rust types: read as
|
||||
// `Option<T>` and fall back to the type default so a NULL cell never errors.
|
||||
is_elevated: row
|
||||
@@ -207,6 +224,131 @@ pub async fn upsert_machine(
|
||||
}
|
||||
}
|
||||
|
||||
/// Find a machine by the SPEC-016 per-tenant dedup key `(tenant_id, machine_uid)`.
|
||||
///
|
||||
/// This is the enroll-time dedup lookup: the same hardware re-enrolling (re-image /
|
||||
/// re-install) resolves to its existing row within the tenant, while the same
|
||||
/// hardware in a DIFFERENT tenant is a distinct row (resolved-decision #4). Tenant
|
||||
/// scoping uses the same default-tenant fold as the unique index so the lookup
|
||||
/// matches the uniqueness guarantee.
|
||||
///
|
||||
/// Unlike `get_machine_by_agent_id`, this deliberately does NOT filter
|
||||
/// `deleted_at IS NULL`: a previously operator-purged machine that legitimately
|
||||
/// re-enrolls must be found so the enroll path can revive it (clearing
|
||||
/// `deleted_at`), mirroring the connect-path revive in `upsert_machine`.
|
||||
pub async fn get_machine_by_tenant_uid(
|
||||
pool: &PgPool,
|
||||
tenant_id: Uuid,
|
||||
machine_uid: &str,
|
||||
) -> Result<Option<Machine>, sqlx::Error> {
|
||||
sqlx::query_as::<_, Machine>(
|
||||
r#"
|
||||
SELECT * FROM connect_machines
|
||||
WHERE machine_uid = $1
|
||||
AND COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid) = $2
|
||||
"#,
|
||||
)
|
||||
.bind(machine_uid)
|
||||
.bind(tenant_id)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Parameters for an enroll-time machine create/update (SPEC-016 `/api/enroll`).
|
||||
///
|
||||
/// `agent_id` is a freshly minted opaque id for a NEW enrollment (the agent's
|
||||
/// config UUID story is Phase B; the server only needs a unique non-null value for
|
||||
/// the `agent_id UNIQUE` column). On REUSE/MOVE the existing row's `agent_id` is
|
||||
/// preserved (the FK target of any already-minted `cak_`), so the update path does
|
||||
/// not touch it.
|
||||
pub struct EnrollMachineParams<'a> {
|
||||
pub agent_id: &'a str,
|
||||
pub hostname: &'a str,
|
||||
pub machine_uid: &'a str,
|
||||
pub tenant_id: Uuid,
|
||||
pub site_id: Uuid,
|
||||
/// Company label (-> connect_machines.organization).
|
||||
pub company: Option<&'a str>,
|
||||
/// Site label (-> connect_machines.site) — the free-text label, distinct from
|
||||
/// the relational site_id binding.
|
||||
pub site_label: Option<&'a str>,
|
||||
pub tags: &'a [String],
|
||||
/// 'active' (auto-approve) or 'pending' (collision-gated).
|
||||
pub enrollment_state: &'a str,
|
||||
}
|
||||
|
||||
/// Insert a NEW machine row for a first-time enrollment (SPEC-016).
|
||||
///
|
||||
/// Carries the labels, the relational `site_id`, the per-tenant `machine_uid`, and
|
||||
/// the collision-gate `enrollment_state`. Persistent + online. Returns the created
|
||||
/// row (its `id` is the FK target for the `cak_` the caller mints next).
|
||||
pub async fn insert_enrolled_machine(
|
||||
pool: &PgPool,
|
||||
p: &EnrollMachineParams<'_>,
|
||||
) -> Result<Machine, sqlx::Error> {
|
||||
sqlx::query_as::<_, Machine>(
|
||||
r#"
|
||||
INSERT INTO connect_machines
|
||||
(agent_id, hostname, is_persistent, status, last_seen, machine_uid,
|
||||
tenant_id, site_id, organization, site, tags, enrollment_state)
|
||||
VALUES ($1, $2, true, 'online', NOW(), $3, $4, $5, $6, $7, $8, $9)
|
||||
RETURNING *
|
||||
"#,
|
||||
)
|
||||
.bind(p.agent_id)
|
||||
.bind(p.hostname)
|
||||
.bind(p.machine_uid)
|
||||
.bind(p.tenant_id)
|
||||
.bind(p.site_id)
|
||||
.bind(p.company)
|
||||
.bind(p.site_label)
|
||||
.bind(p.tags)
|
||||
.bind(p.enrollment_state)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Update an EXISTING machine row on re-enroll / reuse / site-move (SPEC-016).
|
||||
///
|
||||
/// Refreshes hostname, site binding (`site_id`), labels, and `enrollment_state`,
|
||||
/// and revives a soft-deleted row (`deleted_at = NULL`) — a re-enroll of a purged
|
||||
/// host means it is live again, mirroring `upsert_machine`'s revive. Deliberately
|
||||
/// does NOT change `agent_id`: the existing id is the FK target of any prior `cak_`.
|
||||
/// Labels are COALESCE-merged so an enroll that omits a label does not wipe an
|
||||
/// existing value; `tags` is overwritten only when a non-empty set is supplied
|
||||
/// (matching `update_machine_metadata`'s convention).
|
||||
pub async fn update_enrolled_machine(
|
||||
pool: &PgPool,
|
||||
machine_id: Uuid,
|
||||
p: &EnrollMachineParams<'_>,
|
||||
) -> Result<Machine, sqlx::Error> {
|
||||
sqlx::query_as::<_, Machine>(
|
||||
r#"
|
||||
UPDATE connect_machines SET
|
||||
hostname = $2,
|
||||
site_id = $3,
|
||||
organization = COALESCE($4, organization),
|
||||
site = COALESCE($5, site),
|
||||
tags = CASE WHEN $6::text[] = '{}' THEN tags ELSE $6 END,
|
||||
enrollment_state = $7,
|
||||
status = 'online',
|
||||
last_seen = NOW(),
|
||||
deleted_at = NULL
|
||||
WHERE id = $1
|
||||
RETURNING *
|
||||
"#,
|
||||
)
|
||||
.bind(machine_id)
|
||||
.bind(p.hostname)
|
||||
.bind(p.site_id)
|
||||
.bind(p.company)
|
||||
.bind(p.site_label)
|
||||
.bind(p.tags)
|
||||
.bind(p.enrollment_state)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Update machine status and info
|
||||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||||
pub async fn update_machine_status(
|
||||
|
||||
@@ -4,10 +4,12 @@
|
||||
//! Optional - server works without database if DATABASE_URL not set.
|
||||
|
||||
pub mod agent_keys;
|
||||
pub mod enrollment_keys;
|
||||
pub mod events;
|
||||
pub mod machines;
|
||||
pub mod releases;
|
||||
pub mod sessions;
|
||||
pub mod sites;
|
||||
pub mod support_codes;
|
||||
pub mod tenancy;
|
||||
pub mod users;
|
||||
|
||||
94
server/src/db/sites.rs
Normal file
94
server/src/db/sites.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
//! Site database operations (SPEC-016 zero-touch enrollment).
|
||||
//!
|
||||
//! Backs the `connect_sites` table (migration 010): the relational anchor a
|
||||
//! per-site enrollment key hangs off and the `/api/enroll` flow resolves by
|
||||
//! `site_code`. See the migration header for why this table exists (the prior
|
||||
//! schema modeled "site" only as a free-text column on `connect_machines`).
|
||||
//!
|
||||
//! All queries use runtime `sqlx::query()` / `sqlx::query_as()` per the codebase
|
||||
//! convention (no compile-time `query!` macros, no `.sqlx` offline cache).
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Site record from the database.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||
pub struct Site {
|
||||
pub id: Uuid,
|
||||
pub site_code: String,
|
||||
pub display_name: Option<String>,
|
||||
pub company: Option<String>,
|
||||
pub tenant_id: Option<Uuid>,
|
||||
/// RESERVED for future per-site enrollment POLICY work (SPEC-016 §out-of-scope).
|
||||
/// Not enforced in Phase A.
|
||||
pub enrollment_policy: Option<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Resolve a site by its operator-facing `site_code`, scoped to the given tenant.
|
||||
///
|
||||
/// Tenant scoping uses the same default-tenant fold as the unique index so the
|
||||
/// lookup matches the uniqueness guarantee: `(COALESCE(tenant_id, default),
|
||||
/// site_code)`. Returns `None` if no site with that code exists in the tenant.
|
||||
pub async fn get_site_by_code(
|
||||
pool: &PgPool,
|
||||
site_code: &str,
|
||||
tenant_id: Uuid,
|
||||
) -> Result<Option<Site>, sqlx::Error> {
|
||||
sqlx::query_as::<_, Site>(
|
||||
r#"
|
||||
SELECT id, site_code, display_name, company, tenant_id, enrollment_policy, created_at
|
||||
FROM connect_sites
|
||||
WHERE site_code = $1
|
||||
AND COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid) = $2
|
||||
"#,
|
||||
)
|
||||
.bind(site_code)
|
||||
.bind(tenant_id)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Fetch a site by its primary-key UUID.
|
||||
pub async fn get_site_by_id(pool: &PgPool, id: Uuid) -> Result<Option<Site>, sqlx::Error> {
|
||||
sqlx::query_as::<_, Site>(
|
||||
r#"
|
||||
SELECT id, site_code, display_name, company, tenant_id, enrollment_policy, created_at
|
||||
FROM connect_sites
|
||||
WHERE id = $1
|
||||
"#,
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Insert a new site, returning the created row.
|
||||
///
|
||||
/// `tenant_id` is `None`-tolerant and resolved via `db::tenancy::current_tenant_id()`
|
||||
/// at the call site. Errors with a unique-violation if `(tenant, site_code)` already
|
||||
/// exists (the caller maps that to a 409).
|
||||
#[allow(dead_code)] // Wired by the site-admin API (dashboard site CRUD); Phase A exposes key rotation, not site CRUD.
|
||||
pub async fn insert_site(
|
||||
pool: &PgPool,
|
||||
site_code: &str,
|
||||
display_name: Option<&str>,
|
||||
company: Option<&str>,
|
||||
tenant_id: Option<Uuid>,
|
||||
) -> Result<Site, sqlx::Error> {
|
||||
sqlx::query_as::<_, Site>(
|
||||
r#"
|
||||
INSERT INTO connect_sites (site_code, display_name, company, tenant_id)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
RETURNING id, site_code, display_name, company, tenant_id, enrollment_policy, created_at
|
||||
"#,
|
||||
)
|
||||
.bind(site_code)
|
||||
.bind(display_name)
|
||||
.bind(company)
|
||||
.bind(tenant_id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
}
|
||||
@@ -448,6 +448,11 @@ async fn main() -> Result<()> {
|
||||
)),
|
||||
)
|
||||
.route("/api/codes/:code/cancel", post(cancel_code))
|
||||
// Zero-touch enrollment (SPEC-016). PUBLIC: no JWT — the per-site enrollment
|
||||
// key in the body is the gate, and the handler applies its own
|
||||
// per-(site_code, IP) rate limit / lockout (defense-in-depth). Mounted with
|
||||
// the other public API routes.
|
||||
.route("/api/enroll", post(api::enroll::enroll))
|
||||
// WebSocket endpoints
|
||||
.route("/ws/agent", get(relay::agent_ws_handler))
|
||||
.route("/ws/viewer", get(relay::viewer_ws_handler))
|
||||
@@ -498,6 +503,18 @@ async fn main() -> Result<()> {
|
||||
"/api/machines/:agent_id/keys/:key_id",
|
||||
delete(api::machine_keys::revoke_key),
|
||||
)
|
||||
// Per-site enrollment key administration (SPEC-016, admin-only / JWT).
|
||||
// Rotate regenerates the cek_ secret + fingerprint (old installers can no
|
||||
// longer enroll new machines); GET returns the current non-secret
|
||||
// fingerprint/version. Both gated by the AdminUser extractor.
|
||||
.route(
|
||||
"/api/sites/:id/enrollment-key",
|
||||
get(api::sites::get_enrollment_key),
|
||||
)
|
||||
.route(
|
||||
"/api/sites/:id/enrollment-key/rotate",
|
||||
post(api::sites::rotate_enrollment_key),
|
||||
)
|
||||
// REST API - Releases and Version
|
||||
.route("/api/version", get(api::releases::get_version)) // No auth - for agent polling
|
||||
.route("/api/releases", get(api::releases::list_releases))
|
||||
|
||||
@@ -77,6 +77,19 @@ pub const CODE_VALIDATE_MAX_FAILURES: u32 = 10;
|
||||
/// Support-code validate: how long an IP stays locked out once tripped.
|
||||
pub const CODE_VALIDATE_LOCKOUT: Duration = Duration::from_secs(15 * 60);
|
||||
|
||||
/// Enroll (`POST /api/enroll`, SPEC-016): length of the fixed rate-limit window.
pub const ENROLL_WINDOW: Duration = Duration::from_secs(60);

/// Enroll: request cap per window for one `(site_code, IP)` pair.
///
/// Deliberately generous: a zero-touch site push sends N machines through enroll
/// almost at once, so the cap must be mass-deploy friendly while still stopping a
/// runaway loop. This is defense-in-depth only; the 256-bit enrollment key is the
/// load-bearing gate, not this cap.
pub const ENROLL_MAX_PER_WINDOW: u32 = 60;

/// Enroll: number of consecutive FAILED attempts (bad/inactive key, unknown site)
/// from one `(site_code, IP)` pair that trips the lockout.
pub const ENROLL_MAX_FAILURES: u32 = 20;

/// Enroll: how long a tripped `(site_code, IP)` pair stays locked out (15 minutes).
pub const ENROLL_LOCKOUT: Duration = Duration::from_secs(15 * 60);
|
||||
|
||||
/// Hard cap on the number of distinct IPs tracked by any single limiter map.
|
||||
/// Prevents an IP-rotating attacker from growing memory without bound. When the
|
||||
/// cap is hit, the oldest-windowed entries are pruned. Generous for a real MSP
|
||||
@@ -260,6 +273,150 @@ impl FailureLockout {
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Composite-key limiter for enrollment (keyed by (site_code, IP)) — SPEC-016
|
||||
// ============================================================================
|
||||
//
|
||||
// The login / change-password / code-validate limiters above key purely on IP.
|
||||
// SPEC-016 §3 wants the enroll defense keyed on `(site_code, source-IP)` so a noisy
|
||||
// site push from one office IP cannot lock out a different site enrolling from the
|
||||
// same egress IP. Rather than overload the IP-only maps, this is a small dedicated
|
||||
// composite-key limiter + lockout. It is invoked from the enroll HANDLER (not a
|
||||
// `from_fn` layer) because the `site_code` lives in the JSON body, which a
|
||||
// pre-handler middleware cannot read without consuming it. Documented as
|
||||
// defense-in-depth: the 256-bit enrollment key is the real gate.
|
||||
|
||||
/// Composite limiter key: the site_code and the real client IP.
|
||||
type EnrollKey = (String, IpAddr);
|
||||
|
||||
/// Per-`(site_code, IP)` fixed-window limiter + consecutive-failure lockout.
|
||||
///
|
||||
/// Combines both protections behind one lock-guarded map so the enroll handler
|
||||
/// makes a single allow/deny decision and reports success/failure into the same
|
||||
/// structure. Self-pruning and size-capped, like the IP-only limiters.
|
||||
#[derive(Clone)]
|
||||
pub struct EnrollLimiter {
|
||||
inner: std::sync::Arc<Mutex<HashMap<EnrollKey, EnrollEntry>>>,
|
||||
max_per_window: u32,
|
||||
window: Duration,
|
||||
max_failures: u32,
|
||||
cooldown: Duration,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct EnrollEntry {
|
||||
window_started: Instant,
|
||||
count: u32,
|
||||
failures: u32,
|
||||
locked_until: Option<Instant>,
|
||||
last_seen: Instant,
|
||||
}
|
||||
|
||||
impl EnrollLimiter {
|
||||
pub fn new(
|
||||
max_per_window: u32,
|
||||
window: Duration,
|
||||
max_failures: u32,
|
||||
cooldown: Duration,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: std::sync::Arc::new(Mutex::new(HashMap::new())),
|
||||
max_per_window,
|
||||
window,
|
||||
max_failures,
|
||||
cooldown,
|
||||
}
|
||||
}
|
||||
|
||||
fn entry_now() -> EnrollEntry {
|
||||
let now = Instant::now();
|
||||
EnrollEntry {
|
||||
window_started: now,
|
||||
count: 0,
|
||||
failures: 0,
|
||||
locked_until: None,
|
||||
last_seen: now,
|
||||
}
|
||||
}
|
||||
|
||||
/// Admit one enroll attempt for `(site_code, ip)`. Returns `true` if allowed
|
||||
/// (and counts it). Returns `false` if the key is currently locked out OR over
|
||||
/// the per-window request cap. Clock injected for tests.
|
||||
fn check_at(&self, site_code: &str, ip: IpAddr, now: Instant) -> bool {
|
||||
let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner());
|
||||
|
||||
if map.len() >= MAX_TRACKED_IPS {
|
||||
let window = self.window;
|
||||
let cooldown = self.cooldown;
|
||||
map.retain(|_, e| {
|
||||
e.locked_until.map(|u| now < u).unwrap_or(false)
|
||||
|| now.duration_since(e.window_started) < window
|
||||
|| now.duration_since(e.last_seen) < cooldown
|
||||
});
|
||||
}
|
||||
|
||||
let key = (site_code.to_string(), ip);
|
||||
let e = map.entry(key).or_insert_with(Self::entry_now);
|
||||
e.last_seen = now;
|
||||
|
||||
// Lockout takes precedence.
|
||||
if let Some(until) = e.locked_until {
|
||||
if now < until {
|
||||
return false;
|
||||
}
|
||||
// Cooldown elapsed — clear it for a fresh start.
|
||||
e.locked_until = None;
|
||||
e.failures = 0;
|
||||
}
|
||||
|
||||
// Roll the fixed window forward if elapsed.
|
||||
if now.duration_since(e.window_started) >= self.window {
|
||||
e.window_started = now;
|
||||
e.count = 0;
|
||||
}
|
||||
|
||||
if e.count >= self.max_per_window {
|
||||
false
|
||||
} else {
|
||||
e.count += 1;
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Admit one enroll attempt (real clock).
|
||||
pub fn check(&self, site_code: &str, ip: IpAddr) -> bool {
|
||||
self.check_at(site_code, ip, Instant::now())
|
||||
}
|
||||
|
||||
fn record_failure_at(&self, site_code: &str, ip: IpAddr, now: Instant) {
|
||||
let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner());
|
||||
let key = (site_code.to_string(), ip);
|
||||
let e = map.entry(key).or_insert_with(Self::entry_now);
|
||||
e.last_seen = now;
|
||||
e.failures = e.failures.saturating_add(1);
|
||||
if e.failures >= self.max_failures {
|
||||
e.locked_until = Some(now + self.cooldown);
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a FAILED enroll attempt (bad key / unknown site) for the key,
|
||||
/// tripping the lockout once the streak reaches `max_failures`.
|
||||
pub fn record_failure(&self, site_code: &str, ip: IpAddr) {
|
||||
self.record_failure_at(site_code, ip, Instant::now());
|
||||
}
|
||||
|
||||
/// Record a SUCCESSFUL enroll for the key, resetting its failure streak.
|
||||
pub fn record_success(&self, site_code: &str, ip: IpAddr) {
|
||||
let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner());
|
||||
let key = (site_code.to_string(), ip);
|
||||
if let Some(e) = map.get_mut(&key) {
|
||||
e.failures = 0;
|
||||
e.locked_until = None;
|
||||
e.last_seen = Instant::now();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Shared rate-limit state (lives in AppState)
|
||||
// ============================================================================
|
||||
@@ -275,6 +432,9 @@ pub struct RateLimitState {
|
||||
pub code_validate: RateLimiter,
|
||||
/// Per-IP lockout on repeated failed code validations (brute-force defense).
|
||||
pub code_validate_lockout: FailureLockout,
|
||||
/// `POST /api/enroll` (SPEC-016): per-`(site_code, IP)` request cap +
|
||||
/// consecutive-failure lockout. Invoked from the enroll handler.
|
||||
pub enroll: EnrollLimiter,
|
||||
}
|
||||
|
||||
impl RateLimitState {
|
||||
@@ -290,6 +450,12 @@ impl RateLimitState {
|
||||
CODE_VALIDATE_MAX_FAILURES,
|
||||
CODE_VALIDATE_LOCKOUT,
|
||||
),
|
||||
enroll: EnrollLimiter::new(
|
||||
ENROLL_MAX_PER_WINDOW,
|
||||
ENROLL_WINDOW,
|
||||
ENROLL_MAX_FAILURES,
|
||||
ENROLL_LOCKOUT,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -524,4 +690,51 @@ mod tests {
|
||||
assert!(lockout.is_locked_at(ip(8), t0));
|
||||
assert!(!lockout.is_locked_at(ip(9), t0)); // ip9 unaffected
|
||||
}
|
||||
|
||||
// -- EnrollLimiter (composite (site_code, IP) key) --------------------------
|
||||
|
||||
#[test]
fn enroll_window_allows_up_to_cap_then_blocks() {
    // Cap of 2 per window; the failure threshold (100) is never reached here.
    let limiter = EnrollLimiter::new(2, Duration::from_secs(60), 100, Duration::from_secs(600));
    let start = Instant::now();

    // Three attempts at the same instant: two fit under the cap, the third does not.
    let admitted: Vec<bool> = (0..3)
        .map(|_| limiter.check_at("SITE-A", ip(1), start))
        .collect();
    assert_eq!(admitted, [true, true, false]);
}
|
||||
|
||||
#[test]
fn enroll_is_keyed_by_site_and_ip() {
    // Cap of 1 so a second hit on the same key is immediately over the cap.
    let limiter = EnrollLimiter::new(1, Duration::from_secs(60), 100, Duration::from_secs(600));
    let start = Instant::now();

    let first = limiter.check_at("SITE-A", ip(1), start);
    let repeat = limiter.check_at("SITE-A", ip(1), start);
    assert!(first);
    assert!(!repeat); // same key over cap

    // Changing only the site gives an independent bucket.
    let other_site = limiter.check_at("SITE-B", ip(1), start);
    assert!(other_site);

    // Changing only the IP gives an independent bucket too.
    let other_ip = limiter.check_at("SITE-A", ip(2), start);
    assert!(other_ip);
}
|
||||
|
||||
#[test]
fn enroll_lockout_trips_after_failures_and_blocks_check() {
    // Generous request cap (100) so only the 3-failure lockout can deny.
    let limiter = EnrollLimiter::new(100, Duration::from_secs(60), 3, Duration::from_secs(600));
    let start = Instant::now();

    // Two failures: one short of the threshold, so a check is still admitted.
    for _ in 0..2 {
        limiter.record_failure_at("SITE-A", ip(1), start);
    }
    assert!(limiter.check_at("SITE-A", ip(1), start));

    // The third failure trips the lockout; checks are denied although the key
    // is far below the request cap.
    limiter.record_failure_at("SITE-A", ip(1), start);
    assert!(!limiter.check_at("SITE-A", ip(1), start));
}
|
||||
|
||||
#[test]
fn enroll_success_resets_failure_streak() {
    // Lockout threshold of 2: two failures in a row would lock the key.
    let limiter = EnrollLimiter::new(100, Duration::from_secs(60), 2, Duration::from_secs(600));
    let start = Instant::now();

    limiter.record_failure_at("SITE-A", ip(1), start);
    // A success in between wipes the streak...
    limiter.record_success("SITE-A", ip(1));
    limiter.record_failure_at("SITE-A", ip(1), start);

    // ...so only one failure counts and the key is not locked.
    let admitted = limiter.check_at("SITE-A", ip(1), start);
    assert!(admitted);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user