diff --git a/server/migrations/005_machine_metadata.sql b/server/migrations/005_machine_metadata.sql new file mode 100644 index 0000000..2dfb5b7 --- /dev/null +++ b/server/migrations/005_machine_metadata.sql @@ -0,0 +1,26 @@ +-- Migration: 005_machine_metadata.sql +-- Purpose: Add the organization / site / tags columns to connect_machines. +-- +-- db::machines::update_machine_metadata (called from relay/mod.rs on every +-- AgentStatus update) has always written `organization`, `site`, and `tags` +-- to connect_machines, but no prior migration ever created those columns. +-- The caller swallows the error (`let _ = ...`), so the write failed silently +-- and the metadata never persisted. This adds the columns so the write +-- succeeds, and the Machine struct now maps them (so SELECT * returns them). +-- Resolves coord todo faf39fe0. +-- +-- Idempotent: ADD COLUMN IF NOT EXISTS. Applied on server startup by +-- sqlx::migrate!(); never pre-applied via psql. Ordered after 004. +-- See .claude/standards/gururmm/sqlx-migrations.md. + +-- organization / site: nullable free-form text reported by the agent +-- (AgentStatus.organization / AgentStatus.site). +ALTER TABLE connect_machines ADD COLUMN IF NOT EXISTS organization TEXT; +ALTER TABLE connect_machines ADD COLUMN IF NOT EXISTS site TEXT; + +-- tags: TEXT[] matching the &[String] bound by update_machine_metadata and the +-- repeated-string AgentStatus.tags proto field. NOT NULL DEFAULT '{}' so every +-- row (existing and new) holds an array and decodes cleanly into Vec; +-- the metadata update only overwrites it when the agent reports a non-empty set. +ALTER TABLE connect_machines + ADD COLUMN IF NOT EXISTS tags TEXT[] NOT NULL DEFAULT '{}'; diff --git a/server/src/api/machine_keys.rs b/server/src/api/machine_keys.rs new file mode 100644 index 0000000..a7f8786 --- /dev/null +++ b/server/src/api/machine_keys.rs @@ -0,0 +1,253 @@ +//! Per-agent key issuance endpoints (admin plane). +//! +//! Lets an administrator mint, list, and revoke per-agent `cak_` keys for a +//! managed machine. These keys are the v2 replacement for the shared +//! `AGENT_API_KEY`: each is per-machine, individually revocable, and stored +//! only as a SHA-256 hash. +//! +//! Auth: dashboard JWT + admin role (the [`AdminUser`] extractor). All routes +//! are mounted behind the JWT `auth_layer`, so the extractor has the JWT config +//! and blacklist available. +//! +//! SECURITY: +//! - The plaintext key is returned exactly once, in the create response. It is +//! never persisted and never logged. +//! - List responses expose only metadata (id, timestamps) — never the hash and +//! never the plaintext. +//! - Errors returned to clients use the standard envelope and never surface raw +//! `sqlx`/`anyhow` strings (which can leak schema/host detail). + +use axum::{ + extract::{Path, State}, + http::StatusCode, + Json, +}; +use chrono::{DateTime, Utc}; +use serde::Serialize; +use uuid::Uuid; + +use crate::auth::{agent_keys, AdminUser}; +use crate::db; +use crate::AppState; + +/// Standard error envelope (see `.claude/standards/api/response-format.md`). +#[derive(Debug, Serialize)] +pub struct ApiError { + pub detail: String, + pub error_code: String, + pub status_code: u16, +} + +impl ApiError { + fn new(status: StatusCode, code: &str, detail: &str) -> (StatusCode, Json) { + ( + status, + Json(ApiError { + detail: detail.to_string(), + error_code: code.to_string(), + status_code: status.as_u16(), + }), + ) + } +} + +type ApiResult = Result)>; + +/// Response for a freshly minted key. `key` is present ONLY here, once. +#[derive(Debug, Serialize)] +pub struct CreatedKey { + pub id: Uuid, + pub machine_id: Uuid, + /// The plaintext `cak_` key. Shown exactly once — store it now; the server + /// keeps only its hash. + pub key: String, + pub created_at: DateTime, +} + +/// Public metadata for an existing key. Never includes the hash or plaintext. +#[derive(Debug, Serialize)] +pub struct KeyMetadata { + pub id: Uuid, + pub machine_id: Uuid, + pub created_at: DateTime, + pub last_used_at: Option>, + pub revoked_at: Option>, +} + +/// Resolve the database handle or a 503 envelope. +fn require_db(state: &AppState) -> ApiResult<&db::Database> { + state.db.as_ref().ok_or_else(|| { + ApiError::new( + StatusCode::SERVICE_UNAVAILABLE, + "DATABASE_UNAVAILABLE", + "Database not available", + ) + }) +} + +/// Resolve an `agent_id` path segment to the machine's UUID primary key (or a +/// 404 envelope). The route param is `agent_id` (string) to stay consistent +/// with the other `/api/machines/:agent_id` routes — matchit 0.7 panics if the +/// same path position uses two different param names — while the key tables key +/// on the machine UUID. +async fn resolve_machine_id(db: &db::Database, agent_id: &str) -> ApiResult { + let machine = db::machines::get_machine_by_agent_id(db.pool(), agent_id) + .await + .map_err(|e| { + tracing::error!("DB error resolving machine: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + ) + })? + .ok_or_else(|| { + ApiError::new( + StatusCode::NOT_FOUND, + "MACHINE_NOT_FOUND", + "Machine not found", + ) + })?; + Ok(machine.id) +} + +/// POST /api/machines/:agent_id/keys — mint a new per-agent key for a machine. +/// +/// `:agent_id` is the machine's `agent_id`; it is resolved to the machine UUID +/// (`connect_machines.id`) that the `connect_agent_keys.machine_id` FK targets. +/// Returns the plaintext key once. +pub async fn create_key( + AdminUser(admin): AdminUser, + State(state): State, + Path(agent_id): Path, +) -> ApiResult<(StatusCode, Json)> { + let db = require_db(&state)?; + let machine_id = resolve_machine_id(db, &agent_id).await?; + + // Mint plaintext + hash. Only the hash is persisted. + let plaintext = agent_keys::generate_agent_key(); + let key_hash = agent_keys::hash_agent_key(&plaintext); + let tenant_id = db::tenancy::current_tenant_id(); + + let key_id = + db::agent_keys::insert_agent_key(db.pool(), machine_id, &key_hash, Some(tenant_id)) + .await + .map_err(|e| { + tracing::error!("DB error inserting agent key: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to create agent key", + ) + })?; + + // Audit the issuance WITHOUT the key material. + tracing::info!( + "Admin {} issued a per-agent key {} for machine {}", + admin.username, + key_id, + machine_id + ); + + Ok(( + StatusCode::CREATED, + Json(CreatedKey { + id: key_id, + machine_id, + key: plaintext, + created_at: Utc::now(), + }), + )) +} + +/// GET /api/machines/:agent_id/keys — list key metadata for a machine. +/// +/// Returns only non-secret metadata. Useful for an admin to see which keys +/// exist, when they were last used, and which are revoked. +pub async fn list_keys( + AdminUser(_admin): AdminUser, + State(state): State, + Path(agent_id): Path, +) -> ApiResult>> { + let db = require_db(&state)?; + let machine_id = resolve_machine_id(db, &agent_id).await?; + + let keys = db::agent_keys::list_for_machine(db.pool(), machine_id) + .await + .map_err(|e| { + tracing::error!("DB error listing agent keys: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to list agent keys", + ) + })?; + + let out = keys + .into_iter() + .map(|k| KeyMetadata { + id: k.id, + machine_id: k.machine_id, + created_at: k.created_at, + last_used_at: k.last_used_at, + revoked_at: k.revoked_at, + }) + .collect(); + + Ok(Json(out)) +} + +/// DELETE /api/machines/:agent_id/keys/:key_id — revoke a key. +/// +/// Sets `revoked_at`; the key is immediately rejected by agent auth thereafter. +/// `:agent_id` (machine) scopes the revoke so a key id from another machine +/// cannot be revoked via a mismatched URL. +pub async fn revoke_key( + AdminUser(admin): AdminUser, + State(state): State, + Path((agent_id, key_id)): Path<(String, Uuid)>, +) -> ApiResult { + let db = require_db(&state)?; + let machine_id = resolve_machine_id(db, &agent_id).await?; + + // Scope the revoke to the path machine so a key id from another machine + // cannot be revoked via a mismatched URL. + let owned = db::agent_keys::key_belongs_to_machine(db.pool(), key_id, machine_id) + .await + .map_err(|e| { + tracing::error!("DB error verifying key ownership: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Internal server error", + ) + })?; + + if !owned { + return Err(ApiError::new( + StatusCode::NOT_FOUND, + "KEY_NOT_FOUND", + "Key not found for this machine", + )); + } + + db::agent_keys::revoke_agent_key(db.pool(), key_id) + .await + .map_err(|e| { + tracing::error!("DB error revoking agent key: {}", e); + ApiError::new( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to revoke agent key", + ) + })?; + + tracing::info!( + "Admin {} revoked per-agent key {} for machine {}", + admin.username, + key_id, + machine_id + ); + + Ok(StatusCode::NO_CONTENT) +} diff --git a/server/src/api/mod.rs b/server/src/api/mod.rs index 59c904f..7a16a55 100644 --- a/server/src/api/mod.rs +++ b/server/src/api/mod.rs @@ -4,7 +4,9 @@ pub mod auth; pub mod auth_logout; pub mod changelog; pub mod downloads; +pub mod machine_keys; pub mod releases; +pub mod sessions; pub mod users; use axum::{ diff --git a/server/src/api/sessions.rs b/server/src/api/sessions.rs new file mode 100644 index 0000000..96a2025 --- /dev/null +++ b/server/src/api/sessions.rs @@ -0,0 +1,106 @@ +//! Session API endpoints — viewer-token minting. +//! +//! `POST /api/sessions/:id/viewer-token` mints a short-lived, session-scoped +//! viewer JWT for an authenticated dashboard user. This replaces the v1 model +//! where any dashboard JWT could join any session at the viewer WebSocket: +//! a viewer token is now bound to one `session_id`, and the WS layer (Task 3) +//! verifies signature + expiry + blacklist + that the token's `session_id` +//! matches the requested session. +//! +//! Authorization (Phase 1): the requester must present a valid dashboard JWT +//! (the [`AuthenticatedUser`] extractor) AND the target session must exist in +//! the live session manager. Per-tenant / per-machine ACL narrowing is a +//! Phase-4 concern; the tenancy claim is already carried so the WS and future +//! phases can enforce it. Minting is itself the authorization gate — only an +//! authenticated user can obtain a token, and only for a real session. + +use axum::{ + extract::{Path, State}, + http::StatusCode, + Json, +}; +use serde::Serialize; +use uuid::Uuid; + +use crate::auth::AuthenticatedUser; +use crate::db; +use crate::AppState; + +use super::machine_keys::ApiError; + +type ApiResult = Result)>; + +/// Response carrying a freshly minted viewer token. +#[derive(Debug, Serialize)] +pub struct ViewerTokenResponse { + /// The short-lived viewer JWT. Use as the `token` query param on + /// `/ws/viewer` for this session only. + pub token: String, + /// The session the token authorizes (echoed for client convenience). + pub session_id: String, + /// Token lifetime in seconds (for client-side refresh scheduling). + pub expires_in_secs: i64, +} + +/// Build a standard error envelope (mirrors `machine_keys`). +fn err(status: StatusCode, code: &str, detail: &str) -> (StatusCode, Json) { + ( + status, + Json(ApiError { + detail: detail.to_string(), + error_code: code.to_string(), + status_code: status.as_u16(), + }), + ) +} + +/// POST /api/sessions/:id/viewer-token +/// +/// Mints a ~5-minute, session-scoped viewer token for the authenticated user. +pub async fn mint_viewer_token( + user: AuthenticatedUser, + State(state): State, + Path(id): Path, +) -> ApiResult> { + let session_id = Uuid::parse_str(&id) + .map_err(|_| err(StatusCode::BAD_REQUEST, "INVALID_SESSION_ID", "Invalid session ID"))?; + + // Authorization gate: the session must exist (live session manager is the + // source of truth for joinable sessions, matching GET /api/sessions/:id). + let session = state.sessions.get_session(session_id).await.ok_or_else(|| { + err( + StatusCode::NOT_FOUND, + "SESSION_NOT_FOUND", + "Session not found", + ) + })?; + + // Resolve tenancy (Phase-1: always the default tenant). Carried in the + // claim so the WS and Phase-4 isolation can enforce it. + let tenant_id = db::tenancy::current_tenant_id(); + + let token = state + .jwt_config + .create_viewer_token(&user.user_id, session_id, tenant_id) + .map_err(|e| { + tracing::error!("Failed to mint viewer token: {}", e); + err( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + "Failed to mint viewer token", + ) + })?; + + tracing::info!( + "User {} minted a viewer token for session {} (agent {})", + user.username, + session_id, + session.agent_id + ); + + Ok(Json(ViewerTokenResponse { + token, + session_id: session_id.to_string(), + expires_in_secs: crate::auth::jwt::VIEWER_TOKEN_TTL_SECS, + })) +} diff --git a/server/src/auth/agent_keys.rs b/server/src/auth/agent_keys.rs new file mode 100644 index 0000000..4e93440 --- /dev/null +++ b/server/src/auth/agent_keys.rs @@ -0,0 +1,147 @@ +//! Per-agent key minting, hashing, and verification (auth layer). +//! +//! This is the auth-plane counterpart to the persistence-only +//! [`crate::db::agent_keys`] module. It owns the *secret* side of the per-agent +//! key lifecycle: +//! +//! - [`generate_agent_key`] mints a high-entropy, `cak_`-prefixed plaintext key. +//! The plaintext is shown to the operator exactly once at issuance and is +//! NEVER persisted or logged. +//! - [`hash_agent_key`] computes the SHA-256 hex digest that the database +//! stores and looks up by. SHA-256 (not Argon2) is deliberate here: the key is +//! already high-entropy random, so the slow, salted password KDF buys nothing, +//! and a deterministic digest is required for the constant-shape hash-equality +//! lookup. Argon2id remains reserved for *passwords* (see +//! [`crate::auth::password`]). +//! - [`verify_agent_key`] hashes a presented key, looks it up against the +//! active (non-revoked) rows, stamps `last_used_at` on a hit, and returns the +//! owning machine id. +//! +//! This mirrors the GuruRMM `agk_` enrollment pattern +//! (`guru-rmm/server/src/ws/mod.rs`): random bytes -> prefixed token -> +//! hex SHA-256 stored hash. +//! +//! SECURITY: never log a plaintext key or its hash. Functions here return the +//! plaintext to the caller (issuance endpoint) but emit no `tracing` output +//! containing key material. + +use rand::RngCore; +use ring::digest; +use sqlx::PgPool; +use uuid::Uuid; + +use crate::db; + +/// Prefix marking a GuruConnect per-agent key. Mirrors GuruRMM's `agk_`. +pub const AGENT_KEY_PREFIX: &str = "cak_"; + +/// Number of random bytes behind a per-agent key (256 bits of entropy). +const AGENT_KEY_RANDOM_BYTES: usize = 32; + +/// Generate a new high-entropy, `cak_`-prefixed per-agent key (plaintext). +/// +/// The returned string is the ONLY time the plaintext exists; the caller must +/// surface it to the operator once and store only [`hash_agent_key`] of it. +/// Uses the OS CSPRNG via `rand::rngs::OsRng`. +pub fn generate_agent_key() -> String { + let mut bytes = [0u8; AGENT_KEY_RANDOM_BYTES]; + rand::rngs::OsRng.fill_bytes(&mut bytes); + format!("{}{}", AGENT_KEY_PREFIX, hex_encode(&bytes)) +} + +/// Compute the storage/lookup hash for a per-agent key. +/// +/// SHA-256, hex-encoded lowercase — identical in shape to GuruRMM's +/// `hash_api_key`, so the two systems' hashed-key columns are interchangeable +/// in format. Deterministic by design: the database looks keys up by this exact +/// value. +pub fn hash_agent_key(presented: &str) -> String { + let digest = digest::digest(&digest::SHA256, presented.as_bytes()); + hex_encode(digest.as_ref()) +} + +/// Verify a presented per-agent key. +/// +/// Returns the owning `machine_id` if the key hashes to an active (non-revoked) +/// row, stamping `last_used_at` as a side effect. Returns `None` if the key is +/// unknown or revoked (`find_active_by_hash` already filters `revoked_at`). +/// +/// SECURITY: only the hash touches the database; the plaintext never leaves +/// this function and is never logged. +pub async fn verify_agent_key(pool: &PgPool, presented: &str) -> Option { + // Cheap structural reject before hashing/DB work: a real key is prefixed. + if !presented.starts_with(AGENT_KEY_PREFIX) { + return None; + } + + let key_hash = hash_agent_key(presented); + + match db::agent_keys::find_active_by_hash(pool, &key_hash).await { + Ok(Some(key)) => { + // Best-effort usage stamp; a failure here must not block auth. + if let Err(e) = db::agent_keys::touch_last_used(pool, key.id).await { + tracing::warn!("Failed to stamp last_used_at for agent key: {}", e); + } + Some(key.machine_id) + } + Ok(None) => None, + Err(e) => { + tracing::error!("Database error during agent key verification: {}", e); + None + } + } +} + +/// Lowercase hex encoding without pulling in the `hex` crate. +fn hex_encode(bytes: &[u8]) -> String { + use std::fmt::Write; + let mut s = String::with_capacity(bytes.len() * 2); + for b in bytes { + let _ = write!(s, "{:02x}", b); + } + s +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn generated_key_is_prefixed_and_high_entropy() { + let key = generate_agent_key(); + assert!(key.starts_with(AGENT_KEY_PREFIX)); + // prefix + 32 bytes * 2 hex chars + assert_eq!(key.len(), AGENT_KEY_PREFIX.len() + AGENT_KEY_RANDOM_BYTES * 2); + } + + #[test] + fn generated_keys_are_unique() { + let a = generate_agent_key(); + let b = generate_agent_key(); + assert_ne!(a, b); + } + + #[test] + fn hash_is_stable_and_hex() { + let key = "cak_deadbeef"; + let h1 = hash_agent_key(key); + let h2 = hash_agent_key(key); + assert_eq!(h1, h2); + assert_eq!(h1.len(), 64); // SHA-256 -> 32 bytes -> 64 hex chars + assert!(h1.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn hash_differs_per_key() { + assert_ne!(hash_agent_key("cak_aaa"), hash_agent_key("cak_bbb")); + } + + #[test] + fn hash_matches_known_sha256_vector() { + // SHA-256("abc") well-known test vector. + assert_eq!( + hex_encode(digest::digest(&digest::SHA256, b"abc").as_ref()), + "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad" + ); + } +} diff --git a/server/src/auth/jwt.rs b/server/src/auth/jwt.rs index ce8562f..01024c2 100644 --- a/server/src/auth/jwt.rs +++ b/server/src/auth/jwt.rs @@ -44,6 +44,54 @@ impl Claims { } } +/// The fixed `purpose` discriminator carried by a viewer token. +/// +/// A login [`Claims`] never carries this field; a [`ViewerClaims`] always does +/// and it is checked on validation. This makes the two token types +/// non-interchangeable even before structural deserialization differences are +/// considered. +pub const VIEWER_TOKEN_PURPOSE: &str = "viewer"; + +/// Default lifetime of a minted viewer token, in seconds (5 minutes). +pub const VIEWER_TOKEN_TTL_SECS: i64 = 300; + +/// Claims for a session-scoped viewer token. +/// +/// Minted by an authenticated + authorized dashboard user for ONE specific +/// session (`POST /api/sessions/:id/viewer-token`) and consumed by the viewer +/// WebSocket (Task 3), which verifies signature + expiry + blacklist + that +/// `session_id` matches the requested session before admitting the viewer. +/// +/// The `purpose` field is fixed to [`VIEWER_TOKEN_PURPOSE`]; a normal login JWT +/// has no such field and cannot be deserialized into this struct, and this +/// struct cannot be deserialized into a login [`Claims`] (it lacks the +/// login-only fields). The discriminator is also re-checked explicitly on +/// validation so the two are unambiguously separate credential planes. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ViewerClaims { + /// Subject — the dashboard user id that minted/owns this token. + pub sub: String, + /// The session this token authorizes viewing of. The WS layer (Task 3) + /// rejects the token if this does not equal the requested session. + pub session_id: String, + /// Tenancy scope (Phase-4-ready). Resolved via `db::tenancy` at mint time. + pub tenant_id: String, + /// Fixed discriminator — always [`VIEWER_TOKEN_PURPOSE`]. + pub purpose: String, + /// Expiration time (unix timestamp). + pub exp: i64, + /// Issued at (unix timestamp). + pub iat: i64, +} + +impl ViewerClaims { + /// Parse the session id claim as a UUID. + pub fn session_uuid(&self) -> Result { + Uuid::parse_str(&self.session_id) + .map_err(|e| anyhow!("Invalid session_id in viewer token: {}", e)) + } +} + /// JWT configuration #[derive(Clone)] pub struct JwtConfig { @@ -119,6 +167,74 @@ impl JwtConfig { Ok(token_data.claims) } + + /// Mint a short-lived, session-scoped viewer token. + /// + /// Signed with the same secret as login tokens but carrying a distinct + /// [`ViewerClaims`] shape and a fixed `purpose`. TTL defaults to + /// [`VIEWER_TOKEN_TTL_SECS`] (5 minutes) — short by design so a leaked + /// viewer token has a tiny window and no standing access is granted. + pub fn create_viewer_token( + &self, + user_id: &str, + session_id: Uuid, + tenant_id: Uuid, + ) -> Result { + let now = Utc::now(); + let exp = now + Duration::seconds(VIEWER_TOKEN_TTL_SECS); + + let claims = ViewerClaims { + sub: user_id.to_string(), + session_id: session_id.to_string(), + tenant_id: tenant_id.to_string(), + purpose: VIEWER_TOKEN_PURPOSE.to_string(), + exp: exp.timestamp(), + iat: now.timestamp(), + }; + + encode( + &Header::default(), + &claims, + &EncodingKey::from_secret(self.secret.as_bytes()), + ) + .map_err(|e| anyhow!("Failed to create viewer token: {}", e)) + } + + /// Validate a viewer token: signature + expiry + correct `purpose`. + /// + /// Rejects a login JWT presented as a viewer token (it lacks the viewer + /// claim shape, and even if forced it would fail the `purpose` check) and, + /// symmetrically, a viewer token can never satisfy [`Self::validate_token`] + /// because it lacks the login claim fields. The WS layer (Task 3) + /// additionally checks the blacklist and the `session_id` match. + #[allow(dead_code)] // Consumed by the viewer WebSocket handler in Task 3. + pub fn validate_viewer_token(&self, token: &str) -> Result { + let mut validation = Validation::default(); + validation.validate_exp = true; + validation.validate_nbf = false; + validation.leeway = 0; + // The login claim's required fields differ from ViewerClaims, so the + // default required-claims set ("exp") is the only safe intersection. + validation.set_required_spec_claims(&["exp"]); + + let token_data = decode::( + token, + &DecodingKey::from_secret(self.secret.as_bytes()), + &validation, + ) + .map_err(|e| anyhow!("Invalid viewer token: {}", e))?; + + if token_data.claims.purpose != VIEWER_TOKEN_PURPOSE { + return Err(anyhow!("Token is not a viewer token")); + } + + let now = Utc::now().timestamp(); + if token_data.claims.exp < now { + return Err(anyhow!("Viewer token has expired")); + } + + Ok(token_data.claims) + } } // Removed insecure default_jwt_secret() function - JWT_SECRET must be set via environment variable diff --git a/server/src/auth/mod.rs b/server/src/auth/mod.rs index 35739c2..2a1934a 100644 --- a/server/src/auth/mod.rs +++ b/server/src/auth/mod.rs @@ -3,11 +3,12 @@ //! Handles JWT validation for dashboard users and API key //! validation for agents. +pub mod agent_keys; pub mod jwt; pub mod password; pub mod token_blacklist; -pub use jwt::{Claims, JwtConfig}; +pub use jwt::{Claims, JwtConfig, ViewerClaims}; pub use password::{generate_random_password, hash_password, verify_password}; pub use token_blacklist::TokenBlacklist; diff --git a/server/src/db/agent_keys.rs b/server/src/db/agent_keys.rs index 59c410f..574dcef 100644 --- a/server/src/db/agent_keys.rs +++ b/server/src/db/agent_keys.rs @@ -78,8 +78,47 @@ pub async fn find_active_by_hash( .await } +/// List all keys for a machine (newest first), including revoked ones. +/// +/// Returns full rows; the API layer projects these to metadata-only responses +/// and never serializes `key_hash`. +pub async fn list_for_machine( + pool: &PgPool, + machine_id: Uuid, +) -> Result, sqlx::Error> { + sqlx::query_as::<_, AgentKey>( + r#" + SELECT id, machine_id, key_hash, tenant_id, created_at, last_used_at, revoked_at + FROM connect_agent_keys + WHERE machine_id = $1 + ORDER BY created_at DESC + "#, + ) + .bind(machine_id) + .fetch_all(pool) + .await +} + +/// Check whether a key id belongs to the given machine. +/// +/// Used to scope revocation to the machine in the request path, so a key id +/// cannot be revoked via a mismatched machine URL. +pub async fn key_belongs_to_machine( + pool: &PgPool, + key_id: Uuid, + machine_id: Uuid, +) -> Result { + let exists = sqlx::query_scalar::<_, bool>( + "SELECT EXISTS(SELECT 1 FROM connect_agent_keys WHERE id = $1 AND machine_id = $2)", + ) + .bind(key_id) + .bind(machine_id) + .fetch_one(pool) + .await?; + Ok(exists) +} + /// Revoke a key by id (idempotent: re-revoking keeps the original timestamp). -#[allow(dead_code)] // Wired by the key-revocation endpoint in Task 2. pub async fn revoke_agent_key(pool: &PgPool, id: Uuid) -> Result<(), sqlx::Error> { sqlx::query( r#" diff --git a/server/src/db/machines.rs b/server/src/db/machines.rs index 6986436..9680d9f 100644 --- a/server/src/db/machines.rs +++ b/server/src/db/machines.rs @@ -22,6 +22,18 @@ pub struct Machine { pub updated_at: DateTime, /// Tenancy-ready (Phase 4). Backfilled to the default tenant by migration 004. pub tenant_id: Option, + /// Company/organization name reported by the agent (`AgentStatus.organization`). + /// Column added in migration 005; previously written by `update_machine_metadata` + /// against a non-existent column (the write silently failed). Now mapped here so + /// `SELECT *` returns it. + pub organization: Option, + /// Site/location name reported by the agent (`AgentStatus.site`). Migration 005. + pub site: Option, + /// Free-form tags reported by the agent (`AgentStatus.tags`). Stored as a + /// Postgres `TEXT[]`; matches the `&[String]` bound by `update_machine_metadata`. + /// Migration 005. `NULL`/absent column reads back as an empty vec via the default. + #[serde(default)] + pub tags: Vec, } /// Get or create a machine by agent_id (upsert) diff --git a/server/src/main.rs b/server/src/main.rs index 52ba8b6..2813279 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -312,6 +312,11 @@ async fn main() -> Result<()> { .route("/api/sessions", get(list_sessions)) .route("/api/sessions/:id", get(get_session)) .route("/api/sessions/:id", delete(disconnect_session)) + // Session-scoped viewer-token minting (dashboard JWT; bound to one session) + .route( + "/api/sessions/:id/viewer-token", + post(api::sessions::mint_viewer_token), + ) // REST API - Machines .route("/api/machines", get(list_machines)) .route("/api/machines/:agent_id", get(get_machine)) @@ -321,6 +326,21 @@ async fn main() -> Result<()> { "/api/machines/:agent_id/update", post(trigger_machine_update), ) + // Per-agent key issuance (admin only). `:agent_id` matches the param + // name used by the other /api/machines/:agent_id routes — matchit 0.7 + // panics if the same path position uses two different param names. + .route( + "/api/machines/:agent_id/keys", + post(api::machine_keys::create_key), + ) + .route( + "/api/machines/:agent_id/keys", + get(api::machine_keys::list_keys), + ) + .route( + "/api/machines/:agent_id/keys/:key_id", + delete(api::machine_keys::revoke_key), + ) // REST API - Releases and Version .route("/api/version", get(api::releases::get_version)) // No auth - for agent polling .route("/api/releases", get(api::releases::list_releases)) diff --git a/server/src/relay/mod.rs b/server/src/relay/mod.rs index 6471586..62acf23 100644 --- a/server/src/relay/mod.rs +++ b/server/src/relay/mod.rs @@ -169,8 +169,9 @@ pub async fn agent_ws_handler( // Validate API key if provided (for persistent agents) if let Some(ref key) = api_key { - // For now, we'll accept API keys that match the JWT secret or a configured agent key - // In production, this should validate against a database of registered agents + // Agent-plane auth ONLY: a per-agent `cak_` key (hash-compared against + // connect_agent_keys, rejecting revoked) or the deprecated shared + // AGENT_API_KEY fallback. A dashboard/user JWT is NEVER accepted here. if !validate_agent_api_key(&state, key).await { warn!( "Agent connection rejected: {} from {} - invalid API key", @@ -220,21 +221,45 @@ pub async fn agent_ws_handler( })) } -/// Validate an agent API key +/// Validate an agent key presented on the agent plane. +/// +/// SECURITY (v2): a dashboard/user JWT is NEVER a valid agent credential — the +/// old `jwt_config.validate_token` branch was the relay CRITICAL and is gone. +/// Accepts, in order: +/// 1. A per-agent `cak_` key: SHA-256 hash compared against +/// `connect_agent_keys`; revoked keys are rejected (the DB query filters +/// `revoked_at IS NULL`). This is the supported path. +/// 2. The shared `AGENT_API_KEY` env value — DEPRECATED fallback, retained +/// only for not-yet-migrated agents. Its use is logged at WARNING and it +/// should be removed once all managed agents carry per-agent keys. +/// +/// Never logs the presented key or any hash. async fn validate_agent_api_key(state: &AppState, api_key: &str) -> bool { - // Check if API key is a valid JWT (allows using dashboard token for testing) - if state.jwt_config.validate_token(api_key).is_ok() { - return true; - } - - // Check against configured agent API key if set - if let Some(ref configured_key) = state.agent_api_key { - if api_key == configured_key { + // 1. Per-agent key (the supported path). Requires a database; without one, + // only the deprecated shared-key fallback below can apply. + if let Some(ref db) = state.db { + if crate::auth::agent_keys::verify_agent_key(db.pool(), api_key) + .await + .is_some() + { + return true; + } + } + + // 2. DEPRECATED shared-key fallback. Constant-time-ish equality is not + // critical here (the key is high-entropy and this path is sunset), but + // we still avoid logging the value. + if let Some(ref configured_key) = state.agent_api_key { + if api_key == configured_key { + warn!( + "[WARNING] Agent authenticated via the DEPRECATED shared AGENT_API_KEY \ + fallback. Migrate this agent to a per-agent cak_ key; the shared key \ + will be removed." + ); return true; } } - // In future: validate against database of registered agents false } diff --git a/specs/v2-secure-session-core/plan.md b/specs/v2-secure-session-core/plan.md index 98315fa..9a91d71 100644 --- a/specs/v2-secure-session-core/plan.md +++ b/specs/v2-secure-session-core/plan.md @@ -1,7 +1,10 @@ # v2 Secure Session Core — Implementation Plan > Spec created: 2026-05-29 -> Status: in progress — Task 1 (schema) DONE 2026-05-29; Task 2 (auth) next +> Status: in progress — Tasks 1-2 DONE 2026-05-29; Task 3 (relay WS) next. +> CARRY-FORWARD: Task 3 MUST add a viewer-token AUTHORIZATION check (admin/permission gate) — Task 2 +> fixed only the token *mechanism*; the authz gate is what actually closes audit CRITICAL #1. Policy +> (admin-only vs admin-or-view-permission) pending Mike's decision. > Parent: `docs/specs/SPEC-002-v2-modernization-architecture.md` (Phase 1) > Keystone: Tasks 1–4 are the "get-right-first" secure auth/session core — every audit CRITICAL/HIGH > is closed there. Tasks 5–7 deliver the product capability on top. Do them in order. @@ -47,7 +50,22 @@ Reference: `specs/native-remote-control/plan.md` Task 2 (`connect_agent_keys`); --- -## Task 2 (KEYSTONE): Rebuilt auth model — plane separation + session-scoped viewer tokens +## Task 2 (KEYSTONE) [DONE 2026-05-29 — code-reviewed APPROVED]: Rebuilt auth model — plane separation + session-scoped viewer tokens + +> CARRY-FORWARD TO TASK 3: viewer-token minting is gated only by `AuthenticatedUser` (authentication, +> not authorization). GC has a real `admin|operator|viewer` role + permissions model, so this is intra- +> tenant privilege escalation until a permission check is added. **The mechanism is fixed here; the authz +> check in Task 3 is what closes audit CRITICAL #1.** Metadata bug todo faf39fe0 resolved (migration 005). + +> [IMPLEMENTED] `auth/agent_keys.rs` [new] (cak_ mint/SHA-256 hash/verify), `auth/jwt.rs` +> (`ViewerClaims` + `create_viewer_token`/`validate_viewer_token`, 5-min TTL, `purpose:"viewer"`), +> `auth/mod.rs` (module + re-export). Deleted the JWT-as-agent-key branch in `relay/mod.rs` +> `validate_agent_api_key` — now per-agent `cak_` key OR deprecated shared `AGENT_API_KEY` (WARNING-logged), +> never a user JWT. New endpoints: `POST/GET /api/machines/:agent_id/keys`, +> `DELETE /api/machines/:agent_id/keys/:key_id` (admin), `POST /api/sessions/:id/viewer-token` (dashboard JWT). +> db helpers added: `agent_keys::{list_for_machine,key_belongs_to_machine}`. Folded in migration +> `005_machine_metadata.sql` + Machine struct org/site/tags mapping (coord todo faf39fe0). No Rust +> toolchain on this machine — self-reviewed; not yet `cargo check`-verified. Files touched: `server/src/auth/` (`mod.rs`, `jwt.rs`, `agent_keys.rs` [new], `token_blacklist.rs`, `password.rs`), `server/src/api/auth.rs`, `server/src/api/sessions.rs`. @@ -76,6 +94,13 @@ Files touched: `server/src/relay/mod.rs`, `server/src/session/mod.rs`. - **`viewer_ws_handler`** (`relay/mod.rs:242`): verify the viewer token's **signature + expiry + blacklist + `session_id` claim == requested `session_id`** before `handle_viewer_connection` (`relay/mod.rs:595`). Reject otherwise. (Fixes the any-JWT-joins-any-session + blacklist-bypass CRITICALs.) +- **Viewer-token AUTHORIZATION (carry-forward from Task 2 review) — this is what actually closes audit + CRITICAL #1.** The minting endpoint `POST /api/sessions/:id/viewer-token` (in `server/src/api/sessions.rs`) + must enforce a real permission predicate, not just `AuthenticatedUser`: `user.is_admin() || + user.has_permission()`. GC's role model (`admin|operator|viewer`) + permissions table already + exist (`server/src/auth/mod.rs`), so honoring the intra-tenant role distinction is cheap. **Policy + decision (Mike): admin-only, or admin-or-`view_sessions`-permission.** Multi-tenant client-access + isolation stays deferred to Phase 4; this is only the intra-tenant role gate. - **`agent_ws_handler`** (`relay/mod.rs:55`): authenticate via per-agent key OR support code only (Task 2). Persistent reattach must bind to the authenticated machine identity, not a query-string `agent_id` alone (`session/mod.rs:98`).