fix(server): tolerate NULL connect_machines columns (tags decode bug)
connect_machines.tags is text[] nullable with no default; the derived
FromRow decoded it as non-Option Vec<String>, so rows with NULL tags
threw "unexpected null" - breaking managed-session reconcile on startup
and the authed Machines list. Hit in production on the v2 cutover.
- Replace the derived FromRow on Machine with a manual impl that decodes
every nullable-non-Option column as Option<T> with unwrap_or_default
(tags, is_elevated, is_persistent, status, timestamps), fixing all six
read sites at once. Public field types unchanged.
- migrations/007: backfill NULL tags to empty array, set DEFAULT '{}',
set NOT NULL (no writer inserts NULL: upsert omits tags, metadata
update binds a non-null array). Idempotent with the prod hot-patch.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
39
server/migrations/007_fix_machine_tags_null.sql
Normal file
39
server/migrations/007_fix_machine_tags_null.sql
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
-- Migration: 007_fix_machine_tags_null.sql
|
||||||
|
-- Purpose: Make connect_machines.tags self-protecting against NULL.
|
||||||
|
--
|
||||||
|
-- Migration 005 intended to create `tags TEXT[] NOT NULL DEFAULT '{}'`, but it
|
||||||
|
-- used `ADD COLUMN IF NOT EXISTS`. On the production instance the column already
|
||||||
|
-- existed (created without NOT NULL / without a default by an earlier path), so
|
||||||
|
-- 005's constraints were never applied — the column stayed nullable with no
|
||||||
|
-- default. Rows with `tags IS NULL` then failed to decode in the Rust `Machine`
|
||||||
|
-- FromRow (`unexpected null; try decoding as an Option`), which broke the startup
|
||||||
|
-- "reconcile managed sessions" task and would 500 the authenticated Machines list.
|
||||||
|
--
|
||||||
|
-- Production was hot-patched with `UPDATE connect_machines SET tags='{}' WHERE
|
||||||
|
-- tags IS NULL`. This migration makes the schema itself enforce the invariant so
|
||||||
|
-- it cannot recur: backfill any remaining NULLs, set a column DEFAULT, then add a
|
||||||
|
-- NOT NULL constraint. The Rust decode was also made NULL-tolerant (manual FromRow)
|
||||||
|
-- as belt-and-suspenders.
|
||||||
|
--
|
||||||
|
-- Idempotent: the backfill is a no-op once rows already hold '{}' (the prod hot-patch
|
||||||
|
-- already ran); SET DEFAULT is idempotent; SET NOT NULL is a no-op if the constraint
|
||||||
|
-- already exists. Applied on server startup by sqlx::migrate!(); never pre-applied
|
||||||
|
-- via psql. Ordered after 006.
|
||||||
|
-- See .claude/standards/gururmm/sqlx-migrations.md.
|
||||||
|
|
||||||
|
-- 1. Backfill any rows still holding NULL (must precede SET NOT NULL).
|
||||||
|
UPDATE connect_machines
|
||||||
|
SET tags = '{}'
|
||||||
|
WHERE tags IS NULL;
|
||||||
|
|
||||||
|
-- 2. Pin a column default so future INSERTs that omit `tags` get an empty array
|
||||||
|
-- rather than NULL.
|
||||||
|
ALTER TABLE connect_machines
|
||||||
|
ALTER COLUMN tags SET DEFAULT '{}';
|
||||||
|
|
||||||
|
-- 3. Enforce the invariant at the schema level. Safe: the only writer of `tags`
|
||||||
|
-- (db::machines::update_machine_metadata) binds a non-null TEXT[]; no INSERT path
|
||||||
|
-- supplies an explicit NULL for tags (upsert_machine omits the column, so it now
|
||||||
|
-- takes the DEFAULT above). After the backfill no NULL rows remain.
|
||||||
|
ALTER TABLE connect_machines
|
||||||
|
ALTER COLUMN tags SET NOT NULL;
|
||||||
@@ -2,11 +2,26 @@
|
|||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sqlx::PgPool;
|
use sqlx::{postgres::PgRow, FromRow, PgPool, Row};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
/// Machine record from database
|
/// Machine record from database
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
///
|
||||||
|
/// `FromRow` is implemented manually (not derived) so that every column whose
|
||||||
|
/// schema definition is *nullable* decodes NULL-tolerantly. The `connect_machines`
|
||||||
|
/// table was created in migration 001 with only `DEFAULT` clauses (no `NOT NULL`)
|
||||||
|
/// on `is_elevated`, `is_persistent`, `first_seen`, `last_seen`, `status`,
|
||||||
|
/// `created_at`, and `updated_at`; `tags` (migration 005) likewise ended up
|
||||||
|
/// nullable with no default on the production instance. A derived `FromRow` maps
|
||||||
|
/// those to non-`Option` Rust types and errors at decode time the moment any cell
|
||||||
|
/// is NULL (`unexpected null; try decoding as an Option`). In production a row with
|
||||||
|
/// `tags IS NULL` broke the startup reconcile task and would 500 the authenticated
|
||||||
|
/// Machines list. The manual impl below reads every nullable column as
|
||||||
|
/// `Option<T>` and falls back to `Default::default()`, so a NULL can never panic or
|
||||||
|
/// error regardless of how the column was created. Truly non-null columns (`id`,
|
||||||
|
/// `agent_id`, `hostname`) are decoded directly. Migration 007 additionally pins
|
||||||
|
/// `tags` to `DEFAULT '{}'` and backfills existing NULLs (defense in depth).
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Machine {
|
pub struct Machine {
|
||||||
pub id: Uuid,
|
pub id: Uuid,
|
||||||
pub agent_id: String,
|
pub agent_id: String,
|
||||||
@@ -31,11 +46,57 @@ pub struct Machine {
|
|||||||
pub site: Option<String>,
|
pub site: Option<String>,
|
||||||
/// Free-form tags reported by the agent (`AgentStatus.tags`). Stored as a
|
/// Free-form tags reported by the agent (`AgentStatus.tags`). Stored as a
|
||||||
/// Postgres `TEXT[]`; matches the `&[String]` bound by `update_machine_metadata`.
|
/// Postgres `TEXT[]`; matches the `&[String]` bound by `update_machine_metadata`.
|
||||||
/// Migration 005. `NULL`/absent column reads back as an empty vec via the default.
|
/// Migration 005. A `NULL` cell decodes to an empty vec (see the manual `FromRow`
|
||||||
|
/// impl) so it can never error at decode time.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub tags: Vec<String>,
|
pub tags: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'r> FromRow<'r, PgRow> for Machine {
|
||||||
|
fn from_row(row: &'r PgRow) -> Result<Self, sqlx::Error> {
|
||||||
|
Ok(Self {
|
||||||
|
// NOT NULL columns: decode directly.
|
||||||
|
id: row.try_get("id")?,
|
||||||
|
agent_id: row.try_get("agent_id")?,
|
||||||
|
hostname: row.try_get("hostname")?,
|
||||||
|
// Schema-nullable in `Option<T>` form already: decode directly.
|
||||||
|
os_version: row.try_get("os_version")?,
|
||||||
|
last_session_id: row.try_get("last_session_id")?,
|
||||||
|
tenant_id: row.try_get("tenant_id")?,
|
||||||
|
organization: row.try_get("organization")?,
|
||||||
|
site: row.try_get("site")?,
|
||||||
|
// Nullable-with-default columns mapped to non-`Option` Rust types: read as
|
||||||
|
// `Option<T>` and fall back to the type default so a NULL cell never errors.
|
||||||
|
is_elevated: row
|
||||||
|
.try_get::<Option<bool>, _>("is_elevated")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
is_persistent: row
|
||||||
|
.try_get::<Option<bool>, _>("is_persistent")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
first_seen: row
|
||||||
|
.try_get::<Option<DateTime<Utc>>, _>("first_seen")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
last_seen: row
|
||||||
|
.try_get::<Option<DateTime<Utc>>, _>("last_seen")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
status: row
|
||||||
|
.try_get::<Option<String>, _>("status")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
created_at: row
|
||||||
|
.try_get::<Option<DateTime<Utc>>, _>("created_at")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
updated_at: row
|
||||||
|
.try_get::<Option<DateTime<Utc>>, _>("updated_at")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
// The production bug: `tags` was nullable with no default. A NULL cell
|
||||||
|
// decodes to an empty vec here instead of erroring.
|
||||||
|
tags: row
|
||||||
|
.try_get::<Option<Vec<String>>, _>("tags")?
|
||||||
|
.unwrap_or_default(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Get or create a machine by agent_id (upsert)
|
/// Get or create a machine by agent_id (upsert)
|
||||||
pub async fn upsert_machine(
|
pub async fn upsert_machine(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
|
|||||||
Reference in New Issue
Block a user