fix(server): tolerate NULL connect_machines columns (tags decode bug)
connect_machines.tags is text[] nullable with no default; the derived
FromRow decoded it as non-Option Vec<String>, so rows with NULL tags
threw "unexpected null" - breaking managed-session reconcile on startup
and the authed Machines list. Hit in production on the v2 cutover.
- Replace the derived FromRow on Machine with a manual impl that decodes
every nullable-non-Option column as Option<T> with unwrap_or_default
(tags, is_elevated, is_persistent, status, timestamps), fixing all six
read sites at once. Public field types unchanged.
- migrations/007: backfill NULL tags to empty array, set DEFAULT '{}',
set NOT NULL (no writer inserts NULL: upsert omits tags, metadata
update binds a non-null array). Idempotent with the prod hot-patch.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,11 +2,26 @@
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::PgPool;
|
||||
use sqlx::{postgres::PgRow, FromRow, PgPool, Row};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Machine record from database
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||
///
|
||||
/// `FromRow` is implemented manually (not derived) so that every column whose
|
||||
/// schema definition is *nullable* decodes NULL-tolerantly. The `connect_machines`
|
||||
/// table was created in migration 001 with only `DEFAULT` clauses (no `NOT NULL`)
|
||||
/// on `is_elevated`, `is_persistent`, `first_seen`, `last_seen`, `status`,
|
||||
/// `created_at`, and `updated_at`; `tags` (migration 005) likewise ended up
|
||||
/// nullable with no default on the production instance. A derived `FromRow` maps
|
||||
/// those to non-`Option` Rust types and errors at decode time the moment any cell
|
||||
/// is NULL (`unexpected null; try decoding as an Option`). In production a row with
|
||||
/// `tags IS NULL` broke the startup reconcile task and would 500 the authenticated
|
||||
/// Machines list. The manual impl below reads every nullable column as
|
||||
/// `Option<T>` and falls back to `Default::default()`, so a NULL can never panic or
|
||||
/// error regardless of how the column was created. Truly non-null columns (`id`,
|
||||
/// `agent_id`, `hostname`) are decoded directly. Migration 007 additionally pins
|
||||
/// `tags` to `DEFAULT '{}'` and backfills existing NULLs (defense in depth).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Machine {
|
||||
pub id: Uuid,
|
||||
pub agent_id: String,
|
||||
@@ -31,11 +46,57 @@ pub struct Machine {
|
||||
pub site: Option<String>,
|
||||
/// Free-form tags reported by the agent (`AgentStatus.tags`). Stored as a
|
||||
/// Postgres `TEXT[]`; matches the `&[String]` bound by `update_machine_metadata`.
|
||||
/// Migration 005. `NULL`/absent column reads back as an empty vec via the default.
|
||||
/// Migration 005. A `NULL` cell decodes to an empty vec (see the manual `FromRow`
|
||||
/// impl) so it can never error at decode time.
|
||||
#[serde(default)]
|
||||
pub tags: Vec<String>,
|
||||
}
|
||||
|
||||
impl<'r> FromRow<'r, PgRow> for Machine {
|
||||
fn from_row(row: &'r PgRow) -> Result<Self, sqlx::Error> {
|
||||
Ok(Self {
|
||||
// NOT NULL columns: decode directly.
|
||||
id: row.try_get("id")?,
|
||||
agent_id: row.try_get("agent_id")?,
|
||||
hostname: row.try_get("hostname")?,
|
||||
// Schema-nullable in `Option<T>` form already: decode directly.
|
||||
os_version: row.try_get("os_version")?,
|
||||
last_session_id: row.try_get("last_session_id")?,
|
||||
tenant_id: row.try_get("tenant_id")?,
|
||||
organization: row.try_get("organization")?,
|
||||
site: row.try_get("site")?,
|
||||
// Nullable-with-default columns mapped to non-`Option` Rust types: read as
|
||||
// `Option<T>` and fall back to the type default so a NULL cell never errors.
|
||||
is_elevated: row
|
||||
.try_get::<Option<bool>, _>("is_elevated")?
|
||||
.unwrap_or_default(),
|
||||
is_persistent: row
|
||||
.try_get::<Option<bool>, _>("is_persistent")?
|
||||
.unwrap_or_default(),
|
||||
first_seen: row
|
||||
.try_get::<Option<DateTime<Utc>>, _>("first_seen")?
|
||||
.unwrap_or_default(),
|
||||
last_seen: row
|
||||
.try_get::<Option<DateTime<Utc>>, _>("last_seen")?
|
||||
.unwrap_or_default(),
|
||||
status: row
|
||||
.try_get::<Option<String>, _>("status")?
|
||||
.unwrap_or_default(),
|
||||
created_at: row
|
||||
.try_get::<Option<DateTime<Utc>>, _>("created_at")?
|
||||
.unwrap_or_default(),
|
||||
updated_at: row
|
||||
.try_get::<Option<DateTime<Utc>>, _>("updated_at")?
|
||||
.unwrap_or_default(),
|
||||
// The production bug: `tags` was nullable with no default. A NULL cell
|
||||
// decodes to an empty vec here instead of erroring.
|
||||
tags: row
|
||||
.try_get::<Option<Vec<String>>, _>("tags")?
|
||||
.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create a machine by agent_id (upsert)
|
||||
pub async fn upsert_machine(
|
||||
pool: &PgPool,
|
||||
|
||||
Reference in New Issue
Block a user