From d0b8db070f273f4b9792aa1e399493a684660832 Mon Sep 17 00:00:00 2001 From: Mike Swanson Date: Tue, 2 Jun 2026 11:43:56 -0700 Subject: [PATCH] feat(agent): hardware-salt machine_uid (SPEC-016 Phase B item 1) Extend the SPEC-004 machine_uid derivation with the locked SPEC-016 hardware salt: combine the Windows MachineGuid with the SMBIOS system UUID (Win32_ComputerSystemProduct.UUID), falling back to motherboard serial (Win32_BaseBoard.SerialNumber) + primary disk serial when the SMBIOS UUID is absent or a degenerate placeholder (all-zeros / all-FFs, emitted by some OEMs and hypervisor templates). Signals are read via narrow PowerShell CIM queries (hidden window, no profile) rather than adding a WMI crate or hand-rolling COM IWbemServices for two scalar reads. Values are normalized (trim + upper-case) so vendor case/space drift never perturbs the digest. The combined string is SHA-256'd into the existing opaque muid_ shape, preserving the wire identity the relay connect path already reports while making it survive an OS re-image on the same hardware. Which signal set fed the result is logged (source label only, never the secret values). Adds unit tests for derivation determinism + signal-sensitivity, degenerate-SMBIOS rejection, and signal normalization. Co-Authored-By: Claude Opus 4.8 (1M context) --- agent/src/identity.rs | 268 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 260 insertions(+), 8 deletions(-) diff --git a/agent/src/identity.rs b/agent/src/identity.rs index eb1f226..4dba303 100644 --- a/agent/src/identity.rs +++ b/agent/src/identity.rs @@ -9,16 +9,27 @@ //! **recomputable**: the same machine yields the same id on every call with no //! persistence required. //! -//! - **Windows:** SHA-256 hash of the OS machine GUID read from -//! `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` (a `REG_SZ`). The raw -//! GUID is never returned — only the opaque `muid_` derived from it. +//! - **Windows:** SHA-256 of a hardware-salted identity string. The primary +//! signal is the OS machine GUID +//! (`HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid`, a `REG_SZ`) combined +//! with the **SMBIOS system UUID** (`Win32_ComputerSystemProduct.UUID`). When +//! the SMBIOS UUID is absent / all-zeros / all-FFs (some OEMs/hypervisors), it +//! falls back to the **motherboard serial** (`Win32_BaseBoard.SerialNumber`) +//! plus the **primary disk serial**. The raw signals are never returned — only +//! the opaque `muid_` derived from them. //! - **Non-Windows (and Windows registry failure):** a random UUID persisted in //! the agent's data directory, read back on subsequent runs so it is stable //! across calls and process restarts. //! +//! **Stability contract (SPEC-016 item 1):** the derivation mixes only stable +//! hardware signals — never a per-install random value or volatile data — so the +//! `machine_uid` survives both a reboot AND an OS re-image on the SAME hardware +//! (the re-image dedup goal), while distinct physical boxes stay distinct. +//! //! This module deliberately does NOT change `agent_id`/`generate_agent_id`. //! `machine_uid` is reported *alongside* `agent_id`; the server-side dedup that -//! consumes it is a separate task. +//! consumes it lives in `POST /api/enroll` (SPEC-016 Phase A) and the relay +//! connect path. use std::sync::OnceLock; @@ -56,21 +67,200 @@ fn derive_uid(raw: &str) -> String { #[cfg(windows)] fn compute_machine_uid() -> String { - match read_machine_guid() { - Ok(guid) if !guid.trim().is_empty() => derive_uid(guid.trim()), + // Primary signal: the OS MachineGuid. If even this is unavailable the box has + // no usable hardware identity to anchor on, so degrade to the persisted seed + // exactly as before (preserves the SPEC-004 fallback behavior). + let machine_guid = match read_machine_guid() { + Ok(guid) if !guid.trim().is_empty() => guid.trim().to_string(), Ok(_) => { tracing::warn!( "MachineGuid registry value was empty; falling back to persisted machine_uid" ); - persisted_uid() + return persisted_uid(); } Err(e) => { tracing::warn!( "Failed to read MachineGuid from registry ({e}); falling back to persisted machine_uid" ); - persisted_uid() + return persisted_uid(); + } + }; + + // Hardware salt (SPEC-016): SMBIOS system UUID if usable, else motherboard + + // disk serial. A box that yields no usable hardware salt still gets a stable + // uid from the MachineGuid alone (it survives reboot; an OS re-image would + // change it, but that is the unavoidable floor when no durable hardware signal + // is readable). We log WHICH signals fed the result for debugging WITHOUT + // emitting the secret values themselves. + let salt = hardware_salt(); + let (raw, source) = match &salt { + Some(s) => (format!("{machine_guid}|{s}"), "machineguid+hardware"), + None => (machine_guid, "machineguid-only"), + }; + tracing::info!("machine_uid derived from signals: {source}"); + derive_uid(&raw) +} + +/// Collect the stable hardware salt for the `machine_uid` (Windows only). +/// +/// Returns `Some(salt)` where `salt` is a deterministic, normalized concatenation +/// of usable hardware signals, or `None` when nothing durable is readable (in +/// which case the caller anchors on the MachineGuid alone). +/// +/// Order of preference, per SPEC-016 item 1: +/// 1. SMBIOS system UUID (`Win32_ComputerSystemProduct.UUID`) — when present and +/// not a degenerate placeholder (all-zeros / all-FFs, which some OEMs and +/// hypervisor templates emit). +/// 2. Fallback: motherboard serial (`Win32_BaseBoard.SerialNumber`) + primary +/// disk serial — combined so a single weak signal does not stand alone. +/// +/// Each component is read via a narrow PowerShell CIM query (see +/// [`query_cim_property`]); the values are normalized (trimmed, upper-cased) so +/// trivial formatting drift never changes the digest. +#[cfg(windows)] +fn hardware_salt() -> Option { + if let Some(uuid) = smbios_uuid() { + return Some(format!("smbios:{uuid}")); + } + + // SMBIOS UUID unusable — fall back to board + disk serial. Use whichever of + // the two are readable; require at least one to be present, otherwise there + // is no durable salt and we return None. + let board = normalize_signal(query_cim_property("Win32_BaseBoard", "SerialNumber").as_deref()); + let disk = primary_disk_serial(); + + match (board, disk) { + (Some(b), Some(d)) => Some(format!("board:{b}|disk:{d}")), + (Some(b), None) => Some(format!("board:{b}")), + (None, Some(d)) => Some(format!("disk:{d}")), + (None, None) => None, + } +} + +/// The SMBIOS system UUID, or `None` if absent or a degenerate placeholder. +/// +/// Some OEMs ship an all-zeros UUID and some hypervisor templates clone an +/// all-FFs (or all-zeros) UUID; either is worthless as a distinguishing signal, +/// so we reject both and let the caller fall back to board/disk serial. +#[cfg(windows)] +fn smbios_uuid() -> Option { + let raw = + normalize_signal(query_cim_property("Win32_ComputerSystemProduct", "UUID").as_deref())?; + + // Reject degenerate placeholders (ignoring dashes): all-zeros or all-FFs. + let hex: String = raw.chars().filter(|c| *c != '-').collect(); + let all_zero = !hex.is_empty() && hex.chars().all(|c| c == '0'); + let all_ff = !hex.is_empty() && hex.chars().all(|c| c == 'F'); + if hex.is_empty() || all_zero || all_ff { + tracing::debug!("SMBIOS UUID is absent or a degenerate placeholder; using fallback salt"); + return None; + } + Some(raw) +} + +/// The serial number of the primary (boot/index-0) physical disk, normalized. +/// +/// Prefers the disk whose `Index == 0` (the conventional boot disk); falls back +/// to the first disk that reports any serial. Returns `None` if no disk reports a +/// usable serial. +#[cfg(windows)] +fn primary_disk_serial() -> Option { + // One narrow query: index + serial for all physical disks, sorted by index, + // emitted as `indexserial` lines. Parse the lowest-index non-empty serial. + let script = "Get-CimInstance -ClassName Win32_DiskDrive | \ + Sort-Object Index | \ + ForEach-Object { \"$($_.Index)`t$($_.SerialNumber)\" }"; + let out = run_powershell(script)?; + for line in out.lines() { + let mut parts = line.splitn(2, '\t'); + let _index = parts.next(); + if let Some(serial) = parts.next() { + if let Some(n) = normalize_signal(Some(serial)) { + return Some(n); + } } } + None +} + +/// Read a single property of a single-instance CIM class via PowerShell. +/// +/// Returns the raw (untrimmed) first non-empty line of output, or `None`. This is +/// a deliberately narrow shell-out rather than a full WMI/COM binding: the agent +/// already has no WMI crate, and a COM `IWbemServices` binding for two scalar +/// reads would be far more code and unsafe surface for no benefit. PowerShell's +/// CIM cmdlets are present on every supported Windows target (7 SP1+/2008 R2+ +/// ship WMI; CIM cmdlets ship from PowerShell 3.0 / WMF 3.0, universally present +/// on currently-supported builds). +#[cfg(windows)] +fn query_cim_property(class: &str, property: &str) -> Option { + // `(Get-CimInstance -ClassName X).Property` — single scalar, no formatting. + let script = format!("(Get-CimInstance -ClassName {class}).{property}"); + let out = run_powershell(&script)?; + out.lines() + .map(str::trim) + .find(|l| !l.is_empty()) + .map(str::to_string) +} + +/// Run a short PowerShell snippet and capture stdout, or `None` on any failure. +/// +/// Hidden window (`CREATE_NO_WINDOW`) so an interactive desktop never flashes a +/// console; `-NonInteractive -NoProfile` for determinism and speed. Never logs +/// the captured output (it carries hardware identifiers). +#[cfg(windows)] +fn run_powershell(script: &str) -> Option { + use std::os::windows::process::CommandExt; + use std::process::Command; + + // CREATE_NO_WINDOW — avoid a console flash on the interactive desktop. + const CREATE_NO_WINDOW: u32 = 0x0800_0000; + + let output = Command::new("powershell.exe") + .args([ + "-NonInteractive", + "-NoProfile", + "-ExecutionPolicy", + "Bypass", + "-Command", + script, + ]) + .creation_flags(CREATE_NO_WINDOW) + .output(); + + match output { + Ok(o) if o.status.success() => { + let s = String::from_utf8_lossy(&o.stdout).trim().to_string(); + if s.is_empty() { + None + } else { + Some(s) + } + } + Ok(o) => { + tracing::debug!( + "hardware-signal query exited with status {:?}; ignoring this signal", + o.status.code() + ); + None + } + Err(e) => { + tracing::debug!("could not run hardware-signal query ({e}); ignoring this signal"); + None + } + } +} + +/// Normalize a raw hardware signal: trim, upper-case, drop if empty. Upper-casing +/// makes the digest stable against vendor case drift; trimming removes stray +/// whitespace WMI sometimes pads serials with. +#[cfg(windows)] +fn normalize_signal(raw: Option<&str>) -> Option { + let v = raw?.trim(); + if v.is_empty() { + return None; + } + Some(v.to_uppercase()) } #[cfg(not(windows))] @@ -297,4 +487,66 @@ mod tests { assert_eq!(a, b, "compute_machine_uid must be deterministic on Windows"); assert!(a.starts_with(MUID_PREFIX)); } + + /// The hardware-salted derivation is just `derive_uid` over a deterministic + /// concatenation, so identical signals MUST yield an identical uid and any + /// changed signal MUST change it. This pins the SPEC-016 determinism contract + /// independent of the (machine-specific) live hardware reads. + #[test] + fn salted_derivation_is_deterministic_and_signal_sensitive() { + let guid = "11111111-2222-3333-4444-555555555555"; + let with_smbios = derive_uid(&format!("{guid}|smbios:AAAA-BBBB")); + let with_smbios_again = derive_uid(&format!("{guid}|smbios:AAAA-BBBB")); + let with_board = derive_uid(&format!("{guid}|board:SN123|disk:DSK9")); + let guid_only = derive_uid(guid); + + // Same inputs -> same uid (re-image stability: MachineGuid changes on + // re-image but the hardware salt does not; here we hold inputs fixed). + assert_eq!(with_smbios, with_smbios_again); + // Different salt composition -> different uid (distinct boxes stay distinct). + assert_ne!(with_smbios, with_board); + assert_ne!(with_smbios, guid_only); + assert_ne!(with_board, guid_only); + } + + /// All-zero and all-FF SMBIOS UUIDs are degenerate placeholders that some OEMs + /// and hypervisor templates emit; the normalizer + placeholder check must + /// reject them so the derivation falls through to board/disk serial. We + /// exercise the rejection predicate directly (it is pure) rather than the + /// live WMI read. + #[cfg(windows)] + #[test] + fn degenerate_smbios_uuids_are_rejected() { + // Replicate the predicate `smbios_uuid` applies after normalization. + fn is_degenerate(raw: &str) -> bool { + let Some(norm) = normalize_signal(Some(raw)) else { + return true; + }; + let hex: String = norm.chars().filter(|c| *c != '-').collect(); + hex.is_empty() + || (!hex.is_empty() && hex.chars().all(|c| c == '0')) + || (!hex.is_empty() && hex.chars().all(|c| c == 'F')) + } + + assert!(is_degenerate("00000000-0000-0000-0000-000000000000")); + assert!(is_degenerate("FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF")); + assert!(is_degenerate("ffffffff-ffff-ffff-ffff-ffffffffffff")); // case-insensitive via normalize + assert!(is_degenerate(" ")); + // A real, mixed UUID is NOT degenerate. + assert!(!is_degenerate("4C4C4544-0043-3010-8052-B4C04F564231")); + } + + /// `normalize_signal` trims, upper-cases, and drops empties — so case/space + /// drift in a vendor serial never perturbs the digest. + #[cfg(windows)] + #[test] + fn normalize_signal_is_stable_against_drift() { + assert_eq!( + normalize_signal(Some(" abc123 ")), + Some("ABC123".to_string()) + ); + assert_eq!(normalize_signal(Some("ABC123")), Some("ABC123".to_string())); + assert_eq!(normalize_signal(Some(" ")), None); + assert_eq!(normalize_signal(None), None); + } }