//! Deterministic, recomputable machine identity (`machine_uid`). //! //! SPEC-004 / v2-stable-identity Task 1. //! //! `machine_uid()` returns a stable, opaque identifier for *this physical //! machine*. Unlike `agent_id` (a random UUID persisted in the config file, //! which mints a fresh value — and thus a duplicate server row — whenever the //! config is lost), `machine_uid` is **derived from the hardware/OS** and is //! **recomputable**: the same machine yields the same id on every call with no //! persistence required. //! //! - **Windows:** SHA-256 of a hardware identity string. The id is derived from //! the **hardware salt ONLY** whenever any durable hardware signal is readable: //! the **SMBIOS system UUID** (`Win32_ComputerSystemProduct.UUID`), or — when //! that is absent / all-zeros / all-FFs (some OEMs/hypervisors) — the //! **motherboard serial** (`Win32_BaseBoard.SerialNumber`) plus the **primary //! disk serial**. A fixed namespace string is mixed in for domain separation. //! The OS machine GUID //! (`HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid`, a `REG_SZ`) is used //! ONLY as a last-resort signal when NO hardware salt is readable. The raw //! signals are never returned — only the opaque `muid_` derived from them. //! - **Non-Windows (and Windows with no readable signal at all):** a random UUID //! persisted in the agent's data directory, read back on subsequent runs so it //! is stable across calls and process restarts. //! //! **Stability contract (SPEC-016 item 1):** //! - **Salted path (hardware signal present) is re-image-stable:** the digest //! mixes only durable hardware signals (SMBIOS UUID, or board + disk serial) and //! a fixed namespace — NOT the `MachineGuid`, which Windows regenerates on every //! OS install/re-image. So the `machine_uid` survives both a reboot AND an OS //! re-image on the SAME hardware (the re-image dedup goal), while distinct //! physical boxes stay distinct. //! - **MachineGuid-only path is the volatile floor:** when no hardware salt is //! readable, the id anchors on the `MachineGuid` alone. This is stable across //! reboots but NOT across a re-image (the GUID is regenerated). This degraded //! path is logged at WARN so the server-side collision gate operator has a clue. //! //! This module deliberately does NOT change `agent_id`/`generate_agent_id`. //! `machine_uid` is reported *alongside* `agent_id`; the server-side dedup that //! consumes it lives in `POST /api/enroll` (SPEC-016 Phase A) and the relay //! connect path. use std::sync::OnceLock; /// Prefix marking the value as an opaque machine-uid (vs. a raw GUID/UUID). const MUID_PREFIX: &str = "muid_"; /// Fixed namespace mixed into the hardware-salted derivation for domain /// separation: it ties the digest to *this* identity scheme so the same raw /// hardware serial can never collide with an unrelated digest, and it documents /// the derivation version. It is NOT a secret — it is a constant. const MUID_NAMESPACE: &str = "guruconnect:machine_uid:v1"; /// Cached value — `machine_uid()` reads the registry / a file, so compute once /// and reuse for the lifetime of the process. static MACHINE_UID: OnceLock = OnceLock::new(); /// Return a deterministic, recomputable opaque machine identifier. /// /// The result is non-empty and prefixed with [`MUID_PREFIX`]. It is cached after /// the first call. On Windows it is derived from a durable hardware salt when one /// is readable (re-image-stable; see the module docs), falling back to the OS /// machine GUID alone (reboot-stable floor) and finally — when no signal at all is /// readable, or on any non-Windows platform — a persisted random UUID, rather than /// panicking. pub fn machine_uid() -> String { MACHINE_UID.get_or_init(compute_machine_uid).clone() } /// Derive the opaque id from a raw machine-identity string via SHA-256. /// /// Returns `muid_`. Hashing makes the value /// opaque (the raw `MachineGuid` is never exposed) while staying fully /// deterministic for a given input. fn derive_uid(raw: &str) -> String { use sha2::{Digest, Sha256}; let mut hasher = Sha256::new(); hasher.update(raw.as_bytes()); let hash = hasher.finalize(); format!("{}{}", MUID_PREFIX, hex::encode(&hash[..16])) } #[cfg(windows)] fn compute_machine_uid() -> String { // PRIMARY signal (SPEC-016 item 1): a durable hardware salt — SMBIOS system // UUID if usable, else motherboard + disk serial. When ANY hardware salt is // readable we derive the uid from the salt ALONE (plus a fixed namespace), // deliberately EXCLUDING the MachineGuid: Windows regenerates the MachineGuid // on every OS install/re-image, so mixing it in would break re-image dedup. // The salted digest survives both reboot AND re-image on the same hardware. if let Some(salt) = hardware_salt() { tracing::info!("machine_uid derived from durable hardware salt (re-image-stable)"); return derive_uid(&format!("{MUID_NAMESPACE}|{salt}")); } // LAST-RESORT signal: no hardware salt is readable, so anchor on the OS // MachineGuid alone. This is the volatile FLOOR — stable across reboots but // NOT across an OS re-image (the GUID is regenerated). We WARN so the // server-side collision-gate operator knows this endpoint's uid is not // re-image-stable. The MachineGuid itself is never logged. match read_machine_guid() { Ok(guid) if !guid.trim().is_empty() => { tracing::warn!( "machine_uid: no durable hardware salt readable; anchoring on MachineGuid \ ONLY — this id is reboot-stable but NOT re-image-stable" ); derive_uid(&format!("{MUID_NAMESPACE}|machineguid:{}", guid.trim())) } Ok(_) => { tracing::warn!( "machine_uid: no hardware salt and MachineGuid registry value was empty; \ falling back to persisted machine_uid" ); persisted_uid() } Err(e) => { tracing::warn!( "machine_uid: no hardware salt and failed to read MachineGuid ({e}); \ falling back to persisted machine_uid" ); persisted_uid() } } } /// Collect the durable hardware salt for the `machine_uid` (Windows only). /// /// This is the PRIMARY identity signal: when it returns `Some(salt)`, the caller /// derives the uid from the salt ALONE (re-image-stable). Returns `Some(salt)` /// where `salt` is a deterministic, normalized concatenation of usable hardware /// signals, or `None` when nothing durable is readable (in which case the caller /// degrades to anchoring on the MachineGuid alone — the volatile floor). /// /// Order of preference, per SPEC-016 item 1: /// 1. SMBIOS system UUID (`Win32_ComputerSystemProduct.UUID`) — when present and /// not a degenerate placeholder (all-zeros / all-FFs, which some OEMs and /// hypervisor templates emit). /// 2. Fallback: motherboard serial (`Win32_BaseBoard.SerialNumber`) + primary /// disk serial — combined so a single weak signal does not stand alone. /// /// Each component is read via a narrow PowerShell CIM query (see /// [`query_cim_property`]); the values are normalized (trimmed, upper-cased) so /// trivial formatting drift never changes the digest. #[cfg(windows)] fn hardware_salt() -> Option { if let Some(uuid) = smbios_uuid() { return Some(format!("smbios:{uuid}")); } // SMBIOS UUID unusable — fall back to board + disk serial. Use whichever of // the two are readable; require at least one to be present, otherwise there // is no durable salt and we return None. let board = normalize_signal(query_cim_property("Win32_BaseBoard", "SerialNumber").as_deref()); let disk = primary_disk_serial(); match (board, disk) { (Some(b), Some(d)) => Some(format!("board:{b}|disk:{d}")), (Some(b), None) => Some(format!("board:{b}")), (None, Some(d)) => Some(format!("disk:{d}")), (None, None) => None, } } /// The SMBIOS system UUID, or `None` if absent or a degenerate placeholder. /// /// Some OEMs ship an all-zeros UUID and some hypervisor templates clone an /// all-FFs (or all-zeros) UUID; either is worthless as a distinguishing signal, /// so we reject both and let the caller fall back to board/disk serial. #[cfg(windows)] fn smbios_uuid() -> Option { let raw = normalize_signal(query_cim_property("Win32_ComputerSystemProduct", "UUID").as_deref())?; // Reject degenerate placeholders (ignoring dashes): all-zeros or all-FFs. let hex: String = raw.chars().filter(|c| *c != '-').collect(); let all_zero = !hex.is_empty() && hex.chars().all(|c| c == '0'); let all_ff = !hex.is_empty() && hex.chars().all(|c| c == 'F'); if hex.is_empty() || all_zero || all_ff { tracing::debug!("SMBIOS UUID is absent or a degenerate placeholder; using fallback salt"); return None; } Some(raw) } /// The serial number of the primary (boot/index-0) physical disk, normalized. /// /// Prefers the disk whose `Index == 0` (the conventional boot disk); falls back /// to the first disk that reports any serial. Returns `None` if no disk reports a /// usable serial. #[cfg(windows)] fn primary_disk_serial() -> Option { // One narrow query: index + serial for all physical disks, sorted by index, // emitted as `indexserial` lines. Parse the lowest-index non-empty serial. let script = "Get-CimInstance -ClassName Win32_DiskDrive | \ Sort-Object Index | \ ForEach-Object { \"$($_.Index)`t$($_.SerialNumber)\" }"; let out = run_powershell(script)?; for line in out.lines() { let mut parts = line.splitn(2, '\t'); let _index = parts.next(); if let Some(serial) = parts.next() { if let Some(n) = normalize_signal(Some(serial)) { return Some(n); } } } None } /// Read a single property of a single-instance CIM class via PowerShell. /// /// Returns the raw (untrimmed) first non-empty line of output, or `None`. This is /// a deliberately narrow shell-out rather than a full WMI/COM binding: the agent /// already has no WMI crate, and a COM `IWbemServices` binding for two scalar /// reads would be far more code and unsafe surface for no benefit. PowerShell's /// CIM cmdlets are present on every supported Windows target (7 SP1+/2008 R2+ /// ship WMI; CIM cmdlets ship from PowerShell 3.0 / WMF 3.0, universally present /// on currently-supported builds). #[cfg(windows)] fn query_cim_property(class: &str, property: &str) -> Option { // `(Get-CimInstance -ClassName X).Property` — single scalar, no formatting. let script = format!("(Get-CimInstance -ClassName {class}).{property}"); let out = run_powershell(&script)?; out.lines() .map(str::trim) .find(|l| !l.is_empty()) .map(str::to_string) } /// Wall-clock bound on a single PowerShell hardware-signal query. /// /// A wedged WMI/CIM provider can hang indefinitely; without a bound that would /// hang agent startup forever. On timeout we kill the child and treat the signal /// as missing (fall back through the chain) — never panic. #[cfg(windows)] const POWERSHELL_QUERY_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); /// Run a short PowerShell snippet and capture stdout, or `None` on any failure /// (including a wall-clock timeout). /// /// Hidden window (`CREATE_NO_WINDOW`) so an interactive desktop never flashes a /// console; `-NonInteractive -NoProfile` for determinism and speed. The call is /// spawned and waited on with a [`POWERSHELL_QUERY_TIMEOUT`] bound so a stuck WMI /// provider cannot wedge startup; on timeout the child is killed and the signal is /// treated as missing. Never logs the captured output (it carries hardware /// identifiers). #[cfg(windows)] fn run_powershell(script: &str) -> Option { use std::io::Read; use std::os::windows::process::CommandExt; use std::process::{Command, Stdio}; use std::time::Instant; // CREATE_NO_WINDOW — avoid a console flash on the interactive desktop. const CREATE_NO_WINDOW: u32 = 0x0800_0000; let mut child = match Command::new("powershell.exe") .args([ "-NonInteractive", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", script, ]) .stdin(Stdio::null()) .stdout(Stdio::piped()) .stderr(Stdio::null()) .creation_flags(CREATE_NO_WINDOW) .spawn() { Ok(c) => c, Err(e) => { tracing::debug!("could not run hardware-signal query ({e}); ignoring this signal"); return None; } }; // Poll for exit with a wall-clock bound. We spin with a short sleep rather than // a reader thread: the queries are infrequent (startup only) and the loop keeps // the timeout logic simple and panic-free. let deadline = Instant::now() + POWERSHELL_QUERY_TIMEOUT; let status = loop { match child.try_wait() { Ok(Some(status)) => break status, Ok(None) => { if Instant::now() >= deadline { // Wedged provider: kill and treat as a missing signal. let _ = child.kill(); let _ = child.wait(); tracing::debug!( "hardware-signal query exceeded {}s timeout; killed and ignoring this signal", POWERSHELL_QUERY_TIMEOUT.as_secs() ); return None; } std::thread::sleep(std::time::Duration::from_millis(50)); } Err(e) => { tracing::debug!("error waiting on hardware-signal query ({e}); ignoring"); let _ = child.kill(); let _ = child.wait(); return None; } } }; if !status.success() { tracing::debug!( "hardware-signal query exited with status {:?}; ignoring this signal", status.code() ); return None; } // The process exited; drain its captured stdout. let mut buf = Vec::new(); if let Some(mut out) = child.stdout.take() { if let Err(e) = out.read_to_end(&mut buf) { tracing::debug!("error reading hardware-signal query output ({e}); ignoring"); return None; } } let s = String::from_utf8_lossy(&buf).trim().to_string(); if s.is_empty() { None } else { Some(s) } } /// Normalize a raw hardware signal: trim, upper-case, drop if empty. Upper-casing /// makes the digest stable against vendor case drift; trimming removes stray /// whitespace WMI sometimes pads serials with. #[cfg(windows)] fn normalize_signal(raw: Option<&str>) -> Option { let v = raw?.trim(); if v.is_empty() { return None; } Some(v.to_uppercase()) } #[cfg(not(windows))] fn compute_machine_uid() -> String { // No OS machine GUID available — use the persisted random UUID, hashed for a // uniform opaque shape with the Windows path. persisted_uid() } /// Read `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` (REG_SZ). /// /// Uses `RegGetValueW`, which opens, queries, null-terminates, and (with /// `RRF_RT_REG_SZ`) type-checks the value in one call. #[cfg(windows)] fn read_machine_guid() -> anyhow::Result { use anyhow::{anyhow, Context}; use windows::core::PCWSTR; use windows::Win32::Foundation::ERROR_SUCCESS; use windows::Win32::System::Registry::{RegGetValueW, HKEY_LOCAL_MACHINE, RRF_RT_REG_SZ}; fn to_wide(s: &str) -> Vec { s.encode_utf16().chain(std::iter::once(0)).collect() } let subkey = to_wide(r"SOFTWARE\Microsoft\Cryptography"); let value = to_wide("MachineGuid"); unsafe { // First query the required buffer size (in bytes). let mut size: u32 = 0; let status = RegGetValueW( HKEY_LOCAL_MACHINE, PCWSTR(subkey.as_ptr()), PCWSTR(value.as_ptr()), RRF_RT_REG_SZ, None, None, Some(&mut size), ); if status != ERROR_SUCCESS { return Err(anyhow!("RegGetValueW(size) failed: {:?}", status)); } if size == 0 { return Err(anyhow!("MachineGuid reported zero length")); } // `size` is bytes; allocate a u16 buffer large enough to hold it. let len_u16 = size.div_ceil(2) as usize; let mut buffer = vec![0u16; len_u16]; let mut size_out = size; let status = RegGetValueW( HKEY_LOCAL_MACHINE, PCWSTR(subkey.as_ptr()), PCWSTR(value.as_ptr()), RRF_RT_REG_SZ, None, Some(buffer.as_mut_ptr() as *mut _), Some(&mut size_out), ); if status != ERROR_SUCCESS { return Err(anyhow!("RegGetValueW(read) failed: {:?}", status)); } // Trim the trailing NUL(s) that RegGetValueW guarantees. let chars = size_out as usize / 2; let slice = &buffer[..chars.min(buffer.len())]; let end = slice.iter().position(|&c| c == 0).unwrap_or(slice.len()); String::from_utf16(&slice[..end]).context("MachineGuid was not valid UTF-16") } } /// Read (or, on first use, generate and persist) a random UUID, then derive the /// opaque id from it. This is the fallback identity: stable across calls and /// process restarts because it is persisted to disk. fn persisted_uid() -> String { let path = fallback_uid_path(); // Try to read an existing value. if let Some(ref p) = path { if let Ok(contents) = std::fs::read_to_string(p) { let trimmed = contents.trim(); if !trimmed.is_empty() { return derive_uid(trimmed); } } } // Generate a new random seed and persist it (best-effort). let seed = uuid::Uuid::new_v4().to_string(); if let Some(ref p) = path { if let Some(parent) = p.parent() { let _ = std::fs::create_dir_all(parent); } if let Err(e) = std::fs::write(p, &seed) { tracing::warn!( "Could not persist fallback machine_uid seed to {:?} ({e}); \ id will be stable for this process only", p ); } } else { tracing::warn!( "No writable data directory for fallback machine_uid seed; \ id will be stable for this process only" ); } derive_uid(&seed) } /// Location of the persisted fallback seed file. /// /// - **Windows:** `%ProgramData%\GuruConnect\machine_uid` (mirrors the agent /// config location), used only when the registry read fails. /// - **Non-Windows:** `$XDG_DATA_HOME/guruconnect/machine_uid`, falling back to /// `$HOME/.local/share/guruconnect/machine_uid`, then a temp-dir path. fn fallback_uid_path() -> Option { #[cfg(windows)] { if let Ok(program_data) = std::env::var("ProgramData") { return Some( std::path::PathBuf::from(program_data) .join("GuruConnect") .join("machine_uid"), ); } } #[cfg(not(windows))] { if let Ok(xdg) = std::env::var("XDG_DATA_HOME") { if !xdg.is_empty() { return Some( std::path::PathBuf::from(xdg) .join("guruconnect") .join("machine_uid"), ); } } if let Ok(home) = std::env::var("HOME") { if !home.is_empty() { return Some( std::path::PathBuf::from(home) .join(".local") .join("share") .join("guruconnect") .join("machine_uid"), ); } } } // Last resort: a stable name in the system temp dir. Some(std::env::temp_dir().join("guruconnect_machine_uid")) } #[cfg(test)] mod tests { use super::*; #[test] fn machine_uid_is_non_empty_and_prefixed() { let uid = machine_uid(); assert!(!uid.is_empty(), "machine_uid must not be empty"); assert!( uid.starts_with(MUID_PREFIX), "machine_uid must start with {MUID_PREFIX}: got {uid}" ); // muid_ + 16 bytes hex (32 chars). assert_eq!( uid.len(), MUID_PREFIX.len() + 32, "unexpected machine_uid length: {uid}" ); assert!( uid[MUID_PREFIX.len()..] .chars() .all(|c| c.is_ascii_hexdigit()), "machine_uid suffix must be lowercase hex: {uid}" ); } #[test] fn machine_uid_is_deterministic_across_calls() { // The cached public API must be stable. assert_eq!(machine_uid(), machine_uid()); } #[test] fn derive_uid_is_deterministic() { // Same input -> same output; different input -> different output. let a = derive_uid("the-same-input"); let b = derive_uid("the-same-input"); let c = derive_uid("a-different-input"); assert_eq!(a, b); assert_ne!(a, c); assert!(a.starts_with(MUID_PREFIX)); } /// The non-Windows fallback must be stable across calls because it persists /// its seed. We exercise `persisted_uid()` directly (the public `machine_uid` /// is cached, so it cannot demonstrate persistence on its own). #[test] fn persisted_uid_is_stable_across_calls() { let first = persisted_uid(); let second = persisted_uid(); assert_eq!( first, second, "persisted fallback uid must be stable across calls" ); assert!(first.starts_with(MUID_PREFIX)); } /// On Windows specifically, the registry-derived path must be deterministic: /// reading the same `MachineGuid` twice yields the same uid. #[cfg(windows)] #[test] fn windows_machine_guid_path_is_deterministic() { // If the registry read succeeds, two reads must agree and the derived // uid must match. If it fails (unusual), the test still validates the // fallback determinism via compute_machine_uid(). let a = compute_machine_uid(); let b = compute_machine_uid(); assert_eq!(a, b, "compute_machine_uid must be deterministic on Windows"); assert!(a.starts_with(MUID_PREFIX)); } /// Pin the EXACT derivation strings that `compute_machine_uid` builds, so these /// pure-function tests track the production logic. Keep in lock-step with /// `compute_machine_uid`. #[cfg(windows)] fn salted_uid(salt: &str) -> String { derive_uid(&format!("{MUID_NAMESPACE}|{salt}")) } #[cfg(windows)] fn machineguid_only_uid(guid: &str) -> String { derive_uid(&format!("{MUID_NAMESPACE}|machineguid:{guid}")) } /// H1 RE-IMAGE STABILITY: when a hardware salt is present, the uid is derived /// from the salt ALONE — the MachineGuid is NOT part of the input. So holding /// the hardware signals fixed while varying the MachineGuid MUST yield the SAME /// uid. This is exactly the re-image case: an OS re-image regenerates the /// MachineGuid but leaves SMBIOS UUID / board+disk serial unchanged, and the /// machine_uid must not move (otherwise dedup breaks). We prove it by showing /// the salted derivation has no MachineGuid term to vary. #[cfg(windows)] #[test] fn salted_uid_is_reimage_stable_independent_of_machine_guid() { let salt = "smbios:4C4C4544-0043-3010-8052-B4C04F564231"; // "Before re-image" and "after re-image": MachineGuid differs, but the // salt-derived uid takes no MachineGuid input, so both are identical. let before = salted_uid(salt); let after = salted_uid(salt); assert_eq!( before, after, "salted uid must be stable across a re-image (no MachineGuid term)" ); // Contrast: the MachineGuid-only floor DOES move when the GUID changes — // demonstrating WHY the salted path must exclude it for re-image stability. let guid_a = machineguid_only_uid("11111111-2222-3333-4444-555555555555"); let guid_b = machineguid_only_uid("99999999-8888-7777-6666-555555555555"); assert_ne!( guid_a, guid_b, "MachineGuid-only floor is volatile across re-image (expected)" ); // And the salted uid must differ from the MachineGuid-only floor for the // same box: the two derivation paths are domain-separated. assert_ne!(before, guid_a); } /// The hardware-salted derivation is `derive_uid` over a deterministic, /// namespaced concatenation: identical signals MUST yield an identical uid and /// any changed signal MUST change it. Pins the SPEC-016 determinism contract /// independent of the (machine-specific) live hardware reads. #[cfg(windows)] #[test] fn salted_derivation_is_deterministic_and_signal_sensitive() { let with_smbios = salted_uid("smbios:AAAA-BBBB"); let with_smbios_again = salted_uid("smbios:AAAA-BBBB"); let with_board = salted_uid("board:SN123|disk:DSK9"); // Same inputs -> same uid. assert_eq!(with_smbios, with_smbios_again); // Different salt composition -> different uid (distinct boxes stay distinct). assert_ne!(with_smbios, with_board); } /// All-zero and all-FF SMBIOS UUIDs are degenerate placeholders that some OEMs /// and hypervisor templates emit; the normalizer + placeholder check must /// reject them so the derivation falls through to board/disk serial. We /// exercise the rejection predicate directly (it is pure) rather than the /// live WMI read. #[cfg(windows)] #[test] fn degenerate_smbios_uuids_are_rejected() { // Replicate the predicate `smbios_uuid` applies after normalization. fn is_degenerate(raw: &str) -> bool { let Some(norm) = normalize_signal(Some(raw)) else { return true; }; let hex: String = norm.chars().filter(|c| *c != '-').collect(); hex.is_empty() || (!hex.is_empty() && hex.chars().all(|c| c == '0')) || (!hex.is_empty() && hex.chars().all(|c| c == 'F')) } assert!(is_degenerate("00000000-0000-0000-0000-000000000000")); assert!(is_degenerate("FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF")); assert!(is_degenerate("ffffffff-ffff-ffff-ffff-ffffffffffff")); // case-insensitive via normalize assert!(is_degenerate(" ")); // A real, mixed UUID is NOT degenerate. assert!(!is_degenerate("4C4C4544-0043-3010-8052-B4C04F564231")); } /// `normalize_signal` trims, upper-cases, and drops empties — so case/space /// drift in a vendor serial never perturbs the digest. #[cfg(windows)] #[test] fn normalize_signal_is_stable_against_drift() { assert_eq!( normalize_signal(Some(" abc123 ")), Some("ABC123".to_string()) ); assert_eq!(normalize_signal(Some("ABC123")), Some("ABC123".to_string())); assert_eq!(normalize_signal(Some(" ")), None); assert_eq!(normalize_signal(None), None); } }