From b3e8f32734cff1c0de12d09568321783b48efe78 Mon Sep 17 00:00:00 2001 From: Mike Swanson Date: Sat, 30 May 2026 21:23:11 -0700 Subject: [PATCH] feat(agent): derive + report deterministic machine_uid (SPEC-004 Task 1) Agent now derives a recomputable, opaque machine_uid (Windows: SHA-256 of the OS MachineGuid at HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid -> muid_<hex>; non-Windows / registry-failure: persisted random UUID, warn-logged). Raw GUID never exposed; OnceLock-cached. Reported ALONGSIDE agent_id (unchanged) on AgentStatus (new additive proto field 12) and in the connect handshake query. This is the stable identity that fixes config-loss duplicate registrations (DESKTOP-I66IM5Q x9); server-side dedup keying that consumes it is SPEC-004 Task 2. Non-breaking, isolated. 5 unit tests; cargo fmt/clippy(-D warnings)/test green on GURU-5070. Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 1 + agent/Cargo.toml | 1 + agent/src/identity.rs | 300 +++++++++++++++++++++++++++++++ agent/src/main.rs | 1 + agent/src/session/mod.rs | 8 + agent/src/transport/websocket.rs | 13 +- proto/guruconnect.proto | 8 + 7 files changed, 331 insertions(+), 1 deletion(-) create mode 100644 agent/src/identity.rs diff --git a/Cargo.lock b/Cargo.lock index 578f8fc..58d8190 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1414,6 +1414,7 @@ dependencies = [ "chrono", "clap", "futures-util", + "hex", "hostname", "image", "muda", diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 5124937..e92093d 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -47,6 +47,7 @@ toml = "0.8" # Crypto ring = "0.17" sha2 = "0.10" +hex = "0.4" # HTTP client for updates reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream", "json"] } diff --git a/agent/src/identity.rs b/agent/src/identity.rs new file mode 100644 index 0000000..eb1f226 --- /dev/null +++ b/agent/src/identity.rs @@ -0,0 +1,300 @@ +//! Deterministic, recomputable machine identity (`machine_uid`).
+//! +//! SPEC-004 / v2-stable-identity Task 1. +//! +//! `machine_uid()` returns a stable, opaque identifier for *this physical +//! machine*. Unlike `agent_id` (a random UUID persisted in the config file, +//! which mints a fresh value — and thus a duplicate server row — whenever the +//! config is lost), `machine_uid` is **derived from the hardware/OS** and is +//! **recomputable**: the same machine yields the same id on every call with no +//! persistence required. +//! +//! - **Windows:** SHA-256 hash of the OS machine GUID read from +//! `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` (a `REG_SZ`). The raw +//! GUID is never returned — only the opaque `muid_<hex>` derived from it. +//! - **Non-Windows (and Windows registry failure):** a random UUID persisted in +//! the agent's data directory, read back on subsequent runs so it is stable +//! across calls and process restarts. +//! +//! This module deliberately does NOT change `agent_id`/`generate_agent_id`. +//! `machine_uid` is reported *alongside* `agent_id`; the server-side dedup that +//! consumes it is a separate task. + +use std::sync::OnceLock; + +/// Prefix marking the value as an opaque machine-uid (vs. a raw GUID/UUID). +const MUID_PREFIX: &str = "muid_"; + +/// Cached value — `machine_uid()` reads the registry / a file, so compute once +/// and reuse for the lifetime of the process. +static MACHINE_UID: OnceLock = OnceLock::new(); + +/// Return a deterministic, recomputable opaque machine identifier. +/// +/// The result is non-empty and prefixed with [`MUID_PREFIX`]. It is cached after +/// the first call. On Windows it is derived purely from the OS machine GUID (no +/// persistence). If the Windows registry read fails — or on any non-Windows +/// platform — it degrades to a persisted random UUID (today's-behavior-equivalent +/// stability) rather than panicking.
+pub fn machine_uid() -> String { + MACHINE_UID.get_or_init(compute_machine_uid).clone() +} + +/// Derive the opaque id from a raw machine-identity string via SHA-256. +/// +/// Returns `muid_<32 lowercase hex chars>` (the first 16 bytes of the digest). Hashing makes the value +/// opaque (the raw `MachineGuid` is never exposed) while staying fully +/// deterministic for a given input. +fn derive_uid(raw: &str) -> String { + use sha2::{Digest, Sha256}; + + let mut hasher = Sha256::new(); + hasher.update(raw.as_bytes()); + let hash = hasher.finalize(); + format!("{}{}", MUID_PREFIX, hex::encode(&hash[..16])) +} + +#[cfg(windows)] +fn compute_machine_uid() -> String { + match read_machine_guid() { + Ok(guid) if !guid.trim().is_empty() => derive_uid(guid.trim()), + Ok(_) => { + tracing::warn!( + "MachineGuid registry value was empty; falling back to persisted machine_uid" + ); + persisted_uid() + } + Err(e) => { + tracing::warn!( + "Failed to read MachineGuid from registry ({e}); falling back to persisted machine_uid" + ); + persisted_uid() + } + } +} + +#[cfg(not(windows))] +fn compute_machine_uid() -> String { + // No OS machine GUID available — use the persisted random UUID, hashed for a + // uniform opaque shape with the Windows path. + persisted_uid() +} + +/// Read `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` (REG_SZ). +/// +/// Uses `RegGetValueW`, which opens, queries, null-terminates, and (with +/// `RRF_RT_REG_SZ`) type-checks the value in one call. +#[cfg(windows)] +fn read_machine_guid() -> anyhow::Result { + use anyhow::{anyhow, Context}; + use windows::core::PCWSTR; + use windows::Win32::Foundation::ERROR_SUCCESS; + use windows::Win32::System::Registry::{RegGetValueW, HKEY_LOCAL_MACHINE, RRF_RT_REG_SZ}; + + fn to_wide(s: &str) -> Vec { + s.encode_utf16().chain(std::iter::once(0)).collect() + } + + let subkey = to_wide(r"SOFTWARE\Microsoft\Cryptography"); + let value = to_wide("MachineGuid"); + + unsafe { + // First query the required buffer size (in bytes).
+ let mut size: u32 = 0; + let status = RegGetValueW( + HKEY_LOCAL_MACHINE, + PCWSTR(subkey.as_ptr()), + PCWSTR(value.as_ptr()), + RRF_RT_REG_SZ, + None, + None, + Some(&mut size), + ); + if status != ERROR_SUCCESS { + return Err(anyhow!("RegGetValueW(size) failed: {:?}", status)); + } + if size == 0 { + return Err(anyhow!("MachineGuid reported zero length")); + } + + // `size` is bytes; allocate a u16 buffer large enough to hold it. + let len_u16 = size.div_ceil(2) as usize; + let mut buffer = vec![0u16; len_u16]; + let mut size_out = size; + let status = RegGetValueW( + HKEY_LOCAL_MACHINE, + PCWSTR(subkey.as_ptr()), + PCWSTR(value.as_ptr()), + RRF_RT_REG_SZ, + None, + Some(buffer.as_mut_ptr() as *mut _), + Some(&mut size_out), + ); + if status != ERROR_SUCCESS { + return Err(anyhow!("RegGetValueW(read) failed: {:?}", status)); + } + + // Trim the trailing NUL(s) that RegGetValueW guarantees. + let chars = size_out as usize / 2; + let slice = &buffer[..chars.min(buffer.len())]; + let end = slice.iter().position(|&c| c == 0).unwrap_or(slice.len()); + String::from_utf16(&slice[..end]).context("MachineGuid was not valid UTF-16") + } +} + +/// Read (or, on first use, generate and persist) a random UUID, then derive the +/// opaque id from it. This is the fallback identity: stable across calls and +/// process restarts because it is persisted to disk. +fn persisted_uid() -> String { + let path = fallback_uid_path(); + + // Try to read an existing value. + if let Some(ref p) = path { + if let Ok(contents) = std::fs::read_to_string(p) { + let trimmed = contents.trim(); + if !trimmed.is_empty() { + return derive_uid(trimmed); + } + } + } + + // Generate a new random seed and persist it (best-effort). 
+ let seed = uuid::Uuid::new_v4().to_string(); + if let Some(ref p) = path { + if let Some(parent) = p.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Err(e) = std::fs::write(p, &seed) { + tracing::warn!( + "Could not persist fallback machine_uid seed to {:?} ({e}); \ + id will be stable for this process only", + p + ); + } + } else { + tracing::warn!( + "No writable data directory for fallback machine_uid seed; \ + id will be stable for this process only" + ); + } + + derive_uid(&seed) +} + +/// Location of the persisted fallback seed file. +/// +/// - **Windows:** `%ProgramData%\GuruConnect\machine_uid` (mirrors the agent +/// config location), used only when the registry read fails. +/// - **Non-Windows:** `$XDG_DATA_HOME/guruconnect/machine_uid`, falling back to +/// `$HOME/.local/share/guruconnect/machine_uid`, then a temp-dir path. +fn fallback_uid_path() -> Option { + #[cfg(windows)] + { + if let Ok(program_data) = std::env::var("ProgramData") { + return Some( + std::path::PathBuf::from(program_data) + .join("GuruConnect") + .join("machine_uid"), + ); + } + } + + #[cfg(not(windows))] + { + if let Ok(xdg) = std::env::var("XDG_DATA_HOME") { + if !xdg.is_empty() { + return Some( + std::path::PathBuf::from(xdg) + .join("guruconnect") + .join("machine_uid"), + ); + } + } + if let Ok(home) = std::env::var("HOME") { + if !home.is_empty() { + return Some( + std::path::PathBuf::from(home) + .join(".local") + .join("share") + .join("guruconnect") + .join("machine_uid"), + ); + } + } + } + + // Last resort: a stable name in the system temp dir. + Some(std::env::temp_dir().join("guruconnect_machine_uid")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn machine_uid_is_non_empty_and_prefixed() { + let uid = machine_uid(); + assert!(!uid.is_empty(), "machine_uid must not be empty"); + assert!( + uid.starts_with(MUID_PREFIX), + "machine_uid must start with {MUID_PREFIX}: got {uid}" + ); + // muid_ + 16 bytes hex (32 chars). 
+ assert_eq!( + uid.len(), + MUID_PREFIX.len() + 32, + "unexpected machine_uid length: {uid}" + ); + assert!( + uid[MUID_PREFIX.len()..] + .chars() + .all(|c| c.is_ascii_hexdigit()), + "machine_uid suffix must be lowercase hex: {uid}" + ); + } + + #[test] + fn machine_uid_is_deterministic_across_calls() { + // The cached public API must be stable. + assert_eq!(machine_uid(), machine_uid()); + } + + #[test] + fn derive_uid_is_deterministic() { + // Same input -> same output; different input -> different output. + let a = derive_uid("the-same-input"); + let b = derive_uid("the-same-input"); + let c = derive_uid("a-different-input"); + assert_eq!(a, b); + assert_ne!(a, c); + assert!(a.starts_with(MUID_PREFIX)); + } + + /// The non-Windows fallback must be stable across calls because it persists + /// its seed. We exercise `persisted_uid()` directly (the public `machine_uid` + /// is cached, so it cannot demonstrate persistence on its own). + #[test] + fn persisted_uid_is_stable_across_calls() { + let first = persisted_uid(); + let second = persisted_uid(); + assert_eq!( + first, second, + "persisted fallback uid must be stable across calls" + ); + assert!(first.starts_with(MUID_PREFIX)); + } + + /// On Windows specifically, the registry-derived path must be deterministic: + /// reading the same `MachineGuid` twice yields the same uid. + #[cfg(windows)] + #[test] + fn windows_machine_guid_path_is_deterministic() { + // If the registry read succeeds, two reads must agree and the derived + // uid must match. If it fails (unusual), the test still validates the + // fallback determinism via compute_machine_uid(). 
+ let a = compute_machine_uid(); + let b = compute_machine_uid(); + assert_eq!(a, b, "compute_machine_uid must be deterministic on Windows"); + assert!(a.starts_with(MUID_PREFIX)); + } +} diff --git a/agent/src/main.rs b/agent/src/main.rs index f6a7830..a8fd11f 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -17,6 +17,7 @@ mod chat; mod config; mod consent; mod encoder; +mod identity; mod input; mod install; mod sas_client; diff --git a/agent/src/session/mod.rs b/agent/src/session/mod.rs index a2f7587..5e40185 100644 --- a/agent/src/session/mod.rs +++ b/agent/src/session/mod.rs @@ -103,12 +103,16 @@ impl SessionManager { pub async fn connect(&mut self) -> Result<()> { self.state = SessionState::Connecting; + // Deterministic, recomputable identity reported alongside agent_id + // (v2 stable-identity Task 1). Cached after the first call. + let machine_uid = crate::identity::machine_uid(); let transport = WebSocketTransport::connect( &self.config.server_url, &self.config.agent_id, &self.config.api_key, Some(&self.hostname), self.config.support_code.as_deref(), + Some(&machine_uid), ) .await?; @@ -247,6 +251,10 @@ impl SessionManager { // Advertise hardware H.264 capability so the server can negotiate the // codec (Task 7). Detected once and cached by the encoder module. supports_h264: encoder::supports_hardware_h264(), + // Deterministic, recomputable hardware identity (v2 stable-identity + // Task 1). Reported alongside the unchanged random agent_id; cached + // after the first (registry) read. 
+ machine_uid: crate::identity::machine_uid(), }; let msg = Message { diff --git a/agent/src/transport/websocket.rs b/agent/src/transport/websocket.rs index 1a970ee..f924921 100644 --- a/agent/src/transport/websocket.rs +++ b/agent/src/transport/websocket.rs @@ -35,14 +35,25 @@ impl WebSocketTransport { api_key: &str, hostname: Option<&str>, support_code: Option<&str>, + machine_uid: Option<&str>, ) -> Result { - // Build query parameters + // Build query parameters. agent_id + api_key are kept exactly as-is; + // machine_uid is appended ALONGSIDE them (v2 stable-identity Task 1) so + // the server sees the deterministic identity at connect time. It does not + // change registration keying (a separate server-side task). let mut params = format!("agent_id={}&api_key={}", agent_id, api_key); if let Some(hostname) = hostname { params.push_str(&format!("&hostname={}", urlencoding::encode(hostname))); } + if let Some(machine_uid) = machine_uid { + params.push_str(&format!( + "&machine_uid={}", + urlencoding::encode(machine_uid) + )); + } + if let Some(code) = support_code { params.push_str(&format!("&support_code={}", code)); } diff --git a/proto/guruconnect.proto b/proto/guruconnect.proto index 842040d..81262aa 100644 --- a/proto/guruconnect.proto +++ b/proto/guruconnect.proto @@ -317,6 +317,14 @@ message AgentStatus { // negotiation (see StartStream.video_codec). Detected once and cached; // false on non-Windows / no HW encoder / MF unavailable. bool supports_h264 = 11; + // Deterministic, recomputable hardware identity (v2 stable-identity Task 1). + // Opaque "muid_<hex>" derived by SHA-256 hashing the OS machine GUID + // (Windows: HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid); non-Windows / + // registry-failure falls back to a persisted random UUID. Reported ALONGSIDE + // agent_id (which is unchanged). The server-side dedup that consumes this is a + // separate task; until then it is informational. Empty only if the agent + // predates this field.
+ string machine_uid = 12; } // Server commands agent to uninstall itself