feat(agent): derive + report deterministic machine_uid (SPEC-004 Task 1)
All checks were successful
Build and Test / Build Agent (Windows) (push) Successful in 7m4s
Build and Test / Build Server (Linux) (push) Successful in 9m41s
Build and Test / Security Audit (push) Successful in 4m11s
Build and Test / Build Summary (push) Successful in 10s

Agent now derives a recomputable, opaque machine_uid (Windows: SHA-256 of the OS
MachineGuid at HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid -> muid_<hex>;
non-Windows / registry-failure: persisted random UUID, warn-logged). Raw GUID
never exposed; OnceLock-cached. Reported ALONGSIDE agent_id (unchanged) on
AgentStatus (new additive proto field 12) and in the connect handshake query.
This is the stable identity that fixes config-loss duplicate registrations
(DESKTOP-I66IM5Q x9); server-side dedup keying that consumes it is SPEC-004
Task 2. Non-breaking, isolated. 5 unit tests; cargo fmt/clippy(-D warnings)/test
green on GURU-5070.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 21:23:11 -07:00
parent 92bc522c3a
commit b3e8f32734
7 changed files with 331 additions and 1 deletions

1
Cargo.lock generated
View File

@@ -1414,6 +1414,7 @@ dependencies = [
"chrono",
"clap",
"futures-util",
"hex",
"hostname",
"image",
"muda",

View File

@@ -47,6 +47,7 @@ toml = "0.8"
# Crypto
ring = "0.17"
sha2 = "0.10"
hex = "0.4"
# HTTP client for updates
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream", "json"] }

300
agent/src/identity.rs Normal file
View File

@@ -0,0 +1,300 @@
//! Deterministic, recomputable machine identity (`machine_uid`).
//!
//! SPEC-004 / v2-stable-identity Task 1.
//!
//! `machine_uid()` returns a stable, opaque identifier for *this physical
//! machine*. Unlike `agent_id` (a random UUID persisted in the config file,
//! which mints a fresh value — and thus a duplicate server row — whenever the
//! config is lost), `machine_uid` is **derived from the hardware/OS** and is
//! **recomputable**: the same machine yields the same id on every call with no
//! persistence required.
//!
//! - **Windows:** SHA-256 hash of the OS machine GUID read from
//! `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` (a `REG_SZ`). The raw
//! GUID is never returned — only the opaque `muid_<hex>` derived from it.
//! - **Non-Windows (and Windows registry failure):** a random UUID persisted in
//! the agent's data directory, read back on subsequent runs so it is stable
//! across calls and process restarts.
//!
//! This module deliberately does NOT change `agent_id`/`generate_agent_id`.
//! `machine_uid` is reported *alongside* `agent_id`; the server-side dedup that
//! consumes it is a separate task.
use std::sync::OnceLock;
/// Prefix marking the value as an opaque machine-uid (vs. a raw GUID/UUID).
///
/// Every id produced by `derive_uid` starts with this literal.
const MUID_PREFIX: &str = "muid_";
/// Process-wide cache for the computed machine uid.
///
/// `machine_uid()` reads the registry / a file, so the value is computed once
/// (on first access) and reused for the lifetime of the process.
static MACHINE_UID: OnceLock<String> = OnceLock::new();
/// Return this machine's opaque, recomputable identifier.
///
/// The value is non-empty, starts with [`MUID_PREFIX`], and is computed at most
/// once per process: the first call runs `compute_machine_uid` and stores the
/// result in [`MACHINE_UID`]; later calls hand back a copy of the cached string.
///
/// On Windows the id is derived purely from the OS machine GUID (nothing is
/// persisted). When the registry read fails — and on every non-Windows
/// platform — it degrades to a persisted random UUID instead of panicking.
pub fn machine_uid() -> String {
    let cached: &String = MACHINE_UID.get_or_init(compute_machine_uid);
    cached.to_owned()
}
/// Turn a raw machine-identity string into the opaque `muid_<hex>` form.
///
/// The input is hashed with SHA-256 and only the first 16 bytes of the digest
/// are kept, hex-encoded (32 characters) and prefixed with [`MUID_PREFIX`].
/// The mapping is fully deterministic for a given input, and the raw value
/// (e.g. the Windows `MachineGuid`) never appears in the result.
fn derive_uid(raw: &str) -> String {
    use sha2::{Digest, Sha256};

    let digest = Sha256::digest(raw.as_bytes());
    let truncated = &digest[..16];

    let mut uid = String::with_capacity(MUID_PREFIX.len() + truncated.len() * 2);
    uid.push_str(MUID_PREFIX);
    uid.push_str(&hex::encode(truncated));
    uid
}
/// Windows: derive the uid from the registry `MachineGuid`.
///
/// A failed registry read or a blank value is logged at warn level and the
/// persisted fallback id is returned instead, so this function never fails.
#[cfg(windows)]
fn compute_machine_uid() -> String {
    let raw_guid = match read_machine_guid() {
        Ok(raw) => raw,
        Err(e) => {
            tracing::warn!(
                "Failed to read MachineGuid from registry ({e}); falling back to persisted machine_uid"
            );
            return persisted_uid();
        }
    };

    let guid = raw_guid.trim();
    if guid.is_empty() {
        tracing::warn!(
            "MachineGuid registry value was empty; falling back to persisted machine_uid"
        );
        return persisted_uid();
    }

    derive_uid(guid)
}
/// Non-Windows: compute the uid by delegating to `persisted_uid()`.
///
/// There is no registry `MachineGuid` to read on these platforms, so the only
/// source of identity is the persisted random seed.
#[cfg(not(windows))]
fn compute_machine_uid() -> String {
// No OS machine GUID available — use the persisted random UUID, hashed for a
// uniform opaque shape with the Windows path (`persisted_uid` runs the seed
// through `derive_uid`, so the result is also `muid_<hex>`).
persisted_uid()
}
/// Read `HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid` (REG_SZ).
///
/// Uses `RegGetValueW`, which opens, queries, null-terminates, and (with
/// `RRF_RT_REG_SZ`) type-checks the value in one call.
///
/// The read is done in two calls: the first passes no data buffer and only
/// asks for the required size in bytes, the second fills a buffer of that size.
///
/// # Errors
///
/// Returns an error when either `RegGetValueW` call reports a status other
/// than `ERROR_SUCCESS`, when the reported size is zero, or when the returned
/// data is not valid UTF-16. The returned string is NOT trimmed or checked for
/// emptiness here; the caller does that.
#[cfg(windows)]
fn read_machine_guid() -> anyhow::Result<String> {
use anyhow::{anyhow, Context};
use windows::core::PCWSTR;
use windows::Win32::Foundation::ERROR_SUCCESS;
use windows::Win32::System::Registry::{RegGetValueW, HKEY_LOCAL_MACHINE, RRF_RT_REG_SZ};
// Encode `s` as UTF-16 with a single trailing NUL, as the wide-string
// (`PCWSTR`) parameters below require.
fn to_wide(s: &str) -> Vec<u16> {
s.encode_utf16().chain(std::iter::once(0)).collect()
}
// Both vectors are kept in locals that outlive the `unsafe` block, so the
// pointers handed to `RegGetValueW` stay valid for the duration of each call.
let subkey = to_wide(r"SOFTWARE\Microsoft\Cryptography");
let value = to_wide("MachineGuid");
// SAFETY: `subkey` and `value` are NUL-terminated UTF-16 buffers that live for
// the whole block. In the first call no data pointer is passed, only a valid
// `&mut u32` for the size. In the second call `buffer` holds
// `ceil(size / 2)` u16 elements, i.e. at least `size` bytes, and `size_out`
// (initialised to `size`) tells the API how many bytes it may write.
unsafe {
// First query the required buffer size (in bytes).
let mut size: u32 = 0;
let status = RegGetValueW(
HKEY_LOCAL_MACHINE,
PCWSTR(subkey.as_ptr()),
PCWSTR(value.as_ptr()),
RRF_RT_REG_SZ,
None,
None,
Some(&mut size),
);
if status != ERROR_SUCCESS {
return Err(anyhow!("RegGetValueW(size) failed: {:?}", status));
}
if size == 0 {
return Err(anyhow!("MachineGuid reported zero length"));
}
// `size` is bytes; allocate a u16 buffer large enough to hold it.
// `div_ceil` rounds up so an odd byte count still fits.
let len_u16 = size.div_ceil(2) as usize;
let mut buffer = vec![0u16; len_u16];
// In/out: capacity in bytes on entry; read back below as the number of
// bytes actually returned.
let mut size_out = size;
let status = RegGetValueW(
HKEY_LOCAL_MACHINE,
PCWSTR(subkey.as_ptr()),
PCWSTR(value.as_ptr()),
RRF_RT_REG_SZ,
None,
Some(buffer.as_mut_ptr() as *mut _),
Some(&mut size_out),
);
// Any non-success status (including the value having grown between the two
// calls) is surfaced as an error rather than retried.
if status != ERROR_SUCCESS {
return Err(anyhow!("RegGetValueW(read) failed: {:?}", status));
}
// Trim the trailing NUL(s) that RegGetValueW guarantees.
// `chars` is clamped to the buffer length so the slice can never go out of
// bounds; the string then ends at the first NUL (or at the end of the slice
// if none is found).
let chars = size_out as usize / 2;
let slice = &buffer[..chars.min(buffer.len())];
let end = slice.iter().position(|&c| c == 0).unwrap_or(slice.len());
String::from_utf16(&slice[..end]).context("MachineGuid was not valid UTF-16")
}
}
/// Read (or, on first use, generate and persist) a random UUID, then derive the
/// opaque id from it. This is the fallback identity: stable across calls and
/// process restarts because it is persisted to disk.
fn persisted_uid() -> String {
let path = fallback_uid_path();
// Try to read an existing value.
if let Some(ref p) = path {
if let Ok(contents) = std::fs::read_to_string(p) {
let trimmed = contents.trim();
if !trimmed.is_empty() {
return derive_uid(trimmed);
}
}
}
// Generate a new random seed and persist it (best-effort).
let seed = uuid::Uuid::new_v4().to_string();
if let Some(ref p) = path {
if let Some(parent) = p.parent() {
let _ = std::fs::create_dir_all(parent);
}
if let Err(e) = std::fs::write(p, &seed) {
tracing::warn!(
"Could not persist fallback machine_uid seed to {:?} ({e}); \
id will be stable for this process only",
p
);
}
} else {
tracing::warn!(
"No writable data directory for fallback machine_uid seed; \
id will be stable for this process only"
);
}
derive_uid(&seed)
}
/// Location of the persisted fallback seed file.
///
/// - **Windows:** `%ProgramData%\GuruConnect\machine_uid` (mirrors the agent
///   config location), used only when the registry read fails.
/// - **Non-Windows:** `$XDG_DATA_HOME/guruconnect/machine_uid`, falling back to
///   `$HOME/.local/share/guruconnect/machine_uid`.
/// - **Last resort (all platforms):** `guruconnect_machine_uid` in the system
///   temp directory.
///
/// An environment variable is only honoured when it holds an **absolute**
/// path. Unset, empty, and relative values are skipped: a relative base would
/// make the seed location depend on the process's current working directory,
/// so the "persisted" id could silently change between runs. (The XDG Base
/// Directory specification likewise says relative values must be ignored.)
/// Values are read with `var_os`, so directories whose names are not valid
/// UTF-8 are still usable.
///
/// Always returns `Some` today; the `Option` is kept for callers that already
/// handle a missing location.
fn fallback_uid_path() -> Option<std::path::PathBuf> {
    use std::path::PathBuf;

    // Base directory from the environment, or `None` if unset / empty /
    // relative (an empty path is never absolute, so one check covers both).
    let absolute_dir = |name: &str| -> Option<PathBuf> {
        std::env::var_os(name)
            .map(PathBuf::from)
            .filter(|dir| dir.is_absolute())
    };

    #[cfg(windows)]
    {
        if let Some(program_data) = absolute_dir("ProgramData") {
            return Some(program_data.join("GuruConnect").join("machine_uid"));
        }
    }
    #[cfg(not(windows))]
    {
        if let Some(xdg) = absolute_dir("XDG_DATA_HOME") {
            return Some(xdg.join("guruconnect").join("machine_uid"));
        }
        if let Some(home) = absolute_dir("HOME") {
            return Some(
                home.join(".local")
                    .join("share")
                    .join("guruconnect")
                    .join("machine_uid"),
            );
        }
    }
    // Last resort: a stable name in the system temp dir.
    Some(std::env::temp_dir().join("guruconnect_machine_uid"))
}
/// Unit tests for the machine-uid derivation and its persisted fallback.
///
/// Note: `machine_uid()`, `persisted_uid()` and (on Windows)
/// `compute_machine_uid()` touch the real registry / data directory of the
/// machine running the tests; they are not sandboxed.
#[cfg(test)]
mod tests {
    use super::*;

    /// The public id has the documented shape: prefix + 32 lowercase hex chars.
    #[test]
    fn machine_uid_is_non_empty_and_prefixed() {
        let uid = machine_uid();
        assert!(!uid.is_empty(), "machine_uid must not be empty");
        assert!(
            uid.starts_with(MUID_PREFIX),
            "machine_uid must start with {MUID_PREFIX}: got {uid}"
        );
        // muid_ + 16 bytes hex (32 chars).
        assert_eq!(
            uid.len(),
            MUID_PREFIX.len() + 32,
            "unexpected machine_uid length: {uid}"
        );
        // Check lowercase explicitly: `is_ascii_hexdigit` would also accept
        // `A`-`F`, which contradicts the contract stated in the message.
        assert!(
            uid[MUID_PREFIX.len()..]
                .chars()
                .all(|c| matches!(c, '0'..='9' | 'a'..='f')),
            "machine_uid suffix must be lowercase hex: {uid}"
        );
    }

    #[test]
    fn machine_uid_is_deterministic_across_calls() {
        // The cached public API must be stable.
        assert_eq!(machine_uid(), machine_uid());
    }

    #[test]
    fn derive_uid_is_deterministic() {
        // Same input -> same output; different input -> different output.
        let a = derive_uid("the-same-input");
        let b = derive_uid("the-same-input");
        let c = derive_uid("a-different-input");
        assert_eq!(a, b);
        assert_ne!(a, c);
        assert!(a.starts_with(MUID_PREFIX));
    }

    /// The non-Windows fallback must be stable across calls because it persists
    /// its seed. We exercise `persisted_uid()` directly (the public `machine_uid`
    /// is cached, so it cannot demonstrate persistence on its own).
    #[test]
    fn persisted_uid_is_stable_across_calls() {
        let first = persisted_uid();
        let second = persisted_uid();
        assert_eq!(
            first, second,
            "persisted fallback uid must be stable across calls"
        );
        assert!(first.starts_with(MUID_PREFIX));
    }

    /// On Windows specifically, the registry-derived path must be deterministic:
    /// reading the same `MachineGuid` twice yields the same uid.
    #[cfg(windows)]
    #[test]
    fn windows_machine_guid_path_is_deterministic() {
        // If the registry read succeeds, two reads must agree and the derived
        // uid must match. If it fails (unusual), the test still validates the
        // fallback determinism via compute_machine_uid().
        let a = compute_machine_uid();
        let b = compute_machine_uid();
        assert_eq!(a, b, "compute_machine_uid must be deterministic on Windows");
        assert!(a.starts_with(MUID_PREFIX));
    }
}

View File

@@ -17,6 +17,7 @@ mod chat;
mod config;
mod consent;
mod encoder;
mod identity;
mod input;
mod install;
mod sas_client;

View File

@@ -103,12 +103,16 @@ impl SessionManager {
pub async fn connect(&mut self) -> Result<()> {
self.state = SessionState::Connecting;
// Deterministic, recomputable identity reported alongside agent_id
// (v2 stable-identity Task 1). Cached after the first call.
let machine_uid = crate::identity::machine_uid();
let transport = WebSocketTransport::connect(
&self.config.server_url,
&self.config.agent_id,
&self.config.api_key,
Some(&self.hostname),
self.config.support_code.as_deref(),
Some(&machine_uid),
)
.await?;
@@ -247,6 +251,10 @@ impl SessionManager {
// Advertise hardware H.264 capability so the server can negotiate the
// codec (Task 7). Detected once and cached by the encoder module.
supports_h264: encoder::supports_hardware_h264(),
// Deterministic, recomputable hardware identity (v2 stable-identity
// Task 1). Reported alongside the unchanged random agent_id; cached
// after the first (registry) read.
machine_uid: crate::identity::machine_uid(),
};
let msg = Message {

View File

@@ -35,14 +35,25 @@ impl WebSocketTransport {
api_key: &str,
hostname: Option<&str>,
support_code: Option<&str>,
machine_uid: Option<&str>,
) -> Result<Self> {
// Build query parameters
// Build query parameters. agent_id + api_key are kept exactly as-is;
// machine_uid is appended ALONGSIDE them (v2 stable-identity Task 1) so
// the server sees the deterministic identity at connect time. It does not
// change registration keying (a separate server-side task).
let mut params = format!("agent_id={}&api_key={}", agent_id, api_key);
if let Some(hostname) = hostname {
params.push_str(&format!("&hostname={}", urlencoding::encode(hostname)));
}
if let Some(machine_uid) = machine_uid {
params.push_str(&format!(
"&machine_uid={}",
urlencoding::encode(machine_uid)
));
}
if let Some(code) = support_code {
params.push_str(&format!("&support_code={}", code));
}

View File

@@ -317,6 +317,14 @@ message AgentStatus {
// negotiation (see StartStream.video_codec). Detected once and cached;
// false on non-Windows / no HW encoder / MF unavailable.
bool supports_h264 = 11;
// Deterministic, recomputable hardware identity (v2 stable-identity Task 1).
// Opaque "muid_<hex>" derived by SHA-256 hashing the OS machine GUID
// (Windows: HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid); non-Windows /
// registry-failure falls back to a persisted random UUID. Reported ALONGSIDE
// agent_id (which is unchanged). The server-side dedup that consumes this is a
// separate task; until then it is informational. Empty only if the agent
// predates this field.
string machine_uid = 12;
}
// Server commands agent to uninstall itself