SPEC-018 Phase 1: managed agent as LocalSystem service host #7

Merged
azcomputerguru merged 2 commits from feat/spec-018-service-host into main 2026-06-02 14:25:07 -07:00
3 changed files with 746 additions and 31 deletions
Showing only changes of commit 7602b4346a - Show all commits

View File

@@ -290,6 +290,18 @@ pub fn install(force_user_install: bool) -> Result<()> {
// Register protocol handler
register_protocol_handler(elevated)?;
// SPEC-018: a MANAGED install (embedded config => persistent agent) installs
// the LocalSystem service as its single autostart and removes the per-user
// HKCU\…\Run entry. Attended (support-code) and viewer installs are untouched:
// they have no embedded config and continue to use the HKCU Run / protocol
// handler paths exactly as before.
#[cfg(windows)]
{
if crate::config::Config::has_embedded_config() {
install_managed_service(&exe_path)?;
}
}
info!("Installation complete!");
if elevated {
info!("Installed system-wide to: {}", install_path.display());
@@ -300,6 +312,64 @@ pub fn install(force_user_install: bool) -> Result<()> {
Ok(())
}
/// SPEC-018: make the LocalSystem service the one and only autostart for a
/// managed agent.
///
/// Steps, in order:
/// 1. Register the service for `exe_path` via [`crate::service::install_service`].
///    A failure here is returned to the caller (wrapped with a message) — a
///    managed deployment is expected to run the install elevated, so we do not
///    hide an SCM rejection.
/// 2. Try to start the service immediately. A start failure is only logged.
/// 3. Try to remove the legacy per-user `HKCU\…\Run` autostart so the agent is
///    not launched a second time in the user's session. A removal failure is
///    only logged.
///
/// # Errors
///
/// Returns an error only when the service registration in step 1 fails.
#[cfg(windows)]
pub fn install_managed_service(exe_path: &std::path::Path) -> Result<()> {
    info!("Managed install: registering LocalSystem service (SPEC-018)");

    // Step 1: registration is the only hard requirement.
    if let Err(e) = crate::service::install_service(exe_path) {
        return Err(anyhow!("failed to install the managed agent service: {e:#}"));
    }

    // Step 2: bring the agent up now instead of at the next boot (best-effort).
    match crate::service::start_service() {
        Ok(_) => {}
        Err(e) => {
            warn!(
                "managed service installed but did not start now ({e:#}); \
                 it is auto-start and will run on next boot"
            );
        }
    }

    // Step 3: drop the per-user autostart so only the service launches the agent.
    match crate::startup::remove_from_startup() {
        Ok(_) => {
            info!("removed legacy HKCU Run autostart (service is now the managed autostart)");
        }
        Err(e) => {
            warn!(
                "managed service installed, but failed to remove the legacy HKCU Run \
                 autostart (harmless if it was never present): {}",
                e
            );
        }
    }

    Ok(())
}
/// SPEC-018: tear down the managed autostart — the legacy `HKCU\…\Run` entry
/// and the LocalSystem service.
///
/// The per-user autostart is removed first and best-effort (a failure is only
/// logged). The result of [`crate::service::uninstall_service`] decides the
/// return value.
///
/// # Errors
///
/// Returns an error when the service removal fails; the underlying error chain
/// is folded into the message.
#[cfg(windows)]
pub fn uninstall_managed_service() -> Result<()> {
    info!("Managed uninstall: removing LocalSystem service (SPEC-018)");

    // Cheap registry cleanup first; it never blocks the service removal below.
    match crate::startup::remove_from_startup() {
        Ok(_) => {}
        Err(e) => {
            warn!(
                "failed to remove legacy HKCU Run autostart during uninstall: {}",
                e
            );
        }
    }

    let removal = crate::service::uninstall_service();
    removal.map_err(|e| anyhow!("failed to uninstall the managed agent service: {e:#}"))
}
/// Check if the guruconnect:// protocol handler is registered
#[cfg(windows)]
pub fn is_protocol_handler_registered() -> bool {

View File

@@ -24,6 +24,8 @@ mod identity;
mod input;
mod install;
mod sas_client;
#[cfg(windows)]
mod service;
mod session;
mod startup;
mod transport;
@@ -182,6 +184,12 @@ enum Commands {
/// Show detailed version and build information
#[command(name = "version-info")]
VersionInfo,
/// Internal: entry point invoked by the Windows Service Control Manager to run
/// the managed agent as a LocalSystem service (SPEC-018). Not for interactive
/// use — running it by hand fails because there is no controlling SCM.
#[command(name = "service-run", hide = true)]
ServiceRun,
}
fn main() -> Result<()> {
@@ -236,6 +244,21 @@ fn main() -> Result<()> {
println!("{}", build_info::full_version());
Ok(())
}
Some(Commands::ServiceRun) => {
// SPEC-018 Phase 1: SCM-invoked entry. Hand off to the service
// dispatcher, which calls back into the control loop and runs the
// managed-agent logic as SYSTEM. Blocks until the service stops.
#[cfg(windows)]
{
service::run_dispatcher()
}
#[cfg(not(windows))]
{
Err(anyhow::anyhow!(
"service-run is a Windows-only entry point (SPEC-018)"
))
}
}
None => {
// No subcommand - detect mode from filename or embedded config
// Legacy: if support_code arg provided, use that
@@ -264,16 +287,31 @@ fn main() -> Result<()> {
run_agent_mode(Some(code))
}
RunMode::PermanentAgent => {
// Embedded config found - run as permanent agent
// Embedded config found - managed/persistent agent.
info!("Permanent agent mode detected (embedded config)");
if !install::is_protocol_handler_registered() {
// First run - install then run as agent
info!("First run - installing agent");
if let Err(e) = install::install(false) {
warn!("Installation failed: {}", e);
}
// SPEC-018: managed mode runs as the LocalSystem service, not as
// an interactive process. The service is the single autostart.
// - If the service is already installed, the service is (or
// will be) running the agent — this interactive invocation
// must NOT spawn a second agent. Exit quietly.
// - On first run, install (which installs + starts the service
// and removes the legacy HKCU Run entry), then exit and let
// the service carry the agent as SYSTEM.
#[cfg(windows)]
{
run_permanent_agent_managed()
}
#[cfg(not(windows))]
{
if !install::is_protocol_handler_registered() {
info!("First run - installing agent");
if let Err(e) = install::install(false) {
warn!("Installation failed: {}", e);
}
}
run_agent_mode(None)
}
run_agent_mode(None)
}
RunMode::Default => {
// No special mode detected - use legacy logic
@@ -333,10 +371,92 @@ fn run_agent_mode(support_code: Option<String>) -> Result<()> {
if config.support_code.is_none() {
resolve_agent_credential(&mut config).await?;
}
run_agent(config).await
run_agent(config, None).await
})
}
/// SPEC-018 Phase 1: body of the managed agent when hosted by the Windows
/// service.
///
/// Loads the agent configuration, forces the persistent (non-support-session)
/// path by clearing any support code, then — on a freshly created Tokio
/// runtime — resolves the operating credential and runs the agent loop with
/// `shutdown` as its cooperative stop flag.
///
/// # Parameters
///
/// * `shutdown` — flag handed to [`run_agent`] as `Some(..)`; the service's
///   control handler sets it when the SCM asks the service to stop.
///
/// # Errors
///
/// Propagates failures from loading the configuration, creating the runtime,
/// resolving the credential, and from the agent loop itself.
///
/// Phase 2 seam: the runtime created here is where the session broker and the
/// per-session worker are expected to attach; Phase 1 connects/enrolls only.
#[cfg(windows)]
pub fn run_managed_agent_service(
    shutdown: std::sync::Arc<std::sync::atomic::AtomicBool>,
) -> Result<()> {
    info!("Loading managed-agent configuration (running as SYSTEM)");
    let mut config = config::Config::load()?;

    // The service never carries a support session: that flow is interactive and
    // user-launched, so any support code in the loaded config is discarded.
    config.support_code = None;

    info!("Server: {}", config.server_url);
    if let Some(company) = config.company.as_ref() {
        info!("Company: {}", company);
    }
    if let Some(site) = config.site.as_ref() {
        info!("Site: {}", site);
    }

    // SPEC-016 Phase B: the credential must be resolved before connecting.
    let agent_task = async move {
        resolve_agent_credential(&mut config).await?;
        run_agent(config, Some(shutdown)).await
    };

    let runtime = tokio::runtime::Runtime::new()?;
    runtime.block_on(agent_task)
}
/// SPEC-018 Phase 1: decide what an interactive launch of a MANAGED agent
/// binary (embedded config, [`config::RunMode::PermanentAgent`]) should do.
///
/// * Service already registered — the service owns the agent, so this process
///   returns `Ok(())` without starting a second agent.
/// * Service not registered — run the installer. On success, return `Ok(())`
///   and leave the agent to the service. On failure (e.g. not elevated), fall
///   back to running the agent in this process via [`run_agent_mode`] so the
///   machine still has an agent for this run.
#[cfg(windows)]
fn run_permanent_agent_managed() -> Result<()> {
    if service::is_service_installed() {
        info!(
            "Managed service already installed; the service runs the agent as SYSTEM — \
             this interactive instance has nothing to do"
        );
        return Ok(());
    }

    info!("First run - installing managed agent service");
    match install::install(false) {
        Ok(_) => {
            info!("Managed agent service installed; handing off to the service");
            Ok(())
        }
        Err(e) => {
            warn!(
                "Managed service install failed ({e:#}); falling back to in-process agent for this run"
            );
            run_agent_mode(None)
        }
    }
}
/// Resolve the per-machine operating credential for a managed agent (SPEC-016
/// Phase B, run-mode wiring).
///
@@ -372,9 +492,13 @@ async fn resolve_agent_credential(config: &mut config::Config) -> Result<()> {
// do NOT silently re-enroll. The SYSTEM+Administrators ACL is correct
// for the target (Option A) and is deliberately kept.
//
// NOTE: this guard is satisfied/removed once the GuruConnect SYSTEM
// service host lands (separate spec, SPEC-018) and the agent always
// runs as SYSTEM — at which point the store is always readable.
// SPEC-018 (this spec): the managed agent now runs as the GuruConnect
// SYSTEM service ([`run_managed_agent_service`]), so on the production
// managed path the store IS readable in-context and this branch is NOT
// hit. The guard is intentionally retained as a harmless safety net for
// any non-SYSTEM invocation (e.g. someone running the managed binary
// interactively): it still fails fast with an actionable message rather
// than bricking. Do NOT remove it in Phase 1.
Err(LoadCakError::Io {
permission_denied: true,
source,
@@ -484,7 +608,22 @@ fn run_install(force_user_install: bool) -> Result<()> {
fn run_uninstall() -> Result<()> {
info!("Uninstalling GuruConnect...");
// Remove from startup
// SPEC-018: remove the managed LocalSystem service and the legacy HKCU Run
// autostart. Idempotent — no error if the service was never installed (an
// attended/viewer install has no service), so this is safe for every install
// shape. Requires Administrator to delete the service; a non-elevated uninstall
// still clears the per-user autostart below.
#[cfg(windows)]
{
if let Err(e) = install::uninstall_managed_service() {
warn!(
"Failed to remove managed service (may require Administrator): {}",
e
);
}
}
// Remove from startup (covers non-elevated / attended / viewer installs).
if let Err(e) = startup::remove_from_startup() {
warn!("Failed to remove from startup: {}", e);
}
@@ -582,31 +721,62 @@ fn cleanup_on_exit() {
}
}
/// Run the agent main loop
async fn run_agent(config: config::Config) -> Result<()> {
/// Run the agent main loop.
///
/// `service_shutdown`, when present, is the SCM cooperative-stop flag (SPEC-018):
/// the managed-agent service passes it so the loop exits promptly on
/// `Stop`/`Shutdown`. It is `None` for the interactive/user-launched paths, which
/// stop via the tray exit / server control messages instead.
async fn run_agent(
config: config::Config,
service_shutdown: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
) -> Result<()> {
use std::sync::atomic::Ordering;
let elevated = install::is_elevated();
let running_as_service = service_shutdown.is_some();
let mut session = session::SessionManager::new(config.clone(), elevated);
let is_support_session = config.support_code.is_some();
let hostname = config.hostname();
// Add to startup
if let Err(e) = startup::add_to_startup() {
// Helper: has the SCM asked us to stop?
let stop_requested = |flag: &Option<std::sync::Arc<std::sync::atomic::AtomicBool>>| -> bool {
flag.as_ref()
.map(|f| f.load(Ordering::SeqCst))
.unwrap_or(false)
};
// Autostart persistence:
// - As the SYSTEM service (SPEC-018), the SERVICE itself is the managed
// autostart — do NOT write the per-user HKCU\…\Run entry (that would be a
// second, redundant autostart, and writing it from SYSTEM lands in the
// wrong hive). The service install/uninstall owns lifecycle.
// - Interactive/user-launched runs keep the existing HKCU Run behavior.
if running_as_service {
info!("Running as the GuruConnect SYSTEM service; service is the autostart (skipping HKCU Run)");
} else if let Err(e) = startup::add_to_startup() {
warn!("Failed to add to startup: {}", e);
}
// Create tray icon
let tray = match tray::TrayController::new(
&hostname,
config.support_code.as_deref(),
is_support_session,
) {
Ok(t) => {
info!("Tray icon created");
Some(t)
}
Err(e) => {
warn!("Failed to create tray icon: {}", e);
None
// A Session-0 SYSTEM service has no interactive desktop, so a tray icon is
// both impossible and meaningless there (SPEC-018 Phase 2 moves the user-facing
// surface into the per-session worker). Only create the tray off the service.
let tray = if running_as_service {
None
} else {
match tray::TrayController::new(
&hostname,
config.support_code.as_deref(),
is_support_session,
) {
Ok(t) => {
info!("Tray icon created");
Some(t)
}
Err(e) => {
warn!("Failed to create tray icon: {}", e);
None
}
}
};
@@ -615,6 +785,12 @@ async fn run_agent(config: config::Config) -> Result<()> {
// Connect to server and run main loop
loop {
// SPEC-018: honour an SCM stop request before (re)connecting.
if stop_requested(&service_shutdown) {
info!("Service stop requested; exiting agent loop");
return Ok(());
}
info!("Connecting to server...");
if is_support_session {
@@ -713,6 +889,18 @@ async fn run_agent(config: config::Config) -> Result<()> {
}
info!("Reconnecting in 5 seconds...");
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
// SPEC-018: poll the SCM stop flag during the backoff so a service stop is
// honoured within ~250ms instead of waiting the full reconnect delay.
if service_shutdown.is_some() {
for _ in 0..20 {
if stop_requested(&service_shutdown) {
info!("Service stop requested during reconnect backoff; exiting agent loop");
return Ok(());
}
tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
}
} else {
tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
}
}
}

457
agent/src/service/mod.rs Normal file
View File

@@ -0,0 +1,457 @@
//! Windows SYSTEM service host for the managed GuruConnect agent (SPEC-018).
//!
//! # Phase 1 scope (this module)
//!
//! Phase 1 proves the *managed/persistent* agent can run as **LocalSystem** in
//! the isolated Session 0 across reboots and at the login screen:
//!
//! 1. Register the agent with the Service Control Manager (SCM) and run, when
//! started, the **existing persistent-agent logic** (`RunMode::PermanentAgent`
//! path) *as SYSTEM* — i.e. resolve/enroll the per-machine `cak_` (SPEC-016,
//! now readable because the SYSTEM-ACL'd store is in-context) and hold the
//! relay WSS connection.
//! 2. Report a correct service lifecycle to the SCM (`StartPending` ->
//! `Running` -> `StopPending` -> `Stopped`) and handle `Stop`/`Shutdown`
//! gracefully (signal the agent loop to close the WS connection and exit).
//! 3. Provide install/uninstall of the service (LocalSystem, auto-start, crash
//! recovery) so managed mode uses the service as its single autostart
//! instead of the per-user `HKCU\…\Run` entry.
//!
//! # Phase 2 (deliberately NOT built here — see SPEC-018 §Scope)
//!
//! A SYSTEM service lives in Session 0 and **cannot** capture or inject the
//! interactive desktop directly. Phase 1 therefore enrolls and connects but does
//! **NOT** capture a desktop yet. The following are Phase 2 and are intentionally
//! absent; the seams where they attach are called out inline below:
//!
//! - the **session broker** (`WTSEnumerateSessionsW` /
//! `WTSGetActiveConsoleSessionId` / `WTSQueryUserToken`),
//! - the **per-session capture/input worker** spawned via `CreateProcessAsUserW`
//! into `winsta0\default`,
//! - **service <-> worker IPC** (the per-session ACL'd named pipe), and
//! - **`SERVICE_CONTROL_SESSIONCHANGE`** reaction (logon/logoff/console-connect
//! retarget).
//!
//! Phase 1 registers the control handler for `Stop`/`Shutdown`/`Interrogate`
//! only. When Phase 2 lands, the broker hangs off the same control handler
//! (adding `SESSIONCHANGE`) and off the same agent runtime started here.
#![cfg(windows)]
use std::ffi::OsString;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Result};
use tracing::{error, info, warn};
use windows_service::{
define_windows_service,
service::{
ServiceAccess, ServiceControl, ServiceControlAccept, ServiceErrorControl, ServiceExitCode,
ServiceInfo, ServiceStartType, ServiceState, ServiceStatus, ServiceType,
},
service_control_handler::{self, ServiceControlHandlerResult},
service_dispatcher,
service_manager::{ServiceManager, ServiceManagerAccess},
};
/// Internal service name registered with the SCM (no spaces). This module
/// passes it to the service dispatcher, the control-handler registration,
/// `ServiceManager::open_service`, and the `sc failure` command line.
pub const SERVICE_NAME: &str = "GuruConnectAgent";
/// Human-facing display name shown in `services.msc`.
pub const SERVICE_DISPLAY_NAME: &str = "GuruConnect Managed Agent";
/// Service description shown in `services.msc`.
pub const SERVICE_DESCRIPTION: &str =
"Runs the managed GuruConnect remote-support agent as LocalSystem so it is \
reachable at the login screen and across reboots (SPEC-018).";
/// Hidden subcommand the SCM invokes to enter the service control loop. The
/// service is registered with this as its only launch argument (see
/// [`install_service`]), and `main.rs` routes it into [`run_dispatcher`].
pub const SERVICE_RUN_ARG: &str = "service-run";
/// Wait hint reported to the SCM with the `StartPending` and `StopPending`
/// states: how long a start/stop transition may take before the SCM should
/// consider the service hung.
const TRANSITION_WAIT: Duration = Duration::from_secs(10);
// The `windows-service` dispatcher requires an `extern "system"` entry point
// with a fixed ABI; this macro generates `ffi_service_main`, which trampolines
// into our safe `service_main`.
define_windows_service!(ffi_service_main, service_main);
/// Hand this process over to the SCM service dispatcher.
///
/// Called from `main.rs` for the hidden `service-run` subcommand. The call
/// blocks until the service has stopped.
///
/// # Errors
///
/// Fails when the dispatcher cannot be started — which is the expected result
/// of running `guruconnect service-run` by hand, because there is no
/// controlling SCM (`ERROR_FAILED_SERVICE_CONTROLLER_CONNECT`, 1063).
pub fn run_dispatcher() -> Result<()> {
    let dispatch = service_dispatcher::start(SERVICE_NAME, ffi_service_main);
    dispatch
        .context("failed to connect to the service control dispatcher (must be started by the SCM)")
}
/// SCM-invoked service body. Any error is logged; the function cannot return an
/// error to the SCM directly, so [`run_service`] reports a failed exit code on the
/// status handle before returning.
fn service_main(_arguments: Vec<OsString>) {
if let Err(e) = run_service() {
error!("service exited with error: {e:#}");
}
}
/// Drive the full service lifecycle: register the control handler, report
/// `Running`, run the persistent agent until a stop is requested, then report
/// `Stopped`.
fn run_service() -> Result<()> {
info!("GuruConnect managed agent service starting (running as SYSTEM in session 0)");
// Cooperative shutdown flag flipped by the SCM control handler and observed by
// the agent runtime. `AtomicBool` keeps the handler closure trivially `Send`
// and avoids holding a lock inside an SCM callback.
let shutdown = Arc::new(AtomicBool::new(false));
let shutdown_for_handler = shutdown.clone();
let event_handler = move |control_event| -> ServiceControlHandlerResult {
match control_event {
// SPEC-018 Phase 1: graceful stop. Phase 2 adds
// `ServiceControl::SessionChange(_)` here to drive the session broker
// (retarget the capture/input worker on logon/logoff/console-connect);
// we intentionally do not accept SESSIONCHANGE yet.
ServiceControl::Stop | ServiceControl::Shutdown => {
info!("received {control_event:?}; signalling agent to shut down");
shutdown_for_handler.store(true, Ordering::SeqCst);
ServiceControlHandlerResult::NoError
}
ServiceControl::Interrogate => ServiceControlHandlerResult::NoError,
_ => ServiceControlHandlerResult::NotImplemented,
}
};
let status_handle = service_control_handler::register(SERVICE_NAME, event_handler)
.context("failed to register the service control handler")?;
// Report StartPending while we spin up the runtime and connect.
set_status(
&status_handle,
ServiceState::StartPending,
ServiceControlAccept::empty(),
TRANSITION_WAIT,
);
// Report Running and accept Stop + Shutdown. We report Running before the
// first connect attempt completes because the agent loop reconnects forever;
// "the service is up and trying" is the correct steady state, and blocking the
// SCM on the first relay handshake would risk a start timeout on a slow boot.
set_status(
&status_handle,
ServiceState::Running,
ServiceControlAccept::STOP | ServiceControlAccept::SHUTDOWN,
Duration::default(),
);
info!("service reported Running; entering managed-agent control loop");
// Run the existing persistent-agent logic as SYSTEM. This is the Phase 1
// payload: resolve/enroll the cak_ (SPEC-016) and hold the relay connection.
let run_result = crate::run_managed_agent_service(shutdown.clone());
if let Err(e) = &run_result {
// The agent loop only returns Err on an unrecoverable LOCAL fault (e.g. no
// usable credential and nothing to enroll with). Network errors are
// retried inside the loop and never surface here. Report the failure to
// the SCM so recovery actions (restart) engage.
error!("managed-agent control loop terminated with error: {e:#}");
} else {
info!("managed-agent control loop exited cleanly on stop request");
}
// Transition StopPending -> Stopped.
set_status(
&status_handle,
ServiceState::StopPending,
ServiceControlAccept::empty(),
TRANSITION_WAIT,
);
let exit_code = match run_result {
Ok(()) => ServiceExitCode::Win32(0),
// ERROR_SERVICE_SPECIFIC_ERROR-style: surface a non-zero service-specific
// code so the SCM treats the exit as a failure and applies recovery.
Err(_) => ServiceExitCode::ServiceSpecific(1),
};
set_status_with_exit(
&status_handle,
ServiceState::Stopped,
ServiceControlAccept::empty(),
Duration::default(),
exit_code,
);
info!("service reported Stopped");
Ok(())
}
/// Convenience wrapper around [`set_status_with_exit`] for every state that is
/// not a failure: it always reports the success exit code `Win32(0)`.
fn set_status(
    handle: &service_control_handler::ServiceStatusHandle,
    state: ServiceState,
    accepted: ServiceControlAccept,
    wait_hint: Duration,
) {
    let success = ServiceExitCode::Win32(0);
    set_status_with_exit(handle, state, accepted, wait_hint, success);
}
/// Push one status report to the SCM.
///
/// The report always describes an own-process service with checkpoint `0` and
/// no process id; `state`, `accepted`, `wait_hint` and `exit_code` are taken
/// from the arguments. Reporting is best-effort: if the SCM call fails, the
/// failure is logged and swallowed, because nothing actionable can be done
/// about it and the service must not panic.
fn set_status_with_exit(
    handle: &service_control_handler::ServiceStatusHandle,
    state: ServiceState,
    accepted: ServiceControlAccept,
    wait_hint: Duration,
    exit_code: ServiceExitCode,
) {
    let report = handle.set_service_status(ServiceStatus {
        service_type: ServiceType::OWN_PROCESS,
        current_state: state,
        controls_accepted: accepted,
        exit_code,
        checkpoint: 0,
        wait_hint,
        process_id: None,
    });

    if let Err(e) = report {
        warn!("failed to report service status {state:?} to the SCM: {e}");
    }
}
// ---------------------------------------------------------------------------
// Install / uninstall (used by install.rs for managed mode)
// ---------------------------------------------------------------------------
/// Install (or reinstall) the managed agent as a LocalSystem auto-start service
/// pointing at `exe_path` with the [`SERVICE_RUN_ARG`] launch argument.
///
/// Idempotent: if the service already exists it is stopped and deleted first,
/// then recreated, so an upgrade picks up a new binary path / config. Configures
/// crash recovery (restart on failure) via `sc failure`.
///
/// Requires Administrator (SCM `CREATE_SERVICE`). Returns an error otherwise.
pub fn install_service(exe_path: &std::path::Path) -> Result<()> {
let manager = ServiceManager::local_computer(
None::<&str>,
ServiceManagerAccess::CONNECT | ServiceManagerAccess::CREATE_SERVICE,
)
.context("failed to connect to the Service Control Manager (run as Administrator)")?;
// Remove any prior installation so the binary path / args are refreshed.
if let Ok(existing) = manager.open_service(
SERVICE_NAME,
ServiceAccess::QUERY_STATUS | ServiceAccess::STOP | ServiceAccess::DELETE,
) {
info!("existing {SERVICE_NAME} service found; removing before reinstall");
stop_if_running(&existing);
existing
.delete()
.context("failed to delete the existing service before reinstall")?;
drop(existing);
// The SCM marks a service for deletion but only removes it once all handles
// close; a brief settle avoids a CreateService "marked for deletion" race.
std::thread::sleep(Duration::from_secs(2));
}
let service_info = ServiceInfo {
name: OsString::from(SERVICE_NAME),
display_name: OsString::from(SERVICE_DISPLAY_NAME),
service_type: ServiceType::OWN_PROCESS,
start_type: ServiceStartType::AutoStart,
error_control: ServiceErrorControl::Normal,
executable_path: exe_path.to_path_buf(),
launch_arguments: vec![OsString::from(SERVICE_RUN_ARG)],
dependencies: vec![],
// account_name: None => LocalSystem (the SPEC-018 requirement).
account_name: None,
account_password: None,
};
let service = manager
.create_service(&service_info, ServiceAccess::CHANGE_CONFIG)
.context("failed to create the GuruConnect managed agent service")?;
service
.set_description(SERVICE_DESCRIPTION)
.context("failed to set the service description")?;
configure_recovery();
info!(
"installed {SERVICE_NAME} (LocalSystem, auto-start) -> {} {}",
exe_path.display(),
SERVICE_RUN_ARG
);
Ok(())
}
/// Configure SCM recovery so the service is restarted after a failure.
///
/// `windows-service` 0.7 does not expose `ChangeServiceConfig2` recovery actions
/// in a stable, ergonomic form, so this shells out to `sc`:
///
/// 1. `sc failure <name> reset= 86400 actions= restart/5000/restart/5000/restart/5000`
///    — restart after 5 s for the first, second and subsequent failures; the
///    failure count resets after a day. The option name (ending in `=`) and its
///    value are passed as separate arguments, which is the form the `sc`
///    documentation requires.
/// 2. `sc failureflag <name> 1` — also apply the recovery actions when the
///    service *reports* `Stopped` with a non-zero exit code. Without this flag
///    the SCM only reacts to a process that dies without reporting `Stopped`,
///    so the failure exit code reported by [`run_service`] would never trigger
///    a restart.
///
/// Best-effort: every failure is logged and otherwise ignored; the service
/// still runs, it just will not be restarted automatically in that case.
fn configure_recovery() {
    match run_sc(&[
        "failure",
        SERVICE_NAME,
        "reset=",
        "86400",
        "actions=",
        "restart/5000/restart/5000/restart/5000",
    ]) {
        Ok(()) => info!("configured crash-recovery (restart) for {SERVICE_NAME}"),
        Err(reason) => {
            warn!(
                "could not configure crash-recovery for {SERVICE_NAME} ({reason}); \
                 the service will still run but will not auto-restart on crash"
            );
        }
    }

    match run_sc(&["failureflag", SERVICE_NAME, "1"]) {
        Ok(()) => info!("enabled recovery on non-crash failures for {SERVICE_NAME}"),
        Err(reason) => {
            warn!(
                "could not enable recovery on non-crash failures for {SERVICE_NAME} ({reason}); \
                 the service will not auto-restart after reporting a failed exit code"
            );
        }
    }
}

/// Run `sc` with `args` without opening a console window; `Err` carries a short
/// human-readable reason (spawn failure or non-zero exit status).
fn run_sc(args: &[&str]) -> std::result::Result<(), String> {
    use std::os::windows::process::CommandExt;
    const CREATE_NO_WINDOW: u32 = 0x0800_0000;

    let output = std::process::Command::new("sc")
        .args(args)
        .creation_flags(CREATE_NO_WINDOW)
        .output()
        .map_err(|e| format!("could not invoke `sc {}`: {e}", args.join(" ")))?;

    if output.status.success() {
        Ok(())
    } else {
        Err(format!(
            "`sc {}` exit {:?}",
            args.join(" "),
            output.status.code()
        ))
    }
}
/// Stop (if running) and delete the managed agent service. Idempotent: succeeds
/// quietly if the service is not installed.
pub fn uninstall_service() -> Result<()> {
let manager = ServiceManager::local_computer(None::<&str>, ServiceManagerAccess::CONNECT)
.context("failed to connect to the Service Control Manager (run as Administrator)")?;
match manager.open_service(
SERVICE_NAME,
ServiceAccess::QUERY_STATUS | ServiceAccess::STOP | ServiceAccess::DELETE,
) {
Ok(service) => {
stop_if_running(&service);
service
.delete()
.context("failed to delete the managed agent service")?;
info!("uninstalled {SERVICE_NAME} service");
Ok(())
}
Err(_) => {
// Not installed — nothing to do (idempotent uninstall).
info!("{SERVICE_NAME} service is not installed; nothing to uninstall");
Ok(())
}
}
}
/// Ask the SCM to start the managed agent service right away.
///
/// Used directly after a first-run install so the agent does not have to wait
/// for the next boot. If the service already reports `Running` or
/// `StartPending`, nothing is started and `Ok(())` is returned. A failed status
/// query is ignored and the start is attempted anyway.
///
/// # Errors
///
/// Returns an error when the SCM connection, opening the service, or the start
/// request fails. Callers may treat this as non-fatal: the service is
/// registered as auto-start.
pub fn start_service() -> Result<()> {
    let manager = ServiceManager::local_computer(None::<&str>, ServiceManagerAccess::CONNECT)
        .context("failed to connect to the Service Control Manager")?;
    let wanted_access = ServiceAccess::START | ServiceAccess::QUERY_STATUS;
    let service = manager
        .open_service(SERVICE_NAME, wanted_access)
        .context("failed to open the managed agent service to start it")?;

    // Reinstall-over-running case: the service is already up (or coming up).
    let already_up = matches!(
        service.query_status().map(|status| status.current_state),
        Ok(ServiceState::Running) | Ok(ServiceState::StartPending)
    );
    if already_up {
        info!("{SERVICE_NAME} is already running/starting");
        return Ok(());
    }

    service
        .start::<String>(&[])
        .context("failed to start the managed agent service")?;
    info!("started {SERVICE_NAME}");
    Ok(())
}
/// Tell whether the managed agent service can be opened for a status query.
///
/// Returns `false` both when the SCM connection fails and when opening the
/// service fails for any reason; it never returns an error and never panics.
pub fn is_service_installed() -> bool {
    ServiceManager::local_computer(None::<&str>, ServiceManagerAccess::CONNECT)
        .map(|manager| {
            manager
                .open_service(SERVICE_NAME, ServiceAccess::QUERY_STATUS)
                .is_ok()
        })
        .unwrap_or(false)
}
/// Best-effort stop used before deleting a service.
///
/// Does nothing when the status cannot be queried or the service is already
/// `Stopped`. Otherwise it sends a stop request (ignoring its result) and then
/// polls the status every 500 ms, up to 10 times, until the service reports
/// `Stopped` — so a following `delete` does not race an in-flight stop. The
/// function returns after the last poll even if the service never stopped.
fn stop_if_running(service: &windows_service::service::Service) {
    let initial = match service.query_status() {
        Ok(status) => status,
        Err(_) => return,
    };
    if initial.current_state == ServiceState::Stopped {
        return;
    }

    info!("stopping {SERVICE_NAME} before delete");
    let _ = service.stop();

    for _ in 0..10 {
        std::thread::sleep(Duration::from_millis(500));
        let has_stopped = matches!(
            service.query_status(),
            Ok(s) if s.current_state == ServiceState::Stopped
        );
        if has_stopped {
            break;
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Guards the contract between this module and `main.rs`: the argument the
    /// service is registered with has to be the hidden `service-run` subcommand
    /// that routes into [`run_dispatcher`]. If the two drifted apart, the SCM
    /// could start the service but it would fall through to normal
    /// (non-service) mode and immediately exit.
    #[test]
    fn service_run_arg_matches_subcommand_name() {
        let expected_subcommand = "service-run";
        assert_eq!(SERVICE_RUN_ARG, expected_subcommand);
    }

    /// All identifiers are non-empty, and the internal name is a single token
    /// (it is used as the SCM key and as an `sc` argument).
    #[test]
    fn service_identifiers_are_well_formed() {
        assert!(!SERVICE_NAME.is_empty());
        assert!(
            SERVICE_NAME.chars().all(|c| !c.is_whitespace()),
            "the SCM service name must be a single whitespace-free token"
        );
        assert!(!SERVICE_DISPLAY_NAME.is_empty());
        assert!(!SERVICE_DESCRIPTION.is_empty());
    }

    /// Smoke test: the query must complete without panicking whatever the
    /// elevation / SCM access of the test runner is. The result is not
    /// asserted, and the service is never installed from tests (that is a
    /// VM/admin integration step).
    #[test]
    fn is_service_installed_is_total() {
        let _installed: bool = is_service_installed();
    }
}