feat(server): reap stale persistent sessions + same-machine supersede (SPEC-004 Task 4)
A periodic reaper removes persistent, offline, viewerless sessions whose last heartbeat is older than a 10-minute TTL (60s sweep spawned at startup), and a same-machine supersede on the new-session path drops a stranded prior session when a legacy no-uid agent upgrades to a fresh agent_id + machine_uid. Both removals re-assert the predicate under the write lock (remove_session_if) to close a snapshot->remove TOCTOU. Security: keyed (cak_) agents pass machine_uid=None, so they never trigger supersede and are never reaped as a uid victim; online, viewer-attached, and support sessions are never reaped. 82 server tests pass; clippy clean. Implements specs/v2-stable-identity/plan.md Task 4. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -57,6 +57,16 @@ const SPA_DIR: &str = "static/app";
|
||||
/// non-WS, non-asset GET so `BrowserRouter` deep links (`/machines`,
|
||||
/// `/sessions`, `/login`) survive a hard reload.
|
||||
const SPA_INDEX: &str = "static/app/index.html";
|
||||
|
||||
/// How long an OFFLINE persistent session may sit idle before the reaper removes
|
||||
/// it (v2-stable-identity Task 4). Measured on the session's monotonic
|
||||
/// `last_heartbeat_instant`. Ten minutes is well past the agent's 30s heartbeat /
|
||||
/// 90s timeout, so only genuinely-gone machines age out — a brief reconnect blip
|
||||
/// never reaps a real session.
|
||||
// NOTE: the reaper only checks once per PERSISTENT_SESSION_REAP_INTERVAL, so an
// aged-out session can outlive this TTL by up to one sweep before it is removed.
const PERSISTENT_SESSION_TTL: std::time::Duration = std::time::Duration::from_secs(10 * 60);
|
||||
|
||||
/// Cadence of the stale-session reaper sweep (v2-stable-identity Task 4).
|
||||
// Also the delay before the first sweep: the reaper task discards the interval's
// immediate first tick, so nothing is swept until one full interval after spawn.
const PERSISTENT_SESSION_REAP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(60);
|
||||
use metrics::SharedMetrics;
|
||||
use prometheus_client::registry::Registry;
|
||||
use support_codes::{CodeValidation, CreateCodeRequest, SupportCode, SupportCodeManager};
|
||||
@@ -287,6 +297,37 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
// Spawn the stale-session reaper (v2-stable-identity Task 4). Periodically
|
||||
// removes OFFLINE persistent sessions that have aged out past
|
||||
// PERSISTENT_SESSION_TTL with no viewer attached, purging both the agent_id and
|
||||
// machine_uid indexes via SessionManager::remove_session_if. Spawned AFTER the
|
||||
// startup restore so restored-offline rows are present (they age out once they
|
||||
// pass the TTL, per plan). The task holds only a clone of the SessionManager
|
||||
// (Arc-backed internally), so it shares the live session map.
|
||||
{
|
||||
let reaper_sessions = sessions.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(PERSISTENT_SESSION_REAP_INTERVAL);
|
||||
// Skip the immediate first tick so we do not sweep before the server is
|
||||
// even serving; the first real sweep happens one interval in.
|
||||
interval.tick().await;
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let reaped = reaper_sessions
|
||||
.reap_stale_persistent(PERSISTENT_SESSION_TTL)
|
||||
.await;
|
||||
if reaped > 0 {
|
||||
info!("Stale-session reaper removed {} offline session(s)", reaped);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
info!(
|
||||
"Stale-session reaper started (ttl {}s, sweep every {}s)",
|
||||
PERSISTENT_SESSION_TTL.as_secs(),
|
||||
PERSISTENT_SESSION_REAP_INTERVAL.as_secs()
|
||||
);
|
||||
|
||||
// Agent API key for persistent agents (optional)
|
||||
let agent_api_key = std::env::var("AGENT_API_KEY").ok();
|
||||
if let Some(ref key) = agent_api_key {
|
||||
|
||||
Reference in New Issue
Block a user