SPEC-002 Phase 1 Task 4 (the final keystone task), code-reviewed APPROVED. Closes the audit's reusable-code HIGH and rate-limiting-disabled HIGH. - Rebuilt rate limiting as a self-contained in-memory per-IP limiter (replaces the non-compiling tower_governor; removed that dep). Fixed-window caps wired to login (8/min), change-password (5/min), code-validate (15/min) -> 429; per-IP lockout after 10 consecutive failed code validations (15-min cooldown). - Single-use support codes: atomic consume on first agent bind (in-memory Pending->Connected under write lock + DB conditional UPDATE), rejecting a second presenter; validate/preview does not consume. - Widened code format: XXX-XXX-XXX, 31-char unambiguous alphabet (no 0/O/1/I/L), CSPRNG + rejection sampling, ~44.6 bits (replaces 6-digit numeric); migration 006 widens the code columns to TEXT. Completes the keystone (Tasks 1-4): every audit CRITICAL + HIGH in the secure auth/session core is now addressed. Known follow-up todos (not blocking): (1) trusted-proxy client-IP extraction (NPM-on-loopback collapses clients to 127.0.0.1); (2) multi-instance fail-closed DB single-use gate. Not cargo-check-verified locally - build-host/CI verification follows this commit. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
480 lines
17 KiB
Rust
480 lines
17 KiB
Rust
//! Support session codes management
//!
//! Handles generation and validation of high-entropy, human-readable support
//! codes for one-time remote support sessions.
//!
//! ## Code format (Task 4)
//!
//! v1 used a 6-digit numeric code (~20 bits, trivially brute-forceable). v2 uses
//! a grouped base32-style code drawn from an UNAMBIGUOUS alphabet (no `0`/`O`,
//! `1`/`I`/`L`) so a human reading it aloud cannot mistranscribe it:
//!
//! ```text
//! XXX-XXX-XXX e.g. K7P-3MQ-Z9F
//! ```
//!
//! 9 symbols over a 31-character alphabet ≈ **44.6 bits** of entropy, generated
//! with a CSPRNG ([`OsRng`]). Combined with the per-IP rate limiting + lockout on
//! the validate route (Task 4) and single-use consumption on bind, the code space
//! is no longer practically brute-forceable.
|
||
|
||
use chrono::{DateTime, Utc};
|
||
use rand::rngs::OsRng;
|
||
use rand::RngCore;
|
||
use serde::{Deserialize, Serialize};
|
||
use std::collections::HashMap;
|
||
use std::sync::Arc;
|
||
use tokio::sync::RwLock;
|
||
use uuid::Uuid;
|
||
|
||
/// Symbols a support code may contain: the digits `2`-`9` plus the uppercase
/// letters with `I`, `L` and `O` removed, so that none of the look-alike pairs
/// `0`/`O` and `1`/`I`/`L` can appear. 31 distinct symbols, i.e. about
/// 4.954 bits per symbol.
const CODE_ALPHABET: &[u8] = b"23456789ABCDEFGHJKMNPQRSTUVWXYZ";

/// How many alphabet symbols make up one code; hyphen separators are not
/// counted. 9 × log2(31) ≈ 44.6 bits, above the 40-bit target.
const CODE_SYMBOLS: usize = 9;

/// Width of each hyphen-separated group when a code is rendered.
const CODE_GROUP_SIZE: usize = 3;
|
||
|
||
/// Draw a single uniformly-distributed symbol from [`CODE_ALPHABET`] using a
|
||
/// CSPRNG, via rejection sampling so every symbol is equally likely (no modulo
|
||
/// bias). 31 is not a power of two, so we reject draws in the biased tail.
|
||
fn random_symbol() -> u8 {
|
||
let n = CODE_ALPHABET.len() as u32; // 31
|
||
// Largest multiple of n that fits in a u8 draw space (256); reject above it.
|
||
let limit = (256 / n) * n; // 248
|
||
let mut rng = OsRng;
|
||
loop {
|
||
let mut buf = [0u8; 1];
|
||
rng.fill_bytes(&mut buf);
|
||
let v = buf[0] as u32;
|
||
if v < limit {
|
||
return CODE_ALPHABET[(v % n) as usize];
|
||
}
|
||
// else: biased tail — draw again.
|
||
}
|
||
}
|
||
|
||
/// Generate a fresh grouped support code, e.g. `K7P-3MQ-Z9F`. CSPRNG-backed.
|
||
fn generate_code_string() -> String {
|
||
let mut out = String::with_capacity(CODE_SYMBOLS + CODE_SYMBOLS / CODE_GROUP_SIZE);
|
||
for i in 0..CODE_SYMBOLS {
|
||
if i > 0 && i % CODE_GROUP_SIZE == 0 {
|
||
out.push('-');
|
||
}
|
||
out.push(random_symbol() as char);
|
||
}
|
||
out
|
||
}
|
||
|
||
/// A support session code
|
||
#[derive(Debug, Clone, Serialize)]
|
||
pub struct SupportCode {
|
||
pub code: String,
|
||
pub session_id: Uuid,
|
||
pub created_by: String,
|
||
pub created_at: DateTime<Utc>,
|
||
pub status: CodeStatus,
|
||
pub client_name: Option<String>,
|
||
pub client_machine: Option<String>,
|
||
pub connected_at: Option<DateTime<Utc>>,
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||
#[serde(rename_all = "lowercase")]
|
||
pub enum CodeStatus {
|
||
Pending, // Waiting for client to connect
|
||
Connected, // Client connected, session active
|
||
Completed, // Session ended normally
|
||
Cancelled, // Code cancelled by tech
|
||
}
|
||
|
||
/// Request to create a new support code
|
||
#[derive(Debug, Deserialize)]
|
||
pub struct CreateCodeRequest {
|
||
#[allow(dead_code)]
|
||
// TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||
pub technician_id: Option<String>,
|
||
pub technician_name: Option<String>,
|
||
}
|
||
|
||
/// Response when a code is validated
|
||
#[derive(Debug, Serialize)]
|
||
pub struct CodeValidation {
|
||
pub valid: bool,
|
||
pub session_id: Option<String>,
|
||
pub server_url: Option<String>,
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
pub error: Option<String>,
|
||
}
|
||
|
||
/// Manages support codes
|
||
#[derive(Clone)]
|
||
pub struct SupportCodeManager {
|
||
codes: Arc<RwLock<HashMap<String, SupportCode>>>,
|
||
session_to_code: Arc<RwLock<HashMap<Uuid, String>>>,
|
||
}
|
||
|
||
impl SupportCodeManager {
|
||
pub fn new() -> Self {
|
||
Self {
|
||
codes: Arc::new(RwLock::new(HashMap::new())),
|
||
session_to_code: Arc::new(RwLock::new(HashMap::new())),
|
||
}
|
||
}
|
||
|
||
/// Generate a unique high-entropy support code (see module docs).
|
||
///
|
||
/// Draws CSPRNG-backed grouped codes (`XXX-XXX-XXX`, ≈44.6 bits) until one is
|
||
/// not already live in the in-memory map. With a 31^9 code space the collision
|
||
/// probability is negligible; the loop only guards against the (astronomically
|
||
/// unlikely) duplicate.
|
||
async fn generate_unique_code(&self) -> String {
|
||
let codes = self.codes.read().await;
|
||
loop {
|
||
let code_str = generate_code_string();
|
||
if !codes.contains_key(&code_str) {
|
||
return code_str;
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Create a new support code
|
||
pub async fn create_code(&self, request: CreateCodeRequest) -> SupportCode {
|
||
let code = self.generate_unique_code().await;
|
||
let session_id = Uuid::new_v4();
|
||
|
||
let support_code = SupportCode {
|
||
code: code.clone(),
|
||
session_id,
|
||
created_by: request
|
||
.technician_name
|
||
.unwrap_or_else(|| "Unknown".to_string()),
|
||
created_at: Utc::now(),
|
||
status: CodeStatus::Pending,
|
||
client_name: None,
|
||
client_machine: None,
|
||
connected_at: None,
|
||
};
|
||
|
||
let mut codes = self.codes.write().await;
|
||
codes.insert(code.clone(), support_code.clone());
|
||
|
||
let mut session_to_code = self.session_to_code.write().await;
|
||
session_to_code.insert(session_id, code);
|
||
|
||
support_code
|
||
}
|
||
|
||
/// Validate a code and return session info
|
||
pub async fn validate_code(&self, code: &str) -> CodeValidation {
|
||
let codes = self.codes.read().await;
|
||
|
||
match codes.get(code) {
|
||
Some(support_code) => {
|
||
if support_code.status == CodeStatus::Pending
|
||
|| support_code.status == CodeStatus::Connected
|
||
{
|
||
CodeValidation {
|
||
valid: true,
|
||
session_id: Some(support_code.session_id.to_string()),
|
||
server_url: Some("wss://connect.azcomputerguru.com/ws/support".to_string()),
|
||
error: None,
|
||
}
|
||
} else {
|
||
CodeValidation {
|
||
valid: false,
|
||
session_id: None,
|
||
server_url: None,
|
||
error: Some("This code has expired or been used".to_string()),
|
||
}
|
||
}
|
||
}
|
||
None => CodeValidation {
|
||
valid: false,
|
||
session_id: None,
|
||
server_url: None,
|
||
error: Some("Invalid code".to_string()),
|
||
},
|
||
}
|
||
}
|
||
|
||
/// Mark a code as connected.
|
||
///
|
||
/// Superseded by [`SupportCodeManager::consume_for_bind`] for the single-use
|
||
/// bind path (Task 4). Retained for non-bind callers; not used on the agent
|
||
/// bind path any longer.
|
||
#[allow(dead_code)]
|
||
pub async fn mark_connected(
|
||
&self,
|
||
code: &str,
|
||
client_name: Option<String>,
|
||
client_machine: Option<String>,
|
||
) {
|
||
let mut codes = self.codes.write().await;
|
||
if let Some(support_code) = codes.get_mut(code) {
|
||
support_code.status = CodeStatus::Connected;
|
||
support_code.client_name = client_name;
|
||
support_code.client_machine = client_machine;
|
||
support_code.connected_at = Some(Utc::now());
|
||
}
|
||
}
|
||
|
||
/// Atomically CONSUME a code for a first-time agent bind (single-use, Task 4).
|
||
///
|
||
/// This is the single-use gate for the in-memory layer. Under the write lock,
|
||
/// it accepts the code ONLY if it is currently `Pending` (never used), flips
|
||
/// it to `Connected`, and records the binding client. Any other state
|
||
/// (`Connected` — already bound, `Completed`, `Cancelled`, or a nonexistent
|
||
/// code) is rejected. Because the transition happens while holding the write
|
||
/// lock, two concurrent presenters of the same code race for the single
|
||
/// `Pending → Connected` transition: exactly one wins, the loser is rejected.
|
||
///
|
||
/// Returns `true` if the caller consumed the code (and may proceed to bind),
|
||
/// `false` if the code was not available for consumption.
|
||
///
|
||
/// NOTE: the preview route (`validate_code`) deliberately does NOT call this —
|
||
/// previewing a code must never consume it. Only the agent bind path does.
|
||
pub async fn consume_for_bind(
|
||
&self,
|
||
code: &str,
|
||
client_name: Option<String>,
|
||
client_machine: Option<String>,
|
||
) -> bool {
|
||
let mut codes = self.codes.write().await;
|
||
match codes.get_mut(code) {
|
||
Some(support_code) if support_code.status == CodeStatus::Pending => {
|
||
support_code.status = CodeStatus::Connected;
|
||
support_code.client_name = client_name;
|
||
support_code.client_machine = client_machine;
|
||
support_code.connected_at = Some(Utc::now());
|
||
true
|
||
}
|
||
_ => false,
|
||
}
|
||
}
|
||
|
||
/// Link a support code to an actual WebSocket session
|
||
pub async fn link_session(&self, code: &str, real_session_id: Uuid) {
|
||
let mut codes = self.codes.write().await;
|
||
if let Some(support_code) = codes.get_mut(code) {
|
||
// Update session_to_code mapping with real session ID
|
||
let old_session_id = support_code.session_id;
|
||
support_code.session_id = real_session_id;
|
||
|
||
// Update the reverse mapping
|
||
let mut session_to_code = self.session_to_code.write().await;
|
||
session_to_code.remove(&old_session_id);
|
||
session_to_code.insert(real_session_id, code.to_string());
|
||
}
|
||
}
|
||
|
||
/// Get code by its code string
|
||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||
pub async fn get_code(&self, code: &str) -> Option<SupportCode> {
|
||
let codes = self.codes.read().await;
|
||
codes.get(code).cloned()
|
||
}
|
||
|
||
/// Mark a code as completed
|
||
pub async fn mark_completed(&self, code: &str) {
|
||
let mut codes = self.codes.write().await;
|
||
if let Some(support_code) = codes.get_mut(code) {
|
||
support_code.status = CodeStatus::Completed;
|
||
}
|
||
}
|
||
|
||
/// Cancel a code (works for both pending and connected)
|
||
pub async fn cancel_code(&self, code: &str) -> bool {
|
||
let mut codes = self.codes.write().await;
|
||
if let Some(support_code) = codes.get_mut(code) {
|
||
if support_code.status == CodeStatus::Pending
|
||
|| support_code.status == CodeStatus::Connected
|
||
{
|
||
support_code.status = CodeStatus::Cancelled;
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
/// Check if a code is cancelled
|
||
pub async fn is_cancelled(&self, code: &str) -> bool {
|
||
let codes = self.codes.read().await;
|
||
codes
|
||
.get(code)
|
||
.map(|c| c.status == CodeStatus::Cancelled)
|
||
.unwrap_or(false)
|
||
}
|
||
|
||
/// Check if a code is valid for connection (exists and is pending)
|
||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||
pub async fn is_valid_for_connection(&self, code: &str) -> bool {
|
||
let codes = self.codes.read().await;
|
||
codes
|
||
.get(code)
|
||
.map(|c| c.status == CodeStatus::Pending)
|
||
.unwrap_or(false)
|
||
}
|
||
|
||
/// List all codes (for dashboard)
|
||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||
pub async fn list_codes(&self) -> Vec<SupportCode> {
|
||
let codes = self.codes.read().await;
|
||
codes.values().cloned().collect()
|
||
}
|
||
|
||
/// List active codes only
|
||
pub async fn list_active_codes(&self) -> Vec<SupportCode> {
|
||
let codes = self.codes.read().await;
|
||
codes
|
||
.values()
|
||
.filter(|c| c.status == CodeStatus::Pending || c.status == CodeStatus::Connected)
|
||
.cloned()
|
||
.collect()
|
||
}
|
||
|
||
/// Get code by session ID
|
||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||
pub async fn get_by_session(&self, session_id: Uuid) -> Option<SupportCode> {
|
||
let session_to_code = self.session_to_code.read().await;
|
||
let code = session_to_code.get(&session_id)?;
|
||
|
||
let codes = self.codes.read().await;
|
||
codes.get(code).cloned()
|
||
}
|
||
|
||
/// Get the status of a code as a string (for auth checks).
|
||
///
|
||
/// No longer used on the agent bind path (replaced by the atomic
|
||
/// `consume_for_bind` single-use gate, Task 4); retained for diagnostics.
|
||
#[allow(dead_code)]
|
||
pub async fn get_status(&self, code: &str) -> Option<String> {
|
||
let codes = self.codes.read().await;
|
||
codes.get(code).map(|c| match c.status {
|
||
CodeStatus::Pending => "pending".to_string(),
|
||
CodeStatus::Connected => "connected".to_string(),
|
||
CodeStatus::Completed => "completed".to_string(),
|
||
CodeStatus::Cancelled => "cancelled".to_string(),
|
||
})
|
||
}
|
||
}
|
||
|
||
impl Default for SupportCodeManager {
|
||
fn default() -> Self {
|
||
Self::new()
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Register a fresh pending code on `mgr` and hand back its code string.
    async fn issue_code(mgr: &SupportCodeManager) -> String {
        let request = CreateCodeRequest {
            technician_id: None,
            technician_name: Some("tech".to_string()),
        };
        mgr.create_code(request).await.code
    }

    #[test]
    fn generated_code_has_expected_shape() {
        // XXX-XXX-XXX: 9 symbols + 2 hyphens = 11 chars.
        let code = generate_code_string();
        assert_eq!(code.len(), CODE_SYMBOLS + 2);

        let groups: Vec<&str> = code.split('-').collect();
        assert_eq!(groups.len(), CODE_SYMBOLS / CODE_GROUP_SIZE);
        for group in &groups {
            assert_eq!(group.len(), CODE_GROUP_SIZE);
        }
    }

    #[test]
    fn generated_code_uses_only_unambiguous_alphabet() {
        // No 0/O/1/I/L; every non-hyphen char is in CODE_ALPHABET.
        for _ in 0..2_000 {
            let code = generate_code_string();
            for symbol in code.chars().filter(|ch| *ch != '-') {
                assert!(
                    CODE_ALPHABET.contains(&(symbol as u8)),
                    "char {:?} not in unambiguous alphabet",
                    symbol
                );
                assert!(
                    !matches!(symbol, '0' | 'O' | '1' | 'I' | 'L'),
                    "ambiguous char {:?} leaked into a code",
                    symbol
                );
            }
        }
    }

    #[test]
    fn generated_codes_are_distinct_in_practice() {
        // With ~44 bits of entropy, 1000 draws should be unique.
        let mut seen = HashSet::new();
        for _ in 0..1_000 {
            let fresh = seen.insert(generate_code_string());
            assert!(fresh, "unexpected collision");
        }
    }

    #[tokio::test]
    async fn consume_for_bind_is_single_use() {
        let mgr = SupportCodeManager::new();
        let code = issue_code(&mgr).await;

        // First bind consumes the code.
        let first = mgr
            .consume_for_bind(&code, Some("agent".into()), Some("a1".into()))
            .await;
        assert!(first);

        // Second presenter is rejected — single use.
        let second = mgr
            .consume_for_bind(&code, Some("agent2".into()), Some("a2".into()))
            .await;
        assert!(!second);
    }

    #[tokio::test]
    async fn consume_for_bind_rejects_unknown_code() {
        let mgr = SupportCodeManager::new();
        let consumed = mgr.consume_for_bind("NOP-E00-000", None, None).await;
        assert!(!consumed);
    }

    #[tokio::test]
    async fn consume_for_bind_rejects_cancelled_code() {
        let mgr = SupportCodeManager::new();
        let code = issue_code(&mgr).await;

        assert!(mgr.cancel_code(&code).await);
        // A cancelled code is not Pending → cannot be consumed.
        assert!(!mgr.consume_for_bind(&code, None, None).await);
    }

    #[tokio::test]
    async fn preview_validate_does_not_consume() {
        let mgr = SupportCodeManager::new();
        let code = issue_code(&mgr).await;

        // Previewing the code many times must not consume it...
        for _ in 0..5 {
            let preview = mgr.validate_code(&code).await;
            assert!(preview.valid);
        }
        // ...so a first real bind still succeeds.
        assert!(mgr.consume_for_bind(&code, None, None).await);
    }
}
|