feat(server): v2 secure-session-core Task 4 - rate limit + single-use codes
SPEC-002 Phase 1 Task 4 (the final keystone task), code-reviewed APPROVED. Closes the audit's reusable-code HIGH and rate-limiting-disabled HIGH.

- Rebuilt rate limiting as a self-contained in-memory per-IP limiter (replaces the non-compiling tower_governor; removed that dep). Fixed-window caps wired to login (8/min), change-password (5/min), code-validate (15/min) -> 429; per-IP lockout after 10 consecutive failed code validations (15-min cooldown).
- Single-use support codes: atomic consume on first agent bind (in-memory Pending->Connected under write lock + DB conditional UPDATE), rejecting a second presenter; validate/preview does not consume.
- Widened code format: XXX-XXX-XXX, 31-char unambiguous alphabet (no 0/O/1/I/L), CSPRNG + rejection sampling, ~44.6 bits (replaces 6-digit numeric); migration 006 widens the code columns to TEXT.

Completes the keystone (Tasks 1-4): every audit CRITICAL + HIGH in the secure auth/session core is now addressed.

Known follow-up todos (not blocking): (1) trusted-proxy client-IP extraction (NPM-on-loopback collapses clients to 127.0.0.1); (2) multi-instance fail-closed DB single-use gate.

Not cargo-check-verified locally - build-host/CI verification follows this commit.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -21,7 +21,7 @@ pub mod proto {
|
||||
use anyhow::Result;
|
||||
use axum::http::{HeaderValue, Method};
|
||||
use axum::{
|
||||
extract::{Json, Path, Query, Request, State},
|
||||
extract::{ConnectInfo, Json, Path, Query, Request, State},
|
||||
http::StatusCode,
|
||||
middleware::{self as axum_middleware, Next},
|
||||
response::{Html, IntoResponse},
|
||||
@@ -58,6 +58,9 @@ pub struct AppState {
|
||||
pub registry: Arc<std::sync::Mutex<Registry>>,
|
||||
/// Server start time
|
||||
pub start_time: Arc<std::time::Instant>,
|
||||
/// Per-IP rate limiters + brute-force lockout (Task 4). Shared (Arc-backed
|
||||
/// internally) so cloning AppState shares the same counters.
|
||||
pub rate_limits: middleware::RateLimitState,
|
||||
}
|
||||
|
||||
/// Middleware to inject JWT config and token blacklist into request extensions
|
||||
@@ -263,6 +266,7 @@ async fn main() -> Result<()> {
|
||||
metrics,
|
||||
registry,
|
||||
start_time,
|
||||
rate_limits: middleware::RateLimitState::new(),
|
||||
};
|
||||
|
||||
// Build router
|
||||
@@ -271,11 +275,21 @@ async fn main() -> Result<()> {
|
||||
.route("/health", get(health))
|
||||
// Prometheus metrics (no auth required - for monitoring)
|
||||
.route("/metrics", get(prometheus_metrics))
|
||||
// Auth endpoints (TODO: Add rate limiting - see SEC2_RATE_LIMITING_TODO.md)
|
||||
.route("/api/auth/login", post(api::auth::login))
|
||||
// Auth endpoints. Per-IP rate limiting (Task 4) is attached per-route via
|
||||
// `route_layer` so it applies ONLY to these endpoints, not the whole app.
|
||||
.route(
|
||||
"/api/auth/login",
|
||||
post(api::auth::login).route_layer(axum_middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
middleware::login_rate_limit,
|
||||
)),
|
||||
)
|
||||
.route(
|
||||
"/api/auth/change-password",
|
||||
post(api::auth::change_password),
|
||||
post(api::auth::change_password).route_layer(axum_middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
middleware::change_password_rate_limit,
|
||||
)),
|
||||
)
|
||||
.route("/api/auth/me", get(api::auth::get_me))
|
||||
.route("/api/auth/logout", post(api::auth_logout::logout))
|
||||
@@ -306,10 +320,17 @@ async fn main() -> Result<()> {
|
||||
put(api::users::set_permissions),
|
||||
)
|
||||
.route("/api/users/:id/clients", put(api::users::set_client_access))
|
||||
// Portal API - Support codes (TODO: Add rate limiting)
|
||||
// Portal API - Support codes. The unauthenticated validate route is rate
|
||||
// limited + brute-force locked out per IP (Task 4).
|
||||
.route("/api/codes", post(create_code))
|
||||
.route("/api/codes", get(list_codes))
|
||||
.route("/api/codes/:code/validate", get(validate_code))
|
||||
.route(
|
||||
"/api/codes/:code/validate",
|
||||
get(validate_code).route_layer(axum_middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
middleware::code_validate_rate_limit,
|
||||
)),
|
||||
)
|
||||
.route("/api/codes/:code/cancel", post(cancel_code))
|
||||
// WebSocket endpoints
|
||||
.route("/ws/agent", get(relay::agent_ws_handler))
|
||||
@@ -450,7 +471,24 @@ async fn create_code(
|
||||
Json(request): Json<CreateCodeRequest>,
|
||||
) -> Json<SupportCode> {
|
||||
let code = state.support_codes.create_code(request).await;
|
||||
info!("Created support code: {}", code.code);
|
||||
|
||||
// Persist the code to the database so the DURABLE single-use guard
|
||||
// (`db::support_codes::consume_code_for_bind`, Task 4) has a row to act on at
|
||||
// agent-bind time. The in-memory manager remains the live source of truth for
|
||||
// the auth decision; the DB row is the durable single-use record (and audit
|
||||
// trail) that also survives a server restart. A DB failure here is non-fatal:
|
||||
// the in-memory single-use consume still protects against reuse within this
|
||||
// process lifetime.
|
||||
if let Some(ref db) = state.db {
|
||||
if let Err(e) =
|
||||
db::support_codes::create_support_code(db.pool(), &code.code, &code.created_by).await
|
||||
{
|
||||
tracing::warn!("Failed to persist support code to database: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Do not log the code value (it is a bearer credential for the session).
|
||||
info!("Created support code for {}", code.created_by);
|
||||
Json(code)
|
||||
}
|
||||
|
||||
@@ -469,9 +507,29 @@ struct ValidateParams {
|
||||
|
||||
async fn validate_code(
|
||||
State(state): State<AppState>,
|
||||
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
||||
Path(code): Path<String>,
|
||||
) -> Json<CodeValidation> {
|
||||
Json(state.support_codes.validate_code(&code).await)
|
||||
let ip = addr.ip();
|
||||
|
||||
// PREVIEW ONLY: validate_code inspects the in-memory code state and does NOT
|
||||
// consume the code (single-use consumption happens at agent BIND, in
|
||||
// relay::handle_agent_connection). A valid preview here must not flip the
|
||||
// code to connected/consumed.
|
||||
let result = state.support_codes.validate_code(&code).await;
|
||||
|
||||
// Feed the per-IP brute-force lockout (Task 4): a failed validation counts
|
||||
// toward the streak; a success resets it. The middleware
|
||||
// (`code_validate_rate_limit`) enforces the lockout BEFORE this handler runs,
|
||||
// so an already-locked IP never reaches here.
|
||||
if result.valid {
|
||||
state.rate_limits.code_validate_lockout.record_success(ip);
|
||||
} else {
|
||||
state.rate_limits.code_validate_lockout.record_failure(ip);
|
||||
tracing::warn!("Failed support-code validation from {}", ip);
|
||||
}
|
||||
|
||||
Json(result)
|
||||
}
|
||||
|
||||
async fn cancel_code(
|
||||
|
||||
Reference in New Issue
Block a user