fix(server): trusted-proxy client-IP extraction for rate-limit/audit keying

Resolves coord todo 3c1f372a (Task-4 review SHOULD-FIX). Behind NPM-on-loopback, ConnectInfo was 127.0.0.1 so the rate limiter + lockout bucketed every client under one IP. New shared utils::ip_extract::client_ip() honors X-Real-IP / X-Forwarded-For (rightmost-untrusted hop) ONLY when the TCP peer is a configured trusted proxy (CONNECT_TRUSTED_PROXIES env, default loopback, fail-closed); untrusted peers are keyed by their true peer IP (forged headers ignored). Wired into the 3 rate-limit middleware, the validate_code lockout feed, and the agent/viewer WS handlers so the limiter, lockout, and audit ip_address all key on the real client consistently. 13 unit tests (spoof rejection, XFF walk, fail-safe defaults). Code-reviewed APPROVED. Not cargo-check-verified locally (no toolchain); build-host/CI verification follows. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 07:15:45 -07:00
parent 21189423f2
commit 5d5cd26572
4 changed files with 430 additions and 29 deletions
--- a/server/src/utils/ip_extract.rs
+++ b/server/src/utils/ip_extract.rs
@@ -1,23 +1,385 @@
-//! IP address extraction from WebSocket connections
+//! Trusted-proxy-aware client-IP extraction.
+//!
+//! GuruConnect runs behind a reverse proxy (NPM) on the same host, so axum's
+//! [`ConnectInfo<SocketAddr>`] reports the *proxy's* peer address
+//! (`127.0.0.1`/`::1`), not the real remote client. Anything that keys on the
+//! peer IP — the per-IP rate limiter, the brute-force lockout, and the audit
+//! `ip_address` column — would therefore bucket every external client under one
+//! address, and the code-validate lockout could lock out the whole fleet on a
+//! single abuser.
+//!
+//! This module is the single source of truth for "who is the real client?".
+//! It derives the client IP from forwarding headers **only when the immediate
+//! TCP peer is a configured trusted proxy** — a client-supplied
+//! `X-Forwarded-For` / `X-Real-IP` from an *untrusted* peer is attacker-spoofable
+//! and is therefore ignored.
+//!
+//! ## Trust model
+//!
+//! - **Untrusted peer** (peer IP not in the trusted-proxy allowlist): return the
+//!   peer IP. Forwarding headers are ignored entirely — a direct client could
+//!   set any value, so honoring them would let an attacker rotate the limiter key
+//!   or forge the audit IP at will.
+//! - **Trusted peer** (peer IP in the allowlist): derive the client IP from the
+//!   forwarding headers the trusted proxy is expected to set:
+//!   1. `X-Real-IP`, if present and parseable (NPM/nginx sets a single value).
+//!   2. Otherwise `X-Forwarded-For`: take the **rightmost entry that is not
+//!      itself a trusted proxy**. Walking right-to-left and skipping trusted hops
+//!      yields the address the outermost trusted proxy actually *observed* — a
+//!      client cannot forge an entry past that point, because the trusted proxy
+//!      appends the real peer and everything the client pre-seeded sits further
+//!      left.
+//!   3. If neither header yields a usable address, fall back to the peer IP.
+//!
+//! Exact-IP matching is sufficient for the deployment (NPM on loopback); CIDR
+//! ranges are intentionally not supported to avoid pulling in an `ipnet`-style
+//! dependency for a case that exact loopback IPs already cover.

-use std::net::{IpAddr, SocketAddr};
+use std::collections::HashSet;
+use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};

-/// Extract IP address from Axum ConnectInfo
+use axum::http::HeaderMap;
+
+/// Header carrying a single client IP, set by nginx/NPM (`X-Real-IP`).
+const X_REAL_IP: &str = "x-real-ip";
+/// Standard forwarding chain header (`X-Forwarded-For`).
+const X_FORWARDED_FOR: &str = "x-forwarded-for";
+
+/// The set of TCP peers whose forwarding headers we trust.
 ///
-/// # Example
-/// ```rust
-/// pub async fn handler(ConnectInfo(addr): ConnectInfo<SocketAddr>) {
-///     let ip = extract_ip(&addr);
-///     // Use ip for logging
-/// }
-/// ```
-#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
-pub fn extract_ip(addr: &SocketAddr) -> IpAddr {
-    addr.ip()
+/// Parsed once at startup from the `CONNECT_TRUSTED_PROXIES` env var and stored
+/// in `AppState`. Membership is exact-IP; a peer not in this set is treated as a
+/// direct (untrusted) client and its forwarding headers are ignored.
+#[derive(Debug, Clone)]
+pub struct TrustedProxies {
+    proxies: HashSet<IpAddr>,
 }

-/// Extract IP address as string
-#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
-pub fn extract_ip_string(addr: &SocketAddr) -> String {
-    addr.ip().to_string()
+impl TrustedProxies {
+    /// Build a trusted-proxy set from an explicit list of IPs.
+    pub fn new(proxies: HashSet<IpAddr>) -> Self {
+        Self { proxies }
+    }
+
+    /// The default trusted set: IPv4 and IPv6 loopback. NPM terminates TLS on
+    /// the same host and proxies to GuruConnect over loopback, so loopback is the
+    /// only peer we trust unless `CONNECT_TRUSTED_PROXIES` overrides it.
+    pub fn default_loopback() -> Self {
+        let mut proxies = HashSet::with_capacity(2);
+        proxies.insert(IpAddr::V4(Ipv4Addr::LOCALHOST)); // 127.0.0.1
+        proxies.insert(IpAddr::V6(Ipv6Addr::LOCALHOST)); // ::1
+        Self { proxies }
+    }
+
+    /// Parse the trusted-proxy set from a comma-separated env-var value.
+    ///
+    /// Empty/whitespace entries are skipped; unparseable entries are skipped with
+    /// a warning (a typo must not silently widen or break trust). If the input
+    /// yields no usable IPs at all (unset, empty, or all-garbage), the default
+    /// loopback set is returned so the server is never left with an empty trust
+    /// set (which would treat the proxy itself as untrusted and key every proxied
+    /// request by the proxy's own address).
+    pub fn from_env_value(raw: Option<&str>) -> Self {
+        let raw = match raw {
+            Some(s) => s,
+            None => return Self::default_loopback(),
+        };
+
+        let mut proxies = HashSet::new();
+        for entry in raw.split(',') {
+            let entry = entry.trim();
+            if entry.is_empty() {
+                continue;
+            }
+            match entry.parse::<IpAddr>() {
+                Ok(ip) => {
+                    proxies.insert(ip);
+                }
+                Err(_) => {
+                    tracing::warn!(
+                        "Ignoring unparseable entry in CONNECT_TRUSTED_PROXIES: {:?}",
+                        entry
+                    );
+                }
+            }
+        }
+
+        if proxies.is_empty() {
+            tracing::warn!(
+                "CONNECT_TRUSTED_PROXIES contained no usable IPs; \
+                 falling back to default loopback trust set"
+            );
+            return Self::default_loopback();
+        }
+
+        Self { proxies }
+    }
+
+    /// Is `ip` a trusted proxy?
+    pub fn is_trusted(&self, ip: &IpAddr) -> bool {
+        self.proxies.contains(ip)
+    }
+
+    /// Sorted, comma-joined view of the trusted set for startup logging.
+    pub fn describe(&self) -> String {
+        let mut ips: Vec<String> = self.proxies.iter().map(|ip| ip.to_string()).collect();
+        ips.sort();
+        ips.join(", ")
+    }
+}
+
+impl Default for TrustedProxies {
+    fn default() -> Self {
+        Self::default_loopback()
+    }
+}
+
+/// Resolve the real client `IpAddr` for a request.
+///
+/// `peer` is the immediate TCP peer (from axum's [`ConnectInfo<SocketAddr>`]),
+/// `headers` are the request headers, and `trusted` is the configured
+/// trusted-proxy allowlist.
+///
+/// See the module docs for the full trust model. In short:
+/// - peer NOT trusted → return `peer.ip()` (headers ignored — spoofable);
+/// - peer trusted → `X-Real-IP`, else rightmost-untrusted `X-Forwarded-For`
+///   entry, else `peer.ip()`.
+pub fn client_ip(peer: &SocketAddr, headers: &HeaderMap, trusted: &TrustedProxies) -> IpAddr {
+    let peer_ip = peer.ip();
+
+    // Untrusted peer: a direct client could set any forwarding header, so they
+    // are not evidence of anything. Use what the OS actually observed.
+    if !trusted.is_trusted(&peer_ip) {
+        return peer_ip;
+    }
+
+    // Trusted peer: prefer the single-value X-Real-IP if the proxy set it.
+    if let Some(ip) = header_single_ip(headers, X_REAL_IP) {
+        return ip;
+    }
+
+    // Otherwise walk X-Forwarded-For right-to-left, skipping trusted hops, and
+    // return the first (rightmost) entry that is not itself a trusted proxy —
+    // that is the address the outermost trusted proxy observed and a client
+    // cannot forge past it.
+    if let Some(ip) = xff_rightmost_untrusted(headers, trusted) {
+        return ip;
+    }
+
+    // No usable forwarding header — fall back to the peer.
+    peer_ip
+}
+
+/// Parse a single-value IP header (e.g. `X-Real-IP`). Returns `None` if the
+/// header is absent, not visible ASCII, or not a parseable `IpAddr`.
+fn header_single_ip(headers: &HeaderMap, name: &str) -> Option<IpAddr> {
+    let value = headers.get(name)?;
+    let text = value.to_str().ok()?.trim();
+    text.parse::<IpAddr>().ok()
+}
+
+/// From `X-Forwarded-For`, return the rightmost entry that is NOT a trusted
+/// proxy. Trusted hops on the right are the chain of proxies between us and the
+/// client; the first non-trusted entry walking leftward is the real client as
+/// observed by the outermost trusted proxy. Unparseable tokens are skipped.
+///
+/// Multiple `X-Forwarded-For` headers (or one header with comma-separated
+/// values) are flattened into a single left-to-right list before walking right
+/// to left, so a proxy that appends a second header line is handled correctly.
+fn xff_rightmost_untrusted(headers: &HeaderMap, trusted: &TrustedProxies) -> Option<IpAddr> {
+    // Collect all XFF tokens in wire order (left = closest to original client,
+    // right = closest to us).
+    let mut tokens: Vec<IpAddr> = Vec::new();
+    for value in headers.get_all(X_FORWARDED_FOR).iter() {
+        let text = match value.to_str() {
+            Ok(t) => t,
+            Err(_) => continue,
+        };
+        for raw in text.split(',') {
+            let raw = raw.trim();
+            if raw.is_empty() {
+                continue;
+            }
+            // XFF entries are bare IPs in this deployment; ignore anything that
+            // does not parse (e.g. obfuscated identifiers, port suffixes).
+            if let Ok(ip) = raw.parse::<IpAddr>() {
+                tokens.push(ip);
+            }
+        }
+    }
+
+    // Walk right-to-left, skipping trusted proxies, and return the first client
+    // address (the hop the outermost trusted proxy actually saw).
+    tokens.iter().rev().find(|ip| !trusted.is_trusted(ip)).copied()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::net::Ipv4Addr;
+
+    fn peer(ip: [u8; 4]) -> SocketAddr {
+        SocketAddr::from((ip, 40000))
+    }
+
+    fn v4(a: u8, b: u8, c: u8, d: u8) -> IpAddr {
+        IpAddr::V4(Ipv4Addr::new(a, b, c, d))
+    }
+
+    fn headers_with(name: &str, value: &str) -> HeaderMap {
+        let mut h = HeaderMap::new();
+        h.insert(
+            axum::http::HeaderName::from_bytes(name.as_bytes()).unwrap(),
+            value.parse().unwrap(),
+        );
+        h
+    }
+
+    /// Default trusted set is loopback (v4 + v6).
+    #[test]
+    fn default_trusts_loopback() {
+        let t = TrustedProxies::default_loopback();
+        assert!(t.is_trusted(&IpAddr::V4(Ipv4Addr::LOCALHOST)));
+        assert!(t.is_trusted(&IpAddr::V6(Ipv6Addr::LOCALHOST)));
+        assert!(!t.is_trusted(&v4(203, 0, 113, 7)));
+    }
+
+    /// Unset env -> default loopback trust.
+    #[test]
+    fn from_env_none_defaults_to_loopback() {
+        let t = TrustedProxies::from_env_value(None);
+        assert!(t.is_trusted(&IpAddr::V4(Ipv4Addr::LOCALHOST)));
+        assert!(t.is_trusted(&IpAddr::V6(Ipv6Addr::LOCALHOST)));
+    }
+
+    /// Empty/whitespace/all-garbage env -> default loopback trust (never empty).
+    #[test]
+    fn from_env_empty_or_garbage_defaults_to_loopback() {
+        for raw in ["", "   ", " , ,, ", "not-an-ip, also-bad"] {
+            let t = TrustedProxies::from_env_value(Some(raw));
+            assert!(
+                t.is_trusted(&IpAddr::V4(Ipv4Addr::LOCALHOST)),
+                "input {raw:?} should fall back to loopback"
+            );
+        }
+    }
+
+    /// Valid env list parses; a garbage entry alongside valid ones is skipped,
+    /// and loopback is NOT added implicitly when an explicit set is given.
+    #[test]
+    fn from_env_parses_valid_and_skips_garbage() {
+        let t = TrustedProxies::from_env_value(Some("10.0.0.1, garbage, 172.16.3.1"));
+        assert!(t.is_trusted(&v4(10, 0, 0, 1)));
+        assert!(t.is_trusted(&v4(172, 16, 3, 1)));
+        // Explicit set given -> loopback only trusted if it was listed.
+        assert!(!t.is_trusted(&IpAddr::V4(Ipv4Addr::LOCALHOST)));
+    }
+
+    /// SECURITY: a header-bearing request from an UNTRUSTED peer cannot spoof its
+    /// IP — the function returns the peer and ignores the forwarding header.
+    #[test]
+    fn untrusted_peer_cannot_spoof_via_header() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([203, 0, 113, 9]); // not loopback -> untrusted
+
+        let h = headers_with(X_FORWARDED_FOR, "1.2.3.4");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(203, 0, 113, 9));
+
+        let h = headers_with(X_REAL_IP, "5.6.7.8");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(203, 0, 113, 9));
+    }
+
+    /// Trusted peer + X-Real-IP -> that IP.
+    #[test]
+    fn trusted_peer_uses_x_real_ip() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]); // loopback -> trusted
+        let h = headers_with(X_REAL_IP, "198.51.100.23");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(198, 51, 100, 23));
+    }
+
+    /// X-Real-IP takes precedence over X-Forwarded-For when both are present.
+    #[test]
+    fn trusted_peer_prefers_x_real_ip_over_xff() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]);
+        let mut h = headers_with(X_REAL_IP, "198.51.100.23");
+        h.insert(X_FORWARDED_FOR, "9.9.9.9".parse().unwrap());
+        assert_eq!(client_ip(&p, &h, &trusted), v4(198, 51, 100, 23));
+    }
+
+    /// Trusted peer + XFF -> rightmost entry that is not a trusted proxy.
+    #[test]
+    fn trusted_peer_xff_rightmost_untrusted_single_hop() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]);
+        // The proxy observed the client as 203.0.113.5, then 127.0.0.1 was appended.
+        let h = headers_with(X_FORWARDED_FOR, "203.0.113.5, 127.0.0.1");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(203, 0, 113, 5));
+    }
+
+    /// Multiple trusted hops on the right are all skipped; the rightmost
+    /// non-trusted entry is the observed client.
+    #[test]
+    fn trusted_peer_xff_skips_multiple_trusted_hops() {
+        // Trust loopback AND an internal LB at 10.0.0.2.
+        let mut set = HashSet::new();
+        set.insert(IpAddr::V4(Ipv4Addr::LOCALHOST));
+        set.insert(v4(10, 0, 0, 2));
+        let trusted = TrustedProxies::new(set);
+
+        let p = peer([127, 0, 0, 1]);
+        // Real client 198.51.100.7, then through LB 10.0.0.2, then to us (127.0.0.1).
+        let h = headers_with(X_FORWARDED_FOR, "198.51.100.7, 10.0.0.2, 127.0.0.1");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(198, 51, 100, 7));
+    }
+
+    /// A client that pre-seeds a forged left-most entry cannot win: the rightmost
+    /// untrusted entry is still the address the trusted proxy observed.
+    #[test]
+    fn trusted_peer_xff_forged_left_entry_ignored() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]);
+        // Attacker pre-set "1.1.1.1"; the proxy observed them as 203.0.113.5 and
+        // appended that, then 127.0.0.1. Rightmost-untrusted = 203.0.113.5.
+        let h = headers_with(X_FORWARDED_FOR, "1.1.1.1, 203.0.113.5, 127.0.0.1");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(203, 0, 113, 5));
+    }
+
+    /// Trusted peer, no forwarding header -> fall back to the peer IP.
+    #[test]
+    fn trusted_peer_no_header_falls_back_to_peer() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]);
+        let h = HeaderMap::new();
+        assert_eq!(client_ip(&p, &h, &trusted), v4(127, 0, 0, 1));
+    }
+
+    /// Trusted peer, empty / garbage forwarding headers -> fall back to peer.
+    #[test]
+    fn trusted_peer_garbage_header_falls_back_to_peer() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]);
+
+        // Empty XFF.
+        let h = headers_with(X_FORWARDED_FOR, "   ");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(127, 0, 0, 1));
+
+        // Garbage XFF tokens (none parse).
+        let h = headers_with(X_FORWARDED_FOR, "not-an-ip, also-bad");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(127, 0, 0, 1));
+
+        // Garbage X-Real-IP -> ignored, no XFF -> peer.
+        let h = headers_with(X_REAL_IP, "nonsense");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(127, 0, 0, 1));
+    }
+
+    /// XFF consisting ONLY of trusted hops -> nothing untrusted to return -> peer.
+    #[test]
+    fn trusted_peer_xff_all_trusted_falls_back_to_peer() {
+        let trusted = TrustedProxies::default_loopback();
+        let p = peer([127, 0, 0, 1]);
+        let h = headers_with(X_FORWARDED_FOR, "127.0.0.1, 127.0.0.1");
+        assert_eq!(client_ip(&p, &h, &trusted), v4(127, 0, 0, 1));
+    }
 }