Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 89c3718266 | | |
| | 4106fc4bc4 | | |
| | 0f02f23765 | | |
| | 59e40c8019 | | |
| | c286a29b9d | | |
| | 18429f6fe3 | | |
| | 3b9e4068c9 | | |
| | 87f229509b | | |
| | 40c7d860cc | | |
| | 0059b21db6 | | |
| | f950511e3e | | |
| | 16017456aa | | |
@@ -27,6 +27,15 @@ on:
|
|||||||
# computes the next semver from conventional commits at dispatch time.
|
# computes the next semver from conventional commits at dispatch time.
|
||||||
# build-and-test.yml remains the automatic PR/push CI gate.
|
# build-and-test.yml remains the automatic PR/push CI gate.
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
channel:
|
||||||
|
description: 'Release channel (stable = full versioned release; beta = signed prerelease test build, no version bump/changelog)'
|
||||||
|
required: true
|
||||||
|
default: 'stable'
|
||||||
|
type: choice
|
||||||
|
options:
|
||||||
|
- stable
|
||||||
|
- beta
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -36,8 +45,11 @@ jobs:
|
|||||||
name: Version + Changelog
|
name: Version + Changelog
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
version: ${{ steps.bump.outputs.version }}
|
# Coalesce across the stable (id: bump) and beta (id: beta) steps: exactly one of them runs per
|
||||||
released: ${{ steps.bump.outputs.released }}
|
# dispatch, so the first non-empty value wins. prerelease is 'true' only on the beta path.
|
||||||
|
version: ${{ steps.bump.outputs.version || steps.beta.outputs.version }}
|
||||||
|
released: ${{ steps.bump.outputs.released || steps.beta.outputs.released }}
|
||||||
|
prerelease: ${{ steps.beta.outputs.prerelease || 'false' }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout (full history + tags)
|
- name: Checkout (full history + tags)
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -59,7 +71,8 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Install git-cliff
|
- name: Install git-cliff
|
||||||
if: steps.guard.outputs.skip != 'true'
|
# Stable-only: beta produces no changelog, so git-cliff is unnecessary on the beta path.
|
||||||
|
if: steps.guard.outputs.skip != 'true' && github.event.inputs.channel == 'stable'
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
CLIFF_VERSION="2.6.1"
|
CLIFF_VERSION="2.6.1"
|
||||||
@@ -72,12 +85,16 @@ jobs:
|
|||||||
|
|
||||||
- name: Determine next version and bump components
|
- name: Determine next version and bump components
|
||||||
id: bump
|
id: bump
|
||||||
if: steps.guard.outputs.skip != 'true'
|
# Stable-only: the beta path (id: beta) handles versioning without a manifest bump/commit.
|
||||||
|
if: steps.guard.outputs.skip != 'true' && github.event.inputs.channel == 'stable'
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# ----- locate the last release tag (vX.Y.Z) -----
|
# ----- locate the last release tag (vX.Y.Z) -----
|
||||||
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | head -n1 || true)"
|
# Match ONLY strict final-release tags (vMAJOR.MINOR.PATCH). Beta tags look like
|
||||||
|
# v0.3.0-beta.7; if one of those were picked up here it would corrupt the next stable
|
||||||
|
# base version, so prerelease tags are explicitly excluded from this lookup.
|
||||||
|
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | head -n1 || true)"
|
||||||
if [ -z "${LAST_TAG}" ]; then
|
if [ -z "${LAST_TAG}" ]; then
|
||||||
echo "[INFO] No prior release tag found; baseline is current manifest version."
|
echo "[INFO] No prior release tag found; baseline is current manifest version."
|
||||||
BASE_VERSION="$(grep -m1 '^version' agent/Cargo.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)".*/\1/')"
|
BASE_VERSION="$(grep -m1 '^version' agent/Cargo.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)".*/\1/')"
|
||||||
@@ -186,8 +203,39 @@ jobs:
|
|||||||
sed -i -E "0,/^version = \"[0-9]+\.[0-9]+\.[0-9]+\"/s//version = \"${NEXT}\"/" Cargo.toml || true
|
sed -i -E "0,/^version = \"[0-9]+\.[0-9]+\.[0-9]+\"/s//version = \"${NEXT}\"/" Cargo.toml || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Beta channel - tag prerelease build (no bump, no commit, no changelog)
|
||||||
|
id: beta
|
||||||
|
# Beta-only path. Reuses the IDENTICAL downstream build + sign + publish jobs, but does
|
||||||
|
# NOT compute a semver bump, mutate any manifest, generate a changelog, or make a release
|
||||||
|
# commit. It just tags the CURRENT HEAD with a unique prerelease version so the Windows
|
||||||
|
# build job can check out `ref: v${VER}` exactly as it does for stable.
|
||||||
|
if: github.event.inputs.channel == 'beta' && steps.guard.outputs.skip != 'true'
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Base version is read straight from the agent manifest — NOT bumped, NOT written back.
|
||||||
|
BASE="$(grep -m1 '^version' agent/Cargo.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)".*/\1/')"
|
||||||
|
# GITHUB_RUN_NUMBER gives every new dispatch a unique prerelease suffix (a re-run of the same run reuses its number) without counting existing tags.
|
||||||
|
VER="${BASE}-beta.${GITHUB_RUN_NUMBER}"
|
||||||
|
echo "[INFO] Beta build version: ${VER} (base ${BASE}, run ${GITHUB_RUN_NUMBER})"
|
||||||
|
|
||||||
|
# Tag the current HEAD (no release commit). Push the tag so build-agent-windows can
|
||||||
|
# check out ref: v${VER}.
|
||||||
|
git config user.name "guruconnect-ci"
|
||||||
|
git config user.email "ci@azcomputerguru.com"
|
||||||
|
# Beta tags are disposable test markers. Forcing both the local tag (`git tag -f`) and the push (`git push --force`) makes re-running a failed beta dispatch idempotent: a re-run reuses GITHUB_RUN_NUMBER, so the tag already exists.
|
||||||
|
git tag -f "v${VER}"
|
||||||
|
REMOTE="https://${{ secrets.CI_PUSH_TOKEN }}@git.azcomputerguru.com/${GITHUB_REPOSITORY}.git"
|
||||||
|
git push --force "${REMOTE}" "v${VER}"
|
||||||
|
echo "[OK] Pushed beta prerelease tag v${VER}"
|
||||||
|
|
||||||
|
echo "version=${VER}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "released=true" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "prerelease=true" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
- name: Generate changelog (git-cliff)
|
- name: Generate changelog (git-cliff)
|
||||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true'
|
# Stable-only: beta produces no changelog artifact.
|
||||||
|
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true' && github.event.inputs.channel == 'stable'
|
||||||
env:
|
env:
|
||||||
VERSION: ${{ steps.bump.outputs.version }}
|
VERSION: ${{ steps.bump.outputs.version }}
|
||||||
run: |
|
run: |
|
||||||
@@ -232,7 +280,10 @@ jobs:
|
|||||||
|
|
||||||
# Re-derive the set of changed components (same logic as the bump step). On the first
|
# Re-derive the set of changed components (same logic as the bump step). On the first
|
||||||
# release (no prior tag) all components are considered changed.
|
# release (no prior tag) all components are considered changed.
|
||||||
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | head -n1 || true)"
|
# Match ONLY strict final-release tags (vMAJOR.MINOR.PATCH); exclude beta prerelease
|
||||||
|
# tags (v0.3.0-beta.7) so the changelog diff range is taken against the last real
|
||||||
|
# release, not an intervening beta build.
|
||||||
|
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | head -n1 || true)"
|
||||||
if [ -z "${LAST_TAG}" ]; then
|
if [ -z "${LAST_TAG}" ]; then
|
||||||
CHANGED_FILES="$(git ls-files)"
|
CHANGED_FILES="$(git ls-files)"
|
||||||
FIRST_RELEASE=true
|
FIRST_RELEASE=true
|
||||||
@@ -252,7 +303,8 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Commit release + create tag
|
- name: Commit release + create tag
|
||||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true'
|
# Stable-only: beta tags HEAD directly in the beta step and never makes a release commit.
|
||||||
|
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true' && github.event.inputs.channel == 'stable'
|
||||||
env:
|
env:
|
||||||
VERSION: ${{ steps.bump.outputs.version }}
|
VERSION: ${{ steps.bump.outputs.version }}
|
||||||
run: |
|
run: |
|
||||||
@@ -276,7 +328,8 @@ jobs:
|
|||||||
echo "[OK] Pushed release commit and tag v${VERSION}"
|
echo "[OK] Pushed release commit and tag v${VERSION}"
|
||||||
|
|
||||||
- name: Upload changelog artifact
|
- name: Upload changelog artifact
|
||||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true'
|
# Stable-only: there is no changelog on the beta path, so nothing to upload.
|
||||||
|
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true' && github.event.inputs.channel == 'stable'
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: changelog
|
name: changelog
|
||||||
@@ -445,6 +498,9 @@ jobs:
|
|||||||
echo "sha256=${SUM}" >> "$GITHUB_OUTPUT"
|
echo "sha256=${SUM}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
- name: Download changelog artifact
|
- name: Download changelog artifact
|
||||||
|
# Stable-only: the beta path uploads no `changelog` artifact. The release-creation step
|
||||||
|
# already guards on `[ -f changelog-artifact/CHANGELOG.md ]`, so skipping this is safe.
|
||||||
|
if: github.event.inputs.channel == 'stable'
|
||||||
uses: actions/download-artifact@v3
|
uses: actions/download-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: changelog
|
name: changelog
|
||||||
@@ -472,17 +528,26 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
VERSION: ${{ needs.version.outputs.version }}
|
VERSION: ${{ needs.version.outputs.version }}
|
||||||
SHA256: ${{ steps.sha.outputs.sha256 }}
|
SHA256: ${{ steps.sha.outputs.sha256 }}
|
||||||
|
# PRERELEASE is 'true' on the beta path, 'false' on stable; drives the Gitea release flag.
|
||||||
|
PRERELEASE: ${{ needs.version.outputs.prerelease }}
|
||||||
GITEA_TOKEN: ${{ secrets.CI_PUSH_TOKEN }}
|
GITEA_TOKEN: ${{ secrets.CI_PUSH_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
API_BASE="https://git.azcomputerguru.com/api/v1/repos/${GITHUB_REPOSITORY}"
|
API_BASE="https://git.azcomputerguru.com/api/v1/repos/${GITHUB_REPOSITORY}"
|
||||||
TAG="v${VERSION}"
|
TAG="v${VERSION}"
|
||||||
echo "[INFO] Creating Gitea release ${TAG} on ${GITHUB_REPOSITORY}"
|
echo "[INFO] Creating Gitea release ${TAG} on ${GITHUB_REPOSITORY} (prerelease=${PRERELEASE})"
|
||||||
|
|
||||||
BODY="$(printf 'GuruConnect %s\n\nSHA-256 (guruconnect.exe): %s\n\nSee CHANGELOG.md and /api/changelog for details.' "${TAG}" "${SHA256}")"
|
# Beta builds get a clear "prerelease test build" note in the body; the -beta.N suffix
|
||||||
|
# is already carried in TAG, so the release name "Release v..." needs no extra handling.
|
||||||
|
if [ "${PRERELEASE}" = "true" ]; then
|
||||||
|
BODY="$(printf 'GuruConnect %s (PRERELEASE / beta test build)\n\nSHA-256 (guruconnect.exe): %s\n\nSigned via Azure Trusted Signing. Not a stable release — no changelog/version bump.' "${TAG}" "${SHA256}")"
|
||||||
|
else
|
||||||
|
BODY="$(printf 'GuruConnect %s\n\nSHA-256 (guruconnect.exe): %s\n\nSee CHANGELOG.md and /api/changelog for details.' "${TAG}" "${SHA256}")"
|
||||||
|
fi
|
||||||
|
|
||||||
# Build the JSON payload with python (handles escaping of the multi-line body safely).
|
# Build the JSON payload with python (handles escaping of the multi-line body safely).
|
||||||
CREATE_PAYLOAD="$(TAG="$TAG" BODY="$BODY" python3 -c 'import json,os; print(json.dumps({"tag_name": os.environ["TAG"], "name": "Release " + os.environ["TAG"], "body": os.environ["BODY"], "draft": False, "prerelease": False}))')"
|
# prerelease is derived from the PRERELEASE env var (beta -> true, stable -> false).
|
||||||
|
CREATE_PAYLOAD="$(TAG="$TAG" BODY="$BODY" PRERELEASE="$PRERELEASE" python3 -c 'import json,os; print(json.dumps({"tag_name": os.environ["TAG"], "name": "Release " + os.environ["TAG"], "body": os.environ["BODY"], "draft": False, "prerelease": os.environ.get("PRERELEASE","false") == "true"}))')"
|
||||||
|
|
||||||
RELEASE_JSON="$(curl -fsS -X POST \
|
RELEASE_JSON="$(curl -fsS -X POST \
|
||||||
"${API_BASE}/releases" \
|
"${API_BASE}/releases" \
|
||||||
|
|||||||
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -1407,7 +1407,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "guruconnect"
|
name = "guruconnect"
|
||||||
version = "0.2.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -1447,7 +1447,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "guruconnect-server"
|
name = "guruconnect-server"
|
||||||
version = "0.2.0"
|
version = "0.3.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"argon2",
|
"argon2",
|
||||||
|
|||||||
@@ -16,11 +16,16 @@ stack. It ships independently of GuruRMM and integrates with it via a versioned
|
|||||||
> match, blacklist-on-WS, agent-plane rejects user JWTs via per-agent `cak_` keys). The feature specs below
|
> match, blacklist-on-WS, agent-plane rejects user JWTs via per-agent `cak_` keys). The feature specs below
|
||||||
> (SPEC-003–009) are **work-items inside the later v2 phases** — see the mapping.
|
> (SPEC-003–009) are **work-items inside the later v2 phases** — see the mapping.
|
||||||
>
|
>
|
||||||
> **Remaining to formally exit Phase 1:** secure-session-core **Task 8** (end-to-end verification +
|
> **Phase 1 formally EXITED (2026-05-31).** secure-session-core **Task 8** is complete — end-to-end
|
||||||
> `/gc-audit --pass=security` re-audit + the manual CRITICAL checks) and Code-Review sign-off on Tasks 3–5
|
> functional verification (live CRITICAL boundary checks against the deployed binary: login-JWT→401,
|
||||||
> (implemented without a local toolchain at the time; since built + deployed). Live HW-H.264 validation is
|
> wrong-session viewer token→403, JWT-as-agent-key→401) **plus the `/gc-audit --pass=security` re-audit:
|
||||||
> also pending — raw+Zstd remains the shipping default. ~~Sprint 0 (relay-auth CRITICAL hotfix)~~ **not
|
> PASS, 0 CRITICAL/HIGH/MEDIUM/LOW** ([report](../reports/2026-05-31-gc-audit.md)). Code-Review sign-off on
|
||||||
> needed — those fixes shipped in Tasks 2–3.**
|
> Tasks 3–5 landed earlier. On top of Phase 1, **SPEC-004 (Tasks 2/4/5 — machine_uid dedup, session
|
||||||
|
> reaping, operator removal API+UI) is implemented, reviewed, deployed, and the 11 live ghost rows were
|
||||||
|
> purged**; the agent is now **auto-versioned + Azure-Trusted-Signing-signed via `release.yml`** with
|
||||||
|
> **v0.3.0 published** as the stable release. ~~Sprint 0 (relay-auth CRITICAL hotfix)~~ **not needed.**
|
||||||
|
> Still pending (NOT a Phase-1 blocker): live HW-H.264 cross-GPU validation — **raw+Zstd remains the
|
||||||
|
> shipping default** (`DEFAULT_PREFER_H264=false`) until H.264 is validated across GPUs.
|
||||||
|
|
||||||
### v2 phase mapping of current specs
|
### v2 phase mapping of current specs
|
||||||
|
|
||||||
@@ -43,8 +48,9 @@ stack. It ships independently of GuruRMM and integrates with it via a versioned
|
|||||||
|
|
||||||
Bringing GC to parity with GuruRMM's release engineering. Full plan: [SPEC-001](specs/SPEC-001-operational-tooling-parity.md).
|
Bringing GC to parity with GuruRMM's release engineering. Full plan: [SPEC-001](specs/SPEC-001-operational-tooling-parity.md).
|
||||||
|
|
||||||
- [ ] **Code signing — Azure Trusted Signing in CI** — P1 — sign the Windows agent `.exe` via `jsign` (TRUSTEDSIGNING) in Gitea Actions, reusing the shared ACG cert profile. (SPEC-001 §2)
|
- [x] **Code signing — Azure Trusted Signing in CI** — P1 — Windows agent `.exe` signed via `jsign` (TRUSTEDSIGNING) in `release.yml`, fail-closed (never publishes unsigned). Shipped with v0.3.0. (SPEC-001 §2)
|
||||||
- [ ] **Automatic versioning** — P1 — conventional-commit-driven version bump across agent/server/dashboard, embedded via `build.rs`. (SPEC-001 §3)
|
- [ ] **Signed beta/test release channel** — **P1 — NOW** — every binary we hand to a tester must be signed, but signing today only runs on a deliberate full `release.yml` dispatch; the automatic `build-and-test.yml` agent artifact is explicitly **unsigned**. Add a `channel: stable | beta` `workflow_dispatch` input to `release.yml`: `beta` signs the agent and publishes a prerelease-tagged Gitea release (e.g. `v0.4.0-beta.1`) **skipping the semver bump + changelog**; `stable` keeps the existing full path. Keeps signing secrets out of PR-triggered runs. (SPEC-001 §2)
|
||||||
|
- [x] **Automatic versioning** — P1 — conventional-commit-driven version bump computed at dispatch in `release.yml`, embedded via `build.rs`. Shipped with v0.3.0. (SPEC-001 §3)
|
||||||
- [ ] **Changelog generation & API** — P2 — `CHANGELOG.md` + per-version changelogs from conventional commits, served at `/api/changelog/...`. (SPEC-001 §4)
|
- [ ] **Changelog generation & API** — P2 — `CHANGELOG.md` + per-version changelogs from conventional commits, served at `/api/changelog/...`. (SPEC-001 §4)
|
||||||
- [ ] **Feature-request workflow** — P2 — `/gc-feature-request` skill producing `docs/specs/SPEC-NNN-*.md` and updating this roadmap. (SPEC-001 §1)
|
- [ ] **Feature-request workflow** — P2 — `/gc-feature-request` skill producing `docs/specs/SPEC-NNN-*.md` and updating this roadmap. (SPEC-001 §1)
|
||||||
- [ ] **Roadmap / ADR / spec tracking** — P1 — this file + `ARCHITECTURE_DECISIONS.md` + `docs/specs/`. (SPEC-001 §5) — *bootstrapped*
|
- [ ] **Roadmap / ADR / spec tracking** — P1 — this file + `ARCHITECTURE_DECISIONS.md` + `docs/specs/`. (SPEC-001 §5) — *bootstrapped*
|
||||||
@@ -81,10 +87,11 @@ Bringing GC to parity with GuruRMM's release engineering. Full plan: [SPEC-001](
|
|||||||
- [x] Sessions / machines / support-codes / events
|
- [x] Sessions / machines / support-codes / events
|
||||||
- [ ] **Full machine inventory in the connection DB** — P2 — persist per-machine device inventory (OS+locale+install, CPU/RAM, mfr/model/serial, external WAN IP captured server-side + private LAN IP + MAC, logged-on user, idle, time zone, uptime, local-admin) on `connect_machines`, refreshed each `AgentStatus`, shown in the dashboard machine detail (ScreenConnect "Guest Info" parity). Data layer for SPEC-002 Phase 2; closes GC side of agent-IP gap (todo 7459428e). **[→ v2 Phase 2]** ([SPEC-003](specs/SPEC-003-machine-inventory.md))
|
- [ ] **Full machine inventory in the connection DB** — P2 — persist per-machine device inventory (OS+locale+install, CPU/RAM, mfr/model/serial, external WAN IP captured server-side + private LAN IP + MAC, logged-on user, idle, time zone, uptime, local-admin) on `connect_machines`, refreshed each `AgentStatus`, shown in the dashboard machine detail (ScreenConnect "Guest Info" parity). Data layer for SPEC-002 Phase 2; closes GC side of agent-IP gap (todo 7459428e). **[→ v2 Phase 2]** ([SPEC-003](specs/SPEC-003-machine-inventory.md))
|
||||||
- [ ] **Stable machine identity + session lifecycle reaping + operator removal** — P1 — give the agent a deterministic machine-derived `machine_uid` (Windows `MachineGuid`-based) so the same box can't register duplicates (root cause: `agent_id` is a config-file random UUID that a portable/misconfigured run regenerates each launch); key registration on it; add TTL reaping + same-machine supersede as defense-in-depth; and admin-gated per-row + multi-select bulk removal of stale sessions/units. Identity must be bound to the per-machine agent key (spoof guard). Fixes ghost-session accumulation seen on the live console (15 sessions / 0 live, ~10 orphans for one machine). **[→ v2 Phase 1]** ([SPEC-004](specs/SPEC-004-session-lifecycle-and-removal.md))
|
- [ ] **Stable machine identity + session lifecycle reaping + operator removal** — P1 — give the agent a deterministic machine-derived `machine_uid` (Windows `MachineGuid`-based) so the same box can't register duplicates (root cause: `agent_id` is a config-file random UUID that a portable/misconfigured run regenerates each launch); key registration on it; add TTL reaping + same-machine supersede as defense-in-depth; and admin-gated per-row + multi-select bulk removal of stale sessions/units. Identity must be bound to the per-machine agent key (spoof guard). Fixes ghost-session accumulation seen on the live console (15 sessions / 0 live, ~10 orphans for one machine). **[→ v2 Phase 1]** ([SPEC-004](specs/SPEC-004-session-lifecycle-and-removal.md))
|
||||||
|
- [ ] **Zero-touch per-site agent enrollment** — P1 — ScreenConnect-class managed enrollment: one signed installer per site, machines self-register on first run and the server mints a per-machine `cak_` bound to a deterministic `machine_uid` (dedups re-installs). Per-site **rotatable** enrollment key (long secret + `vN (XXXX)` fingerprint) — rotating blocks new enrollments from old installers, leaves enrolled agents untouched. Auto-approve + new-enrollment/site-move alert. **Sign base agent once (CI, shipped) + per-site signed wrapper that writes site config around the signed bytes — resolves SPEC-007's signature-vs-appended-config question.** Anticipated/deferred: enrollment policy + licensing, `--enroll-key`/`--reassign` flag overrides, technician-assisted interactive install. **[→ v2 Phase 1]** ([SPEC-016](specs/SPEC-016-zero-touch-enrollment.md))
|
||||||
- [ ] **Machines list view — dual connection indicators + rich rows** — P2 — ScreenConnect "Access"-list parity: per-row Host/Guest two-segment connection bar (Guest=agent online, Host=viewer connected, with names + durations) and rich inline metadata (company, site, device type, tags, logged-on user + idle, client version in red when outdated). Server-enriches `/api/machines` with live session state + SPEC-003 inventory. **[→ v2 Phase 2]** ([SPEC-005](specs/SPEC-005-machines-list-view-parity.md))
|
- [ ] **Machines list view — dual connection indicators + rich rows** — P2 — ScreenConnect "Access"-list parity: per-row Host/Guest two-segment connection bar (Guest=agent online, Host=viewer connected, with names + durations) and rich inline metadata (company, site, device type, tags, logged-on user + idle, client version in red when outdated). Server-enriches `/api/machines` with live session state + SPEC-003 inventory. **[→ v2 Phase 2]** ([SPEC-005](specs/SPEC-005-machines-list-view-parity.md))
|
||||||
- [ ] Machines "by Company" tree nav with per-company counts — P3 — left-nav grouping sidebar (screenshot parity). Follow-up sub-item of SPEC-005.
|
- [ ] Machines "by Company" tree nav with per-company counts — P3 — left-nav grouping sidebar (screenshot parity). Follow-up sub-item of SPEC-005.
|
||||||
- [ ] **Universal machine search ("everything is searchable")** — P2 — server-side `?q=` on `/api/machines` matching case-insensitive substring across ALL attributes (OS, logged-on user, external/private IP, company, site, tag, serial, MAC, version, …), pg_trgm GIN-indexed; multi-term AND + optional field-scoped syntax (`os:`, `user:`, `ip:`). Replaces the hostname-only client filter. Depends on SPEC-003 (attrs must be persisted). **[→ v2 Phase 2]** ([SPEC-006](specs/SPEC-006-universal-machine-search.md))
|
- [ ] **Universal machine search ("everything is searchable")** — P2 — server-side `?q=` on `/api/machines` matching case-insensitive substring across ALL attributes (OS, logged-on user, external/private IP, company, site, tag, serial, MAC, version, …), pg_trgm GIN-indexed; multi-term AND + optional field-scoped syntax (`os:`, `user:`, `ip:`). Replaces the hostname-only client filter. Depends on SPEC-003 (attrs must be persisted). **[→ v2 Phase 2]** ([SPEC-006](specs/SPEC-006-universal-machine-search.md))
|
||||||
- [ ] **Managed-agent installer builder ("Build Installer")** — P2 — dashboard wizard to build a pre-labeled persistent-agent installer (Name/Company/Site/Department/Device Type/Tag/Type) with Download / Copy URL / Send Link, reusing the existing embed-config download path; adds department + device_type to EmbeddedConfig/AgentStatus so labels persist at install time. Pairs with revocable per-machine keys; signature-vs-appended-config is the key open question. **[→ v2 Phase 2]** ([SPEC-007](specs/SPEC-007-managed-agent-installer-builder.md))
|
- [ ] **Managed-agent installer builder ("Build Installer")** — P2 — dashboard wizard to build a pre-labeled persistent-agent installer (Name/Company/Site/Department/Device Type/Tag/Type) with Download / Copy URL / Send Link, reusing the existing embed-config download path; adds department + device_type to EmbeddedConfig/AgentStatus so labels persist at install time. Pairs with revocable per-machine keys; the signature-vs-appended-config question is resolved by SPEC-016 (sign-once base + per-site signed wrapper, no PE append). **[→ v2 Phase 2]** ([SPEC-007](specs/SPEC-007-managed-agent-installer-builder.md))
|
||||||
- [ ] **Valuable error messages (structured errors + no silent swallows)** — P2 — one structured API error envelope with stable codes + a correlation id that also lands in the logs; contextual tracing on server/agent; sweep the 37 `let _ =` swallows (the pattern that hid the migration-005 bug); dashboard surfaces the real cause + id instead of a generic line. **[→ v2 Phase 0/1 conventions]** ([SPEC-008](specs/SPEC-008-valuable-error-messages.md))
|
- [ ] **Valuable error messages (structured errors + no silent swallows)** — P2 — one structured API error envelope with stable codes + a correlation id that also lands in the logs; contextual tracing on server/agent; sweep the 37 `let _ =` swallows (the pattern that hid the migration-005 bug); dashboard surfaces the real cause + id instead of a generic line. **[→ v2 Phase 0/1 conventions]** ([SPEC-008](specs/SPEC-008-valuable-error-messages.md))
|
||||||
- [ ] **Feature-rich, fully-documented management API** — P2 — everything the console can do, callable by API: OpenAPI 3.x generated from code (utoipa) + browsable docs at `/api/docs`, long-lived revocable scoped API tokens (PAT-style, distinct from the 24h JWT + agent keys), an API-completeness gap audit, and consistent pagination/error conventions. Distinct from the ADR-001 RMM integration contract. **[→ v2 Phase 3]** ([SPEC-009](specs/SPEC-009-feature-rich-documented-api.md))
|
- [ ] **Feature-rich, fully-documented management API** — P2 — everything the console can do, callable by API: OpenAPI 3.x generated from code (utoipa) + browsable docs at `/api/docs`, long-lived revocable scoped API tokens (PAT-style, distinct from the 24h JWT + agent keys), an API-completeness gap audit, and consistent pagination/error conventions. Distinct from the ADR-001 RMM integration contract. **[→ v2 Phase 3]** ([SPEC-009](specs/SPEC-009-feature-rich-documented-api.md))
|
||||||
- [ ] **Branding and white-label configuration** — P2 — Allow MSPs to customize logo, colors, and product name for white-labeled remote support. Dashboard admin settings page with logo upload (PNG/SVG, max 2MB), brand hue slider (OKLCH 0-360°, default 184=cyan), product name override, company name, and favicon. Agent tray tooltip uses custom product name from registry. Singleton database table with public GET endpoint for unauthenticated rendering. CSS variables (`--brand-hue`, `--accent`, `--panel`) for dynamic theming. **[→ v2 Phase 2]** ([SPEC-014](specs/SPEC-014-branding-whitelabel.md))
|
- [ ] **Branding and white-label configuration** — P2 — Allow MSPs to customize logo, colors, and product name for white-labeled remote support. Dashboard admin settings page with logo upload (PNG/SVG, max 2MB), brand hue slider (OKLCH 0-360°, default 184=cyan), product name override, company name, and favicon. Agent tray tooltip uses custom product name from registry. Singleton database table with public GET endpoint for unauthenticated rendering. CSS variables (`--brand-hue`, `--accent`, `--panel`) for dynamic theming. **[→ v2 Phase 2]** ([SPEC-014](specs/SPEC-014-branding-whitelabel.md))
|
||||||
|
|||||||
244
docs/specs/SPEC-016-zero-touch-enrollment.md
Normal file
244
docs/specs/SPEC-016-zero-touch-enrollment.md
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
# SPEC-016: Zero-Touch Per-Site Agent Enrollment
|
||||||
|
|
||||||
|
**Status:** Proposed
|
||||||
|
**Priority:** P1
|
||||||
|
**Requested By:** Mike (2026-06-02)
|
||||||
|
**Estimated Effort:** X-Large
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Give GuruConnect a ScreenConnect-class managed-agent enrollment flow: a technician runs
|
||||||
|
**one signed installer per site** on every machine at that site — no per-machine key
|
||||||
|
minting, no flags, no typing — and each machine **self-registers** on first run, the
|
||||||
|
server minting it a per-machine `cak_` key bound to a stable, machine-derived
|
||||||
|
`machine_uid`. Each site installer carries a **rotatable per-site enrollment key** (a long
|
||||||
|
server-generated secret) plus a short human-readable **fingerprint** (`vN (XXXX)`) so an
|
||||||
|
operator can tell at a glance whether an installer is current. Rotating a site's key blocks
|
||||||
|
*new* enrollments from old installers while leaving already-enrolled machines untouched
|
||||||
|
(they hold their own `cak_`).
|
||||||
|
|
||||||
|
This is the missing piece that turns the v2 secure-session-core (SPEC-004 per-agent keys +
|
||||||
|
`machine_uid`) into a real product workflow, and it **resolves SPEC-007's open
|
||||||
|
signature-vs-appended-config question**: the agent binary is signed **once** in CI
|
||||||
|
(already shipped via `release.yml`), and per-site customization rides in a thin **signed
|
||||||
|
wrapper** that writes site config to the endpoint at install time — never appended into the
|
||||||
|
signed PE.
|
||||||
|
|
||||||
|
**Success criteria:**
|
||||||
|
1. A tech installs one site installer on N machines; all N appear in the console under the
|
||||||
|
correct company/site, each as a distinct, deduplicated machine — zero per-machine setup.
|
||||||
|
2. Re-installing / re-imaging the same hardware **reuses** the existing machine row (no
|
||||||
|
ghost duplicates — the failure mode SPEC-004 documents).
|
||||||
|
3. Rotating a site's enrollment key makes old installers unable to enroll new machines,
|
||||||
|
while every already-enrolled agent keeps working.
|
||||||
|
4. Every distributed installer is **validly Authenticode-signed** (SmartScreen/WDAC clean).
|
||||||
|
|
||||||
|
## Background — what exists today (confirmed in code)
|
||||||
|
|
||||||
|
- **Embedded config is append-based and breaks signing.** `server/src/api/downloads.rs`
|
||||||
|
(`download_agent`, ~`:152`) reads `static/downloads/guruconnect.exe` and **appends**
|
||||||
|
`MAGIC_MARKER` + `len:u32` + JSON (`:196`) to the end of the PE. The agent reads it back
|
||||||
|
in `agent/src/config.rs` (`read_embedded_config`, `:223`). Appending bytes after a signed
|
||||||
|
PE invalidates the Authenticode signature — so the current customization path and the
|
||||||
|
newly-shipped CI signing are mutually exclusive.
|
||||||
|
- **No self-registration exists.** Per-agent `cak_` keys are minted **admin-only** in
|
||||||
|
`server/src/api/machine_keys.rs` (`create_key`, `:119`; "Admin issued a per-agent key",
|
||||||
|
`:146`). There is no endpoint where an agent first-run exchanges an enrollment credential
|
||||||
|
for its own key.
|
||||||
|
- **Relay already accepts per-agent keys.** `server/src/relay/mod.rs`
|
||||||
|
(`validate_agent_api_key`, `:417`) calls `crate::auth::agent_keys::verify_agent_key`
|
||||||
|
(`:422`) — the `cak_` path — then falls back to the **deprecated** shared `AGENT_API_KEY`
|
||||||
|
(`:444`, logs a "migrate to per-agent `cak_`" warning).
|
||||||
|
- **Key primitives exist.** `server/src/auth/agent_keys.rs`: `generate_agent_key` mints a
|
||||||
|
`cak_`-prefixed high-entropy key (`:36`/`:46`); `verify_agent_key` (`:71`).
|
||||||
|
`server/src/db/agent_keys.rs` already inserts into `connect_agent_keys (machine_id,
|
||||||
|
key_hash, tenant_id)` (`:47`) — the v2 tenancy column is present (migration
|
||||||
|
`004_v2_secure_session_core.sql`).
|
||||||
|
- **Identity is a random config UUID, not machine-derived** — the root cause of duplicates
|
||||||
|
per SPEC-004 (`agent/src/config.rs` `generate_agent_id`, `:90`).
|
||||||
|
- **Agent mode dispatch:** `agent/src/main.rs` `Commands::Install` (`:160`) → `run_install`;
|
||||||
|
`agent/src/config.rs` `detect_run_mode` (`:162`) returns `RunMode::PermanentAgent` when
|
||||||
|
embedded config is present.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
### Included in v1 (CORE)
|
||||||
|
|
||||||
|
1. **`machine_uid` — deterministic machine identity (hardware-salted, per-tenant).** Derive
|
||||||
|
a stable id from the Windows `MachineGuid`
|
||||||
|
(`HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid`) **salted with stable hardware
|
||||||
|
signals** (SMBIOS UUID / motherboard + disk serial), independent of the config-file
|
||||||
|
`agent_id`. Hardware-derived salt is deliberate: it **survives an OS reinstall/re-image
|
||||||
|
on the same hardware** (so the row is reused — the re-image dedup goal) while keeping
|
||||||
|
distinct physical boxes distinct (a per-install *random* salt would break re-image dedup
|
||||||
|
and is rejected). Uniqueness is scoped **per-tenant** — dedup key `(tenant_id,
|
||||||
|
machine_uid)` — so the same hardware legitimately present in two tenants stays two
|
||||||
|
independent rows. (Shared root with SPEC-004; whichever lands first owns the impl, the
|
||||||
|
other consumes it.) Used as the dedup key for register/move.
|
||||||
|
|
||||||
|
**Collision-gated activation.** The residual collision case is VMs/templates that share a
|
||||||
|
hardware UUID (some hypervisors clone the SMBIOS UUID). When the server detects a
|
||||||
|
`machine_uid` collision (a seemingly-different endpoint resolving to an existing uid), the
|
||||||
|
endpoint does **not** auto-activate: it drops to a **pending** state, fires an alert, and
|
||||||
|
an operator must confirm in the dashboard that the collided endpoint may activate. This is
|
||||||
|
the one deliberate exception to auto-approve (see item 6).
|
||||||
|
|
||||||
|
2. **Per-site enrollment key + fingerprint.**
|
||||||
|
- Long (≥256-bit) server-generated secret per site, stored **hashed** (Argon2id, same
|
||||||
|
as account passwords; note that `cak_` keys are currently SHA-256-hashed per the 2026-05-31 audit), never recoverable in plaintext after issue.
|
||||||
|
- A non-secret **fingerprint** = monotonic version + short derived code in **hex**,
|
||||||
|
rendered `vN (XXXX)` (e.g. `v3 (7F2A)`), shown in the dashboard, baked into the
|
||||||
|
installer filename, and reported by the agent at enrollment. Hex is deliberate —
|
||||||
|
**not** the RMM word-style code (`GREEN-FALCON`) — so GuruConnect and GuruRMM
|
||||||
|
artifacts are never visually conflated.
|
||||||
|
- **Rotate** regenerates the secret and bumps the version; old installers are rejected
|
||||||
|
for *new* enrollments; existing agents (holding `cak_`) are unaffected.
|
||||||
|
|
||||||
|
3. **Self-registration endpoint.** New `POST /api/enroll` (public — no JWT is required; access is
|
||||||
|
gated by the enrollment key) accepting `{ site_code, enrollment_key, machine_uid,
|
||||||
|
hostname, labels{company,site,department,device_type,tags} }`:
|
||||||
|
- Verify `(site_code, enrollment_key)` against the current per-site key.
|
||||||
|
- **Dedup by `machine_uid`** within the tenant (dedup key `(tenant_id, machine_uid)`, per item 1): if the machine exists, reuse the row and
|
||||||
|
rotate its `cak_`; else create the machine row.
|
||||||
|
- Mint a `cak_` (reuse `generate_agent_key`), store hashed via `db::agent_keys` bound to
|
||||||
|
`machine_id` (+ `tenant_id` from the site), return the plaintext `cak_` **once**.
|
||||||
|
- Emit an audit event + **new-enrollment alert** (and a **site-move** alert when an
|
||||||
|
existing `machine_uid` enrolls under a different site).
|
||||||
|
- **Rate-limit + lockout** per `(site_code, source-IP)` as defense-in-depth (the key is
|
||||||
|
long, so this is belt-and-suspenders, not load-bearing).
|
||||||
|
|
||||||
|
4. **Agent first-run enrollment.** On `RunMode::PermanentAgent` with no stored `cak_`:
|
||||||
|
read site config → call `/api/enroll` with `machine_uid` → persist the returned `cak_`
|
||||||
|
to a SYSTEM-only protected store (a DPAPI-machine-encrypted blob in a SYSTEM-ACL'd location — both layers) →
|
||||||
|
connect to `wss://connect.azcomputerguru.com/ws/agent` using the `cak_`. On subsequent
|
||||||
|
runs, use the stored `cak_` directly (no re-enroll).
|
||||||
|
|
||||||
|
5. **Sign-once base + per-site signed wrapper (resolves SPEC-007 open question).**
|
||||||
|
- The base agent is signed once in CI (`release.yml`, already shipped) and stays
|
||||||
|
byte-identical for everyone.
|
||||||
|
- Per-site customization (labels + enrollment key + fingerprint) is delivered to the
|
||||||
|
endpoint **at install time** via a signing-safe channel — NOT appended to the signed
|
||||||
|
PE. **v1 produces BOTH a signed bootstrapper `.exe` and a signed MSI per site**
|
||||||
|
(ScreenConnect parity — manual installs grab the `.exe`, GPO/Intune fleet pushes take
|
||||||
|
the MSI), both wrapping the same sign-once agent and writing the site config to the
|
||||||
|
protected config location. The two differ only in packaging (bootstrapper stub vs. WiX
|
||||||
|
bundle); both are signed.
|
||||||
|
- **Deprecate the append path** in `downloads.rs` for managed installs (keep only for
|
||||||
|
attended/support-code if still needed), eliminating the signature-invalidation defect.
|
||||||
|
|
||||||
|
6. **Auto-approve posture (with collision-gate exception).** A self-registered machine is
|
||||||
|
live and controllable immediately (ScreenConnect parity); the new-enrollment alert is the
|
||||||
|
tripwire. The **one** exception is a detected `machine_uid` collision (item 1), which
|
||||||
|
gates the endpoint to **pending** until an operator confirms it in the dashboard.
|
||||||
|
|
||||||
|
### Explicitly out of scope (ANTICIPATED — reserve room, do NOT build in v1)
|
||||||
|
|
||||||
|
The v1 data model and agent mode-dispatch must leave room for these without building them:
|
||||||
|
|
||||||
|
- **Per-site enrollment POLICY** — a `sites.enrollment_policy` field (default
|
||||||
|
`auto-approve`; future `pending-approval`) plus per-seat/per-endpoint licensing controls.
|
||||||
|
Commercial, multi-tenant (the `tenant_id` column already exists). Its own future SPEC.
|
||||||
|
- **Flag overrides** — `--enroll-key` / `--site-code` (generic installer, key supplied on
|
||||||
|
the command line) and `--reassign` (move an existing machine to a new site, gated by
|
||||||
|
possession of the destination site's key, with an **explicit accidental-move guard**:
|
||||||
|
a different-site re-run refuses unless `--reassign` is passed) + cross-client move policy.
|
||||||
|
Backend (`machine_uid` + authorized site + `cak_`) is designed to support it; CLI surface
|
||||||
|
is deferred.
|
||||||
|
- **Technician-assisted interactive install** — `--technician` on a generic installer:
|
||||||
|
prompts for the tech's own server credentials, and on auth presents a **validated**
|
||||||
|
Company/Site/tags picker from the live authorized list (authz-by-identity, full audit
|
||||||
|
trail). Heaviest path (interactive UI + auth/list callback); deferred.
|
||||||
|
|
||||||
|
All three converge on the **same backend operation** delivered in v1: `machine_uid` +
|
||||||
|
authorized site + issued `cak_`. v1 only ships the per-site-embedded-key door.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
- **Agent** (`agent/`): compute `machine_uid`; first-run enroll → store `cak_`; use stored
|
||||||
|
`cak_` thereafter; read site config from the wrapper-written location instead of an
|
||||||
|
appended PE blob. Touches `config.rs` (`EmbeddedConfig`/`detect_run_mode`/storage),
|
||||||
|
`main.rs` (`Install`/run-mode), a new `enroll` client module, transport auth.
|
||||||
|
- **Relay-server** (`server/`): new `POST /api/enroll`; per-site key issue/rotate/verify;
|
||||||
|
`machine_uid` dedup + site-move on register; audit + alert emission; rate-limit/lockout.
|
||||||
|
Touches `api/` (new `enroll.rs`, `sites` key endpoints), `auth/agent_keys.rs`,
|
||||||
|
`db/agent_keys.rs`, `relay/mod.rs` (enrollment vs. connect), `main.rs` routes.
|
||||||
|
- **Dashboard**: per-site enrollment-key display (fingerprint `vN (XXXX)`), **Rotate**
|
||||||
|
action, "current installer" download wired to the signed wrapper build. (Builder UI is
|
||||||
|
SPEC-007; this spec supplies the key/fingerprint/rotation it consumes.)
|
||||||
|
- **DB migration:** `site_enrollment_keys` (or columns on the site): `site_id`,
|
||||||
|
`key_hash`, `version`, `fingerprint`, `created_at`, `rotated_at`, `active`. Reserve
|
||||||
|
`sites.enrollment_policy` (nullable, default `auto-approve`) for the anticipated policy
|
||||||
|
work. `connect_machines` gains `machine_uid` (unique per tenant: `(tenant_id, machine_uid)`).
|
||||||
|
- **Protobuf** (`proto/guruconnect.proto`): no wire change required for enrollment if
|
||||||
|
`/api/enroll` is REST; `AgentStatus` label fields per SPEC-007 (`department`,
|
||||||
|
`device_type`) ride along if landed together.
|
||||||
|
|
||||||
|
## Security considerations
|
||||||
|
|
||||||
|
- **Two-tier credential model:** low-sensitivity **enrollment key** (gates "may register",
|
||||||
|
shared per site, rotatable) vs. high-sensitivity **per-machine `cak_`** (operating
|
||||||
|
credential, per-machine revocation). Compromise of an enrollment key is recovered by
|
||||||
|
rotating one site — no fleet-wide re-key.
|
||||||
|
- **Enrollment keys stored hashed** (Argon2id); plaintext shown once at issue/rotate.
|
||||||
|
- **`cak_` at rest on the endpoint** is stored as a **DPAPI-machine-encrypted blob inside a
|
||||||
|
SYSTEM-ACL'd location** (HKLM value or `ProgramData` file) — both layers: the SYSTEM ACL
|
||||||
|
stops non-admin users reading it, and DPAPI-machine encryption makes a copied file/export
|
||||||
|
inert off the box. (Local admin/SYSTEM can always recover it; that is accepted — blast
|
||||||
|
radius of one leaked `cak_` is a single, independently-revocable machine.)
|
||||||
|
- **`machine_uid` binding** is the spoof-guard SPEC-004 wants: a `cak_` is bound to a
|
||||||
|
`machine_uid`; a different box presenting another box's `cak_` is detectable.
|
||||||
|
- **Authorization model** for moves/enrolls is possession-of-destination-key in v1
|
||||||
|
(identity-based authz deferred to the technician-assisted path).
|
||||||
|
- **Open registration risk** is mitigated by requiring `(site_code + long key)` and
|
||||||
|
rate-limit/lockout; auto-approve is acceptable because the enrollment key is the gate and
|
||||||
|
every enrollment/site-move fires an alert.
|
||||||
|
- **Audit events:** enroll, re-enroll/reuse, site-move, key-rotate — all logged with
|
||||||
|
`machine_uid`, site, and source IP.
|
||||||
|
|
||||||
|
## Testing strategy
|
||||||
|
|
||||||
|
- **Unit:** `machine_uid` derivation stability; enrollment-key verify/rotate; fingerprint
|
||||||
|
derivation; `cak_` mint/hash/verify; dedup decision (new vs. reuse vs. move).
|
||||||
|
- **Integration:** enroll new → row + `cak_` issued; re-enroll same `machine_uid` → reuse,
|
||||||
|
no duplicate; enroll with rotated (old) key → rejected; old `cak_` still connects after
|
||||||
|
rotation; rate-limit/lockout trips; site-move emits alert.
|
||||||
|
- **Manual:** build a site wrapper installer → run on a clean VM → appears in console under
|
||||||
|
correct site, immediately controllable; re-image VM → same row reused; `signtool verify
|
||||||
|
/pa` passes on the distributed wrapper and the laid-down agent.
|
||||||
|
|
||||||
|
## Effort estimate & dependencies
|
||||||
|
|
||||||
|
- **Size:** X-Large (agent + relay + DB migration + CI build/sign wrapper + dashboard
|
||||||
|
key/rotation surface).
|
||||||
|
- **Depends on:** SPEC-004 `machine_uid` (shared root); the CI signing already shipped
|
||||||
|
(SPEC-001 §2 / `release.yml`).
|
||||||
|
- **Unblocks:** SPEC-007 (installer builder gets a real per-site key + the signing
|
||||||
|
resolution), and the parked managed-agent test deployment on the internal beta machines.
|
||||||
|
- **Relationship to v2 phases:** sits with the Phase-1 secure-session-core (per-agent keys
|
||||||
|
+ identity) and feeds Phase-2 dashboard work.
|
||||||
|
|
||||||
|
## Resolved decisions (2026-06-02, Mike)
|
||||||
|
|
||||||
|
1. **Wrapper shape — BOTH.** v1 ships a signed bootstrapper `.exe` *and* a signed MSI per
|
||||||
|
site (ScreenConnect offers both; manual installs use the `.exe`, GPO/Intune fleet pushes
|
||||||
|
use the MSI). Same sign-once agent inside each.
|
||||||
|
2. **`cak_` storage — BOTH layers.** DPAPI-machine-encrypted blob stored in a SYSTEM-ACL'd
|
||||||
|
location. Non-admins can't read it; a stolen copy is inert off the box.
|
||||||
|
3. **Fingerprint — hex (`7F2A`).** Deliberately *not* the RMM word-code style, so the two
|
||||||
|
products' artifacts are never visually conflated.
|
||||||
|
4. **`machine_uid` — per-tenant scope, hardware-derived salt, collision-gated.** Dedup key
|
||||||
|
`(tenant_id, machine_uid)`; salt from stable hardware signals (survives same-hardware
|
||||||
|
re-image, separates distinct boxes); detected collisions (e.g. template-cloned VMs
|
||||||
|
sharing a hardware UUID) drop to pending + alert and require dashboard confirmation to
|
||||||
|
activate.
|
||||||
|
5. **Attended (support-code) path — unchanged.** `download_support` is filename-based
|
||||||
|
(`GuruConnect-<code>.exe`), not append-based, so renaming never breaks the signature —
|
||||||
|
it is already signing-safe. Only the managed `download_agent` append path is retired.
|
||||||
|
|
||||||
|
## Remaining for planning
|
||||||
|
|
||||||
|
- Exact stable-hardware signal set for the salt (SMBIOS UUID alone vs. + motherboard/disk
|
||||||
|
serial) and hypervisor behavior matrix (which hypervisors duplicate the SMBIOS UUID on
|
||||||
|
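clone → exercise the collision-gate). Also confirm how `MachineGuid` feeds the derivation: a clean OS install regenerates it, so it cannot be a required input if the id must survive a same-hardware re-image.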
|
||||||
|
- MSI authoring approach (WiX) and whether per-site config rides as a per-site MSI vs. a
|
||||||
|
base MSI + property/transform.
|
||||||
129
reports/2026-05-31-gc-audit.md
Normal file
129
reports/2026-05-31-gc-audit.md
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
# GuruConnect Audit Report — 2026-05-31
|
||||||
|
|
||||||
|
**Auditor:** Claude (claude-opus-4-8[1m])
|
||||||
|
**Passes:** Security & Remote-Session Integrity (`--pass=security` only)
|
||||||
|
**Previous audit:** 2026-05-30 (`reports/2026-05-30-gc-audit.md`)
|
||||||
|
**Scope note:** v2 **Phase-1 EXIT gate** re-audit. Confirms the three relay CRITICALs stay closed and
|
||||||
|
the prior net-new HIGH is fixed, and assesses the net-new SPEC-004 surface (Tasks 2/4/5 — machine_uid
|
||||||
|
dedup, session reaping, operator removal) now committed + deployed. Includes **live** boundary tests
|
||||||
|
against the running production binary, not just a code re-derivation.
|
||||||
|
|
||||||
|
**Code under audit:** working tree at tag **v0.3.0 / e967cce** = the binary deployed to prod
|
||||||
|
172.16.3.30:3002 (deployed this session from 96f9c0a; e967cce adds only the version bump + changelog).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Executive Summary
|
||||||
|
|
||||||
|
| Pass | Total | Critical | High | Medium | Low | Info |
|
||||||
|
|------|-------|----------|------|--------|-----|------|
|
||||||
|
| Security & Session | 4 | 0 | 0 | 0 | 0 | 4 |
|
||||||
|
|
||||||
|
**Phase-1 security EXIT gate: PASS.** The relay/server plane is clean. All three 2026-05-29 CRITICALs
|
||||||
|
remain CLOSED (verified in code AND live against the deployed server). The prior net-new HIGH (agent
|
||||||
|
auto-update TLS bypass) and the prior LOW (chat content logged at INFO) are both remediated. The
|
||||||
|
net-new SPEC-004 surface (operator removal, machine_uid dedup gate, session reaper/supersede) audits
|
||||||
|
clean with the keyed-identity security invariant intact end-to-end. No net-new actionable findings (the four entries recorded are INFO-level observations only).
|
||||||
|
|
||||||
|
**Requires action:** none.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Live functional verification (deployed binary, 172.16.3.30:3002)
|
||||||
|
|
||||||
|
Forged tokens (HS256, real `JWT_SECRET`) exercised the WS auth boundaries directly. Each illegitimate
|
||||||
|
access was REJECTED (4xx, never a 101 upgrade):
|
||||||
|
|
||||||
|
| Check | Result | Proves |
|
||||||
|
|-------|--------|--------|
|
||||||
|
| Login-shape JWT on `/ws/viewer` | **401** | Login token not accepted as a viewer token (`purpose=="viewer"` enforced) — CRITICAL #1 |
|
||||||
|
| Validly-signed viewer token for session AAAA used on session BBBB | **403** | Session binding enforced — a correctly-signed token is refused for the wrong session — CRITICAL #1 |
|
||||||
|
| Login JWT used as agent `api_key` on `/ws/agent` | **401** | Agent plane rejects JWTs (no JWT branch) — CRITICAL #3 |
|
||||||
|
| Wrong-signature token on `/ws/viewer` | **401** | Signature validation holds (control) |
|
||||||
|
|
||||||
|
The session-bind case is the decisive one: a token that WOULD be accepted for its own session is
|
||||||
|
rejected 403 for a different session, proving the binding rather than mere signature validation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## The three relay CRITICALs — verdict
|
||||||
|
|
||||||
|
| CRITICAL | Verdict | Enforced at |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| #1 any-JWT-joins-any-session | **CLOSED** | mint authz `api/sessions.rs` (is_admin \|\| permission); viewer WS `relay/mod.rs:496` `validate_viewer_token` (sig+expiry+`purpose=="viewer"`); session-bind `relay/mod.rs:527-534` (`claim != requested → 403`) |
|
||||||
|
| #2 viewer-WS blacklist | **CLOSED** (TTL-bounded residual unchanged) | `relay/mod.rs:509` `token_blacklist.is_revoked` before upgrade. Residual: logout revokes the login JWT but not already-minted viewer tokens (5-min TTL) — same tracked MEDIUM, no regression |
|
||||||
|
| #3 JWT-accepted-as-agent-key | **CLOSED**, fails closed | `relay/mod.rs:417` `validate_agent_api_key` — no JWT branch; only `cak_` (`auth/agent_keys.rs`, SHA-256 vs `connect_agent_keys`, `revoked_at IS NULL`) or deprecated shared key (WARN). Unresolved machine → 503 (`:303`); client `agent_id` overridden by key identity (`:283`) |
|
||||||
|
|
||||||
|
Live results match these code paths exactly.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Prior HIGH — FIXED
|
||||||
|
|
||||||
|
**Agent auto-update TLS bypass → MITM-RCE: CLOSED.** `agent/src/update.rs:21` `dev_insecure_tls()` is
|
||||||
|
`cfg!(debug_assertions)` AND env-var gated, so in a release build the `cfg!` evaluates to `false`, the insecure branch is unreachable, and the agent
|
||||||
|
ALWAYS verifies certs. Both `check_for_update` (`:64`) and `download_update` (`:130`) consume it; unit
|
||||||
|
test `test_dev_insecure_tls_release_is_always_false` (`:362`) asserts the release invariant. No
|
||||||
|
`danger_accept_invalid_certs(true)` reachable in production. A signed-manifest defense-in-depth TODO is
|
||||||
|
filed at `install_update` (`:189`) (= tracked task #10, not an exit blocker).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pass 5: Security & Remote-Session Integrity — net-new SPEC-004 surface
|
||||||
|
|
||||||
|
### [INFO] Operator removal API (`server/src/api/removal.rs`) — clean, admin-gated
|
||||||
|
Every removal handler takes the `AdminUser` extractor as its first argument (runs before any DB
|
||||||
|
mutation): `remove_machine` (`:88`), `remove_session` (`:321`), `bulk_remove_machines` (`:471`).
|
||||||
|
`AdminUser` (`auth/mod.rs:141`) validates JWT (signature + expiry + blacklist `:97`) then requires
|
||||||
|
`is_admin()` else 403 (`:146`). Soft-deletes are parameterized + idempotent (`WHERE … AND deleted_at IS
|
||||||
|
NULL`); bulk bounded (MAX_BATCH 500) with per-id UUID validation + isolated failures; audit
|
||||||
|
(`db/events.rs:126`) records actor + target + trusted-proxy IP, best-effort (cannot be suppressed by
|
||||||
|
attacker-controlled input). Removal is admin-role-gated globally (not per-tenant ACL) — same Phase-1
|
||||||
|
posture as viewer-mint, per-tenant narrowing deferred to SPEC-002 Phase 4. Acceptable by context.
|
||||||
|
|
||||||
|
### [INFO] machine_uid dedup security gate — invariant holds
|
||||||
|
Gate at `relay/mod.rs:352`: `effective_machine_uid = if is_keyed_agent { None } else { claimed }`. The
|
||||||
|
suppressed value (not the raw claim) flows to `register_agent` and `upsert_machine`. Keyed (`cak_`)
|
||||||
|
agents take the agent_id-keyed upsert branch and never write/touch an `ON CONFLICT (machine_uid)` row, so
|
||||||
|
a valid key for machine X cannot repoint machine Y via a claimed uid. An un-keyed uid-spoof can only
|
||||||
|
match a uid-bearing row — which the keyed connect path never creates; the only residual is a legacy
|
||||||
|
pre-keying row, and the startup L1 fix (`main.rs:267-288` via `keyed_machine_ids`, fail-closed on query
|
||||||
|
error) ensures keyed machines are never uid-indexed on restore.
|
||||||
|
|
||||||
|
### [INFO] Session reaper + same-machine supersede — clean, TOCTOU closed
|
||||||
|
`reap_stale_persistent` (`:875`) and supersede (`:322`) select under a read lock then re-assert the full
|
||||||
|
predicate under the write lock via `remove_session_if` (`:755`). Predicate requires
|
||||||
|
`!is_online && is_persistent && viewers.is_empty()` (+ TTL / same-uid) — an online, viewer-attached, or
|
||||||
|
support session is never reaped/superseded. Un-keyed uid-spoof blast radius = denial-of-persistence on
|
||||||
|
an offline same-uid session at worst, never a hijack. Lock order matches `register_agent`; predicate is
|
||||||
|
synchronous (no await under lock).
|
||||||
|
|
||||||
|
### [INFO] General posture — confirmed, no regressions
|
||||||
|
Runtime sqlx parameterized everywhere (no `format!`-built SQL); migrations 008/009 idempotent. Frame
|
||||||
|
caps: agent 4 MiB / viewer 64 KiB applied before upgrade. Input throttle retained. `/api/auth/login`
|
||||||
|
rate-limited (`main.rs:397`). `JWT_SECRET` panics if <32 (`main.rs:143`); agent keys SHA-256; Argon2id
|
||||||
|
passwords; no secret/token/code/PII logged. **Chat content no longer logged** (prior LOW fixed —
|
||||||
|
`relay/mod.rs:829,1428` now log length only).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Definitive answers
|
||||||
|
|
||||||
|
- **(a) Any non-admin removal path?** NO — all three removal handlers gate on `AdminUser` (JWT+blacklist+`is_admin`→403) before any DB mutation.
|
||||||
|
- **(b) Any uid-spoof that repoints/hijacks another machine's row or session (not just denial)?** NO — keyed identity is authoritative and uid-suppressed across connect → upsert → reattach → startup restore. Worst case for an un-keyed spoof is denial-of-persistence on an offline same-uid session.
|
||||||
|
- **(c) Any auth-plane bypass (agent↔viewer credential crossover)?** NO — viewer plane requires a `purpose=="viewer"` session-bound minted token; agent plane requires a `cak_`/shared key with no JWT branch. Confirmed in code and live.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Verdict
|
||||||
|
|
||||||
|
**Phase-1 security EXIT gate: PASS.** Relay/server plane clean; prior HIGH + LOW remediated; SPEC-004
|
||||||
|
surface sound with the keyed-identity invariant intact across the connect path, DB upsert, in-memory
|
||||||
|
reattach, and startup restore. No new CRITICAL/HIGH/MEDIUM/LOW.
|
||||||
|
|
||||||
|
**Tracked, deferred-by-design (not exit blockers):**
|
||||||
|
- Viewer-token logout revocation residual (MEDIUM, TTL-bounded) — `v2-secure-session-core/plan.md`.
|
||||||
|
- Update-binary signature verification (defense-in-depth, task #10) — TODO at `update.rs:189`.
|
||||||
|
|
||||||
|
*Note: only `--pass=security` was run. API-surface, Rust-quality, TypeScript, protocol-integrity,
|
||||||
|
docs-reconciliation, and CI/CD passes were not executed this run.*
|
||||||
159
server/migrations/010_spec016_enrollment.sql
Normal file
159
server/migrations/010_spec016_enrollment.sql
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
-- Migration: 010_spec016_enrollment.sql
|
||||||
|
-- Purpose: SPEC-016 zero-touch per-site agent enrollment — server-side data model.
|
||||||
|
--
|
||||||
|
-- Adds the per-site enrollment-key table, a minimal sites table to anchor it,
|
||||||
|
-- and the machine-side columns the collision-gated self-registration flow needs.
|
||||||
|
--
|
||||||
|
-- Two-tier credential model (SPEC-016 §Security): a low-sensitivity, rotatable,
|
||||||
|
-- per-site ENROLLMENT KEY (the `cek_` secret stored hashed here) gates "may this
|
||||||
|
-- machine register at all", while the high-sensitivity per-machine `cak_`
|
||||||
|
-- operating credential (connect_agent_keys, migration 004) is minted on a
|
||||||
|
-- successful enroll. Compromise of an enrollment key is recovered by rotating one
|
||||||
|
-- site, not a fleet-wide re-key.
|
||||||
|
--
|
||||||
|
-- DEVIATION FROM SPEC (documented): SPEC-016 §DB-migration describes
|
||||||
|
-- `site_enrollment_keys.site_id` as `fk -> sites`, assuming a sites table already
|
||||||
|
-- exists. It does NOT — in the current schema "site" and "company/organization" are
|
||||||
|
-- free-text columns on connect_machines (migration 005), there is no relational
|
||||||
|
-- sites entity. This migration therefore CREATES a minimal `connect_sites` table
|
||||||
|
-- (the relational anchor the enrollment-key FK and the dashboard per-site key
|
||||||
|
-- display both require) keyed by a natural `site_code` and scoped per-tenant. It is
|
||||||
|
-- intentionally minimal (code + display name + tenant); richer site/company
|
||||||
|
-- modeling is left to future work. The free-text connect_machines.site /
|
||||||
|
-- .organization columns are untouched and continue to carry agent-reported labels.
|
||||||
|
--
|
||||||
|
-- Idempotent: CREATE TABLE/INDEX IF NOT EXISTS, ADD COLUMN IF NOT EXISTS. Applied on
|
||||||
|
-- server startup by sqlx::migrate!(); never pre-applied via psql. Ordered after 009.
|
||||||
|
-- See .claude/standards/gururmm/sqlx-migrations.md.
|
||||||
|
|
||||||
|
-- gen_random_uuid() comes from pgcrypto. Migrations 001/004 already enable the
-- extension; the idempotent statement is repeated here purely for safety.
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- connect_sites — relational anchor for per-site enrollment (see DEVIATION above)
|
||||||
|
-- ============================================================================
|
||||||
|
-- A site is the unit a single signed installer targets. `site_code` is the
|
||||||
|
-- non-secret, operator-facing identifier the installer carries and the agent sends
|
||||||
|
-- at /api/enroll (e.g. "ACME-PHX"). Uniqueness is per-tenant: the same human-chosen
|
||||||
|
-- code may legitimately exist in two tenants. tenant_id mirrors the nullable,
|
||||||
|
-- default-tenant-backfilled tenancy column used on every other scoped table
|
||||||
|
-- (migration 004); db::tenancy::current_tenant_id() resolves it for now.
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS connect_sites (
    id                UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Non-secret, operator-facing identifier carried by the installer.
    site_code         TEXT        NOT NULL,

    -- Site / company name displayed in the dashboard.
    display_name      TEXT,

    -- Default company label for machines enrolled at this site; mirrors the
    -- free-text connect_machines.organization the agent otherwise self-reports.
    company           TEXT,

    -- Tenancy-ready (Phase 4). Backfilled to the default tenant by the UPDATE
    -- that follows the unique index in this migration.
    tenant_id         UUID,

    -- RESERVED for future per-site enrollment POLICY work (SPEC-016
    -- §out-of-scope). Defaults to 'auto-approve'; a later 'pending-approval'
    -- value is meant to gate new enrollments. NOT enforced in Phase A — it
    -- exists only so the policy SPEC needs no schema change. Do not branch on
    -- this column yet.
    enrollment_policy TEXT        DEFAULT 'auto-approve',

    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
|
||||||
|
|
||||||
|
-- site_code is unique WITHIN a tenant, so /api/enroll resolves a site
-- deterministically while two tenants may still reuse the same code.
-- tenant_id is nullable during Phase 1; COALESCE folds NULL to the default
-- tenant so such rows still take part in the uniqueness check.
CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_sites_tenant_code
    ON connect_sites (
        COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid),
        site_code
    );
|
||||||
|
|
||||||
|
-- Point every site that has no tenant at the default tenant. On a fresh
-- database the table is empty and this is a no-op; it is kept so the migration
-- stays self-consistent.
UPDATE connect_sites
   SET tenant_id = '00000000-0000-0000-0000-000000000001'
 WHERE tenant_id IS NULL;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- site_enrollment_keys — rotatable, hashed per-site enrollment secret + fingerprint
|
||||||
|
-- ============================================================================
|
||||||
|
-- Stores ONLY the Argon2id hash of the `cek_` secret; the plaintext is shown once
|
||||||
|
-- at issue/rotate and never recoverable. `version` is the monotonic rotation
|
||||||
|
-- counter; `fingerprint` is the non-secret short hex shown as `vN (XXXX)` in the
|
||||||
|
-- dashboard and baked into the installer filename. `active` marks the current key —
|
||||||
|
-- rotation flips the old key to active=false (blocking NEW enrollments from old
|
||||||
|
-- installers) and inserts a new active row; already-enrolled agents holding their
|
||||||
|
-- own `cak_` are unaffected. Multiple inactive (historical) rows may coexist per
|
||||||
|
-- site; at most one active row is intended (enforced by a partial unique index).
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS site_enrollment_keys (
    id          UUID        PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owning site; deleting the site removes its keys.
    site_id     UUID        NOT NULL REFERENCES connect_sites(id) ON DELETE CASCADE,

    -- Argon2id hash of the `cek_` enrollment secret. The plaintext is never stored.
    key_hash    TEXT        NOT NULL,

    -- Monotonic rotation version (1, 2, 3, ...).
    version     INTEGER     NOT NULL,

    -- Non-secret short hex code (the XXXX in `vN (XXXX)`), derived from the
    -- secret and stored so the dashboard / GET endpoint can show it without
    -- needing the secret itself.
    fingerprint TEXT        NOT NULL,

    active      BOOLEAN     NOT NULL DEFAULT true,

    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Timestamp of the rotation that retired this key (active set to false).
    rotated_at  TIMESTAMPTZ
);
|
||||||
|
|
||||||
|
-- Supports the enroll hot path, which looks up a site's active key.
CREATE INDEX IF NOT EXISTS idx_site_enrollment_keys_site_active
    ON site_enrollment_keys (site_id, active);
|
||||||
|
|
||||||
|
-- Enforces a single ACTIVE enrollment key per site (the "current" installer
-- key). The index is partial, so inactive historical rows are not constrained
-- and any number of them may coexist.
CREATE UNIQUE INDEX IF NOT EXISTS idx_site_enrollment_keys_one_active
    ON site_enrollment_keys (site_id)
    WHERE active;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- connect_machines — site binding + enrollment-state collision gate
|
||||||
|
-- ============================================================================
|
||||||
|
-- machine_uid already exists (migration 008) with a partial UNIQUE index on
|
||||||
|
-- (machine_uid) WHERE machine_uid IS NOT NULL. SPEC-016 §item-1 / resolved-decision #4
|
||||||
|
-- call for the dedup key to be PER-TENANT — (tenant_id, machine_uid) — so the same
|
||||||
|
-- hardware legitimately present in two tenants stays two rows. tenant_id is the
|
||||||
|
-- scoping column that exists on connect_machines (migration 004); machines have no
|
||||||
|
-- direct site_id today, so site is tracked separately (site_id below) and tenancy is
|
||||||
|
-- the uniqueness scope, exactly as the spec states.
|
||||||
|
--
|
||||||
|
-- CRITICAL CONSTRAINT (why we ADD rather than REPLACE the 008 index here):
|
||||||
|
-- db::machines::upsert_machine (the live connect-path upsert) uses
|
||||||
|
-- `ON CONFLICT (machine_uid) WHERE machine_uid IS NOT NULL` as its conflict arbiter.
|
||||||
|
-- Postgres matches that arbiter to the EXACT index from migration 008. Dropping that
|
||||||
|
-- index would make the live upsert fail to find an arbiter and error at runtime —
|
||||||
|
-- breaking every un-keyed agent reconnect. So migration 008's global index is LEFT
|
||||||
|
-- IN PLACE (the connect path keeps working unchanged) and the per-tenant index is
|
||||||
|
-- added ALONGSIDE it. In single-tenant Phase 1 the two are equivalent (every row's
|
||||||
|
-- tenant_id is the default tenant), so the per-tenant index adds the SPEC-016 dedup
|
||||||
|
-- semantics without a redundant-uniqueness conflict: a (tenant, uid) pair that is
|
||||||
|
-- unique is also globally unique today. When multi-tenancy activates AND
|
||||||
|
-- upsert_machine's ON CONFLICT is updated to name (tenant_id, machine_uid), a future
|
||||||
|
-- migration drops the global 008 index. Documented as deferred; do not drop it now.
|
||||||
|
|
||||||
|
-- site_id: optional link to the site a machine enrolled under. Stays NULL for
-- legacy / support-code machines that never went through /api/enroll. A changed
-- site on re-enroll is the "site move" that SPEC-016 audits.
ALTER TABLE connect_machines
    ADD COLUMN IF NOT EXISTS site_id UUID
        REFERENCES connect_sites(id) ON DELETE SET NULL;

-- enrollment_state: the collision gate (SPEC-016 §item-1/6).
--   'active'  = live and controllable (auto-approve posture)
--   'pending' = a machine_uid collision was detected at enroll; an operator
--               must confirm in the dashboard before the endpoint may be
--               controlled
-- The default is 'active' so every legacy/connect-path row is unaffected.
ALTER TABLE connect_machines
    ADD COLUMN IF NOT EXISTS enrollment_state TEXT NOT NULL DEFAULT 'active'
        CHECK (enrollment_state IN ('active', 'pending'));
|
||||||
|
|
||||||
|
-- SPEC-016 per-tenant uniqueness of machine_uid. This index is created IN
-- ADDITION TO migration 008's global (machine_uid) index, which must survive
-- because the connect-path upsert's ON CONFLICT arbiter binds to it (see the
-- CRITICAL CONSTRAINT note earlier in this file).
--   * COALESCE folds a NULL tenant_id to the default tenant, keeping the index
--     well-defined while tenancy is single-tenant (Phase 1).
--   * The WHERE clause leaves out NULL machine_uid rows, so legacy un-keyed
--     rows coexist freely.
CREATE UNIQUE INDEX IF NOT EXISTS idx_connect_machines_tenant_machine_uid
    ON connect_machines (
        COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid),
        machine_uid
    )
    WHERE machine_uid IS NOT NULL;
|
||||||
1008
server/src/api/enroll.rs
Normal file
1008
server/src/api/enroll.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,10 +4,12 @@ pub mod auth;
|
|||||||
pub mod auth_logout;
|
pub mod auth_logout;
|
||||||
pub mod changelog;
|
pub mod changelog;
|
||||||
pub mod downloads;
|
pub mod downloads;
|
||||||
|
pub mod enroll;
|
||||||
pub mod machine_keys;
|
pub mod machine_keys;
|
||||||
pub mod releases;
|
pub mod releases;
|
||||||
pub mod removal;
|
pub mod removal;
|
||||||
pub mod sessions;
|
pub mod sessions;
|
||||||
|
pub mod sites;
|
||||||
pub mod users;
|
pub mod users;
|
||||||
|
|
||||||
use axum::{
|
use axum::{
|
||||||
|
|||||||
217
server/src/api/sites.rs
Normal file
217
server/src/api/sites.rs
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
//! Site enrollment-key administration (SPEC-016, admin plane).
|
||||||
|
//!
|
||||||
|
//! Admin (dashboard JWT + admin role) endpoints for the per-site enrollment key
|
||||||
|
//! the dashboard surfaces and rotates:
|
||||||
|
//!
|
||||||
|
//! - `POST /api/sites/:id/enrollment-key/rotate` — regenerate the `cek_` secret,
|
||||||
|
//! bump the monotonic version, derive a new fingerprint, deactivate the prior
|
||||||
|
//! active key, and return the plaintext + fingerprint ONCE. Old installers can no
|
||||||
|
//! longer enroll NEW machines after this; already-enrolled agents (holding their
|
||||||
|
//! own `cak_`) are unaffected (SPEC-016 success-criterion #3). Doubles as
|
||||||
|
//! first-issue when a site has no key yet.
|
||||||
|
//! - `GET /api/sites/:id/enrollment-key` — read the CURRENT non-secret fingerprint
|
||||||
|
//! + version (never the secret). 404 if the site has no active key yet.
|
||||||
|
//!
|
||||||
|
//! Auth mirrors `api::machine_keys`: the [`crate::auth::AdminUser`] extractor gates
|
||||||
|
//! both routes, and they are mounted behind the JWT `auth_layer`.
|
||||||
|
//!
|
||||||
|
//! SECURITY: the plaintext `cek_` is returned exactly once (rotate response),
|
||||||
|
//! never persisted in plaintext and never logged. Read responses expose only the
|
||||||
|
//! version + fingerprint.
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
extract::{Path, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Json,
|
||||||
|
};
|
||||||
|
use serde::Serialize;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::auth::{enrollment_keys, AdminUser};
|
||||||
|
use crate::db;
|
||||||
|
use crate::AppState;
|
||||||
|
|
||||||
|
/// Standard error envelope (matches `api::machine_keys::ApiError`).
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct ApiError {
|
||||||
|
pub detail: String,
|
||||||
|
pub error_code: String,
|
||||||
|
pub status_code: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ApiError {
|
||||||
|
fn new(status: StatusCode, code: &str, detail: &str) -> (StatusCode, Json<ApiError>) {
|
||||||
|
(
|
||||||
|
status,
|
||||||
|
Json(ApiError {
|
||||||
|
detail: detail.to_string(),
|
||||||
|
error_code: code.to_string(),
|
||||||
|
status_code: status.as_u16(),
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ApiResult<T> = Result<T, (StatusCode, Json<ApiError>)>;
|
||||||
|
|
||||||
|
/// Response for a freshly rotated/issued enrollment key. `key` is present ONLY
|
||||||
|
/// here, once.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct RotatedEnrollmentKey {
|
||||||
|
pub site_id: Uuid,
|
||||||
|
/// The plaintext `cek_` enrollment key. Shown exactly once — bake it into the
|
||||||
|
/// site installer now; the server keeps only its hash.
|
||||||
|
pub key: String,
|
||||||
|
/// Monotonic rotation version.
|
||||||
|
pub version: i32,
|
||||||
|
/// The non-secret short hex code (the `XXXX` in `vN (XXXX)`).
|
||||||
|
pub fingerprint: String,
|
||||||
|
/// Fully rendered operator-facing fingerprint, e.g. `v3 (7F2A)`.
|
||||||
|
pub fingerprint_label: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Non-secret current-key view for the GET endpoint.
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct EnrollmentKeyView {
|
||||||
|
pub site_id: Uuid,
|
||||||
|
pub version: i32,
|
||||||
|
pub fingerprint: String,
|
||||||
|
pub fingerprint_label: String,
|
||||||
|
pub active: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn require_db(state: &AppState) -> ApiResult<&db::Database> {
|
||||||
|
state.db.as_ref().ok_or_else(|| {
|
||||||
|
ApiError::new(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
"DATABASE_UNAVAILABLE",
|
||||||
|
"Database not available",
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve a site by its UUID path segment, or a 404 envelope.
|
||||||
|
async fn resolve_site(db: &db::Database, site_id: Uuid) -> ApiResult<db::sites::Site> {
|
||||||
|
db::sites::get_site_by_id(db.pool(), site_id)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB error resolving site: {}", e);
|
||||||
|
ApiError::new(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
"INTERNAL_ERROR",
|
||||||
|
"Internal server error",
|
||||||
|
)
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| ApiError::new(StatusCode::NOT_FOUND, "SITE_NOT_FOUND", "Site not found"))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// POST /api/sites/:id/enrollment-key/rotate — rotate (or first-issue) a site's
|
||||||
|
/// enrollment key. Returns the plaintext `cek_` + fingerprint once.
|
||||||
|
pub async fn rotate_enrollment_key(
|
||||||
|
AdminUser(admin): AdminUser,
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(site_id): Path<Uuid>,
|
||||||
|
) -> ApiResult<(StatusCode, Json<RotatedEnrollmentKey>)> {
|
||||||
|
let db = require_db(&state)?;
|
||||||
|
let site = resolve_site(db, site_id).await?;
|
||||||
|
|
||||||
|
// Mint plaintext + Argon2id hash + fingerprint. Only the hash + fingerprint
|
||||||
|
// are persisted; the plaintext is surfaced once below.
|
||||||
|
let plaintext = enrollment_keys::generate_enrollment_key();
|
||||||
|
let key_hash = enrollment_keys::hash_enrollment_key(&plaintext).map_err(|e| {
|
||||||
|
tracing::error!("Failed to hash enrollment key: {}", e);
|
||||||
|
ApiError::new(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
"INTERNAL_ERROR",
|
||||||
|
"Failed to hash enrollment key",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let fingerprint = enrollment_keys::compute_fingerprint(&plaintext);
|
||||||
|
|
||||||
|
let new_key = db::enrollment_keys::rotate_key(db.pool(), site.id, &key_hash, &fingerprint)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB error rotating enrollment key: {}", e);
|
||||||
|
ApiError::new(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
"INTERNAL_ERROR",
|
||||||
|
"Failed to rotate enrollment key",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let fingerprint_label =
|
||||||
|
enrollment_keys::render_fingerprint(new_key.version, &new_key.fingerprint);
|
||||||
|
|
||||||
|
// Audit WITHOUT key material (no plaintext, no hash).
|
||||||
|
if let Err(e) = db::events::log_enrollment_event(
|
||||||
|
db.pool(),
|
||||||
|
db::events::EventTypes::ENROLLMENT_KEY_ROTATED,
|
||||||
|
serde_json::json!({
|
||||||
|
"site_id": site.id,
|
||||||
|
"site_code": site.site_code,
|
||||||
|
"version": new_key.version,
|
||||||
|
"fingerprint": new_key.fingerprint,
|
||||||
|
"rotated_by": admin.username,
|
||||||
|
}),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
tracing::warn!("[ENROLL] failed to write key-rotate audit event: {}", e);
|
||||||
|
}
|
||||||
|
tracing::info!(
|
||||||
|
"Admin {} rotated enrollment key for site {} to {}",
|
||||||
|
admin.username,
|
||||||
|
site.site_code,
|
||||||
|
fingerprint_label
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
StatusCode::CREATED,
|
||||||
|
Json(RotatedEnrollmentKey {
|
||||||
|
site_id: site.id,
|
||||||
|
key: plaintext,
|
||||||
|
version: new_key.version,
|
||||||
|
fingerprint: new_key.fingerprint,
|
||||||
|
fingerprint_label,
|
||||||
|
}),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GET /api/sites/:id/enrollment-key — current non-secret fingerprint + version.
|
||||||
|
pub async fn get_enrollment_key(
|
||||||
|
AdminUser(_admin): AdminUser,
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(site_id): Path<Uuid>,
|
||||||
|
) -> ApiResult<Json<EnrollmentKeyView>> {
|
||||||
|
let db = require_db(&state)?;
|
||||||
|
let site = resolve_site(db, site_id).await?;
|
||||||
|
|
||||||
|
let key = db::enrollment_keys::get_active_for_site(db.pool(), site.id)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB error loading enrollment key: {}", e);
|
||||||
|
ApiError::new(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
"INTERNAL_ERROR",
|
||||||
|
"Internal server error",
|
||||||
|
)
|
||||||
|
})?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
ApiError::new(
|
||||||
|
StatusCode::NOT_FOUND,
|
||||||
|
"NO_ENROLLMENT_KEY",
|
||||||
|
"Site has no active enrollment key",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let fingerprint_label = enrollment_keys::render_fingerprint(key.version, &key.fingerprint);
|
||||||
|
|
||||||
|
Ok(Json(EnrollmentKeyView {
|
||||||
|
site_id: site.id,
|
||||||
|
version: key.version,
|
||||||
|
fingerprint: key.fingerprint,
|
||||||
|
fingerprint_label,
|
||||||
|
active: key.active,
|
||||||
|
}))
|
||||||
|
}
|
||||||
191
server/src/auth/enrollment_keys.rs
Normal file
191
server/src/auth/enrollment_keys.rs
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
//! Per-site enrollment key minting, hashing, verification, and fingerprinting
|
||||||
|
//! (SPEC-016 zero-touch enrollment, auth layer).
|
||||||
|
//!
|
||||||
|
//! This is the low-sensitivity, rotatable side of the two-tier credential model
|
||||||
|
//! (SPEC-016 §Security). A per-site ENROLLMENT key (`cek_` prefix) gates "may
|
||||||
|
//! this machine register at all" at `POST /api/enroll`; a successful enroll mints
|
||||||
|
//! the high-sensitivity per-machine `cak_` operating credential
|
||||||
|
//! ([`crate::auth::agent_keys`]). Compromise of an enrollment key is contained to
|
||||||
|
//! one site and recovered by rotating it.
|
||||||
|
//!
|
||||||
|
//! Lifecycle owned here (the secret side):
|
||||||
|
//!
|
||||||
|
//! - [`generate_enrollment_key`] mints a high-entropy, `cek_`-prefixed plaintext
|
||||||
|
//! secret. Mirrors [`crate::auth::agent_keys::generate_agent_key`]'s entropy
|
||||||
|
//! approach (32 random bytes from the OS CSPRNG, hex-encoded) with a DISTINCT
|
||||||
|
//! prefix so the two key kinds are never confused in logs or storage. The
|
||||||
|
//! plaintext is shown to the operator exactly once at issue/rotate and is NEVER
|
||||||
|
//! persisted or logged.
|
||||||
|
//! - [`hash_enrollment_key`] / [`verify_enrollment_key`] use **Argon2id** (via
|
||||||
|
//! [`crate::auth::password`]). This DIFFERS from `cak_` (which uses SHA-256 for
|
||||||
|
//! a constant-shape equality lookup): SPEC-016 §2 explicitly requires the
|
||||||
|
//! enrollment key be "stored hashed (Argon2id, same as `cak_`/passwords)". The
|
||||||
|
//! trade-off is deliberate — enrollment keys are looked up by `(site, active)`
|
||||||
|
//! first (a small candidate set, usually one row) and only then verified, so the
|
||||||
|
//! per-verify KDF cost is bounded and not on a high-QPS path, while Argon2id
|
||||||
|
//! gives salted, GPU-resistant storage matching the password posture.
|
||||||
|
//! - [`compute_fingerprint`] derives the non-secret short HEX code shown as
|
||||||
|
//! `vN (XXXX)` (SPEC-016 resolved-decision #3 — hex, deliberately NOT the
|
||||||
|
//! GuruRMM word-style code, so the two products' artifacts are never visually
|
||||||
|
//! conflated).
|
||||||
|
//!
|
||||||
|
//! SECURITY: never log a plaintext key or its hash. Functions here return the
|
||||||
|
//! plaintext to the caller (issue/rotate endpoint) but emit no `tracing` output
|
||||||
|
//! containing key material.
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use rand::RngCore;
|
||||||
|
use ring::digest;
|
||||||
|
|
||||||
|
/// Leading marker of every GuruConnect per-site enrollment key. It differs from
/// the per-agent `cak_` prefix so the two key kinds cannot be mistaken for each
/// other.
pub const ENROLLMENT_KEY_PREFIX: &'static str = "cek_";

/// Count of random bytes in an enrollment key: 32 bytes = 256 bits of entropy,
/// matching [`crate::auth::agent_keys`]. SPEC-016 §2 requires ≥256-bit.
const ENROLLMENT_KEY_RANDOM_BYTES: usize = 32;

/// Length, in hex characters, of the fingerprint code (the `XXXX` in
/// `vN (XXXX)`). Four hex characters are 16 bits: enough for an operator to tell
/// two installers apart at a glance. The code is a non-secret display aid, not a
/// security control.
const FINGERPRINT_HEX_LEN: usize = 4;
|
||||||
|
|
||||||
|
/// Generate a new high-entropy, `cek_`-prefixed per-site enrollment key (plaintext).
|
||||||
|
///
|
||||||
|
/// The returned string is the ONLY time the plaintext exists; the caller must
|
||||||
|
/// surface it to the operator once and store only [`hash_enrollment_key`] of it.
|
||||||
|
/// Uses the OS CSPRNG via `rand::rngs::OsRng`.
|
||||||
|
pub fn generate_enrollment_key() -> String {
|
||||||
|
let mut bytes = [0u8; ENROLLMENT_KEY_RANDOM_BYTES];
|
||||||
|
rand::rngs::OsRng.fill_bytes(&mut bytes);
|
||||||
|
format!("{}{}", ENROLLMENT_KEY_PREFIX, hex_encode(&bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Hash an enrollment key for storage using Argon2id (SPEC-016 §2).
|
||||||
|
///
|
||||||
|
/// Delegates to [`crate::auth::password::hash_password`] so the KDF parameters and
|
||||||
|
/// salt generation match the password posture exactly. Returns the PHC-format
|
||||||
|
/// string Postgres stores in `site_enrollment_keys.key_hash`.
|
||||||
|
pub fn hash_enrollment_key(plaintext: &str) -> Result<String> {
|
||||||
|
crate::auth::password::hash_password(plaintext)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify a presented enrollment key against a stored Argon2id hash.
|
||||||
|
///
|
||||||
|
/// Returns `Ok(true)` on a match. A malformed stored hash or a mismatch yields
|
||||||
|
/// `Ok(false)` / an `Err` from the underlying verifier; the caller treats any
|
||||||
|
/// non-`Ok(true)` as a rejection. A cheap structural reject (`cek_` prefix) runs
|
||||||
|
/// first to skip the KDF on obviously-bogus input.
|
||||||
|
///
|
||||||
|
/// SECURITY: only compares; never logs the presented key or the hash.
|
||||||
|
pub fn verify_enrollment_key(presented: &str, stored_hash: &str) -> bool {
|
||||||
|
if !presented.starts_with(ENROLLMENT_KEY_PREFIX) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
crate::auth::password::verify_password(presented, stored_hash).unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the non-secret short HEX fingerprint code for an enrollment key.
|
||||||
|
///
|
||||||
|
/// Derived as the first [`FINGERPRINT_HEX_LEN`] hex chars of the SHA-256 of the
|
||||||
|
/// plaintext secret, uppercased. This is a stable, non-reversible tag of the secret
|
||||||
|
/// (knowing the code does not reveal the key) used purely for display. Pair it with
|
||||||
|
/// the monotonic version via [`render_fingerprint`].
|
||||||
|
pub fn compute_fingerprint(plaintext: &str) -> String {
|
||||||
|
let d = digest::digest(&digest::SHA256, plaintext.as_bytes());
|
||||||
|
let hex = hex_encode(d.as_ref());
|
||||||
|
hex[..FINGERPRINT_HEX_LEN].to_ascii_uppercase()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the operator-facing fingerprint label `vN (XXXX)` (SPEC-016 §2).
///
/// `version` is the monotonic rotation counter and `code` is the output of
/// [`compute_fingerprint`]. For example `render_fingerprint(3, "7F2A")` gives
/// `"v3 (7F2A)"`.
pub fn render_fingerprint(version: i32, code: &str) -> String {
    let mut label = String::from("v");
    label.push_str(&version.to_string());
    label.push_str(" (");
    label.push_str(code);
    label.push(')');
    label
}
|
||||||
|
|
||||||
|
/// Encode bytes as lowercase hexadecimal, two characters per byte, without
/// depending on the `hex` crate (mirrors [`crate::auth::agent_keys`]).
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        encoded.push(DIGITS[usize::from(byte >> 4)] as char);
        encoded.push(DIGITS[usize::from(byte & 0x0f)] as char);
    }
    encoded
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A minted key starts with the prefix and has prefix + 2 hex chars per byte.
    #[test]
    fn generated_key_is_prefixed_and_high_entropy() {
        let minted = generate_enrollment_key();
        let expected_len = ENROLLMENT_KEY_PREFIX.len() + ENROLLMENT_KEY_RANDOM_BYTES * 2;

        assert!(minted.starts_with(ENROLLMENT_KEY_PREFIX));
        assert_eq!(minted.len(), expected_len);
    }

    /// Two consecutive mints never coincide.
    #[test]
    fn generated_keys_are_unique() {
        let first = generate_enrollment_key();
        let second = generate_enrollment_key();
        assert_ne!(first, second);
    }

    /// A key verifies against its own hash.
    #[test]
    fn hash_and_verify_roundtrip() {
        let minted = generate_enrollment_key();
        let stored = hash_enrollment_key(&minted).expect("hash");
        assert!(verify_enrollment_key(&minted, &stored));
    }

    /// A different key does not verify against the hash.
    #[test]
    fn verify_rejects_wrong_key() {
        let minted = generate_enrollment_key();
        let stored = hash_enrollment_key(&minted).expect("hash");

        let impostor = generate_enrollment_key();
        assert!(!verify_enrollment_key(&impostor, &stored));
    }

    /// Input lacking the cek_ prefix is structurally rejected before the KDF.
    #[test]
    fn verify_rejects_unprefixed_input_without_touching_kdf() {
        let minted = generate_enrollment_key();
        let stored = hash_enrollment_key(&minted).expect("hash");
        assert!(!verify_enrollment_key("not-a-key", &stored));
    }

    /// A garbage stored hash must not panic and must reject.
    #[test]
    fn verify_rejects_malformed_stored_hash() {
        let minted = generate_enrollment_key();
        assert!(!verify_enrollment_key(&minted, "not-a-phc-hash"));
    }

    /// The fingerprint is deterministic, of the configured length, hex, uppercase.
    #[test]
    fn fingerprint_is_stable_uppercase_hex_of_expected_len() {
        let sample = "cek_deadbeef";
        let first = compute_fingerprint(sample);
        let second = compute_fingerprint(sample);

        assert_eq!(first, second);
        assert_eq!(first.len(), FINGERPRINT_HEX_LEN);
        assert!(first.chars().all(|c| c.is_ascii_hexdigit()));
        assert_eq!(first, first.to_ascii_uppercase());
    }

    /// Distinct keys produce distinct fingerprints for these samples.
    #[test]
    fn fingerprint_differs_per_key() {
        let left = compute_fingerprint("cek_aaa");
        let right = compute_fingerprint("cek_bbb");
        assert_ne!(left, right);
    }

    /// The rendered label has the `vN (XXXX)` shape.
    #[test]
    fn render_fingerprint_matches_spec_shape() {
        let label = render_fingerprint(3, "7F2A");
        assert_eq!(label, "v3 (7F2A)");
    }
}
|
||||||
@@ -4,6 +4,7 @@
|
|||||||
//! validation for agents.
|
//! validation for agents.
|
||||||
|
|
||||||
pub mod agent_keys;
|
pub mod agent_keys;
|
||||||
|
pub mod enrollment_keys;
|
||||||
pub mod jwt;
|
pub mod jwt;
|
||||||
pub mod password;
|
pub mod password;
|
||||||
pub mod token_blacklist;
|
pub mod token_blacklist;
|
||||||
|
|||||||
141
server/src/db/enrollment_keys.rs
Normal file
141
server/src/db/enrollment_keys.rs
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
//! Per-site enrollment key database operations (SPEC-016 zero-touch enrollment).
|
||||||
|
//!
|
||||||
|
//! Backs the `site_enrollment_keys` table (migration 010). Stores ONLY the
|
||||||
|
//! Argon2id hash of the `cek_` secret plus the non-secret rotation metadata
|
||||||
|
//! (version, fingerprint, active flag). Computing the hash and minting the
|
||||||
|
//! plaintext is [`crate::auth::enrollment_keys`]'s job; this module is
|
||||||
|
//! hash-agnostic persistence and takes already-hashed values.
|
||||||
|
//!
|
||||||
|
//! Rotation invariant: at most one `active` row per site (enforced by a partial
|
||||||
|
//! unique index in migration 010). [`rotate_key`] deactivates the current active
|
||||||
|
//! row and inserts a new active one inside a single transaction so the invariant
|
||||||
|
//! is never transiently violated.
|
||||||
|
//!
|
||||||
|
//! All queries use runtime `sqlx::query()` / `sqlx::query_as()` per the codebase
|
||||||
|
//! convention (no compile-time `query!` macros, no `.sqlx` offline cache).
|
||||||
|
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// Per-site enrollment key record.
|
||||||
|
///
|
||||||
|
/// `key_hash` is the only representation of the secret the server stores; the
|
||||||
|
/// plaintext is shown once at issue/rotate and never persisted.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||||
|
pub struct EnrollmentKey {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub site_id: Uuid,
|
||||||
|
pub key_hash: String,
|
||||||
|
pub version: i32,
|
||||||
|
pub fingerprint: String,
|
||||||
|
pub active: bool,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
pub rotated_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetch the active enrollment key for a site, if any.
|
||||||
|
///
|
||||||
|
/// This is the `/api/enroll` hot path: resolve the one active key whose hash the
|
||||||
|
/// presented `cek_` is verified against. The partial unique index guarantees at
|
||||||
|
/// most one active row, so `fetch_optional` is correct.
|
||||||
|
pub async fn get_active_for_site(
|
||||||
|
pool: &PgPool,
|
||||||
|
site_id: Uuid,
|
||||||
|
) -> Result<Option<EnrollmentKey>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, EnrollmentKey>(
|
||||||
|
r#"
|
||||||
|
SELECT id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at
|
||||||
|
FROM site_enrollment_keys
|
||||||
|
WHERE site_id = $1 AND active
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(site_id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert the FIRST enrollment key for a site at version 1 (initial issue).
|
||||||
|
///
|
||||||
|
/// Use [`rotate_key`] for subsequent rotations. Errors with a unique violation if
|
||||||
|
/// the site already has an active key (the caller should rotate instead).
|
||||||
|
#[allow(dead_code)] // Wired by site-admin issue flow; Phase A exposes rotation (which also covers first issue when none exists).
|
||||||
|
pub async fn insert_initial_key(
|
||||||
|
pool: &PgPool,
|
||||||
|
site_id: Uuid,
|
||||||
|
key_hash: &str,
|
||||||
|
fingerprint: &str,
|
||||||
|
) -> Result<EnrollmentKey, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, EnrollmentKey>(
|
||||||
|
r#"
|
||||||
|
INSERT INTO site_enrollment_keys (site_id, key_hash, version, fingerprint, active)
|
||||||
|
VALUES ($1, $2, 1, $3, true)
|
||||||
|
RETURNING id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(site_id)
|
||||||
|
.bind(key_hash)
|
||||||
|
.bind(fingerprint)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Rotate a site's enrollment key (SPEC-016 §2): deactivate the current active key
|
||||||
|
/// (if any) and insert a new active key at the next monotonic version, all in one
|
||||||
|
/// transaction.
|
||||||
|
///
|
||||||
|
/// Returns the newly-created active key. If the site has no key yet, this issues
|
||||||
|
/// version 1 (so rotation also serves as first-issue). The caller passes the
|
||||||
|
/// already-hashed new secret and its fingerprint; the plaintext is surfaced once by
|
||||||
|
/// the caller and never reaches this layer.
|
||||||
|
///
|
||||||
|
/// The transaction is what keeps the "at most one active key per site" invariant
|
||||||
|
/// (partial unique index) from being transiently violated between the UPDATE and
|
||||||
|
/// the INSERT.
|
||||||
|
pub async fn rotate_key(
|
||||||
|
pool: &PgPool,
|
||||||
|
site_id: Uuid,
|
||||||
|
new_key_hash: &str,
|
||||||
|
new_fingerprint: &str,
|
||||||
|
) -> Result<EnrollmentKey, sqlx::Error> {
|
||||||
|
let mut tx = pool.begin().await?;
|
||||||
|
|
||||||
|
// Highest existing version for this site (NULL -> 0 so the first key is v1).
|
||||||
|
let current_max: Option<i32> =
|
||||||
|
sqlx::query_scalar("SELECT MAX(version) FROM site_enrollment_keys WHERE site_id = $1")
|
||||||
|
.bind(site_id)
|
||||||
|
.fetch_one(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
let next_version = current_max.unwrap_or(0) + 1;
|
||||||
|
|
||||||
|
// Deactivate the current active key (if any), stamping rotated_at.
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
UPDATE site_enrollment_keys
|
||||||
|
SET active = false, rotated_at = NOW()
|
||||||
|
WHERE site_id = $1 AND active
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(site_id)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Insert the new active key at the next version.
|
||||||
|
let new_key = sqlx::query_as::<_, EnrollmentKey>(
|
||||||
|
r#"
|
||||||
|
INSERT INTO site_enrollment_keys (site_id, key_hash, version, fingerprint, active)
|
||||||
|
VALUES ($1, $2, $3, $4, true)
|
||||||
|
RETURNING id, site_id, key_hash, version, fingerprint, active, created_at, rotated_at
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(site_id)
|
||||||
|
.bind(new_key_hash)
|
||||||
|
.bind(next_version)
|
||||||
|
.bind(new_fingerprint)
|
||||||
|
.fetch_one(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tx.commit().await?;
|
||||||
|
Ok(new_key)
|
||||||
|
}
|
||||||
@@ -69,6 +69,40 @@ impl EventTypes {
|
|||||||
pub const MACHINE_REMOVED: &'static str = "machine_removed";
|
pub const MACHINE_REMOVED: &'static str = "machine_removed";
|
||||||
/// An administrator soft-deleted (purged) a session and dropped it in-memory.
|
/// An administrator soft-deleted (purged) a session and dropped it in-memory.
|
||||||
pub const SESSION_REMOVED: &'static str = "session_removed";
|
pub const SESSION_REMOVED: &'static str = "session_removed";
|
||||||
|
|
||||||
|
// Zero-touch enrollment events (SPEC-016), emitted by POST /api/enroll and by the
// site enrollment-key rotation endpoint. They have no session, so they are written
// through `log_enrollment_event` with `session_id = NULL`. Structured detail
// (machine_uid, site_code, fingerprint, etc.) goes in `details`; the source IP
// goes in `ip_address`.

/// First-time self-registration of a machine at a site; its first `cak_` was minted.
pub const ENROLL_NEW: &'static str = "enroll_new";

/// A known machine_uid re-enrolled at the SAME site (re-image / re-install): the
/// existing row was reused and a fresh `cak_` minted.
pub const ENROLL_REUSE: &'static str = "enroll_reuse";

/// A known machine_uid enrolled under a DIFFERENT site and its site binding was
/// updated (a "site move"). Fires an alert.
///
/// NOTE (SPEC-016 Phase A): the unauthenticated enroll path does NOT perform this
/// move. A cross-site enroll is REFUSED (`ENROLL_SITE_CONFLICT`) instead of
/// silently repointing the machine. This event is reserved for the deliberate
/// Phase-B `--reassign` flow (and the dashboard move action) that supersede it.
#[allow(dead_code)] // reserved for Phase-B --reassign; not emitted by Phase A enroll
pub const ENROLL_SITE_MOVE: &'static str = "enroll_site_move";

/// A known machine_uid presented a valid key for a site other than the one it is
/// currently bound to. Phase A REFUSES this (no move, no key minted) as the
/// accidental-move / cross-site-hijack guard; the deliberate move arrives with
/// the Phase-B `--reassign` flow + dashboard. Fires an alert.
pub const ENROLL_SITE_CONFLICT: &'static str = "enroll_site_conflict";

/// A machine_uid collision was detected at enroll: the endpoint was placed in
/// `pending` and waits for operator confirmation in the dashboard. Fires an alert.
pub const ENROLL_COLLISION_PENDING: &'static str = "enroll_collision_pending";

/// Enrollment-key verification failed (wrong/inactive key or unknown site_code).
/// Security audit trail for the open-registration surface.
pub const ENROLL_REJECTED: &'static str = "enroll_rejected";

/// An administrator rotated a site's enrollment key (new version + fingerprint;
/// old installers can no longer enroll NEW machines).
pub const ENROLLMENT_KEY_ROTATED: &'static str = "enrollment_key_rotated";
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Log a session event
|
/// Log a session event
|
||||||
@@ -154,6 +188,42 @@ pub async fn log_admin_removal(
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Log a zero-touch enrollment audit event (SPEC-016).
|
||||||
|
///
|
||||||
|
/// Shares the `connect_session_events` audit table but carries no session
|
||||||
|
/// (`session_id = NULL`, the FK column is nullable) and no viewer — enrollment is
|
||||||
|
/// an unauthenticated agent action, not a viewer/session event. The structured
|
||||||
|
/// detail (machine_uid, site_code, fingerprint version, decision, etc.) goes in
|
||||||
|
/// `details` and the agent's source IP in `ip_address`.
|
||||||
|
///
|
||||||
|
/// Best-effort: a failure to write the audit row must NOT fail the enroll (the
|
||||||
|
/// machine row and `cak_` already exist); the caller logs the error and proceeds,
|
||||||
|
/// matching how the relay and Task-5 removal treat audit writes.
|
||||||
|
pub async fn log_enrollment_event(
|
||||||
|
pool: &PgPool,
|
||||||
|
event_type: &str,
|
||||||
|
details: JsonValue,
|
||||||
|
ip_address: Option<IpAddr>,
|
||||||
|
) -> Result<i64, sqlx::Error> {
|
||||||
|
let ip_str = ip_address.map(|ip| ip.to_string());
|
||||||
|
|
||||||
|
let result = sqlx::query_scalar::<_, i64>(
|
||||||
|
r#"
|
||||||
|
INSERT INTO connect_session_events
|
||||||
|
(session_id, event_type, viewer_id, viewer_name, details, ip_address)
|
||||||
|
VALUES (NULL, $1, NULL, NULL, $2, $3::inet)
|
||||||
|
RETURNING id
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(event_type)
|
||||||
|
.bind(details)
|
||||||
|
.bind(ip_str)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
/// Get events for a session
|
/// Get events for a session
|
||||||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||||||
pub async fn get_session_events(
|
pub async fn get_session_events(
|
||||||
|
|||||||
@@ -64,6 +64,16 @@ pub struct Machine {
|
|||||||
/// history) is retained. NULL = live. Nullable, so it is read NULL-tolerantly
|
/// history) is retained. NULL = live. Nullable, so it is read NULL-tolerantly
|
||||||
/// in the manual `FromRow` below.
|
/// in the manual `FromRow` below.
|
||||||
pub deleted_at: Option<DateTime<Utc>>,
|
pub deleted_at: Option<DateTime<Utc>>,
|
||||||
|
/// Relational site binding for a machine enrolled via `/api/enroll` (SPEC-016,
|
||||||
|
/// migration 010). NULL for legacy / support-code / connect-path machines that
|
||||||
|
/// never enrolled through the zero-touch flow. A change of this on re-enroll is
|
||||||
|
/// the "site move" the enroll path audits.
|
||||||
|
pub site_id: Option<Uuid>,
|
||||||
|
/// Collision-gate state (SPEC-016, migration 010): `'active'` (live, auto-approve)
|
||||||
|
/// or `'pending'` (a machine_uid collision was detected at enroll; awaiting
|
||||||
|
/// operator confirmation before the endpoint may be controlled). Non-null with a
|
||||||
|
/// default of `'active'`; read NULL-tolerantly below for defense in depth.
|
||||||
|
pub enrollment_state: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'r> FromRow<'r, PgRow> for Machine {
|
impl<'r> FromRow<'r, PgRow> for Machine {
|
||||||
@@ -83,6 +93,13 @@ impl<'r> FromRow<'r, PgRow> for Machine {
|
|||||||
machine_uid: row.try_get("machine_uid")?,
|
machine_uid: row.try_get("machine_uid")?,
|
||||||
// Schema-nullable (migration 009); decode directly as Option.
|
// Schema-nullable (migration 009); decode directly as Option.
|
||||||
deleted_at: row.try_get("deleted_at")?,
|
deleted_at: row.try_get("deleted_at")?,
|
||||||
|
// Schema-nullable (migration 010); decode directly as Option.
|
||||||
|
site_id: row.try_get("site_id")?,
|
||||||
|
// Non-null with default 'active' (migration 010); read NULL-tolerantly
|
||||||
|
// (older snapshots / partial rows) and fall back to 'active'.
|
||||||
|
enrollment_state: row
|
||||||
|
.try_get::<Option<String>, _>("enrollment_state")?
|
||||||
|
.unwrap_or_else(|| "active".to_string()),
|
||||||
// Nullable-with-default columns mapped to non-`Option` Rust types: read as
|
// Nullable-with-default columns mapped to non-`Option` Rust types: read as
|
||||||
// `Option<T>` and fall back to the type default so a NULL cell never errors.
|
// `Option<T>` and fall back to the type default so a NULL cell never errors.
|
||||||
is_elevated: row
|
is_elevated: row
|
||||||
@@ -166,7 +183,7 @@ pub async fn upsert_machine(
|
|||||||
r#"
|
r#"
|
||||||
INSERT INTO connect_machines (agent_id, hostname, is_persistent, status, last_seen, machine_uid)
|
INSERT INTO connect_machines (agent_id, hostname, is_persistent, status, last_seen, machine_uid)
|
||||||
VALUES ($1, $2, $3, 'online', NOW(), $4)
|
VALUES ($1, $2, $3, 'online', NOW(), $4)
|
||||||
ON CONFLICT (machine_uid) DO UPDATE SET
|
ON CONFLICT (machine_uid) WHERE machine_uid IS NOT NULL DO UPDATE SET
|
||||||
agent_id = EXCLUDED.agent_id,
|
agent_id = EXCLUDED.agent_id,
|
||||||
hostname = EXCLUDED.hostname,
|
hostname = EXCLUDED.hostname,
|
||||||
status = 'online',
|
status = 'online',
|
||||||
@@ -207,6 +224,131 @@ pub async fn upsert_machine(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Find a machine by the SPEC-016 per-tenant dedup key `(tenant_id, machine_uid)`.
|
||||||
|
///
|
||||||
|
/// This is the enroll-time dedup lookup: the same hardware re-enrolling (re-image /
|
||||||
|
/// re-install) resolves to its existing row within the tenant, while the same
|
||||||
|
/// hardware in a DIFFERENT tenant is a distinct row (resolved-decision #4). Tenant
|
||||||
|
/// scoping uses the same default-tenant fold as the unique index so the lookup
|
||||||
|
/// matches the uniqueness guarantee.
|
||||||
|
///
|
||||||
|
/// Unlike `get_machine_by_agent_id`, this deliberately does NOT filter
|
||||||
|
/// `deleted_at IS NULL`: a previously operator-purged machine that legitimately
|
||||||
|
/// re-enrolls must be found so the enroll path can revive it (clearing
|
||||||
|
/// `deleted_at`), mirroring the connect-path revive in `upsert_machine`.
|
||||||
|
pub async fn get_machine_by_tenant_uid(
|
||||||
|
pool: &PgPool,
|
||||||
|
tenant_id: Uuid,
|
||||||
|
machine_uid: &str,
|
||||||
|
) -> Result<Option<Machine>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Machine>(
|
||||||
|
r#"
|
||||||
|
SELECT * FROM connect_machines
|
||||||
|
WHERE machine_uid = $1
|
||||||
|
AND COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid) = $2
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(machine_uid)
|
||||||
|
.bind(tenant_id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parameters for an enroll-time machine create/update (SPEC-016 `/api/enroll`).
|
||||||
|
///
|
||||||
|
/// `agent_id` is a freshly minted opaque id for a NEW enrollment (the agent's
|
||||||
|
/// config UUID story is Phase B; the server only needs a unique non-null value for
|
||||||
|
/// the `agent_id UNIQUE` column). On REUSE/MOVE the existing row's `agent_id` is
|
||||||
|
/// preserved (the FK target of any already-minted `cak_`), so the update path does
|
||||||
|
/// not touch it.
|
||||||
|
pub struct EnrollMachineParams<'a> {
|
||||||
|
pub agent_id: &'a str,
|
||||||
|
pub hostname: &'a str,
|
||||||
|
pub machine_uid: &'a str,
|
||||||
|
pub tenant_id: Uuid,
|
||||||
|
pub site_id: Uuid,
|
||||||
|
/// Company label (-> connect_machines.organization).
|
||||||
|
pub company: Option<&'a str>,
|
||||||
|
/// Site label (-> connect_machines.site) — the free-text label, distinct from
|
||||||
|
/// the relational site_id binding.
|
||||||
|
pub site_label: Option<&'a str>,
|
||||||
|
pub tags: &'a [String],
|
||||||
|
/// 'active' (auto-approve) or 'pending' (collision-gated).
|
||||||
|
pub enrollment_state: &'a str,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a NEW machine row for a first-time enrollment (SPEC-016).
|
||||||
|
///
|
||||||
|
/// Carries the labels, the relational `site_id`, the per-tenant `machine_uid`, and
|
||||||
|
/// the collision-gate `enrollment_state`. Persistent + online. Returns the created
|
||||||
|
/// row (its `id` is the FK target for the `cak_` the caller mints next).
|
||||||
|
pub async fn insert_enrolled_machine(
|
||||||
|
pool: &PgPool,
|
||||||
|
p: &EnrollMachineParams<'_>,
|
||||||
|
) -> Result<Machine, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Machine>(
|
||||||
|
r#"
|
||||||
|
INSERT INTO connect_machines
|
||||||
|
(agent_id, hostname, is_persistent, status, last_seen, machine_uid,
|
||||||
|
tenant_id, site_id, organization, site, tags, enrollment_state)
|
||||||
|
VALUES ($1, $2, true, 'online', NOW(), $3, $4, $5, $6, $7, $8, $9)
|
||||||
|
RETURNING *
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(p.agent_id)
|
||||||
|
.bind(p.hostname)
|
||||||
|
.bind(p.machine_uid)
|
||||||
|
.bind(p.tenant_id)
|
||||||
|
.bind(p.site_id)
|
||||||
|
.bind(p.company)
|
||||||
|
.bind(p.site_label)
|
||||||
|
.bind(p.tags)
|
||||||
|
.bind(p.enrollment_state)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update an EXISTING machine row on re-enroll / reuse / site-move (SPEC-016).
|
||||||
|
///
|
||||||
|
/// Refreshes hostname, site binding (`site_id`), labels, and `enrollment_state`,
|
||||||
|
/// and revives a soft-deleted row (`deleted_at = NULL`) — a re-enroll of a purged
|
||||||
|
/// host means it is live again, mirroring `upsert_machine`'s revive. Deliberately
|
||||||
|
/// does NOT change `agent_id`: the existing id is the FK target of any prior `cak_`.
|
||||||
|
/// Labels are COALESCE-merged so an enroll that omits a label does not wipe an
|
||||||
|
/// existing value; `tags` is overwritten only when a non-empty set is supplied
|
||||||
|
/// (matching `update_machine_metadata`'s convention).
|
||||||
|
pub async fn update_enrolled_machine(
|
||||||
|
pool: &PgPool,
|
||||||
|
machine_id: Uuid,
|
||||||
|
p: &EnrollMachineParams<'_>,
|
||||||
|
) -> Result<Machine, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Machine>(
|
||||||
|
r#"
|
||||||
|
UPDATE connect_machines SET
|
||||||
|
hostname = $2,
|
||||||
|
site_id = $3,
|
||||||
|
organization = COALESCE($4, organization),
|
||||||
|
site = COALESCE($5, site),
|
||||||
|
tags = CASE WHEN $6::text[] = '{}' THEN tags ELSE $6 END,
|
||||||
|
enrollment_state = $7,
|
||||||
|
status = 'online',
|
||||||
|
last_seen = NOW(),
|
||||||
|
deleted_at = NULL
|
||||||
|
WHERE id = $1
|
||||||
|
RETURNING *
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(machine_id)
|
||||||
|
.bind(p.hostname)
|
||||||
|
.bind(p.site_id)
|
||||||
|
.bind(p.company)
|
||||||
|
.bind(p.site_label)
|
||||||
|
.bind(p.tags)
|
||||||
|
.bind(p.enrollment_state)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
/// Update machine status and info
|
/// Update machine status and info
|
||||||
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
#[allow(dead_code)] // TODO(native-remote-control): consumed by the integration API; see docs/specs/native-remote-control/
|
||||||
pub async fn update_machine_status(
|
pub async fn update_machine_status(
|
||||||
|
|||||||
@@ -4,10 +4,12 @@
|
|||||||
//! Optional - server works without database if DATABASE_URL not set.
|
//! Optional - server works without database if DATABASE_URL not set.
|
||||||
|
|
||||||
pub mod agent_keys;
|
pub mod agent_keys;
|
||||||
|
pub mod enrollment_keys;
|
||||||
pub mod events;
|
pub mod events;
|
||||||
pub mod machines;
|
pub mod machines;
|
||||||
pub mod releases;
|
pub mod releases;
|
||||||
pub mod sessions;
|
pub mod sessions;
|
||||||
|
pub mod sites;
|
||||||
pub mod support_codes;
|
pub mod support_codes;
|
||||||
pub mod tenancy;
|
pub mod tenancy;
|
||||||
pub mod users;
|
pub mod users;
|
||||||
|
|||||||
94
server/src/db/sites.rs
Normal file
94
server/src/db/sites.rs
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
//! Site database operations (SPEC-016 zero-touch enrollment).
|
||||||
|
//!
|
||||||
|
//! Backs the `connect_sites` table (migration 010): the relational anchor a
|
||||||
|
//! per-site enrollment key hangs off and the `/api/enroll` flow resolves by
|
||||||
|
//! `site_code`. See the migration header for why this table exists (the prior
|
||||||
|
//! schema modeled "site" only as a free-text column on `connect_machines`).
|
||||||
|
//!
|
||||||
|
//! All queries use runtime `sqlx::query()` / `sqlx::query_as()` per the codebase
|
||||||
|
//! convention (no compile-time `query!` macros, no `.sqlx` offline cache).
|
||||||
|
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// Site record from the database.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
|
||||||
|
pub struct Site {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub site_code: String,
|
||||||
|
pub display_name: Option<String>,
|
||||||
|
pub company: Option<String>,
|
||||||
|
pub tenant_id: Option<Uuid>,
|
||||||
|
/// RESERVED for future per-site enrollment POLICY work (SPEC-016 §out-of-scope).
|
||||||
|
/// Not enforced in Phase A.
|
||||||
|
pub enrollment_policy: Option<String>,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve a site by its operator-facing `site_code`, scoped to the given tenant.
|
||||||
|
///
|
||||||
|
/// Tenant scoping uses the same default-tenant fold as the unique index so the
|
||||||
|
/// lookup matches the uniqueness guarantee: `(COALESCE(tenant_id, default),
|
||||||
|
/// site_code)`. Returns `None` if no site with that code exists in the tenant.
|
||||||
|
pub async fn get_site_by_code(
|
||||||
|
pool: &PgPool,
|
||||||
|
site_code: &str,
|
||||||
|
tenant_id: Uuid,
|
||||||
|
) -> Result<Option<Site>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Site>(
|
||||||
|
r#"
|
||||||
|
SELECT id, site_code, display_name, company, tenant_id, enrollment_policy, created_at
|
||||||
|
FROM connect_sites
|
||||||
|
WHERE site_code = $1
|
||||||
|
AND COALESCE(tenant_id, '00000000-0000-0000-0000-000000000001'::uuid) = $2
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(site_code)
|
||||||
|
.bind(tenant_id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetch a site by its primary-key UUID.
|
||||||
|
pub async fn get_site_by_id(pool: &PgPool, id: Uuid) -> Result<Option<Site>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Site>(
|
||||||
|
r#"
|
||||||
|
SELECT id, site_code, display_name, company, tenant_id, enrollment_policy, created_at
|
||||||
|
FROM connect_sites
|
||||||
|
WHERE id = $1
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a new site, returning the created row.
|
||||||
|
///
|
||||||
|
/// `tenant_id` is `None`-tolerant and resolved via `db::tenancy::current_tenant_id()`
|
||||||
|
/// at the call site. Errors with a unique-violation if `(tenant, site_code)` already
|
||||||
|
/// exists (the caller maps that to a 409).
|
||||||
|
#[allow(dead_code)] // Wired by the site-admin API (dashboard site CRUD); Phase A exposes key rotation, not site CRUD.
|
||||||
|
pub async fn insert_site(
|
||||||
|
pool: &PgPool,
|
||||||
|
site_code: &str,
|
||||||
|
display_name: Option<&str>,
|
||||||
|
company: Option<&str>,
|
||||||
|
tenant_id: Option<Uuid>,
|
||||||
|
) -> Result<Site, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Site>(
|
||||||
|
r#"
|
||||||
|
INSERT INTO connect_sites (site_code, display_name, company, tenant_id)
|
||||||
|
VALUES ($1, $2, $3, $4)
|
||||||
|
RETURNING id, site_code, display_name, company, tenant_id, enrollment_policy, created_at
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(site_code)
|
||||||
|
.bind(display_name)
|
||||||
|
.bind(company)
|
||||||
|
.bind(tenant_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
@@ -448,6 +448,11 @@ async fn main() -> Result<()> {
|
|||||||
)),
|
)),
|
||||||
)
|
)
|
||||||
.route("/api/codes/:code/cancel", post(cancel_code))
|
.route("/api/codes/:code/cancel", post(cancel_code))
|
||||||
|
// Zero-touch enrollment (SPEC-016). PUBLIC: no JWT — the per-site enrollment
|
||||||
|
// key in the body is the gate, and the handler applies its own
|
||||||
|
// per-(site_code, IP) rate limit / lockout (defense-in-depth). Mounted with
|
||||||
|
// the other public API routes.
|
||||||
|
.route("/api/enroll", post(api::enroll::enroll))
|
||||||
// WebSocket endpoints
|
// WebSocket endpoints
|
||||||
.route("/ws/agent", get(relay::agent_ws_handler))
|
.route("/ws/agent", get(relay::agent_ws_handler))
|
||||||
.route("/ws/viewer", get(relay::viewer_ws_handler))
|
.route("/ws/viewer", get(relay::viewer_ws_handler))
|
||||||
@@ -498,6 +503,18 @@ async fn main() -> Result<()> {
|
|||||||
"/api/machines/:agent_id/keys/:key_id",
|
"/api/machines/:agent_id/keys/:key_id",
|
||||||
delete(api::machine_keys::revoke_key),
|
delete(api::machine_keys::revoke_key),
|
||||||
)
|
)
|
||||||
|
// Per-site enrollment key administration (SPEC-016, admin-only / JWT).
|
||||||
|
// Rotate regenerates the cek_ secret + fingerprint (old installers can no
|
||||||
|
// longer enroll new machines); GET returns the current non-secret
|
||||||
|
// fingerprint/version. Both gated by the AdminUser extractor.
|
||||||
|
.route(
|
||||||
|
"/api/sites/:id/enrollment-key",
|
||||||
|
get(api::sites::get_enrollment_key),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/sites/:id/enrollment-key/rotate",
|
||||||
|
post(api::sites::rotate_enrollment_key),
|
||||||
|
)
|
||||||
// REST API - Releases and Version
|
// REST API - Releases and Version
|
||||||
.route("/api/version", get(api::releases::get_version)) // No auth - for agent polling
|
.route("/api/version", get(api::releases::get_version)) // No auth - for agent polling
|
||||||
.route("/api/releases", get(api::releases::list_releases))
|
.route("/api/releases", get(api::releases::list_releases))
|
||||||
|
|||||||
@@ -77,6 +77,19 @@ pub const CODE_VALIDATE_MAX_FAILURES: u32 = 10;
|
|||||||
/// Support-code validate: how long an IP stays locked out once tripped.
|
/// Support-code validate: how long an IP stays locked out once tripped.
|
||||||
pub const CODE_VALIDATE_LOCKOUT: Duration = Duration::from_secs(15 * 60);
|
pub const CODE_VALIDATE_LOCKOUT: Duration = Duration::from_secs(15 * 60);
|
||||||
|
|
||||||
|
/// Enroll (`POST /api/enroll`, SPEC-016): length of one fixed rate-limit window.
pub const ENROLL_WINDOW: Duration = Duration::from_secs(60);

/// Enroll: request cap per window for one `(site_code, IP)`. A zero-touch site
/// push sends N machines through enroll almost at once, so the cap is generous
/// (mass-deploy friendly) yet still stops a runaway loop. Defense-in-depth only:
/// the 256-bit enrollment key is the load-bearing gate, not this cap.
pub const ENROLL_MAX_PER_WINDOW: u32 = 60;

/// Enroll: number of consecutive FAILED attempts (bad/inactive key, unknown site)
/// from one `(site_code, IP)` that trips the lockout.
pub const ENROLL_MAX_FAILURES: u32 = 20;

/// Enroll: lockout duration for a `(site_code, IP)` once tripped.
pub const ENROLL_LOCKOUT: Duration = Duration::from_secs(15 * 60);
|
||||||
|
|
||||||
/// Hard cap on the number of distinct IPs tracked by any single limiter map.
|
/// Hard cap on the number of distinct IPs tracked by any single limiter map.
|
||||||
/// Prevents an IP-rotating attacker from growing memory without bound. When the
|
/// Prevents an IP-rotating attacker from growing memory without bound. When the
|
||||||
/// cap is hit, the oldest-windowed entries are pruned. Generous for a real MSP
|
/// cap is hit, the oldest-windowed entries are pruned. Generous for a real MSP
|
||||||
@@ -260,6 +273,150 @@ impl FailureLockout {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Composite-key limiter for enrollment (keyed by (site_code, IP)) — SPEC-016
|
||||||
|
// ============================================================================
|
||||||
|
//
|
||||||
|
// The login / change-password / code-validate limiters above key purely on IP.
|
||||||
|
// SPEC-016 §3 wants the enroll defense keyed on `(site_code, source-IP)` so a noisy
|
||||||
|
// site push from one office IP cannot lock out a different site enrolling from the
|
||||||
|
// same egress IP. Rather than overload the IP-only maps, this is a small dedicated
|
||||||
|
// composite-key limiter + lockout. It is invoked from the enroll HANDLER (not a
|
||||||
|
// `from_fn` layer) because the `site_code` lives in the JSON body, which a
|
||||||
|
// pre-handler middleware cannot read without consuming it. Documented as
|
||||||
|
// defense-in-depth: the 256-bit enrollment key is the real gate.
|
||||||
|
|
||||||
|
/// Map key of the enroll limiter: `(site_code, real client IP)`.
type EnrollKey = (String, IpAddr);
|
||||||
|
|
||||||
|
/// Per-`(site_code, IP)` fixed-window limiter + consecutive-failure lockout.
|
||||||
|
///
|
||||||
|
/// Combines both protections behind one lock-guarded map so the enroll handler
|
||||||
|
/// makes a single allow/deny decision and reports success/failure into the same
|
||||||
|
/// structure. Self-pruning and size-capped, like the IP-only limiters.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct EnrollLimiter {
|
||||||
|
inner: std::sync::Arc<Mutex<HashMap<EnrollKey, EnrollEntry>>>,
|
||||||
|
max_per_window: u32,
|
||||||
|
window: Duration,
|
||||||
|
max_failures: u32,
|
||||||
|
cooldown: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-`(site_code, IP)` bookkeeping held by `EnrollLimiter`.
#[derive(Debug, Clone, Copy)]
struct EnrollEntry {
    /// Start of the current fixed window.
    window_started: Instant,
    /// Requests admitted in the current window.
    count: u32,
    /// Recorded failures counted toward the lockout threshold.
    failures: u32,
    /// End of the active lockout, if one is in force.
    locked_until: Option<Instant>,
    /// Last time this key was touched; consulted when pruning.
    last_seen: Instant,
}
|
||||||
|
|
||||||
|
impl EnrollLimiter {
|
||||||
|
pub fn new(
|
||||||
|
max_per_window: u32,
|
||||||
|
window: Duration,
|
||||||
|
max_failures: u32,
|
||||||
|
cooldown: Duration,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
inner: std::sync::Arc::new(Mutex::new(HashMap::new())),
|
||||||
|
max_per_window,
|
||||||
|
window,
|
||||||
|
max_failures,
|
||||||
|
cooldown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entry_now() -> EnrollEntry {
|
||||||
|
let now = Instant::now();
|
||||||
|
EnrollEntry {
|
||||||
|
window_started: now,
|
||||||
|
count: 0,
|
||||||
|
failures: 0,
|
||||||
|
locked_until: None,
|
||||||
|
last_seen: now,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Admit one enroll attempt for `(site_code, ip)`. Returns `true` if allowed
|
||||||
|
/// (and counts it). Returns `false` if the key is currently locked out OR over
|
||||||
|
/// the per-window request cap. Clock injected for tests.
|
||||||
|
fn check_at(&self, site_code: &str, ip: IpAddr, now: Instant) -> bool {
|
||||||
|
let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
|
||||||
|
if map.len() >= MAX_TRACKED_IPS {
|
||||||
|
let window = self.window;
|
||||||
|
let cooldown = self.cooldown;
|
||||||
|
map.retain(|_, e| {
|
||||||
|
e.locked_until.map(|u| now < u).unwrap_or(false)
|
||||||
|
|| now.duration_since(e.window_started) < window
|
||||||
|
|| now.duration_since(e.last_seen) < cooldown
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let key = (site_code.to_string(), ip);
|
||||||
|
let e = map.entry(key).or_insert_with(Self::entry_now);
|
||||||
|
e.last_seen = now;
|
||||||
|
|
||||||
|
// Lockout takes precedence.
|
||||||
|
if let Some(until) = e.locked_until {
|
||||||
|
if now < until {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Cooldown elapsed — clear it for a fresh start.
|
||||||
|
e.locked_until = None;
|
||||||
|
e.failures = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Roll the fixed window forward if elapsed.
|
||||||
|
if now.duration_since(e.window_started) >= self.window {
|
||||||
|
e.window_started = now;
|
||||||
|
e.count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if e.count >= self.max_per_window {
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
e.count += 1;
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Admit one enroll attempt (real clock).
|
||||||
|
pub fn check(&self, site_code: &str, ip: IpAddr) -> bool {
|
||||||
|
self.check_at(site_code, ip, Instant::now())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_failure_at(&self, site_code: &str, ip: IpAddr, now: Instant) {
|
||||||
|
let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
let key = (site_code.to_string(), ip);
|
||||||
|
let e = map.entry(key).or_insert_with(Self::entry_now);
|
||||||
|
e.last_seen = now;
|
||||||
|
e.failures = e.failures.saturating_add(1);
|
||||||
|
if e.failures >= self.max_failures {
|
||||||
|
e.locked_until = Some(now + self.cooldown);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a FAILED enroll attempt (bad key / unknown site) for the key,
|
||||||
|
/// tripping the lockout once the streak reaches `max_failures`.
|
||||||
|
pub fn record_failure(&self, site_code: &str, ip: IpAddr) {
|
||||||
|
self.record_failure_at(site_code, ip, Instant::now());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a SUCCESSFUL enroll for the key, resetting its failure streak.
|
||||||
|
pub fn record_success(&self, site_code: &str, ip: IpAddr) {
|
||||||
|
let mut map = self.inner.lock().unwrap_or_else(|e| e.into_inner());
|
||||||
|
let key = (site_code.to_string(), ip);
|
||||||
|
if let Some(e) = map.get_mut(&key) {
|
||||||
|
e.failures = 0;
|
||||||
|
e.locked_until = None;
|
||||||
|
e.last_seen = Instant::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Shared rate-limit state (lives in AppState)
|
// Shared rate-limit state (lives in AppState)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -275,6 +432,9 @@ pub struct RateLimitState {
|
|||||||
pub code_validate: RateLimiter,
|
pub code_validate: RateLimiter,
|
||||||
/// Per-IP lockout on repeated failed code validations (brute-force defense).
|
/// Per-IP lockout on repeated failed code validations (brute-force defense).
|
||||||
pub code_validate_lockout: FailureLockout,
|
pub code_validate_lockout: FailureLockout,
|
||||||
|
/// `POST /api/enroll` (SPEC-016): per-`(site_code, IP)` request cap +
|
||||||
|
/// consecutive-failure lockout. Invoked from the enroll handler.
|
||||||
|
pub enroll: EnrollLimiter,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RateLimitState {
|
impl RateLimitState {
|
||||||
@@ -290,6 +450,12 @@ impl RateLimitState {
|
|||||||
CODE_VALIDATE_MAX_FAILURES,
|
CODE_VALIDATE_MAX_FAILURES,
|
||||||
CODE_VALIDATE_LOCKOUT,
|
CODE_VALIDATE_LOCKOUT,
|
||||||
),
|
),
|
||||||
|
enroll: EnrollLimiter::new(
|
||||||
|
ENROLL_MAX_PER_WINDOW,
|
||||||
|
ENROLL_WINDOW,
|
||||||
|
ENROLL_MAX_FAILURES,
|
||||||
|
ENROLL_LOCKOUT,
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -524,4 +690,51 @@ mod tests {
|
|||||||
assert!(lockout.is_locked_at(ip(8), t0));
|
assert!(lockout.is_locked_at(ip(8), t0));
|
||||||
assert!(!lockout.is_locked_at(ip(9), t0)); // ip9 unaffected
|
assert!(!lockout.is_locked_at(ip(9), t0)); // ip9 unaffected
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -- EnrollLimiter (composite (site_code, IP) key) --------------------------
|
||||||
|
|
||||||
|
#[test]
fn enroll_window_allows_up_to_cap_then_blocks() {
    // Cap of 2 per 60s window; failure threshold set high so it never interferes.
    let limiter = EnrollLimiter::new(2, Duration::from_secs(60), 100, Duration::from_secs(600));
    let start = Instant::now();

    // The first two attempts fit within the cap.
    for _ in 0..2 {
        assert!(limiter.check_at("SITE-A", ip(1), start));
    }

    // The third attempt in the same window is refused.
    let third = limiter.check_at("SITE-A", ip(1), start);
    assert!(!third);
}
|
||||||
|
|
||||||
|
#[test]
fn enroll_is_keyed_by_site_and_ip() {
    let window = Duration::from_secs(60);
    let cooldown = Duration::from_secs(600);
    let limiter = EnrollLimiter::new(1, window, 100, cooldown);
    let start = Instant::now();

    // Use up the single-request budget of ("SITE-A", ip 1).
    let first = limiter.check_at("SITE-A", ip(1), start);
    let repeat = limiter.check_at("SITE-A", ip(1), start);
    assert!(first);
    assert!(!repeat); // same key, now over the cap

    // Changing only the site gives an untouched bucket.
    let other_site = limiter.check_at("SITE-B", ip(1), start);
    assert!(other_site);

    // Changing only the IP gives an untouched bucket as well.
    let other_ip = limiter.check_at("SITE-A", ip(2), start);
    assert!(other_ip);
}
|
||||||
|
|
||||||
|
#[test]
fn enroll_lockout_trips_after_failures_and_blocks_check() {
    // Generous request cap (100) so only the failure threshold (3) can deny.
    let limiter = EnrollLimiter::new(100, Duration::from_secs(60), 3, Duration::from_secs(600));
    let start = Instant::now();

    // Two failures: one short of the threshold.
    for _ in 0..2 {
        limiter.record_failure_at("SITE-A", ip(1), start);
    }
    let admitted_before_trip = limiter.check_at("SITE-A", ip(1), start);
    assert!(admitted_before_trip);

    // The third failure trips the lockout.
    limiter.record_failure_at("SITE-A", ip(1), start);

    // Locked out: denied even though the key is well under the request cap.
    let admitted_after_trip = limiter.check_at("SITE-A", ip(1), start);
    assert!(!admitted_after_trip);
}
|
||||||
|
|
||||||
|
#[test]
fn enroll_success_resets_failure_streak() {
    // Lockout would trip at 2 failures.
    let limiter = EnrollLimiter::new(100, Duration::from_secs(60), 2, Duration::from_secs(600));
    let start = Instant::now();

    // failure -> success -> failure: the success wipes the first failure.
    limiter.record_failure_at("SITE-A", ip(1), start);
    limiter.record_success("SITE-A", ip(1));
    limiter.record_failure_at("SITE-A", ip(1), start);

    // The streak stands at one, below the threshold, so the key is admitted.
    let admitted = limiter.check_at("SITE-A", ip(1), start);
    assert!(admitted);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -527,3 +527,60 @@ Reference: SPEC-002 §5; `agent/src/encoder/raw.rs` (salvaged), `proto/guruconne
|
|||||||
- **Rate limiting:** hammer `/api/auth/login` and the code-validate route → confirm throttling/lockout.
|
- **Rate limiting:** hammer `/api/auth/login` and the code-validate route → confirm throttling/lockout.
|
||||||
- **Migrations:** fresh DB applies the v2 migrations cleanly; `_sqlx_migrations` consistent; `tenant_id`
|
- **Migrations:** fresh DB applies the v2 migrations cleanly; `_sqlx_migrations` consistent; `tenant_id`
|
||||||
populated with the default tenant.
|
populated with the default tenant.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Task 9 [PROPOSED 2026-06-01 — provisioning model = TOFU auto-enroll, chosen by Mike]: `cak_` auto-enroll provisioning + shared-key retirement
|
||||||
|
|
||||||
|
> Context: Task 2 built the SERVER `cak_` machinery (mint/SHA-256 hash/verify in `auth/agent_keys.rs`,
|
||||||
|
> relay validation in `validate_agent_api_key`, admin issuance `POST /api/machines/:id/keys`). What's
|
||||||
|
> missing is how an AGENT obtains and uses a `cak_` — today agents still carry the deprecated shared
|
||||||
|
> `AGENT_API_KEY`, so `connect_agent_keys` is empty and the relay logs the DEPRECATED-shared-key warning
|
||||||
|
> for every agent. This task closes that with **trust-on-first-use auto-enroll** so the shared key can be
|
||||||
|
> retired (unblocks task list #5). NOTE: the agent already presents whatever is in its `api_key` slot and
|
||||||
|
> the relay auto-detects `cak_` vs shared — so a `cak_`-keyed agent needs **no change to its auth call**,
|
||||||
|
> only a way to *receive*, *persist*, and *prefer* a `cak_`.
|
||||||
|
|
||||||
|
**Flow (TOFU):**
|
||||||
|
1. **Bootstrap (first connect):** a fresh agent authenticates on `/ws/agent` with a bootstrap secret —
|
||||||
|
interim: the shared `AGENT_API_KEY` (embedded by the download endpoint); target: a single-use,
|
||||||
|
short-lived **enroll token** (more secure TOFU — see Security).
|
||||||
|
2. **Server issues on first connect:** when an agent authenticated via the bootstrap path (i.e. NOT already
|
||||||
|
`cak_`-keyed) connects and its machine has **no active (non-revoked) `cak_`**, the relay: resolves/creates
|
||||||
|
the machine row (existing `upsert_machine` on `machine_uid` — now functional after the 2026-06-01
|
||||||
|
ON CONFLICT fix), mints a `cak_` (`generate_agent_key` + `db::agent_keys::insert_agent_key` for that
|
||||||
|
`machine_id`), and sends the plaintext key to the agent **once** over a new server→agent message. Only
|
||||||
|
the hash is stored. **Idempotent:** never re-issue if an active key already exists for the machine.
|
||||||
|
3. **Agent receives + persists + prefers:** on `AgentKeyProvision`, the agent persists the `cak_` durably at
|
||||||
|
`%ProgramData%\GuruConnect\agent_key` (restricted ACL, same pattern as `machine_uid`). On startup it loads
|
||||||
|
the persisted `cak_` if present and uses it as its auth key, falling back to the embedded/bootstrap secret
|
||||||
|
only when no `cak_` is stored yet. After provisioning, every reconnect authenticates via `cak_` (no more
|
||||||
|
DEPRECATED-shared-key warning for that agent).
|
||||||
|
4. **Shared-key retirement (phased):** Phase A — shared key stays as the bootstrap so existing+new agents
|
||||||
|
self-enroll; monitor the relay WARN count → ~0. Phase B — once the fleet is `cak_`-keyed, restrict the
|
||||||
|
shared `AGENT_API_KEY` to enrollment-only or remove the env entirely (only `cak_` / enroll-token accepted).
|
||||||
|
This is the concrete completion of task-list #5.
|
||||||
|
|
||||||
|
**Protocol (4-artifact drift discipline):** add `AgentKeyProvision { string key = 1; }` (server→agent) to
|
||||||
|
`proto/guruconnect.proto` with a new reserved message ID; regenerate prost on both agent + server; the
|
||||||
|
hand-written `dashboard/src/lib/protobuf.ts` decoder does NOT need it (agent-plane only) but reserve the ID.
|
||||||
|
|
||||||
|
**Files:** `proto/guruconnect.proto` (new message); `server/src/relay/mod.rs` (issue+send on bootstrap connect
|
||||||
|
with no active key); `server/src/db/agent_keys.rs` (add `has_active_key(machine_id)` check; reuse insert);
|
||||||
|
`agent/src/transport/*` (handle inbound `AgentKeyProvision`); `agent/src/config.rs` + a small key-store module
|
||||||
|
(load/persist `cak_`, prefer over bootstrap).
|
||||||
|
|
||||||
|
**Security (TOFU):** the first connect trusts the bootstrap secret — a leaked shared key during the enroll
|
||||||
|
window could enroll a rogue agent; the secure target is a **single-use, short-lived enroll token** per
|
||||||
|
deployment instead of the shared key (shared-key bootstrap is interim convenience). The `cak_` is sent
|
||||||
|
plaintext once over the existing wss/TLS channel; only the hash is stored server-side; the agent stores it
|
||||||
|
locally with restricted ACLs. Revocation via the existing `DELETE /api/machines/:id/keys/:key_id` fails the
|
||||||
|
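agent closed; on its next bootstrap connect it re-enrolls (this requires the agent to discard the rejected `cak_` and fall back to its bootstrap secret, since a stored `cak_` is otherwise preferred). The keyed-agent dedup (Task 3) keeps the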
|
||||||
|
authenticated identity authoritative.
|
||||||
|
|
||||||
|
**Verification:** drop a current-build (signed 0.3.0+) agent configured with the shared-key bootstrap →
|
||||||
|
it connects, receives a `cak_`, persists it; restart → it authenticates via the `cak_` (relay shows NO
|
||||||
|
DEPRECATED-shared-key warning) and `connect_agent_keys` holds exactly one active key for the machine; issuance
|
||||||
|
is idempotent across reconnects; revoke the key via the admin API → agent rejected, then re-enrolls on next
|
||||||
|
bootstrap connect. Reference: `auth/agent_keys.rs`, `api/machine_keys.rs`, `relay/mod.rs:266-309`
|
||||||
|
(`validate_agent_api_key`), `.claude/standards/security/credential-handling.md`.
|
||||||
|
|||||||
Reference in New Issue
Block a user