Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b9e4068c9 | |||
| 87f229509b | |||
| 40c7d860cc | |||
| 0059b21db6 | |||
| f950511e3e | |||
| 16017456aa |
@@ -27,6 +27,15 @@ on:
|
||||
# computes the next semver from conventional commits at dispatch time.
|
||||
# build-and-test.yml remains the automatic PR/push CI gate.
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
channel:
|
||||
description: 'Release channel (stable = full versioned release; beta = signed prerelease test build, no version bump/changelog)'
|
||||
required: true
|
||||
default: 'stable'
|
||||
type: choice
|
||||
options:
|
||||
- stable
|
||||
- beta
|
||||
|
||||
jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -36,8 +45,11 @@ jobs:
|
||||
name: Version + Changelog
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.bump.outputs.version }}
|
||||
released: ${{ steps.bump.outputs.released }}
|
||||
# Coalesce across the stable (bump) and beta (beta) paths: exactly one of them runs per
|
||||
# dispatch, so the first non-empty value wins. prerelease is 'true' only on the beta path.
|
||||
version: ${{ steps.bump.outputs.version || steps.beta.outputs.version }}
|
||||
released: ${{ steps.bump.outputs.released || steps.beta.outputs.released }}
|
||||
prerelease: ${{ steps.beta.outputs.prerelease || 'false' }}
|
||||
steps:
|
||||
- name: Checkout (full history + tags)
|
||||
uses: actions/checkout@v4
|
||||
@@ -59,7 +71,8 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Install git-cliff
|
||||
if: steps.guard.outputs.skip != 'true'
|
||||
# Stable-only: beta produces no changelog, so git-cliff is unnecessary on the beta path.
|
||||
if: steps.guard.outputs.skip != 'true' && github.event.inputs.channel == 'stable'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
CLIFF_VERSION="2.6.1"
|
||||
@@ -72,12 +85,16 @@ jobs:
|
||||
|
||||
- name: Determine next version and bump components
|
||||
id: bump
|
||||
if: steps.guard.outputs.skip != 'true'
|
||||
# Stable-only: the beta path (id: beta) handles versioning without a manifest bump/commit.
|
||||
if: steps.guard.outputs.skip != 'true' && github.event.inputs.channel == 'stable'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# ----- locate the last release tag (vX.Y.Z) -----
|
||||
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | head -n1 || true)"
|
||||
# Match ONLY strict final-release tags (vMAJOR.MINOR.PATCH). Beta tags look like
|
||||
# v0.3.0-beta.7; if one of those were picked up here it would corrupt the next stable
|
||||
# base version, so prerelease tags are explicitly excluded from this lookup.
|
||||
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | head -n1 || true)"
|
||||
if [ -z "${LAST_TAG}" ]; then
|
||||
echo "[INFO] No prior release tag found; baseline is current manifest version."
|
||||
BASE_VERSION="$(grep -m1 '^version' agent/Cargo.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)".*/\1/')"
|
||||
@@ -186,8 +203,39 @@ jobs:
|
||||
sed -i -E "0,/^version = \"[0-9]+\.[0-9]+\.[0-9]+\"/s//version = \"${NEXT}\"/" Cargo.toml || true
|
||||
fi
|
||||
|
||||
- name: Beta channel - tag prerelease build (no bump, no commit, no changelog)
|
||||
id: beta
|
||||
# Beta-only path. Reuses the IDENTICAL downstream build + sign + publish jobs, but does
|
||||
# NOT compute a semver bump, mutate any manifest, generate a changelog, or make a release
|
||||
# commit. It just tags the CURRENT HEAD with a unique prerelease version so the Windows
|
||||
# build job can check out `ref: v${VER}` exactly as it does for stable.
|
||||
if: github.event.inputs.channel == 'beta' && steps.guard.outputs.skip != 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Base version is read straight from the agent manifest — NOT bumped, NOT written back.
|
||||
BASE="$(grep -m1 '^version' agent/Cargo.toml | sed -E 's/.*"([0-9]+\.[0-9]+\.[0-9]+)".*/\1/')"
|
||||
# GITHUB_RUN_NUMBER is unique per workflow run (re-run attempts reuse it), so it yields a distinct prerelease suffix without counting existing tags.
|
||||
VER="${BASE}-beta.${GITHUB_RUN_NUMBER}"
|
||||
echo "[INFO] Beta build version: ${VER} (base ${BASE}, run ${GITHUB_RUN_NUMBER})"
|
||||
|
||||
# Tag the current HEAD (no release commit). Push the tag so build-agent-windows can
|
||||
# check out ref: v${VER}.
|
||||
git config user.name "guruconnect-ci"
|
||||
git config user.email "ci@azcomputerguru.com"
|
||||
# Beta tags are disposable test markers; force makes re-running a failed beta dispatch idempotent (re-run reuses GITHUB_RUN_NUMBER, so the tag already exists).
|
||||
git tag -f "v${VER}"
|
||||
REMOTE="https://${{ secrets.CI_PUSH_TOKEN }}@git.azcomputerguru.com/${GITHUB_REPOSITORY}.git"
|
||||
git push --force "${REMOTE}" "v${VER}"
|
||||
echo "[OK] Pushed beta prerelease tag v${VER}"
|
||||
|
||||
echo "version=${VER}" >> "$GITHUB_OUTPUT"
|
||||
echo "released=true" >> "$GITHUB_OUTPUT"
|
||||
echo "prerelease=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Generate changelog (git-cliff)
|
||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true'
|
||||
# Stable-only: beta produces no changelog artifact.
|
||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true' && github.event.inputs.channel == 'stable'
|
||||
env:
|
||||
VERSION: ${{ steps.bump.outputs.version }}
|
||||
run: |
|
||||
@@ -232,7 +280,10 @@ jobs:
|
||||
|
||||
# Re-derive the set of changed components (same logic as the bump step). On the first
|
||||
# release (no prior tag) all components are considered changed.
|
||||
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | head -n1 || true)"
|
||||
# Match ONLY strict final-release tags (vMAJOR.MINOR.PATCH); exclude beta prerelease
|
||||
# tags (v0.3.0-beta.7) so the changelog diff range is taken against the last real
|
||||
# release, not an intervening beta build.
|
||||
LAST_TAG="$(git tag --list 'v*' --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | head -n1 || true)"
|
||||
if [ -z "${LAST_TAG}" ]; then
|
||||
CHANGED_FILES="$(git ls-files)"
|
||||
FIRST_RELEASE=true
|
||||
@@ -252,7 +303,8 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Commit release + create tag
|
||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true'
|
||||
# Stable-only: beta tags HEAD directly in the beta step and never makes a release commit.
|
||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true' && github.event.inputs.channel == 'stable'
|
||||
env:
|
||||
VERSION: ${{ steps.bump.outputs.version }}
|
||||
run: |
|
||||
@@ -276,7 +328,8 @@ jobs:
|
||||
echo "[OK] Pushed release commit and tag v${VERSION}"
|
||||
|
||||
- name: Upload changelog artifact
|
||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true'
|
||||
# Stable-only: there is no changelog on the beta path, so nothing to upload.
|
||||
if: steps.guard.outputs.skip != 'true' && steps.bump.outputs.released == 'true' && github.event.inputs.channel == 'stable'
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: changelog
|
||||
@@ -445,6 +498,9 @@ jobs:
|
||||
echo "sha256=${SUM}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Download changelog artifact
|
||||
# Stable-only: the beta path uploads no `changelog` artifact. The release-creation step
|
||||
# already guards on `[ -f changelog-artifact/CHANGELOG.md ]`, so skipping this is safe.
|
||||
if: github.event.inputs.channel == 'stable'
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: changelog
|
||||
@@ -472,17 +528,26 @@ jobs:
|
||||
env:
|
||||
VERSION: ${{ needs.version.outputs.version }}
|
||||
SHA256: ${{ steps.sha.outputs.sha256 }}
|
||||
# PRERELEASE is 'true' on the beta path, 'false' on stable; drives the Gitea release flag.
|
||||
PRERELEASE: ${{ needs.version.outputs.prerelease }}
|
||||
GITEA_TOKEN: ${{ secrets.CI_PUSH_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
API_BASE="https://git.azcomputerguru.com/api/v1/repos/${GITHUB_REPOSITORY}"
|
||||
TAG="v${VERSION}"
|
||||
echo "[INFO] Creating Gitea release ${TAG} on ${GITHUB_REPOSITORY}"
|
||||
echo "[INFO] Creating Gitea release ${TAG} on ${GITHUB_REPOSITORY} (prerelease=${PRERELEASE})"
|
||||
|
||||
BODY="$(printf 'GuruConnect %s\n\nSHA-256 (guruconnect.exe): %s\n\nSee CHANGELOG.md and /api/changelog for details.' "${TAG}" "${SHA256}")"
|
||||
# Beta builds get a clear "prerelease test build" note in the body; the -beta.N suffix
|
||||
# is already carried in TAG, so the release name "Release v..." needs no extra handling.
|
||||
if [ "${PRERELEASE}" = "true" ]; then
|
||||
BODY="$(printf 'GuruConnect %s (PRERELEASE / beta test build)\n\nSHA-256 (guruconnect.exe): %s\n\nSigned via Azure Trusted Signing. Not a stable release — no changelog/version bump.' "${TAG}" "${SHA256}")"
|
||||
else
|
||||
BODY="$(printf 'GuruConnect %s\n\nSHA-256 (guruconnect.exe): %s\n\nSee CHANGELOG.md and /api/changelog for details.' "${TAG}" "${SHA256}")"
|
||||
fi
|
||||
|
||||
# Build the JSON payload with python (handles escaping of the multi-line body safely).
|
||||
CREATE_PAYLOAD="$(TAG="$TAG" BODY="$BODY" python3 -c 'import json,os; print(json.dumps({"tag_name": os.environ["TAG"], "name": "Release " + os.environ["TAG"], "body": os.environ["BODY"], "draft": False, "prerelease": False}))')"
|
||||
# prerelease is derived from the PRERELEASE env var (beta -> true, stable -> false).
|
||||
CREATE_PAYLOAD="$(TAG="$TAG" BODY="$BODY" PRERELEASE="$PRERELEASE" python3 -c 'import json,os; print(json.dumps({"tag_name": os.environ["TAG"], "name": "Release " + os.environ["TAG"], "body": os.environ["BODY"], "draft": False, "prerelease": os.environ.get("PRERELEASE","false") == "true"}))')"
|
||||
|
||||
RELEASE_JSON="$(curl -fsS -X POST \
|
||||
"${API_BASE}/releases" \
|
||||
|
||||
@@ -16,11 +16,16 @@ stack. It ships independently of GuruRMM and integrates with it via a versioned
|
||||
> match, blacklist-on-WS, agent-plane rejects user JWTs via per-agent `cak_` keys). The feature specs below
|
||||
> (SPEC-003–009) are **work-items inside the later v2 phases** — see the mapping.
|
||||
>
|
||||
> **Remaining to formally exit Phase 1:** secure-session-core **Task 8** (end-to-end verification +
|
||||
> `/gc-audit --pass=security` re-audit + the manual CRITICAL checks) and Code-Review sign-off on Tasks 3–5
|
||||
> (implemented without a local toolchain at the time; since built + deployed). Live HW-H.264 validation is
|
||||
> also pending — raw+Zstd remains the shipping default. ~~Sprint 0 (relay-auth CRITICAL hotfix)~~ **not
|
||||
> needed — those fixes shipped in Tasks 2–3.**
|
||||
> **Phase 1 formally EXITED (2026-05-31).** secure-session-core **Task 8** is complete — end-to-end
|
||||
> functional verification (live CRITICAL boundary checks against the deployed binary: login-JWT→401,
|
||||
> wrong-session viewer token→403, JWT-as-agent-key→401) **plus the `/gc-audit --pass=security` re-audit:
|
||||
> PASS, 0 CRITICAL/HIGH/MEDIUM/LOW** ([report](../reports/2026-05-31-gc-audit.md)). Code-Review sign-off on
|
||||
> Tasks 3–5 landed earlier. On top of Phase 1, **SPEC-004 (Tasks 2/4/5 — machine_uid dedup, session
|
||||
> reaping, operator removal API+UI) is implemented, reviewed, deployed, and the 11 live ghost rows were
|
||||
> purged**; the agent is now **auto-versioned + Azure-Trusted-Signing-signed via `release.yml`** with
|
||||
> **v0.3.0 published** as the stable release. ~~Sprint 0 (relay-auth CRITICAL hotfix)~~ **not needed.**
|
||||
> Still pending (NOT a Phase-1 blocker): live HW-H.264 cross-GPU validation — **raw+Zstd remains the
|
||||
> shipping default** (`DEFAULT_PREFER_H264=false`) until H.264 is validated across GPUs.
|
||||
|
||||
### v2 phase mapping of current specs
|
||||
|
||||
@@ -43,8 +48,9 @@ stack. It ships independently of GuruRMM and integrates with it via a versioned
|
||||
|
||||
Bringing GC to parity with GuruRMM's release engineering. Full plan: [SPEC-001](specs/SPEC-001-operational-tooling-parity.md).
|
||||
|
||||
- [ ] **Code signing — Azure Trusted Signing in CI** — P1 — sign the Windows agent `.exe` via `jsign` (TRUSTEDSIGNING) in Gitea Actions, reusing the shared ACG cert profile. (SPEC-001 §2)
|
||||
- [ ] **Automatic versioning** — P1 — conventional-commit-driven version bump across agent/server/dashboard, embedded via `build.rs`. (SPEC-001 §3)
|
||||
- [x] **Code signing — Azure Trusted Signing in CI** — P1 — Windows agent `.exe` signed via `jsign` (TRUSTEDSIGNING) in `release.yml`, fail-closed (never publishes unsigned). Shipped with v0.3.0. (SPEC-001 §2)
|
||||
- [ ] **Signed beta/test release channel** — **P1 — NOW** — every binary we hand to a tester must be signed, but signing today only runs on a deliberate full `release.yml` dispatch; the automatic `build-and-test.yml` agent artifact is explicitly **unsigned**. Add a `channel: stable | beta` `workflow_dispatch` input to `release.yml`: `beta` signs the agent and publishes a prerelease-tagged Gitea release (e.g. `v0.4.0-beta.1`) **skipping the semver bump + changelog**; `stable` keeps the existing full path. Keeps signing secrets out of PR-triggered runs. (SPEC-001 §2)
|
||||
- [x] **Automatic versioning** — P1 — conventional-commit-driven version bump computed at dispatch in `release.yml`, embedded via `build.rs`. Shipped with v0.3.0. (SPEC-001 §3)
|
||||
- [ ] **Changelog generation & API** — P2 — `CHANGELOG.md` + per-version changelogs from conventional commits, served at `/api/changelog/...`. (SPEC-001 §4)
|
||||
- [ ] **Feature-request workflow** — P2 — `/gc-feature-request` skill producing `docs/specs/SPEC-NNN-*.md` and updating this roadmap. (SPEC-001 §1)
|
||||
- [ ] **Roadmap / ADR / spec tracking** — P1 — this file + `ARCHITECTURE_DECISIONS.md` + `docs/specs/`. (SPEC-001 §5) — *bootstrapped*
|
||||
|
||||
129
reports/2026-05-31-gc-audit.md
Normal file
129
reports/2026-05-31-gc-audit.md
Normal file
@@ -0,0 +1,129 @@
|
||||
# GuruConnect Audit Report — 2026-05-31
|
||||
|
||||
**Auditor:** Claude (claude-opus-4-8[1m])
|
||||
**Passes:** Security & Remote-Session Integrity (`--pass=security` only)
|
||||
**Previous audit:** 2026-05-30 (`reports/2026-05-30-gc-audit.md`)
|
||||
**Scope note:** v2 **Phase-1 EXIT gate** re-audit. Confirms the three relay CRITICALs stay closed and
|
||||
the prior net-new HIGH is fixed, and assesses the net-new SPEC-004 surface (Tasks 2/4/5 — machine_uid
|
||||
dedup, session reaping, operator removal) now committed + deployed. Includes **live** boundary tests
|
||||
against the running production binary, not just a code re-derivation.
|
||||
|
||||
**Code under audit:** working tree at tag **v0.3.0 / e967cce** = the binary deployed to prod
|
||||
172.16.3.30:3002 (deployed this session from 96f9c0a; e967cce adds only the version bump + changelog).
|
||||
|
||||
---
|
||||
|
||||
## Executive Summary
|
||||
|
||||
| Pass | Total | Critical | High | Medium | Low | Info |
|
||||
|------|-------|----------|------|--------|-----|------|
|
||||
| Security & Session | 4 | 0 | 0 | 0 | 0 | 4 |
|
||||
|
||||
**Phase-1 security EXIT gate: PASS.** The relay/server plane is clean. All three 2026-05-29 CRITICALs
|
||||
remain CLOSED (verified in code AND live against the deployed server). The prior net-new HIGH (agent
|
||||
auto-update TLS bypass) and the prior LOW (chat content logged at INFO) are both remediated. The
|
||||
net-new SPEC-004 surface (operator removal, machine_uid dedup gate, session reaper/supersede) audits
|
||||
clean with the keyed-identity security invariant intact end-to-end. No net-new findings.
|
||||
|
||||
**Requires action:** none.
|
||||
|
||||
---
|
||||
|
||||
## Live functional verification (deployed binary, 172.16.3.30:3002)
|
||||
|
||||
Forged tokens (HS256, real `JWT_SECRET`) exercised the WS auth boundaries directly. Each illegitimate
|
||||
access was REJECTED (4xx, never a 101 upgrade):
|
||||
|
||||
| Check | Result | Proves |
|
||||
|-------|--------|--------|
|
||||
| Login-shape JWT on `/ws/viewer` | **401** | Login token not accepted as a viewer token (`purpose=="viewer"` enforced) — CRITICAL #1 |
|
||||
| Validly-signed viewer token for session AAAA used on session BBBB | **403** | Session binding enforced — a correctly-signed token is refused for the wrong session — CRITICAL #1 |
|
||||
| Login JWT used as agent `api_key` on `/ws/agent` | **401** | Agent plane rejects JWTs (no JWT branch) — CRITICAL #3 |
|
||||
| Wrong-signature token on `/ws/viewer` | **401** | Signature validation holds (control) |
|
||||
|
||||
The session-bind case is the decisive one: a token that WOULD be accepted for its own session is
|
||||
rejected 403 for a different session, proving the binding rather than mere signature validation.
|
||||
|
||||
---
|
||||
|
||||
## The three relay CRITICALs — verdict
|
||||
|
||||
| CRITICAL | Verdict | Enforced at |
|
||||
|----------|---------|-------------|
|
||||
| #1 any-JWT-joins-any-session | **CLOSED** | mint authz `api/sessions.rs` (is_admin \|\| permission); viewer WS `relay/mod.rs:496` `validate_viewer_token` (sig+expiry+`purpose=="viewer"`); session-bind `relay/mod.rs:527-534` (`claim != requested → 403`) |
|
||||
| #2 viewer-WS blacklist | **CLOSED** (TTL-bounded residual unchanged) | `relay/mod.rs:509` `token_blacklist.is_revoked` before upgrade. Residual: logout revokes login JWT not minted viewer tokens (5-min TTL) — same tracked MEDIUM, no regression |
|
||||
| #3 JWT-accepted-as-agent-key | **CLOSED**, fails closed | `relay/mod.rs:417` `validate_agent_api_key` — no JWT branch; only `cak_` (`auth/agent_keys.rs`, SHA-256 vs `connect_agent_keys`, `revoked_at IS NULL`) or deprecated shared key (WARN). Unresolved machine → 503 (`:303`); client `agent_id` overridden by key identity (`:283`) |
|
||||
|
||||
Live results match these code paths exactly.
|
||||
|
||||
---
|
||||
|
||||
## Prior HIGH — FIXED
|
||||
|
||||
**Agent auto-update TLS bypass → MITM-RCE: CLOSED.** `agent/src/update.rs:21` `dev_insecure_tls()` is
|
||||
`cfg!(debug_assertions)` AND env-var gated, so in a release build the `cfg!` evaluates to `false` and the agent
|
||||
ALWAYS verifies certs. Both `check_for_update` (`:64`) and `download_update` (`:130`) consume it; unit
|
||||
test `test_dev_insecure_tls_release_is_always_false` (`:362`) asserts the release invariant. No
|
||||
`danger_accept_invalid_certs(true)` reachable in production. A signed-manifest defense-in-depth TODO is
|
||||
filed at `install_update` (`:189`) (= tracked task #10, not an exit blocker).
|
||||
|
||||
---
|
||||
|
||||
## Pass 5: Security & Remote-Session Integrity — net-new SPEC-004 surface
|
||||
|
||||
### [INFO] Operator removal API (`server/src/api/removal.rs`) — clean, admin-gated
|
||||
Every removal handler takes the `AdminUser` extractor as its first argument (runs before any DB
|
||||
mutation): `remove_machine` (`:88`), `remove_session` (`:321`), `bulk_remove_machines` (`:471`).
|
||||
`AdminUser` (`auth/mod.rs:141`) validates JWT (signature + expiry + blacklist `:97`) then requires
|
||||
`is_admin()` else 403 (`:146`). Soft-deletes are parameterized + idempotent (`WHERE … AND deleted_at IS
|
||||
NULL`); bulk bounded (MAX_BATCH 500) with per-id UUID validation + isolated failures; audit
|
||||
(`db/events.rs:126`) records actor + target + trusted-proxy IP, best-effort (cannot be suppressed by
|
||||
attacker-controlled input). Removal is admin-role-gated globally (not per-tenant ACL) — same Phase-1
|
||||
posture as viewer-mint; per-tenant narrowing is deferred to SPEC-002 Phase 4. Acceptable in this context.
|
||||
|
||||
### [INFO] machine_uid dedup security gate — invariant holds
|
||||
Gate at `relay/mod.rs:352`: `effective_machine_uid = if is_keyed_agent { None } else { claimed }`. The
|
||||
suppressed value (not the raw claim) flows to `register_agent` and `upsert_machine`. Keyed (`cak_`)
|
||||
agents take the agent_id-keyed upsert branch and never write/touch an `ON CONFLICT (machine_uid)` row, so
|
||||
a valid key for machine X cannot repoint machine Y via a claimed uid. An un-keyed uid-spoof can only
|
||||
match a uid-bearing row — which the keyed connect path never creates; the only residual is a legacy
|
||||
pre-keying row, and the startup L1 fix (`main.rs:267-288` via `keyed_machine_ids`, fail-closed on query
|
||||
error) ensures keyed machines are never uid-indexed on restore.
|
||||
|
||||
### [INFO] Session reaper + same-machine supersede — clean, TOCTOU closed
|
||||
`reap_stale_persistent` (`:875`) and supersede (`:322`) select under a read lock then re-assert the full
|
||||
predicate under the write lock via `remove_session_if` (`:755`). Predicate requires
|
||||
`!is_online && is_persistent && viewers.is_empty()` (+ TTL / same-uid) — an online, viewer-attached, or
|
||||
support session is never reaped/superseded. Un-keyed uid-spoof blast radius = denial-of-persistence on
|
||||
an offline same-uid session at worst, never a hijack. Lock order matches `register_agent`; predicate is
|
||||
synchronous (no await under lock).
|
||||
|
||||
### [INFO] General posture — confirmed, no regressions
|
||||
Runtime sqlx parameterized everywhere (no `format!`-built SQL); migrations 008/009 idempotent. Frame
|
||||
caps: agent 4 MiB / viewer 64 KiB applied before upgrade. Input throttle retained. `/api/auth/login`
|
||||
rate-limited (`main.rs:397`). `JWT_SECRET` panics if <32 (`main.rs:143`); agent keys SHA-256; Argon2id
|
||||
passwords; no secret/token/code/PII logged. **Chat content no longer logged** (prior LOW fixed —
|
||||
`relay/mod.rs:829,1428` now log length only).
|
||||
|
||||
---
|
||||
|
||||
## Definitive answers
|
||||
|
||||
- **(a) Any non-admin removal path?** NO — all three removal handlers gate on `AdminUser` (JWT+blacklist+`is_admin`→403) before any DB mutation.
|
||||
- **(b) Any uid-spoof that repoints/hijacks another machine's row or session (not just denial)?** NO — keyed identity is authoritative and uid-suppressed across connect → upsert → reattach → startup restore. Worst case for an un-keyed spoof is denial-of-persistence on an offline same-uid session.
|
||||
- **(c) Any auth-plane bypass (agent↔viewer credential crossover)?** NO — viewer plane requires a `purpose=="viewer"` session-bound minted token; agent plane requires a `cak_`/shared key with no JWT branch. Confirmed in code and live.
|
||||
|
||||
---
|
||||
|
||||
## Verdict
|
||||
|
||||
**Phase-1 security EXIT gate: PASS.** Relay/server plane clean; prior HIGH + LOW remediated; SPEC-004
|
||||
surface sound with the keyed-identity invariant intact across the connect path, DB upsert, in-memory
|
||||
reattach, and startup restore. No new CRITICAL/HIGH/MEDIUM/LOW.
|
||||
|
||||
**Tracked, deferred-by-design (not exit blockers):**
|
||||
- Viewer-token logout revocation residual (MEDIUM, TTL-bounded) — `v2-secure-session-core/plan.md`.
|
||||
- Update-binary signature verification (defense-in-depth, task #10) — TODO at `update.rs:189`.
|
||||
|
||||
*Note: only `--pass=security` was run. API-surface, Rust-quality, TypeScript, protocol-integrity,
|
||||
docs-reconciliation, and CI/CD passes were not executed this run.*
|
||||
@@ -166,7 +166,7 @@ pub async fn upsert_machine(
|
||||
r#"
|
||||
INSERT INTO connect_machines (agent_id, hostname, is_persistent, status, last_seen, machine_uid)
|
||||
VALUES ($1, $2, $3, 'online', NOW(), $4)
|
||||
ON CONFLICT (machine_uid) DO UPDATE SET
|
||||
ON CONFLICT (machine_uid) WHERE machine_uid IS NOT NULL DO UPDATE SET
|
||||
agent_id = EXCLUDED.agent_id,
|
||||
hostname = EXCLUDED.hostname,
|
||||
status = 'online',
|
||||
|
||||
@@ -527,3 +527,60 @@ Reference: SPEC-002 §5; `agent/src/encoder/raw.rs` (salvaged), `proto/guruconne
|
||||
- **Rate limiting:** hammer `/api/auth/login` and the code-validate route → confirm throttling/lockout.
|
||||
- **Migrations:** fresh DB applies the v2 migrations cleanly; `_sqlx_migrations` consistent; `tenant_id`
|
||||
populated with the default tenant.
|
||||
|
||||
---
|
||||
|
||||
## Task 9 [PROPOSED 2026-06-01 — provisioning model = TOFU auto-enroll, chosen by Mike]: `cak_` auto-enroll provisioning + shared-key retirement
|
||||
|
||||
> Context: Task 2 built the SERVER `cak_` machinery (mint/SHA-256 hash/verify in `auth/agent_keys.rs`,
|
||||
> relay validation in `validate_agent_api_key`, admin issuance `POST /api/machines/:id/keys`). What's
|
||||
> missing is how an AGENT obtains and uses a `cak_` — today agents still carry the deprecated shared
|
||||
> `AGENT_API_KEY`, so `connect_agent_keys` is empty and the relay logs the DEPRECATED-shared-key warning
|
||||
> for every agent. This task closes that with **trust-on-first-use auto-enroll** so the shared key can be
|
||||
> retired (unblocks task list #5). NOTE: the agent already presents whatever is in its `api_key` slot and
|
||||
> the relay auto-detects `cak_` vs shared — so a `cak_`-keyed agent needs **no change to its auth call**,
|
||||
> only a way to *receive*, *persist*, and *prefer* a `cak_`.
|
||||
|
||||
**Flow (TOFU):**
|
||||
1. **Bootstrap (first connect):** a fresh agent authenticates on `/ws/agent` with a bootstrap secret —
|
||||
interim: the shared `AGENT_API_KEY` (embedded by the download endpoint); target: a single-use,
|
||||
short-lived **enroll token** (more secure TOFU — see Security).
|
||||
2. **Server issues on first connect:** when an agent authed via the bootstrap path (i.e. NOT already
|
||||
`cak_`-keyed) connects and its machine has **no active (non-revoked) `cak_`**, the relay: resolves/creates
|
||||
the machine row (existing `upsert_machine` on `machine_uid` — now functional after the 2026-06-01
|
||||
ON CONFLICT fix), mints a `cak_` (`generate_agent_key` + `db::agent_keys::insert_agent_key` for that
|
||||
`machine_id`), and sends the plaintext key to the agent **once** over a new server→agent message. Only
|
||||
the hash is stored. **Idempotent:** never re-issue if an active key already exists for the machine.
|
||||
3. **Agent receives + persists + prefers:** on `AgentKeyProvision`, the agent persists the `cak_` durably at
|
||||
`%ProgramData%\GuruConnect\agent_key` (restricted ACL, same pattern as `machine_uid`). On startup it loads
|
||||
the persisted `cak_` if present and uses it as its auth key, falling back to the embedded/bootstrap secret
|
||||
only when no `cak_` is stored yet. After provisioning, every reconnect authenticates via `cak_` (no more
|
||||
DEPRECATED-shared-key warning for that agent).
|
||||
4. **Shared-key retirement (phased):** Phase A — shared key stays as the bootstrap so existing+new agents
|
||||
self-enroll; monitor the relay WARN count → ~0. Phase B — once the fleet is `cak_`-keyed, restrict the
|
||||
shared `AGENT_API_KEY` to enrollment-only or remove the env entirely (only `cak_` / enroll-token accepted).
|
||||
This is the concrete completion of task-list #5.
|
||||
|
||||
**Protocol (4-artifact drift discipline):** add `AgentKeyProvision { string key = 1; }` (server→agent) to
|
||||
`proto/guruconnect.proto` with a new reserved message ID; regenerate prost on both agent + server; the
|
||||
hand-written `dashboard/src/lib/protobuf.ts` decoder does NOT need it (agent-plane only) but reserve the ID.
|
||||
|
||||
**Files:** `proto/guruconnect.proto` (new message); `server/src/relay/mod.rs` (issue+send on bootstrap connect
|
||||
with no active key); `server/src/db/agent_keys.rs` (add `has_active_key(machine_id)` check; reuse insert);
|
||||
`agent/src/transport/*` (handle inbound `AgentKeyProvision`); `agent/src/config.rs` + a small key-store module
|
||||
(load/persist `cak_`, prefer over bootstrap).
|
||||
|
||||
**Security (TOFU):** the first connect trusts the bootstrap secret — a leaked shared key during the enroll
|
||||
window could enroll a rogue agent; the secure target is a **single-use, short-lived enroll token** per
|
||||
deployment instead of the shared key (shared-key bootstrap is interim convenience). The `cak_` is sent
|
||||
plaintext once over the existing wss/TLS channel; only the hash is stored server-side; the agent stores it
|
||||
locally with restricted ACLs. Revocation via the existing `DELETE /api/machines/:id/keys/:key_id` fails the
|
||||
agent closed; on its next bootstrap connect it re-enrolls. The keyed-agent dedup (Task 3) keeps the
|
||||
authenticated identity authoritative.
|
||||
|
||||
**Verification:** drop a current-build (signed 0.3.0+) agent configured with the shared-key bootstrap →
|
||||
it connects, receives a `cak_`, persists it; restart → it authenticates via the `cak_` (relay shows NO
|
||||
DEPRECATED-shared-key warning) and `connect_agent_keys` holds exactly one active key for the machine; issuance
|
||||
is idempotent across reconnects; revoke the key via the admin API → agent rejected, then re-enrolls on next
|
||||
bootstrap connect. Reference: `auth/agent_keys.rs`, `api/machine_keys.rs`, `relay/mod.rs:266-309`
|
||||
(`validate_agent_api_key`), `.claude/standards/security/credential-handling.md`.
|
||||
|
||||
Reference in New Issue
Block a user