feat: agent coordination system (workflows, locks, components, messages)

Adds /api/coord/* endpoints for real-time cross-session coordination:
- coord_workflows: named units of work per project
- coord_work_items: tasks within workflows with dependency chains
- coord_session_locks: exclusive resource locks with auto-expiry (TTL)
- coord_component_states: live component state per project (upsert)
- coord_messages: cross-session messaging and broadcasts
- /api/coord/status: cross-project snapshot endpoint

Replaces PROJECT_STATE.md as the coordination layer for Claude sessions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-12 08:25:33 -07:00
parent bd88398297
commit 63975284f4
24 changed files with 1565 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
"""Service layer for CoordComponentState."""
from typing import Optional
from fastapi import HTTPException, status
from sqlalchemy.dialects.mysql import insert
from sqlalchemy.orm import Session
from api.models.coord_component_state import CoordComponentState
from api.schemas.coord_component_state import CoordComponentStateUpsert
def get_component_states(
db: Session,
project_key: Optional[str] = None,
) -> list[CoordComponentState]:
"""Return all component states, optionally filtered by project."""
q = db.query(CoordComponentState)
if project_key:
q = q.filter(CoordComponentState.project_key == project_key)
return q.order_by(CoordComponentState.project_key, CoordComponentState.component).all()
def upsert_component_state(
db: Session,
project_key: str,
component: str,
data: CoordComponentStateUpsert,
) -> CoordComponentState:
"""Insert or update a component state using MariaDB ON DUPLICATE KEY UPDATE."""
try:
stmt = insert(CoordComponentState).values(
project_key=project_key,
component=component,
state=data.state,
version=data.version,
notes=data.notes,
updated_by=data.updated_by,
)
stmt = stmt.on_duplicate_key_update(
state=stmt.inserted.state,
version=stmt.inserted.version,
notes=stmt.inserted.notes,
updated_by=stmt.inserted.updated_by,
)
db.execute(stmt)
db.commit()
record = (
db.query(CoordComponentState)
.filter(
CoordComponentState.project_key == project_key,
CoordComponentState.component == component,
)
.first()
)
return record
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to upsert component state: {e}"
)

View File

@@ -0,0 +1,128 @@
"""Service layer for CoordSessionLock."""
from datetime import datetime, timedelta, timezone
from typing import Optional
from uuid import UUID
from fastapi import HTTPException, status
from sqlalchemy import and_, or_
from sqlalchemy.orm import Session
from api.models.coord_session_lock import CoordSessionLock
from api.schemas.coord_session_lock import CoordSessionLockCreate
def _active_filter(q):
"""Apply the 'lock is currently active' predicate to a query."""
now = datetime.now(timezone.utc).replace(tzinfo=None)
return q.filter(
CoordSessionLock.released_at.is_(None),
or_(
CoordSessionLock.expires_at.is_(None),
CoordSessionLock.expires_at > now,
),
)
def get_active_locks(
db: Session,
project_key: Optional[str] = None,
session_id: Optional[str] = None,
skip: int = 0,
limit: int = 100,
) -> tuple[list[CoordSessionLock], int]:
"""Return currently active locks with optional filters."""
q = db.query(CoordSessionLock)
if project_key:
q = q.filter(CoordSessionLock.project_key == project_key)
if session_id:
q = q.filter(CoordSessionLock.session_id == session_id)
q = _active_filter(q)
total = q.count()
locks = q.order_by(CoordSessionLock.acquired_at.desc()).offset(skip).limit(limit).all()
return locks, total
def check_resource_locked(
db: Session, project_key: str, resource: str
) -> Optional[CoordSessionLock]:
"""Return the active lock on a resource, or None if unlocked."""
q = db.query(CoordSessionLock).filter(
CoordSessionLock.project_key == project_key,
CoordSessionLock.resource == resource,
)
return _active_filter(q).first()
def claim_lock(db: Session, data: CoordSessionLockCreate) -> CoordSessionLock:
"""Claim a resource lock, computing expires_at from ttl_hours."""
expires_at: Optional[datetime] = None
if data.ttl_hours > 0:
expires_at = datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(hours=data.ttl_hours)
try:
lock = CoordSessionLock(
project_key=data.project_key,
session_id=data.session_id,
resource=data.resource,
description=data.description,
acquired_at=datetime.now(timezone.utc).replace(tzinfo=None),
expires_at=expires_at,
released_at=None,
)
db.add(lock)
db.commit()
db.refresh(lock)
return lock
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to claim lock: {e}"
)
def release_lock(db: Session, lock_id: UUID, session_id: str) -> CoordSessionLock:
"""Release a specific lock; only the owning session may release it."""
lock = db.query(CoordSessionLock).filter(CoordSessionLock.id == str(lock_id)).first()
if not lock:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Lock {lock_id} not found"
)
if lock.session_id != session_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Only the session that claimed this lock may release it"
)
try:
lock.released_at = datetime.now(timezone.utc).replace(tzinfo=None)
db.commit()
db.refresh(lock)
return lock
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to release lock: {e}"
)
def release_all_session_locks(db: Session, session_id: str) -> dict:
"""Release all active locks held by a session (cleanup on session end)."""
now = datetime.now(timezone.utc).replace(tzinfo=None)
try:
q = db.query(CoordSessionLock).filter(
CoordSessionLock.session_id == session_id,
CoordSessionLock.released_at.is_(None),
)
count = q.count()
q.update({"released_at": now}, synchronize_session=False)
db.commit()
return {"message": f"Released {count} lock(s) for session '{session_id}'", "count": count}
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to release session locks: {e}"
)

View File

@@ -0,0 +1,96 @@
"""Service layer for CoordMessage."""
from datetime import datetime, timezone
from typing import Optional
from uuid import UUID
from fastapi import HTTPException, status
from sqlalchemy.orm import Session
from api.models.coord_message import CoordMessage
from api.schemas.coord_message import CoordMessageCreate
def get_messages(
db: Session,
to_session: Optional[str] = None,
unread_only: bool = False,
skip: int = 0,
limit: int = 100,
) -> tuple[list[CoordMessage], int]:
"""Return paginated messages with optional filters."""
q = db.query(CoordMessage)
if to_session:
q = q.filter(CoordMessage.to_session == to_session)
if unread_only:
q = q.filter(CoordMessage.read_at.is_(None))
total = q.count()
messages = q.order_by(CoordMessage.created_at.desc()).offset(skip).limit(limit).all()
return messages, total
def get_unread_count(db: Session, session_id: str) -> int:
"""Return the number of unread messages addressed to a session."""
return (
db.query(CoordMessage)
.filter(CoordMessage.to_session == session_id, CoordMessage.read_at.is_(None))
.count()
)
def get_message_by_id(db: Session, message_id: UUID) -> CoordMessage:
"""Return a single message or raise 404."""
msg = db.query(CoordMessage).filter(CoordMessage.id == str(message_id)).first()
if not msg:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Message {message_id} not found"
)
return msg
def send_message(db: Session, data: CoordMessageCreate) -> CoordMessage:
"""Persist a new message."""
try:
msg = CoordMessage(**data.model_dump())
db.add(msg)
db.commit()
db.refresh(msg)
return msg
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to send message: {e}"
)
def mark_read(db: Session, message_id: UUID) -> CoordMessage:
"""Mark a message as read at the current time."""
msg = get_message_by_id(db, message_id)
try:
msg.read_at = datetime.now(timezone.utc).replace(tzinfo=None)
db.commit()
db.refresh(msg)
return msg
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to mark message read: {e}"
)
def delete_message(db: Session, message_id: UUID) -> dict:
"""Delete a message by ID."""
msg = get_message_by_id(db, message_id)
try:
db.delete(msg)
db.commit()
return {"message": "Message deleted", "message_id": str(message_id)}
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to delete message: {e}"
)

View File

@@ -0,0 +1,100 @@
"""Service layer for CoordWorkItem."""
from typing import Optional
from uuid import UUID
from fastapi import HTTPException, status
from sqlalchemy.orm import Session
from api.models.coord_work_item import CoordWorkItem
from api.models.coord_workflow import CoordWorkflow
from api.schemas.coord_work_item import CoordWorkItemCreate, CoordWorkItemUpdate
def get_work_items(
db: Session,
workflow_id: Optional[str] = None,
project_key: Optional[str] = None,
status_filter: Optional[str] = None,
assigned_session: Optional[str] = None,
skip: int = 0,
limit: int = 100,
) -> tuple[list[CoordWorkItem], int]:
"""Return paginated work items with optional filters."""
q = db.query(CoordWorkItem)
if workflow_id:
q = q.filter(CoordWorkItem.workflow_id == workflow_id)
if project_key:
q = q.filter(CoordWorkItem.project_key == project_key)
if status_filter:
q = q.filter(CoordWorkItem.status == status_filter)
if assigned_session:
q = q.filter(CoordWorkItem.assigned_session == assigned_session)
total = q.count()
items = q.order_by(CoordWorkItem.priority.desc(), CoordWorkItem.created_at.asc()).offset(skip).limit(limit).all()
return items, total
def get_work_item_by_id(db: Session, item_id: UUID) -> CoordWorkItem:
"""Return a single work item or raise 404."""
item = db.query(CoordWorkItem).filter(CoordWorkItem.id == str(item_id)).first()
if not item:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Work item {item_id} not found"
)
return item
def create_work_item(db: Session, data: CoordWorkItemCreate) -> CoordWorkItem:
"""Create and persist a new work item, validating the parent workflow exists."""
workflow = db.query(CoordWorkflow).filter(CoordWorkflow.id == data.workflow_id).first()
if not workflow:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Workflow {data.workflow_id} not found"
)
try:
item = CoordWorkItem(**data.model_dump())
db.add(item)
db.commit()
db.refresh(item)
return item
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to create work item: {e}"
)
def update_work_item(db: Session, item_id: UUID, data: CoordWorkItemUpdate) -> CoordWorkItem:
"""Apply partial update to a work item."""
item = get_work_item_by_id(db, item_id)
try:
for field, value in data.model_dump(exclude_unset=True).items():
setattr(item, field, value)
db.commit()
db.refresh(item)
return item
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to update work item: {e}"
)
def delete_work_item(db: Session, item_id: UUID) -> dict:
"""Delete a work item by ID."""
item = get_work_item_by_id(db, item_id)
try:
db.delete(item)
db.commit()
return {"message": "Work item deleted", "item_id": str(item_id)}
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to delete work item: {e}"
)

View File

@@ -0,0 +1,87 @@
"""Service layer for CoordWorkflow."""
from typing import Optional
from uuid import UUID
from fastapi import HTTPException, status
from sqlalchemy.orm import Session
from api.models.coord_workflow import CoordWorkflow
from api.schemas.coord_workflow import CoordWorkflowCreate, CoordWorkflowUpdate
def get_workflows(
db: Session,
project_key: Optional[str] = None,
status_filter: Optional[str] = None,
skip: int = 0,
limit: int = 100,
) -> tuple[list[CoordWorkflow], int]:
"""Return paginated workflows with optional filters."""
q = db.query(CoordWorkflow)
if project_key:
q = q.filter(CoordWorkflow.project_key == project_key)
if status_filter:
q = q.filter(CoordWorkflow.status == status_filter)
total = q.count()
workflows = q.order_by(CoordWorkflow.created_at.desc()).offset(skip).limit(limit).all()
return workflows, total
def get_workflow_by_id(db: Session, workflow_id: UUID) -> CoordWorkflow:
"""Return a single workflow or raise 404."""
workflow = db.query(CoordWorkflow).filter(CoordWorkflow.id == str(workflow_id)).first()
if not workflow:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Workflow {workflow_id} not found"
)
return workflow
def create_workflow(db: Session, data: CoordWorkflowCreate) -> CoordWorkflow:
"""Create and persist a new workflow."""
try:
workflow = CoordWorkflow(**data.model_dump())
db.add(workflow)
db.commit()
db.refresh(workflow)
return workflow
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to create workflow: {e}"
)
def update_workflow(db: Session, workflow_id: UUID, data: CoordWorkflowUpdate) -> CoordWorkflow:
"""Apply partial update to a workflow."""
workflow = get_workflow_by_id(db, workflow_id)
try:
for field, value in data.model_dump(exclude_unset=True).items():
setattr(workflow, field, value)
db.commit()
db.refresh(workflow)
return workflow
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to update workflow: {e}"
)
def delete_workflow(db: Session, workflow_id: UUID) -> dict:
"""Delete a workflow by ID."""
workflow = get_workflow_by_id(db, workflow_id)
try:
db.delete(workflow)
db.commit()
return {"message": "Workflow deleted", "workflow_id": str(workflow_id)}
except Exception as e:
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to delete workflow: {e}"
)