fix(rbac): pin organization owner tenant in DB instead of deriving at runtime

Plan 3's isOwnerTenant resolved "is this tenant the owner tenant?"
by looking up the owner user's CURRENT tenant via userRepo. That
breaks silently the moment the owner user is moved to another
tenant: the formerly-protected owning tenant becomes removable
(can be kicked from OTM, role-downgraded), and the new tenant the
owner moved to is treated as untouchable even though it isn't even
in OTM.

Promote owner-tenant to a first-class column on `organizations`:

- New migration 000046 adds owner_tenant_id BIGINT NOT NULL with
  index. Backfill: (1) owner user's current tenant when known,
  (2) earliest Admin tenant in OTM as fallback for orphans, then
  (3) RAISE EXCEPTION on anything still NULL — refuse to silently
  ship a bogus value. SQLite init schema mirrors the column.

- CreateOrganization now sets OwnerTenantID at create time so the
  pinned value is correct for all new orgs.

- isOwnerTenant becomes a pure equality check on the persisted
  column — no userRepo lookup, no error tuple. Fail-closed on a
  zero column value (legacy data not yet backfilled or unit tests
  bypassing the migration) so the worst case is "membership table
  frozen for that org" rather than "owner tenant is removable",
  matching the prior fail-closed-on-error semantics.

- RemoveTenantMember / UpdateTenantMemberRole simplify to a single
  bool check.
This commit is contained in:
wizardchen
2026-05-15 22:53:02 +08:00
committed by lyingbug
parent 0b71e86f93
commit 40fdb978ff
5 changed files with 134 additions and 39 deletions

View File

@@ -101,6 +101,10 @@ func (s *organizationService) CreateOrganization(ctx context.Context, userID str
Description: req.Description,
Avatar: strings.TrimSpace(req.Avatar),
OwnerID: userID,
// Owning tenant is pinned at create time; never changes even if
// the owner user later moves to another tenant. See migration
// 000046 and the isOwnerTenant helper below.
OwnerTenantID: tenantID,
InviteCode: generateInviteCode(),
InviteCodeExpiresAt: resolveInviteExpiry(validityDays, now),
InviteCodeValidityDays: validityDays,
@@ -114,10 +118,10 @@ func (s *organizationService) CreateOrganization(ctx context.Context, userID str
return nil, err
}
// Enrol the creator's tenant as admin. Owner tenant is encoded by
// (Organization.OwnerID points to a user, the user's tenant is the
// owning tenant); the membership row gives that tenant the same
// role plumbing as everyone else for join/leave/role-change UX.
// Enrol the creator's tenant as admin. The membership row gives
// that tenant the same role plumbing as everyone else for
// join/leave/role-change UX, while the persisted owner_tenant_id
// on `organizations` is what gates the "cannot remove owner" check.
joinedAt := now
member := &types.OrganizationTenantMember{
ID: uuid.New().String(),
@@ -389,14 +393,10 @@ func (s *organizationService) RemoveTenantMember(ctx context.Context, orgID stri
if err != nil {
return err
}
// Owner-tenant resolution: derive owner tenant from Organization.OwnerID's
// own tenant. isOwnerTenant is fail-closed on any lookup error we
// also refuse, so a deleted owner user can't be used to bypass the
// "cannot remove owner tenant" invariant.
if isOwnerTenant, ownerErr := s.isOwnerTenant(ctx, org, memberTenantID); isOwnerTenant || ownerErr != nil {
if ownerErr != nil {
logger.Warnf(ctx, "RemoveTenantMember: owner-tenant resolution failed for org %s: %v; refusing", orgID, ownerErr)
}
// Owner-tenant is the persisted org.OwnerTenantID (Plan 3 / migration
// 000046). isOwnerTenant fails-closed on legacy zero values, so this
// is also the right gate for orgs created before the backfill.
if s.isOwnerTenant(ctx, org, memberTenantID) {
return ErrCannotRemoveOwner
}
@@ -435,10 +435,7 @@ func (s *organizationService) UpdateTenantMemberRole(ctx context.Context, orgID
if err != nil {
return err
}
if isOwnerTenant, ownerErr := s.isOwnerTenant(ctx, org, memberTenantID); isOwnerTenant || ownerErr != nil {
if ownerErr != nil {
logger.Warnf(ctx, "UpdateTenantMemberRole: owner-tenant resolution failed for org %s: %v; refusing", orgID, ownerErr)
}
if s.isOwnerTenant(ctx, org, memberTenantID) {
return ErrCannotChangeOwnerRole
}
_ = operatorUserID
@@ -582,33 +579,30 @@ func (s *organizationService) GetTenantRoleInOrg(ctx context.Context, orgID stri
return member.Role, nil
}
// isOwnerTenant returns true when the given tenant is the home tenant of
// org.OwnerID. The owner tenant is undeletable / unchangeable in the
// org membership table — that lets us preserve the "owner can never be
// orphaned" invariant without adding a separate column.
// isOwnerTenant returns true when the given tenant is the org's owning
// tenant, i.e. the tenant the creator was in when CreateOrganization
// ran. Plan 3 (#1303, migration 000046) persists this on the org row
// itself; we no longer derive it from owner.user.TenantID at request
// time, so the answer is stable even if the owner user later switches
// tenants or is soft-deleted.
//
// Fail-closed: when we can't resolve the owner user (lookup error, owner
// soft-deleted, userRepo unwired), we return (true, err) so callers
// refuse the destructive action rather than silently fall through.
// Callers MUST treat any non-nil error as "treat as owner-tenant".
func (s *organizationService) isOwnerTenant(ctx context.Context, org *types.Organization, tenantID uint64) (bool, error) {
// Fail-closed semantics: when org.OwnerTenantID is zero (e.g. legacy
// row that pre-dates migration 000046, or a unit test that bypassed
// the migration), every tenant is treated AS IF it were the owner —
// effectively freezing the membership table for that org until an
// operator backfills the column. This is the conservative choice
// because the alternative (treat as "no owner") would let any tenant
// be removed including the real one, with no recovery path. The
// production migration aborts on any unresolved orphan, so this branch
// should be unreachable outside of tests.
func (s *organizationService) isOwnerTenant(_ context.Context, org *types.Organization, tenantID uint64) bool {
if org == nil {
// No org to compare against — caller is operating on a request
// that already failed validation; nothing to protect here.
return false, nil
return false
}
if s.userRepo == nil {
// Owner identity unknowable; refuse the destructive op.
return true, errors.New("owner tenant cannot be resolved: userRepo unavailable")
if org.OwnerTenantID == 0 {
return true
}
owner, err := s.userRepo.GetUserByID(ctx, org.OwnerID)
if err != nil {
return true, err
}
if owner == nil {
return true, errors.New("owner user not found")
}
return owner.TenantID == tenantID, nil
return org.OwnerTenantID == tenantID
}
// generateInviteCode generates a random 16-character invite code

View File

@@ -69,6 +69,13 @@ type Organization struct {
Avatar string `json:"avatar" gorm:"type:varchar(512)"`
// User ID of the organization owner
OwnerID string `json:"owner_id" gorm:"type:varchar(36);not null;index"`
// OwnerTenantID is the tenant the owner belonged to when the
// organization was created. Plan 3 (#1303) treats this tenant as
// the org's "owning tenant": its membership row in
// organization_tenant_members is undeletable / unchangeable so
// the org can never be orphaned even if the owner user later
// switches tenants or is soft-deleted. See migration 000046.
OwnerTenantID uint64 `json:"owner_tenant_id" gorm:"not null;index"`
// Unique invitation code for joining the organization
InviteCode string `json:"invite_code" gorm:"type:varchar(32);uniqueIndex"`
// When the current invite code expires; nil means no expiry

View File

@@ -377,6 +377,8 @@ CREATE TABLE IF NOT EXISTS organizations (
name VARCHAR(255) NOT NULL,
description TEXT,
owner_id VARCHAR(36) NOT NULL,
-- Plan 3 (#1303): owning tenant pinned at create time; see migration 000046.
owner_tenant_id INTEGER NOT NULL DEFAULT 0,
invite_code VARCHAR(32),
require_approval BOOLEAN DEFAULT 0,
invite_code_expires_at DATETIME,
@@ -390,6 +392,7 @@ CREATE TABLE IF NOT EXISTS organizations (
);
CREATE INDEX IF NOT EXISTS idx_organizations_owner_id ON organizations(owner_id);
CREATE INDEX IF NOT EXISTS idx_organizations_owner_tenant ON organizations(owner_tenant_id);
CREATE INDEX IF NOT EXISTS idx_organizations_deleted_at ON organizations(deleted_at);
CREATE TABLE IF NOT EXISTS organization_tenant_members (

View File

@@ -0,0 +1,13 @@
-- Reverse migration for 000046_org_owner_tenant_id.
--
-- Drop the index first, then the column. The data backfilled in up.sql
-- is recoverable from `users.tenant_id` so we don't park it.
DO $$ BEGIN RAISE NOTICE '[Migration 000046] Dropping organizations.owner_tenant_id'; END $$;
DROP INDEX IF EXISTS idx_organizations_owner_tenant;
ALTER TABLE organizations
DROP COLUMN IF EXISTS owner_tenant_id;
DO $$ BEGIN RAISE NOTICE '[Migration 000046] organizations.owner_tenant_id removed'; END $$;

View File

@@ -0,0 +1,78 @@
-- Migration: 000046_org_owner_tenant_id
--
-- Plan 3 follow-up to #1303: pin the org's "owning tenant" in the
-- organizations table itself instead of recomputing it on every
-- permission check from the owner user's *current* tenant. The old
-- `isOwnerTenant(org, t) == (owner.user.TenantID == t)` rule breaks
-- silently when the owner user moves to a different tenant: the
-- formerly-protected owning tenant becomes removable, and the new
-- tenant the owner moved to (which probably isn't even in OTM) starts
-- being treated as untouchable. Storing owner_tenant_id at create time
-- and never changing it post-hoc gives us a single source of truth.
--
-- Backfill strategy:
-- 1. derive from the owner user's current tenant (works for the
-- vast majority of orgs);
-- 2. for orgs whose owner user is gone, fall back to the earliest
-- Admin tenant in OTM — deterministic across re-runs;
-- 3. anything still NULL means the org is genuinely orphaned (no
-- owner user, no admin tenant) — abort the migration with a loud
-- RAISE EXCEPTION so the operator deals with it manually.
-- We refuse to silently ship an unconstrained NOT NULL since it
-- would mean some org rows are unreachable.
DO $$ BEGIN RAISE NOTICE '[Migration 000046] Adding organizations.owner_tenant_id'; END $$;
ALTER TABLE organizations
ADD COLUMN IF NOT EXISTS owner_tenant_id BIGINT;
-- Pass 1: owner user still exists.
UPDATE organizations o
SET owner_tenant_id = u.tenant_id
FROM users u
WHERE o.owner_id = u.id
AND o.owner_tenant_id IS NULL;
-- Pass 2: orphan owner — pick the earliest Admin tenant in OTM.
UPDATE organizations o
SET owner_tenant_id = sub.tenant_id
FROM (
SELECT DISTINCT ON (otm.organization_id)
otm.organization_id,
otm.tenant_id
FROM organization_tenant_members otm
WHERE otm.role = 'admin'
ORDER BY otm.organization_id, otm.created_at ASC, otm.tenant_id ASC
) sub
WHERE sub.organization_id = o.id
AND o.owner_tenant_id IS NULL;
-- Surface anything still unresolved. We deliberately fail the migration
-- here instead of guessing — making the column NOT NULL with a bogus
-- backfill would corrupt permission checks for those orgs forever.
DO $$
DECLARE
orphan_count INT;
BEGIN
SELECT COUNT(*) INTO orphan_count
FROM organizations
WHERE owner_tenant_id IS NULL
AND deleted_at IS NULL;
IF orphan_count > 0 THEN
RAISE EXCEPTION
'[Migration 000046] % orphan organization(s) have no resolvable owner_tenant_id (owner user missing AND no admin tenant in OTM). Either soft-delete them or backfill manually before retrying. Inspect with: SELECT id, name, owner_id FROM organizations WHERE owner_tenant_id IS NULL AND deleted_at IS NULL;',
orphan_count;
END IF;
END $$;
-- Lock in the invariant.
ALTER TABLE organizations
ALTER COLUMN owner_tenant_id SET NOT NULL;
CREATE INDEX IF NOT EXISTS idx_organizations_owner_tenant
ON organizations (owner_tenant_id);
COMMENT ON COLUMN organizations.owner_tenant_id IS
'Plan 3 (#1303): owning tenant; cannot be removed/downgraded from OTM.';
DO $$ BEGIN RAISE NOTICE '[Migration 000046] organizations.owner_tenant_id ready'; END $$;