From 7b192e546f7b419189198ecd9096c57cc6a89b6a Mon Sep 17 00:00:00 2001 From: wizardchen Date: Tue, 26 May 2026 11:20:29 +0800 Subject: [PATCH] feat(builtin-models): reconcile YAML lifecycle with drift sweep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the builtin_models.yaml loader so the YAML file becomes a complete source of truth for the rows it owns. Builds on the previous commit's managed_by column. Lifecycle contract: - Every UPSERTed entry is tagged managed_by="yaml". - The DoUpdates list now includes deleted_at, so an entry that was soft-deleted (e.g. via UI/API) is automatically resurrected when it reappears in the file. Closes the "ghost row that exists but is invisible" failure mode. - After all UPSERTs, the loader soft-deletes rows where managed_by='yaml' AND id NOT IN (current YAML id set). Removing an entry from YAML is now the supported way to retire a built-in model — no manual SQL needed. - Rows tagged managed_by='' (UI/API/SQL-seeded built-ins) are invisible to the reconcile path and never touched. - When a YAML entry sets is_default=true, the loader first clears is_default on any other rows in the same (tenant_id, type) bucket, mirroring the invariant enforced by the API path (repository.UnsetDefaultModel). Failure handling stays defensive: - File missing / not a regular file / parse error: warn and skip; the drift sweep is NOT executed so a malformed file cannot wipe rows. - Per-entry UPSERT error: warn, drop the id from the keep-set so the sweep also leaves the existing row alone ("leave alone on failure"). Tests cover: file-missing, parse-error, basic upsert + defaults, idempotency, ${ENV} interpolation (set vs unset), drift sweep removing YAML rows, drift sweep ignoring manual rows, soft-delete resurrection, is_default cleanup across tenant+type, explicit empty list sweeping all yaml-managed rows, and a regression guard ensuring BeforeCreate does not overwrite YAML-supplied stable ids. Docs are rewritten so operators see "delete from YAML and restart" as the supported removal path; SQL is retained only for the legacy managed_by='' slice. --- docs/BUILTIN_MODELS.md | 27 +- internal/types/builtin_models_config.go | 109 ++++++- internal/types/builtin_models_config_test.go | 302 +++++++++++++++++++ 3 files changed, 417 insertions(+), 21 deletions(-) create mode 100644 internal/types/builtin_models_config_test.go diff --git a/docs/BUILTIN_MODELS.md b/docs/BUILTIN_MODELS.md index d5ddf29d..5d7e0e47 100644 --- a/docs/BUILTIN_MODELS.md +++ b/docs/BUILTIN_MODELS.md @@ -237,7 +237,18 @@ WHERE id = '模型ID'; ## 移除内置模型 -YAML 配置删除条目后**不会**自动清理 DB 里的对应行(避免误删 ops 手工维护的内置模型)。如需删除: +**从 YAML 删除条目即可。** 应用启动时会自动软删除 `models` 表中 YAML 不再声明的 YAML 托管行 —— 你不再需要手工跑 SQL。 + +工作原理:每条由 YAML 写入的行会被打上 `managed_by = 'yaml'` 标记。重启时 loader 走两步: + +1. UPSERT 当前 YAML 中的所有条目(按 `id` 幂等,包含把之前软删过的 `deleted_at` 重置为 NULL —— 也就是说从 YAML 拿掉再加回来等于"复活") +2. 软删除 `is_builtin = true AND managed_by = 'yaml' AND id NOT IN (当前 YAML 中的 id 集合)` 的行 + +**手工通过 SQL 插入的 builtin 行(`managed_by = ''`)永远不会被 loader 触碰**,与 YAML 完全隔离。 + +### 手工路径补充 + +如果你是走 SQL 路径管理的(`managed_by = ''`),删除仍然走老方法: ```sql -- 取消 builtin 标记,恢复为普通模型 @@ -247,11 +258,17 @@ UPDATE models SET is_builtin = false WHERE id = '模型ID'; DELETE FROM models WHERE id = '模型ID'; ``` +### 紧急关闭 YAML 接管 + +如果误改了 YAML 想立刻停用接管又不想清空文件,最快的方法是:把环境变量 `BUILTIN_MODELS_CONFIG` 指向一个不存在的路径并重启 —— loader 看到文件缺失会直接 no-op,**包括跳过 drift sweep**,已经写入的 YAML 托管行保留原状。 + ## 注意事项 1. **ID 命名规范**:建议使用 `builtin-{type}-{slug}` 的格式,例如 `builtin-openai-chat`、`builtin-rerank` 2. **租户ID**:内置模型可以属于任意租户,默认 `10000`(与 `tenants_id_seq` 起点一致) -3. **YAML 与 SQL 并存**:两种方式可以同时使用,YAML 不会覆盖通过 SQL 插入但未在 YAML 出现的 builtin 行 -4. **重启即生效**:修改 YAML 后 `docker compose restart app` 即可让新配置生效 -5. **加密**:API Key 在 `parameters` JSONB 中以加密形式存储(若 `SYSTEM_AES_KEY` 已配置),未配置时降级为明文兼容路径 -6. **安全性**:前端会自动隐藏内置模型的 API Key 和 Base URL,但数据库中的原始数据仍然存在,请妥善保管数据库访问权限 +3. **YAML 与 SQL 并存**:两种方式可以同时使用,loader 只动 `managed_by='yaml'` 的行;通过 SQL 插入的 builtin 行对 loader 完全不可见 +4. **`is_default` 单一保证**:YAML 中将某条 entry 标记 `is_default: true` 时,loader 会先把同 `(tenant_id, type)` 下的其它默认模型置为 `false`,避免 API 路径维护的"每类型一个默认模型"语义被破坏 +5. **重启即生效**:修改 YAML 后 `docker compose restart app` 即可让新配置生效 +6. **加密**:API Key 在 `parameters` JSONB 中以加密形式存储(若 `SYSTEM_AES_KEY` 已配置),未配置时降级为明文兼容路径 +7. **安全性**:前端会自动隐藏内置模型的 API Key 和 Base URL,但数据库中的原始数据仍然存在,请妥善保管数据库访问权限 +8. **解析错误自我保护**:YAML 解析失败时 loader 仅打 warning 并跳过 reconcile,**不会**执行 drift sweep,确保一个手抖的 YAML 改动不会大规模软删既有内置模型 diff --git a/internal/types/builtin_models_config.go b/internal/types/builtin_models_config.go index 1992a175..3c64f07d 100644 --- a/internal/types/builtin_models_config.go +++ b/internal/types/builtin_models_config.go @@ -12,6 +12,12 @@ import ( "gorm.io/gorm/clause" ) +// BuiltinModelManagedBy is the value written into `models.managed_by` for +// rows whose lifecycle is owned by config/builtin_models.yaml. Other rows +// (UI / API / hand-written SQL) keep the column at its default empty string +// and are never touched by the YAML loader. +const BuiltinModelManagedBy = "yaml" + // BuiltinModelEntry mirrors one entry in builtin_models.yaml. // Each entry becomes a row in the models table with is_builtin=true. type BuiltinModelEntry struct { @@ -50,14 +56,34 @@ func interpolateBuiltinModelEnv(s string) string { } // LoadBuiltinModelsConfig reads builtin_models.yaml (or the path pointed to by -// BUILTIN_MODELS_CONFIG) and UPSERTs each entry into the models table. +// BUILTIN_MODELS_CONFIG) and reconciles the YAML-managed slice of `models` +// with the file contents. // -// Behaviour: +// Lifecycle contract: +// - YAML loader only ever reads/writes rows tagged managed_by="yaml". Rows +// created via UI/API/SQL (managed_by="") are invisible to the loader and +// are never modified. +// - Each YAML entry is UPSERTed by id, with deleted_at force-reset to NULL +// (so a row that was soft-deleted previously is resurrected when it +// reappears in the file). +// - After all UPSERTs, any pre-existing yaml-managed row whose id is no +// longer in the file is soft-deleted. Removing an entry from YAML is +// therefore the supported way to retire a built-in model — no manual +// SQL needed. +// - If is_default=true is set on a YAML entry, the loader first clears +// is_default on any other rows in the same (tenant_id, type) bucket, +// mirroring the invariant enforced by the API path. Multiple entries +// with is_default=true for the same bucket result in last-one-wins with +// a warning. +// +// Failure handling: // - file not found / mount point is a directory / path unset: no-op -// - YAML parse error: prints a warning, returns nil (never aborts startup) -// - per-entry UPSERT error: prints a warning, continues with the next entry -// - never deletes entries that disappeared from YAML: operators may have -// seeded extras manually via SQL; removing them here would be surprising +// - YAML parse error: prints a warning and aborts the reconcile (the +// drift sweep is NOT run, so a malformed file cannot accidentally wipe +// YAML-managed rows) +// - per-entry UPSERT error: prints a warning, the entry is dropped from +// the "current YAML id set" so the sweep won't delete its existing +// row either (treats the failure as "leave alone") func LoadBuiltinModelsConfig(ctx context.Context, db *gorm.DB, configDir string) error { path := os.Getenv("BUILTIN_MODELS_CONFIG") if path == "" { @@ -83,16 +109,16 @@ func LoadBuiltinModelsConfig(ctx context.Context, db *gorm.DB, configDir string) var file builtinModelsFile if err := yaml.Unmarshal([]byte(expanded), &file); err != nil { - fmt.Printf("Warning: parse built-in models config %s failed: %v; skipping.\n", path, err) - return nil - } - - if len(file.BuiltinModels) == 0 { - fmt.Printf("Built-in models config %s contains no entries.\n", path) + fmt.Printf("Warning: parse built-in models config %s failed: %v; skipping reconcile.\n", path, err) return nil } + // yamlIDs collects every id we successfully upserted this run. Used by + // the drift sweep below to determine which previously-yaml-managed rows + // have disappeared and should be retired. + yamlIDs := make([]string, 0, len(file.BuiltinModels)) applied := 0 + for i := range file.BuiltinModels { e := &file.BuiltinModels[i] if e.ID == "" { @@ -100,11 +126,27 @@ func LoadBuiltinModelsConfig(ctx context.Context, db *gorm.DB, configDir string) continue } m := e.toModel() + + // Mirror the API path's "single default per (tenant_id, type)" + // invariant: clear other defaults before promoting this one. + // Excluded our own id so we don't churn against ourselves. + if m.IsDefault { + if err := db.WithContext(ctx). + Model(&Model{}). + Where("tenant_id = ? AND type = ? AND id <> ? AND is_default = ?", + m.TenantID, m.Type, m.ID, true). + Update("is_default", false).Error; err != nil { + fmt.Printf("Warning: clear existing default for tenant=%d type=%s failed: %v; continuing.\n", + m.TenantID, m.Type, err) + } + } + res := db.WithContext(ctx).Clauses(clause.OnConflict{ Columns: []clause.Column{{Name: "id"}}, DoUpdates: clause.AssignmentColumns([]string{ "tenant_id", "name", "type", "source", "description", - "parameters", "is_default", "status", "is_builtin", "updated_at", + "parameters", "is_default", "status", "is_builtin", + "managed_by", "deleted_at", "updated_at", }), }).Create(&m) if res.Error != nil { @@ -112,16 +154,50 @@ func LoadBuiltinModelsConfig(ctx context.Context, db *gorm.DB, configDir string) continue } applied++ + yamlIDs = append(yamlIDs, e.ID) fmt.Printf("Built-in model upserted: id=%s name=%s type=%s\n", e.ID, e.Name, e.Type) } - fmt.Printf("Built-in models config applied: %d entries from %s.\n", applied, path) + + // Drift sweep: retire YAML-managed rows that no longer appear in the + // file. Manual rows (managed_by='') are untouched. Uses GORM soft delete + // so the row remains recoverable. + pruned, sweepErr := pruneOrphanYAMLManagedModels(ctx, db, yamlIDs) + if sweepErr != nil { + fmt.Printf("Warning: drift sweep for YAML-managed models failed: %v; continuing.\n", sweepErr) + } + + fmt.Printf("Built-in models config applied: %d upserted, %d pruned from %s.\n", applied, pruned, path) return nil } +// pruneOrphanYAMLManagedModels soft-deletes rows where managed_by='yaml' +// and id is NOT in keepIDs. Used to retire entries that have been removed +// from builtin_models.yaml. Returns the number of rows affected. +// +// Safety: +// - Only rows tagged managed_by='yaml' are eligible — manual rows are +// never visible to this query. +// - Already-soft-deleted rows are skipped (GORM default scope), so this +// is idempotent across restarts. +// - When keepIDs is empty the sweep retires ALL yaml-managed rows. That +// matches the natural reading of "the YAML file declares zero entries +// ⇒ no yaml-managed models should exist". The caller has already +// short-circuited on parse failure, so we only reach this branch when +// the operator deliberately reduced the file to an empty list. +func pruneOrphanYAMLManagedModels(ctx context.Context, db *gorm.DB, keepIDs []string) (int64, error) { + q := db.WithContext(ctx). + Where("managed_by = ?", BuiltinModelManagedBy) + if len(keepIDs) > 0 { + q = q.Where("id NOT IN ?", keepIDs) + } + res := q.Delete(&Model{}) + return res.RowsAffected, res.Error +} + // toModel converts a YAML entry to a runtime Model with sensible defaults. // tenant_id defaults to 10000 (matches the seed value of tenants_id_seq); -// source defaults to "remote"; status defaults to "active". IsBuiltin is -// always forced to true regardless of YAML input. +// source defaults to "remote"; status defaults to "active". IsBuiltin and +// ManagedBy are always forced regardless of YAML input. func (e *BuiltinModelEntry) toModel() Model { tenantID := e.TenantID if tenantID == 0 { @@ -145,6 +221,7 @@ func (e *BuiltinModelEntry) toModel() Model { Parameters: e.Parameters, IsDefault: e.IsDefault, IsBuiltin: true, + ManagedBy: BuiltinModelManagedBy, Status: status, } } diff --git a/internal/types/builtin_models_config_test.go b/internal/types/builtin_models_config_test.go new file mode 100644 index 00000000..9854ed7d --- /dev/null +++ b/internal/types/builtin_models_config_test.go @@ -0,0 +1,302 @@ +package types + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gorm.io/driver/sqlite" + "gorm.io/gorm" +) + +// setupBuiltinModelsDB creates an in-memory SQLite DB with `models` migrated +// via GORM AutoMigrate. AutoMigrate honours the struct tags so the +// `managed_by` and soft-delete columns are present, matching what the real +// migrations produce. +func setupBuiltinModelsDB(t *testing.T) *gorm.DB { + t.Helper() + db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{}) + require.NoError(t, err) + require.NoError(t, db.AutoMigrate(&Model{})) + return db +} + +// writeYAML writes a builtin_models.yaml file inside a fresh temp dir and +// returns the dir (which is what LoadBuiltinModelsConfig expects as its +// configDir argument). +func writeYAML(t *testing.T, body string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "builtin_models.yaml") + require.NoError(t, os.WriteFile(path, []byte(body), 0o644)) + return dir +} + +// countModels returns (live, soft-deleted) counts for the given id, used to +// distinguish "row gone via UPSERT failure" from "row soft-deleted via sweep". +func countModels(t *testing.T, db *gorm.DB, id string) (live, deleted int64) { + t.Helper() + require.NoError(t, db.Model(&Model{}).Where("id = ?", id).Count(&live).Error) + require.NoError(t, + db.Unscoped().Model(&Model{}).Where("id = ? AND deleted_at IS NOT NULL", id).Count(&deleted).Error, + ) + return live, deleted +} + +func TestLoadBuiltinModelsConfig_FileMissing(t *testing.T) { + db := setupBuiltinModelsDB(t) + dir := t.TempDir() // empty — no builtin_models.yaml inside + + // Pre-seed a yaml-managed row to verify it is NOT touched when the + // config file is absent (file-missing path must skip the sweep). + require.NoError(t, db.Create(&Model{ + ID: "pre-existing-yaml", Name: "n", Type: ModelTypeKnowledgeQA, + Source: ModelSourceRemote, Status: ModelStatusActive, + IsBuiltin: true, ManagedBy: BuiltinModelManagedBy, + }).Error) + + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + live, deleted := countModels(t, db, "pre-existing-yaml") + assert.Equal(t, int64(1), live, "row must remain when YAML file is absent") + assert.Equal(t, int64(0), deleted) +} + +func TestLoadBuiltinModelsConfig_ParseError(t *testing.T) { + db := setupBuiltinModelsDB(t) + // Malformed YAML — should warn + return without sweeping. + dir := writeYAML(t, "builtin_models: [oops: : bad") + + require.NoError(t, db.Create(&Model{ + ID: "pre-existing-yaml", Name: "n", Type: ModelTypeKnowledgeQA, + Source: ModelSourceRemote, Status: ModelStatusActive, + IsBuiltin: true, ManagedBy: BuiltinModelManagedBy, + }).Error) + + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + live, deleted := countModels(t, db, "pre-existing-yaml") + assert.Equal(t, int64(1), live, "parse error must NOT trigger sweep") + assert.Equal(t, int64(0), deleted) +} + +func TestLoadBuiltinModelsConfig_BasicUpsert(t *testing.T) { + db := setupBuiltinModelsDB(t) + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA + is_default: true + parameters: + provider: openai + api_key: sk-123 +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + var m Model + require.NoError(t, db.Where("id = ?", "builtin-llm").First(&m).Error) + assert.Equal(t, "gpt-4o-mini", m.Name) + assert.Equal(t, ModelTypeKnowledgeQA, m.Type) + assert.Equal(t, ModelSourceRemote, m.Source, "default source must be remote") + assert.Equal(t, ModelStatusActive, m.Status, "default status must be active") + assert.Equal(t, uint64(10000), m.TenantID, "default tenant_id must be 10000") + assert.True(t, m.IsBuiltin, "IsBuiltin must be forced to true") + assert.True(t, m.IsDefault) + assert.Equal(t, BuiltinModelManagedBy, m.ManagedBy) + assert.Equal(t, "openai", m.Parameters.Provider) + assert.Equal(t, "sk-123", m.Parameters.APIKey) +} + +func TestLoadBuiltinModelsConfig_Idempotent(t *testing.T) { + db := setupBuiltinModelsDB(t) + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA + parameters: + provider: openai +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + var count int64 + require.NoError(t, db.Model(&Model{}).Where("id = ?", "builtin-llm").Count(&count).Error) + assert.Equal(t, int64(1), count, "second load must not duplicate") +} + +func TestLoadBuiltinModelsConfig_EnvInterpolation(t *testing.T) { + db := setupBuiltinModelsDB(t) + t.Setenv("BUILTIN_TEST_KEY", "sk-from-env") + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA + parameters: + provider: openai + api_key: ${BUILTIN_TEST_KEY} + base_url: ${BUILTIN_TEST_UNSET} +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + var m Model + require.NoError(t, db.Where("id = ?", "builtin-llm").First(&m).Error) + assert.Equal(t, "sk-from-env", m.Parameters.APIKey, "set env var must be substituted") + assert.Equal(t, "${BUILTIN_TEST_UNSET}", m.Parameters.BaseURL, + "unset env var must be kept as literal placeholder") +} + +func TestLoadBuiltinModelsConfig_DriftSweepRemovesYAMLManaged(t *testing.T) { + db := setupBuiltinModelsDB(t) + + // Round 1: YAML declares two entries. + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA + - id: builtin-rerank + name: bge + type: Rerank +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + // Round 2: rerank entry removed. + require.NoError(t, os.WriteFile( + filepath.Join(dir, "builtin_models.yaml"), + []byte(`builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA +`), 0o644, + )) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + live, deleted := countModels(t, db, "builtin-rerank") + assert.Equal(t, int64(0), live, "rerank row must be soft-deleted") + assert.Equal(t, int64(1), deleted, "rerank row must remain recoverable in raw table") + + live, _ = countModels(t, db, "builtin-llm") + assert.Equal(t, int64(1), live, "llm row must survive the sweep") +} + +func TestLoadBuiltinModelsConfig_DriftSweepIgnoresManual(t *testing.T) { + db := setupBuiltinModelsDB(t) + + // Seed a manual SQL-style builtin (managed_by=""). + require.NoError(t, db.Create(&Model{ + ID: "manual-builtin", Name: "manual", Type: ModelTypeKnowledgeQA, + Source: ModelSourceRemote, Status: ModelStatusActive, + IsBuiltin: true, ManagedBy: "", + }).Error) + + // YAML declares a different id only. Sweep must NOT touch manual row. + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + live, _ := countModels(t, db, "manual-builtin") + assert.Equal(t, int64(1), live, "manual SQL-seeded builtin must survive YAML sweep") +} + +func TestLoadBuiltinModelsConfig_ResurrectsSoftDeleted(t *testing.T) { + db := setupBuiltinModelsDB(t) + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + // Simulate operator soft-deleting the row via API/UI. + require.NoError(t, db.Where("id = ?", "builtin-llm").Delete(&Model{}).Error) + live, deleted := countModels(t, db, "builtin-llm") + require.Equal(t, int64(0), live) + require.Equal(t, int64(1), deleted) + + // Re-running the loader must resurrect the row (deleted_at -> NULL). + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + live, deleted = countModels(t, db, "builtin-llm") + assert.Equal(t, int64(1), live, "row must be resurrected by UPSERT") + assert.Equal(t, int64(0), deleted, "deleted_at must be cleared") +} + +func TestLoadBuiltinModelsConfig_ClearsExistingDefault(t *testing.T) { + db := setupBuiltinModelsDB(t) + + // Seed an existing default (manual, e.g. user picked it via UI). + require.NoError(t, db.Create(&Model{ + ID: "user-default", Name: "user", Type: ModelTypeKnowledgeQA, + TenantID: 10000, Source: ModelSourceRemote, Status: ModelStatusActive, + IsDefault: true, IsBuiltin: false, ManagedBy: "", + }).Error) + + dir := writeYAML(t, `builtin_models: + - id: builtin-llm + name: gpt-4o-mini + type: KnowledgeQA + is_default: true +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + var prev Model + require.NoError(t, db.Where("id = ?", "user-default").First(&prev).Error) + assert.False(t, prev.IsDefault, + "YAML is_default=true must clear other defaults in same (tenant,type)") + + var newDefault Model + require.NoError(t, db.Where("id = ?", "builtin-llm").First(&newDefault).Error) + assert.True(t, newDefault.IsDefault) +} + +func TestLoadBuiltinModelsConfig_EmptyListSweepsAllYAMLManaged(t *testing.T) { + db := setupBuiltinModelsDB(t) + + // Seed one yaml-managed and one manual builtin. + require.NoError(t, db.Create(&Model{ + ID: "yaml-old", Name: "y", Type: ModelTypeKnowledgeQA, + Source: ModelSourceRemote, Status: ModelStatusActive, + IsBuiltin: true, ManagedBy: BuiltinModelManagedBy, + }).Error) + require.NoError(t, db.Create(&Model{ + ID: "manual-builtin", Name: "m", Type: ModelTypeKnowledgeQA, + Source: ModelSourceRemote, Status: ModelStatusActive, + IsBuiltin: true, ManagedBy: "", + }).Error) + + // Explicit empty list. Loader treats this as "no YAML-managed models + // declared" and sweeps the entire yaml-managed slice. Manual rows are + // untouched. + dir := writeYAML(t, "builtin_models: []\n") + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + live, _ := countModels(t, db, "yaml-old") + assert.Equal(t, int64(0), live, "yaml-managed row must be swept") + + live, _ = countModels(t, db, "manual-builtin") + assert.Equal(t, int64(1), live, "manual row must survive empty-list sweep") +} + +func TestLoadBuiltinModelsConfig_PreservesEntryIDOverBeforeCreate(t *testing.T) { + // Regression guard: BeforeCreate now only generates a UUID when ID is + // empty. YAML always supplies a stable id; if BeforeCreate ever forgets + // the guard and overwrites it, the UPSERT key changes and idempotency + // breaks. Reload the file twice and assert the same row id survives. + db := setupBuiltinModelsDB(t) + dir := writeYAML(t, `builtin_models: + - id: stable-id-123 + name: n + type: KnowledgeQA +`) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + require.NoError(t, LoadBuiltinModelsConfig(context.Background(), db, dir)) + + var ids []string + require.NoError(t, db.Model(&Model{}).Pluck("id", &ids).Error) + assert.Equal(t, []string{"stable-id-123"}, ids, + "YAML-declared id must survive across reloads (no UUID regeneration)") +}