mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
Refactored the pinning mechanism for knowledge bases to be user-specific, allowing any user with read access to pin knowledge bases independently. This change includes the introduction of a new `user_kb_pins` table to manage pin states, replacing the previous tenant-wide pinning system. Updated relevant API endpoints and service logic to support this new functionality, ensuring that the UI reflects the user's personal pin status. Additionally, adjusted the knowledge base listing to sort by user-specific pin status, enhancing the user experience by prioritizing pinned items. Refs: #1303
943 lines
34 KiB
Go
943 lines
34 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/Tencent/WeKnora/internal/application/service/retriever"
|
|
apperrors "github.com/Tencent/WeKnora/internal/errors"
|
|
"github.com/Tencent/WeKnora/internal/logger"
|
|
"github.com/Tencent/WeKnora/internal/tracing/langfuse"
|
|
"github.com/Tencent/WeKnora/internal/types"
|
|
"github.com/Tencent/WeKnora/internal/types/interfaces"
|
|
secutils "github.com/Tencent/WeKnora/internal/utils"
|
|
"github.com/google/uuid"
|
|
"github.com/hibiken/asynq"
|
|
)
|
|
|
|
// ErrInvalidTenantID is the sentinel error value for an invalid tenant ID.
// Compare against it with errors.Is.
var ErrInvalidTenantID = errors.New("invalid tenant ID")
|
|
|
|
// knowledgeBaseService implements the knowledge base service interface
|
|
type knowledgeBaseService struct {
|
|
repo interfaces.KnowledgeBaseRepository
|
|
kgRepo interfaces.KnowledgeRepository
|
|
chunkRepo interfaces.ChunkRepository
|
|
shareRepo interfaces.KBShareRepository
|
|
kbShareService interfaces.KBShareService
|
|
modelService interfaces.ModelService
|
|
retrieveEngine interfaces.RetrieveEngineRegistry
|
|
ownership retriever.TenantStoreOwnership
|
|
tenantRepo interfaces.TenantRepository
|
|
fileSvc interfaces.FileService
|
|
graphEngine interfaces.RetrieveGraphRepository
|
|
asynqClient interfaces.TaskEnqueuer
|
|
}
|
|
|
|
// NewKnowledgeBaseService creates a new knowledge base service
|
|
func NewKnowledgeBaseService(repo interfaces.KnowledgeBaseRepository,
|
|
kgRepo interfaces.KnowledgeRepository,
|
|
chunkRepo interfaces.ChunkRepository,
|
|
shareRepo interfaces.KBShareRepository,
|
|
kbShareService interfaces.KBShareService,
|
|
modelService interfaces.ModelService,
|
|
retrieveEngine interfaces.RetrieveEngineRegistry,
|
|
ownership retriever.TenantStoreOwnership,
|
|
tenantRepo interfaces.TenantRepository,
|
|
fileSvc interfaces.FileService,
|
|
graphEngine interfaces.RetrieveGraphRepository,
|
|
asynqClient interfaces.TaskEnqueuer,
|
|
) interfaces.KnowledgeBaseService {
|
|
return &knowledgeBaseService{
|
|
repo: repo,
|
|
kgRepo: kgRepo,
|
|
chunkRepo: chunkRepo,
|
|
shareRepo: shareRepo,
|
|
kbShareService: kbShareService,
|
|
modelService: modelService,
|
|
retrieveEngine: retrieveEngine,
|
|
ownership: ownership,
|
|
tenantRepo: tenantRepo,
|
|
fileSvc: fileSvc,
|
|
graphEngine: graphEngine,
|
|
asynqClient: asynqClient,
|
|
}
|
|
}
|
|
|
|
// GetRepository gets the knowledge base repository
|
|
// Parameters:
|
|
// - ctx: Context with authentication and request information
|
|
//
|
|
// Returns:
|
|
// - interfaces.KnowledgeBaseRepository: Knowledge base repository
|
|
func (s *knowledgeBaseService) GetRepository() interfaces.KnowledgeBaseRepository {
|
|
return s.repo
|
|
}
|
|
|
|
// CreateKnowledgeBase creates a new knowledge base.
|
|
//
|
|
// When VectorStoreID is set, the binding is validated against the caller's
|
|
// tenant scope and the engine registry before persisting. A nil or
|
|
// empty-string VectorStoreID is normalized to nil ("use the tenant's
|
|
// effective engines") to match the retrieve-engine factory's pre-condition.
|
|
func (s *knowledgeBaseService) CreateKnowledgeBase(ctx context.Context,
|
|
kb *types.KnowledgeBase,
|
|
) (*types.KnowledgeBase, error) {
|
|
// Generate UUID and set creation timestamps
|
|
if kb.ID == "" {
|
|
kb.ID = uuid.New().String()
|
|
}
|
|
kb.CreatedAt = time.Now()
|
|
kb.TenantID = types.MustTenantIDFromContext(ctx)
|
|
kb.UpdatedAt = time.Now()
|
|
// Record the creator so RBAC's RequireOwnershipOrRole can let
|
|
// Contributors edit their own KBs without granting them tenant-wide
|
|
// edit rights. The X-API-Key auth path attaches a synthetic
|
|
// `system-<tenantID>` user; we deliberately skip those so the KB
|
|
// stays tenant-owned (CreatorID == ""), which matches the original
|
|
// API-key semantics (any human Admin can manage it) and prevents a
|
|
// later "list KBs by creator" feature from surfacing rows nobody can
|
|
// re-attribute.
|
|
if uid, ok := types.UserIDFromContext(ctx); ok && !types.IsSyntheticUserID(uid) {
|
|
kb.CreatorID = uid
|
|
}
|
|
kb.EnsureDefaults()
|
|
|
|
// Fold empty-string vector_store_id into nil so this path and the
|
|
// retrieve-engine factory's pre-condition share a single representation.
|
|
wasEmpty := kb.VectorStoreID != nil && *kb.VectorStoreID == ""
|
|
kb.Normalize()
|
|
if wasEmpty {
|
|
logger.Debugf(ctx,
|
|
"[kb.create] empty vector_store_id normalized to nil for tenant=%d",
|
|
kb.TenantID)
|
|
}
|
|
|
|
if kb.HasVectorStore() {
|
|
if err := s.validateVectorStoreBinding(ctx, kb.TenantID, *kb.VectorStoreID); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
logger.Infof(ctx, "Creating knowledge base, ID: %s, tenant ID: %d, name: %s", kb.ID, kb.TenantID, kb.Name)
|
|
|
|
if err := s.repo.CreateKnowledgeBase(ctx, kb); err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": kb.ID,
|
|
"tenant_id": kb.TenantID,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
logger.Infof(ctx, "Knowledge base created successfully, ID: %s, name: %s", kb.ID, kb.Name)
|
|
return kb, nil
|
|
}
|
|
|
|
// validateVectorStoreBinding routes through retriever.VerifyBinding so the
|
|
// ownership + registry sentinel hierarchy stays the single source of truth.
|
|
// The service layer's responsibility is to:
|
|
//
|
|
// 1. fast-reject malformed UUIDs (cheap pre-flight that also avoids a DB
|
|
// round trip for type-confusion inputs like "' OR 1=1 --"),
|
|
// 2. translate retriever sentinels into user-facing AppErrors with
|
|
// generic messages and the typed error codes.
|
|
//
|
|
// UUID parse failures map to the same "vector store not found" message as
|
|
// cross-tenant attempts to avoid an enumeration oracle that distinguishes
|
|
// "malformed input" from "non-existent UUID".
|
|
func (s *knowledgeBaseService) validateVectorStoreBinding(
|
|
ctx context.Context, tenantID uint64, storeID string,
|
|
) error {
|
|
sanitized := secutils.SanitizeForLog(storeID)
|
|
|
|
if _, err := uuid.Parse(storeID); err != nil {
|
|
logger.WarnWithFields(ctx, logger.Fields{
|
|
"tenant_id": tenantID,
|
|
"store_id": sanitized,
|
|
"reason": "malformed vector_store_id",
|
|
}, "[kb.create] vector store id is not a valid UUID")
|
|
return apperrors.NewVectorStoreBindingInvalidError("vector store not found")
|
|
}
|
|
|
|
switch err := retriever.VerifyBinding(
|
|
ctx, s.retrieveEngine, s.ownership, tenantID, storeID,
|
|
); {
|
|
case err == nil:
|
|
return nil
|
|
case errors.Is(err, retriever.ErrVectorStoreForbidden):
|
|
logger.WarnWithFields(ctx, logger.Fields{
|
|
"tenant_id": tenantID,
|
|
"store_id": sanitized,
|
|
"reason": "cross-tenant or unknown store",
|
|
}, "[kb.create] vector store not owned by tenant")
|
|
return apperrors.NewVectorStoreBindingInvalidError("vector store not found")
|
|
case errors.Is(err, retriever.ErrVectorStoreNotFound):
|
|
logger.WarnWithFields(ctx, logger.Fields{
|
|
"tenant_id": tenantID,
|
|
"store_id": sanitized,
|
|
"reason": "store registered in DB but missing in registry",
|
|
}, "[kb.create] vector store currently unavailable")
|
|
return apperrors.NewVectorStoreUnavailableError(
|
|
"vector store is currently unavailable; check its connection configuration")
|
|
default:
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"tenant_id": tenantID,
|
|
"store_id": sanitized,
|
|
"reason": "binding verification failed",
|
|
})
|
|
return apperrors.NewInternalServerError("failed to verify vector store binding")
|
|
}
|
|
}
|
|
|
|
// GetKnowledgeBaseByID retrieves a knowledge base by its ID
|
|
func (s *knowledgeBaseService) GetKnowledgeBaseByID(ctx context.Context, id string) (*types.KnowledgeBase, error) {
|
|
if id == "" {
|
|
logger.Error(ctx, "Knowledge base ID is empty")
|
|
return nil, errors.New("knowledge base ID cannot be empty")
|
|
}
|
|
|
|
kb, err := s.repo.GetKnowledgeBaseByID(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
kb.EnsureDefaults()
|
|
return kb, nil
|
|
}
|
|
|
|
// GetKnowledgeBaseByIDOnly retrieves knowledge base by ID without tenant filter
|
|
// Used for cross-tenant shared KB access where permission is checked elsewhere
|
|
func (s *knowledgeBaseService) GetKnowledgeBaseByIDOnly(ctx context.Context, id string) (*types.KnowledgeBase, error) {
|
|
if id == "" {
|
|
logger.Error(ctx, "Knowledge base ID is empty")
|
|
return nil, errors.New("knowledge base ID cannot be empty")
|
|
}
|
|
|
|
kb, err := s.repo.GetKnowledgeBaseByID(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
kb.EnsureDefaults()
|
|
return kb, nil
|
|
}
|
|
|
|
// GetKnowledgeBasesByIDsOnly retrieves knowledge bases by IDs without tenant filter (batch).
|
|
func (s *knowledgeBaseService) GetKnowledgeBasesByIDsOnly(ctx context.Context, ids []string) ([]*types.KnowledgeBase, error) {
|
|
if len(ids) == 0 {
|
|
return nil, nil
|
|
}
|
|
kbs, err := s.repo.GetKnowledgeBaseByIDs(ctx, ids)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, kb := range kbs {
|
|
if kb != nil {
|
|
kb.EnsureDefaults()
|
|
}
|
|
}
|
|
return kbs, nil
|
|
}
|
|
|
|
// ListKnowledgeBases returns all knowledge bases for a tenant
|
|
func (s *knowledgeBaseService) ListKnowledgeBases(ctx context.Context) ([]*types.KnowledgeBase, error) {
|
|
tenantID := types.MustTenantIDFromContext(ctx)
|
|
|
|
kbs, err := s.repo.ListKnowledgeBasesByTenantID(ctx, tenantID)
|
|
if err != nil {
|
|
for _, kb := range kbs {
|
|
kb.EnsureDefaults()
|
|
}
|
|
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"tenant_id": tenantID,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
// Query knowledge count and chunk count for each knowledge base
|
|
for _, kb := range kbs {
|
|
kb.EnsureDefaults()
|
|
|
|
// Get knowledge count
|
|
switch kb.Type {
|
|
case types.KnowledgeBaseTypeDocument:
|
|
knowledgeCount, err := s.kgRepo.CountKnowledgeByKnowledgeBaseID(ctx, tenantID, kb.ID)
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to get knowledge count for knowledge base %s: %v", kb.ID, err)
|
|
} else {
|
|
kb.KnowledgeCount = knowledgeCount
|
|
}
|
|
case types.KnowledgeBaseTypeFAQ:
|
|
// Get chunk count
|
|
chunkCount, err := s.chunkRepo.CountChunksByKnowledgeBaseID(ctx, tenantID, kb.ID)
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to get chunk count for knowledge base %s: %v", kb.ID, err)
|
|
} else {
|
|
kb.ChunkCount = chunkCount
|
|
}
|
|
}
|
|
|
|
// Check if there is a processing import task
|
|
processingCount, err := s.kgRepo.CountKnowledgeByStatus(
|
|
ctx,
|
|
tenantID,
|
|
kb.ID,
|
|
[]string{"pending", "processing"},
|
|
)
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to check processing status for knowledge base %s: %v", kb.ID, err)
|
|
} else {
|
|
kb.IsProcessing = processingCount > 0
|
|
kb.ProcessingCount = processingCount
|
|
}
|
|
}
|
|
|
|
// Per-user pin stamping + ordering. The "main" list view is the
|
|
// only path that needs to honour the caller's personal pin set;
|
|
// agent/share/IM callers go through ListKnowledgeBasesByTenantID
|
|
// which also enriches but keys off the user in their own context.
|
|
if userID, ok := types.UserIDFromContext(ctx); ok && userID != "" {
|
|
s.applyUserKBPins(ctx, tenantID, userID, kbs)
|
|
}
|
|
return kbs, nil
|
|
}
|
|
|
|
// ListKnowledgeBasesByTenantID returns all knowledge bases for the given tenant (e.g. for shared agent context).
|
|
func (s *knowledgeBaseService) ListKnowledgeBasesByTenantID(ctx context.Context, tenantID uint64) ([]*types.KnowledgeBase, error) {
|
|
kbs, err := s.repo.ListKnowledgeBasesByTenantID(ctx, tenantID)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"tenant_id": tenantID,
|
|
})
|
|
return nil, err
|
|
}
|
|
for _, kb := range kbs {
|
|
kb.EnsureDefaults()
|
|
switch kb.Type {
|
|
case types.KnowledgeBaseTypeDocument:
|
|
if cnt, err := s.kgRepo.CountKnowledgeByKnowledgeBaseID(ctx, tenantID, kb.ID); err == nil {
|
|
kb.KnowledgeCount = cnt
|
|
}
|
|
case types.KnowledgeBaseTypeFAQ:
|
|
if cnt, err := s.chunkRepo.CountChunksByKnowledgeBaseID(ctx, tenantID, kb.ID); err == nil {
|
|
kb.ChunkCount = cnt
|
|
}
|
|
}
|
|
if processingCount, err := s.kgRepo.CountKnowledgeByStatus(ctx, tenantID, kb.ID, []string{"pending", "processing"}); err == nil {
|
|
kb.IsProcessing = processingCount > 0
|
|
kb.ProcessingCount = processingCount
|
|
}
|
|
}
|
|
|
|
// Stamp pin state from the caller's perspective. The tenantID
|
|
// argument may not match the caller's own tenant (this method is
|
|
// also used to list a shared-agent's source-tenant KBs); we still
|
|
// scope user_kb_pins by `tenantID` since a pin tied to one tenant
|
|
// shouldn't surface when browsing another tenant's KBs.
|
|
if userID, ok := types.UserIDFromContext(ctx); ok && userID != "" {
|
|
s.applyUserKBPins(ctx, tenantID, userID, kbs)
|
|
}
|
|
return kbs, nil
|
|
}
|
|
|
|
// FillKnowledgeBaseCounts fills KnowledgeCount, ChunkCount, IsProcessing, ProcessingCount for the given KB using kb.TenantID.
|
|
func (s *knowledgeBaseService) FillKnowledgeBaseCounts(ctx context.Context, kb *types.KnowledgeBase) error {
|
|
if kb == nil {
|
|
return nil
|
|
}
|
|
tenantID := kb.TenantID
|
|
kb.EnsureDefaults()
|
|
switch kb.Type {
|
|
case types.KnowledgeBaseTypeDocument:
|
|
if cnt, err := s.kgRepo.CountKnowledgeByKnowledgeBaseID(ctx, tenantID, kb.ID); err == nil {
|
|
kb.KnowledgeCount = cnt
|
|
}
|
|
case types.KnowledgeBaseTypeFAQ:
|
|
if cnt, err := s.chunkRepo.CountChunksByKnowledgeBaseID(ctx, tenantID, kb.ID); err == nil {
|
|
kb.ChunkCount = cnt
|
|
}
|
|
}
|
|
if processingCount, err := s.kgRepo.CountKnowledgeByStatus(ctx, tenantID, kb.ID, []string{"pending", "processing"}); err == nil {
|
|
kb.IsProcessing = processingCount > 0
|
|
kb.ProcessingCount = processingCount
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// UpdateKnowledgeBase updates a knowledge base's mutable properties.
|
|
//
|
|
// IMPORTANT — vector_store_id immutability contract:
|
|
// The vector_store_id binding is deliberately not accepted by this method.
|
|
// Two layers enforce immutability:
|
|
//
|
|
// 1. ORM layer: the GORM tag `<-:create` on KnowledgeBase.VectorStoreID
|
|
// makes every UPDATE path (Save / Updates / Select-Updates) a no-op for
|
|
// that column. Verified by repository/knowledgebase_sqlite_test.go.
|
|
// 2. Service layer: this method intentionally omits VectorStoreID from its
|
|
// parameter list, and the matching handler DTO UpdateKnowledgeBaseRequest
|
|
// omits the field as well. A reflection-based regression test
|
|
// (handler/knowledgebase_request_test.go) fails if either DTO field
|
|
// is added back, alerting future maintainers.
|
|
//
|
|
// Any future cross-store rebind workflow must use raw SQL through a
|
|
// dedicated repository method — the only sanctioned write path post-creation.
|
|
func (s *knowledgeBaseService) UpdateKnowledgeBase(ctx context.Context,
|
|
id string,
|
|
name string,
|
|
description string,
|
|
config *types.KnowledgeBaseConfig,
|
|
) (*types.KnowledgeBase, error) {
|
|
if id == "" {
|
|
logger.Error(ctx, "Knowledge base ID is empty")
|
|
return nil, errors.New("knowledge base ID cannot be empty")
|
|
}
|
|
|
|
logger.Infof(ctx, "Updating knowledge base, ID: %s, name: %s", id, name)
|
|
|
|
// Get existing knowledge base
|
|
kb, err := s.repo.GetKnowledgeBaseByID(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
// Update the knowledge base properties
|
|
kb.Name = name
|
|
kb.Description = description
|
|
if config != nil {
|
|
kb.ChunkingConfig = config.ChunkingConfig
|
|
kb.ImageProcessingConfig = config.ImageProcessingConfig
|
|
if config.FAQConfig != nil {
|
|
kb.FAQConfig = config.FAQConfig
|
|
}
|
|
if config.WikiConfig != nil {
|
|
kb.WikiConfig = config.WikiConfig
|
|
}
|
|
// Update indexing strategy — syncs to ExtractConfig for backward compat
|
|
if config.IndexingStrategy != nil {
|
|
if !config.IndexingStrategy.HasAnyIndexing() {
|
|
return nil, errors.New("at least one indexing strategy must be enabled")
|
|
}
|
|
kb.IndexingStrategy = *config.IndexingStrategy
|
|
// Ensure WikiConfig exists when wiki indexing is enabled so that
|
|
// wiki-specific tunables (synthesis model, granularity, …) have a home.
|
|
if kb.WikiConfig == nil && config.IndexingStrategy.WikiEnabled {
|
|
kb.WikiConfig = &types.WikiConfig{}
|
|
}
|
|
// Sync GraphEnabled → ExtractConfig
|
|
if kb.ExtractConfig != nil {
|
|
kb.ExtractConfig.Enabled = config.IndexingStrategy.GraphEnabled
|
|
} else if config.IndexingStrategy.GraphEnabled {
|
|
kb.ExtractConfig = &types.ExtractConfig{Enabled: true}
|
|
}
|
|
}
|
|
}
|
|
kb.UpdatedAt = time.Now()
|
|
kb.EnsureDefaults()
|
|
|
|
logger.Info(ctx, "Saving knowledge base update")
|
|
if err := s.repo.UpdateKnowledgeBase(ctx, kb); err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
logger.Infof(ctx, "Knowledge base updated successfully, ID: %s, name: %s", kb.ID, kb.Name)
|
|
return kb, nil
|
|
}
|
|
|
|
// TogglePinKnowledgeBase toggles whether the calling user has pinned
|
|
// this knowledge base. Pin state is per-(user, kb) as of migration
|
|
// 000050; previously this method flipped a tenant-wide column on the
|
|
// KB row which broke down under RBAC (only Admin/creator could pin,
|
|
// and the pin reordered the list for everyone in the tenant). The
|
|
// public signature is unchanged so the HTTP handler / CLI / SDK don't
|
|
// move.
|
|
//
|
|
// The KB still has to belong to the caller's tenant — the route is
|
|
// already gated behind KBAccessRead, but we re-check via
|
|
// GetKnowledgeBaseByIDAndTenant so a stale param survives a tenant
|
|
// switch cleanly.
|
|
func (s *knowledgeBaseService) TogglePinKnowledgeBase(
|
|
ctx context.Context, id string,
|
|
) (*types.KnowledgeBase, error) {
|
|
if id == "" {
|
|
return nil, errors.New("knowledge base ID cannot be empty")
|
|
}
|
|
tenantID := types.MustTenantIDFromContext(ctx)
|
|
userID, ok := types.UserIDFromContext(ctx)
|
|
if !ok || userID == "" {
|
|
// API-key callers without a user identity can't have a personal
|
|
// pin set. We surface this rather than silently flipping a
|
|
// shared-tenant flag like the old behaviour.
|
|
return nil, errors.New("pin requires an authenticated user")
|
|
}
|
|
|
|
// Look the KB up without a tenant filter: the route's KBAccessRead
|
|
// guard already validated that this caller can see this KB (own,
|
|
// org-shared, or agent-shared). Filtering by the caller's tenant
|
|
// here would 404 every legitimate pin against a shared KB whose
|
|
// owning tenant differs from the caller's active tenant.
|
|
kb, err := s.repo.GetKnowledgeBaseByID(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
"tenant_id": tenantID,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
// Read current pin state to decide direction. ListUserKBPinIDs is
|
|
// already optimised for the "many KBs at once" path; for a single-id
|
|
// check the round-trip is acceptable and avoids leaking a second
|
|
// repository method just for this.
|
|
pins, err := s.repo.ListUserKBPinIDs(ctx, tenantID, userID)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
"tenant_id": tenantID,
|
|
"user_id": userID,
|
|
})
|
|
return nil, err
|
|
}
|
|
_, currentlyPinned := pins[id]
|
|
|
|
pinnedAt, err := s.repo.SetUserKBPin(ctx, tenantID, userID, id, !currentlyPinned)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
"tenant_id": tenantID,
|
|
"user_id": userID,
|
|
"target_pinned": !currentlyPinned,
|
|
})
|
|
return nil, err
|
|
}
|
|
|
|
kb.EnsureDefaults()
|
|
kb.IsPinned = !currentlyPinned
|
|
kb.PinnedAt = pinnedAt
|
|
logger.Infof(ctx, "Knowledge base pin toggled, ID: %s, user: %s, is_pinned: %v",
|
|
id, userID, kb.IsPinned)
|
|
return kb, nil
|
|
}
|
|
|
|
// applyUserKBPins stamps IsPinned / PinnedAt onto each KB in the slice
|
|
// from the caller's perspective and sorts the slice so pinned rows
|
|
// float to the top (newest pin first, ties broken by created_at desc).
|
|
// Safe to call with an empty userID (no-op stamp; default sort by
|
|
// created_at preserved).
|
|
func (s *knowledgeBaseService) applyUserKBPins(
|
|
ctx context.Context, tenantID uint64, userID string, kbs []*types.KnowledgeBase,
|
|
) {
|
|
if len(kbs) == 0 || userID == "" {
|
|
return
|
|
}
|
|
pins, err := s.repo.ListUserKBPinIDs(ctx, tenantID, userID)
|
|
if err != nil {
|
|
// Pin enrichment is best-effort: a transient DB blip here
|
|
// should not break listing KBs. Log and bail without altering
|
|
// the slice — caller still gets a valid list, just unsorted by
|
|
// pin.
|
|
logger.Warnf(ctx, "applyUserKBPins: failed to load pins for tenant=%d user=%s: %v",
|
|
tenantID, userID, err)
|
|
return
|
|
}
|
|
if len(pins) == 0 {
|
|
return
|
|
}
|
|
for _, kb := range kbs {
|
|
if ts, ok := pins[kb.ID]; ok {
|
|
kb.IsPinned = true
|
|
t := ts
|
|
kb.PinnedAt = &t
|
|
}
|
|
}
|
|
sort.SliceStable(kbs, func(i, j int) bool {
|
|
a, b := kbs[i], kbs[j]
|
|
if a.IsPinned != b.IsPinned {
|
|
return a.IsPinned
|
|
}
|
|
if a.IsPinned && b.IsPinned {
|
|
at, bt := a.PinnedAt, b.PinnedAt
|
|
if at != nil && bt != nil && !at.Equal(*bt) {
|
|
return at.After(*bt)
|
|
}
|
|
}
|
|
return a.CreatedAt.After(b.CreatedAt)
|
|
})
|
|
}
|
|
|
|
// DeleteKnowledgeBase deletes a knowledge base by its ID
|
|
// This method marks the knowledge base as deleted and enqueues an async task
|
|
// to handle the heavy cleanup operations (embeddings, chunks, files, graph data)
|
|
func (s *knowledgeBaseService) DeleteKnowledgeBase(ctx context.Context, id string) error {
|
|
if id == "" {
|
|
logger.Error(ctx, "Knowledge base ID is empty")
|
|
return errors.New("knowledge base ID cannot be empty")
|
|
}
|
|
|
|
logger.Infof(ctx, "Deleting knowledge base, ID: %s", id)
|
|
|
|
// Get tenant ID from context
|
|
tenantID := types.MustTenantIDFromContext(ctx)
|
|
tenantInfo, _ := types.TenantInfoFromContext(ctx)
|
|
|
|
// Load the KB before soft-delete so we can snapshot its VectorStoreID
|
|
// into the async cleanup payload. GORM's soft-delete filter hides the
|
|
// row from subsequent reads, so this read must happen first.
|
|
kb, err := s.repo.GetKnowledgeBaseByID(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return err
|
|
}
|
|
var vectorStoreIDSnapshot *string
|
|
if kb != nil {
|
|
vectorStoreIDSnapshot = kb.VectorStoreID
|
|
}
|
|
|
|
// Step 1: Delete the knowledge base record first (mark as deleted)
|
|
logger.Infof(ctx, "Deleting knowledge base from database")
|
|
err = s.repo.DeleteKnowledgeBase(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Step 1b: Remove all organization shares for this KB so org settings no longer show them
|
|
if delErr := s.shareRepo.DeleteByKnowledgeBaseID(ctx, id); delErr != nil {
|
|
logger.Warnf(ctx, "Failed to delete KB shares for knowledge base %s: %v", id, delErr)
|
|
}
|
|
|
|
// Step 2: Enqueue async task for heavy cleanup operations
|
|
payload := types.KBDeletePayload{
|
|
TenantID: tenantID,
|
|
KnowledgeBaseID: id,
|
|
EffectiveEngines: tenantInfo.GetEffectiveEngines(),
|
|
VectorStoreID: vectorStoreIDSnapshot, // snapshot taken before soft-delete
|
|
}
|
|
langfuse.InjectTracing(ctx, &payload)
|
|
|
|
payloadBytes, err := json.Marshal(payload)
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to marshal KB delete payload: %v", err)
|
|
// Don't fail the request, the KB record is already deleted
|
|
return nil
|
|
}
|
|
|
|
task := asynq.NewTask(types.TypeKBDelete, payloadBytes, asynq.Queue("low"), asynq.MaxRetry(3))
|
|
info, err := s.asynqClient.Enqueue(task)
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to enqueue KB delete task: %v", err)
|
|
// Don't fail the request, the KB record is already deleted
|
|
return nil
|
|
}
|
|
|
|
logger.Infof(ctx, "KB delete task enqueued: %s, knowledge base ID: %s", info.ID, id)
|
|
logger.Infof(ctx, "Knowledge base deleted successfully, ID: %s", id)
|
|
return nil
|
|
}
|
|
|
|
// ProcessKBDelete handles async knowledge base deletion task
|
|
// This method performs heavy cleanup operations: deleting embeddings, chunks, files, and graph data
|
|
func (s *knowledgeBaseService) ProcessKBDelete(ctx context.Context, t *asynq.Task) error {
|
|
var payload types.KBDeletePayload
|
|
if err := json.Unmarshal(t.Payload(), &payload); err != nil {
|
|
logger.Errorf(ctx, "Failed to unmarshal KB delete payload: %v", err)
|
|
return err
|
|
}
|
|
|
|
tenantID := payload.TenantID
|
|
kbID := payload.KnowledgeBaseID
|
|
|
|
// Set tenant context for downstream services
|
|
ctx = context.WithValue(ctx, types.TenantIDContextKey, tenantID)
|
|
|
|
logger.Infof(ctx, "Processing KB delete task for knowledge base: %s", kbID)
|
|
|
|
// Step 1: Get all knowledge entries in this knowledge base
|
|
logger.Infof(ctx, "Fetching all knowledge entries in knowledge base, ID: %s", kbID)
|
|
knowledgeList, err := s.kgRepo.ListKnowledgeByKnowledgeBaseID(ctx, tenantID, kbID)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": kbID,
|
|
})
|
|
return err
|
|
}
|
|
logger.Infof(ctx, "Found %d knowledge entries to delete", len(knowledgeList))
|
|
|
|
// Step 2: Delete all knowledge entries and their resources
|
|
if len(knowledgeList) > 0 {
|
|
knowledgeIDs := make([]string, 0, len(knowledgeList))
|
|
for _, knowledge := range knowledgeList {
|
|
knowledgeIDs = append(knowledgeIDs, knowledge.ID)
|
|
}
|
|
|
|
logger.Infof(ctx, "Deleting all knowledge entries and their resources")
|
|
|
|
// Delete embeddings from vector store.
|
|
// Resolve the engine via the factory, using the VectorStoreID captured
|
|
// at enqueue time (may be nil → falls back to payload.EffectiveEngines).
|
|
// If the payload references a store no longer owned/registered
|
|
// (e.g. tampered queue entry or a store that was deleted while the
|
|
// task sat in the queue), the factory returns a sentinel and we
|
|
// SkipRetry to avoid burning retries on an unrecoverable situation.
|
|
logger.Infof(ctx, "Deleting embeddings from vector store")
|
|
retrieveEngine, err := retriever.CreateRetrieveEngineFromPayload(
|
|
ctx,
|
|
s.retrieveEngine,
|
|
s.ownership,
|
|
payload.TenantID,
|
|
payload.EffectiveEngines,
|
|
payload.VectorStoreID,
|
|
)
|
|
if errors.Is(err, retriever.ErrVectorStoreForbidden) ||
|
|
errors.Is(err, retriever.ErrVectorStoreNotFound) {
|
|
logger.Errorf(ctx, "KB delete task aborted: %v (tenant=%d, kb=%s)", err, payload.TenantID, payload.KnowledgeBaseID)
|
|
return asynq.SkipRetry
|
|
}
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to create retrieve engine: %v", err)
|
|
} else {
|
|
// Group knowledge by embedding model and type
|
|
type groupKey struct {
|
|
EmbeddingModelID string
|
|
Type string
|
|
}
|
|
embeddingGroups := make(map[groupKey][]string)
|
|
for _, knowledge := range knowledgeList {
|
|
key := groupKey{EmbeddingModelID: knowledge.EmbeddingModelID, Type: knowledge.Type}
|
|
embeddingGroups[key] = append(embeddingGroups[key], knowledge.ID)
|
|
}
|
|
|
|
for key, knowledgeGroup := range embeddingGroups {
|
|
embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, key.EmbeddingModelID)
|
|
if err != nil {
|
|
logger.Warnf(ctx, "Failed to get embedding model %s: %v", key.EmbeddingModelID, err)
|
|
continue
|
|
}
|
|
if err := retrieveEngine.DeleteByKnowledgeIDList(ctx, knowledgeGroup, embeddingModel.GetDimensions(), key.Type); err != nil {
|
|
logger.Warnf(ctx, "Failed to delete embeddings for model %s: %v", key.EmbeddingModelID, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Collect image URLs before chunks are deleted
|
|
chunkImageInfos, imgErr := s.chunkRepo.ListImageInfoByKnowledgeIDs(ctx, tenantID, knowledgeIDs)
|
|
if imgErr != nil {
|
|
logger.Warnf(ctx, "Failed to collect image URLs for KB delete: %v", imgErr)
|
|
}
|
|
var imageInfoStrs []string
|
|
for _, ci := range chunkImageInfos {
|
|
imageInfoStrs = append(imageInfoStrs, ci.ImageInfo)
|
|
}
|
|
imageURLs := collectImageURLs(ctx, imageInfoStrs)
|
|
|
|
// Delete all chunks
|
|
logger.Infof(ctx, "Deleting all chunks in knowledge base")
|
|
for _, knowledgeID := range knowledgeIDs {
|
|
if err := s.chunkRepo.DeleteChunksByKnowledgeID(ctx, tenantID, knowledgeID); err != nil {
|
|
logger.Warnf(ctx, "Failed to delete chunks for knowledge %s: %v", knowledgeID, err)
|
|
}
|
|
}
|
|
|
|
// Delete physical files, extracted images, and adjust storage
|
|
logger.Infof(ctx, "Deleting physical files and extracted images")
|
|
storageAdjust := int64(0)
|
|
for _, knowledge := range knowledgeList {
|
|
if knowledge.FilePath != "" {
|
|
if err := s.fileSvc.DeleteFile(ctx, knowledge.FilePath); err != nil {
|
|
logger.Warnf(ctx, "Failed to delete file %s: %v", knowledge.FilePath, err)
|
|
}
|
|
}
|
|
storageAdjust -= knowledge.StorageSize
|
|
}
|
|
deleteExtractedImages(ctx, s.fileSvc, imageURLs)
|
|
if storageAdjust != 0 {
|
|
if err := s.tenantRepo.AdjustStorageUsed(ctx, tenantID, storageAdjust); err != nil {
|
|
logger.Warnf(ctx, "Failed to adjust tenant storage: %v", err)
|
|
}
|
|
}
|
|
|
|
// Delete knowledge graph data
|
|
logger.Infof(ctx, "Deleting knowledge graph data")
|
|
namespaces := make([]types.NameSpace, 0, len(knowledgeList))
|
|
for _, knowledge := range knowledgeList {
|
|
namespaces = append(namespaces, types.NameSpace{
|
|
KnowledgeBase: knowledge.KnowledgeBaseID,
|
|
Knowledge: knowledge.ID,
|
|
})
|
|
}
|
|
if s.graphEngine != nil && len(namespaces) > 0 {
|
|
if err := s.graphEngine.DelGraph(ctx, namespaces); err != nil {
|
|
logger.Warnf(ctx, "Failed to delete knowledge graph: %v", err)
|
|
}
|
|
}
|
|
|
|
// Delete all knowledge entries from database
|
|
logger.Infof(ctx, "Deleting knowledge entries from database")
|
|
if err := s.kgRepo.DeleteKnowledgeList(ctx, tenantID, knowledgeIDs); err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": kbID,
|
|
})
|
|
return err
|
|
}
|
|
}
|
|
|
|
logger.Infof(ctx, "KB delete task completed successfully, knowledge base ID: %s", kbID)
|
|
return nil
|
|
}
|
|
|
|
// SetEmbeddingModel sets the embedding model for a knowledge base
|
|
func (s *knowledgeBaseService) SetEmbeddingModel(ctx context.Context, id string, modelID string) error {
|
|
if id == "" {
|
|
logger.Error(ctx, "Knowledge base ID is empty")
|
|
return errors.New("knowledge base ID cannot be empty")
|
|
}
|
|
|
|
if modelID == "" {
|
|
logger.Error(ctx, "Model ID is empty")
|
|
return errors.New("model ID cannot be empty")
|
|
}
|
|
|
|
logger.Infof(ctx, "Setting embedding model for knowledge base, knowledge base ID: %s, model ID: %s", id, modelID)
|
|
|
|
// Get the knowledge base
|
|
kb, err := s.repo.GetKnowledgeBaseByID(ctx, id)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Update the knowledge base's embedding model
|
|
kb.EmbeddingModelID = modelID
|
|
kb.UpdatedAt = time.Now()
|
|
|
|
logger.Info(ctx, "Saving knowledge base embedding model update")
|
|
err = s.repo.UpdateKnowledgeBase(ctx, kb)
|
|
if err != nil {
|
|
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
|
"knowledge_base_id": id,
|
|
"embedding_model_id": modelID,
|
|
})
|
|
return err
|
|
}
|
|
|
|
logger.Infof(
|
|
ctx,
|
|
"Knowledge base embedding model set successfully, knowledge base ID: %s, model ID: %s",
|
|
id,
|
|
modelID,
|
|
)
|
|
return nil
|
|
}
|
|
|
|
// CopyKnowledgeBase copies a knowledge base to a new knowledge base (shallow copy).
|
|
// Source and target must belong to the tenant in context; cross-tenant access is rejected.
|
|
//
|
|
// Defensive checks:
|
|
//
|
|
// - When dstKB != "" (clone into an existing target), the source's
|
|
// EmbeddingModelID and VectorStoreID must match the target's. Mismatched
|
|
// embedding models would silently mix incompatible vector spaces;
|
|
// mismatched vector stores would require copying physical vector data
|
|
// between stores, which is not yet supported.
|
|
// - When dstKB == "" (create a new target), VectorStoreID is copied from
|
|
// the source so the new KB shares the same physical vector index. GORM
|
|
// `<-:create` allows INSERT, so the new row is well-formed.
|
|
//
|
|
// The handler's CopyKnowledgeBase endpoint runs the same checks synchronously
|
|
// before enqueueing the async clone task, so the 400 errors here are
|
|
// defense-in-depth for the worker entry point.
|
|
func (s *knowledgeBaseService) CopyKnowledgeBase(ctx context.Context,
|
|
srcKB string, dstKB string,
|
|
) (*types.KnowledgeBase, *types.KnowledgeBase, error) {
|
|
tenantID := types.MustTenantIDFromContext(ctx)
|
|
// Load source KB with tenant scope to prevent cross-tenant cloning
|
|
sourceKB, err := s.repo.GetKnowledgeBaseByIDAndTenant(ctx, srcKB, tenantID)
|
|
if err != nil {
|
|
logger.Errorf(ctx, "Get source knowledge base failed: %v", err)
|
|
return nil, nil, err
|
|
}
|
|
sourceKB.EnsureDefaults()
|
|
var targetKB *types.KnowledgeBase
|
|
if dstKB != "" {
|
|
// Load target KB with tenant scope so we only clone into the caller's tenant
|
|
targetKB, err = s.repo.GetKnowledgeBaseByIDAndTenant(ctx, dstKB, tenantID)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
// Defense 1: embedding model must match. Mixing incompatible
|
|
// vector spaces would produce semantically broken search results.
|
|
if sourceKB.EmbeddingModelID != targetKB.EmbeddingModelID {
|
|
return nil, nil, apperrors.NewBadRequestError(
|
|
"source and target knowledge bases use different embedding models; " +
|
|
"clone into a target with the same embedding model")
|
|
}
|
|
|
|
// Defense 2: vector store binding must match. Cross-store cloning
|
|
// would require copying physical vector data between stores.
|
|
// (both nil → equal; both same UUID → equal; otherwise → rejected)
|
|
if !sourceKB.SharesStoreWith(targetKB) {
|
|
return nil, nil, apperrors.NewBadRequestError(
|
|
"source and target knowledge bases are bound to different vector stores; " +
|
|
"cross-store cloning is not yet supported")
|
|
}
|
|
} else {
|
|
var faqConfig *types.FAQConfig
|
|
if sourceKB.FAQConfig != nil {
|
|
cfg := *sourceKB.FAQConfig
|
|
faqConfig = &cfg
|
|
}
|
|
// Preserve VectorStoreID so the cloned KB lands on the same
|
|
// physical index. GORM `<-:create` permits the value at INSERT.
|
|
targetKB = &types.KnowledgeBase{
|
|
ID: uuid.New().String(),
|
|
Name: sourceKB.Name,
|
|
Type: sourceKB.Type,
|
|
Description: sourceKB.Description,
|
|
TenantID: tenantID,
|
|
ChunkingConfig: sourceKB.ChunkingConfig,
|
|
ImageProcessingConfig: sourceKB.ImageProcessingConfig,
|
|
EmbeddingModelID: sourceKB.EmbeddingModelID,
|
|
SummaryModelID: sourceKB.SummaryModelID,
|
|
VLMConfig: sourceKB.VLMConfig,
|
|
StorageProviderConfig: sourceKB.StorageProviderConfig,
|
|
StorageConfig: sourceKB.StorageConfig,
|
|
FAQConfig: faqConfig,
|
|
VectorStoreID: sourceKB.VectorStoreID,
|
|
}
|
|
// The clone is owned by the caller, not the original creator —
|
|
// otherwise a Contributor copying someone else's KB would still
|
|
// not be able to edit the result. Skip synthetic API-key users
|
|
// (see CreateKnowledgeBase for the same reasoning).
|
|
if uid, ok := types.UserIDFromContext(ctx); ok && !types.IsSyntheticUserID(uid) {
|
|
targetKB.CreatorID = uid
|
|
}
|
|
targetKB.EnsureDefaults()
|
|
if err := s.repo.CreateKnowledgeBase(ctx, targetKB); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
return sourceKB, targetKB, nil
|
|
}
|