perf: batch tag reference counting

This commit is contained in:
wizardchen
2026-01-20 11:42:34 +08:00
committed by lyingbug
parent a6ba12f389
commit b1895f7443
4 changed files with 96 additions and 11 deletions

View File

@@ -161,6 +161,64 @@ func (r *knowledgeTagRepository) CountReferences(
return
}
// tagCountResult is the row shape scanned from the grouped COUNT queries
// that tally references per tag; each result row carries one tag ID and
// the number of rows that matched it.
type tagCountResult struct {
// TagID is populated from the tag_id column of the result row.
TagID string `gorm:"column:tag_id"`
// Count is populated from the count column (selected as COUNT(*) as count).
Count int64 `gorm:"column:count"`
}
// BatchCountReferences returns, for every tag in tagIDs, how many knowledge
// entries and chunks reference it within the given tenant and knowledge base.
//
// It issues one grouped COUNT query per referencing table (two queries in
// total) rather than a pair of queries per tag. Every requested tag ID is
// present in the returned map; tags with no matching rows keep zero counts.
// An empty tagIDs yields an empty, non-nil map without touching the database.
// If either query fails, a nil map and the underlying error are returned.
func (r *knowledgeTagRepository) BatchCountReferences(
	ctx context.Context,
	tenantID uint64,
	kbID string,
	tagIDs []string,
) (map[string]types.TagReferenceCounts, error) {
	// The final size is known up front, so pre-size the map to avoid rehashing.
	result := make(map[string]types.TagReferenceCounts, len(tagIDs))
	if len(tagIDs) == 0 {
		return result, nil
	}

	// Seed zero counts so tags without references still appear in the result.
	for _, tagID := range tagIDs {
		result[tagID] = types.TagReferenceCounts{}
	}

	// countInto runs the grouped count against the table behind model and
	// stores each per-tag count into result through set. The map holds struct
	// values, so every entry is read, modified and written back.
	countInto := func(model interface{}, set func(c *types.TagReferenceCounts, n int64)) error {
		var rows []tagCountResult
		err := r.db.WithContext(ctx).
			Model(model).
			Select("tag_id, COUNT(*) as count").
			Where("tenant_id = ? AND knowledge_base_id = ? AND tag_id IN (?)", tenantID, kbID, tagIDs).
			Group("tag_id").
			Find(&rows).Error
		if err != nil {
			return err
		}
		for _, row := range rows {
			counts := result[row.TagID]
			set(&counts, row.Count)
			result[row.TagID] = counts
		}
		return nil
	}

	// Knowledge references.
	if err := countInto(&types.Knowledge{}, func(c *types.TagReferenceCounts, n int64) {
		c.KnowledgeCount = n
	}); err != nil {
		return nil, err
	}

	// Chunk references.
	if err := countInto(&types.Chunk{}, func(c *types.TagReferenceCounts, n int64) {
		c.ChunkCount = n
	}); err != nil {
		return nil, err
	}

	return result, nil
}
// DeleteUnusedTags deletes tags that are not referenced by any knowledge or chunk.
// Returns the number of deleted tags.
func (r *knowledgeTagRepository) DeleteUnusedTags(ctx context.Context, tenantID uint64, kbID string) (int64, error) {

View File

@@ -75,24 +75,37 @@ func (s *knowledgeTagService) ListTags(
return nil, err
}
results := make([]*types.KnowledgeTagWithStats, 0, len(tags))
if len(tags) == 0 {
return types.NewPageResult(total, page, []*types.KnowledgeTagWithStats{}), nil
}
// Collect all tag IDs for batch query
tagIDs := make([]string, 0, len(tags))
for _, tag := range tags {
if tag != nil {
tagIDs = append(tagIDs, tag.ID)
}
}
// Batch query all reference counts in 2 SQL queries instead of 2*N
countsMap, err := s.repo.BatchCountReferences(ctx, tenantID, kbID, tagIDs)
if err != nil {
logger.ErrorWithFields(ctx, err, map[string]interface{}{
"kb_id": kbID,
})
return nil, err
}
results := make([]*types.KnowledgeTagWithStats, 0, len(tags))
for _, tag := range tags {
if tag == nil {
continue
}
kCount, cCount, err := s.repo.CountReferences(ctx, tenantID, kbID, tag.ID)
if err != nil {
logger.ErrorWithFields(ctx, err, map[string]interface{}{
"kb_id": kbID,
"tag_id": tag.ID,
})
return nil, err
}
counts := countsMap[tag.ID]
results = append(results, &types.KnowledgeTagWithStats{
KnowledgeTag: *tag,
KnowledgeCount: kCount,
ChunkCount: cCount,
KnowledgeCount: counts.KnowledgeCount,
ChunkCount: counts.ChunkCount,
})
}

View File

@@ -52,6 +52,14 @@ type KnowledgeTagRepository interface {
kbID string,
tagID string,
) (knowledgeCount int64, chunkCount int64, err error)
// BatchCountReferences returns the number of knowledges and chunks for multiple tags
// using one grouped query per table instead of a pair of queries per tag.
// Returns a map of tagID -> {knowledgeCount, chunkCount}
BatchCountReferences(
ctx context.Context,
tenantID uint64,
kbID string,
tagIDs []string,
) (map[string]types.TagReferenceCounts, error)
// DeleteUnusedTags deletes tags that are not referenced by any knowledge or chunk.
DeleteUnusedTags(ctx context.Context, tenantID uint64, kbID string) (int64, error)
}

View File

@@ -32,3 +32,9 @@ type KnowledgeTagWithStats struct {
KnowledgeCount int64 `json:"knowledge_count"`
ChunkCount int64 `json:"chunk_count"`
}
// TagReferenceCounts holds the reference counts for a single tag.
type TagReferenceCounts struct {
// KnowledgeCount is the number of knowledge entries that reference the tag.
KnowledgeCount int64
// ChunkCount is the number of chunks that reference the tag.
ChunkCount int64
}