mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
perf: batch tag reference counting
This commit is contained in:
@@ -161,6 +161,64 @@ func (r *knowledgeTagRepository) CountReferences(
|
||||
return
|
||||
}
|
||||
|
||||
// tagCountResult is the scan target for one row of a grouped count query:
// a tag ID paired with the number of rows that carry it.
type tagCountResult struct {
	// TagID is populated from the "tag_id" result column.
	TagID string `gorm:"column:tag_id"`
	// Count is populated from the "count" result column.
	Count int64 `gorm:"column:count"`
}
|
||||
|
||||
// BatchCountReferences returns the number of knowledges and chunks for multiple tags in a single query.
|
||||
func (r *knowledgeTagRepository) BatchCountReferences(
|
||||
ctx context.Context,
|
||||
tenantID uint64,
|
||||
kbID string,
|
||||
tagIDs []string,
|
||||
) (map[string]types.TagReferenceCounts, error) {
|
||||
result := make(map[string]types.TagReferenceCounts)
|
||||
if len(tagIDs) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Initialize result with zero counts for all tagIDs
|
||||
for _, tagID := range tagIDs {
|
||||
result[tagID] = types.TagReferenceCounts{}
|
||||
}
|
||||
|
||||
// Count knowledge references in a single query
|
||||
var knowledgeCounts []tagCountResult
|
||||
if err := r.db.WithContext(ctx).
|
||||
Model(&types.Knowledge{}).
|
||||
Select("tag_id, COUNT(*) as count").
|
||||
Where("tenant_id = ? AND knowledge_base_id = ? AND tag_id IN (?)", tenantID, kbID, tagIDs).
|
||||
Group("tag_id").
|
||||
Find(&knowledgeCounts).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, kc := range knowledgeCounts {
|
||||
counts := result[kc.TagID]
|
||||
counts.KnowledgeCount = kc.Count
|
||||
result[kc.TagID] = counts
|
||||
}
|
||||
|
||||
// Count chunk references in a single query
|
||||
var chunkCounts []tagCountResult
|
||||
if err := r.db.WithContext(ctx).
|
||||
Model(&types.Chunk{}).
|
||||
Select("tag_id, COUNT(*) as count").
|
||||
Where("tenant_id = ? AND knowledge_base_id = ? AND tag_id IN (?)", tenantID, kbID, tagIDs).
|
||||
Group("tag_id").
|
||||
Find(&chunkCounts).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, cc := range chunkCounts {
|
||||
counts := result[cc.TagID]
|
||||
counts.ChunkCount = cc.Count
|
||||
result[cc.TagID] = counts
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// DeleteUnusedTags deletes tags that are not referenced by any knowledge or chunk.
|
||||
// Returns the number of deleted tags.
|
||||
func (r *knowledgeTagRepository) DeleteUnusedTags(ctx context.Context, tenantID uint64, kbID string) (int64, error) {
|
||||
|
||||
@@ -75,24 +75,37 @@ func (s *knowledgeTagService) ListTags(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results := make([]*types.KnowledgeTagWithStats, 0, len(tags))
|
||||
if len(tags) == 0 {
|
||||
return types.NewPageResult(total, page, []*types.KnowledgeTagWithStats{}), nil
|
||||
}
|
||||
|
||||
// Collect all tag IDs for batch query
|
||||
tagIDs := make([]string, 0, len(tags))
|
||||
for _, tag := range tags {
|
||||
if tag != nil {
|
||||
tagIDs = append(tagIDs, tag.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// Batch query all reference counts in 2 SQL queries instead of 2*N
|
||||
countsMap, err := s.repo.BatchCountReferences(ctx, tenantID, kbID, tagIDs)
|
||||
if err != nil {
|
||||
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
||||
"kb_id": kbID,
|
||||
})
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results := make([]*types.KnowledgeTagWithStats, 0, len(tags))
|
||||
for _, tag := range tags {
|
||||
if tag == nil {
|
||||
continue
|
||||
}
|
||||
kCount, cCount, err := s.repo.CountReferences(ctx, tenantID, kbID, tag.ID)
|
||||
if err != nil {
|
||||
logger.ErrorWithFields(ctx, err, map[string]interface{}{
|
||||
"kb_id": kbID,
|
||||
"tag_id": tag.ID,
|
||||
})
|
||||
return nil, err
|
||||
}
|
||||
counts := countsMap[tag.ID]
|
||||
results = append(results, &types.KnowledgeTagWithStats{
|
||||
KnowledgeTag: *tag,
|
||||
KnowledgeCount: kCount,
|
||||
ChunkCount: cCount,
|
||||
KnowledgeCount: counts.KnowledgeCount,
|
||||
ChunkCount: counts.ChunkCount,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -52,6 +52,14 @@ type KnowledgeTagRepository interface {
|
||||
kbID string,
|
||||
tagID string,
|
||||
) (knowledgeCount int64, chunkCount int64, err error)
|
||||
// BatchCountReferences returns the number of knowledges and chunks for multiple tags using one batched query per table instead of per-tag queries.
|
||||
// Returns a map of tagID -> {knowledgeCount, chunkCount}
|
||||
BatchCountReferences(
|
||||
ctx context.Context,
|
||||
tenantID uint64,
|
||||
kbID string,
|
||||
tagIDs []string,
|
||||
) (map[string]types.TagReferenceCounts, error)
|
||||
// DeleteUnusedTags deletes tags that are not referenced by any knowledge or chunk.
|
||||
DeleteUnusedTags(ctx context.Context, tenantID uint64, kbID string) (int64, error)
|
||||
}
|
||||
|
||||
@@ -32,3 +32,9 @@ type KnowledgeTagWithStats struct {
|
||||
KnowledgeCount int64 `json:"knowledge_count"`
|
||||
ChunkCount int64 `json:"chunk_count"`
|
||||
}
|
||||
|
||||
// TagReferenceCounts carries the two reference tallies kept for a single tag.
// The zero value means the tag is referenced by nothing.
type TagReferenceCounts struct {
	// KnowledgeCount is the number of knowledges that reference the tag.
	KnowledgeCount int64
	// ChunkCount is the number of chunks that reference the tag.
	ChunkCount int64
}
|
||||
|
||||
Reference in New Issue
Block a user