feat(repository): add TagID and IsRecommended fields to vector embeddings

- Introduced `TagID` and `IsRecommended` fields across various repository implementations (Elasticsearch, Milvus, Postgres, Qdrant, SQLite) to enhance document categorization and recommendation capabilities.
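- Replaced the hard-coded `IsEnabled: true` default with the value carried by the source data, so indexed embeddings reflect each chunk's actual enabled state. Qdrant `CopyIndices` now copies the tag ID and the enabled flag from the source payload (defaulting to enabled when the flag is absent), and Milvus queries always filter on the enabled field.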
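- Adjusted the knowledge service's FAQ index builder (`buildFAQIndexInfoList`) to set the new `IsRecommended` field from each chunk's `ChunkFlagRecommended` flag, so the recommendation state is carried into the index entries for knowledge chunks.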

These changes enhance the flexibility and functionality of document retrieval and indexing across the application.
This commit is contained in:
wizardchen
2026-03-02 21:40:28 +08:00
committed by lyingbug
parent 7b8a78aa2a
commit 931bd38138
6 changed files with 25 additions and 6 deletions

View File

@@ -15,8 +15,10 @@ type VectorEmbedding struct {
ChunkID string `json:"chunk_id" gorm:"column:chunk_id"` // Unique ID of the text chunk
KnowledgeID string `json:"knowledge_id" gorm:"column:knowledge_id"` // ID of the knowledge item
KnowledgeBaseID string `json:"knowledge_base_id" gorm:"column:knowledge_base_id"` // ID of the knowledge base
TagID string `json:"tag_id" gorm:"column:tag_id"` // Tag ID for categorization
Embedding []float32 `json:"embedding" gorm:"column:embedding;not null"` // Vector embedding of the content
IsEnabled bool `json:"is_enabled"` // Whether the chunk is enabled
IsRecommended bool `json:"is_recommended"` // Whether the chunk is recommended
}
// VectorEmbeddingWithScore extends VectorEmbedding with similarity score
@@ -34,7 +36,9 @@ func ToDBVectorEmbedding(embedding *types.IndexInfo, additionalParams map[string
ChunkID: embedding.ChunkID,
KnowledgeID: embedding.KnowledgeID,
KnowledgeBaseID: embedding.KnowledgeBaseID,
IsEnabled: true, // Default to enabled
TagID: embedding.TagID,
IsEnabled: embedding.IsEnabled,
IsRecommended: embedding.IsRecommended,
}
// Add embedding data if available in additionalParams
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") {
@@ -65,8 +69,10 @@ func FromDBVectorEmbeddingWithScore(id string,
ChunkID: embedding.ChunkID,
KnowledgeID: embedding.KnowledgeID,
KnowledgeBaseID: embedding.KnowledgeBaseID,
TagID: embedding.TagID,
Content: embedding.Content,
Score: embedding.Score,
MatchType: matchType,
IsEnabled: embedding.IsEnabled,
}
}

View File

@@ -592,6 +592,11 @@ func (m *milvusRepository) getBaseFilterForQuery(params types.RetrieveParams) (s
Value: params.ExcludeChunkIDs,
})
}
filters = append(filters, &universalFilterCondition{
Field: fieldIsEnabled,
Operator: operatorEqual,
Value: true,
})
if len(filters) == 0 {
return "", nil, nil
}
@@ -919,7 +924,7 @@ func toMilvusVectorEmbedding(embedding *types.IndexInfo, additionalParams map[st
KnowledgeID: embedding.KnowledgeID,
KnowledgeBaseID: embedding.KnowledgeBaseID,
TagID: embedding.TagID,
IsEnabled: true, // Default to enabled
IsEnabled: embedding.IsEnabled,
}
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), fieldEmbedding) {
if embeddingMap, ok := additionalParams[fieldEmbedding].(map[string][]float32); ok {

View File

@@ -66,7 +66,7 @@ func toDBVectorEmbedding(indexInfo *types.IndexInfo, additionalParams map[string
KnowledgeBaseID: indexInfo.KnowledgeBaseID,
TagID: indexInfo.TagID,
Content: common.CleanInvalidUTF8(indexInfo.Content),
IsEnabled: true, // Default to enabled
IsEnabled: indexInfo.IsEnabled,
}
// Add embedding data if available in additionalParams
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") {

View File

@@ -797,6 +797,10 @@ func (q *qdrantRepository) CopyIndices(ctx context.Context,
targetSourceID = uuid.New().String()
}
isEnabled := true
if v, ok := payload[fieldIsEnabled]; ok {
isEnabled = v.GetBoolValue()
}
newPayload := qdrant.NewValueMap(map[string]any{
fieldContent: payload[fieldContent].GetStringValue(),
fieldSourceID: targetSourceID,
@@ -804,7 +808,8 @@ func (q *qdrantRepository) CopyIndices(ctx context.Context,
fieldChunkID: targetChunkID,
fieldKnowledgeID: targetKnowledgeID,
fieldKnowledgeBaseID: targetKnowledgeBaseID,
fieldIsEnabled: true,
fieldTagID: payload[fieldTagID].GetStringValue(),
fieldIsEnabled: isEnabled,
})
var vectors *qdrant.Vectors
@@ -923,7 +928,7 @@ func toQdrantVectorEmbedding(embedding *types.IndexInfo, additionalParams map[st
KnowledgeID: embedding.KnowledgeID,
KnowledgeBaseID: embedding.KnowledgeBaseID,
TagID: embedding.TagID,
IsEnabled: true, // Default to enabled
IsEnabled: embedding.IsEnabled,
}
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), fieldEmbedding) {
if embeddingMap, ok := additionalParams[fieldEmbedding].(map[string][]float32); ok {

View File

@@ -437,7 +437,7 @@ func (r *sqliteRepository) vectorRetrieve(ctx context.Context, params types.Retr
// --- Internal helpers ---
func toSQLiteEmbedding(info *types.IndexInfo) *sqliteEmbedding {
enabled := true
enabled := info.IsEnabled
return &sqliteEmbedding{
SourceID: info.SourceID,
SourceType: int(info.SourceType),

View File

@@ -6224,6 +6224,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
KnowledgeType: types.KnowledgeTypeFAQ,
TagID: chunk.TagID,
IsEnabled: chunk.IsEnabled,
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
},
}, nil
}
@@ -6252,6 +6253,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
KnowledgeType: types.KnowledgeTypeFAQ,
TagID: chunk.TagID,
IsEnabled: chunk.IsEnabled,
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
})
// Create one index entry for each similar question
@@ -6277,6 +6279,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
KnowledgeType: types.KnowledgeTypeFAQ,
TagID: chunk.TagID,
IsEnabled: chunk.IsEnabled,
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
})
}