mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
feat(repository): add TagID and IsRecommended fields to vector embeddings
- Introduced `TagID` and `IsRecommended` fields across various repository implementations (Elasticsearch, Milvus, Postgres, Qdrant, SQLite) to enhance document categorization and recommendation capabilities.
- Updated the handling of `IsEnabled` so that it reflects the actual state from the source data instead of always defaulting to `true`.
- Adjusted the knowledge service to incorporate the new `IsRecommended` field, improving the indexing process for knowledge chunks.

These changes enhance the flexibility and functionality of document retrieval and indexing across the application.
This commit is contained in:
@@ -15,8 +15,10 @@ type VectorEmbedding struct {
|
||||
ChunkID string `json:"chunk_id" gorm:"column:chunk_id"` // Unique ID of the text chunk
|
||||
KnowledgeID string `json:"knowledge_id" gorm:"column:knowledge_id"` // ID of the knowledge item
|
||||
KnowledgeBaseID string `json:"knowledge_base_id" gorm:"column:knowledge_base_id"` // ID of the knowledge base
|
||||
TagID string `json:"tag_id" gorm:"column:tag_id"` // Tag ID for categorization
|
||||
Embedding []float32 `json:"embedding" gorm:"column:embedding;not null"` // Vector embedding of the content
|
||||
IsEnabled bool `json:"is_enabled"` // Whether the chunk is enabled
|
||||
IsRecommended bool `json:"is_recommended"` // Whether the chunk is recommended
|
||||
}
|
||||
|
||||
// VectorEmbeddingWithScore extends VectorEmbedding with similarity score
|
||||
@@ -34,7 +36,9 @@ func ToDBVectorEmbedding(embedding *types.IndexInfo, additionalParams map[string
|
||||
ChunkID: embedding.ChunkID,
|
||||
KnowledgeID: embedding.KnowledgeID,
|
||||
KnowledgeBaseID: embedding.KnowledgeBaseID,
|
||||
IsEnabled: true, // Default to enabled
|
||||
TagID: embedding.TagID,
|
||||
IsEnabled: embedding.IsEnabled,
|
||||
IsRecommended: embedding.IsRecommended,
|
||||
}
|
||||
// Add embedding data if available in additionalParams
|
||||
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") {
|
||||
@@ -65,8 +69,10 @@ func FromDBVectorEmbeddingWithScore(id string,
|
||||
ChunkID: embedding.ChunkID,
|
||||
KnowledgeID: embedding.KnowledgeID,
|
||||
KnowledgeBaseID: embedding.KnowledgeBaseID,
|
||||
TagID: embedding.TagID,
|
||||
Content: embedding.Content,
|
||||
Score: embedding.Score,
|
||||
MatchType: matchType,
|
||||
IsEnabled: embedding.IsEnabled,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -592,6 +592,11 @@ func (m *milvusRepository) getBaseFilterForQuery(params types.RetrieveParams) (s
|
||||
Value: params.ExcludeChunkIDs,
|
||||
})
|
||||
}
|
||||
filters = append(filters, &universalFilterCondition{
|
||||
Field: fieldIsEnabled,
|
||||
Operator: operatorEqual,
|
||||
Value: true,
|
||||
})
|
||||
if len(filters) == 0 {
|
||||
return "", nil, nil
|
||||
}
|
||||
@@ -919,7 +924,7 @@ func toMilvusVectorEmbedding(embedding *types.IndexInfo, additionalParams map[st
|
||||
KnowledgeID: embedding.KnowledgeID,
|
||||
KnowledgeBaseID: embedding.KnowledgeBaseID,
|
||||
TagID: embedding.TagID,
|
||||
IsEnabled: true, // Default to enabled
|
||||
IsEnabled: embedding.IsEnabled,
|
||||
}
|
||||
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), fieldEmbedding) {
|
||||
if embeddingMap, ok := additionalParams[fieldEmbedding].(map[string][]float32); ok {
|
||||
|
||||
@@ -66,7 +66,7 @@ func toDBVectorEmbedding(indexInfo *types.IndexInfo, additionalParams map[string
|
||||
KnowledgeBaseID: indexInfo.KnowledgeBaseID,
|
||||
TagID: indexInfo.TagID,
|
||||
Content: common.CleanInvalidUTF8(indexInfo.Content),
|
||||
IsEnabled: true, // Default to enabled
|
||||
IsEnabled: indexInfo.IsEnabled,
|
||||
}
|
||||
// Add embedding data if available in additionalParams
|
||||
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") {
|
||||
|
||||
@@ -797,6 +797,10 @@ func (q *qdrantRepository) CopyIndices(ctx context.Context,
|
||||
targetSourceID = uuid.New().String()
|
||||
}
|
||||
|
||||
isEnabled := true
|
||||
if v, ok := payload[fieldIsEnabled]; ok {
|
||||
isEnabled = v.GetBoolValue()
|
||||
}
|
||||
newPayload := qdrant.NewValueMap(map[string]any{
|
||||
fieldContent: payload[fieldContent].GetStringValue(),
|
||||
fieldSourceID: targetSourceID,
|
||||
@@ -804,7 +808,8 @@ func (q *qdrantRepository) CopyIndices(ctx context.Context,
|
||||
fieldChunkID: targetChunkID,
|
||||
fieldKnowledgeID: targetKnowledgeID,
|
||||
fieldKnowledgeBaseID: targetKnowledgeBaseID,
|
||||
fieldIsEnabled: true,
|
||||
fieldTagID: payload[fieldTagID].GetStringValue(),
|
||||
fieldIsEnabled: isEnabled,
|
||||
})
|
||||
|
||||
var vectors *qdrant.Vectors
|
||||
@@ -923,7 +928,7 @@ func toQdrantVectorEmbedding(embedding *types.IndexInfo, additionalParams map[st
|
||||
KnowledgeID: embedding.KnowledgeID,
|
||||
KnowledgeBaseID: embedding.KnowledgeBaseID,
|
||||
TagID: embedding.TagID,
|
||||
IsEnabled: true, // Default to enabled
|
||||
IsEnabled: embedding.IsEnabled,
|
||||
}
|
||||
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), fieldEmbedding) {
|
||||
if embeddingMap, ok := additionalParams[fieldEmbedding].(map[string][]float32); ok {
|
||||
|
||||
@@ -437,7 +437,7 @@ func (r *sqliteRepository) vectorRetrieve(ctx context.Context, params types.Retr
|
||||
// --- Internal helpers ---
|
||||
|
||||
func toSQLiteEmbedding(info *types.IndexInfo) *sqliteEmbedding {
|
||||
enabled := true
|
||||
enabled := info.IsEnabled
|
||||
return &sqliteEmbedding{
|
||||
SourceID: info.SourceID,
|
||||
SourceType: int(info.SourceType),
|
||||
|
||||
@@ -6224,6 +6224,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
|
||||
KnowledgeType: types.KnowledgeTypeFAQ,
|
||||
TagID: chunk.TagID,
|
||||
IsEnabled: chunk.IsEnabled,
|
||||
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
@@ -6252,6 +6253,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
|
||||
KnowledgeType: types.KnowledgeTypeFAQ,
|
||||
TagID: chunk.TagID,
|
||||
IsEnabled: chunk.IsEnabled,
|
||||
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
|
||||
})
|
||||
|
||||
// 每个相似问创建一个索引项
|
||||
@@ -6277,6 +6279,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
|
||||
KnowledgeType: types.KnowledgeTypeFAQ,
|
||||
TagID: chunk.TagID,
|
||||
IsEnabled: chunk.IsEnabled,
|
||||
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user