mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
feat: implement metadata-based alias management for wiki pages
Made-with: Cursor
This commit is contained in:
@@ -11,6 +11,7 @@ export interface WikiPage {
|
||||
status: string;
|
||||
content: string;
|
||||
summary: string;
|
||||
aliases: string[];
|
||||
source_refs: string[];
|
||||
in_links: string[];
|
||||
out_links: string[];
|
||||
|
||||
@@ -2132,6 +2132,7 @@ export default {
|
||||
emptyDesc: "上传文档并启用 Wiki 后将自动生成知识页面",
|
||||
selectPageHint: "从左侧选择一个页面查看内容",
|
||||
version: "v{ver}",
|
||||
aliases: "别名",
|
||||
linksTo: "链接到",
|
||||
linkedFrom: "被链接",
|
||||
sources: "来源文档",
|
||||
|
||||
@@ -182,6 +182,12 @@
|
||||
<!-- Page header -->
|
||||
<div class="wiki-reader-header">
|
||||
<h2 class="wiki-reader-title">{{ selectedPage.title }}</h2>
|
||||
<div v-if="selectedPage.aliases && selectedPage.aliases.length" class="wiki-reader-aliases">
|
||||
<span class="wiki-alias-label">{{ $t('knowledgeEditor.wikiBrowser.aliases') }}:</span>
|
||||
<t-tag v-for="alias in selectedPage.aliases" :key="alias" size="small" variant="light" class="wiki-alias-tag">
|
||||
{{ alias }}
|
||||
</t-tag>
|
||||
</div>
|
||||
<div class="wiki-reader-meta">
|
||||
<t-tag size="small" :theme="getTypeTheme(selectedPage.page_type)" variant="light-outline">
|
||||
{{ getTypeLabel(selectedPage.page_type) }}
|
||||
|
||||
@@ -65,6 +65,7 @@ If previous slugs are provided above, you MUST follow these rules:
|
||||
Each entity should have:
|
||||
- "name": The entity name in {{.Language}} (human-readable)
|
||||
- "slug": URL-friendly slug, format "entity/<lowercase-hyphenated-name>" (use romanized/pinyin form for non-Latin names). **Reuse previous slug if the entity was extracted before.**
|
||||
- "aliases": An array of strings representing alternative names, abbreviations, acronyms or translations of the entity found in the document. Provide [] if none.
|
||||
- "description": **Index listing summary** — one sentence, 15-40 words, in {{.Language}}. Describes WHAT this entity IS and its role in the document. Must be self-contained (understandable without reading the full page). This will be displayed in the wiki index.
|
||||
- "details": A 2-5 sentence summary in {{.Language}} of key facts from the document. **Image rule**: If the document contains relevant <image> elements in an <images> tag, include them in the details using Markdown syntax: ![image description](image_url).
|
||||
|
||||
@@ -74,6 +75,7 @@ Only include entities that are substantively discussed (mentioned at least twice
|
||||
Each concept should have:
|
||||
- "name": The concept name in {{.Language}} (human-readable)
|
||||
- "slug": URL-friendly slug, format "concept/<lowercase-hyphenated-name>" (use romanized/pinyin form for non-Latin names). **Reuse previous slug if the concept was extracted before.**
|
||||
- "aliases": An array of strings representing alternative names, abbreviations, acronyms or translations of the concept found in the document. Provide [] if none.
|
||||
- "description": **Index listing summary** — one sentence, 15-40 words, in {{.Language}}. Defines WHAT this concept IS. Must be self-contained (understandable without reading the full page). This will be displayed in the wiki index.
|
||||
- "details": A 2-5 sentence explanation in {{.Language}} as discussed in the document. **Image rule**: If the document contains relevant <image> elements in an <images> tag, include them in the details using Markdown syntax: ![image description](image_url).
|
||||
|
||||
@@ -91,6 +93,7 @@ Output ONLY valid JSON. Example:
|
||||
{
|
||||
"name": "Acme Corp",
|
||||
"slug": "entity/acme-corp",
|
||||
"aliases": ["Acme", "Acme Corporation"],
|
||||
"description": "A technology company specializing in AI solutions.",
|
||||
"details": "Acme Corp was founded in 2020 and has grown to 500 employees. They focus on enterprise AI products and recently launched their flagship RAG platform."
|
||||
}
|
||||
@@ -99,6 +102,7 @@ Output ONLY valid JSON. Example:
|
||||
{
|
||||
"name": "Retrieval-Augmented Generation",
|
||||
"slug": "concept/retrieval-augmented-generation",
|
||||
"aliases": ["RAG"],
|
||||
"description": "A technique that combines information retrieval with language model generation.",
|
||||
"details": "RAG works by first retrieving relevant documents from a knowledge base using vector similarity search, then feeding those documents as context to an LLM for answer generation."
|
||||
}
|
||||
|
||||
@@ -118,10 +118,11 @@ func (r *wikiPageRepository) List(ctx context.Context, req *types.WikiPageListRe
|
||||
query = query.Where("status = ?", req.Status)
|
||||
}
|
||||
if req.Query != "" {
|
||||
// Use PostgreSQL full-text search
|
||||
// Use PostgreSQL full-text search + ILIKE for aliases
|
||||
query = query.Where(
|
||||
"to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(content, '')) @@ plainto_tsquery('simple', ?)",
|
||||
"(to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(content, '')) @@ plainto_tsquery('simple', ?) OR aliases::text ILIKE ?)",
|
||||
req.Query,
|
||||
"%"+req.Query+"%",
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -916,10 +916,11 @@ func (s *wikiIngestService) retractStalePages(
|
||||
|
||||
// extractedItem represents a single extracted entity or concept
|
||||
type extractedItem struct {
|
||||
Name string `json:"name"`
|
||||
Slug string `json:"slug"`
|
||||
Description string `json:"description"`
|
||||
Details string `json:"details"`
|
||||
Name string `json:"name"`
|
||||
Slug string `json:"slug"`
|
||||
Aliases []string `json:"aliases"`
|
||||
Description string `json:"description"`
|
||||
Details string `json:"details"`
|
||||
}
|
||||
|
||||
// combinedExtraction represents the parsed result of the combined entity+concept extraction
|
||||
@@ -1057,6 +1058,19 @@ func (s *wikiIngestService) upsertExtractedPages(
|
||||
if updatedSummary != "" {
|
||||
existing.Summary = updatedSummary
|
||||
}
|
||||
if len(item.Aliases) > 0 {
|
||||
// Merge new aliases with existing ones, deduplicating
|
||||
aliasMap := make(map[string]bool)
|
||||
for _, alias := range existing.Aliases {
|
||||
aliasMap[alias] = true
|
||||
}
|
||||
for _, newAlias := range item.Aliases {
|
||||
if !aliasMap[newAlias] {
|
||||
existing.Aliases = append(existing.Aliases, newAlias)
|
||||
aliasMap[newAlias] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
existing.SourceRefs = appendUnique(existing.SourceRefs, sourceRef)
|
||||
|
||||
if _, err := s.wikiService.UpdatePage(ctx, existing); err != nil {
|
||||
@@ -1075,6 +1089,7 @@ func (s *wikiIngestService) upsertExtractedPages(
|
||||
KnowledgeBaseID: payload.KnowledgeBaseID,
|
||||
Slug: item.Slug,
|
||||
Title: item.Name,
|
||||
Aliases: item.Aliases,
|
||||
PageType: pageType,
|
||||
Status: types.WikiPageStatusDraft,
|
||||
Content: pageContent,
|
||||
@@ -1310,13 +1325,21 @@ func (s *wikiIngestService) deduplicateItems(
|
||||
// Build existing pages listing
|
||||
var existingBuf strings.Builder
|
||||
for _, p := range typedPages {
|
||||
fmt.Fprintf(&existingBuf, "- slug: %s | title: %s\n", p.Slug, p.Title)
|
||||
aliases := ""
|
||||
if len(p.Aliases) > 0 {
|
||||
aliases = fmt.Sprintf(" | aliases: %s", strings.Join(p.Aliases, ", "))
|
||||
}
|
||||
fmt.Fprintf(&existingBuf, "- slug: %s | title: %s%s\n", p.Slug, p.Title, aliases)
|
||||
}
|
||||
|
||||
// Build new items listing
|
||||
var newBuf strings.Builder
|
||||
for _, item := range items {
|
||||
fmt.Fprintf(&newBuf, "- slug: %s | name: %s\n", item.Slug, item.Name)
|
||||
aliases := ""
|
||||
if len(item.Aliases) > 0 {
|
||||
aliases = fmt.Sprintf(" | aliases: %s", strings.Join(item.Aliases, ", "))
|
||||
}
|
||||
fmt.Fprintf(&newBuf, "- slug: %s | name: %s%s\n", item.Slug, item.Name, aliases)
|
||||
}
|
||||
|
||||
// Call LLM for deduplication
|
||||
|
||||
@@ -63,6 +63,8 @@ type WikiPage struct {
|
||||
Content string `json:"content" gorm:"type:text"`
|
||||
// One-line summary for index listing
|
||||
Summary string `json:"summary" gorm:"type:text"`
|
||||
// Alternate names, abbreviations, acronyms or translated names
|
||||
Aliases StringArray `json:"aliases" gorm:"type:json"`
|
||||
// References to source knowledge IDs that contributed to this page
|
||||
SourceRefs StringArray `json:"source_refs" gorm:"type:json"`
|
||||
// Slugs of pages that link TO this page (backlinks)
|
||||
|
||||
Reference in New Issue
Block a user