feat: implement metadata-based alias management for wiki pages

Made-with: Cursor
This commit is contained in:
wizardchen
2026-04-08 22:15:38 +08:00
parent 233c398d7a
commit d9cf4b7aa9
7 changed files with 46 additions and 8 deletions

View File

@@ -11,6 +11,7 @@ export interface WikiPage {
status: string;
content: string;
summary: string;
aliases: string[];
source_refs: string[];
in_links: string[];
out_links: string[];

View File

@@ -2132,6 +2132,7 @@ export default {
emptyDesc: "上传文档并启用 Wiki 后将自动生成知识页面",
selectPageHint: "从左侧选择一个页面查看内容",
version: "v{ver}",
aliases: "别名",
linksTo: "链接到",
linkedFrom: "被链接",
sources: "来源文档",

View File

@@ -182,6 +182,12 @@
<!-- Page header -->
<div class="wiki-reader-header">
<h2 class="wiki-reader-title">{{ selectedPage.title }}</h2>
<div v-if="selectedPage.aliases && selectedPage.aliases.length" class="wiki-reader-aliases">
<span class="wiki-alias-label">{{ $t('knowledgeEditor.wikiBrowser.aliases') }}:</span>
<t-tag v-for="alias in selectedPage.aliases" :key="alias" size="small" variant="light" class="wiki-alias-tag">
{{ alias }}
</t-tag>
</div>
<div class="wiki-reader-meta">
<t-tag size="small" :theme="getTypeTheme(selectedPage.page_type)" variant="light-outline">
{{ getTypeLabel(selectedPage.page_type) }}

View File

@@ -65,6 +65,7 @@ If previous slugs are provided above, you MUST follow these rules:
Each entity should have:
- "name": The entity name in {{.Language}} (human-readable)
- "slug": URL-friendly slug, format "entity/<lowercase-hyphenated-name>" (use romanized/pinyin form for non-Latin names). **Reuse previous slug if the entity was extracted before.**
- "aliases": An array of strings representing alternative names, abbreviations, acronyms or translations of the entity found in the document. Provide [] if none.
- "description": **Index listing summary** — one sentence, 15-40 words, in {{.Language}}. Describes WHAT this entity IS and its role in the document. Must be self-contained (understandable without reading the full page). This will be displayed in the wiki index.
- "details": A 2-5 sentence summary in {{.Language}} of key facts from the document. **Image rule**: If the document contains relevant <image> elements in an <images> tag, include them in the details using Markdown syntax: ![caption](url).
@@ -74,6 +75,7 @@ Only include entities that are substantively discussed (mentioned at least twice
Each concept should have:
- "name": The concept name in {{.Language}} (human-readable)
- "slug": URL-friendly slug, format "concept/<lowercase-hyphenated-name>" (use romanized/pinyin form for non-Latin names). **Reuse previous slug if the concept was extracted before.**
- "aliases": An array of strings representing alternative names, abbreviations, acronyms or translations of the concept found in the document. Provide [] if none.
- "description": **Index listing summary** — one sentence, 15-40 words, in {{.Language}}. Defines WHAT this concept IS. Must be self-contained (understandable without reading the full page). This will be displayed in the wiki index.
- "details": A 2-5 sentence explanation in {{.Language}} as discussed in the document. **Image rule**: If the document contains relevant <image> elements in an <images> tag, include them in the details using Markdown syntax: ![caption](url).
@@ -91,6 +93,7 @@ Output ONLY valid JSON. Example:
{
"name": "Acme Corp",
"slug": "entity/acme-corp",
"aliases": ["Acme", "Acme Corporation"],
"description": "A technology company specializing in AI solutions.",
"details": "Acme Corp was founded in 2020 and has grown to 500 employees. They focus on enterprise AI products and recently launched their flagship RAG platform."
}
@@ -99,6 +102,7 @@ Output ONLY valid JSON. Example:
{
"name": "Retrieval-Augmented Generation",
"slug": "concept/retrieval-augmented-generation",
"aliases": ["RAG"],
"description": "A technique that combines information retrieval with language model generation.",
"details": "RAG works by first retrieving relevant documents from a knowledge base using vector similarity search, then feeding those documents as context to an LLM for answer generation."
}

View File

@@ -118,10 +118,11 @@ func (r *wikiPageRepository) List(ctx context.Context, req *types.WikiPageListRe
query = query.Where("status = ?", req.Status)
}
if req.Query != "" {
// Use PostgreSQL full-text search
// Use PostgreSQL full-text search + ILIKE for aliases
query = query.Where(
"to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(content, '')) @@ plainto_tsquery('simple', ?)",
"(to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(content, '')) @@ plainto_tsquery('simple', ?) OR aliases::text ILIKE ?)",
req.Query,
"%"+req.Query+"%",
)
}

View File

@@ -916,10 +916,11 @@ func (s *wikiIngestService) retractStalePages(
// extractedItem represents a single extracted entity or concept
type extractedItem struct {
Name string `json:"name"`
Slug string `json:"slug"`
Description string `json:"description"`
Details string `json:"details"`
Name string `json:"name"`
Slug string `json:"slug"`
Aliases []string `json:"aliases"`
Description string `json:"description"`
Details string `json:"details"`
}
// combinedExtraction represents the parsed result of the combined entity+concept extraction
@@ -1057,6 +1058,19 @@ func (s *wikiIngestService) upsertExtractedPages(
if updatedSummary != "" {
existing.Summary = updatedSummary
}
if len(item.Aliases) > 0 {
	// Merge the newly extracted aliases into the aliases already stored on
	// the page, keeping the stored order and appending only unseen names.
	seen := make(map[string]struct{}, len(existing.Aliases)+len(item.Aliases))
	for _, alias := range existing.Aliases {
		seen[alias] = struct{}{}
	}
	for _, raw := range item.Aliases {
		// item.Aliases is decoded from the extraction JSON, so entries may
		// be blank or padded. Trim before comparing so whitespace variants
		// of a stored alias are not appended again, and never persist an
		// empty alias.
		alias := strings.TrimSpace(raw)
		if alias == "" {
			continue
		}
		if _, dup := seen[alias]; dup {
			continue
		}
		seen[alias] = struct{}{}
		existing.Aliases = append(existing.Aliases, alias)
	}
}
existing.SourceRefs = appendUnique(existing.SourceRefs, sourceRef)
if _, err := s.wikiService.UpdatePage(ctx, existing); err != nil {
@@ -1075,6 +1089,7 @@ func (s *wikiIngestService) upsertExtractedPages(
KnowledgeBaseID: payload.KnowledgeBaseID,
Slug: item.Slug,
Title: item.Name,
Aliases: item.Aliases,
PageType: pageType,
Status: types.WikiPageStatusDraft,
Content: pageContent,
@@ -1310,13 +1325,21 @@ func (s *wikiIngestService) deduplicateItems(
// Build existing pages listing
var existingBuf strings.Builder
for _, p := range typedPages {
fmt.Fprintf(&existingBuf, "- slug: %s | title: %s\n", p.Slug, p.Title)
aliases := ""
if len(p.Aliases) > 0 {
aliases = fmt.Sprintf(" | aliases: %s", strings.Join(p.Aliases, ", "))
}
fmt.Fprintf(&existingBuf, "- slug: %s | title: %s%s\n", p.Slug, p.Title, aliases)
}
// Build new items listing
var newBuf strings.Builder
for _, item := range items {
fmt.Fprintf(&newBuf, "- slug: %s | name: %s\n", item.Slug, item.Name)
aliases := ""
if len(item.Aliases) > 0 {
aliases = fmt.Sprintf(" | aliases: %s", strings.Join(item.Aliases, ", "))
}
fmt.Fprintf(&newBuf, "- slug: %s | name: %s%s\n", item.Slug, item.Name, aliases)
}
// Call LLM for deduplication

View File

@@ -63,6 +63,8 @@ type WikiPage struct {
Content string `json:"content" gorm:"type:text"`
// One-line summary for index listing
Summary string `json:"summary" gorm:"type:text"`
// Alternate names, abbreviations, acronyms or translated names
Aliases StringArray `json:"aliases" gorm:"type:json"`
// References to source knowledge IDs that contributed to this page
SourceRefs StringArray `json:"source_refs" gorm:"type:json"`
// Slugs of pages that link TO this page (backlinks)