mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
Seven bugs surfaced via two audit rounds — parallel reviewer agents
plus a real-server end-to-end demo. Each fix arrives with a
regression test.
1. doc upload --recursive --json corrupted the envelope stream.
Per-file FAIL/OK plain lines printed unconditionally to stdout,
then a Success envelope, then on partial failure a typed error
that the root handler turned into a SECOND Failure envelope —
three outputs where one was expected. Fix: gate the plain lines
behind !opts.JSONOut, and add cmdutil.Error.Silent so the JSON-
path partial-failure preserves its typed exit code without
triggering PrintErrorEnvelope's default Failure-envelope write.
2. auth refresh / AuthRetryTransport misclassified HTTP failures as
network.error. RefreshAndPersist wrapped every refresher error
with CodeNetworkError, but the SDK emits "HTTP error 401: ..."
for a rejected refresh token — which should surface as
auth.token_expired. Switched to WrapHTTP for proper status-
derived classification. Affects both `auth refresh` and the
transport's refresh closure.
3. doc download accepted ".." as a server-suggested filename. The
rejection list covered "" / "." / filepath.Separator but not
bare ".." — filepath.Base("..") is "..", which slipped through
to os.Create and produced a confusing local.file_io wrap. Added
to the rejection set.
4. search chunks / docs / kb / sessions had no lower bound on
--limit. `-L 0` / `-L -1` was forwarded to the server with
undefined behavior. Added a 1..1000 bound at the RunE boundary
across all four (matching doc list / session list page-size
bounds). Internal callers in tests can still pass Limit==0 for
the "no client-side cap" runChunks path — the bound only applies
at the user-input layer.
5. cli/AGENTS.md ADR-3 verb-canon summary listed only v0.2 verbs as
"gh-canonical" and missed v0.3 additions (edit, pin, unpin,
download — all gh-canonical) plus locally-introduced ones
(empty, refresh, add, remove, link). Rewritten as an explicit
gh-canonical / locally-introduced split.
6. kb pin returned 404. Server registers /knowledge-bases/{id}/pin
as PUT (router.go:292); SDK was using POST. gin's router silently
404s on method-mismatch (treats it as path-not-found, not 405),
so the CLI classified the response as resource.not_found and
masked the real failure mode. Switched the SDK to http.MethodPut.
The asymmetry that hid this past round 1: kb unpin on a freshly-
created KB hits the no-op branch in cmd/kb/pin.go that skips the
SDK call entirely, so unpin "worked" without ever exercising the
broken path. Only the real-server demo, where kb pin actually
fires, surfaced it.
7. kb edit clobbered current Name when only --description was
passed. EditOptions used *string to distinguish "unset" from
"set to empty", but sdk.UpdateKnowledgeBaseRequest declares both
fields as plain string (no omitempty), so the JSON body always
carried `"name": ""`. Server requires Name → 400. Fix: runEdit
does fetch-then-update — GetKnowledgeBase first, build the PUT
body with current values, then overlay user-set fields. Same
TOCTOU window as kb pin / unpin.
Audit-flagged items intentionally NOT changed:
- kb pin / unpin check-then-toggle TOCTOU: documented; the clean
fix would be a server-side setter and belongs in a separate API
change.
- AuthRetryTransport singleflight test gap for one concurrency
scenario; v0.4 polish.
- cli/README.md:50 "once v0.2 ships" and CHANGELOG.md:8
"10 top-level commands": v0.2-PR artifacts, not v0.3-introduced.
- kb edit / kb pin are v0.3-new commands, so neither bug needs a
cli/CHANGELOG.md Fixed entry — the v0.3 release ships them
working as the Added bullets advertise.
458 lines
16 KiB
Go
458 lines
16 KiB
Go
// Package client provides the implementation for interacting with the WeKnora API
|
|
// The KnowledgeBase-related APIs in this file are used to manage knowledge bases
|
|
// Knowledge bases are collections of knowledge entries that can be used for question-answering
|
|
// They can also be searched and queried using hybrid search
|
|
package client
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"time"
|
|
)
|
|
|
|
// KnowledgeBase represents a knowledge base
|
|
type KnowledgeBase struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"` // Name must be unique within the same tenant
|
|
Type string `json:"type"`
|
|
IsTemporary bool `json:"is_temporary"`
|
|
IsPinned bool `json:"is_pinned"`
|
|
Description string `json:"description"`
|
|
TenantID uint64 `json:"tenant_id"`
|
|
ChunkingConfig ChunkingConfig `json:"chunking_config"`
|
|
ImageProcessingConfig ImageProcessingConfig `json:"image_processing_config"`
|
|
FAQConfig *FAQConfig `json:"faq_config"`
|
|
EmbeddingModelID string `json:"embedding_model_id"`
|
|
SummaryModelID string `json:"summary_model_id"`
|
|
VLMConfig VLMConfig `json:"vlm_config"`
|
|
StorageProviderConfig *StorageProviderConfig `json:"storage_provider_config"`
|
|
StorageConfig StorageConfig `json:"storage_config"`
|
|
ExtractConfig *ExtractConfig `json:"extract_config"`
|
|
CreatedAt time.Time `json:"created_at"`
|
|
UpdatedAt time.Time `json:"updated_at"`
|
|
// Computed fields (not stored in database)
|
|
KnowledgeCount int64 `json:"knowledge_count"`
|
|
ChunkCount int64 `json:"chunk_count"`
|
|
IsProcessing bool `json:"is_processing"`
|
|
ProcessingCount int64 `json:"processing_count"`
|
|
}
|
|
|
|
// KnowledgeBaseConfig represents knowledge base configuration
|
|
type KnowledgeBaseConfig struct {
|
|
ChunkingConfig ChunkingConfig `json:"chunking_config"`
|
|
ImageProcessingConfig ImageProcessingConfig `json:"image_processing_config"`
|
|
FAQConfig *FAQConfig `json:"faq_config"`
|
|
}
|
|
|
|
// ChunkingConfig holds the settings that control how documents are split
// into chunks.
type ChunkingConfig struct {
	// ChunkSize is the chunk size.
	ChunkSize int `json:"chunk_size"`
	// ChunkOverlap is the overlap size.
	ChunkOverlap int `json:"chunk_overlap"`
	// Separators lists the separators; a nil slice encodes as JSON null.
	Separators []string `json:"separators"`
}
|
|
|
|
// FAQConfig carries the FAQ-specific settings of a knowledge base. Both
// values are plain strings on the wire.
type FAQConfig struct {
	IndexMode         string `json:"index_mode"`
	QuestionIndexMode string `json:"question_index_mode"`
}
|
|
|
|
// ImageProcessingConfig holds the image processing settings of a knowledge
// base.
type ImageProcessingConfig struct {
	// ModelID is the multimodal model ID.
	ModelID string `json:"model_id"`
}
|
|
|
|
// VLMConfig is the VLM configuration of a knowledge base: an on/off switch
// plus a model ID.
type VLMConfig struct {
	Enabled bool   `json:"enabled"`
	ModelID string `json:"model_id"`
}
|
|
|
|
// StorageProviderConfig records which storage provider is selected at the
// knowledge-base level.
type StorageProviderConfig struct {
	Provider string `json:"provider"`
}
|
|
|
|
// StorageConfig is the legacy storage configuration (cos_config). It consists
// solely of string fields, so values can be compared with ==.
// Deprecated: use StorageProviderConfig for provider selection.
type StorageConfig struct {
	// Credentials.
	SecretID  string `json:"secret_id"`
	SecretKey string `json:"secret_key"`

	// Bucket location.
	Region     string `json:"region"`
	BucketName string `json:"bucket_name"`
	AppID      string `json:"app_id"`
	PathPrefix string `json:"path_prefix"`

	Provider string `json:"provider"`
}
|
|
|
|
// ExtractConfig represents the extract configuration for a knowledge base
|
|
type ExtractConfig struct {
|
|
Enabled bool `json:"enabled"`
|
|
Text string `json:"text,omitempty"`
|
|
Tags []string `json:"tags,omitempty"`
|
|
Nodes []*GraphNode `json:"nodes,omitempty"`
|
|
Relations []*GraphRelation `json:"relations,omitempty"`
|
|
}
|
|
|
|
// GraphNode is a single node entry of the graph extraction configuration,
// identified by its name.
type GraphNode struct {
	Name string `json:"name"`
}
|
|
|
|
// GraphRelation is a single relation entry of the graph extraction
// configuration: two node references and a relation type, all strings.
type GraphRelation struct {
	Node1 string `json:"node1"`
	Node2 string `json:"node2"`
	Type  string `json:"type"`
}
|
|
|
|
// UnmarshalJSON keeps backward compatibility for legacy responses that still
|
|
// use `cos_config` instead of `storage_config`.
|
|
func (kb *KnowledgeBase) UnmarshalJSON(data []byte) error {
|
|
type alias KnowledgeBase
|
|
aux := struct {
|
|
*alias
|
|
LegacyStorageConfig *StorageConfig `json:"cos_config"`
|
|
}{
|
|
alias: (*alias)(kb),
|
|
}
|
|
if err := json.Unmarshal(data, &aux); err != nil {
|
|
return err
|
|
}
|
|
if aux.LegacyStorageConfig != nil && kb.StorageConfig == (StorageConfig{}) {
|
|
kb.StorageConfig = *aux.LegacyStorageConfig
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// KnowledgeBaseResponse knowledge base response
|
|
type KnowledgeBaseResponse struct {
|
|
Success bool `json:"success"`
|
|
Data KnowledgeBase `json:"data"`
|
|
}
|
|
|
|
// KnowledgeBaseListResponse knowledge base list response
|
|
type KnowledgeBaseListResponse struct {
|
|
Success bool `json:"success"`
|
|
Data []KnowledgeBase `json:"data"`
|
|
}
|
|
|
|
// MatchType identifies the retrieval channel that produced a SearchResult.
//
// The numeric values are the wire contract. They mirror the iota order in
// server-side internal/types/embedding.go, so they are spelled out explicitly
// here — do not reorder without coordinating a server bump. The server-side
// name of each value is kept as a trailing comment so cross-repo grep works
// in both directions.
//
// Channel grouping:
//   - 0-1: primary text channels (vector + keyword)
//   - 2-5: enrichment chunks (added in addition to primary matches, score=0)
//   - 6-9: alternate sources (graph DB, web search, raw load, data analysis)
type MatchType int

const (
	// Primary text channels.
	MatchTypeVector  MatchType = 0 // server: MatchTypeEmbedding
	MatchTypeKeyword MatchType = 1 // server: MatchTypeKeywords

	// Enrichment chunks.
	MatchTypeNearby   MatchType = 2 // server: MatchTypeNearByChunk
	MatchTypeHistory  MatchType = 3 // server: MatchTypeHistory
	MatchTypeParent   MatchType = 4 // server: MatchTypeParentChunk
	MatchTypeRelation MatchType = 5 // server: MatchTypeRelationChunk

	// Alternate sources.
	MatchTypeGraph  MatchType = 6 // server: MatchTypeGraph
	MatchTypeWeb    MatchType = 7 // server: MatchTypeWebSearch
	MatchTypeDirect MatchType = 8 // server: MatchTypeDirectLoad — chunk loaded by ID without scoring
	MatchTypeData   MatchType = 9 // server: MatchTypeDataAnalysis — produced by analytical pipeline, not retrieval
)
|
|
|
|
// SearchResult represents search result.
|
|
//
|
|
// Score is the RRF (reciprocal-rank-fusion) score combining vector and
|
|
// keyword channels — typically in the [0, ~0.03] range when both channels
|
|
// hit, NOT the raw vector similarity. Use MatchType to tell which channel
|
|
// produced each result. Per-channel thresholds (vector_threshold,
|
|
// keyword_threshold) filter pre-fusion at retrieval time, before RRF runs.
|
|
type SearchResult struct {
|
|
ID string `json:"id"`
|
|
Content string `json:"content"`
|
|
KnowledgeID string `json:"knowledge_id"`
|
|
ChunkIndex int `json:"chunk_index"`
|
|
KnowledgeTitle string `json:"knowledge_title"`
|
|
StartAt int `json:"start_at"`
|
|
EndAt int `json:"end_at"`
|
|
Seq int `json:"seq"`
|
|
Score float64 `json:"score"`
|
|
MatchType MatchType `json:"match_type"`
|
|
ChunkType string `json:"chunk_type"`
|
|
ImageInfo string `json:"image_info"`
|
|
Metadata map[string]string `json:"metadata"`
|
|
KnowledgeFilename string `json:"knowledge_filename"`
|
|
KnowledgeSource string `json:"knowledge_source"`
|
|
KnowledgeChannel string `json:"knowledge_channel"`
|
|
// MatchedContent is the actual content that was matched in vector search
|
|
// For FAQ: this is the matched question text (standard or similar question)
|
|
MatchedContent string `json:"matched_content,omitempty"`
|
|
}
|
|
|
|
// HybridSearchResponse hybrid search response
|
|
type HybridSearchResponse struct {
|
|
Success bool `json:"success"`
|
|
Data []*SearchResult `json:"data"`
|
|
}
|
|
|
|
// CopyKnowledgeBaseRequest is the request body passed to CopyKnowledgeBase.
type CopyKnowledgeBaseRequest struct {
	// TaskID is left out of the encoded JSON when empty.
	TaskID string `json:"task_id,omitempty"`

	SourceID string `json:"source_id"`
	TargetID string `json:"target_id"`
}
|
|
|
|
// CopyKnowledgeBaseResponse is the payload returned by the copy knowledge
// base API: the task ID, the source and target IDs, and a message.
type CopyKnowledgeBaseResponse struct {
	TaskID   string `json:"task_id"`
	SourceID string `json:"source_id"`
	TargetID string `json:"target_id"`
	Message  string `json:"message"`
}
|
|
|
|
// KBCloneProgress describes how far a knowledge base clone task has got.
type KBCloneProgress struct {
	// Task identity.
	TaskID   string `json:"task_id"`
	SourceID string `json:"source_id"`
	TargetID string `json:"target_id"`

	// Status is one of: pending, processing, completed, failed.
	Status string `json:"status"`
	// Progress is in the range 0-100.
	Progress int `json:"progress"`
	// Total is the total operations count.
	Total int `json:"total"`
	// Processed is the processed operations count.
	Processed int `json:"processed"`

	Message string `json:"message"`
	// Error is left out of the encoded JSON when empty.
	Error string `json:"error,omitempty"`

	CreatedAt int64 `json:"created_at"`
	UpdatedAt int64 `json:"updated_at"`
}
|
|
|
|
// CreateKnowledgeBase creates a knowledge base
|
|
func (c *Client) CreateKnowledgeBase(ctx context.Context, knowledgeBase *KnowledgeBase) (*KnowledgeBase, error) {
|
|
resp, err := c.doRequest(ctx, http.MethodPost, "/api/v1/knowledge-bases", knowledgeBase, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response KnowledgeBaseResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|
|
|
|
// GetKnowledgeBase gets a knowledge base
|
|
func (c *Client) GetKnowledgeBase(ctx context.Context, knowledgeBaseID string) (*KnowledgeBase, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s", knowledgeBaseID)
|
|
resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response KnowledgeBaseResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|
|
|
|
// ListKnowledgeBases lists knowledge bases
|
|
func (c *Client) ListKnowledgeBases(ctx context.Context) ([]KnowledgeBase, error) {
|
|
resp, err := c.doRequest(ctx, http.MethodGet, "/api/v1/knowledge-bases", nil, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response KnowledgeBaseListResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return response.Data, nil
|
|
}
|
|
|
|
// UpdateKnowledgeBaseRequest update knowledge base request
|
|
type UpdateKnowledgeBaseRequest struct {
|
|
Name string `json:"name"`
|
|
Description string `json:"description"`
|
|
Config *KnowledgeBaseConfig `json:"config"`
|
|
}
|
|
|
|
// UpdateKnowledgeBase updates a knowledge base
|
|
func (c *Client) UpdateKnowledgeBase(ctx context.Context,
|
|
knowledgeBaseID string,
|
|
request *UpdateKnowledgeBaseRequest,
|
|
) (*KnowledgeBase, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s", knowledgeBaseID)
|
|
resp, err := c.doRequest(ctx, http.MethodPut, path, request, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response KnowledgeBaseResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|
|
|
|
// DeleteKnowledgeBase deletes a knowledge base
|
|
func (c *Client) DeleteKnowledgeBase(ctx context.Context, knowledgeBaseID string) error {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s", knowledgeBaseID)
|
|
resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var response struct {
|
|
Success bool `json:"success"`
|
|
Message string `json:"message,omitempty"`
|
|
}
|
|
|
|
return parseResponse(resp, &response)
|
|
}
|
|
|
|
// ClearKnowledgeBaseContentsResponse is the payload returned by the clear
// knowledge base contents API.
type ClearKnowledgeBaseContentsResponse struct {
	DeletedCount int `json:"deleted_count"`
}
|
|
|
|
// ClearKnowledgeBaseContents deletes all knowledge entries in a knowledge base (async).
|
|
// The knowledge base itself is preserved; only its contents are removed.
|
|
func (c *Client) ClearKnowledgeBaseContents(ctx context.Context, knowledgeBaseID string) (*ClearKnowledgeBaseContentsResponse, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s/knowledge", knowledgeBaseID)
|
|
resp, err := c.doRequest(ctx, http.MethodDelete, path, nil, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response struct {
|
|
Success bool `json:"success"`
|
|
Message string `json:"message"`
|
|
Data ClearKnowledgeBaseContentsResponse `json:"data"`
|
|
}
|
|
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|
|
|
|
// SearchParams holds the parameters of a hybrid search request. All fields
// are always present in the encoded JSON (no omitempty).
type SearchParams struct {
	QueryText string `json:"query_text"`

	// Per-channel thresholds.
	VectorThreshold  float64 `json:"vector_threshold"`
	KeywordThreshold float64 `json:"keyword_threshold"`

	MatchCount int `json:"match_count"`

	// Switches that turn off one of the two match channels.
	DisableKeywordsMatch bool `json:"disable_keywords_match"`
	DisableVectorMatch   bool `json:"disable_vector_match"`
}
|
|
|
|
// HybridSearch performs hybrid search
|
|
// Note: The backend route is GET but expects JSON body, which is non-standard.
|
|
// This client uses POST with JSON body for better compatibility.
|
|
func (c *Client) HybridSearch(ctx context.Context, knowledgeBaseID string, params *SearchParams) ([]*SearchResult, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s/hybrid-search", knowledgeBaseID)
|
|
|
|
resp, err := c.doRequest(ctx, http.MethodGet, path, params, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response HybridSearchResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return response.Data, nil
|
|
}
|
|
|
|
// TogglePinKnowledgeBase toggles the pin status of a knowledge base.
|
|
// Server route is PUT (see internal/router/router.go); using POST silently
|
|
// 404s — the router treats unknown method on a known path as not-found,
|
|
// not 405.
|
|
func (c *Client) TogglePinKnowledgeBase(ctx context.Context, knowledgeBaseID string) (*KnowledgeBase, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s/pin", knowledgeBaseID)
|
|
resp, err := c.doRequest(ctx, http.MethodPut, path, nil, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response KnowledgeBaseResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|
|
|
|
// MoveTarget describes a knowledge base that can receive moved knowledge:
// its ID, name, type and description.
type MoveTarget struct {
	ID          string `json:"id"`
	Name        string `json:"name"`
	Type        string `json:"type"`
	Description string `json:"description"`
}
|
|
|
|
// ListMoveTargets lists knowledge bases eligible as move targets for the given source KB
|
|
func (c *Client) ListMoveTargets(ctx context.Context, knowledgeBaseID string) ([]KnowledgeBase, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/%s/move-targets", knowledgeBaseID)
|
|
resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response KnowledgeBaseListResponse
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return response.Data, nil
|
|
}
|
|
|
|
// CopyKnowledgeBase copies a knowledge base asynchronously and returns task info
|
|
func (c *Client) CopyKnowledgeBase(ctx context.Context, request *CopyKnowledgeBaseRequest) (*CopyKnowledgeBaseResponse, error) {
|
|
path := "/api/v1/knowledge-bases/copy"
|
|
|
|
resp, err := c.doRequest(ctx, http.MethodPost, path, request, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response struct {
|
|
Success bool `json:"success"`
|
|
Data CopyKnowledgeBaseResponse `json:"data"`
|
|
}
|
|
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|
|
|
|
// GetKBCloneProgress gets the progress of a knowledge base clone task
|
|
func (c *Client) GetKBCloneProgress(ctx context.Context, taskID string) (*KBCloneProgress, error) {
|
|
path := fmt.Sprintf("/api/v1/knowledge-bases/copy/progress/%s", taskID)
|
|
|
|
resp, err := c.doRequest(ctx, http.MethodGet, path, nil, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var response struct {
|
|
Success bool `json:"success"`
|
|
Data KBCloneProgress `json:"data"`
|
|
}
|
|
|
|
if err := parseResponse(resp, &response); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &response.Data, nil
|
|
}
|