mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
Wires KnowledgeBase.VectorStoreID and the ownership-aware retrieve factory into the user-facing knowledge-base lifecycle:
- POST /knowledge-bases validates the requested vector_store_id against the caller's tenant scope and the engine registry. New error codes ErrVectorStoreBindingInvalid (2200) and ErrVectorStoreUnavailable (2201) distinguish the typed branches without echoing UUIDs to the client.
- GET / POST / PUT / PUT-pin responses embed the bound store's display metadata (name, source, engine_type, status) without exposing any connection credentials. Cross-tenant shared KBs receive a suppressed payload (vector_store_id stripped, source="shared") so operator-chosen store names cannot be enumerated across tenants.
- POST /knowledge-bases/copy synchronously rejects clones whose target has a different embedding model or vector store, before the async clone task is enqueued. The async clone worker re-applies the same checks for defense in depth.
- DELETE /vector-stores/:id refuses to remove a store with bound KBs, inside a transaction that row-locks the store on PostgreSQL and serializes via WAL on SQLite. Unregistering from the registry is wrapped in defer/recover so a panic surfaces as a structured warning instead of silently leaking a stale engine.
- vector_store_id is immutable after creation. The GORM <-:create tag blocks every ORM update path; the service-layer DTO omits the field entirely; a reflection-based regression test catches any future maintainer who adds it back to either layer.
- Empty-string vector_store_id is normalized to nil at both the create path and inside SharesStoreWith, so rows persisted by callers that did not run Normalize first cannot trip false same-store comparisons.
Part of #993. Depends on #994 and #1310.
181 lines
5.9 KiB
Go
181 lines
5.9 KiB
Go
package handler
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
|
|
apperrors "github.com/Tencent/WeKnora/internal/errors"
|
|
"github.com/Tencent/WeKnora/internal/middleware"
|
|
"github.com/Tencent/WeKnora/internal/types"
|
|
"github.com/Tencent/WeKnora/internal/types/interfaces"
|
|
)
|
|
|
|
// handler.CopyKnowledgeBase pre-flight tests.
|
|
//
|
|
// The async clone worker (service.CopyKnowledgeBase) re-applies the same
|
|
// embedding-model and store-binding defenses as defense in depth, but the
|
|
// handler-level pre-flight is the one that surfaces 400 to the API caller
|
|
// synchronously instead of inside Asynq progress polling. These tests pin
|
|
// the synchronous behavior so a future refactor that drops the pre-flight
|
|
// fails loudly here rather than silently degrading UX.
|
|
|
|
// stubKBCopyService provides only the two methods the Copy handler reaches
|
|
// for (GetKnowledgeBaseByID twice). Other interface methods stay nil so any
|
|
// accidental new call panics rather than silently succeeding.
|
|
type stubKBCopyService struct {
|
|
interfaces.KnowledgeBaseService
|
|
byID func(ctx context.Context, id string) (*types.KnowledgeBase, error)
|
|
}
|
|
|
|
func (s *stubKBCopyService) GetKnowledgeBaseByID(ctx context.Context, id string) (*types.KnowledgeBase, error) {
|
|
return s.byID(ctx, id)
|
|
}
|
|
|
|
// stubEnqueueInfo is a lightweight stand-in for asynq.TaskInfo, so a fake
// enqueue can hand back something without constructing the real type.
type stubEnqueueInfo struct {
	ID string
}

// stubEnqueuer is a call-counting fake enqueuer. The pre-flight checks are
// meant to short-circuit before any task is enqueued, so a non-zero count
// after a mismatched clone request would indicate a regression.
//
// NOTE(review): nothing visible in this file attaches the stub to the handler
// under test, so calls is only meaningful once a caller wires it in.
type stubEnqueuer struct {
	// calls is the number of Enqueue invocations observed so far.
	calls int
}

// Enqueue ignores its arguments, bumps the call counter, and returns a fixed
// placeholder info value with a nil error.
func (e *stubEnqueuer) Enqueue(_ interface{}, _ ...interface{}) (*stubEnqueueInfo, error) {
	e.calls++
	info := &stubEnqueueInfo{ID: "x"}
	return info, nil
}
|
|
|
|
func newCopyPreflightRouter(svc interfaces.KnowledgeBaseService) (*gin.Engine, *stubEnqueuer) {
|
|
gin.SetMode(gin.TestMode)
|
|
r := gin.New()
|
|
r.Use(middleware.ErrorHandler())
|
|
r.Use(func(c *gin.Context) {
|
|
c.Set(types.TenantIDContextKey.String(), uint64(1))
|
|
c.Set(types.UserIDContextKey.String(), "u-test")
|
|
c.Next()
|
|
})
|
|
enq := &stubEnqueuer{}
|
|
// The real handler reaches into h.asynqClient.Enqueue with the asynq
|
|
// task type. We do not exercise the enqueue path in these tests — every
|
|
// case here either short-circuits at pre-flight or returns 4xx earlier.
|
|
// Leaving asynqClient nil would panic if enqueue ran, which is exactly
|
|
// the regression we want to catch.
|
|
h := &KnowledgeBaseHandler{service: svc}
|
|
r.POST("/knowledge-bases/copy", h.CopyKnowledgeBase)
|
|
return r, enq
|
|
}
|
|
|
|
// storeIDPtr returns a pointer to a private copy of s, for filling optional
// *string fields such as KnowledgeBase.VectorStoreID in test fixtures.
func storeIDPtr(s string) *string {
	id := s
	return &id
}
|
|
|
|
func TestCopyHandlerPreflight_DifferentEmbeddingModel(t *testing.T) {
|
|
svc := &stubKBCopyService{
|
|
byID: func(_ context.Context, id string) (*types.KnowledgeBase, error) {
|
|
switch id {
|
|
case "src":
|
|
return &types.KnowledgeBase{
|
|
ID: "src", TenantID: 1, EmbeddingModelID: "embed-A",
|
|
}, nil
|
|
case "dst":
|
|
return &types.KnowledgeBase{
|
|
ID: "dst", TenantID: 1, EmbeddingModelID: "embed-B",
|
|
}, nil
|
|
}
|
|
return nil, nil
|
|
},
|
|
}
|
|
r, _ := newCopyPreflightRouter(svc)
|
|
|
|
w := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/knowledge-bases/copy",
|
|
strings.NewReader(`{"source_id":"src","target_id":"dst"}`))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
r.ServeHTTP(w, req)
|
|
|
|
if w.Code != http.StatusBadRequest {
|
|
t.Fatalf("expected 400 for embedding-model mismatch, got %d body=%s", w.Code, w.Body.String())
|
|
}
|
|
if !strings.Contains(w.Body.String(), "different embedding models") {
|
|
t.Fatalf("expected embedding-model error message, got %s", w.Body.String())
|
|
}
|
|
}
|
|
|
|
func TestCopyHandlerPreflight_DifferentVectorStore(t *testing.T) {
|
|
svc := &stubKBCopyService{
|
|
byID: func(_ context.Context, id string) (*types.KnowledgeBase, error) {
|
|
switch id {
|
|
case "src":
|
|
return &types.KnowledgeBase{
|
|
ID: "src", TenantID: 1, EmbeddingModelID: "embed-A",
|
|
VectorStoreID: storeIDPtr("store-A"),
|
|
}, nil
|
|
case "dst":
|
|
return &types.KnowledgeBase{
|
|
ID: "dst", TenantID: 1, EmbeddingModelID: "embed-A",
|
|
VectorStoreID: storeIDPtr("store-B"),
|
|
}, nil
|
|
}
|
|
return nil, nil
|
|
},
|
|
}
|
|
r, _ := newCopyPreflightRouter(svc)
|
|
|
|
w := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/knowledge-bases/copy",
|
|
strings.NewReader(`{"source_id":"src","target_id":"dst"}`))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
r.ServeHTTP(w, req)
|
|
|
|
if w.Code != http.StatusBadRequest {
|
|
t.Fatalf("expected 400 for store mismatch, got %d body=%s", w.Code, w.Body.String())
|
|
}
|
|
if !strings.Contains(w.Body.String(), "different vector stores") {
|
|
t.Fatalf("expected store-mismatch error message, got %s", w.Body.String())
|
|
}
|
|
if strings.Contains(w.Body.String(), "Phase 4") {
|
|
t.Fatalf("error message must not leak internal roadmap labels: %s", w.Body.String())
|
|
}
|
|
}
|
|
|
|
func TestCopyHandlerPreflight_OneSideNilStore(t *testing.T) {
|
|
svc := &stubKBCopyService{
|
|
byID: func(_ context.Context, id string) (*types.KnowledgeBase, error) {
|
|
switch id {
|
|
case "src":
|
|
return &types.KnowledgeBase{
|
|
ID: "src", TenantID: 1, EmbeddingModelID: "embed-A",
|
|
VectorStoreID: nil,
|
|
}, nil
|
|
case "dst":
|
|
return &types.KnowledgeBase{
|
|
ID: "dst", TenantID: 1, EmbeddingModelID: "embed-A",
|
|
VectorStoreID: storeIDPtr("store-A"),
|
|
}, nil
|
|
}
|
|
return nil, nil
|
|
},
|
|
}
|
|
r, _ := newCopyPreflightRouter(svc)
|
|
|
|
w := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodPost, "/knowledge-bases/copy",
|
|
strings.NewReader(`{"source_id":"src","target_id":"dst"}`))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
r.ServeHTTP(w, req)
|
|
|
|
if w.Code != http.StatusBadRequest {
|
|
t.Fatalf("expected 400 when one side is env-store and the other is DB-store, got %d body=%s",
|
|
w.Code, w.Body.String())
|
|
}
|
|
}
|
|
|
|
// compile-time guard against accidentally dropping the apperrors import
|
|
// from the file — if the pre-flight refactor goes away, this fails too.
|
|
var _ = apperrors.NewBadRequestError // blank-identifier reference; the only visible use of the apperrors import in this file
|