mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
Cloning a knowledge base previously copied only the storage path strings (knowledge.FilePath and chunk.ImageInfo.URL), so the source and the clone shared the same physical objects in the storage backend. Once the original file and extracted images are deleted on source removal, the clone is left with dangling references and its document and images become unreadable — data loss that occurs even for same-store clones. Add a CopyFile primitive to the FileService interface and implement it in every backend: server-side CopyObject on the object stores (s3/obs/cos/oss/tos/ks3/minio), io.Copy on local, and a no-op on dummy. Destinations use the knowledge-owned layout and reuse the existing path/object-key guards; a sentinel ErrCrossBackendCopy is returned when the source scheme does not match the backend. Use CopyFile to deep-copy the document file in cloneKnowledge and the extracted images in CloneChunk and cloneFAQKnowledgeBase via a shared cloneChunkImageInfo helper that deduplicates identical image URLs per clone and rewrites them to the new objects. Copied objects are cleaned up best-effort if a clone fails partway through. A clone-time preflight rejects cloning into a target bound to a different storage backend when the tenant pins providers via StorageEngineConfig. Adds unit tests for local CopyFile (independent copy survives source deletion, traversal rejection, cross-backend rejection), cloneChunkImageInfo (empty/multi/dedup/parse-failure/OriginalURL handling), and the storage provider preflight.
236 lines
6.0 KiB
Go
236 lines
6.0 KiB
Go
package service
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"mime/multipart"
|
|
"net/http/httptest"
|
|
"testing"
|
|
|
|
"github.com/Tencent/WeKnora/internal/types"
|
|
"github.com/Tencent/WeKnora/internal/types/interfaces"
|
|
"github.com/hibiken/asynq"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
type createKnowledgeFileRepoStub struct {
|
|
interfaces.KnowledgeRepository
|
|
|
|
createCalls int
|
|
createErr error
|
|
createdKnowledge *types.Knowledge
|
|
}
|
|
|
|
func (r *createKnowledgeFileRepoStub) CheckKnowledgeExists(
|
|
ctx context.Context,
|
|
tenantID uint64,
|
|
kbID string,
|
|
params *types.KnowledgeCheckParams,
|
|
) (bool, *types.Knowledge, error) {
|
|
return false, nil, nil
|
|
}
|
|
|
|
func (r *createKnowledgeFileRepoStub) CreateKnowledge(ctx context.Context, knowledge *types.Knowledge) error {
|
|
r.createCalls++
|
|
copied := *knowledge
|
|
r.createdKnowledge = &copied
|
|
return r.createErr
|
|
}
|
|
|
|
type createKnowledgeFileKBServiceStub struct {
|
|
interfaces.KnowledgeBaseService
|
|
|
|
kb *types.KnowledgeBase
|
|
}
|
|
|
|
func (s *createKnowledgeFileKBServiceStub) GetKnowledgeBaseByID(
|
|
ctx context.Context,
|
|
id string,
|
|
) (*types.KnowledgeBase, error) {
|
|
return s.kb, nil
|
|
}
|
|
|
|
// createKnowledgeFileServiceStub is a test double used as the fileSvc of
// knowledgeService. It records SaveFile and DeleteFile calls so tests can
// assert on them.
type createKnowledgeFileServiceStub struct {
	// saveErr, when non-nil, is returned by SaveFile.
	saveErr error

	// saveCalls is the number of SaveFile invocations, and
	// savedWithKnowledgeID is the knowledgeID argument of the latest one.
	saveCalls            int
	savedWithKnowledgeID string

	// deleteCalls is the number of DeleteFile invocations, and deletedPath is
	// the filePath argument of the latest one.
	deleteCalls int
	deletedPath string
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) CheckConnectivity(ctx context.Context) error {
|
|
return nil
|
|
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) SaveFile(
|
|
ctx context.Context,
|
|
file *multipart.FileHeader,
|
|
tenantID uint64,
|
|
knowledgeID string,
|
|
) (string, error) {
|
|
s.saveCalls++
|
|
s.savedWithKnowledgeID = knowledgeID
|
|
if s.saveErr != nil {
|
|
return "", s.saveErr
|
|
}
|
|
return "stored/" + knowledgeID, nil
|
|
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) SaveBytes(
|
|
ctx context.Context,
|
|
data []byte,
|
|
tenantID uint64,
|
|
fileName string,
|
|
temp bool,
|
|
) (string, error) {
|
|
return "", errors.New("not implemented")
|
|
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) GetFile(ctx context.Context, filePath string) (io.ReadCloser, error) {
|
|
return nil, errors.New("not implemented")
|
|
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) GetFileURL(ctx context.Context, filePath string) (string, error) {
|
|
return "", errors.New("not implemented")
|
|
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) DeleteFile(ctx context.Context, filePath string) error {
|
|
s.deleteCalls++
|
|
s.deletedPath = filePath
|
|
return nil
|
|
}
|
|
|
|
func (s *createKnowledgeFileServiceStub) CopyFile(ctx context.Context, srcPath string, tenantID uint64, knowledgeID string) (string, error) {
|
|
return "", errors.New("not implemented")
|
|
}
|
|
|
|
// createKnowledgeTaskEnqueuerStub is a test double used as the task field of
// knowledgeService; it only counts how often Enqueue is called.
type createKnowledgeTaskEnqueuerStub struct {
	// calls is the number of Enqueue invocations so far.
	calls int
}
|
|
|
|
func (s *createKnowledgeTaskEnqueuerStub) Enqueue(
|
|
task *asynq.Task,
|
|
opts ...asynq.Option,
|
|
) (*asynq.TaskInfo, error) {
|
|
s.calls++
|
|
return &asynq.TaskInfo{ID: "task-1", Queue: "default"}, nil
|
|
}
|
|
|
|
func TestCreateKnowledgeFromFileDoesNotPersistWhenStorageSaveFails(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
repo := &createKnowledgeFileRepoStub{}
|
|
fileSvc := &createKnowledgeFileServiceStub{saveErr: errors.New("storage unavailable")}
|
|
svc := &knowledgeService{
|
|
repo: repo,
|
|
kbService: &createKnowledgeFileKBServiceStub{kb: &types.KnowledgeBase{ID: "kb-1"}},
|
|
fileSvc: fileSvc,
|
|
}
|
|
|
|
knowledge, err := svc.CreateKnowledgeFromFile(
|
|
newCreateKnowledgeFileContext(),
|
|
"kb-1",
|
|
newMultipartFileHeader(t, "doc.txt", "hello"),
|
|
nil,
|
|
nil,
|
|
"",
|
|
"",
|
|
"",
|
|
)
|
|
|
|
require.Error(t, err)
|
|
require.Nil(t, knowledge)
|
|
require.Equal(t, 1, fileSvc.saveCalls)
|
|
require.Zero(t, repo.createCalls)
|
|
}
|
|
|
|
func TestCreateKnowledgeFromFilePersistsStoredFilePathOnCreate(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
repo := &createKnowledgeFileRepoStub{}
|
|
fileSvc := &createKnowledgeFileServiceStub{}
|
|
task := &createKnowledgeTaskEnqueuerStub{}
|
|
svc := &knowledgeService{
|
|
repo: repo,
|
|
kbService: &createKnowledgeFileKBServiceStub{kb: &types.KnowledgeBase{ID: "kb-1"}},
|
|
fileSvc: fileSvc,
|
|
task: task,
|
|
}
|
|
|
|
knowledge, err := svc.CreateKnowledgeFromFile(
|
|
newCreateKnowledgeFileContext(),
|
|
"kb-1",
|
|
newMultipartFileHeader(t, "doc.txt", "hello"),
|
|
nil,
|
|
nil,
|
|
"",
|
|
"",
|
|
"",
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
require.NotNil(t, knowledge)
|
|
require.Equal(t, 1, fileSvc.saveCalls)
|
|
require.NotEmpty(t, fileSvc.savedWithKnowledgeID)
|
|
require.Equal(t, fileSvc.savedWithKnowledgeID, knowledge.ID)
|
|
require.Equal(t, 1, repo.createCalls)
|
|
require.NotNil(t, repo.createdKnowledge)
|
|
require.Equal(t, "stored/"+knowledge.ID, repo.createdKnowledge.FilePath)
|
|
require.Equal(t, 1, task.calls)
|
|
}
|
|
|
|
func TestCreateKnowledgeFromFileDeletesStoredFileWhenCreateFails(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
repo := &createKnowledgeFileRepoStub{createErr: errors.New("database unavailable")}
|
|
fileSvc := &createKnowledgeFileServiceStub{}
|
|
svc := &knowledgeService{
|
|
repo: repo,
|
|
kbService: &createKnowledgeFileKBServiceStub{kb: &types.KnowledgeBase{ID: "kb-1"}},
|
|
fileSvc: fileSvc,
|
|
}
|
|
|
|
knowledge, err := svc.CreateKnowledgeFromFile(
|
|
newCreateKnowledgeFileContext(),
|
|
"kb-1",
|
|
newMultipartFileHeader(t, "doc.txt", "hello"),
|
|
nil,
|
|
nil,
|
|
"",
|
|
"",
|
|
"",
|
|
)
|
|
|
|
require.EqualError(t, err, "database unavailable")
|
|
require.Nil(t, knowledge)
|
|
require.Equal(t, 1, fileSvc.saveCalls)
|
|
require.Equal(t, 1, repo.createCalls)
|
|
require.Equal(t, 1, fileSvc.deleteCalls)
|
|
require.Equal(t, "stored/"+fileSvc.savedWithKnowledgeID, fileSvc.deletedPath)
|
|
}
|
|
|
|
func newCreateKnowledgeFileContext() context.Context {
|
|
ctx := context.WithValue(context.Background(), types.TenantIDContextKey, uint64(1))
|
|
ctx = context.WithValue(ctx, types.TenantInfoContextKey, &types.Tenant{})
|
|
return ctx
|
|
}
|
|
|
|
func newMultipartFileHeader(t *testing.T, filename string, content string) *multipart.FileHeader {
|
|
t.Helper()
|
|
|
|
var body bytes.Buffer
|
|
writer := multipart.NewWriter(&body)
|
|
part, err := writer.CreateFormFile("file", filename)
|
|
require.NoError(t, err)
|
|
_, err = part.Write([]byte(content))
|
|
require.NoError(t, err)
|
|
require.NoError(t, writer.Close())
|
|
|
|
req := httptest.NewRequest("POST", "/", &body)
|
|
req.Header.Set("Content-Type", writer.FormDataContentType())
|
|
require.NoError(t, req.ParseMultipartForm(1024))
|
|
return req.MultipartForm.File["file"][0]
|
|
}
|