mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
Cloning a knowledge base previously copied only the storage path strings (knowledge.FilePath and chunk.ImageInfo.URL), so the source and the clone shared the same physical objects in the storage backend. Once the original file and extracted images are deleted on source removal, the clone is left with dangling references and its document and images become unreadable — data loss that occurs even for same-store clones. Add a CopyFile primitive to the FileService interface and implement it in every backend: server-side CopyObject on the object stores (s3/obs/cos/oss/tos/ks3/minio), io.Copy on local, and a no-op on dummy. Destinations use the knowledge-owned layout and reuse the existing path/object-key guards; a sentinel ErrCrossBackendCopy is returned when the source scheme does not match the backend. Use CopyFile to deep-copy the document file in cloneKnowledge and the extracted images in CloneChunk and cloneFAQKnowledgeBase via a shared cloneChunkImageInfo helper that deduplicates identical image URLs per clone and rewrites them to the new objects. Copied objects are cleaned up best-effort if a clone fails partway through. A clone-time preflight rejects cloning into a target bound to a different storage backend when the tenant pins providers via StorageEngineConfig. Adds unit tests for local CopyFile (independent copy survives source deletion, traversal rejection, cross-backend rejection), cloneChunkImageInfo (empty/multi/dedup/parse-failure/OriginalURL handling), and the storage provider preflight.
62 lines
2.1 KiB
Go
62 lines
2.1 KiB
Go
package file
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"mime/multipart"
|
|
|
|
"github.com/Tencent/WeKnora/internal/logger"
|
|
"github.com/Tencent/WeKnora/internal/types/interfaces"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// DummyFileService is a stand-in FileService implementation whose methods
// perform no real storage operations. It carries no state, so its zero value
// is ready to use. It is meant for tests and for deployments that do not need
// file storage.
type DummyFileService struct{}
|
|
|
|
// CheckConnectivity always succeeds for the dummy service.
|
|
func (s *DummyFileService) CheckConnectivity(ctx context.Context) error {
|
|
return nil
|
|
}
|
|
|
|
// NewDummyFileService creates a new instance of DummyFileService
|
|
func NewDummyFileService() interfaces.FileService {
|
|
return &DummyFileService{}
|
|
}
|
|
|
|
// SaveFile pretends to save a file but just returns a random UUID
|
|
// This is useful for testing without actual file operations
|
|
func (s *DummyFileService) SaveFile(ctx context.Context,
|
|
file *multipart.FileHeader, tenantID uint64, knowledgeID string,
|
|
) (string, error) {
|
|
return uuid.New().String(), nil
|
|
}
|
|
|
|
// GetFile always returns an error as dummy service doesn't store files
|
|
func (s *DummyFileService) GetFile(ctx context.Context, filePath string) (io.ReadCloser, error) {
|
|
return nil, errors.New("not implemented")
|
|
}
|
|
|
|
// DeleteFile is a no-op operation that always succeeds
|
|
func (s *DummyFileService) DeleteFile(ctx context.Context, filePath string) error {
|
|
return nil
|
|
}
|
|
|
|
// SaveBytes pretends to save bytes but just returns a random UUID
|
|
func (s *DummyFileService) SaveBytes(ctx context.Context, data []byte, tenantID uint64, fileName string, temp bool) (string, error) {
|
|
return uuid.New().String(), nil
|
|
}
|
|
|
|
// CopyFile is a no-op for the dummy service: it logs a warning and returns the
|
|
// source path unchanged (the shared reference is intentional in this stub).
|
|
func (s *DummyFileService) CopyFile(ctx context.Context, srcPath string, tenantID uint64, knowledgeID string) (string, error) {
|
|
logger.Warnf(ctx, "[dummy] CopyFile no-op: returning source path %q unchanged (no real copy performed)", srcPath)
|
|
return srcPath, nil
|
|
}
|
|
|
|
// GetFileURL returns the file path as URL (dummy implementation)
|
|
func (s *DummyFileService) GetFileURL(ctx context.Context, filePath string) (string, error) {
|
|
return filePath, nil
|
|
}
|