mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
feat(datasource): add Yuque (语雀) connector
Syncs personal and team Yuque knowledge bases into WeKnora as Markdown, with full and incremental sync + deletion detection. Mirrors the Feishu connector pattern; fetches V2Doc.body directly (no async export step). Config: api_token (required) + base_url (optional, defaults to public cloud). 401/403 wraps ErrInvalidCredentials; 429 honors Retry-After; filters to status=1 Doc-type at list stage to conserve API quota. Body is trusted as Markdown for format=markdown|lake — empirically verified against the v2 API: Lake XML lives separately in body_lake, body itself is the Markdown view. Other formats (e.g. html) are skipped with a placeholder item so they surface in sync logs instead of silently feeding HTML to the parser. v2Doc.status accepts both int and string shapes — Yuque's OpenAPI spec declares string but the live API returns int. sanitizeFileName truncates at a UTF-8 rune boundary so multi-byte titles don't emit invalid UTF-8 that downstream filename validation rejects. Also adds logger.SetOutput test hook, used by the token-redaction test.
This commit is contained in:
@@ -52,6 +52,7 @@ import (
|
||||
"github.com/Tencent/WeKnora/internal/datasource"
|
||||
feishuConnector "github.com/Tencent/WeKnora/internal/datasource/connector/feishu"
|
||||
notionConnector "github.com/Tencent/WeKnora/internal/datasource/connector/notion"
|
||||
yuqueConnector "github.com/Tencent/WeKnora/internal/datasource/connector/yuque"
|
||||
"github.com/Tencent/WeKnora/internal/event"
|
||||
"github.com/Tencent/WeKnora/internal/handler"
|
||||
"github.com/Tencent/WeKnora/internal/handler/session"
|
||||
@@ -177,7 +178,6 @@ func BuildContainer(container *dig.Container) *dig.Container {
|
||||
must(container.Provide(service.NewImageMultimodalService, dig.Name("imageMultimodal")))
|
||||
must(container.Provide(service.NewKnowledgePostProcessService, dig.Name("knowledgePostProcess")))
|
||||
|
||||
|
||||
must(container.Provide(service.NewMessageService))
|
||||
must(container.Provide(service.NewMCPServiceService))
|
||||
must(container.Provide(service.NewCustomAgentService))
|
||||
@@ -1472,9 +1472,11 @@ func initConnectorRegistry() *datasource.ConnectorRegistry {
|
||||
// Register Notion connector
|
||||
_ = registry.Register(notionConnector.NewConnector())
|
||||
|
||||
// Register Yuque connector
|
||||
_ = registry.Register(yuqueConnector.NewConnector())
|
||||
|
||||
// Future connectors will be registered here:
|
||||
// _ = registry.Register(confluenceConnector.NewConnector())
|
||||
// _ = registry.Register(yuqueConnector.NewConnector())
|
||||
// _ = registry.Register(githubConnector.NewConnector())
|
||||
|
||||
return registry
|
||||
|
||||
296
internal/datasource/connector/yuque/client.go
Normal file
296
internal/datasource/connector/yuque/client.go
Normal file
@@ -0,0 +1,296 @@
|
||||
package yuque
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/Tencent/WeKnora/internal/datasource"
|
||||
"github.com/Tencent/WeKnora/internal/logger"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultTimeout = 30 * time.Second
|
||||
defaultPageSize = 100
|
||||
userAgent = "WeKnora-Yuque-Connector/1.0"
|
||||
)
|
||||
|
||||
// client wraps the Yuque Open API.
|
||||
type client struct {
|
||||
baseURL string
|
||||
token string
|
||||
httpClient *http.Client
|
||||
|
||||
// logTokenOnce ensures the redacted token identity is logged at most once
|
||||
// per client lifetime (first real request), rather than on every call.
|
||||
logTokenOnce sync.Once
|
||||
}
|
||||
|
||||
// newClient constructs a client with a normalized base URL.
|
||||
func newClient(cfg *Config) *client {
|
||||
return &client{
|
||||
baseURL: cfg.GetBaseURL(),
|
||||
token: cfg.APIToken,
|
||||
httpClient: &http.Client{Timeout: defaultTimeout},
|
||||
}
|
||||
}
|
||||
|
||||
// doRequest executes an authenticated request and decodes JSON, with retry logic
|
||||
// for transient errors (429, 5xx, transport failures).
|
||||
// The raw X-Auth-Token is never logged; only a redacted form is emitted once per
|
||||
// client lifetime (not per request) to keep sync logs readable at thousand-doc scale.
|
||||
func (c *client) doRequest(ctx context.Context, method, path string, result interface{}) error {
|
||||
const (
|
||||
maxRetries = 3
|
||||
max5xxRetries = 1
|
||||
retry5xxDelay = 2 * time.Second
|
||||
)
|
||||
var lastErr error
|
||||
backoff := []time.Duration{2 * time.Second, 4 * time.Second, 8 * time.Second}
|
||||
|
||||
c.logTokenOnce.Do(func() {
|
||||
logger.Infof(ctx, "[Yuque] client configured token=%s base=%s", redactToken(c.token), c.baseURL)
|
||||
})
|
||||
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
reqURL := c.baseURL + path
|
||||
req, err := http.NewRequestWithContext(ctx, method, reqURL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create request: %w", err)
|
||||
}
|
||||
req.Header.Set("X-Auth-Token", c.token)
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
req.Header.Set("Content-Type", "application/json; charset=utf-8")
|
||||
|
||||
if attempt == 0 {
|
||||
logger.Infof(ctx, "[Yuque] %s %s", method, path)
|
||||
} else {
|
||||
logger.Infof(ctx, "[Yuque] %s %s (retry %d/%d)", method, path, attempt, maxRetries)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
lastErr = fmt.Errorf("execute request: %w", err)
|
||||
if attempt < maxRetries {
|
||||
if sErr := sleepCtx(ctx, backoff[attempt]); sErr != nil {
|
||||
return sErr
|
||||
}
|
||||
continue
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
body, readErr := io.ReadAll(resp.Body)
|
||||
resp.Body.Close()
|
||||
if readErr != nil {
|
||||
lastErr = fmt.Errorf("read response body: %w", readErr)
|
||||
if attempt < maxRetries {
|
||||
if sErr := sleepCtx(ctx, backoff[attempt]); sErr != nil {
|
||||
return sErr
|
||||
}
|
||||
continue
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
bodyPreview := truncate(string(body), 500)
|
||||
logger.Infof(ctx, "[Yuque] %s %s → status=%d bodyLen=%d body=%s",
|
||||
method, path, resp.StatusCode, len(body), bodyPreview)
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
wait := parseRetryAfter(resp.Header.Get("Retry-After"), backoff[min(attempt, len(backoff)-1)])
|
||||
lastErr = fmt.Errorf("yuque rate limited: status=429 body=%s", bodyPreview)
|
||||
if attempt < maxRetries {
|
||||
if sErr := sleepCtx(ctx, wait); sErr != nil {
|
||||
return sErr
|
||||
}
|
||||
continue
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 500 && resp.StatusCode < 600 {
|
||||
lastErr = fmt.Errorf("yuque server error: status=%d body=%s", resp.StatusCode, bodyPreview)
|
||||
if attempt < max5xxRetries {
|
||||
if sErr := sleepCtx(ctx, retry5xxDelay); sErr != nil {
|
||||
return sErr
|
||||
}
|
||||
continue
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
// 401/403 → surface as ErrInvalidCredentials so DataSourceService can
|
||||
// distinguish bad-token from transient failures and auto-flag the source.
|
||||
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
|
||||
return fmt.Errorf("%w: status=%d body=%s", datasource.ErrInvalidCredentials, resp.StatusCode, bodyPreview)
|
||||
}
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
var apiErr apiErrorBody
|
||||
_ = json.Unmarshal(body, &apiErr)
|
||||
if apiErr.Message != "" {
|
||||
return fmt.Errorf("yuque api error: status=%d msg=%s", resp.StatusCode, apiErr.Message)
|
||||
}
|
||||
return fmt.Errorf("yuque api error: status=%d body=%s", resp.StatusCode, bodyPreview)
|
||||
}
|
||||
|
||||
if result != nil {
|
||||
if err := json.Unmarshal(body, result); err != nil {
|
||||
return fmt.Errorf("decode response: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
// parseRetryAfter returns the Retry-After duration from the header, or fallback if unparseable.
|
||||
// Retry-After: "0" (or negative) is coerced to 100ms so we still yield and don't busy-retry.
|
||||
// Note: only integer-seconds form is supported (RFC 7231 also allows HTTP-date — not seen from Yuque).
|
||||
func parseRetryAfter(header string, fallback time.Duration) time.Duration {
|
||||
if header == "" {
|
||||
return fallback
|
||||
}
|
||||
if secs, err := time.ParseDuration(header + "s"); err == nil {
|
||||
if secs <= 0 {
|
||||
return 100 * time.Millisecond
|
||||
}
|
||||
return secs
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
|
||||
// sleepCtx pauses for d, returning early if ctx is cancelled.
|
||||
func sleepCtx(ctx context.Context, d time.Duration) error {
|
||||
t := time.NewTimer(d)
|
||||
defer t.Stop()
|
||||
select {
|
||||
case <-t.C:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
// Ping verifies the credentials by calling GET /api/v2/user.
|
||||
func (c *client) Ping(ctx context.Context) error {
|
||||
var resp v2UserResponse
|
||||
return c.doRequest(ctx, http.MethodGet, "/api/v2/user", &resp)
|
||||
}
|
||||
|
||||
// truncate returns s truncated to maxLen with "..." appended if longer.
|
||||
func truncate(s string, maxLen int) string {
|
||||
if len(s) <= maxLen {
|
||||
return s
|
||||
}
|
||||
return s[:maxLen] + "..."
|
||||
}
|
||||
|
||||
// buildQuery encodes query parameters, omitting empty values.
|
||||
func buildQuery(params map[string]string) string {
|
||||
values := url.Values{}
|
||||
for k, v := range params {
|
||||
if v != "" {
|
||||
values.Set(k, v)
|
||||
}
|
||||
}
|
||||
if len(values) == 0 {
|
||||
return ""
|
||||
}
|
||||
return "?" + values.Encode()
|
||||
}
|
||||
|
||||
// GetCurrentUser returns the user associated with the current token.
|
||||
func (c *client) GetCurrentUser(ctx context.Context) (v2User, error) {
|
||||
var resp v2UserResponse
|
||||
if err := c.doRequest(ctx, http.MethodGet, "/api/v2/user", &resp); err != nil {
|
||||
return v2User{}, err
|
||||
}
|
||||
return resp.Data, nil
|
||||
}
|
||||
|
||||
// ListUserGroups returns the groups the given user belongs to.
|
||||
// Note: userID is Yuque's numeric user ID (not the login) — the /users/{id}/groups
|
||||
// endpoint requires the integer ID form.
|
||||
func (c *client) ListUserGroups(ctx context.Context, userID int64) ([]v2Group, error) {
|
||||
path := fmt.Sprintf("/api/v2/users/%d/groups", userID)
|
||||
var resp v2GroupListResponse
|
||||
if err := c.doRequest(ctx, http.MethodGet, path, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resp.Data, nil
|
||||
}
|
||||
|
||||
// ListUserRepos returns document-type (Book) repos owned by the given user login.
|
||||
func (c *client) ListUserRepos(ctx context.Context, login string) ([]v2Repo, error) {
|
||||
return c.listReposPaginated(ctx, fmt.Sprintf("/api/v2/users/%s/repos", login))
|
||||
}
|
||||
|
||||
// ListGroupRepos returns document-type (Book) repos owned by the given group login.
|
||||
func (c *client) ListGroupRepos(ctx context.Context, login string) ([]v2Repo, error) {
|
||||
return c.listReposPaginated(ctx, fmt.Sprintf("/api/v2/groups/%s/repos", login))
|
||||
}
|
||||
|
||||
// listReposPaginated walks the pagination for a repo listing endpoint.
|
||||
// Filters to type=Book (document-type knowledge bases only; design/sheet/resource skipped).
|
||||
func (c *client) listReposPaginated(ctx context.Context, basePath string) ([]v2Repo, error) {
|
||||
var all []v2Repo
|
||||
offset := 0
|
||||
for {
|
||||
q := buildQuery(map[string]string{
|
||||
"type": "Book",
|
||||
"offset": fmt.Sprintf("%d", offset),
|
||||
"limit": fmt.Sprintf("%d", defaultPageSize),
|
||||
})
|
||||
var resp v2RepoListResponse
|
||||
if err := c.doRequest(ctx, http.MethodGet, basePath+q, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
all = append(all, resp.Data...)
|
||||
if len(resp.Data) < defaultPageSize {
|
||||
break
|
||||
}
|
||||
offset += defaultPageSize
|
||||
}
|
||||
return all, nil
|
||||
}
|
||||
|
||||
// ListBookDocs lists all documents in a book, handling pagination.
|
||||
// Returns summaries only (body not included — use GetDocDetail to fetch body per doc).
|
||||
func (c *client) ListBookDocs(ctx context.Context, bookID int64) ([]v2Doc, error) {
|
||||
basePath := fmt.Sprintf("/api/v2/repos/%d/docs", bookID)
|
||||
var all []v2Doc
|
||||
offset := 0
|
||||
for {
|
||||
q := buildQuery(map[string]string{
|
||||
"offset": fmt.Sprintf("%d", offset),
|
||||
"limit": fmt.Sprintf("%d", defaultPageSize),
|
||||
})
|
||||
var resp v2DocListResponse
|
||||
if err := c.doRequest(ctx, http.MethodGet, basePath+q, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
all = append(all, resp.Data...)
|
||||
if len(resp.Data) < defaultPageSize {
|
||||
break
|
||||
}
|
||||
offset += defaultPageSize
|
||||
}
|
||||
return all, nil
|
||||
}
|
||||
|
||||
// GetDocDetail fetches the full document detail (including body) by doc ID.
|
||||
func (c *client) GetDocDetail(ctx context.Context, docID int64) (v2DocDetail, error) {
|
||||
path := fmt.Sprintf("/api/v2/repos/docs/%d", docID)
|
||||
var resp v2DocDetailResponse
|
||||
if err := c.doRequest(ctx, http.MethodGet, path, &resp); err != nil {
|
||||
return v2DocDetail{}, err
|
||||
}
|
||||
return resp.Data, nil
|
||||
}
|
||||
418
internal/datasource/connector/yuque/client_test.go
Normal file
418
internal/datasource/connector/yuque/client_test.go
Normal file
@@ -0,0 +1,418 @@
|
||||
package yuque
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/Tencent/WeKnora/internal/datasource"
|
||||
"github.com/Tencent/WeKnora/internal/logger"
|
||||
)
|
||||
|
||||
// fakeYuque sets up an httptest server emulating Yuque API endpoints.
|
||||
// Handlers can be overridden per test.
|
||||
type fakeYuque struct {
|
||||
server *httptest.Server
|
||||
mux *http.ServeMux
|
||||
calls []string // methodPath history
|
||||
}
|
||||
|
||||
func newFakeYuque() *fakeYuque {
|
||||
f := &fakeYuque{mux: http.NewServeMux()}
|
||||
f.server = httptest.NewServer(f.mux)
|
||||
// Default /api/v2/user handler (most tests hit it at least once).
|
||||
f.handleJSON("/api/v2/user", 200, v2UserResponse{Data: v2User{ID: 1, Login: "me", Name: "Me"}})
|
||||
return f
|
||||
}
|
||||
|
||||
func (f *fakeYuque) Close() { f.server.Close() }
|
||||
|
||||
func (f *fakeYuque) cfg() *Config {
|
||||
return &Config{APIToken: "tok-super-secret-value-1234", BaseURL: f.server.URL}
|
||||
}
|
||||
|
||||
func (f *fakeYuque) handleJSON(path string, status int, body interface{}) {
|
||||
f.mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) {
|
||||
f.calls = append(f.calls, r.Method+" "+r.URL.Path+"?"+r.URL.RawQuery)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(status)
|
||||
_ = json.NewEncoder(w).Encode(body)
|
||||
})
|
||||
}
|
||||
|
||||
func TestClient_Ping_Success(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
|
||||
c := newClient(f.cfg())
|
||||
if err := c.Ping(context.Background()); err != nil {
|
||||
t.Fatalf("Ping error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_Ping_SendsAuthHeader(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
|
||||
var gotToken string
|
||||
f.mux = http.NewServeMux()
|
||||
f.mux.HandleFunc("/api/v2/user", func(w http.ResponseWriter, r *http.Request) {
|
||||
gotToken = r.Header.Get("X-Auth-Token")
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(v2UserResponse{Data: v2User{ID: 1, Login: "me"}})
|
||||
})
|
||||
f.server.Config.Handler = f.mux
|
||||
|
||||
c := newClient(f.cfg())
|
||||
if err := c.Ping(context.Background()); err != nil {
|
||||
t.Fatalf("Ping error: %v", err)
|
||||
}
|
||||
if gotToken != "tok-super-secret-value-1234" {
|
||||
t.Errorf("X-Auth-Token = %q, want the token", gotToken)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_Ping_401(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(401)
|
||||
_, _ = io.WriteString(w, `{"message":"Unauthorized"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := newClient(&Config{APIToken: "bad", BaseURL: srv.URL})
|
||||
err := c.Ping(context.Background())
|
||||
if err == nil {
|
||||
t.Fatal("expected error on 401")
|
||||
}
|
||||
if !errors.Is(err, datasource.ErrInvalidCredentials) {
|
||||
t.Errorf("401 should wrap ErrInvalidCredentials, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_Ping_403WrapsInvalidCredentials(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(403)
|
||||
_, _ = io.WriteString(w, `{"message":"Forbidden"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := newClient(&Config{APIToken: "insufficient", BaseURL: srv.URL})
|
||||
err := c.Ping(context.Background())
|
||||
if err == nil {
|
||||
t.Fatal("expected error on 403")
|
||||
}
|
||||
if !errors.Is(err, datasource.ErrInvalidCredentials) {
|
||||
t.Errorf("403 should wrap ErrInvalidCredentials, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_TokenNeverLoggedInFull(t *testing.T) {
|
||||
// Redirect the project's internal logger to an in-memory buffer so we can
|
||||
// assert the raw token never appears in log output. Using stdlib `log`
|
||||
// would be vacuous — the real logger is a private logrus instance.
|
||||
var buf bytes.Buffer
|
||||
logger.SetOutput(&buf)
|
||||
defer logger.SetOutput(os.Stdout)
|
||||
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
|
||||
rawToken := "tok-super-secret-value-1234"
|
||||
cfg := &Config{APIToken: rawToken, BaseURL: f.server.URL}
|
||||
c := newClient(cfg)
|
||||
_ = c.Ping(context.Background())
|
||||
|
||||
out := buf.String()
|
||||
if out == "" {
|
||||
t.Fatal("expected logger output (sanity check — SetOutput wiring may be broken)")
|
||||
}
|
||||
if strings.Contains(out, rawToken) {
|
||||
t.Errorf("raw token leaked into logs:\n%s", out)
|
||||
}
|
||||
// Verify the redacted form was emitted (positive assertion, not just absence).
|
||||
if !strings.Contains(out, redactToken(rawToken)) {
|
||||
t.Errorf("expected redacted token %q in logs, got:\n%s", redactToken(rawToken), out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRedactToken(t *testing.T) {
|
||||
tests := []struct{ in, want string }{
|
||||
{"short", "***"},
|
||||
{"abcdef1234567890", "abcdef...7890"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if got := redactToken(tt.in); got != tt.want {
|
||||
t.Errorf("redactToken(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_429WithRetryAfter_Retries(t *testing.T) {
|
||||
attempt := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/v2/user", func(w http.ResponseWriter, r *http.Request) {
|
||||
attempt++
|
||||
if attempt == 1 {
|
||||
// "0" is coerced to 100ms inside the client so the test stays fast.
|
||||
w.Header().Set("Retry-After", "0")
|
||||
w.WriteHeader(429)
|
||||
_, _ = io.WriteString(w, `{"message":"rate limited"}`)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(v2UserResponse{Data: v2User{ID: 1, Login: "me"}})
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
c := newClient(&Config{APIToken: "t", BaseURL: srv.URL})
|
||||
if err := c.Ping(context.Background()); err != nil {
|
||||
t.Fatalf("expected success after retry, got %v", err)
|
||||
}
|
||||
if attempt < 2 {
|
||||
t.Errorf("expected at least 2 attempts, got %d", attempt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_429ExhaustsRetries(t *testing.T) {
|
||||
attempts := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/v2/user", func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
w.Header().Set("Retry-After", "0")
|
||||
w.WriteHeader(429)
|
||||
_, _ = io.WriteString(w, `{"message":"rate limited"}`)
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
c := newClient(&Config{APIToken: "t", BaseURL: srv.URL})
|
||||
err := c.Ping(context.Background())
|
||||
if err == nil {
|
||||
t.Fatal("expected error when 429s exceed retry budget")
|
||||
}
|
||||
if attempts != 4 { // initial + 3 retries
|
||||
t.Errorf("attempts = %d, want 4 (1 + 3 retries)", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_5xxRetriesOnce(t *testing.T) {
|
||||
attempts := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/v2/user", func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
w.WriteHeader(500)
|
||||
_, _ = io.WriteString(w, `{"message":"internal error"}`)
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
// Make the fixed 2-second 5xx sleep negligible by using a context whose
|
||||
// deadline fires after just one retry cycle. We still expect exactly 2
|
||||
// attempts because the implementation should retry 5xx once and then stop.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
c := newClient(&Config{APIToken: "t", BaseURL: srv.URL})
|
||||
if err := c.Ping(ctx); err == nil {
|
||||
t.Fatal("expected error after 5xx exhaustion")
|
||||
}
|
||||
if attempts != 2 { // initial + 1 retry
|
||||
t.Errorf("attempts = %d, want 2 (5xx retries exactly once)", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_4xxNotRetried(t *testing.T) {
|
||||
attempts := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/v2/user", func(w http.ResponseWriter, r *http.Request) {
|
||||
attempts++
|
||||
w.WriteHeader(400)
|
||||
_, _ = io.WriteString(w, `{"message":"bad request"}`)
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
c := newClient(&Config{APIToken: "t", BaseURL: srv.URL})
|
||||
if err := c.Ping(context.Background()); err == nil {
|
||||
t.Fatal("expected error on 400")
|
||||
}
|
||||
if attempts != 1 {
|
||||
t.Errorf("attempts = %d, want 1 (non-429/5xx 4xx must not retry)", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRetryAfter(t *testing.T) {
|
||||
fallback := 5 * time.Second
|
||||
tests := []struct {
|
||||
header string
|
||||
want time.Duration
|
||||
}{
|
||||
{"", fallback},
|
||||
{"0", 100 * time.Millisecond},
|
||||
{"-1", 100 * time.Millisecond}, // negative coerced
|
||||
{"3", 3 * time.Second},
|
||||
{"abc", fallback}, // unparseable
|
||||
}
|
||||
for _, tt := range tests {
|
||||
if got := parseRetryAfter(tt.header, fallback); got != tt.want {
|
||||
t.Errorf("parseRetryAfter(%q) = %v, want %v", tt.header, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_GetCurrentUser(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
// Override the default /api/v2/user handler via a fresh mux.
|
||||
f.mux = http.NewServeMux()
|
||||
f.mux.HandleFunc("/api/v2/user", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(v2UserResponse{Data: v2User{ID: 42, Login: "alice", Name: "Alice"}})
|
||||
})
|
||||
f.server.Config.Handler = f.mux
|
||||
|
||||
c := newClient(f.cfg())
|
||||
u, err := c.GetCurrentUser(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("GetCurrentUser: %v", err)
|
||||
}
|
||||
if u.ID != 42 || u.Login != "alice" {
|
||||
t.Errorf("got %+v, want {42 alice}", u)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_ListUserGroups(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/users/42/groups", 200, v2GroupListResponse{Data: []v2Group{
|
||||
{ID: 100, Login: "team-a", Name: "Team A"},
|
||||
{ID: 101, Login: "team-b", Name: "Team B"},
|
||||
}})
|
||||
|
||||
c := newClient(f.cfg())
|
||||
gs, err := c.ListUserGroups(context.Background(), 42)
|
||||
if err != nil {
|
||||
t.Fatalf("ListUserGroups: %v", err)
|
||||
}
|
||||
if len(gs) != 2 || gs[0].Login != "team-a" {
|
||||
t.Errorf("got %+v", gs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_ListUserRepos_FiltersType(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
var gotQuery string
|
||||
f.mux.HandleFunc("/api/v2/users/alice/repos", func(w http.ResponseWriter, r *http.Request) {
|
||||
gotQuery = r.URL.RawQuery
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 9, Slug: "book1", Name: "Book 1", Type: "Book", Namespace: "alice/book1"},
|
||||
}})
|
||||
})
|
||||
|
||||
c := newClient(f.cfg())
|
||||
repos, err := c.ListUserRepos(context.Background(), "alice")
|
||||
if err != nil {
|
||||
t.Fatalf("ListUserRepos: %v", err)
|
||||
}
|
||||
if !strings.Contains(gotQuery, "type=Book") {
|
||||
t.Errorf("expected type=Book in query, got %q", gotQuery)
|
||||
}
|
||||
if !strings.Contains(gotQuery, "offset=0") {
|
||||
t.Errorf("expected offset=0 in query, got %q", gotQuery)
|
||||
}
|
||||
if !strings.Contains(gotQuery, "limit=100") {
|
||||
t.Errorf("expected limit=100 in query, got %q", gotQuery)
|
||||
}
|
||||
if len(repos) != 1 || repos[0].Namespace != "alice/book1" {
|
||||
t.Errorf("got %+v", repos)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_ListGroupRepos(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/groups/team-a/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 11, Slug: "tb", Name: "Team Book", Type: "Book", Namespace: "team-a/tb"},
|
||||
}})
|
||||
|
||||
c := newClient(f.cfg())
|
||||
repos, err := c.ListGroupRepos(context.Background(), "team-a")
|
||||
if err != nil {
|
||||
t.Fatalf("ListGroupRepos: %v", err)
|
||||
}
|
||||
if len(repos) != 1 || repos[0].ID != 11 {
|
||||
t.Errorf("got %+v", repos)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_ListBookDocs_Pagination(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
// Page 1: 100 docs, page 2: 30 docs, then stop.
|
||||
callCount := 0
|
||||
f.mux.HandleFunc("/api/v2/repos/555/docs", func(w http.ResponseWriter, r *http.Request) {
|
||||
callCount++
|
||||
offset := r.URL.Query().Get("offset")
|
||||
docs := make([]v2Doc, 0)
|
||||
switch offset {
|
||||
case "0":
|
||||
for i := 0; i < 100; i++ {
|
||||
docs = append(docs, v2Doc{ID: int64(i + 1), Type: "Doc", Status: "1", Title: fmt.Sprintf("D%d", i+1), ContentUpdatedAt: "2026-04-20T00:00:00Z"})
|
||||
}
|
||||
case "100":
|
||||
for i := 0; i < 30; i++ {
|
||||
docs = append(docs, v2Doc{ID: int64(i + 101), Type: "Doc", Status: "1", Title: fmt.Sprintf("D%d", i+101), ContentUpdatedAt: "2026-04-20T00:00:00Z"})
|
||||
}
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(v2DocListResponse{Data: docs})
|
||||
})
|
||||
|
||||
c := newClient(f.cfg())
|
||||
docs, err := c.ListBookDocs(context.Background(), 555)
|
||||
if err != nil {
|
||||
t.Fatalf("ListBookDocs: %v", err)
|
||||
}
|
||||
if len(docs) != 130 {
|
||||
t.Errorf("len(docs) = %d, want 130", len(docs))
|
||||
}
|
||||
if callCount != 2 {
|
||||
t.Errorf("callCount = %d, want 2 (one full page + one partial)", callCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClient_GetDocDetail(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/docs/7", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{
|
||||
ID: 7,
|
||||
Title: "Hello",
|
||||
Body: "# Hello\n\nworld",
|
||||
Format: "markdown",
|
||||
Status: "1",
|
||||
ContentUpdatedAt: "2026-04-20T12:00:00Z",
|
||||
},
|
||||
})
|
||||
|
||||
c := newClient(f.cfg())
|
||||
d, err := c.GetDocDetail(context.Background(), 7)
|
||||
if err != nil {
|
||||
t.Fatalf("GetDocDetail: %v", err)
|
||||
}
|
||||
if d.Body != "# Hello\n\nworld" {
|
||||
t.Errorf("Body = %q", d.Body)
|
||||
}
|
||||
}
|
||||
312
internal/datasource/connector/yuque/connector.go
Normal file
312
internal/datasource/connector/yuque/connector.go
Normal file
@@ -0,0 +1,312 @@
|
||||
package yuque
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/Tencent/WeKnora/internal/datasource"
|
||||
"github.com/Tencent/WeKnora/internal/logger"
|
||||
"github.com/Tencent/WeKnora/internal/types"
|
||||
)
|
||||
|
||||
// Compile-time proof that *Connector satisfies the datasource.Connector interface.
|
||||
// Catches signature drift as soon as the interface changes, rather than at
|
||||
// container wiring or runtime.
|
||||
var _ datasource.Connector = (*Connector)(nil)
|
||||
|
||||
// Connector implements datasource.Connector for Yuque.
|
||||
type Connector struct{}
|
||||
|
||||
// NewConnector creates a new Yuque connector.
|
||||
func NewConnector() *Connector { return &Connector{} }
|
||||
|
||||
// Type returns the connector type identifier.
|
||||
func (c *Connector) Type() string { return types.ConnectorTypeYuque }
|
||||
|
||||
// Validate verifies the given credentials by pinging the current-user endpoint.
|
||||
func (c *Connector) Validate(ctx context.Context, config *types.DataSourceConfig) error {
|
||||
cfg, err := parseYuqueConfig(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cli := newClient(cfg)
|
||||
if err := cli.Ping(ctx); err != nil {
|
||||
return fmt.Errorf("yuque connection failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ListResources returns all repos (personal + team) accessible to the token.
|
||||
// Serial fetch for v1 (user groups typically <10). TODO(perf): parallelize if slow.
|
||||
func (c *Connector) ListResources(ctx context.Context, config *types.DataSourceConfig) ([]types.Resource, error) {
|
||||
cfg, err := parseYuqueConfig(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cli := newClient(cfg)
|
||||
|
||||
me, err := cli.GetCurrentUser(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get current user: %w", err)
|
||||
}
|
||||
|
||||
// Personal repos come first and win over team representations of the same repo
|
||||
// (first-write-wins dedup) — owner-side metadata is more authoritative.
|
||||
repos := make(map[int64]v2Repo)
|
||||
|
||||
personal, err := cli.ListUserRepos(ctx, me.Login)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list personal repos: %w", err)
|
||||
}
|
||||
for _, r := range personal {
|
||||
if _, ok := repos[r.ID]; !ok {
|
||||
repos[r.ID] = r
|
||||
}
|
||||
}
|
||||
|
||||
groups, err := cli.ListUserGroups(ctx, me.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list user groups: %w", err)
|
||||
}
|
||||
for _, g := range groups {
|
||||
teamRepos, err := cli.ListGroupRepos(ctx, g.Login)
|
||||
if err != nil {
|
||||
// Skip this group but continue others (e.g., 403 on a restricted group).
|
||||
// Log so operators can distinguish partial failures from empty groups.
|
||||
logger.Warnf(ctx, "[Yuque] skip group %s: %v", g.Login, err)
|
||||
continue
|
||||
}
|
||||
for _, r := range teamRepos {
|
||||
if _, ok := repos[r.ID]; !ok {
|
||||
repos[r.ID] = r
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out := make([]types.Resource, 0, len(repos))
|
||||
for _, r := range repos {
|
||||
out = append(out, types.Resource{
|
||||
ExternalID: strconv.FormatInt(r.ID, 10),
|
||||
Name: r.Name,
|
||||
Type: "book",
|
||||
URL: cfg.GetBaseURL() + "/" + r.Namespace,
|
||||
Description: r.Namespace,
|
||||
ModifiedAt: parseContentUpdatedAt(r.UpdatedAt),
|
||||
Metadata: map[string]interface{}{
|
||||
"public": r.Public,
|
||||
"book_type": r.Type,
|
||||
},
|
||||
})
|
||||
}
|
||||
// Stable, deterministic order for UI rendering and response-body caching.
|
||||
sort.Slice(out, func(i, j int) bool { return out[i].ExternalID < out[j].ExternalID })
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// FetchAll performs a full sync of all books specified in resourceIDs.
|
||||
func (c *Connector) FetchAll(ctx context.Context, config *types.DataSourceConfig, resourceIDs []string) ([]types.FetchedItem, error) {
|
||||
items, _, err := c.walk(ctx, config, resourceIDs, nil, false)
|
||||
return items, err
|
||||
}
|
||||
|
||||
// walk is the shared implementation for FetchAll / FetchIncremental.
|
||||
// If incremental is false, prev is ignored and no cursor is returned (returns nil for cursor).
|
||||
func (c *Connector) walk(
|
||||
ctx context.Context,
|
||||
config *types.DataSourceConfig,
|
||||
resourceIDs []string,
|
||||
prev *yuqueCursor,
|
||||
incremental bool,
|
||||
) ([]types.FetchedItem, *yuqueCursor, error) {
|
||||
cfg, err := parseYuqueConfig(config)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
cli := newClient(cfg)
|
||||
|
||||
newCursor := &yuqueCursor{LastSyncTime: time.Now(), BookDocTimes: make(map[string]map[string]string)}
|
||||
var out []types.FetchedItem
|
||||
|
||||
for _, bookIDStr := range resourceIDs {
|
||||
bookID, err := strconv.ParseInt(bookIDStr, 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("invalid book id %q: %w", bookIDStr, err)
|
||||
}
|
||||
|
||||
docs, err := cli.ListBookDocs(ctx, bookID)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("list docs for book %d: %w", bookID, err)
|
||||
}
|
||||
|
||||
currentDocs := make(map[string]bool)
|
||||
newCursor.BookDocTimes[bookIDStr] = make(map[string]string)
|
||||
|
||||
var skippedType, skippedDraft, kept int
|
||||
var sampleSkipType, sampleSkipDraft string
|
||||
for _, d := range docs {
|
||||
// Empty type/status is treated as acceptable — forward-compat with
|
||||
// API variations that omit the field.
|
||||
if d.Type != "" && d.Type != "Doc" {
|
||||
skippedType++
|
||||
if sampleSkipType == "" {
|
||||
sampleSkipType = fmt.Sprintf("id=%d type=%q title=%q", d.ID, d.Type, d.Title)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if d.Status != "" && d.Status != "1" {
|
||||
skippedDraft++
|
||||
if sampleSkipDraft == "" {
|
||||
sampleSkipDraft = fmt.Sprintf("id=%d status=%q title=%q", d.ID, d.Status, d.Title)
|
||||
}
|
||||
continue
|
||||
}
|
||||
kept++
|
||||
docIDStr := strconv.FormatInt(d.ID, 10)
|
||||
currentDocs[docIDStr] = true
|
||||
newCursor.BookDocTimes[bookIDStr][docIDStr] = d.ContentUpdatedAt
|
||||
|
||||
// Incremental: skip if content hasn't changed.
|
||||
if incremental && prev != nil && prev.BookDocTimes != nil {
|
||||
if prevTimes, ok := prev.BookDocTimes[bookIDStr]; ok {
|
||||
if prevTimes[docIDStr] == d.ContentUpdatedAt {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
detail, err := cli.GetDocDetail(ctx, d.ID)
|
||||
if err != nil {
|
||||
// Record failure but continue (placeholder item with error metadata).
|
||||
// Keep doc_id/book_id/slug for observability pipelines that join on these.
|
||||
out = append(out, types.FetchedItem{
|
||||
ExternalID: docIDStr,
|
||||
Title: d.Title,
|
||||
SourceResourceID: bookIDStr,
|
||||
Metadata: map[string]string{
|
||||
"error": err.Error(),
|
||||
"channel": types.ChannelYuque,
|
||||
"doc_id": docIDStr,
|
||||
"book_id": bookIDStr,
|
||||
"slug": d.Slug,
|
||||
},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// Yuque serializes `body` as Markdown when format is "markdown" or
|
||||
// "lake" (Lake XML lives separately in `body_lake`) — empirically
|
||||
// verified against the v2 API. Any other format (e.g. "html") may
|
||||
// put non-Markdown content in `body`, so skip defensively.
|
||||
if detail.Format != "" && detail.Format != "markdown" && detail.Format != "lake" {
|
||||
logger.Warnf(ctx, "[Yuque] skip doc %d (%q): unsupported format %q",
|
||||
d.ID, d.Title, detail.Format)
|
||||
out = append(out, types.FetchedItem{
|
||||
ExternalID: docIDStr,
|
||||
Title: d.Title,
|
||||
SourceResourceID: bookIDStr,
|
||||
Metadata: map[string]string{
|
||||
"channel": types.ChannelYuque,
|
||||
"doc_id": docIDStr,
|
||||
"book_id": bookIDStr,
|
||||
"slug": d.Slug,
|
||||
"skip_reason": "unsupported format: " + detail.Format,
|
||||
},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
out = append(out, types.FetchedItem{
|
||||
ExternalID: docIDStr,
|
||||
Title: d.Title,
|
||||
Content: []byte(detail.Body),
|
||||
ContentType: "text/markdown",
|
||||
FileName: sanitizeFileName(d.Title) + ".md",
|
||||
URL: buildDocURL(cfg.GetBaseURL(), detail.Book.Namespace, d.Slug),
|
||||
UpdatedAt: parseContentUpdatedAt(d.ContentUpdatedAt),
|
||||
SourceResourceID: bookIDStr,
|
||||
Metadata: map[string]string{
|
||||
"doc_id": docIDStr,
|
||||
"book_id": bookIDStr,
|
||||
"slug": d.Slug,
|
||||
"creator": strconv.FormatInt(d.UserID, 10),
|
||||
"word_count": strconv.Itoa(d.WordCount),
|
||||
"channel": types.ChannelYuque,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
logger.Infof(ctx, "[Yuque] book %d: total=%d kept=%d skipped_non_doc=%d skipped_draft=%d non_doc_sample={%s} draft_sample={%s}",
|
||||
bookID, len(docs), kept, skippedType, skippedDraft, sampleSkipType, sampleSkipDraft)
|
||||
|
||||
// Deletion detection (incremental only): previous doc IDs not in current → IsDeleted=true
|
||||
if incremental && prev != nil && prev.BookDocTimes != nil {
|
||||
if prevTimes, ok := prev.BookDocTimes[bookIDStr]; ok {
|
||||
for prevDocID := range prevTimes {
|
||||
if !currentDocs[prevDocID] {
|
||||
out = append(out, types.FetchedItem{
|
||||
ExternalID: prevDocID,
|
||||
IsDeleted: true,
|
||||
SourceResourceID: bookIDStr,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !incremental {
|
||||
return out, nil, nil
|
||||
}
|
||||
return out, newCursor, nil
|
||||
}
|
||||
|
||||
// buildDocURL constructs a browser URL for a Yuque doc.
|
||||
// Namespace may be empty on some responses; fall back to base URL only.
|
||||
func buildDocURL(baseURL, namespace, slug string) string {
|
||||
if namespace == "" {
|
||||
return baseURL
|
||||
}
|
||||
return baseURL + "/" + namespace + "/" + slug
|
||||
}
|
||||
|
||||
// FetchIncremental returns items changed (or deleted) since the prior cursor.
|
||||
// Deletion detection: docs present in the prior cursor but absent from the
|
||||
// current list are emitted as IsDeleted=true placeholder items.
|
||||
func (c *Connector) FetchIncremental(
|
||||
ctx context.Context,
|
||||
config *types.DataSourceConfig,
|
||||
cursor *types.SyncCursor,
|
||||
) ([]types.FetchedItem, *types.SyncCursor, error) {
|
||||
resourceIDs := config.ResourceIDs
|
||||
if len(resourceIDs) == 0 {
|
||||
return nil, nil, fmt.Errorf("no resource IDs (book IDs) configured")
|
||||
}
|
||||
|
||||
// Decode prior cursor (if any).
|
||||
var prev *yuqueCursor
|
||||
if cursor != nil && cursor.ConnectorCursor != nil {
|
||||
var p yuqueCursor
|
||||
b, _ := json.Marshal(cursor.ConnectorCursor)
|
||||
_ = json.Unmarshal(b, &p)
|
||||
prev = &p
|
||||
}
|
||||
|
||||
items, newCursor, err := c.walk(ctx, config, resourceIDs, prev, true)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Marshal newCursor into a generic map for the SyncCursor wrapper.
|
||||
cursorMap := make(map[string]interface{})
|
||||
b, _ := json.Marshal(newCursor)
|
||||
_ = json.Unmarshal(b, &cursorMap)
|
||||
|
||||
return items, &types.SyncCursor{
|
||||
LastSyncTime: newCursor.LastSyncTime,
|
||||
ConnectorCursor: cursorMap,
|
||||
}, nil
|
||||
}
|
||||
475
internal/datasource/connector/yuque/connector_test.go
Normal file
475
internal/datasource/connector/yuque/connector_test.go
Normal file
@@ -0,0 +1,475 @@
|
||||
package yuque
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/Tencent/WeKnora/internal/types"
|
||||
)
|
||||
|
||||
func makeDSConfig(f *fakeYuque, resourceIDs []string) *types.DataSourceConfig {
|
||||
return &types.DataSourceConfig{
|
||||
Type: types.ConnectorTypeYuque,
|
||||
Credentials: map[string]interface{}{"api_token": f.cfg().APIToken, "base_url": f.cfg().BaseURL},
|
||||
ResourceIDs: resourceIDs,
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_Type(t *testing.T) {
|
||||
if NewConnector().Type() != types.ConnectorTypeYuque {
|
||||
t.Errorf("Type() = %q, want %q", NewConnector().Type(), types.ConnectorTypeYuque)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_Validate_Success(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
|
||||
if err := NewConnector().Validate(context.Background(), makeDSConfig(f, nil)); err != nil {
|
||||
t.Fatalf("Validate error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_Validate_Bad401(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(401)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
err := NewConnector().Validate(context.Background(), &types.DataSourceConfig{
|
||||
Credentials: map[string]interface{}{"api_token": "bad", "base_url": srv.URL},
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for 401")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_ListResources_Aggregates(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
// user = alice (id=1)
|
||||
// Override default /api/v2/user handler to return alice (f.handleJSON panics on duplicate path registration).
|
||||
f.mux = http.NewServeMux()
|
||||
f.server.Config.Handler = f.mux
|
||||
f.handleJSON("/api/v2/user", 200, v2UserResponse{Data: v2User{ID: 1, Login: "alice"}})
|
||||
// personal repos
|
||||
f.handleJSON("/api/v2/users/alice/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 10, Slug: "personal", Name: "Personal", Type: "Book", Namespace: "alice/personal"},
|
||||
}})
|
||||
// 2 groups
|
||||
f.handleJSON("/api/v2/users/1/groups", 200, v2GroupListResponse{Data: []v2Group{
|
||||
{ID: 100, Login: "team-a"}, {ID: 101, Login: "team-b"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/groups/team-a/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 20, Slug: "ab", Name: "A Book", Type: "Book", Namespace: "team-a/ab"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/groups/team-b/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 30, Slug: "bb", Name: "B Book", Type: "Book", Namespace: "team-b/bb"},
|
||||
}})
|
||||
|
||||
resources, err := NewConnector().ListResources(context.Background(), makeDSConfig(f, nil))
|
||||
if err != nil {
|
||||
t.Fatalf("ListResources: %v", err)
|
||||
}
|
||||
if len(resources) != 3 {
|
||||
t.Fatalf("len = %d, want 3", len(resources))
|
||||
}
|
||||
// Verify fields on one entry
|
||||
var ab *types.Resource
|
||||
for i := range resources {
|
||||
if resources[i].Name == "A Book" {
|
||||
ab = &resources[i]
|
||||
}
|
||||
}
|
||||
if ab == nil {
|
||||
t.Fatal("expected A Book resource")
|
||||
}
|
||||
if ab.ExternalID != "20" || ab.Description != "team-a/ab" || ab.Type != "book" {
|
||||
t.Errorf("got %+v", ab)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_ListResources_DedupByID(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.mux = http.NewServeMux()
|
||||
f.server.Config.Handler = f.mux
|
||||
f.handleJSON("/api/v2/user", 200, v2UserResponse{Data: v2User{ID: 1, Login: "alice"}})
|
||||
f.handleJSON("/api/v2/users/alice/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 99, Slug: "shared", Type: "Book", Namespace: "alice/shared"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/users/1/groups", 200, v2GroupListResponse{Data: []v2Group{
|
||||
{ID: 200, Login: "team-x"},
|
||||
}})
|
||||
// Same repo (ID 99) appears again via a team the user is in.
|
||||
f.handleJSON("/api/v2/groups/team-x/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 99, Slug: "shared", Type: "Book", Namespace: "alice/shared"},
|
||||
}})
|
||||
|
||||
resources, err := NewConnector().ListResources(context.Background(), makeDSConfig(f, nil))
|
||||
if err != nil {
|
||||
t.Fatalf("ListResources: %v", err)
|
||||
}
|
||||
if len(resources) != 1 {
|
||||
t.Errorf("expected dedup to 1, got %d", len(resources))
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchAll_Markdown(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
// Book 7 → 2 docs; one published Doc, one draft (filtered)
|
||||
f.handleJSON("/api/v2/repos/7/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 101, Type: "Doc", Status: "1", Title: "Hello", Slug: "hello", BookID: 7, ContentUpdatedAt: "2026-04-20T10:00:00Z", WordCount: 42},
|
||||
{ID: 102, Type: "Doc", Status: "0", Title: "Draft", Slug: "draft", BookID: 7, ContentUpdatedAt: "2026-04-20T11:00:00Z"}, // draft, skipped
|
||||
}})
|
||||
f.handleJSON("/api/v2/repos/docs/101", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 101, Title: "Hello", Body: "# Hello\n\nworld", Format: "markdown", Status: "1",
|
||||
ContentUpdatedAt: "2026-04-20T10:00:00Z", Book: v2Repo{Namespace: "alice/demo"}},
|
||||
})
|
||||
|
||||
items, err := NewConnector().FetchAll(context.Background(), makeDSConfig(f, []string{"7"}), []string{"7"})
|
||||
if err != nil {
|
||||
t.Fatalf("FetchAll: %v", err)
|
||||
}
|
||||
if len(items) != 1 {
|
||||
t.Fatalf("len = %d, want 1 (draft filtered)", len(items))
|
||||
}
|
||||
it := items[0]
|
||||
if it.ExternalID != "101" || it.Title != "Hello" {
|
||||
t.Errorf("got %+v", it)
|
||||
}
|
||||
if string(it.Content) != "# Hello\n\nworld" {
|
||||
t.Errorf("Content = %q", string(it.Content))
|
||||
}
|
||||
if it.ContentType != "text/markdown" {
|
||||
t.Errorf("ContentType = %q", it.ContentType)
|
||||
}
|
||||
if it.FileName != "Hello.md" {
|
||||
t.Errorf("FileName = %q", it.FileName)
|
||||
}
|
||||
if it.Metadata["channel"] != types.ChannelYuque {
|
||||
t.Errorf("channel = %q", it.Metadata["channel"])
|
||||
}
|
||||
if it.UpdatedAt.IsZero() {
|
||||
t.Error("UpdatedAt should be parsed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchAll_DocDetailError_EmitsPlaceholder(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/9/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 301, Type: "Doc", Status: "1", Title: "Broken", Slug: "broken", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 302, Type: "Doc", Status: "1", Title: "OK", Slug: "ok", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
f.mux.HandleFunc("/api/v2/repos/docs/301", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusBadRequest) // 4xx is non-retriable, bubbles up as an error
|
||||
_, _ = w.Write([]byte(`{"message":"broken"}`))
|
||||
})
|
||||
f.handleJSON("/api/v2/repos/docs/302", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 302, Title: "OK", Body: "ok", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
})
|
||||
|
||||
items, err := NewConnector().FetchAll(context.Background(), makeDSConfig(f, []string{"9"}), []string{"9"})
|
||||
if err != nil {
|
||||
t.Fatalf("FetchAll must not abort the batch on a single detail error, got %v", err)
|
||||
}
|
||||
if len(items) != 2 {
|
||||
t.Fatalf("want 2 items (placeholder + ok), got %d: %+v", len(items), items)
|
||||
}
|
||||
// Find the placeholder — the one with the "error" metadata key.
|
||||
var placeholder *types.FetchedItem
|
||||
for i := range items {
|
||||
if items[i].Metadata["error"] != "" {
|
||||
placeholder = &items[i]
|
||||
}
|
||||
}
|
||||
if placeholder == nil {
|
||||
t.Fatal("expected a placeholder item with Metadata[error] set")
|
||||
}
|
||||
if placeholder.ExternalID != "301" || placeholder.Title != "Broken" {
|
||||
t.Errorf("placeholder identity wrong: %+v", placeholder)
|
||||
}
|
||||
if placeholder.Metadata["channel"] != types.ChannelYuque {
|
||||
t.Errorf("placeholder channel = %q", placeholder.Metadata["channel"])
|
||||
}
|
||||
if placeholder.Metadata["book_id"] != "9" || placeholder.Metadata["doc_id"] != "301" || placeholder.Metadata["slug"] != "broken" {
|
||||
t.Errorf("placeholder missing traceability metadata: %+v", placeholder.Metadata)
|
||||
}
|
||||
if len(placeholder.Content) != 0 {
|
||||
t.Errorf("placeholder should have empty Content, got %q", string(placeholder.Content))
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchAll_LakeFormatIngestedAsMarkdown(t *testing.T) {
|
||||
// Yuque's v2 doc-detail serves `body` as Markdown for type="Doc" regardless
|
||||
// of authoring format. format="lake" means the doc was edited in the Lake
|
||||
// editor, but `body` is still the Markdown representation (Lake XML lives
|
||||
// in `body_lake`). So both should be ingested identically.
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/12/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 401, Type: "Doc", Status: "1", Title: "Lake Doc", Slug: "lake", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 402, Type: "Doc", Status: "1", Title: "MD Doc", Slug: "md", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/repos/docs/401", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 401, Title: "Lake Doc", Format: "lake", Body: "# lake body as markdown", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
})
|
||||
f.handleJSON("/api/v2/repos/docs/402", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 402, Title: "MD Doc", Format: "markdown", Body: "# MD", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
})
|
||||
|
||||
items, err := NewConnector().FetchAll(context.Background(), makeDSConfig(f, []string{"12"}), []string{"12"})
|
||||
if err != nil {
|
||||
t.Fatalf("FetchAll: %v", err)
|
||||
}
|
||||
if len(items) != 2 {
|
||||
t.Fatalf("want 2 items, got %d", len(items))
|
||||
}
|
||||
wantBody := map[string]string{
|
||||
"401": "# lake body as markdown",
|
||||
"402": "# MD",
|
||||
}
|
||||
for _, it := range items {
|
||||
if string(it.Content) != wantBody[it.ExternalID] {
|
||||
t.Errorf("item %s Content = %q, want %q", it.ExternalID, it.Content, wantBody[it.ExternalID])
|
||||
}
|
||||
if it.ContentType != "text/markdown" {
|
||||
t.Errorf("item %s ContentType = %q, want text/markdown", it.ExternalID, it.ContentType)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchAll_SkipsUnsupportedFormats(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/13/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 501, Type: "Doc", Status: "1", Title: "HTML Doc", Slug: "h", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 502, Type: "Doc", Status: "1", Title: "OK Doc", Slug: "ok", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/repos/docs/501", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 501, Title: "HTML Doc", Format: "html", Body: "<p>raw html</p>", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
})
|
||||
f.handleJSON("/api/v2/repos/docs/502", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 502, Title: "OK Doc", Format: "lake", Body: "# ok", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
})
|
||||
|
||||
items, err := NewConnector().FetchAll(context.Background(), makeDSConfig(f, []string{"13"}), []string{"13"})
|
||||
if err != nil {
|
||||
t.Fatalf("FetchAll: %v", err)
|
||||
}
|
||||
if len(items) != 2 {
|
||||
t.Fatalf("want 2 items (placeholder + ok), got %d", len(items))
|
||||
}
|
||||
var htmlPlaceholder, okItem *types.FetchedItem
|
||||
for i := range items {
|
||||
if items[i].ExternalID == "501" {
|
||||
htmlPlaceholder = &items[i]
|
||||
}
|
||||
if items[i].ExternalID == "502" {
|
||||
okItem = &items[i]
|
||||
}
|
||||
}
|
||||
if htmlPlaceholder == nil || okItem == nil {
|
||||
t.Fatal("missing one of the items")
|
||||
}
|
||||
if htmlPlaceholder.Metadata["skip_reason"] == "" {
|
||||
t.Errorf("html doc should have skip_reason metadata, got %+v", htmlPlaceholder.Metadata)
|
||||
}
|
||||
if len(htmlPlaceholder.Content) != 0 {
|
||||
t.Errorf("html placeholder should have empty Content")
|
||||
}
|
||||
if string(okItem.Content) != "# ok" {
|
||||
t.Errorf("lake doc Content = %q, want %q", string(okItem.Content), "# ok")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchAll_SkipsNonDocTypes(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/8/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 201, Type: "Sheet", Status: "1", Title: "S"},
|
||||
{ID: 202, Type: "Board", Status: "1", Title: "B"},
|
||||
{ID: 203, Type: "Thread", Status: "1", Title: "T"},
|
||||
{ID: 204, Type: "Doc", Status: "1", Title: "D", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/repos/docs/204", 200, v2DocDetailResponse{
|
||||
Data: v2DocDetail{ID: 204, Title: "D", Body: "text", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
})
|
||||
|
||||
items, err := NewConnector().FetchAll(context.Background(), makeDSConfig(f, []string{"8"}), []string{"8"})
|
||||
if err != nil {
|
||||
t.Fatalf("FetchAll: %v", err)
|
||||
}
|
||||
if len(items) != 1 || items[0].ExternalID != "204" {
|
||||
t.Errorf("expected only the Doc, got %+v", items)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchIncremental_FirstSync(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/10/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 1, Type: "Doc", Status: "1", Title: "A", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 2, Type: "Doc", Status: "1", Title: "B", ContentUpdatedAt: "2026-04-20T11:00:00Z"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/repos/docs/1", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 1, Title: "A", Body: "a", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"}})
|
||||
f.handleJSON("/api/v2/repos/docs/2", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 2, Title: "B", Body: "b", Status: "1", ContentUpdatedAt: "2026-04-20T11:00:00Z"}})
|
||||
|
||||
items, cursor, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f, []string{"10"}), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("FetchIncremental: %v", err)
|
||||
}
|
||||
if len(items) != 2 {
|
||||
t.Errorf("items = %d, want 2 on first sync", len(items))
|
||||
}
|
||||
if cursor == nil {
|
||||
t.Fatal("cursor must not be nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchIncremental_NoChanges(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.handleJSON("/api/v2/repos/10/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 1, Type: "Doc", Status: "1", Title: "A", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/repos/docs/1", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 1, Title: "A", Body: "a", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"}})
|
||||
|
||||
// First sync → establish cursor
|
||||
_, cursor1, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f, []string{"10"}), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("first sync: %v", err)
|
||||
}
|
||||
|
||||
// Second sync with same cursor → 0 items
|
||||
items, _, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f, []string{"10"}), cursor1)
|
||||
if err != nil {
|
||||
t.Fatalf("second sync: %v", err)
|
||||
}
|
||||
if len(items) != 0 {
|
||||
t.Errorf("expected 0 changed items, got %d", len(items))
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchIncremental_ReturnsOnlyChanged(t *testing.T) {
|
||||
// First sync: 2 docs, both at t0.
|
||||
f1 := newFakeYuque()
|
||||
f1.handleJSON("/api/v2/repos/11/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 1, Type: "Doc", Status: "1", Title: "A", Slug: "a", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 2, Type: "Doc", Status: "1", Title: "B", Slug: "b", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
f1.handleJSON("/api/v2/repos/docs/1", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 1, Title: "A", Body: "a", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"}})
|
||||
f1.handleJSON("/api/v2/repos/docs/2", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 2, Title: "B", Body: "b", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"}})
|
||||
|
||||
_, cursor1, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f1, []string{"11"}), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("first sync: %v", err)
|
||||
}
|
||||
f1.Close()
|
||||
|
||||
// Second sync: doc 1 unchanged, doc 2 has a newer ContentUpdatedAt.
|
||||
f2 := newFakeYuque()
|
||||
defer f2.Close()
|
||||
f2.handleJSON("/api/v2/repos/11/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 1, Type: "Doc", Status: "1", Title: "A", Slug: "a", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 2, Type: "Doc", Status: "1", Title: "B2", Slug: "b", ContentUpdatedAt: "2026-04-20T12:00:00Z"},
|
||||
}})
|
||||
// Only doc 2's detail should be fetched; if the implementation incorrectly
|
||||
// fetches doc 1, the test will still pass metadata-wise — but this handler
|
||||
// makes the expected single call clear.
|
||||
f2.handleJSON("/api/v2/repos/docs/2", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 2, Title: "B2", Body: "b2 updated", Status: "1", ContentUpdatedAt: "2026-04-20T12:00:00Z"}})
|
||||
|
||||
items, _, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f2, []string{"11"}), cursor1)
|
||||
if err != nil {
|
||||
t.Fatalf("second sync: %v", err)
|
||||
}
|
||||
if len(items) != 1 {
|
||||
t.Fatalf("expected exactly 1 changed item, got %d: %+v", len(items), items)
|
||||
}
|
||||
if items[0].ExternalID != "2" {
|
||||
t.Errorf("expected changed item ExternalID=2, got %q", items[0].ExternalID)
|
||||
}
|
||||
if string(items[0].Content) != "b2 updated" {
|
||||
t.Errorf("expected refreshed body, got %q", string(items[0].Content))
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_FetchIncremental_DetectsDeletion(t *testing.T) {
|
||||
// First sync: 2 docs
|
||||
f1 := newFakeYuque()
|
||||
f1.handleJSON("/api/v2/repos/10/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 1, Type: "Doc", Status: "1", Title: "A", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
{ID: 2, Type: "Doc", Status: "1", Title: "B", ContentUpdatedAt: "2026-04-20T11:00:00Z"},
|
||||
}})
|
||||
f1.handleJSON("/api/v2/repos/docs/1", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 1, Title: "A", Body: "a", Status: "1", ContentUpdatedAt: "2026-04-20T10:00:00Z"}})
|
||||
f1.handleJSON("/api/v2/repos/docs/2", 200, v2DocDetailResponse{Data: v2DocDetail{ID: 2, Title: "B", Body: "b", Status: "1", ContentUpdatedAt: "2026-04-20T11:00:00Z"}})
|
||||
|
||||
_, cursor1, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f1, []string{"10"}), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("first sync: %v", err)
|
||||
}
|
||||
f1.Close()
|
||||
|
||||
// Second sync: doc 2 removed
|
||||
f2 := newFakeYuque()
|
||||
defer f2.Close()
|
||||
f2.handleJSON("/api/v2/repos/10/docs", 200, v2DocListResponse{Data: []v2Doc{
|
||||
{ID: 1, Type: "Doc", Status: "1", Title: "A", ContentUpdatedAt: "2026-04-20T10:00:00Z"},
|
||||
}})
|
||||
|
||||
items, _, err := NewConnector().FetchIncremental(context.Background(), makeDSConfig(f2, []string{"10"}), cursor1)
|
||||
if err != nil {
|
||||
t.Fatalf("second sync: %v", err)
|
||||
}
|
||||
deleted := 0
|
||||
for _, it := range items {
|
||||
if it.IsDeleted && it.ExternalID == "2" {
|
||||
deleted++
|
||||
}
|
||||
}
|
||||
if deleted != 1 {
|
||||
t.Errorf("expected 1 deletion for doc 2, got %d; items=%+v", deleted, items)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnector_ListResources_ContinuesOnGroupFailure(t *testing.T) {
|
||||
f := newFakeYuque()
|
||||
defer f.Close()
|
||||
f.mux = http.NewServeMux()
|
||||
f.server.Config.Handler = f.mux
|
||||
f.handleJSON("/api/v2/user", 200, v2UserResponse{Data: v2User{ID: 1, Login: "alice"}})
|
||||
f.handleJSON("/api/v2/users/alice/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 10, Slug: "me", Name: "Mine", Type: "Book", Namespace: "alice/me"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/users/1/groups", 200, v2GroupListResponse{Data: []v2Group{
|
||||
{ID: 100, Login: "team-ok"}, {ID: 101, Login: "team-forbidden"},
|
||||
}})
|
||||
f.handleJSON("/api/v2/groups/team-ok/repos", 200, v2RepoListResponse{Data: []v2Repo{
|
||||
{ID: 20, Slug: "ok", Name: "OK Book", Type: "Book", Namespace: "team-ok/ok"},
|
||||
}})
|
||||
// team-forbidden returns 403 — should be skipped, not abort the whole call.
|
||||
f.mux.HandleFunc("/api/v2/groups/team-forbidden/repos", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusForbidden)
|
||||
_, _ = w.Write([]byte(`{"message":"forbidden"}`))
|
||||
})
|
||||
|
||||
resources, err := NewConnector().ListResources(context.Background(), makeDSConfig(f, nil))
|
||||
if err != nil {
|
||||
t.Fatalf("ListResources should not fail on per-group error, got %v", err)
|
||||
}
|
||||
// Expect the 2 successful repos (personal + team-ok). team-forbidden is skipped.
|
||||
if len(resources) != 2 {
|
||||
t.Fatalf("expected 2 resources (personal + team-ok), got %d: %+v", len(resources), resources)
|
||||
}
|
||||
names := map[string]bool{}
|
||||
for _, r := range resources {
|
||||
names[r.Name] = true
|
||||
}
|
||||
if !names["Mine"] || !names["OK Book"] {
|
||||
t.Errorf("expected Mine + OK Book, got %v", names)
|
||||
}
|
||||
}
|
||||
261
internal/datasource/connector/yuque/types.go
Normal file
261
internal/datasource/connector/yuque/types.go
Normal file
@@ -0,0 +1,261 @@
|
||||
// Package yuque implements the Yuque (语雀) data source connector for WeKnora.
|
||||
//
|
||||
// It syncs documents from personal and group knowledge bases (books/repos) into WeKnora
|
||||
// knowledge bases, preserving Markdown formatting.
|
||||
//
|
||||
// Yuque API docs:
|
||||
// - Authentication: X-Auth-Token header, personal token from https://www.yuque.com/settings/tokens
|
||||
// - User: GET /api/v2/user
|
||||
// - Groups: GET /api/v2/users/{id}/groups
|
||||
// - Repos: GET /api/v2/users/{login}/repos, GET /api/v2/groups/{login}/repos
|
||||
// - Docs: GET /api/v2/repos/{book_id}/docs (list), GET /api/v2/repos/docs/{id} (detail)
|
||||
//
|
||||
// Known limitations (v1):
|
||||
// - Only syncs type=Doc (Sheet/Thread/Board/Table skipped)
|
||||
// - Only syncs status="1" (published), drafts skipped
|
||||
// - Private-book images (CDN URLs with auth) may fail to load
|
||||
// - Lake editor may leave non-standard markdown (anchors, sized image attrs)
|
||||
package yuque
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/Tencent/WeKnora/internal/datasource"
|
||||
"github.com/Tencent/WeKnora/internal/types"
|
||||
)
|
||||
|
||||
// DefaultBaseURL is the Yuque public cloud API base URL.
|
||||
const DefaultBaseURL = "https://www.yuque.com"
|
||||
|
||||
// Config holds Yuque-specific configuration.
|
||||
type Config struct {
|
||||
// APIToken is a personal token from Yuque settings → tokens.
|
||||
APIToken string `json:"api_token"`
|
||||
|
||||
// BaseURL is the Yuque deployment base URL (default: https://www.yuque.com).
|
||||
// For enterprise/private deployments, use the company's yuque domain.
|
||||
BaseURL string `json:"base_url,omitempty"`
|
||||
}
|
||||
|
||||
// GetBaseURL returns the normalized base URL:
|
||||
// - empty → DefaultBaseURL
|
||||
// - missing scheme → prepend "https://"
|
||||
// - trailing slash → stripped
|
||||
func (c *Config) GetBaseURL() string {
|
||||
url := strings.TrimSpace(c.BaseURL)
|
||||
if url == "" {
|
||||
return DefaultBaseURL
|
||||
}
|
||||
if !strings.Contains(url, "://") {
|
||||
url = "https://" + url
|
||||
}
|
||||
url = strings.TrimRight(url, "/")
|
||||
return url
|
||||
}
|
||||
|
||||
// parseYuqueConfig extracts and validates Yuque-specific configuration.
|
||||
// Uses JSON marshal/unmarshal roundtrip (consistent with Feishu's parseFeishuConfig)
|
||||
// rather than single-field type assertion, because we have multiple fields with
|
||||
// optional defaults.
|
||||
func parseYuqueConfig(config *types.DataSourceConfig) (*Config, error) {
|
||||
if config == nil {
|
||||
return nil, fmt.Errorf("%w: config is nil", datasource.ErrInvalidConfig)
|
||||
}
|
||||
credBytes, err := json.Marshal(config.Credentials)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal credentials: %w", err)
|
||||
}
|
||||
var cfg Config
|
||||
if err := json.Unmarshal(credBytes, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse yuque credentials: %w", err)
|
||||
}
|
||||
if strings.TrimSpace(cfg.APIToken) == "" {
|
||||
return nil, fmt.Errorf("%w: api_token is required", datasource.ErrInvalidCredentials)
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// --- Yuque API response types ---
|
||||
|
||||
// flexibleStatus accepts either a string ("1") or a number (1) for the doc
|
||||
// `status` field. Yuque's OpenAPI spec declares `status` as string, but the
|
||||
// runtime API returns it as an integer, so unmarshaling into a plain string
|
||||
// fails with "cannot unmarshal number into Go struct field ... of type string".
|
||||
// Normalizing to the textual form lets existing comparisons (e.g. != "1") keep
|
||||
// working for both response shapes.
|
||||
type flexibleStatus string
|
||||
|
||||
func (s *flexibleStatus) UnmarshalJSON(b []byte) error {
|
||||
if string(b) == "null" {
|
||||
*s = ""
|
||||
return nil
|
||||
}
|
||||
if len(b) > 0 && b[0] == '"' {
|
||||
var str string
|
||||
if err := json.Unmarshal(b, &str); err != nil {
|
||||
return err
|
||||
}
|
||||
*s = flexibleStatus(str)
|
||||
return nil
|
||||
}
|
||||
// Integer form. We decode to int64 so that floats, booleans, arrays, and
|
||||
// objects fail loudly instead of being silently stringified — if Yuque
|
||||
// changes the shape again, we'd rather surface a clear error than feed
|
||||
// garbage to the Status == "1" comparison.
|
||||
var i int64
|
||||
if err := json.Unmarshal(b, &i); err != nil {
|
||||
return fmt.Errorf("flexibleStatus: expected string or integer, got %s: %w", b, err)
|
||||
}
|
||||
*s = flexibleStatus(strconv.FormatInt(i, 10))
|
||||
return nil
|
||||
}
|
||||
|
||||
// apiErrorBody is the error body shape Yuque sometimes returns on non-2xx.
|
||||
type apiErrorBody struct {
|
||||
Message string `json:"message"`
|
||||
Status int `json:"status"`
|
||||
}
|
||||
|
||||
// v2UserResponse wraps GET /api/v2/user.
|
||||
type v2UserResponse struct {
|
||||
Data v2User `json:"data"`
|
||||
}
|
||||
|
||||
type v2User struct {
|
||||
ID int64 `json:"id"`
|
||||
Login string `json:"login"`
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
// v2GroupListResponse wraps GET /api/v2/users/{id}/groups.
|
||||
type v2GroupListResponse struct {
|
||||
Data []v2Group `json:"data"`
|
||||
}
|
||||
|
||||
type v2Group struct {
|
||||
ID int64 `json:"id"`
|
||||
Login string `json:"login"`
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
// v2RepoListResponse wraps GET /api/v2/users/{login}/repos and /groups/{login}/repos.
|
||||
type v2RepoListResponse struct {
|
||||
Data []v2Repo `json:"data"`
|
||||
}
|
||||
|
||||
type v2Repo struct {
|
||||
ID int64 `json:"id"`
|
||||
Type string `json:"type"` // "Book" | "Design" (listing filter enum; connector requests type=Book)
|
||||
Slug string `json:"slug"`
|
||||
Name string `json:"name"`
|
||||
UserID int64 `json:"user_id"`
|
||||
Namespace string `json:"namespace"` // e.g. "group_login/book_slug"
|
||||
Public int `json:"public"` // 0:private, 1:public, 2:internal
|
||||
Description string `json:"description"`
|
||||
UpdatedAt string `json:"updated_at"` // RFC3339 string
|
||||
}
|
||||
|
||||
// v2DocListResponse wraps GET /api/v2/repos/{book_id}/docs.
|
||||
type v2DocListResponse struct {
|
||||
Meta struct {
|
||||
Total int `json:"total"`
|
||||
} `json:"meta"`
|
||||
Data []v2Doc `json:"data"`
|
||||
}
|
||||
|
||||
// v2Doc is the document summary returned by the list endpoint (no body).
|
||||
type v2Doc struct {
|
||||
ID int64 `json:"id"`
|
||||
Type string `json:"type"` // Doc / Sheet / Thread / Board / Table
|
||||
Slug string `json:"slug"`
|
||||
Title string `json:"title"`
|
||||
BookID int64 `json:"book_id"`
|
||||
UserID int64 `json:"user_id"`
|
||||
Status flexibleStatus `json:"status"` // "0" draft, "1" published — API may return int or string
|
||||
ContentUpdatedAt string `json:"content_updated_at"` // RFC3339 string — use for change detection
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
WordCount int `json:"word_count"`
|
||||
}
|
||||
|
||||
// v2DocDetailResponse wraps GET /api/v2/repos/docs/{id}.
|
||||
type v2DocDetailResponse struct {
|
||||
Data v2DocDetail `json:"data"`
|
||||
}
|
||||
|
||||
type v2DocDetail struct {
|
||||
ID int64 `json:"id"`
|
||||
Type string `json:"type"`
|
||||
Slug string `json:"slug"`
|
||||
Title string `json:"title"`
|
||||
BookID int64 `json:"book_id"`
|
||||
Format string `json:"format"` // "markdown" / "lake" / "html"
|
||||
Body string `json:"body"` // Markdown content
|
||||
Status flexibleStatus `json:"status"` // API may return int or string
|
||||
ContentUpdatedAt string `json:"content_updated_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
WordCount int `json:"word_count"`
|
||||
Book v2Repo `json:"book"`
|
||||
}
|
||||
|
||||
// yuqueCursor stores incremental sync state.
|
||||
// Key1: book_id (string), Key2: doc_id (string), Value: content_updated_at (raw RFC3339 string)
|
||||
type yuqueCursor struct {
|
||||
LastSyncTime time.Time `json:"last_sync_time"`
|
||||
BookDocTimes map[string]map[string]string `json:"book_doc_times,omitempty"`
|
||||
}
|
||||
|
||||
// parseContentUpdatedAt parses Yuque ISO 8601 timestamp (returns zero time on parse failure).
|
||||
func parseContentUpdatedAt(ts string) time.Time {
|
||||
if ts == "" {
|
||||
return time.Time{}
|
||||
}
|
||||
t, err := time.Parse(time.RFC3339, ts)
|
||||
if err != nil {
|
||||
return time.Time{}
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// sanitizeFileName removes characters that are invalid in filenames and
|
||||
// truncates to a safe length at a UTF-8 rune boundary. Raw byte truncation
|
||||
// would split a multi-byte codepoint (Chinese characters are 3 bytes in UTF-8)
|
||||
// and produce an invalid UTF-8 string, which downstream filename validation
|
||||
// (utf8.ValidString) rejects with "文件名包含非法字符".
|
||||
func sanitizeFileName(name string) string {
|
||||
if name == "" {
|
||||
return "untitled"
|
||||
}
|
||||
replacer := strings.NewReplacer(
|
||||
"/", "_", "\\", "_", ":", "_", "*", "_",
|
||||
"?", "_", "\"", "_", "<", "_", ">", "_", "|", "_",
|
||||
)
|
||||
result := replacer.Replace(name)
|
||||
const maxBytes = 200
|
||||
if len(result) > maxBytes {
|
||||
result = result[:maxBytes]
|
||||
// Peel any trailing bytes that no longer form a complete rune.
|
||||
// DecodeLastRuneInString returns (RuneError, 1) for a partial UTF-8
|
||||
// sequence — at most 3 bytes need peeling since a rune is ≤ 4 bytes.
|
||||
for len(result) > 0 {
|
||||
r, size := utf8.DecodeLastRuneInString(result)
|
||||
if r != utf8.RuneError || size != 1 {
|
||||
break
|
||||
}
|
||||
result = result[:len(result)-1]
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// redactToken returns a masked form of the token for logging (never log the full token).
|
||||
func redactToken(t string) string {
|
||||
if len(t) < 12 {
|
||||
return "***"
|
||||
}
|
||||
return t[:6] + "..." + t[len(t)-4:]
|
||||
}
|
||||
176
internal/datasource/connector/yuque/types_test.go
Normal file
176
internal/datasource/connector/yuque/types_test.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package yuque
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/Tencent/WeKnora/internal/types"
|
||||
)
|
||||
|
||||
// Yuque's runtime API returns the doc `status` field as an integer (0 or 1),
|
||||
// even though its OpenAPI spec declares it as string. We tolerate both forms.
|
||||
func TestV2Doc_Status_AcceptsNumberAndString(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
body string
|
||||
want flexibleStatus
|
||||
}{
|
||||
{"number 1", `{"id":1,"status":1}`, "1"},
|
||||
{"number 0", `{"id":1,"status":0}`, "0"},
|
||||
{"string 1", `{"id":1,"status":"1"}`, "1"},
|
||||
{"null", `{"id":1,"status":null}`, ""},
|
||||
{"missing", `{"id":1}`, ""},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
var d v2Doc
|
||||
if err := json.Unmarshal([]byte(tc.body), &d); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if d.Status != tc.want {
|
||||
t.Errorf("Status = %q, want %q", d.Status, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestV2DocDetail_Status_AcceptsNumber(t *testing.T) {
|
||||
var d v2DocDetail
|
||||
if err := json.Unmarshal([]byte(`{"id":1,"status":1}`), &d); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if d.Status != "1" {
|
||||
t.Errorf("Status = %q, want %q", d.Status, "1")
|
||||
}
|
||||
}
|
||||
|
||||
func TestV2DocListResponse_Status_Number(t *testing.T) {
|
||||
// Reproduces the real-world failure:
|
||||
// decode response: json: cannot unmarshal number into Go struct field v2Doc.data.status of type string
|
||||
body := `{"meta":{"total":1},"data":[{"id":101,"type":"Doc","status":1,"title":"hello"}]}`
|
||||
var resp v2DocListResponse
|
||||
if err := json.Unmarshal([]byte(body), &resp); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if len(resp.Data) != 1 {
|
||||
t.Fatalf("len(Data) = %d", len(resp.Data))
|
||||
}
|
||||
if resp.Data[0].Status != "1" {
|
||||
t.Errorf("Status = %q, want %q", resp.Data[0].Status, "1")
|
||||
}
|
||||
}
|
||||
|
||||
// flexibleStatus deliberately rejects unexpected JSON shapes so a future Yuque
|
||||
// schema drift (e.g. status becoming a float, bool, array) fails loudly rather
|
||||
// than being silently stringified and bypassing the `!= "1"` filter.
|
||||
func TestV2Doc_Status_RejectsUnexpectedShapes(t *testing.T) {
|
||||
cases := []string{
|
||||
`{"id":1,"status":true}`,
|
||||
`{"id":1,"status":1.5}`,
|
||||
`{"id":1,"status":[1]}`,
|
||||
`{"id":1,"status":{"x":1}}`,
|
||||
}
|
||||
for _, body := range cases {
|
||||
t.Run(body, func(t *testing.T) {
|
||||
var d v2Doc
|
||||
if err := json.Unmarshal([]byte(body), &d); err == nil {
|
||||
t.Errorf("expected error for %s, got Status=%q", body, d.Status)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseYuqueConfig(t *testing.T) {
|
||||
t.Run("valid full", func(t *testing.T) {
|
||||
cfg, err := parseYuqueConfig(&types.DataSourceConfig{
|
||||
Credentials: map[string]interface{}{
|
||||
"api_token": "tok-123",
|
||||
"base_url": "https://company.yuque.com",
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if cfg.APIToken != "tok-123" {
|
||||
t.Errorf("APIToken = %q, want tok-123", cfg.APIToken)
|
||||
}
|
||||
if cfg.BaseURL != "https://company.yuque.com" {
|
||||
t.Errorf("BaseURL = %q", cfg.BaseURL)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("default base_url", func(t *testing.T) {
|
||||
cfg, err := parseYuqueConfig(&types.DataSourceConfig{
|
||||
Credentials: map[string]interface{}{"api_token": "tok-abc"},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if cfg.GetBaseURL() != "https://www.yuque.com" {
|
||||
t.Errorf("GetBaseURL default = %q, want https://www.yuque.com", cfg.GetBaseURL())
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("missing api_token", func(t *testing.T) {
|
||||
_, err := parseYuqueConfig(&types.DataSourceConfig{
|
||||
Credentials: map[string]interface{}{},
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing api_token")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("nil config", func(t *testing.T) {
|
||||
_, err := parseYuqueConfig(nil)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nil config")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetBaseURL_TrimsTrailingSlash(t *testing.T) {
|
||||
c := &Config{BaseURL: "https://x.yuque.com/"}
|
||||
if got := c.GetBaseURL(); got != "https://x.yuque.com" {
|
||||
t.Errorf("GetBaseURL = %q, want https://x.yuque.com", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetBaseURL_AddsSchemeIfMissing(t *testing.T) {
|
||||
c := &Config{BaseURL: "company.yuque.com"}
|
||||
if got := c.GetBaseURL(); got != "https://company.yuque.com" {
|
||||
t.Errorf("GetBaseURL = %q, want https://company.yuque.com", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetBaseURL_EmptyUsesDefault(t *testing.T) {
|
||||
c := &Config{BaseURL: ""}
|
||||
if got := c.GetBaseURL(); got != "https://www.yuque.com" {
|
||||
t.Errorf("GetBaseURL = %q, want default", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetBaseURL_TrimsWhitespace(t *testing.T) {
|
||||
c := &Config{BaseURL: " https://x.yuque.com/ "}
|
||||
if got := c.GetBaseURL(); got != "https://x.yuque.com" {
|
||||
t.Errorf("GetBaseURL = %q, want https://x.yuque.com", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeFileName_TruncatesAtRuneBoundary(t *testing.T) {
|
||||
// Long Chinese title (each 测 is 3 bytes in UTF-8). Raw byte slicing at 200
|
||||
// would split a rune and produce invalid UTF-8 that downstream filename
|
||||
// validation rejects with "文件名包含非法字符".
|
||||
long := strings.Repeat("测试", 100) // 600 bytes
|
||||
got := sanitizeFileName(long)
|
||||
if !utf8.ValidString(got) {
|
||||
t.Fatalf("sanitizeFileName produced invalid UTF-8: %q", got)
|
||||
}
|
||||
if len(got) > 200 {
|
||||
t.Errorf("len = %d, want ≤ 200", len(got))
|
||||
}
|
||||
if len(got) == 0 {
|
||||
t.Error("result is empty")
|
||||
}
|
||||
}
|
||||
@@ -191,6 +191,16 @@ func GetLogger(c context.Context) *logrus.Entry {
|
||||
return logrus.NewEntry(appLogger)
|
||||
}
|
||||
|
||||
// SetOutput overrides the internal logger's output destination.
|
||||
// Intended for use in tests that need to capture and assert on log content
|
||||
// (e.g. verifying secrets are not written out). Restore the original writer
|
||||
// (usually os.Stdout) in a defer after the test.
|
||||
func SetOutput(w io.Writer) {
|
||||
loggerMu.Lock()
|
||||
defer loggerMu.Unlock()
|
||||
appLogger.SetOutput(w)
|
||||
}
|
||||
|
||||
// SetLogLevel 设置日志级别
|
||||
func SetLogLevel(level LogLevel) {
|
||||
var logLevel logrus.Level
|
||||
|
||||
@@ -29,6 +29,7 @@ const (
|
||||
ChannelSlack = "slack" // Slack
|
||||
ChannelIM = "im" // Generic IM channel
|
||||
ChannelNotion = "notion" // Notion
|
||||
ChannelYuque = "yuque" // Yuque (语雀)
|
||||
)
|
||||
|
||||
// Knowledge parse status constants
|
||||
|
||||
Reference in New Issue
Block a user