Files
WeKnora/cli/cmd/doc/upload.go
nullkey e623e8208f refactor(cli): delete envelope infrastructure, errors to stderr
Removes the entire envelope machinery now that every success path
emits bare JSON:

- cli/internal/format/envelope.go (Envelope, Success, Failure,
  SuccessWithRisk, WriteEnvelope, Meta, Notice, UpdateNotice,
  VersionSkewNotice, Risk, RiskLevel, ErrorBody) + tests.
- cli/internal/format/filter.go envelope-specific helpers
  (WriteEnvelopeFiltered, marshalEnvelope, applyFieldFilter,
  filterDataPayload, filterObjectData); the reusable
  filterArrayItems / filterObjectKeys / writeJQ stay for bare.go.
- cli/internal/cmdutil/exporter.go + tests (envelope-only).
- cli/internal/cmdutil/PrintErrorEnvelope + ToErrorBody +
  operationRiskOf + Error.OperationRisk field + OperationRisk struct.

Error path: all errors now go to stderr via cmdutil.PrintError in
`code: message\nhint: ...` form, regardless of --json. Stdout stays
empty (or holds the partial-success the command already wrote) so
downstream `--json | jq` pipelines never have to filter error shapes
out of the success stream. Typed exit codes (3 auth.* / 4
resource.not_found / 5 input.* / 6 server.rate_limited / 7 server.*
+ network.* / 10 input.confirmation_required) carry the failure
class for agents that branch on it.

Acceptance contract:
- envelope_test.go → wire_test.go (TestEnvelopeGolden → TestWireGolden).
- testdata/envelopes/ → testdata/wire/.
- Error-path cases assert the typed code substring on stderr.
- Orphan whoami.*.json goldens deleted.

AGENTS.md + README.md rewritten for the bare-data contract:
- Drop envelope schema section + dry-run rule.
- Document bare JSON on stdout + `code: msg\nhint: …` on stderr.
- ADR-3 reframed around bare data and why error separation matters
  for `--json | jq` pipelines.

WriteJSONFiltered short-circuits to WriteJSON when both filters are
empty, skipping the marshal-buffer round-trip in the common case.

Final review pass:
- Fix wire-contract bug: `--json id,name` (space form) is broken by
  pflag's NoOptDefVal; AGENTS.md / README.md / SetAgentHelp + the
  field-discovery help text all switched to `--json=id,name`.
- Fix `weknora api --jq` silently ignored: api.go now routes through
  WriteJSONFiltered with jopts.JQ.
- AGENTS.md: drop the false claim that `auth logout` honors `-y`
  (logout is local-only with no ConfirmDestructive guard); list the
  actual destructive commands instead.
- Rewrite cli/acceptance/e2e/e2e_test.go for the bare-data wire shape
  (was still parsing `out["data"]` / `env["ok"]`).
- Add `JSONOptions.Emit(w, v)` helper; collapse ~33 repeated
  `format.WriteJSONFiltered(iostreams.IO.Out, X, jopts.Fields,
  jopts.JQ)` sites to `jopts.Emit(iostreams.IO.Out, X)` — drops the
  format import from 22 cmd/* files.
- Delete single-caller `cmdutil.MustRequireFlag`; inline as
  `_ = cmd.MarkFlagRequired(...)` everywhere.
- Add `_ = cmd.MarkFlagRequired("name")` to `kb create`; it was the
  only write command relying on runtime --name validation while
  `context add` already used the cobra-level mark.
- `context use`: register `--json` / `--jq` (it previously emitted JSON
  unconditionally, with no human path and no flag, diverging from
  every other write command); human mode now prints
  `✓ Switched context to X (was Y)`.
- Replace per-package `confirmPrompter` / `scriptedConfirm` /
  `errPrompter` test doubles with `testutil.ConfirmPrompter`.
- Rename `chatService` → `ChatService` (export to match siblings
  `ListService` / `ViewService`); rename `printUploadSuccess` →
  `renderUploadSuccess` (siblings use `render*`).
- `defaultHint(CodeResourceNotFound)`: drop the hardcoded
  "list available with `weknora kb list`" — misleading on agent /
  doc / session 404. Replaced with "verify the resource ID and try
  again".
- Strip stale `v0.2/v0.3` / "envelope" / "v0.0/v0.1 supports only"
  historical tags from production comments and a few test
  descriptions.
2026-05-15 12:03:56 +08:00

218 lines
8.5 KiB
Go

package doc
import (
"context"
"errors"
"fmt"
"os"
"github.com/spf13/cobra"
"github.com/Tencent/WeKnora/cli/internal/aiclient"
"github.com/Tencent/WeKnora/cli/internal/cmdutil"
"github.com/Tencent/WeKnora/cli/internal/iostreams"
sdk "github.com/Tencent/WeKnora/client"
)
// uploadChannel is the ingestion-channel tag the server records for CLI uploads.
// Distinct from "web" (browser UI), "browser_extension" (one-click capture),
// and "wechat" (mini-program). The server uses this only for analytics.
//
// In this file it is passed as the channel argument of
// CreateKnowledgeFromFile and set as the Channel field of
// sdk.CreateKnowledgeFromURLRequest.
const uploadChannel = "api"
// docUploadFields lists the field names registered for `--json` field
// discovery on `doc upload` (it is handed to cmdutil.AddJSONFlags). A
// single-file upload returns the full Knowledge struct, and these are that
// struct's top-level json tags. Order is significant only in that it is the
// order the names are advertised in.
var docUploadFields = []string{
	// Identity and classification.
	"id", "knowledge_base_id", "tag_id", "type",
	// Descriptive metadata.
	"title", "description", "source", "channel",
	// Processing state.
	"parse_status", "summary_status", "enable_status",
	"embedding_model_id",
	// Stored file attributes.
	"file_name", "file_type", "file_size", "file_hash", "file_path",
	"storage_size",
	// Timestamps and failure detail.
	"created_at", "updated_at", "processed_at",
	"error_message",
}
// UploadOptions holds the flag values bound by `doc upload`. The three input
// modes (positional file, --recursive directory walk, --from-url) are
// checked for mutual exclusion by validateUploadFlags before any are used.
type UploadOptions struct {
	// Name is the --name value: a custom file name to record. For URL
	// ingest it is sent as the request's FileName.
	Name string

	// Recursive is --recursive: the positional argument is a directory;
	// walk it and upload each match.
	Recursive bool

	// Glob is --glob: the filename pattern applied under --recursive
	// (default "*").
	Glob string

	// FromURL is --from-url: ingest a remote URL via the SDK's
	// CreateKnowledgeFromURL instead of uploading a local file.
	FromURL string
}
// UploadService is the minimal slice of the SDK client that `doc upload`
// needs, declared here so the run functions can be driven by a test double.
// *sdk.Client satisfies it.
type UploadService interface {
	// CreateKnowledgeFromFile uploads the local file at filePath into the
	// knowledge base kbID. This command passes nil for metadata and
	// enableMultimodel, the --name value as customFileName, and
	// uploadChannel as channel.
	CreateKnowledgeFromFile(
		ctx context.Context,
		kbID string,
		filePath string,
		metadata map[string]string,
		enableMultimodel *bool,
		customFileName string,
		channel string,
	) (*sdk.Knowledge, error)

	// CreateKnowledgeFromURL ingests the remote resource described by req
	// into the knowledge base kbID.
	CreateKnowledgeFromURL(
		ctx context.Context,
		kbID string,
		req sdk.CreateKnowledgeFromURLRequest,
	) (*sdk.Knowledge, error)
}
// NewCmdUpload builds `weknora doc upload <file>`.
//
// The command accepts at most one positional argument and dispatches to one
// of three modes: --from-url remote ingest, --recursive directory walk, or
// (by default) a single local-file upload. Flag validation runs before the
// knowledge base is resolved and before the client is created, so input
// errors are reported first.
func NewCmdUpload(f *cmdutil.Factory) *cobra.Command {
	// Help text is kept verbatim; only its placement differs from the
	// struct literal below.
	const (
		longHelp = `Uploads a file (PDF / DOCX / Markdown / TXT / etc.) to the resolved
knowledge base. KB resolution follows the standard 4-level chain:
--kb flag > WEKNORA_KB_ID env > .weknora/project.yaml > error. The --kb
flag accepts either a KB UUID (passed through) or a name (resolved via list).
Pass --name to override the recorded file name (useful when the local file
has a generic name like "report.pdf" but you want to surface it as e.g.
"Q3 Marketing Report.pdf" in the UI).
Use --recursive --glob to upload a directory tree (see Examples).`

		exampleHelp = ` weknora doc upload report.pdf
weknora doc upload notes.md --kb a32a63ff-fb36-4874-bcaa-30f48570a694
weknora doc upload notes.md --kb my-kb
weknora doc upload q3.pdf --name "Q3 Marketing Report.pdf"
weknora doc upload ./docs --recursive --glob '*.pdf'
weknora doc upload --from-url https://example.com/whitepaper.pdf
weknora doc upload --from-url https://example.com/article.html --name "Q3 Article"`
	)

	opts := &UploadOptions{}

	// run is the RunE handler: check output flags, check input-mode flags,
	// resolve the KB, build the client, then dispatch on the selected mode.
	run := func(c *cobra.Command, args []string) error {
		jopts, err := cmdutil.CheckJSONFlags(c)
		if err != nil {
			return err
		}
		if err := validateUploadFlags(opts, args); err != nil {
			return err
		}
		kbID, err := f.ResolveKB(c)
		if err != nil {
			return err
		}
		client, err := f.Client()
		if err != nil {
			return err
		}

		if opts.FromURL != "" {
			return runUploadFromURL(c.Context(), opts, jopts, client, kbID)
		}

		// validateUploadFlags guarantees exactly one positional argument
		// whenever --from-url is absent.
		target := args[0]
		if opts.Recursive {
			return runUploadRecursive(c.Context(), opts, jopts, client, kbID, target)
		}
		if err := validateUploadPath(target); err != nil {
			return err
		}
		return runUpload(c.Context(), opts, jopts, client, kbID, target)
	}

	cmd := &cobra.Command{
		Use:     "upload <file>",
		Short:   "Upload a local file to the knowledge base",
		Long:    longHelp,
		Example: exampleHelp,
		Args:    cobra.MaximumNArgs(1),
		RunE:    run,
	}

	flags := cmd.Flags()
	// --kb is registered without a bound variable; it is read elsewhere
	// (presumably by f.ResolveKB — confirm against the Factory).
	flags.String("kb", "", "Knowledge base UUID or name (overrides env / project link)")
	flags.StringVar(&opts.Name, "name", "", "Custom file name to record (defaults to base name)")
	flags.BoolVar(&opts.Recursive, "recursive", false, "Treat the positional argument as a directory to walk")
	flags.StringVar(&opts.Glob, "glob", "*", "Filename pattern to filter when --recursive (e.g. '*.pdf')")
	flags.StringVar(&opts.FromURL, "from-url", "", "Ingest a remote `URL` (HTTP/HTTPS) instead of a local file")

	cmdutil.AddJSONFlags(cmd, docUploadFields)
	aiclient.SetAgentHelp(cmd, "Three modes (mutually exclusive): local file (positional), --recursive directory walk + --glob, or --from-url remote ingest. Returns data: Knowledge object (file/URL) or aggregate report (recursive). Errors: resource.already_exists (URL previously ingested) / local.upload_file_not_found.")
	return cmd
}
// validateUploadFlags checks that the requested input mode is coherent.
//
// Without --from-url, exactly one positional argument must be present.
// With --from-url, a positional path and --recursive are both rejected, and
// the URL itself is then checked by cmdutil.ValidateHTTPURL. Every
// rejection is reported as cmdutil.CodeInputInvalidArgument, except
// whatever ValidateHTTPURL returns.
func validateUploadFlags(opts *UploadOptions, args []string) error {
	pathGiven := len(args) == 1
	urlGiven := opts.FromURL != ""

	switch {
	case !urlGiven && !pathGiven:
		return cmdutil.NewError(cmdutil.CodeInputInvalidArgument,
			"a file path is required (or pass --from-url)")
	case !urlGiven:
		// Local-file or recursive mode with its path supplied.
		return nil
	case pathGiven:
		return cmdutil.NewError(cmdutil.CodeInputInvalidArgument,
			"cannot pass a file path with --from-url; choose one input mode")
	case opts.Recursive:
		return cmdutil.NewError(cmdutil.CodeInputInvalidArgument,
			"--recursive cannot be combined with --from-url")
	default:
		return cmdutil.ValidateHTTPURL("--from-url", opts.FromURL)
	}
}
// runUploadFromURL ingests opts.FromURL into kbID through the SDK's
// CreateKnowledgeFromURL. The --name value is sent as the request's
// FileName and uploadChannel as its Channel.
//
// NOTE(review): the original comment states the server uses FileName as a
// hint to switch from crawl mode to file-download mode when the extension
// is recognised; that behavior is server-side and not visible here.
//
// On success the result is rendered by renderUploadSuccess with the verb
// "Ingested". sdk.ErrDuplicateURL is reported under the typed
// CodeResourceAlreadyExists code (the SDK error is handed to Wrapf as the
// cause); any other failure goes through cmdutil.WrapHTTP.
func runUploadFromURL(ctx context.Context, opts *UploadOptions, jopts *cmdutil.JSONOptions, svc UploadService, kbID string) error {
	knowledge, ingestErr := svc.CreateKnowledgeFromURL(ctx, kbID, sdk.CreateKnowledgeFromURLRequest{
		URL:      opts.FromURL,
		FileName: opts.Name,
		Channel:  uploadChannel,
	})

	switch {
	case ingestErr == nil:
		return renderUploadSuccess(knowledge, jopts, "Ingested", opts.Name, opts.FromURL)
	case errors.Is(ingestErr, sdk.ErrDuplicateURL):
		// The typed code is what agents branch on, so the duplicate case
		// gets its own code rather than the generic HTTP mapping.
		return cmdutil.Wrapf(cmdutil.CodeResourceAlreadyExists, ingestErr,
			"URL already ingested into this knowledge base")
	default:
		return cmdutil.WrapHTTP(ingestErr, "ingest URL %s", opts.FromURL)
	}
}
// renderUploadSuccess writes the result of a completed upload or ingest to
// iostreams.IO.Out.
//
// When JSON output is enabled, k is emitted through jopts.Emit and that
// call's error is returned. Otherwise a single checkmark line is printed
// using humanVerb, a display name, and k.ID. The display name is the first
// non-empty value among customName, k.FileName, and fallbackDisplay. Both
// the single-file upload and the URL ingest paths call this helper.
func renderUploadSuccess(k *sdk.Knowledge, jopts *cmdutil.JSONOptions, humanVerb, customName, fallbackDisplay string) error {
	if !jopts.Enabled() {
		label := fallbackDisplay
		switch {
		case customName != "":
			label = customName
		case k.FileName != "":
			label = k.FileName
		}
		fmt.Fprintf(iostreams.IO.Out, "✓ %s %q (id: %s)\n", humanVerb, label, k.ID)
		return nil
	}
	return jopts.Emit(iostreams.IO.Out, k)
}
// validateUploadPath checks that path exists and resolves to a regular file,
// so users get a typed error up-front instead of an opaque SDK failure
// mid-upload.
//
// os.Stat (not Lstat) is used, so symlinks are followed: a symlink to a
// regular file is accepted — that matches what `cp` / `git add` do, and the
// SDK opens the file via os.Open which follows symlinks anyway. Directories,
// devices, and other non-regular targets are rejected.
//
// Errors:
//   - CodeUploadFileNotFound when the path does not exist.
//   - CodeLocalFileIO when the stat fails for any other reason.
//   - CodeInputInvalidArgument when the target is not a regular file.
func validateUploadPath(path string) error {
	info, err := os.Stat(path)
	if err != nil {
		// errors.Is is the form the os package recommends over the older
		// os.IsNotExist helper; it also sees through wrapped errors.
		if errors.Is(err, os.ErrNotExist) {
			return cmdutil.Wrapf(cmdutil.CodeUploadFileNotFound, err, "file not found: %s", path)
		}
		return cmdutil.Wrapf(cmdutil.CodeLocalFileIO, err, "stat %s", path)
	}
	if !info.Mode().IsRegular() {
		return cmdutil.NewError(cmdutil.CodeInputInvalidArgument,
			fmt.Sprintf("not a regular file: %s (directories and devices are not supported)", path))
	}
	return nil
}
// runUpload uploads the single local file at path into kbID via
// CreateKnowledgeFromFile, recording opts.Name as the custom file name and
// uploadChannel as the channel. On success the result is rendered with the
// verb "Uploaded"; on failure the SDK error is passed through
// cmdutil.WrapHTTP.
func runUpload(ctx context.Context, opts *UploadOptions, jopts *cmdutil.JSONOptions, svc UploadService, kbID, path string) error {
	knowledge, uploadErr := svc.CreateKnowledgeFromFile(
		ctx,
		kbID,
		path,
		nil, // metadata
		nil, // enableMultimodel
		opts.Name,
		uploadChannel,
	)
	if uploadErr == nil {
		return renderUploadSuccess(knowledge, jopts, "Uploaded", opts.Name, path)
	}
	return cmdutil.WrapHTTP(uploadErr, "upload %s", path)
}