diff --git a/.env.example b/.env.example index 63427ea3..df19d5d1 100644 --- a/.env.example +++ b/.env.example @@ -101,12 +101,7 @@ JWT_SECRET=weknora-jwt-secret # Embedding并发数,出现429错误时,可调小此参数 CONCURRENCY_POOL_SIZE=5 -# Docreader 并发任务数(图片OCR/Caption等异步任务),默认 1 -# 默认使用的 paddleocr 在高并发场景下,会出现异常,请谨慎设置 -# IMAGE_MAX_CONCURRENT=1 - -# Docreader OCR后端(no_ocr, paddle, nanonets) -# OCR_BACKEND=paddle +# (Removed: IMAGE_MAX_CONCURRENT, OCR_BACKEND — moved to Go App module after lightweight refactoring) # 如果使用ElasticSearch作为向量存储,需要配置以下参数 # ElasticSearch地址,例如 http://localhost:9200 @@ -242,3 +237,9 @@ APK_MIRROR_ARG=mirrors.tencent.com # Milvus 数据库名称(可选) # MILVUS_DB_NAME=your_milvus_db_name + +# Docreader 地址 +DOCREADER_ADDR=docreader:50051 + +# Docreader 连接方式 +DOCREADER_TRANSPORT=grpc diff --git a/.env.lite.example b/.env.lite.example new file mode 100644 index 00000000..cfdcb56b --- /dev/null +++ b/.env.lite.example @@ -0,0 +1,45 @@ +# WeKnora Lite 配置模板 +# 复制此文件为 .env.lite 并按需修改 +# cp .env.lite.example .env.lite + +GIN_MODE=release + +# === 数据库 === +DB_DRIVER=sqlite +DB_PATH=./data/weknora.db + +# === 检索引擎(FTS5 + sqlite-vec)=== +RETRIEVE_DRIVER=sqlite + +# === 文件存储 === +STORAGE_TYPE=local +LOCAL_STORAGE_BASE_DIR=./data/files + +# === 流管理(内存,无 Redis)=== +STREAM_MANAGER_TYPE=memory + +# === LLM 服务 === +# Ollama 本地服务(默认地址,按需修改) +OLLAMA_BASE_URL=http://127.0.0.1:11434 +# 如使用其他 OpenAI 兼容服务,取消注释: +# OPENAI_API_KEY=sk-xxx +# OPENAI_BASE_URL=https://api.openai.com/v1 + +# === 安全配置(生产环境请务必修改!)=== +TENANT_AES_KEY=CHANGE-ME-32-char-secret-key!!!! 
+JWT_SECRET=CHANGE-ME-jwt-secret + +# === 功能开关 === +NEO4J_ENABLE=false +WEKNORA_SANDBOX_MODE=disabled +ENABLE_GRAPH_RAG=false +DISABLE_REGISTRATION=false + +# === 性能 === +CONCURRENCY_POOL_SIZE=3 + +# Docreader 地址 +DOCREADER_ADDR=127.0.0.1:50051 + +# Docreader 传输方式 +DOCREADER_TRANSPORT=grpc \ No newline at end of file diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index ccd260ae..4e43ecb9 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -137,13 +137,13 @@ jobs: platform: linux/arm64 runs: ubuntu-24.04-arm runs-on: ${{ matrix.runs }} + timeout-minutes: 45 steps: - name: Checkout code uses: actions/checkout@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - id: setup-buildx - name: Login to Docker Hub uses: docker/login-action@v3 @@ -172,23 +172,6 @@ jobs: # 显示版本信息 ./scripts/get_version.sh info - - name: Build Cache for Docker - uses: actions/cache@v4 - id: cache - with: - path: go-pkg-mod - key: ${{ env.PLATFORM_PAIR }}-go-build-cache-${{ hashFiles('**/go.sum') }} - - - name: Inject go-build-cache - uses: reproducible-containers/buildkit-cache-dance@v3 - with: - builder: ${{ steps.setup-buildx.outputs.name }} - cache-map: | - { - "go-pkg-mod": "/go/pkg/mod" - } - skip-extraction: ${{ steps.cache.outputs.cache-hit }} - - name: Build app Image id: build uses: docker/build-push-action@v3 diff --git a/.github/workflows/release-lite.yml b/.github/workflows/release-lite.yml new file mode 100644 index 00000000..eb6b0bf9 --- /dev/null +++ b/.github/workflows/release-lite.yml @@ -0,0 +1,246 @@ +name: Release Lite Binaries + +on: + push: + tags: + - "v*" + workflow_dispatch: + inputs: + tag: + description: "Release tag (e.g. v0.2.0)" + required: true + +concurrency: + group: release-lite-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +env: + GO_VERSION: "1.24" + NODE_VERSION: "22" + +jobs: + # ── 1. 
Build frontend once, share as artifact ── + build-frontend: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: npm + cache-dependency-path: frontend/package-lock.json + + - name: Build frontend + working-directory: frontend + run: | + npm ci + npm run build + + - uses: actions/upload-artifact@v4 + with: + name: frontend-dist + path: frontend/dist/ + retention-days: 1 + + # ── 2. Build Go binary per platform (native CGO) ── + build-binary: + needs: build-frontend + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + goos: linux + goarch: amd64 + - os: ubuntu-24.04-arm + goos: linux + goarch: arm64 + - os: macos-13 + goos: darwin + goarch: amd64 + - os: macos-14 + goos: darwin + goarch: arm64 + runs-on: ${{ matrix.os }} + env: + CGO_ENABLED: 1 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache: true + + - uses: actions/download-artifact@v4 + with: + name: frontend-dist + path: web/ + + - name: Resolve version + id: ver + run: | + if [ -n "${{ inputs.tag }}" ]; then + VERSION="${{ inputs.tag }}" + else + VERSION="${GITHUB_REF#refs/tags/}" + fi + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "Building ${VERSION} for ${{ matrix.goos }}/${{ matrix.goarch }}" + + - name: Build + run: | + export EDITION=lite + eval "$(./scripts/get_version.sh env)" + LDFLAGS="-w -s $(./scripts/get_version.sh ldflags)" + export CGO_CFLAGS="-Wno-deprecated-declarations" + if [ "${{ matrix.goos }}" = "darwin" ]; then + export CGO_LDFLAGS="-Wl,-no_warn_duplicate_libraries" + fi + go build -tags "sqlite_fts5" -ldflags="${LDFLAGS}" \ + -o WeKnora-lite ./cmd/server + + - name: Package tarball + run: | + ARCHIVE="WeKnora-lite_${{ steps.ver.outputs.version }}_${{ matrix.goos }}_${{ matrix.goarch }}" + mkdir -p "${ARCHIVE}/web" + cp WeKnora-lite "${ARCHIVE}/" + cp -r web/* "${ARCHIVE}/web/" + cp 
.env.lite.example "${ARCHIVE}/" + cp docs/LITE.md "${ARCHIVE}/README.md" + if [ -d config ]; then + cp -r config "${ARCHIVE}/config" + fi + if [ -d migrations/sqlite ]; then + mkdir -p "${ARCHIVE}/migrations/sqlite" + cp -r migrations/sqlite/* "${ARCHIVE}/migrations/sqlite/" + fi + if [ -f deploy/weknora-lite.service ]; then + cp deploy/weknora-lite.service "${ARCHIVE}/" + fi + tar czf "${ARCHIVE}.tar.gz" "${ARCHIVE}" + shasum -a 256 "${ARCHIVE}.tar.gz" > "${ARCHIVE}.tar.gz.sha256" + + - uses: actions/upload-artifact@v4 + with: + name: release-${{ matrix.goos }}-${{ matrix.goarch }} + path: | + WeKnora-lite_*.tar.gz + WeKnora-lite_*.tar.gz.sha256 + retention-days: 3 + + # ── 3. Create GitHub Release ── + release: + needs: build-binary + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/download-artifact@v4 + with: + pattern: release-* + merge-multiple: true + + - name: Resolve version + id: ver + run: | + if [ -n "${{ inputs.tag }}" ]; then + VERSION="${{ inputs.tag }}" + else + VERSION="${GITHUB_REF#refs/tags/}" + fi + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + + - name: Generate release notes + id: notes + run: | + cat > notes.md << 'NOTES' + ## WeKnora Lite ${{ steps.ver.outputs.version }} + + 单二进制部署,零外部依赖(无需 Docker / PostgreSQL / Redis)。 + + ### 快速开始 + + ```bash + # 1. 解压 + tar xzf WeKnora-lite_${{ steps.ver.outputs.version }}_<os>_<arch>.tar.gz + cd WeKnora-lite_${{ steps.ver.outputs.version }}_<os>_<arch> + + # 2. 配置 + cp .env.lite.example .env.lite + # 编辑 .env.lite,至少确认 OLLAMA_BASE_URL 正确 + + # 3. 启动 Ollama(如尚未运行) + ollama serve & + ollama pull qwen2.5:7b + ollama pull nomic-embed-text + + # 4. 
运行 + set -a && source .env.lite && set +a + ./WeKnora-lite + # 访问 http://localhost:8080 + ``` + + ### 平台支持 + + | 文件 | 平台 | + |------|------| + | `WeKnora-lite_*_linux_amd64.tar.gz` | Linux x86_64 | + | `WeKnora-lite_*_linux_arm64.tar.gz` | Linux ARM64 | + | `WeKnora-lite_*_darwin_amd64.tar.gz` | macOS Intel | + | `WeKnora-lite_*_darwin_arm64.tar.gz` | macOS Apple Silicon | + + 详细文档见 [LITE.md](docs/LITE.md)。 + NOTES + + - name: Create release + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release create "${{ steps.ver.outputs.version }}" \ + --title "WeKnora Lite ${{ steps.ver.outputs.version }}" \ + --notes-file notes.md \ + WeKnora-lite_*.tar.gz \ + WeKnora-lite_*.tar.gz.sha256 + + # ── 4. Update Homebrew Formula ── + update-homebrew: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: main + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Resolve version + id: ver + run: | + if [ -n "${{ inputs.tag }}" ]; then + VERSION="${{ inputs.tag }}" + else + VERSION="${GITHUB_REF#refs/tags/}" + fi + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + + - name: Wait for release assets to be available + run: sleep 15 + + - name: Update Formula + run: ./scripts/update-homebrew-formula.sh "${{ steps.ver.outputs.version }}" + + - name: Commit and push + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add Formula/weknora-lite.rb + if git diff --cached --quiet; then + echo "No changes to Formula" + else + git commit -m "formula: update weknora-lite to ${{ steps.ver.outputs.version }}" + git push origin main + fi diff --git a/.gitignore b/.gitignore index 7a0bb5b0..22473111 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # 忽略.env文件和其他包含敏感信息的配置文件 .env +.env.lite # 但不忽略示例文件 !.env.example *.pem @@ -27,9 +28,15 @@ logs/ *.pid WeKnora +WeKnora-lite /models/ test/data/mswag.txt data/files/ +data/weknora.db +data/weknora.db-wal +data/weknora.db-shm + 
+web/ .venv/ **/__pycache__ diff --git a/Formula/weknora-lite.rb b/Formula/weknora-lite.rb new file mode 100644 index 00000000..c6ca55c0 --- /dev/null +++ b/Formula/weknora-lite.rb @@ -0,0 +1,118 @@ +class WeknoraLite < Formula + desc "Knowledge base management system — single-binary Lite edition" + homepage "https://github.com/Tencent/WeKnora" + version "0.0.0" + license "Apache-2.0" + + on_macos do + on_arm do + url "https://github.com/Tencent/WeKnora/releases/download/v#{version}/WeKnora-lite_v#{version}_darwin_arm64.tar.gz" + sha256 "PLACEHOLDER" + end + on_intel do + url "https://github.com/Tencent/WeKnora/releases/download/v#{version}/WeKnora-lite_v#{version}_darwin_amd64.tar.gz" + sha256 "PLACEHOLDER" + end + end + + on_linux do + on_arm do + url "https://github.com/Tencent/WeKnora/releases/download/v#{version}/WeKnora-lite_v#{version}_linux_arm64.tar.gz" + sha256 "PLACEHOLDER" + end + on_intel do + url "https://github.com/Tencent/WeKnora/releases/download/v#{version}/WeKnora-lite_v#{version}_linux_amd64.tar.gz" + sha256 "PLACEHOLDER" + end + end + + def install + libexec.install "WeKnora-lite" + pkgshare.install "web" if File.directory?("web") + pkgshare.install "config" if File.directory?("config") + pkgshare.install ".env.lite.example" + doc.install "README.md" + pkgshare.install "migrations" if File.directory?("migrations") + + (bin/"weknora-lite").write <<~SH + #!/bin/bash + CONFIG_DIR="${WEKNORA_CONFIG_DIR:-${XDG_CONFIG_HOME:-$HOME/.config}/weknora}" + DATA_DIR="${WEKNORA_DATA_DIR:-${XDG_DATA_HOME:-$HOME/.local/share}/weknora}" + + mkdir -p "$DATA_DIR/files" "$CONFIG_DIR/config" 2>/dev/null + + if [ ! -f "$CONFIG_DIR/config/config.yaml" ]; then + cp -r "#{pkgshare}/config/" "$CONFIG_DIR/config/" + fi + + if [ ! -d "$CONFIG_DIR/migrations" ] && [ -d "#{pkgshare}/migrations" ]; then + ln -sf "#{pkgshare}/migrations" "$CONFIG_DIR/migrations" + fi + + if [ ! 
-f "$CONFIG_DIR/.env.lite" ]; then + cp "#{pkgshare}/.env.lite.example" "$CONFIG_DIR/.env.lite" + sed -i '' "s|DB_PATH=.*|DB_PATH=$DATA_DIR/weknora.db|" "$CONFIG_DIR/.env.lite" + sed -i '' "s|LOCAL_STORAGE_BASE_DIR=.*|LOCAL_STORAGE_BASE_DIR=$DATA_DIR/files|" "$CONFIG_DIR/.env.lite" + rm -f "$CONFIG_DIR/.env.lite-e" + echo "" + echo "已创建配置文件: $CONFIG_DIR/.env.lite" + echo "请根据需要编辑(如修改 LLM 地址、安全密钥等)。" + echo "" + fi + + set -a + source "$CONFIG_DIR/.env.lite" + set +a + + export DB_PATH="${DB_PATH:-$DATA_DIR/weknora.db}" + export LOCAL_STORAGE_BASE_DIR="${LOCAL_STORAGE_BASE_DIR:-$DATA_DIR/files}" + export WEKNORA_WEB_DIR="${WEKNORA_WEB_DIR:-#{pkgshare}/web}" + + cd "$CONFIG_DIR" + exec "#{libexec}/WeKnora-lite" "$@" + SH + end + + def post_install + (var/"weknora").mkpath + (var/"log").mkpath + end + + service do + run [bin/"weknora-lite"] + keep_alive true + working_dir var/"weknora" + log_path var/"log/weknora-lite.log" + error_log_path var/"log/weknora-lite.log" + end + + def caveats + <<~EOS + 前台运行: + weknora-lite + + 后台服务(推荐): + brew services start weknora-lite # 启动并开机自启 + brew services stop weknora-lite # 停止 + brew services restart weknora-lite # 重启 + brew services info weknora-lite # 查看状态 + + 日志: + #{var}/log/weknora-lite.log + + 首次运行会自动创建配置文件: + ~/.config/weknora/.env.lite + + 数据存储在: + ~/.local/share/weknora/ + + 如需修改配置(LLM 服务地址、安全密钥等): + $EDITOR ~/.config/weknora/.env.lite + brew services restart weknora-lite + EOS + end + + test do + assert_predicate bin/"weknora-lite", :executable? 
+ end +end diff --git a/Makefile b/Makefile index 74586c89..6306f902 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help build run test clean docker-build-app docker-build-docreader docker-build-frontend docker-build-all docker-run migrate-up migrate-down docker-restart docker-stop start-all stop-all start-ollama stop-ollama build-images build-images-app build-images-docreader build-images-frontend clean-images check-env list-containers pull-images show-platform dev-start dev-stop dev-restart dev-logs dev-status dev-app dev-frontend docs install-swagger +.PHONY: help build run test clean docker-build-app docker-build-docreader docker-build-frontend docker-build-all docker-run migrate-up migrate-down docker-restart docker-stop start-all stop-all start-ollama stop-ollama build-images build-images-app build-images-docreader build-images-frontend clean-images check-env list-containers pull-images show-platform dev-start dev-stop dev-restart dev-logs dev-status dev-app dev-frontend docs install-swagger build-lite run-lite package-lite # Show help help: @@ -56,6 +56,11 @@ help: @echo " dev-status 查看开发环境状态" @echo " dev-app 启动后端应用(本地运行,需先运行 dev-start)" @echo " dev-frontend 启动前端(本地运行,需先运行 dev-start)" + @echo "" + @echo "Lite 模式(零外部依赖):" + @echo " build-lite 构建 Lite 版本(先构建前端到 web/,再构建 Go;SKIP_FRONTEND=1 跳过前端)" + @echo " run-lite 构建并启动 Lite 版本" + @echo " package-lite 构建并打包 Lite 发行包(tarball)" # Go related variables BINARY_NAME=WeKnora @@ -223,11 +228,42 @@ deps: build-prod: VERSION=$$(git describe --tags --abbrev=0 2>/dev/null || echo "$${VERSION:-unknown}"); \ COMMIT_ID=$${COMMIT_ID:-unknown}; \ + CGO_ENABLED=1 \ + CGO_CFLAGS="-Wno-deprecated-declarations" \ + CGO_LDFLAGS="-Wl,-no_warn_duplicate_libraries" \ BUILD_TIME=$${BUILD_TIME:-unknown}; \ GO_VERSION=$${GO_VERSION:-unknown}; \ - LDFLAGS="-X 'github.com/Tencent/WeKnora/internal/handler.Version=$$VERSION' -X 'github.com/Tencent/WeKnora/internal/handler.CommitID=$$COMMIT_ID' -X 
'github.com/Tencent/WeKnora/internal/handler.BuildTime=$$BUILD_TIME' -X 'github.com/Tencent/WeKnora/internal/handler.GoVersion=$$GO_VERSION' -X 'google.golang.org/protobuf/reflect/protoregistry.conflictPolicy=warn'"; \ + LDFLAGS="-X 'github.com/Tencent/WeKnora/internal/handler.Version=$$VERSION' -X 'github.com/Tencent/WeKnora/internal/handler.Edition=standard' -X 'github.com/Tencent/WeKnora/internal/handler.CommitID=$$COMMIT_ID' -X 'github.com/Tencent/WeKnora/internal/handler.BuildTime=$$BUILD_TIME' -X 'github.com/Tencent/WeKnora/internal/handler.GoVersion=$$GO_VERSION' -X 'google.golang.org/protobuf/reflect/protoregistry.conflictPolicy=warn'"; \ go build -ldflags="-w -s $$LDFLAGS" -o $(BINARY_NAME) $(MAIN_PATH) +# Build Lite version (single binary, SQLite + in-memory queue) +# Builds the frontend into web/ first, then the Go binary (SKIP_FRONTEND=1 skips the frontend); EDITION is exported before get_version.sh runs so both invocations see it, and the macOS-only linker flag is applied only on Darwin +build-lite: + @if [ -f frontend/package.json ] && [ "$${SKIP_FRONTEND:-}" != "1" ]; then \ + echo ">> Building frontend for Lite..."; \ + (cd frontend && npm ci --prefer-offline && npm run build) && \ + rm -rf web && cp -r frontend/dist web; \ + elif [ "$${SKIP_FRONTEND:-}" = "1" ]; then \ + echo ">> Skipping frontend (SKIP_FRONTEND=1)"; \ + else \ + echo ">> No frontend/package.json, skipping frontend"; \ + fi + export EDITION=lite; eval "$$(./scripts/get_version.sh env)"; \ + LDFLAGS="$$(./scripts/get_version.sh ldflags)"; \ + export CGO_ENABLED=1; \ + export CGO_CFLAGS="-Wno-deprecated-declarations"; \ + if [ "$$(uname -s)" = "Darwin" ]; then export CGO_LDFLAGS="-Wl,-no_warn_duplicate_libraries"; fi; \ + go build -tags "sqlite_fts5" -ldflags="-w -s $$LDFLAGS" -o $(BINARY_NAME)-lite $(MAIN_PATH) + +# Run Lite version with .env.lite defaults +run-lite: build-lite + @if [ ! -f .env.lite ]; then echo "Error: .env.lite not found"; exit 1; fi + @set -a && . 
./.env.lite && set +a && ./$(BINARY_NAME)-lite + +# Package Lite version into distributable tarball +package-lite: + ./scripts/package-lite.sh + download_spatial: go run cmd/download/duckdb/duckdb.go diff --git a/deploy/weknora-lite.service b/deploy/weknora-lite.service new file mode 100644 index 00000000..dac3bb29 --- /dev/null +++ b/deploy/weknora-lite.service @@ -0,0 +1,23 @@ +[Unit] +Description=WeKnora Lite - Knowledge Base Management System +After=network.target + +[Service] +Type=simple +User=weknora +Group=weknora +WorkingDirectory=/opt/weknora +EnvironmentFile=/opt/weknora/.env.lite +ExecStart=/opt/weknora/WeKnora-lite +Restart=on-failure +RestartSec=5 +LimitNOFILE=65536 + +# Security hardening +NoNewPrivileges=true +ProtectSystem=strict +ProtectHome=true +ReadWritePaths=/opt/weknora/data + +[Install] +WantedBy=multi-user.target diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index a7672a53..54b6ca87 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -107,16 +107,19 @@ services: restart: "no" docreader: + build: + context: . 
+ dockerfile: docker/Dockerfile.docreader image: wechatopenai/weknora-docreader:latest container_name: WeKnora-docreader-dev ports: - "${DOCREADER_PORT:-50051}:50051" + volumes: + - docreader-tmp-dev:/tmp/docreader environment: - - MINIO_ENDPOINT=minio:9000 - - STORAGE_TYPE=${STORAGE_TYPE:-} - - MINIO_PUBLIC_ENDPOINT=http://localhost:${MINIO_PORT:-9000} + - DOCREADER_IMAGE_OUTPUT_DIR=/tmp/docreader - MINERU_ENDPOINT=${MINERU_ENDPOINT:-} - - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-}} + - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-} healthcheck: test: ["CMD", "grpc_health_probe", "-addr=:50051"] interval: 30s @@ -165,4 +168,5 @@ volumes: neo4j-data-dev: jaeger_data_dev: qdrant_data_dev: + docreader-tmp-dev: diff --git a/docker-compose.yml b/docker-compose.yml index 79a94f2d..9c98821e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -37,6 +37,7 @@ services: - "${APP_PORT:-8080}:8080" volumes: - data-files:/data/files + - docreader-tmp:/tmp/docreader:ro # Optional: mount custom config file # - ./config/config.yaml:/app/config/config.yaml # Optional: mount custom skills directory (allows adding skills without rebuilding image) @@ -83,7 +84,8 @@ services: - QDRANT_USE_TLS=${QDRANT_USE_TLS:-false} - MILVUS_ADDRESS=milvus:19530 - MILVUS_COLLECTION=${MILVUS_COLLECTION:-weknora_embeddings} - - DOCREADER_ADDR=docreader:50051 + - DOCREADER_ADDR=${DOCREADER_ADDR:-docreader:50051} + - DOCREADER_TRANSPORT=${DOCREADER_TRANSPORT:-grpc} - STORAGE_TYPE=${STORAGE_TYPE:-} - LOCAL_STORAGE_BASE_DIR=${LOCAL_STORAGE_BASE_DIR:-} - AUTO_RECOVER_DIRTY=${AUTO_RECOVER_DIRTY:-true} @@ -146,10 +148,10 @@ services: container_name: WeKnora-docreader ports: - "${DOCREADER_PORT:-50051}:50051" + volumes: + - docreader-tmp:/tmp/docreader environment: - - MINIO_ENDPOINT=minio:9000 - - MINIO_PUBLIC_ENDPOINT=http://localhost:${MINIO_PORT:-9000} - - MINERU_ENDPOINT=${MINERU_ENDPOINT:-} + - DOCREADER_IMAGE_OUTPUT_DIR=/tmp/docreader - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-} healthcheck: test: ["CMD", 
"grpc_health_probe", "-addr=:50051"] @@ -312,6 +314,7 @@ networks: volumes: postgres-data: data-files: + docreader-tmp: jaeger_data: minio_data: neo4j-data: diff --git a/docker/Dockerfile.docreader b/docker/Dockerfile.docreader index f73476fa..4c5a4994 100644 --- a/docker/Dockerfile.docreader +++ b/docker/Dockerfile.docreader @@ -1,5 +1,5 @@ # ========================= -# 构建阶段 +# 构建阶段(轻量化:仅文档解析 + 图片提取,无 OCR/VLM) # ========================= FROM python:3.10.18-bookworm AS builder @@ -15,7 +15,6 @@ RUN apt-get update && apt-get install -y \ python3-dev \ libjpeg-dev \ zlib1g-dev \ - libpq-dev \ libffi-dev \ libgl1 \ libglib2.0-0 \ @@ -25,11 +24,10 @@ RUN apt-get update && apt-get install -y \ unzip \ && rm -rf /var/lib/apt/lists/* -# 检查是否存在本地protoc安装包,如果存在则离线安装,否则在线安装,其他安装包按需求添加 +# 检查是否存在本地protoc安装包,如果存在则离线安装,否则在线安装 ARG TARGETARCH COPY packages/ /app/packages/ RUN echo "检查本地protoc安装包..." && \ - # 根据目标架构选择正确的protoc包名 case ${TARGETARCH} in \ "amd64") PROTOC_ARCH="x86_64" ;; \ "arm64") PROTOC_ARCH="aarch_64" ;; \ @@ -39,41 +37,18 @@ RUN echo "检查本地protoc安装包..." 
&& \ PROTOC_PACKAGE="protoc-3.19.4-linux-${PROTOC_ARCH}.zip" && \ if [ -f "/app/packages/${PROTOC_PACKAGE}" ]; then \ echo "发现本地protoc安装包,将进行离线安装"; \ - # 离线安装:使用本地包(精确路径避免歧义) cp /app/packages/${PROTOC_PACKAGE} /app/ && \ unzip -o /app/${PROTOC_PACKAGE} -d /usr/local && \ chmod +x /usr/local/bin/protoc && \ rm -f /app/${PROTOC_PACKAGE}; \ else \ echo "未发现本地protoc安装包,将进行在线安装"; \ - # 在线安装:从网络下载 curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.19.4/${PROTOC_PACKAGE} && \ unzip -o ${PROTOC_PACKAGE} -d /usr/local && \ chmod +x /usr/local/bin/protoc && \ rm -f ${PROTOC_PACKAGE}; \ fi -# 预下载 PP-OCRv4 模型 -RUN mkdir -p /root/.paddleocr/whl/det/ch && \ - mkdir -p /root/.paddleocr/whl/rec/ch && \ - mkdir -p /root/.paddleocr/whl/cls/ch && \ - # 下载检测模型 - wget https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar \ - -O /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar && \ - tar -xf /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar -C /root/.paddleocr/whl/det/ch/ && \ - # 下载识别模型 - wget https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar \ - -O /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer.tar && \ - tar -xf /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer.tar -C /root/.paddleocr/whl/rec/ch/ && \ - # 下载文本方向分类模型(用于判断文本是否需要旋转) - wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar \ - -O /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar && \ - tar -xf /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar -C /root/.paddleocr/whl/cls/ && \ - # 清理压缩包 - rm -f /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar && \ - rm -f /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer.tar && \ - rm -f /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar - # 复制依赖文件 COPY docreader/pyproject.toml docreader/uv.lock ./ RUN pip install uv --break-system-packages && \ @@ -82,15 +57,13 @@ RUN pip install uv --break-system-packages && \ # 复制源代码和生成脚本 COPY docreader 
docreader -# 生成 protobuf 代码 +# 生成 protobuf 代码(使用 venv 中的 grpc_tools) +ENV PATH="/app/.venv/bin:${PATH}" RUN chmod +x docreader/scripts/generate_proto.sh && \ bash docreader/scripts/generate_proto.sh -# 确保模型目录存在 -RUN ls -la /root/.paddleocr/whl/ - # ========================= -# 运行阶段 +# 运行阶段(轻量化) # ========================= FROM python:3.10.18-bookworm AS runner @@ -100,16 +73,14 @@ RUN sed -i 's@http://deb.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' /etc WORKDIR /app -# 安装运行时依赖 +# 安装运行时依赖(已移除 OCR/PaddleOCR 相关依赖) RUN apt-get update && apt-get install -y \ libjpeg62-turbo \ - libpq5 \ wget \ gnupg \ libgl1 \ libglib2.0-0 \ antiword \ - vim \ tar \ dpkg \ libxinerama1 \ @@ -126,7 +97,6 @@ RUN apt-get update && apt-get install -y \ # 安装 grpc_health_probe ARG TARGETARCH RUN GRPC_HEALTH_PROBE_VERSION=v0.4.24 && \ - # 根据目标架构选择正确的二进制文件 case ${TARGETARCH} in \ "amd64") ARCH="amd64" ;; \ "arm64") ARCH="arm64" ;; \ @@ -135,25 +105,24 @@ RUN GRPC_HEALTH_PROBE_VERSION=v0.4.24 && \ esac && \ wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${ARCH} && \ chmod +x /bin/grpc_health_probe - + # 从构建阶段复制已安装的依赖和生成的代码 ENV VIRTUAL_ENV=/app/.venv COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" COPY --from=builder /usr/local/bin /usr/local/bin -COPY --from=builder /root/.paddleocr /root/.paddleocr -# 安装 Playwright 浏览器 +# 安装 Playwright 浏览器(网页解析) RUN python -m playwright install webkit RUN python -m playwright install-deps webkit -# COPY docreader/scripts/download_deps.py download_deps.py -# RUN python -m download_deps - COPY docreader/pyproject.toml docreader/uv.lock ./ COPY --from=builder /app/docreader docreader +# 创建共享临时图片目录 +RUN mkdir -p /tmp/docreader + # 暴露 gRPC 端口 EXPOSE 50051 diff --git a/docreader/client/client.go b/docreader/client/client.go index 2a0f9126..468b5438 100644 --- a/docreader/client/client.go +++ 
b/docreader/client/client.go @@ -13,42 +13,35 @@ import ( "google.golang.org/grpc/resolver" ) -// getMaxMessageSize returns the maximum gRPC message size in bytes. -// Default is 50MB, can be configured via MAX_FILE_SIZE_MB environment variable. func getMaxMessageSize() int { if sizeStr := os.Getenv("MAX_FILE_SIZE_MB"); sizeStr != "" { if size, err := strconv.Atoi(sizeStr); err == nil && size > 0 { return size * 1024 * 1024 } } - return 50 * 1024 * 1024 // default 50MB + return 50 * 1024 * 1024 } -// Logger is the default logger used by the client var Logger = log.New(os.Stdout, "[DocReader] ", log.LstdFlags|log.Lmicroseconds) -// ImageInfo 表示一个图片的信息 -type ImageInfo struct { - URL string // 图片URL(COS) - Caption string // 图片描述 - OCRText string // OCR提取的文本 - OriginalURL string // 原始图片URL - Start int // 图片在文本中的开始位置 - End int // 图片在文本中的结束位置 +// ImageRefInfo represents an image reference from a converted document. +type ImageRefInfo struct { + Filename string + OriginalRef string + MimeType string + StorageKey string } -// Client represents a DocReader service client +// Client represents a DocReader service client. 
type Client struct { conn *grpc.ClientConn proto.DocReaderClient debug bool } -// NewClient creates a new DocReader client with the specified address func NewClient(addr string) (*Client, error) { Logger.Printf("INFO: Creating new DocReader client connecting to %s", addr) - // 设置消息大小限制 (configurable via GRPC_MAX_MESSAGE_SIZE_MB) maxMsgSize := getMaxMessageSize() opts := []grpc.DialOption{ grpc.WithTransportCredentials(insecure.NewCredentials()), @@ -75,19 +68,15 @@ func NewClient(addr string) (*Client, error) { }, nil } -// Close closes the client connection func (c *Client) Close() error { Logger.Printf("INFO: Closing DocReader client connection") return c.conn.Close() } -// SetDebug enables or disables debug logging func (c *Client) SetDebug(debug bool) { c.debug = debug - Logger.Printf("INFO: Debug logging set to %v", debug) } -// Log logs a message with the appropriate level func (c *Client) Log(level string, format string, args ...interface{}) { if level == "DEBUG" && !c.debug { return @@ -95,28 +84,20 @@ func (c *Client) Log(level string, format string, args ...interface{}) { Logger.Printf("%s: %s", level, fmt.Sprintf(format, args...)) } -// GetImagesFromChunk 从一个Chunk中提取所有图片信息 -func GetImagesFromChunk(chunk *proto.Chunk) []ImageInfo { - if chunk == nil || len(chunk.Images) == 0 { +// GetImageRefsFromResponse extracts image references from a ReadResponse. 
+func GetImageRefsFromResponse(resp *proto.ReadResponse) []ImageRefInfo { + if resp == nil || len(resp.ImageRefs) == 0 { return nil } - images := make([]ImageInfo, 0, len(chunk.Images)) - for _, img := range chunk.Images { - images = append(images, ImageInfo{ - URL: img.Url, - Caption: img.Caption, - OCRText: img.OcrText, - OriginalURL: img.OriginalUrl, - Start: int(img.Start), - End: int(img.End), + refs := make([]ImageRefInfo, 0, len(resp.ImageRefs)) + for _, ref := range resp.ImageRefs { + refs = append(refs, ImageRefInfo{ + Filename: ref.Filename, + OriginalRef: ref.OriginalRef, + MimeType: ref.MimeType, + StorageKey: ref.StorageKey, }) } - - return images -} - -// HasImagesInChunk 判断一个Chunk是否包含图片 -func HasImagesInChunk(chunk *proto.Chunk) bool { - return chunk != nil && len(chunk.Images) > 0 + return refs } diff --git a/docreader/client/client_test.go b/docreader/client/client_test.go index 0a1e618a..e4f0e4e0 100644 --- a/docreader/client/client_test.go +++ b/docreader/client/client_test.go @@ -11,144 +11,75 @@ import ( ) func init() { - // 配置测试日志 log.SetOutput(os.Stdout) log.SetFlags(log.LstdFlags | log.Lmicroseconds | log.Lshortfile) log.Println("INFO: Initializing DocReader client tests") } -func TestReadFromURL(t *testing.T) { - log.Println("INFO: Starting TestReadFromURL") - - // 创建测试客户端 - log.Println("INFO: Creating test client") +func TestReadURL(t *testing.T) { client, err := NewClient("localhost:50051") if err != nil { - log.Printf("ERROR: Failed to create client: %v", err) t.Fatalf("Failed to create client: %v", err) } defer client.Close() - - // 启用调试日志 client.SetDebug(true) - // 测试 ReadFromURL 方法 - log.Println("INFO: Sending ReadFromURL request to server") startTime := time.Now() - resp, err := client.ReadFromURL( + resp, err := client.Read( context.Background(), - &proto.ReadFromURLRequest{ + &proto.ReadRequest{ Url: "https://example.com", Title: "test", - ReadConfig: &proto.ReadConfig{ - ChunkSize: 512, - ChunkOverlap: 50, - Separators: 
[]string{"\n\n", "\n", "。"}, - EnableMultimodal: true, - }, }, ) + log.Printf("INFO: Read(URL) completed in %v", time.Since(startTime)) - requestDuration := time.Since(startTime) if err != nil { - log.Printf("ERROR: ReadFromURL failed: %v", err) - t.Fatalf("ReadFromURL failed: %v", err) + t.Fatalf("Read failed: %v", err) } - log.Printf("INFO: ReadFromURL completed in %v", requestDuration) - - // 验证结果 - chunkCount := len(resp.Chunks) - log.Printf("INFO: Received %d chunks from URL parsing", chunkCount) - if chunkCount == 0 { - log.Println("WARN: Expected non-empty content but received none") - t.Error("Expected non-empty content") + if resp.Error != "" { + t.Fatalf("Read returned error: %s", resp.Error) } - - // 打印结果 - for i, chunk := range resp.Chunks { - if i < 2 || i >= chunkCount-2 { // 只打印前两个和后两个块 - log.Printf("DEBUG: Chunk %d: %s", chunk.Seq, truncateString(chunk.Content, 50)) - } else if i == 2 && chunkCount > 4 { - log.Printf("DEBUG: ... %d more chunks ...", chunkCount-4) - } + if resp.MarkdownContent == "" { + t.Error("Expected non-empty markdown content") } - - log.Println("INFO: TestReadFromURL completed successfully") + log.Printf("INFO: content_len=%d, images=%d", len(resp.MarkdownContent), len(resp.ImageRefs)) } -func TestReadFromFileWithChunking(t *testing.T) { - log.Println("INFO: Starting TestReadFromFileWithChunking") - - // 创建测试客户端 - log.Println("INFO: Creating test client") +func TestReadFile(t *testing.T) { client, err := NewClient("localhost:50051") if err != nil { - log.Printf("ERROR: Failed to create client: %v", err) t.Fatalf("Failed to create client: %v", err) } defer client.Close() - - // 启用调试日志 client.SetDebug(true) - // 读取测试文件 - log.Println("INFO: Reading test file") fileContent, err := os.ReadFile("../testdata/test.md") if err != nil { - log.Printf("ERROR: Failed to read test file: %v", err) t.Fatalf("Failed to read test file: %v", err) } - log.Printf("INFO: Read test file, size: %d bytes", len(fileContent)) - // 测试 ReadFromFile 
方法,带分块参数 - log.Println("INFO: Sending ReadFromFile request to server") startTime := time.Now() - resp, err := client.ReadFromFile( + resp, err := client.Read( context.Background(), - &proto.ReadFromFileRequest{ + &proto.ReadRequest{ FileContent: fileContent, FileName: "test.md", FileType: "md", - ReadConfig: &proto.ReadConfig{ - ChunkSize: 200, - ChunkOverlap: 50, - Separators: []string{"\n\n", "\n", "。"}, - EnableMultimodal: true, - }, }, ) + log.Printf("INFO: Read(file) completed in %v", time.Since(startTime)) - requestDuration := time.Since(startTime) if err != nil { - log.Printf("ERROR: ReadFromFile failed: %v", err) - t.Fatalf("ReadFromFile failed: %v", err) + t.Fatalf("Read failed: %v", err) } - log.Printf("INFO: ReadFromFile completed in %v", requestDuration) - - // 验证结果 - chunkCount := len(resp.Chunks) - log.Printf("INFO: Received %d chunks from file parsing", chunkCount) - if chunkCount == 0 { - log.Println("WARN: Expected non-empty content but received none") - t.Error("Expected non-empty content") + if resp.Error != "" { + t.Fatalf("Read returned error: %s", resp.Error) + } + if resp.MarkdownContent == "" { + t.Error("Expected non-empty markdown content") } - // 打印结果 - for i, chunk := range resp.Chunks { - if i < 2 || i >= chunkCount-2 { // 只打印前两个和后两个块 - log.Printf("DEBUG: Chunk %d: %s", chunk.Seq, truncateString(chunk.Content, 50)) - } else if i == 2 && chunkCount > 4 { - log.Printf("DEBUG: ... %d more chunks ...", chunkCount-4) - } - } - - log.Println("INFO: TestReadFromFileWithChunking completed successfully") -} - -// 截断字符串以供日志打印 -func truncateString(s string, maxLen int) string { - if len(s) <= maxLen { - return s - } - return s[:maxLen] + "..." 
+ imageRefs := GetImageRefsFromResponse(resp) + log.Printf("INFO: content_len=%d, images=%d", len(resp.MarkdownContent), len(imageRefs)) } diff --git a/docreader/config.py b/docreader/config.py index 791363cb..845e46fd 100644 --- a/docreader/config.py +++ b/docreader/config.py @@ -52,58 +52,17 @@ class DocReaderConfig: grpc_max_file_size_mb: int grpc_port: int - # Image processing - image_max_concurrent: int - # Proxy external_http_proxy: str external_https_proxy: str - # OCR - ocr_backend: str - ocr_api_base_url: str - ocr_api_key: str - ocr_model: str - - # VLM Caption - vlm_model_base_url: str - vlm_model_name: str - vlm_model_api_key: str - vlm_interface_type: str - - # Storage - storage_type: str - - cos_secret_id: str - cos_secret_key: str - cos_region: str - cos_bucket_name: str - cos_app_id: str - cos_path_prefix: str - cos_enable_old_domain: bool - - minio_access_key_id: str - minio_secret_access_key: str - minio_bucket_name: str - minio_path_prefix: str - minio_endpoint: str - minio_public_endpoint: str - minio_use_ssl: bool - - local_storage_base_dir: str - - # Other - mineru_endpoint: str + # Temp image output directory (shared with Go app via volume, local mode fallback) + image_output_dir: str def load_config() -> DocReaderConfig: - """Load config from environment variables. + """Load config from environment variables (lightweight version).""" - Naming convention (new): DOCREADER_* - Backward compatible keys are supported. 
- """ - - # gRPC grpc_max_workers = _get_int(["DOCREADER_GRPC_MAX_WORKERS", "GRPC_MAX_WORKERS"], 4) grpc_max_file_size_mb = ( _get_int(["DOCREADER_GRPC_MAX_FILE_SIZE_MB", "MAX_FILE_SIZE_MB"], 50) @@ -112,12 +71,6 @@ def load_config() -> DocReaderConfig: ) grpc_port = _get_int(["DOCREADER_GRPC_PORT", "PORT"], 50051) - # Image processing - image_max_concurrent = _get_int( - ["DOCREADER_IMAGE_MAX_CONCURRENT", "IMAGE_MAX_CONCURRENT"], 1 - ) - - # Proxies external_http_proxy = _get_str( ["DOCREADER_EXTERNAL_HTTP_PROXY", "EXTERNAL_HTTP_PROXY"], "" ) @@ -125,95 +78,17 @@ def load_config() -> DocReaderConfig: ["DOCREADER_EXTERNAL_HTTPS_PROXY", "EXTERNAL_HTTPS_PROXY"], "" ) - # OCR - ocr_backend = _get_str(["DOCREADER_OCR_BACKEND", "OCR_BACKEND"], "") - ocr_api_base_url = _get_str(["DOCREADER_OCR_API_BASE_URL", "OCR_API_BASE_URL"], "") - ocr_api_key = _get_str(["DOCREADER_OCR_API_KEY", "OCR_API_KEY"], "") - ocr_model = _get_str(["DOCREADER_OCR_MODEL", "OCR_MODEL"], "") - - # VLM Caption - vlm_model_base_url = _get_str( - ["DOCREADER_VLM_MODEL_BASE_URL", "VLM_MODEL_BASE_URL"], "" + image_output_dir = _get_str( + ["DOCREADER_IMAGE_OUTPUT_DIR", "IMAGE_OUTPUT_DIR"], "/tmp/docreader" ) - vlm_model_name = _get_str(["DOCREADER_VLM_MODEL_NAME", "VLM_MODEL_NAME"], "") - vlm_model_api_key = _get_str( - ["DOCREADER_VLM_MODEL_API_KEY", "VLM_MODEL_API_KEY"], "" - ) - vlm_interface_type = _get_str( - ["DOCREADER_VLM_INTERFACE_TYPE", "VLM_INTERFACE_TYPE"], "openai" - ).lower() - - # Storage - storage_type = _get_str(["DOCREADER_STORAGE_TYPE", "STORAGE_TYPE"], "cos").lower() - - # COS - cos_secret_id = _get_str(["DOCREADER_COS_SECRET_ID", "COS_SECRET_ID"], "") - cos_secret_key = _get_str(["DOCREADER_COS_SECRET_KEY", "COS_SECRET_KEY"], "") - cos_region = _get_str(["DOCREADER_COS_REGION", "COS_REGION"], "") - cos_bucket_name = _get_str(["DOCREADER_COS_BUCKET_NAME", "COS_BUCKET_NAME"], "") - cos_app_id = _get_str(["DOCREADER_COS_APP_ID", "COS_APP_ID"], "") - cos_path_prefix = 
_get_str(["DOCREADER_COS_PATH_PREFIX", "COS_PATH_PREFIX"], "") - cos_enable_old_domain = _get_bool( - ["DOCREADER_COS_ENABLE_OLD_DOMAIN", "COS_ENABLE_OLD_DOMAIN"], True - ) - - # MinIO - minio_access_key_id = _get_str( - ["DOCREADER_MINIO_ACCESS_KEY_ID", "MINIO_ACCESS_KEY_ID"], "minioadmin" - ) - minio_secret_access_key = _get_str( - ["DOCREADER_MINIO_SECRET_ACCESS_KEY", "MINIO_SECRET_ACCESS_KEY"], "minioadmin" - ) - minio_bucket_name = _get_str( - ["DOCREADER_MINIO_BUCKET_NAME", "MINIO_BUCKET_NAME"], "WeKnora" - ) - minio_path_prefix = _get_str( - ["DOCREADER_MINIO_PATH_PREFIX", "MINIO_PATH_PREFIX"], "" - ) - minio_endpoint = _get_str(["DOCREADER_MINIO_ENDPOINT", "MINIO_ENDPOINT"], "") - minio_public_endpoint = _get_str( - ["DOCREADER_MINIO_PUBLIC_ENDPOINT", "MINIO_PUBLIC_ENDPOINT"], "" - ) - minio_use_ssl = _get_bool(["DOCREADER_MINIO_USE_SSL", "MINIO_USE_SSL"], False) - - # Local storage - local_storage_base_dir = "./data/files" - - # Other - mineru_endpoint = _get_str(["DOCREADER_MINERU_ENDPOINT", "MINERU_ENDPOINT"], "") return DocReaderConfig( grpc_max_workers=grpc_max_workers, grpc_max_file_size_mb=grpc_max_file_size_mb, grpc_port=grpc_port, - image_max_concurrent=image_max_concurrent, external_http_proxy=external_http_proxy, external_https_proxy=external_https_proxy, - ocr_backend=ocr_backend, - ocr_api_base_url=ocr_api_base_url, - ocr_api_key=ocr_api_key, - ocr_model=ocr_model, - vlm_model_base_url=vlm_model_base_url, - vlm_model_name=vlm_model_name, - vlm_model_api_key=vlm_model_api_key, - vlm_interface_type=vlm_interface_type, - storage_type=storage_type, - cos_secret_id=cos_secret_id, - cos_secret_key=cos_secret_key, - cos_region=cos_region, - cos_bucket_name=cos_bucket_name, - cos_app_id=cos_app_id, - cos_path_prefix=cos_path_prefix, - cos_enable_old_domain=cos_enable_old_domain, - minio_access_key_id=minio_access_key_id, - minio_secret_access_key=minio_secret_access_key, - minio_bucket_name=minio_bucket_name, - minio_path_prefix=minio_path_prefix, - 
minio_endpoint=minio_endpoint, - minio_public_endpoint=minio_public_endpoint, - minio_use_ssl=minio_use_ssl, - local_storage_base_dir=local_storage_base_dir, - mineru_endpoint=mineru_endpoint, + image_output_dir=image_output_dir, ) @@ -223,56 +98,12 @@ CONFIG = load_config() def dump_config(mask_secrets: bool = True) -> Dict[str, Any]: cfg = CONFIG d: Dict[str, Any] = { - # gRPC "DOCREADER_GRPC_MAX_WORKERS": cfg.grpc_max_workers, "DOCREADER_GRPC_MAX_FILE_SIZE_MB": cfg.grpc_max_file_size_mb, "DOCREADER_GRPC_PORT": cfg.grpc_port, - # Image processing - "DOCREADER_IMAGE_MAX_CONCURRENT": cfg.image_max_concurrent, - # Proxy "DOCREADER_EXTERNAL_HTTP_PROXY": cfg.external_http_proxy, "DOCREADER_EXTERNAL_HTTPS_PROXY": cfg.external_https_proxy, - # OCR - "DOCREADER_OCR_BACKEND": cfg.ocr_backend, - "DOCREADER_OCR_API_BASE_URL": cfg.ocr_api_base_url, - "DOCREADER_OCR_API_KEY": _mask_secret(cfg.ocr_api_key) - if mask_secrets - else cfg.ocr_api_key, - "DOCREADER_OCR_MODEL": cfg.ocr_model, - # VLM - "DOCREADER_VLM_MODEL_BASE_URL": cfg.vlm_model_base_url, - "DOCREADER_VLM_MODEL_NAME": cfg.vlm_model_name, - "DOCREADER_VLM_MODEL_API_KEY": _mask_secret(cfg.vlm_model_api_key) - if mask_secrets - else cfg.vlm_model_api_key, - "DOCREADER_VLM_INTERFACE_TYPE": cfg.vlm_interface_type, - # Storage - "DOCREADER_STORAGE_TYPE": cfg.storage_type, - "DOCREADER_COS_SECRET_ID": _mask_secret(cfg.cos_secret_id) - if mask_secrets - else cfg.cos_secret_id, - "DOCREADER_COS_SECRET_KEY": _mask_secret(cfg.cos_secret_key) - if mask_secrets - else cfg.cos_secret_key, - "DOCREADER_COS_REGION": cfg.cos_region, - "DOCREADER_COS_BUCKET_NAME": cfg.cos_bucket_name, - "DOCREADER_COS_APP_ID": cfg.cos_app_id, - "DOCREADER_COS_PATH_PREFIX": cfg.cos_path_prefix, - "DOCREADER_COS_ENABLE_OLD_DOMAIN": cfg.cos_enable_old_domain, - "DOCREADER_MINIO_ACCESS_KEY_ID": _mask_secret(cfg.minio_access_key_id) - if mask_secrets - else cfg.minio_access_key_id, - "DOCREADER_MINIO_SECRET_ACCESS_KEY": 
_mask_secret(cfg.minio_secret_access_key) - if mask_secrets - else cfg.minio_secret_access_key, - "DOCREADER_MINIO_BUCKET_NAME": cfg.minio_bucket_name, - "DOCREADER_MINIO_PATH_PREFIX": cfg.minio_path_prefix, - "DOCREADER_MINIO_ENDPOINT": cfg.minio_endpoint, - "DOCREADER_MINIO_PUBLIC_ENDPOINT": cfg.minio_public_endpoint, - "DOCREADER_MINIO_USE_SSL": cfg.minio_use_ssl, - "DOCREADER_LOCAL_STORAGE_BASE_DIR": cfg.local_storage_base_dir, - # Other - "DOCREADER_MINERU_ENDPOINT": cfg.mineru_endpoint, + "DOCREADER_IMAGE_OUTPUT_DIR": cfg.image_output_dir, } return d diff --git a/docreader/main.py b/docreader/main.py index 2f2c9a20..4fc99b50 100644 --- a/docreader/main.py +++ b/docreader/main.py @@ -13,118 +13,130 @@ from grpc_health.v1.health import HealthServicer from docreader import config from docreader.config import CONFIG -from docreader.models.read_config import ChunkingConfig from docreader.parser import Parser from docreader.proto import docreader_pb2_grpc +from docreader.parser.registry import registry from docreader.proto.docreader_pb2 import ( - Chunk, - Image, - ReadConfig, - ReadFromFileRequest, - ReadFromURLRequest, + ReadRequest, ReadResponse, - StorageProvider, + ImageRef, + ListEnginesResponse, + ParserEngineInfo, ) from docreader.utils.request import init_logging_request_id, request_id_context -# Surrogate range U+D800..U+DFFF are invalid Unicode scalar values -# cannot be encoded to UTF-8 _SURROGATE_RE = re.compile(r"[\ud800-\udfff]") def to_valid_utf8_text(s: Optional[str]) -> str: - """Return a UTF-8 safe string for protobuf. 
- - - Replace any surrogate code points with U+FFFD - - Re-encode with errors='replace' to ensure valid UTF-8 - """ if not s: return "" s = _SURROGATE_RE.sub("\ufffd", s) return s.encode("utf-8", errors="replace").decode("utf-8") -# Ensure no existing handlers for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) -# Configure logging - use stdout handler = logging.StreamHandler(sys.stdout) logging.root.addHandler(handler) -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -logger.info("Initializing server logging") +_level_name = (os.environ.get("LOG_LEVEL") or "INFO").upper() +_level = getattr(logging, _level_name, logging.INFO) +logging.root.setLevel(_level) + +logger = logging.getLogger(__name__) +logger.info("Initializing server logging, level=%s", _level_name) -# Initialize request ID logging init_logging_request_id() -parser = Parser() +def _resolve_images(images: dict, request_id: str, storage_map: dict | None = None) -> tuple[str, list]: + """Resolve document images with priority: shared storage > temp dir > inline bytes. + ``images`` is a dict of {relative_path: raw_data} where raw_data is + base64-encoded string or raw bytes. -def create_chunking_config(read_config: ReadConfig): - """Create ChunkingConfig from ReadConfig request. + ``storage_map`` is a dict from the request's config.image_storage map. + When it contains a valid "provider" key, images are uploaded to shared object + storage and storage_key is set (no inline bytes, saving gRPC message size). - Args: - read_config: The read_config from the gRPC request - - Returns: - ChunkingConfig: Configured chunking configuration object + Returns (image_dir_path, list[ImageRef]). 
""" - # Extract chunking parameters - chunk_size = read_config.chunk_size or 512 - chunk_overlap = read_config.chunk_overlap or 50 - # Convert protobuf RepeatedScalarFieldContainer to list for type compatibility - separators = ( - list(read_config.separators) if read_config.separators else ["\n\n", "\n", "。"] - ) - enable_multimodal = read_config.enable_multimodal or False + import base64 - logger.info( - f"Using chunking config: size={chunk_size}, " - f"overlap={chunk_overlap}, multimodal={enable_multimodal}" - ) + if not images: + return "", [] - # Extract storage config - sc = read_config.storage_config - storage_config = { - "provider": StorageProvider.Name(sc.provider), - "region": sc.region, - "bucket_name": sc.bucket_name, - "access_key_id": sc.access_key_id, - "secret_access_key": sc.secret_access_key, - "app_id": sc.app_id, - "path_prefix": sc.path_prefix, - } - logger.info( - f"Using Storage config: provider={storage_config.get('provider')}, " - f"bucket={storage_config['bucket_name']}" - ) - - # Extract VLM config - vlm_config = { - "model_name": read_config.vlm_config.model_name, - "base_url": read_config.vlm_config.base_url, - "api_key": read_config.vlm_config.api_key, - "interface_type": read_config.vlm_config.interface_type or "openai", + mime_map = { + ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", + ".gif": "image/gif", ".webp": "image/webp", ".bmp": "image/bmp", } - logger.info( - f"Using VLM config: model={vlm_config['model_name']}, " - f"base_url={vlm_config['base_url']}, " - f"interface_type={vlm_config['interface_type']}" - ) + storage_client = None + use_storage = False + provider = (storage_map or {}).get("provider", "") + if provider: + try: + from docreader.parser.storage import create_storage + storage_client = create_storage(storage_map) + use_storage = True + logger.info("Using shared storage (%s) for image upload", provider) + except Exception as e: + logger.warning("Failed to init shared storage, falling back to 
inline bytes: %s", e) - # Create and return ChunkingConfig - return ChunkingConfig( - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - separators=separators, - enable_multimodal=enable_multimodal, - storage_config=storage_config, - vlm_config=vlm_config, - ) + base_dir = CONFIG.image_output_dir + output_dir = os.path.join(base_dir, request_id, "images") + wrote_to_dir = False + if not use_storage: + try: + os.makedirs(output_dir, exist_ok=True) + wrote_to_dir = True + except OSError: + logger.warning("Cannot write to image output dir %s", output_dir) + + refs = [] + for ref_path, b64data in images.items(): + try: + img_bytes = base64.b64decode(b64data) + except Exception: + img_bytes = b64data.encode("utf-8") if isinstance(b64data, str) else b64data + + fname = os.path.basename(ref_path) or f"{uuid.uuid4().hex}.png" + ext = os.path.splitext(fname)[1].lower() + mime = mime_map.get(ext, "application/octet-stream") + + if use_storage and storage_client: + storage_key = storage_client.upload_bytes(img_bytes, ext) + if storage_key: + refs.append(ImageRef( + filename=fname, + original_ref=ref_path, + mime_type=mime, + storage_key=storage_key, + )) + continue + logger.warning("Storage upload failed for %s, falling back to inline bytes", fname) + + if wrote_to_dir: + dest = os.path.join(output_dir, fname) + try: + with open(dest, "wb") as f: + f.write(img_bytes) + except OSError as e: + logger.warning("Failed to write image %s: %s", dest, e) + + refs.append(ImageRef( + filename=fname, + original_ref=ref_path, + mime_type=mime, + image_data=img_bytes, + )) + + image_dir = os.path.join(base_dir, request_id) if wrote_to_dir else "" + mode = "storage" if use_storage else ("dir+inline" if wrote_to_dir else "inline") + logger.info("Resolved %d images (mode=%s)", len(refs), mode) + return image_dir, refs class DocReaderServicer(docreader_pb2_grpc.DocReaderServicer): @@ -132,152 +144,90 @@ class DocReaderServicer(docreader_pb2_grpc.DocReaderServicer): super().__init__() 
self.parser = Parser() - def ReadFromFile(self, request: ReadFromFileRequest, context): - # Get or generate request ID - request_id = ( - request.request_id - if hasattr(request, "request_id") and request.request_id - else str(uuid.uuid4()) - ) + def Read(self, request: ReadRequest, context): + """Unified read: file mode (file_content set) or URL mode (url set).""" + request_id = request.request_id or str(uuid.uuid4()) + is_url = bool(request.url) - # Use request ID context with request_id_context(request_id): try: - # Get file type - file_type = ( - request.file_type or os.path.splitext(request.file_name)[1][1:] - ) - logger.info( - f"ReadFromFile for file: {request.file_name}, type: {file_type}" - ) - logger.info(f"File content size: {len(request.file_content)} bytes") + cfg = request.config + parser_engine = cfg.parser_engine if cfg else "" + engine_overrides = dict(cfg.parser_engine_overrides) if cfg else {} + storage_map = dict(cfg.image_storage) if cfg and cfg.image_storage else None - # Create chunking config - chunking_config = create_chunking_config(request.read_config) + if is_url: + logger.info("Read(URL): url=%s", request.url) + result = self.parser.parse_url( + request.url, + request.title, + parser_engine=parser_engine, + engine_overrides=engine_overrides, + ) + source_desc = request.url + else: + file_type = ( + request.file_type or os.path.splitext(request.file_name)[1][1:] + ) + logger.info( + "Read(File): file=%s, type=%s, size=%d bytes", + request.file_name, file_type, len(request.file_content), + ) + result = self.parser.parse_file( + request.file_name, + file_type, + request.file_content, + parser_engine=parser_engine, + engine_overrides=engine_overrides, + ) + source_desc = request.file_name - # Parse file - logger.info("Starting file parsing process") - result = self.parser.parse_file( - request.file_name, file_type, request.file_content, chunking_config - ) - - if not result: - error_msg = "Failed to parse file" + if not result or not 
result.content: + error_msg = f"Failed to parse: {source_desc}" logger.error(error_msg) - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(error_msg) - return ReadResponse() - - # Convert to protobuf message - logger.info( - f"Parsed file {request.file_name}, with {len(result.chunks)} chunks" - ) - - # Build response, including image info - response = ReadResponse( - chunks=[ - self._convert_chunk_to_proto(chunk) for chunk in result.chunks - ] - ) - logger.info(f"Response size: {response.ByteSize()} bytes") - return response - - except Exception as e: - error_msg = f"Error reading file: {str(e)}" - logger.error(error_msg) - logger.info(f"Detailed traceback: {traceback.format_exc()}") - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(str(e)) - return ReadResponse(error=str(e)) - - def ReadFromURL(self, request: ReadFromURLRequest, context): - # Get or generate request ID - request_id = ( - request.request_id - if hasattr(request, "request_id") and request.request_id - else str(uuid.uuid4()) - ) - - # Use request ID context - with request_id_context(request_id): - try: - logger.info(f"Received ReadFromURL request for URL: {request.url}") - - # Create chunking config - chunking_config = create_chunking_config(request.read_config) - - # Parse URL - logger.info("Starting URL parsing process") - result = self.parser.parse_url( - request.url, request.title, chunking_config - ) - if not result: - error_msg = "Failed to parse URL" - logger.error(error_msg) - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(error_msg) return ReadResponse(error=error_msg) - # Convert to protobuf message, including image info - logger.info( - f"Parsed URL {request.url}, returning {len(result.chunks)} chunks" + _c = to_valid_utf8_text + image_dir, image_refs = _resolve_images( + result.images, request_id, storage_map=storage_map ) response = ReadResponse( - chunks=[ - self._convert_chunk_to_proto(chunk) for chunk in result.chunks - ] + 
markdown_content=_c(result.content), + image_refs=image_refs, + image_dir_path=image_dir, + ) + logger.info( + "Read response: content_len=%d, images=%d", + len(result.content), len(image_refs), ) - logger.info(f"Response size: {response.ByteSize()} bytes") return response except Exception as e: - error_msg = f"Error reading URL: {str(e)}" + error_msg = f"Error reading document: {e}" logger.error(error_msg) - logger.info(f"Detailed traceback: {traceback.format_exc()}") - context.set_code(grpc.StatusCode.INTERNAL) - context.set_details(str(e)) + logger.info("Traceback: %s", traceback.format_exc()) return ReadResponse(error=str(e)) - def _convert_chunk_to_proto(self, chunk): - """Convert internal Chunk object to protobuf Chunk message - Ensures all string fields are valid UTF-8 for protobuf (no lone surrogates). - """ - # Clean helper for strings - _c = to_valid_utf8_text - - proto_chunk = Chunk( - content=_c(getattr(chunk, "content", None)), - seq=getattr(chunk, "seq", 0), - start=getattr(chunk, "start", 0), - end=getattr(chunk, "end", 0), - ) - - # If chunk has images attribute and is not empty, add image info - if hasattr(chunk, "images") and chunk.images: - logger.info( - f"Adding {len(chunk.images)} images to chunk {getattr(chunk, 'seq', 0)}" + def ListEngines(self, request, context): + overrides = dict(getattr(request, "config_overrides", None) or {}) + engines_data = registry.list_engines(overrides=overrides or None) + engines = [ + ParserEngineInfo( + name=e["name"], + description=e["description"], + file_types=e["file_types"], + available=e.get("available", True), + unavailable_reason=e.get("unavailable_reason", ""), ) - for img_info in chunk.images: - # img_info expected as dict - proto_image = Image( - url=_c(img_info.get("cos_url", "")), - caption=_c(img_info.get("caption", "")), - ocr_text=_c(img_info.get("ocr_text", "")), - original_url=_c(img_info.get("original_url", "")), - start=int(img_info.get("start", 0) or 0), - end=int(img_info.get("end", 0) or 
0), - ) - proto_chunk.images.append(proto_image) - - return proto_chunk + for e in engines_data + ] + return ListEnginesResponse(engines=engines) def main(): - # Print effective env/config at startup config.print_config() - # Create server server = grpc.server( futures.ThreadPoolExecutor(max_workers=CONFIG.grpc_max_workers), options=[ @@ -286,24 +236,18 @@ def main(): ], ) - # Register services docreader_pb2_grpc.add_DocReaderServicer_to_server(DocReaderServicer(), server) - # Register health check service health_servicer = HealthServicer() health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server) - # Set listen address server.add_insecure_port(f"[::]:{CONFIG.grpc_port}") - - # Start service server.start() - logger.info(f"Server started on port {CONFIG.grpc_port}") + logger.info("Server started on port %d", CONFIG.grpc_port) logger.info("Server is ready to accept connections") try: - # Wait for service termination server.wait_for_termination() except KeyboardInterrupt: logger.info("Received termination signal, shutting down server") diff --git a/docreader/models/read_config.py b/docreader/models/read_config.py index c229903c..8f70024c 100644 --- a/docreader/models/read_config.py +++ b/docreader/models/read_config.py @@ -1,27 +1,17 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass @dataclass class ChunkingConfig: - """ - Configuration for text chunking process. - Controls how documents are split into smaller pieces for processing. + """Legacy config kept for backward compatibility. + + After the lightweight refactoring, chunking is done in Go. + This class is only kept so existing parser constructors don't break. 
""" - # Maximum size of each chunk in tokens/chars chunk_size: int = 512 - - # Number of tokens/chars to overlap between chunks chunk_overlap: int = 50 - - # Text separators in order of priority - separators: list[str] = field(default_factory=lambda: ["\n\n", "\n", "。"]) - - # Whether to enable multimodal processing (text + images) + separators: list[str] | None = None enable_multimodal: bool = False - - # Preferred field name going forward - storage_config: dict[str, str] = field(default_factory=dict) - - # VLM configuration for image captioning - vlm_config: dict[str, str] = field(default_factory=dict) + storage_config: dict[str, str] | None = None + vlm_config: dict[str, str] | None = None diff --git a/docreader/parser/__init__.py b/docreader/parser/__init__.py index 497e1f08..f37075d4 100644 --- a/docreader/parser/__init__.py +++ b/docreader/parser/__init__.py @@ -13,7 +13,6 @@ The parsers extract content from documents and can split them into meaningful chunks for further processing and indexing. 
""" -from .csv_parser import CSVParser from .doc_parser import DocParser from .docx2_parser import Docx2Parser from .excel_parser import ExcelParser @@ -21,19 +20,19 @@ from .image_parser import ImageParser from .markdown_parser import MarkdownParser from .parser import Parser from .pdf_parser import PDFParser -from .text_parser import TextParser +from .registry import ParserEngineRegistry, registry from .web_parser import WebParser # Export public classes and modules __all__ = [ - "Docx2Parser", # Parser for .docx files (modern Word documents) - "DocParser", # Parser for .doc files (legacy Word documents) - "PDFParser", # Parser for PDF documents - "MarkdownParser", # Parser for Markdown text files - "TextParser", # Parser for plain text files - "ImageParser", # Parser for images with text content - "WebParser", # Parser for web pages - "Parser", # Main parser factory that selects the appropriate parser - "CSVParser", # Parser for CSV files - "ExcelParser", # Parser for Excel files + "Docx2Parser", + "DocParser", + "PDFParser", + "MarkdownParser", + "ImageParser", + "WebParser", + "Parser", + "ExcelParser", + "ParserEngineRegistry", + "registry", ] diff --git a/docreader/parser/base_parser.py b/docreader/parser/base_parser.py index eadd28e3..b868c1e7 100644 --- a/docreader/parser/base_parser.py +++ b/docreader/parser/base_parser.py @@ -1,1007 +1,61 @@ # -*- coding: utf-8 -*- -import asyncio -import io -import ipaddress import logging import os -import re -import time from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Tuple -from urllib.parse import urlparse +from typing import Optional -import requests -from PIL import Image - -from docreader.config import CONFIG -from docreader.models.document import Chunk, Document -from docreader.models.read_config import ChunkingConfig -from docreader.ocr import OCREngine -from docreader.parser.caption import Caption -from docreader.parser.storage import create_storage -from 
docreader.splitter.splitter import TextSplitter -from docreader.utils import endecode +from docreader.models.document import Document logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) class BaseParser(ABC): - """Base parser interface""" + """Base parser interface. - # Class variable for shared OCR engine instance - _ocr_engine = None - _ocr_engine_failed = False - - @staticmethod - def _is_safe_url(url: str) -> bool: - """Validate URL to prevent SSRF attacks - - Args: - url: URL to validate - - Returns: - True if URL is safe to fetch, False otherwise - """ - try: - parsed_url = urlparse(url) - - # Only allow http and https schemes - if parsed_url.scheme not in ["http", "https"]: - logger.warning(f"Rejected URL with invalid scheme: {parsed_url.scheme}") - return False - - # Extract hostname - hostname = parsed_url.hostname - if not hostname: - logger.warning("No hostname found in URL") - return False - # Try to parse as IP address - try: - ip = ipaddress.ip_address(hostname) - # Reject private, loopback, link-local, multicast addresses - if ( - ip.is_private - or ip.is_loopback - or ip.is_link_local - or ip.is_multicast - or ip.is_reserved - ): - logger.warning(f"Rejected URL with restricted IP: {ip}") - return False - except ValueError: - # Not an IP address, validate hostname - # Reject localhost and common internal hostnames - hostname_lower = hostname.lower() - restricted_hostnames = [ - "localhost", - "127.0.0.1", - "::1", - "metadata.google.internal", - "metadata.tencentyun.com", - "169.254.169.254", # AWS metadata endpoint - ] - if hostname_lower in restricted_hostnames or hostname_lower.endswith( - ".local" - ): - logger.warning(f"Rejected URL with restricted hostname: {hostname}") - return False - - return True - except Exception as e: - logger.warning(f"Error validating URL: {str(e)}") - return False - - @classmethod - def get_ocr_engine(cls, backend_type="paddle", **kwargs): - """Get OCR engine instance - - Args: - backend_type: OCR 
engine type, e.g. "paddle", "nanonets" - **kwargs: Arguments for the OCR engine - - Returns: - OCR engine instance or None - """ - if cls._ocr_engine is None and not cls._ocr_engine_failed: - try: - cls._ocr_engine = OCREngine.get_instance( - backend_type=backend_type, **kwargs - ) - if cls._ocr_engine is None: - cls._ocr_engine_failed = True - logger.error(f"Failed to initialize OCR engine ({backend_type})") - return None - logger.info(f"Successfully initialized OCR engine: {backend_type}") - except Exception as e: - cls._ocr_engine_failed = True - logger.error(f"Failed to initialize OCR engine: {str(e)}") - return None - return cls._ocr_engine + After the lightweight refactoring, BaseParser only extracts markdown text + and raw image references from documents. Chunking, image storage, OCR, + and VLM caption are handled by the Go App module. + """ def __init__( self, file_name: str = "", file_type: Optional[str] = None, - enable_multimodal: bool = True, - chunk_size: int = 1000, - chunk_overlap: int = 200, - separators: list[str] = ["\n\n", "\n", "。"], - ocr_backend: str = "no_ocr", - ocr_config: dict = {}, - max_image_size: int = 1920, # Maximum image size - max_concurrent_tasks: int = 5, # Max concurrent tasks - max_chunks: int = 1000, # Max number of returned chunks - chunking_config: Optional[ChunkingConfig] = None, **kwargs, ): - """Initialize parser - - Args: - file_name: File name - file_type: File type, inferred from file_name if None - enable_multimodal: Whether to enable multimodal - chunk_size: Chunk size - chunk_overlap: Chunk overlap - separators: List of separators - ocr_backend: OCR engine type - ocr_config: OCR engine config - max_image_size: Maximum image size - max_concurrent_tasks: Max concurrent tasks - max_chunks: Max number of returned chunks - """ - # Storage client instance self.file_name = file_name - self.file_type = file_type or os.path.splitext(file_name)[1] - self.enable_multimodal = enable_multimodal - self.chunk_size = chunk_size - 
self.chunk_overlap = chunk_overlap - self.separators = separators - self.ocr_backend = ocr_backend - self.ocr_config = ocr_config - self.max_image_size = max_image_size - self.max_concurrent_tasks = max_concurrent_tasks - self.max_chunks = max_chunks - self.chunking_config = chunking_config - self.storage = create_storage( - self.chunking_config.storage_config if self.chunking_config else None - ) + self.file_type = file_type or os.path.splitext(file_name)[1].lstrip(".") logger.info( - f"Initializing parser for file={file_name}, type={file_type}, " - f"parser config: chunk_size={chunk_size}, " - f"overlap={chunk_overlap}, " - f"multimodal={enable_multimodal}, " - f"max_chunks={max_chunks}, " - f"ocr_backend={ocr_backend}, " - f"max_concurrent_tasks={max_concurrent_tasks}" - ) - # Only initialize Caption service if multimodal is enabled - vlm_config = self.chunking_config.vlm_config if self.chunking_config else None - self.caption_parser = ( - Caption(vlm_config=vlm_config) if self.enable_multimodal else None + "Initializing parser for file=%s, type=%s", + file_name, + self.file_type, ) @abstractmethod def parse_into_text(self, content: bytes) -> Document: - """Parse document content - - Args: - content: Document content + """Parse document content into markdown text. Returns: - Either a string containing the parsed text, or a tuple of (text, image_map) - where image_map is a dict mapping image URLs to Image objects + Document with ``content`` (markdown string) and optional + ``images`` dict mapping storage-relative paths to base64 data. 
""" - def perform_ocr(self, image: Image.Image): - """Execute OCR recognition on the image - - Args: - image: Image object (PIL.Image or numpy array) - - Returns: - Extracted text string - """ - start_time = time.time() - logger.info("Starting OCR recognition") - - # Resize image to avoid processing large images - resized_image = self._resize_image_if_needed(image) - - # Get OCR engine - ocr_engine = OCREngine.get_instance(self.ocr_backend) - - # Execute OCR prediction - logger.info(f"Executing OCR prediction (using {self.ocr_backend} engine)") - ocr_result = ocr_engine.predict(resized_image) - - process_time = time.time() - start_time - logger.info(f"OCR recognition completed, time: {process_time:.2f} seconds") - - return ocr_result - - def _resize_image_if_needed(self, image: Image.Image) -> Image.Image: - """Resize image if it exceeds maximum size limit - - Args: - image: Image object (PIL.Image or numpy array) - - Returns: - Resized image object - """ - width, height = image.size - if width > self.max_image_size or height > self.max_image_size: - logger.info(f"Resizing PIL image, original size: {width}x{height}") - scale = min(self.max_image_size / width, self.max_image_size / height) - new_width = int(width * scale) - new_height = int(height * scale) - resized_image = image.resize((new_width, new_height)) - logger.info(f"Resized to: {new_width}x{new_height}") - return resized_image - - logger.info(f"PIL image size is {width}x{height}, no resizing needed") - return image - - async def process_image_async(self, image: Image.Image, image_url: str): - """Asynchronously process image: perform OCR only (caption is generated asynchronously by Go backend) - - Args: - image: Image object (PIL.Image or numpy array) - image_url: Image URL (if uploaded) - - Returns: - tuple: (ocr_text, caption, image_url) - - ocr_text: OCR extracted text - - caption: Always empty string (caption generation moved to async task in Go backend) - - image_url: Image URL (if provided) - """ - 
logger.info( - "Starting asynchronous image processing (OCR only, caption deferred to async task)" - ) - - # Resize image - resized_image = self._resize_image_if_needed(image) - try: - # Perform OCR recognition - loop = asyncio.get_event_loop() - try: - # Add timeout mechanism to avoid infinite blocking (30 seconds timeout) - ocr_task = loop.run_in_executor(None, self.perform_ocr, resized_image) - ocr_text = await asyncio.wait_for(ocr_task, timeout=30.0) - except Exception as e: - logger.error(f"OCR processing error, skipping this image: {str(e)}") - ocr_text = "" - - logger.info(f"Successfully obtained image ocr: {ocr_text}") - img_base64 = endecode.decode_image(resized_image) - caption = self.get_image_caption(img_base64) - logger.info(f"Successfully obtained image caption: {caption}") - return ocr_text, caption, image_url - finally: - resized_image.close() - - async def process_with_limit( - self, idx: int, image: Image.Image, url: str, semaphore: asyncio.Semaphore - ): - """Function to process a single image using a semaphore""" - try: - logger.info(f"Waiting to process image {idx + 1}") - async with semaphore: # Use semaphore to control concurrency - logger.info(f"Starting to process image {idx + 1}") - result = await self.process_image_async(image, url) - logger.info(f"Completed processing image {idx + 1}") - return result - except Exception as e: - logger.error(f"Error processing image {idx + 1}: {str(e)}") - return ("", "", url) # Return empty result to avoid overall failure - finally: - # Manually release image resources - image.close() - - async def process_multiple_images(self, images_data: List[Tuple[Image.Image, str]]): - """Process multiple images concurrently - - Args: - images_data: List of (image, image_url) tuples - - Returns: - List of (ocr_text, caption, image_url) tuples - """ - logger.info(f"Starting concurrent processing of {len(images_data)} images") - - if not images_data: - logger.warning("No image data to process") - return [] - - # Use 
semaphore to limit concurrency - semaphore = asyncio.Semaphore(self.max_concurrent_tasks) - - # Store results to avoid overall failure due to task failure - results = [] - - # Create all tasks, but use semaphore to limit actual concurrency - tasks = [ - self.process_with_limit(i, img, url, semaphore) - for i, (img, url) in enumerate(images_data) - ] - - try: - # Execute all tasks, but set overall timeout - completed_results = await asyncio.gather(*tasks, return_exceptions=True) - - # Handle possible exception results - for i, result in enumerate(completed_results): - if isinstance(result, Exception): - logger.error( - f"Image {i + 1} processing returned an exception: {str(result)}" - ) - # For exceptions, add empty results - if i < len(images_data): - results.append(("", "", images_data[i][1])) - else: - results.append(result) - except Exception as e: - logger.error(f"Error during concurrent image processing: {str(e)}") - # Add empty results for all images - results = [("", "", url) for _, url in images_data] - finally: - # Clean up references and trigger garbage collection - images_data.clear() - logger.info("Image processing resource cleanup complete") - - logger.info( - f"Concurrent processing of {len(results)}/{len(images_data)} images" - ) - return results - - def get_image_caption(self, image_data: str) -> str: - """Get image description - - Args: - image_data: Image data (base64 encoded string or URL) - - Returns: - Image description - """ - if not self.caption_parser: - logger.warning("Caption parser not initialized") - return "" - start_time = time.time() - logger.info( - f"Getting caption for image: {image_data[:250]}..." 
- if len(image_data) > 250 - else f"Getting caption for image: {image_data}" - ) - caption = self.caption_parser.get_caption(image_data) - if caption: - logger.info( - f"Received caption of length: {len(caption)}, caption: {caption}," - f"cost: {time.time() - start_time} seconds" - ) - else: - logger.warning("Failed to get caption for image") - return caption - def parse(self, content: bytes) -> Document: - """Parse document content + """Parse document and return markdown + image references. - Args: - content: Document content - - Returns: - Parse result + No chunking, no OCR, no VLM caption — those are done in Go. """ logger.info( - f"Parsing document with {self.__class__.__name__}, bytes: {len(content)}" + "Parsing document with %s, bytes: %d", + self.__class__.__name__, + len(content), ) document = self.parse_into_text(content) logger.info( - f"Extracted {len(document.content)} characters from {self.file_name}" + "Extracted %d characters from %s", + len(document.content), + self.file_name, ) - if document.chunks: - return document - - splitter = TextSplitter( - chunk_size=self.chunk_size, - chunk_overlap=self.chunk_overlap, - separators=self.separators, - ) - chunk_str = splitter.split_text(document.content) - chunks = self._str_to_chunk(chunk_str) - logger.info(f"Created {len(chunks)} chunks from document") - - # Limit the number of returned chunks - if len(chunks) > self.max_chunks: - logger.warning( - f"Limiting chunks from {len(chunks)} to maximum {self.max_chunks}" - ) - chunks = chunks[: self.max_chunks] - - # If multimodal is enabled and file type is supported, process images - if self.enable_multimodal: - # Get file extension and convert to lowercase - file_ext = ( - os.path.splitext(self.file_name)[1].lower() - if self.file_name - else (self.file_type.lower() if self.file_type else "") - ) - - # Define allowed file types for image processing - allowed_types = [ - # Text files - ".pdf", - ".md", - ".markdown", - ".doc", - ".docx", - # Image files - 
".jpg", - ".jpeg", - ".png", - ".gif", - ".bmp", - ".tiff", - ".webp", - ] - - if file_ext in allowed_types: - logger.info( - f"Processing images in each chunk for file type: {file_ext}" - ) - chunks = self.process_chunks_images(chunks, document.images) - else: - logger.info( - f"Skipping image processing for unsupported file type: {file_ext}" - ) - - document.chunks = chunks return document - - def _str_to_chunk(self, text: List[Tuple[int, int, str]]) -> List[Chunk]: - """Convert string to Chunk object""" - return [ - Chunk(seq=i, content=t, start=start, end=end) - for i, (start, end, t) in enumerate(text) - ] - - def _split_into_units(self, text: str) -> List[str]: - """ - Args: - text: 文本内容 - - Returns: - 基本单元的列表 - """ - logger.info(f"Splitting text into basic units, text length: {len(text)}") - - # 定义所有需要作为整体保护的结构模式 --- - table_pattern = r"(?m)(^\|.*\|[ \t]*\r?\n(?:[ \t]*\r?\n)?^\|\s*:?--+.*\r?\n(?:^\|.*\|\r?\n?)*)" - - # 其他需要保护的结构(代码块、公式块、行内元素) - code_block_pattern = r"```[\s\S]*?```" - math_block_pattern = r"\$\$[\s\S]*?\$\$" - inline_pattern = r"!\[.*?\]\(.*?\)|\[.*?\]\(.*?\)" - - # 查找所有受保护结构的位置 --- - protected_ranges = [] - for pattern in [ - table_pattern, - code_block_pattern, - math_block_pattern, - inline_pattern, - ]: - for match in re.finditer(pattern, text): - # 确保匹配到的不是空字符串,避免无效范围 - if match.group(0).strip(): - protected_ranges.append((match.start(), match.end())) - - # 按起始位置排序 - protected_ranges.sort(key=lambda x: x[0]) - logger.info( - f"Found {len(protected_ranges)} protected structures " - "(tables, code, formulas, images, links)." 
- ) - - # 合并可能重叠的保护范围 --- - # 确保我们有一组不相交的、需要保护的文本块 - if protected_ranges: - merged_ranges = [] - current_start, current_end = protected_ranges[0] - - for next_start, next_end in protected_ranges[1:]: - if next_start < current_end: - # 如果下一个范围与当前范围重叠,则合并它们 - current_end = max(current_end, next_end) - else: - # 如果不重叠,则完成当前范围并开始一个新的范围 - merged_ranges.append((current_start, current_end)) - current_start, current_end = next_start, next_end - - merged_ranges.append((current_start, current_end)) - protected_ranges = merged_ranges - logger.info( - f"After overlaps, {len(protected_ranges)} protected ranges remain." - ) - - # 根据保护范围和分隔符来分割文本 --- - units = [] - last_end = 0 - - # 定义分隔符的正则表达式,通过加括号来保留分隔符本身 - separator_pattern = f"({'|'.join(re.escape(s) for s in self.separators)})" - - for start, end in protected_ranges: - # a. 处理受保护范围之前的文本 - if start > last_end: - pre_text = text[last_end:start] - # 对这部分非保护文本进行分割,并保留分隔符 - segments = re.split(separator_pattern, pre_text) - units.extend([s for s in segments if s]) # 添加所有非空部分 - - # b. 将整个受保护的块(例如,一个完整的表格)作为一个不可分割的单元添加 - protected_text = text[start:end] - units.append(protected_text) - - last_end = end - - # c. 
处理最后一个受保护范围之后的文本 - if last_end < len(text): - post_text = text[last_end:] - segments = re.split(separator_pattern, post_text) - units.extend([s for s in segments if s]) # 添加所有非空部分 - - logger.info(f"Text splitting complete, created {len(units)} final basic units.") - return units - - def chunk_text(self, text: str) -> List[Chunk]: - """Chunk text, preserving Markdown structure - - Args: - text: Text content - - Returns: - List of text chunks - """ - if not text: - logger.warning("Empty text provided for chunking, returning empty list") - return [] - - logger.info(f"Starting text chunking process, text length: {len(text)}") - logger.info( - f"Chunking parameters: size={self.chunk_size}, overlap={self.chunk_overlap}" - ) - - # Split text into basic units - units = self._split_into_units(text) - logger.info(f"Split text into {len(units)} basic units") - - chunks = [] - current_chunk = [] - current_size = 0 - current_start = 0 - - for i, unit in enumerate(units): - unit_size = len(unit) - logger.info(f"Processing unit {i + 1}/{len(units)}, size: {unit_size}") - - # If current chunk plus new unit exceeds size limit, create new chunk - if current_size + unit_size > self.chunk_size and current_chunk: - chunk_text = "".join(current_chunk) - chunks.append( - Chunk( - seq=len(chunks), - content=chunk_text, - start=current_start, - end=current_start + len(chunk_text), - ) - ) - logger.info(f"Created chunk {len(chunks)}, size: {len(chunk_text)}") - - # Keep overlap, ensuring structure integrity - if self.chunk_overlap > 0: - # Calculate target overlap size - overlap_target = min(self.chunk_overlap, len(chunk_text)) - logger.info( - f"Calculating overlap with target size: {overlap_target}" - ) - - # Find complete units from the end - overlap_units = [] - overlap_size = 0 - - for u in reversed(current_chunk): - if overlap_size + len(u) > overlap_target: - logger.info( - f"Overlap target ({overlap_size}/{overlap_target})" - ) - break - overlap_units.insert(0, u) - overlap_size += 
len(u) - logger.info(f"Added unit to overlap, size: {overlap_size}") - - # Remove elements from overlap that are included in separators - start_index = 0 - for i, u in enumerate(overlap_units): - # Check if u is in separators - all_of_separator = True - for uu in u: - if uu not in self.separators: - all_of_separator = False - break - if all_of_separator: - # Remove the first element - start_index = i + 1 - overlap_size = overlap_size - len(u) - logger.info(f"Removed separator from overlap: '{u}'") - else: - break - - overlap_units = overlap_units[start_index:] - logger.info( - f"Overlap: {len(overlap_units)} units, {overlap_size} size" - ) - - current_chunk = overlap_units - current_size = overlap_size - # Update start position, considering overlap - current_start = current_start + len(chunk_text) - overlap_size - else: - logger.info("No overlap configured, starting fresh chunk") - current_chunk = [] - current_size = 0 - current_start = current_start + len(chunk_text) - - current_chunk.append(unit) - current_size += unit_size - logger.info( - f"Added unit to current chunk, at {current_size}/{self.chunk_size}" - ) - - # Add the last chunk - if current_chunk: - chunk_text = "".join(current_chunk) - chunks.append( - Chunk( - seq=len(chunks), - content=chunk_text, - start=current_start, - end=current_start + len(chunk_text), - ) - ) - logger.info(f"Created final chunk {len(chunks)}, size: {len(chunk_text)}") - - logger.info(f"Chunking complete, created {len(chunks)} chunks from text") - return chunks - - def extract_images_from_chunk(self, chunk: Chunk) -> List[Dict[str, str]]: - """Extract image information from a chunk - - Args: - chunk: Document chunk - - Returns: - List of image information - """ - logger.info(f"Extracting image information from Chunk #{chunk.seq + 1}") - text = chunk.content - - # Regex to extract image information from text, - # support: Markdown images, HTML images - img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)|]*src="([^"]+)" [^>]*>' - - # Extract 
image information - img_matches = list(re.finditer(img_pattern, text)) - logger.info(f"Chunk #{chunk.seq + 1} found {len(img_matches)} images") - - images_info = [] - for match_idx, match in enumerate(img_matches): - # Process image URL - img_url = match.group(2) if match.group(2) else match.group(3) - alt_text = match.group(1) if match.group(1) else "" - - # Record image information - image_info = { - "original_url": img_url, - "start": match.start(), - "end": match.end(), - "alt_text": alt_text, - "match_text": text[match.start() : match.end()], - } - images_info.append(image_info) - - logger.info( - f"Image in Chunk #{chunk.seq + 1} {match_idx + 1}: URL={img_url[:50]}..." - if len(img_url) > 50 - else f"Image in Chunk #{chunk.seq + 1} {match_idx + 1}: URL={img_url}" - ) - - return images_info - - async def download_and_upload_image( - self, img_url: str - ) -> Tuple[str, str, Image.Image | None]: - """Download image and upload to object storage, - if it's already an object storage path or local path, use directly - - Args: - img_url: Image URL or local path - - Returns: - tuple: (original URL, storage URL, image object), - if failed returns (original URL, None, None) - """ - - try: - # Check if it's already a storage URL (COS or MinIO) - is_storage_url = any( - pattern in img_url - for pattern in ["cos", "myqcloud.com", "minio", ".s3."] - ) - if is_storage_url: - logger.info(f"Image already on COS: {img_url}, no need to re-upload") - try: - # Validate URL to prevent SSRF attacks - if not self._is_safe_url(img_url): - logger.error(f"URL failed validation check: {img_url}") - return img_url, img_url, None - # Still need to get image object for OCR processing - # Get proxy settings from environment variables - proxies = {} - if CONFIG.external_http_proxy: - proxies["http"] = CONFIG.external_http_proxy - if CONFIG.external_https_proxy: - proxies["https"] = CONFIG.external_https_proxy - - response = requests.get(img_url, timeout=5, proxies=proxies) - if 
response.status_code == 200: - image = Image.open(io.BytesIO(response.content)) - return img_url, img_url, image - else: - logger.warning( - f"Failed to get storage image: {response.status_code}" - ) - return img_url, img_url, None - except Exception as e: - logger.error(f"Error getting storage image: {str(e)}") - return img_url, img_url, None - - # Check if it's a local file path - elif os.path.exists(img_url) and os.path.isfile(img_url): - logger.info(f"Using local image file: {img_url}") - image = None - try: - # Read local image - image = Image.open(img_url) - # Upload to storage - with open(img_url, "rb") as f: - content = f.read() - storage_url = self.storage.upload_bytes(content) - logger.info( - f"Successfully uploaded local image to storage: {storage_url}" - ) - return img_url, storage_url, image - except Exception as e: - logger.error(f"Error processing local image: {str(e)}") - if image and hasattr(image, "close"): - image.close() - return img_url, img_url, None - - # Normal remote URL download handling - else: - # Validate URL to prevent SSRF attacks - if not self._is_safe_url(img_url): - logger.error(f"URL failed validation check: {img_url}") - return img_url, img_url, None - # Get proxy settings from environment variables - proxies = {} - if CONFIG.external_http_proxy: - proxies["http"] = CONFIG.external_http_proxy - if CONFIG.external_https_proxy: - proxies["https"] = CONFIG.external_https_proxy - - logger.info(f"Downloading image {img_url}, using proxy: {proxies}") - response = requests.get(img_url, timeout=5, proxies=proxies) - - if response.status_code == 200: - # Download successful, create image object - image = Image.open(io.BytesIO(response.content)) - try: - # Upload to storage using the method in BaseParser - storage_url = self.storage.upload_bytes(response.content) - logger.info( - f"Successfully uploaded image to storage: {storage_url}" - ) - return img_url, storage_url, image - finally: - # Image will be closed by the caller - pass - 
else: - logger.warning(f"Failed to download image: {response.status_code}") - return img_url, img_url, None - - except Exception as e: - logger.error(f"Error downloading or processing image: {str(e)}") - return img_url, img_url, None - - async def process_chunk_images_async( - self, chunk, chunk_idx, total_chunks, image_map=None - ): - """Asynchronously process images in a single Chunk - - Args: - chunk: Chunk object to process - chunk_idx: Chunk index - total_chunks: Total number of chunks - image_map: Optional dictionary mapping image URLs to Image objects - - Returns: - Processed Chunk object - """ - - logger.info( - f"Starting to process images in Chunk #{chunk_idx + 1}/{total_chunks}" - ) - - # Extract image information from the Chunk - images_info = self.extract_images_from_chunk(chunk) - if not images_info: - logger.info(f"Chunk #{chunk_idx + 1} found no images") - return chunk - - # Prepare images that need to be downloaded and processed - images_to_process = [] - # Map URL to image information - url_to_info_map = {} - - # Record all image URLs that need to be processed - for img_info in images_info: - url = img_info["original_url"] - url_to_info_map[url] = img_info - - results = [] - download_tasks = [] - # Check if image is already in the image_map - for img_url in url_to_info_map.keys(): - if image_map and img_url in image_map: - logger.info( - f"Image already in image_map: {img_url}, using cached object" - ) - image = Image.open( - io.BytesIO(endecode.encode_image(image_map[img_url])) - ) - results.append((img_url, img_url, image)) - else: - download_task = self.download_and_upload_image(img_url) - download_tasks.append(download_task) - # Concurrent download and upload of images, - # ignore images that are already in the image_map - results.extend(await asyncio.gather(*download_tasks)) - - # Process download results, prepare for OCR processing - for orig_url, cos_url, image in results: - if cos_url and image: - img_info = url_to_info_map[orig_url] - 
img_info["cos_url"] = cos_url - images_to_process.append((image, cos_url)) - - # If no images were successfully downloaded and uploaded, - # return the original Chunk - if not images_to_process: - logger.info( - f"Chunk #{chunk_idx + 1} not found downloaded and uploaded images" - ) - return chunk - - # Concurrent processing of all images (OCR + caption) - logger.info( - f"Processing {len(images_to_process)} images in Chunk #{chunk_idx + 1}" - ) - - # Concurrent processing of all images - processed_results = await self.process_multiple_images(images_to_process) - - # Process OCR and Caption results - for ocr_text, caption, img_url in processed_results: - # Find the corresponding original URL - for orig_url, info in url_to_info_map.items(): - if info.get("cos_url") == img_url: - info["ocr_text"] = ocr_text if ocr_text else "" - info["caption"] = caption if caption else "" - - if ocr_text: - logger.info( - f"Image OCR extracted {len(ocr_text)} characters: {img_url}" - ) - if caption: - logger.info(f"Obtained image description: '{caption}'") - break - - # Add processed image information to the Chunk - processed_images = [] - for img_info in images_info: - if "cos_url" in img_info: - processed_images.append(img_info) - - # Update image information in the Chunk - chunk.images = processed_images - - logger.info(f"Completed image processing in Chunk #{chunk_idx + 1}") - return chunk - - def process_chunks_images( - self, chunks: List[Chunk], image_map: Dict[str, str] = {} - ) -> List[Chunk]: - """Concurrent processing of images in all Chunks - - Args: - chunks: List of document chunks - - Returns: - List of processed document chunks - """ - logger.info( - f"Starting concurrent processing of images in all {len(chunks)} chunks" - ) - - if not chunks: - logger.warning("No chunks to process") - return chunks - - # Create and run all Chunk concurrent processing tasks - async def process_all_chunks(): - # Use semaphore to limit concurrency - semaphore = 
asyncio.Semaphore(self.max_concurrent_tasks) - - async def process_with_limit(chunk, idx, total): - """Use semaphore to control concurrent processing of Chunks""" - async with semaphore: - return await self.process_chunk_images_async( - chunk, idx, total, image_map - ) - - # Create tasks for all Chunks - tasks = [ - process_with_limit(chunk, idx, len(chunks)) - for idx, chunk in enumerate(chunks) - ] - - # Execute all tasks concurrently - results = await asyncio.gather(*tasks, return_exceptions=True) - - # Handle possible exceptions - processed_chunks = [] - for i, result in enumerate(results): - if isinstance(result, Exception): - logger.error(f"Error processing Chunk {i + 1}: {str(result)}") - # Keep original Chunk - if i < len(chunks): - processed_chunks.append(chunks[i]) - else: - processed_chunks.append(result) - - return processed_chunks - - # Create event loop and run all tasks - try: - # Check if event loop already exists - try: - loop = asyncio.get_event_loop() - if loop.is_closed(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - except RuntimeError: - # If no event loop, create a new one - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - # Execute processing for all Chunks - processed_chunks = loop.run_until_complete(process_all_chunks()) - logger.info( - f"Completed processing of {len(processed_chunks)}/{len(chunks)} chunks" - ) - - return processed_chunks - except Exception as e: - logger.error(f"Error during concurrent chunk processing: {str(e)}") - return chunks diff --git a/docreader/parser/caption.py b/docreader/parser/caption.py deleted file mode 100644 index cf47f780..00000000 --- a/docreader/parser/caption.py +++ /dev/null @@ -1,387 +0,0 @@ -import json -import logging -import time -from dataclasses import dataclass, field -from typing import Dict, List, Optional, Union - -import ollama -import requests - -from docreader.config import CONFIG - -logger = logging.getLogger(__name__) - - -@dataclass -class 
ImageUrl: - """Image URL data structure for caption requests.""" - - url: Optional[str] = None - detail: Optional[str] = None - - -@dataclass -class Content: - """Content data structure that can contain text or image URL.""" - - type: Optional[str] = None - text: Optional[str] = None - image_url: Optional[ImageUrl] = None - - -@dataclass -class SystemMessage: - """System message for VLM model requests.""" - - role: Optional[str] = None - content: Optional[str] = None - - -@dataclass -class UserMessage: - """User message for VLM model requests, can contain multiple content items.""" - - role: Optional[str] = None - content: List[Content] = field(default_factory=list) - - -@dataclass -class CompletionRequest: - """Request structure for VLM model completion API.""" - - model: str - temperature: float - top_p: float - messages: List[Union[SystemMessage, UserMessage]] - user: str - - -@dataclass -class Model: - """Model identifier structure.""" - - id: str - - -@dataclass -class ModelsResp: - """Response structure for available models API.""" - - data: List[Model] = field(default_factory=list) - - -@dataclass -class Message: - """Message structure in API response.""" - - role: Optional[str] = None - content: Optional[str] = None - tool_calls: Optional[str] = None - - -@dataclass -class Choice: - """Choice structure in API response.""" - - message: Optional[Message] = None - - -@dataclass -class Usage: - """Token usage information in API response.""" - - prompt_tokens: Optional[int] = 0 - total_tokens: Optional[int] = 0 - completion_tokens: Optional[int] = 0 - - -@dataclass -class CaptionChatResp: - """Response structure for caption chat API.""" - - id: Optional[str] = None - created: Optional[int] = None - model: Optional[Model] = None - object: Optional[str] = None - choices: List[Choice] = field(default_factory=list) - usage: Optional[Usage] = None - - @staticmethod - def from_json(json_data: dict) -> "CaptionChatResp": - """ - Parse API response JSON into a 
CaptionChatResp object. - - Args: - json_data: The JSON response from the API - - Returns: - A parsed CaptionChatResp object - """ - logger.info("Parsing CaptionChatResp from JSON") - # Manually parse nested fields with safe field extraction - choices = [] - for choice in json_data.get("choices", []): - message_data = choice.get("message", {}) - message = Message( - role=message_data.get("role"), - content=message_data.get("content"), - tool_calls=message_data.get("tool_calls"), - ) - choices.append(Choice(message=message)) - - # Handle usage with safe field extraction - usage_data = json_data.get("usage", {}) - usage = None - if usage_data: - usage = Usage( - prompt_tokens=usage_data.get("prompt_tokens", 0), - total_tokens=usage_data.get("total_tokens", 0), - completion_tokens=usage_data.get("completion_tokens", 0), - ) - - logger.info( - f"Parsed {len(choices)} choices and usage data: {usage is not None}" - ) - return CaptionChatResp( - id=json_data.get("id"), - created=json_data.get("created"), - model=json_data.get("model"), - object=json_data.get("object"), - choices=choices, - usage=usage, - ) - - def choice_data(self) -> str: - """ - Extract the content from the first choice in the response. - - Returns: - The content string from the first choice, or empty string if no choices - """ - if ( - not self.choices - or not self.choices[0] - or not self.choices[0].message - or not self.choices[0].message.content - ): - logger.warning("No choices available in response") - return "" - logger.info("Retrieving content from first choice") - return self.choices[0].message.content - - -class Caption: - """ - Service for generating captions for images using a Vision Language Model. - Uses an external API to process images and return textual descriptions. - """ - - def __init__(self, vlm_config: Optional[Dict[str, str]] = None): - """ - Initialize the Caption service with configuration - from parameters or environment variables. 
- """ - logger.info("Initializing Caption service") - # Default prompt for image captioning in Chinese: "Briefly describe the main content of the image" - self.prompt = """简单凝炼的描述图片的主要内容""" - # API request timeout in seconds - self.timeout = 30 - - # Use provided VLM config if available, - # otherwise fall back to environment variables - if vlm_config and vlm_config.get("base_url") and vlm_config.get("model_name"): - # Build completion URL from provided base URL - self.completion_url = vlm_config.get("base_url", "") + "/chat/completions" - self.model = vlm_config.get("model_name", "") - self.api_key = vlm_config.get("api_key", "") - # Interface type: "ollama" or "openai" (default) - self.interface_type = vlm_config.get("interface_type", "openai").lower() - else: - # Fall back to environment variables if config not provided - self.completion_url = CONFIG.vlm_model_base_url + "/chat/completions" - self.model = CONFIG.vlm_model_name - self.api_key = CONFIG.vlm_model_api_key - self.interface_type = CONFIG.vlm_interface_type - - # Validate interface type - must be either "ollama" or "openai" - if self.interface_type not in ["ollama", "openai"]: - logger.warning( - f"Unknown interface type: {self.interface_type}, defaulting to openai" - ) - self.interface_type = "openai" - - logger.info( - f"Configured with model: {self.model}, " - f"endpoint: {self.completion_url}, interface: {self.interface_type}" - ) - - def _call_caption_api(self, image_data: str) -> Optional[CaptionChatResp]: - """ - Call the Caption API to generate a description for the given image. 
- - Args: - image_data: URL of the image or base64 encoded image data - - Returns: - CaptionChatResp object if successful, None otherwise - """ - logger.info("Calling Caption API for image captioning") - logger.info(f"Processing image data: {image_data[:50]}...") - - # Route to appropriate API based on interface type - if self.interface_type == "ollama": - return self._call_ollama_api(image_data) - else: - return self._call_openai_api(image_data) - - def _call_ollama_api(self, image_base64: str) -> Optional[CaptionChatResp]: - """Call Ollama API for image captioning using base64 encoded image data.""" - - # Extract host URL by removing the chat completions endpoint - # Handle both "/v1/chat/completions" and "/chat/completions" patterns - host = self.completion_url.replace("/v1/chat/completions", "").replace("/chat/completions", "") - - # Initialize Ollama client with host and timeout - client = ollama.Client( - host=host, - timeout=self.timeout, - ) - - try: - logger.info(f"Calling Ollama API with model: {self.model}") - - # Call Ollama API with base64 encoded image - # Prompt: "Briefly describe the main content of the image" - response = client.generate( - model=self.model, - prompt="简单凝炼的描述图片的主要内容", - images=[image_base64], # Pass base64 encoded image data - options={ - "temperature": 0.1 - }, # Low temperature for more deterministic output - stream=False, - ) - - # Construct response object in standard format - caption_resp = CaptionChatResp( - id="ollama_response", - created=int(time.time()), - model=Model(id=self.model), - object="chat.completion", - choices=[ - Choice(message=Message(role="assistant", content=response.response)) - ], - ) - - logger.info("Successfully received response from Ollama API") - return caption_resp - - except Exception as e: - logger.error(f"Error calling Ollama API: {e}") - return None - - def _call_openai_api(self, image_base64: str) -> Optional[CaptionChatResp]: - """Call OpenAI-compatible API for image captioning.""" - 
logger.info(f"Calling OpenAI-compatible API with model: {self.model}") - - # Construct user message with text prompt and base64 encoded image - user_msg = UserMessage( - role="user", - content=[ - Content(type="text", text=self.prompt), - Content( - type="image_url", - image_url=ImageUrl( - url="data:image/png;base64," + image_base64, detail="auto" - ), - ), - ], - ) - - # Build completion request with model parameters - gpt_req = CompletionRequest( - model=self.model, - temperature=0.3, # Moderate randomness for balanced output - top_p=0.8, # Nucleus sampling parameter - messages=[user_msg], - user="abc", - ) - - # Set up HTTP headers for the API request - headers = { - "Content-Type": "application/json", - "Accept": "text/event-stream", - "Cache-Control": "no-cache", - "Connection": "keep-alive", - } - # Add authorization header if API key is provided - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - - try: - logger.info( - f"Sending request to OpenAI-compatible API with model: {self.model}" - ) - # Send POST request to the API endpoint - response = requests.post( - self.completion_url, - data=json.dumps(gpt_req, default=lambda o: o.__dict__, indent=4), - headers=headers, - timeout=self.timeout, - ) - # Check for successful response - if response.status_code != 200: - logger.error( - f"OpenAI API returned non-200 status code: {response.status_code}" - ) - response.raise_for_status() - - logger.info(f"Received from OpenAI with status: {response.status_code}") - logger.info("Converting response to CaptionChatResp object") - # Parse JSON response into structured object - caption_resp = CaptionChatResp.from_json(response.json()) - - if caption_resp.usage: - logger.info( - f"API usage: prompt_tokens={caption_resp.usage.prompt_tokens}, " - f"completion_tokens={caption_resp.usage.completion_tokens}" - ) - - return caption_resp - except requests.exceptions.Timeout: - logger.error("Timeout while calling OpenAI-compatible API after 30 seconds") - 
return None - except requests.exceptions.RequestException as e: - logger.error(f"Request error calling OpenAI-compatible API: {e}") - return None - except Exception as e: - logger.error(f"Unexpected error calling OpenAI-compatible API: {e}") - return None - - def get_caption(self, image_data: str) -> str: - """ - Get a caption for the provided image data. - - Args: - image_data: URL of the image or base64 encoded image data - - Returns: - Caption text as string, or empty string if captioning failed - """ - logger.info("Getting caption for image") - if not image_data or self.completion_url is None: - logger.error("Image data is not set") - return "" - caption_resp = self._call_caption_api(image_data) - if caption_resp: - caption = caption_resp.choice_data() - caption_length = len(caption) - logger.info(f"Successfully generated caption of length {caption_length}") - logger.info( - f"Caption: {caption[:50]}..." - if caption_length > 50 - else f"Caption: {caption}" - ) - return caption - logger.warning("Failed to get caption from Caption API") - return "" diff --git a/docreader/parser/csv_parser.py b/docreader/parser/csv_parser.py deleted file mode 100644 index c2c445a9..00000000 --- a/docreader/parser/csv_parser.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -CSV Parser Module - -This module provides a parser for CSV (Comma-Separated Values) files. -It converts CSV data into a Document with structured chunks, where each row -becomes a separate chunk with key-value pairs. -""" -import logging -from io import BytesIO -from typing import List - -import pandas as pd - -from docreader.models.document import Chunk, Document -from docreader.parser.base_parser import BaseParser - -logger = logging.getLogger(__name__) - - -class CSVParser(BaseParser): - """ - Parser for CSV files that converts tabular data into structured text. - - This parser reads CSV content and transforms each row into a formatted string - with column-value pairs. 
Each row is stored as a separate Chunk in the Document, - allowing for granular access to individual records. - - The output format for each row is: - "column1: value1, column2: value2, column3: value3\n" - - Usage: - parser = CSVParser() - with open("data.csv", "rb") as f: - document = parser.parse_into_text(f.read()) - """ - - def parse_into_text(self, content: bytes) -> Document: - """Parse CSV content into a Document with structured chunks. - - Each row in the CSV is converted into a formatted string and stored as - a separate Chunk. The chunks maintain sequential order and track their - position in the overall document. - - Args: - content: Raw bytes content of the CSV file - - Returns: - Document: A Document object containing: - - content: Full text with all rows concatenated - - chunks: List of Chunk objects, one per CSV row - - Note: - Bad lines in the CSV are automatically skipped using pandas' - on_bad_lines="skip" parameter. - """ - chunks: List[Chunk] = [] - text: List[str] = [] - start, end = 0, 0 - - # Read CSV content into a pandas DataFrame, skipping malformed lines - df = pd.read_csv(BytesIO(content), on_bad_lines="skip") - - # Process each row in the DataFrame - for i, (idx, row) in enumerate(df.iterrows()): - # Format row as "column: value" pairs separated by commas - content_row = ( - ",".join( - f"{col.strip()}: {str(row[col]).strip()}" for col in df.columns - ) - + "\n" - ) - # Update end position for this chunk - end += len(content_row) - text.append(content_row) - - # Create a chunk for this row with position tracking - chunks.append(Chunk(content=content_row, seq=i, start=start, end=end)) - # Update start position for next chunk - start = end - - return Document( - content="".join(text), - chunks=chunks, - ) - - -if __name__ == "__main__": - # Example usage: Parse a CSV file and display its content - logging.basicConfig(level=logging.DEBUG) - - your_file = "/path/to/your/file.csv" - parser = CSVParser() - with open(your_file, "rb") as f: - 
content = f.read() - document = parser.parse_into_text(content) - # Display full document content - logger.error(document.content) - - # Display individual chunks (rows) - for chunk in document.chunks: - logger.error(chunk.content) diff --git a/docreader/parser/docx_parser.py b/docreader/parser/docx_parser.py index 979a4fc7..12ffd4a9 100644 --- a/docreader/parser/docx_parser.py +++ b/docreader/parser/docx_parser.py @@ -89,11 +89,33 @@ class DocxParser(BaseParser): logger.info(f"Setting max_workers to {max_workers} for document processing") try: + inline_images: Dict[str, str] = {} + + def _inline_upload(local_path: str) -> str: + """Read temp image file, base64-encode, and return a ref path. + + The Go-side ImageResolver (or main.py _resolve_images) handles + actual storage upload from Document.images. + """ + import base64 + import uuid as _uuid + + try: + with open(local_path, "rb") as f: + raw = f.read() + ext = os.path.splitext(local_path)[1].lower() or ".png" + ref = f"images/{_uuid.uuid4().hex}{ext}" + inline_images[ref] = base64.b64encode(raw).decode() + return ref + except Exception as exc: + logger.warning("Failed to read temp image %s: %s", local_path, exc) + return "" + logger.info(f"Starting Docx processing with max_pages={self.max_pages}") docx_processor = Docx( - max_image_size=self.max_image_size, - enable_multimodal=self.enable_multimodal, - upload_file=self.storage.upload_file, + max_image_size=1920, + enable_multimodal=True, + upload_file=_inline_upload, ) all_lines, tables = docx_processor( binary=content, @@ -153,6 +175,7 @@ class DocxParser(BaseParser): f"generated {len(text)} characters of text" ) + image_parts.update(inline_images) return DocumentModel(content=text, images=image_parts) except Exception as e: logger.error(f"Error parsing DOCX document: {str(e)}") diff --git a/docreader/parser/image_parser.py b/docreader/parser/image_parser.py index 62f2fecf..dfbf1d59 100644 --- a/docreader/parser/image_parser.py +++ 
b/docreader/parser/image_parser.py @@ -5,44 +5,24 @@ import os from docreader.models.document import Document from docreader.parser.base_parser import BaseParser -# Set up logger for this module logger = logging.getLogger(__name__) class ImageParser(BaseParser): - """ - Parser for image files with OCR capability. - Extracts text from images and generates captions. + """Parser for standalone image files. - This parser handles image processing by: - 1. Uploading the image to storage - 2. Generating a descriptive caption - 3. Performing OCR to extract text content - 4. Returning a combined result with both text and image reference + Returns the image as a markdown reference with the raw image data + in Document.images so that the Go-side ImageResolver (or main.py's + _resolve_images) can handle storage upload. """ def parse_into_text(self, content: bytes) -> Document: - """ - Parse image content into markdown text - :param content: bytes content of the image - :return: Document object - """ - logger.info(f"Parsing image content, size: {len(content)} bytes") + logger.info("Parsing image file=%s, size=%d bytes", self.file_name, len(content)) - # Get file extension - ext = os.path.splitext(self.file_name)[1].lower() + ext = os.path.splitext(self.file_name)[1].lower() or ".png" + ref_path = f"images/{self.file_name}" - # Upload image to storage - image_url = self.storage.upload_bytes(content, file_ext=ext) - logger.info(f"Successfully uploaded image, URL: {image_url[:50]}...") + text = f"![{self.file_name}]({ref_path})" + images = {ref_path: base64.b64encode(content).decode()} - if not image_url: - logger.warning(f"Failed to upload image: {self.file_name}") - return Document(content=f"{self.file_name}", images={}) - - # Generate markdown text - text = f"![{self.file_name}]({image_url})" - images = {image_url: base64.b64encode(content).decode()} - - # Create image object and add to map return Document(content=text, images=images) diff --git 
a/docreader/parser/markdown_parser.py b/docreader/parser/markdown_parser.py index 38d7bdf0..93e0497a 100644 --- a/docreader/parser/markdown_parser.py +++ b/docreader/parser/markdown_parser.py @@ -348,23 +348,11 @@ class MarkdownImageUtil: class MarkdownImageBase64(BaseParser): - """Parser for extracting and uploading base64 images from Markdown. + """Parser for extracting base64 images from Markdown. - This parser: - 1. Extracts base64-encoded images from Markdown content - 2. Uploads the decoded images to storage - 3. Replaces base64 data with uploaded URLs - 4. Returns a Document with clean Markdown and image mappings - - Requires: - - self.storage: Storage backend for uploading images - - Example: - >>> parser = MarkdownImageBase64(storage=my_storage) - >>> content = b"![logo](data:image/png;base64,iVBORw0...)" - >>> doc = parser.parse_into_text(content) - >>> print(doc.content) - ![logo](https://storage.com/uuid.png) + Extracts base64-encoded images, replaces them with path references, + and returns the raw image data in Document.images for the Go-side + ImageResolver (or main.py _resolve_images) to handle storage. """ def __init__(self, **kwargs): @@ -372,41 +360,14 @@ class MarkdownImageBase64(BaseParser): self.image_helper = MarkdownImageUtil() def parse_into_text(self, content: bytes) -> Document: - """Parse Markdown and process base64 images. 
- - Args: - content: Raw Markdown content as bytes - - Returns: - Document with: - - content: Markdown with base64 replaced by URLs - - images: Dict mapping URLs to base64 strings - """ - # Convert byte content to string using universal decoding method text = endecode.decode_bytes(content) - # Extract base64 images and replace with temporary paths text, img_b64 = self.image_helper.extract_base64(text, path_prefix="images") - # Final image mapping: URL -> base64 string (for Document) images: Dict[str, str] = {} - # Temporary mapping: temp_path -> uploaded_URL (for replacement) - image_replace: Dict[str, str] = {} + for ipath, raw_bytes in img_b64.items(): + images[ipath] = base64.b64encode(raw_bytes).decode() - logger.debug(f"Uploading {len(img_b64)} images from markdown") - # Upload each extracted image to storage - for ipath, b64_bytes in img_b64.items(): - # Get file extension for proper MIME type - ext = os.path.splitext(ipath)[1].lower() - # Upload binary data and get back URL - image_url = self.storage.upload_bytes(b64_bytes, ext) - - # Map temp path to uploaded URL for replacement - image_replace[ipath] = image_url - # Store base64 string in final images dict - images[image_url] = base64.b64encode(b64_bytes).decode() - - # Replace temporary paths with actual uploaded URLs - text = self.image_helper.replace_path(text, image_replace) + logger.debug("Extracted %d base64 images from markdown", len(images)) return Document(content=text, images=images) diff --git a/docreader/parser/mineru_parser.py b/docreader/parser/mineru_parser.py deleted file mode 100644 index fc78ef0a..00000000 --- a/docreader/parser/mineru_parser.py +++ /dev/null @@ -1,329 +0,0 @@ -import logging -import re -import time -from typing import Dict, Optional - -import markdownify -import requests - -from docreader.config import CONFIG -from docreader.models.document import Document -from docreader.parser.base_parser import BaseParser -from docreader.parser.chain_parser import PipelineParser -from 
docreader.parser.markdown_parser import MarkdownImageUtil, MarkdownTableFormatter -from docreader.utils import endecode - -logger = logging.getLogger(__name__) - - -class StdMinerUParser(BaseParser): - """ - Standard MinerU Parser for document parsing. - - This parser uses MinerU API to parse documents (especially PDFs) into markdown format, - with support for tables, formulas, and images extraction. - """ - - def __init__( - self, - enable_markdownify: bool = True, - mineru_endpoint: Optional[str] = None, # Added: 支持传入自定义 endpoint - **kwargs, - ): - """ - Initialize MinerU parser. - - Args: - enable_markdownify: Whether to convert HTML tables to markdown format - mineru_endpoint: MinerU API endpoint URL - **kwargs: Additional arguments passed to BaseParser - """ - super().__init__(**kwargs) - # Get MinerU endpoint from environment variable or parameter - # Modified: 优先使用传入的参数,否则使用 Config - base_url = mineru_endpoint if mineru_endpoint else CONFIG.mineru_endpoint - self.minerU = base_url.rstrip("/") if base_url else "" - - self.enable_markdownify = enable_markdownify - # Helper for processing markdown images - self.image_helper = MarkdownImageUtil() - # Pattern to match base64 encoded images - self.base64_pattern = re.compile(r"data:image/(\w+);base64,(.*)") - # Check if MinerU API is available - self.enable = self.ping() - - def ping(self, timeout: int = 5) -> bool: - """ - Check if MinerU API is available. - - Args: - timeout: Request timeout in seconds - - Returns: - True if API is available, False otherwise - """ - try: - response = requests.get( - self.minerU + "/docs", timeout=timeout, allow_redirects=True - ) - response.raise_for_status() - return True - except Exception: - return False - - def parse_into_text(self, content: bytes) -> Document: - """ - Parse document content into text using MinerU API. 
- - Args: - content: Raw document content in bytes - - Returns: - Document object containing parsed text and images - """ - if not self.enable: - logger.debug("MinerU API is not enabled") - return Document() - - logger.info(f"Parsing scanned PDF via MinerU API (size: {len(content)} bytes)") - md_content: str = "" - images_b64: Dict[str, str] = {} - try: - # Call MinerU API to parse document - response = requests.post( - url=self.minerU + "/file_parse", - data={ - "return_md": True, # Return markdown content - "return_images": True, # Return extracted images - "lang_list": ["ch", "en"], # Support Chinese and English - "table_enable": True, # Enable table parsing - "formula_enable": True, # Enable formula parsing - "parse_method": "auto", # Auto detect parsing method - "start_page_id": 0, # Start from first page - "end_page_id": 99999, # Parse all pages - "backend": "pipeline", # Use pipeline backend - "response_format_zip": False, # Return JSON instead of ZIP - "return_middle_json": False, # Don't return intermediate JSON - "return_model_output": False, # Don't return model output - "return_content_list": False, # Don't return content list - }, - files={"files": content}, - timeout=1000, - ) - response.raise_for_status() - result = response.json()["results"]["files"] - md_content = result["md_content"] - images_b64 = result.get("images", {}) - except Exception as e: - logger.error(f"MinerU parsing failed: {e}", exc_info=True) - return Document() - - # Convert HTML tables in markdown to markdown table format - if self.enable_markdownify: - logger.debug("Converting HTML to Markdown") - md_content = markdownify.markdownify(md_content) - - images = {} - image_replace = {} - # Filter images that are actually used in markdown content - # Some images in images_b64 may not be referenced in md_content - # (e.g., images embedded in tables), so we need to filter them - for ipath, b64_str in images_b64.items(): - # Skip images that are not referenced in markdown content - if 
f"images/{ipath}" not in md_content: - logger.debug(f"Image {ipath} not used in markdown") - continue - # Parse base64 image data - match = self.base64_pattern.match(b64_str) - if match: - # Extract image format (e.g., png, jpg) - file_ext = match.group(1) - # Extract base64 encoded data - b64_str = match.group(2) - - # Decode base64 string to bytes - image_bytes = endecode.encode_image(b64_str, errors="ignore") - if not image_bytes: - logger.error("Failed to decode base64 image skip it") - continue - - # Upload image to storage and get URL - image_url = self.storage.upload_bytes( - image_bytes, file_ext=f".{file_ext}" - ) - - # Store image mapping for later use - images[image_url] = b64_str - # Prepare replacement mapping for markdown content - image_replace[f"images/{ipath}"] = image_url - - logger.info(f"Replaced {len(image_replace)} images in markdown") - # Replace image paths in markdown with uploaded URLs - text = self.image_helper.replace_path(md_content, image_replace) - - logger.info( - f"Successfully parsed PDF, text: {len(text)}, images: {len(images)}" - ) - return Document(content=text, images=images) - - -# Added: 新增 MinerUCloudParser 类,支持异步任务提交 -class MinerUCloudParser(StdMinerUParser): - """ - MinerU Parser for REMOTE/CLOUD API (Asynchronous). - Uses the /submit -> /status -> /result workflow. - """ - - SUBMIT_TIMEOUT = 30 - POLL_INTERVAL = 2 - MAX_WAIT_TIME = 600 - - def parse_into_text(self, content: bytes) -> Document: - """ - Parse document content using Cloud MinerU API (Async/Polling). 
- """ - if not self.enable: - return Document() - - logger.info(f"Parsing PDF via Cloud MinerU API (size: {len(content)} bytes)") - - try: - # --- Step 1: Submit Task --- - submit_url = f"{self.minerU}/submit" - logger.info(f"Submitting task to {submit_url}") - - response = requests.post( - url=submit_url, - files={"files": content}, - data={ - "enable_formula": "true", - "enable_table": "true", - "layout_model": "doclayout_yolo", - "backend": "pipeline", - }, - timeout=self.SUBMIT_TIMEOUT, - ) - response.raise_for_status() - - # Robust task_id extraction - resp_data = response.json() - task_id = resp_data.get("task_id") or resp_data.get("data", {}).get("task_id") - - if not task_id: - raise ValueError(f"No task_id in response: {resp_data}") - - logger.info(f"Task submitted, ID: {task_id}, waiting for completion...") - - # --- Step 2: Poll Status --- - start_time = time.time() - - while True: - if time.time() - start_time > self.MAX_WAIT_TIME: - raise TimeoutError(f"Task {task_id} timed out after {self.MAX_WAIT_TIME}s") - - try: - status_resp = requests.get( - f"{self.minerU}/status/{task_id}", - timeout=10 - ) - status_resp.raise_for_status() - status_data = status_resp.json() - except requests.RequestException as e: - logger.warning(f"Status check failed for {task_id}: {e}. 
Retrying...") - time.sleep(self.POLL_INTERVAL) - continue - - state = status_data.get("status") or status_data.get("state") - - if state in ["done", "success"]: - break - elif state == "failed": - error_msg = status_data.get("error") or "Unknown error" - raise RuntimeError(f"Task {task_id} failed: {error_msg}") - else: - time.sleep(self.POLL_INTERVAL) - - # --- Step 3: Get Result --- - result_resp = requests.get( - f"{self.minerU}/result/{task_id}", - timeout=30 - ) - result_resp.raise_for_status() - result_json = result_resp.json() - - # Normalize result data - result_data = result_json.get("result", result_json) - - md_content = result_data.get("md_content", "") - images_b64 = result_data.get("images", {}) - - # 使用父类的方法处理图片和Markdown转换 (复用现有逻辑) - - # Convert HTML tables - if self.enable_markdownify: - md_content = markdownify.markdownify(md_content) - - images = {} - image_replace = {} - - for ipath, b64_str in images_b64.items(): - if f"images/{ipath}" not in md_content: - continue - match = self.base64_pattern.match(b64_str) - if match: - file_ext = match.group(1) - b64_str_clean = match.group(2) - image_bytes = endecode.encode_image(b64_str_clean, errors="ignore") - if not image_bytes: continue - - if self.storage: - image_url = self.storage.upload_bytes(image_bytes, file_ext=f".{file_ext}") - images[image_url] = b64_str_clean - image_replace[f"images/{ipath}"] = image_url - - if image_replace: - md_content = self.image_helper.replace_path(md_content, image_replace) - - return Document(content=md_content, images=images) - - except Exception as e: - logger.error(f"Cloud MinerU parsing failed: {e}", exc_info=True) - return Document() - - -class MinerUParser(PipelineParser): - """ - MinerU Parser with pipeline processing. - - This parser combines StdMinerUParser for document parsing and - MarkdownTableFormatter for table formatting in a pipeline. 
- """ - - _parser_cls = (StdMinerUParser, MarkdownTableFormatter) - - -if __name__ == "__main__": - import os - - # Example usage for testing - logging.basicConfig(level=logging.DEBUG) - - # Configure your file path and MinerU endpoint - your_file = "/path/to/your/file.pdf" - - # Added: 修改为 Localhost 方便测试 - test_endpoint = "http://localhost:9987" - os.environ["MINERU_ENDPOINT"] = test_endpoint - - # Create parser instance - # Modified: 传入 endpoint - parser = MinerUParser(mineru_endpoint=test_endpoint) - - # Parse PDF file - if os.path.exists(your_file): - with open(your_file, "rb") as f: - content = f.read() - document = parser.parse_into_text(content) - logger.error(document.content) - else: - print(f"File not found: {your_file}") \ No newline at end of file diff --git a/docreader/parser/parser.py b/docreader/parser/parser.py index 3db36fd7..992e5ba9 100644 --- a/docreader/parser/parser.py +++ b/docreader/parser/parser.py @@ -1,178 +1,82 @@ import logging -from typing import Dict, Type +from typing import Any, Optional -from docreader.config import CONFIG from docreader.models.document import Document -from docreader.models.read_config import ChunkingConfig -from docreader.parser.base_parser import BaseParser -from docreader.parser.csv_parser import CSVParser -from docreader.parser.doc_parser import DocParser -from docreader.parser.docx2_parser import Docx2Parser -from docreader.parser.excel_parser import ExcelParser -from docreader.parser.image_parser import ImageParser -from docreader.parser.markdown_parser import MarkdownParser -from docreader.parser.pdf_parser import PDFParser -from docreader.parser.text_parser import TextParser +from docreader.parser.registry import registry from docreader.parser.web_parser import WebParser logger = logging.getLogger(__name__) class Parser: - """ - Document parser facade that integrates all specialized parsers. - Provides a unified interface for parsing various document types. 
+ """Document parser facade (lightweight version). + + Converts files/URLs to markdown + image references. + No chunking, no storage, no OCR, no VLM. """ def __init__(self): - # Initialize all parser types - maps file extensions to their corresponding parser classes - self.parsers: Dict[str, Type[BaseParser]] = { - # Document formats - "docx": Docx2Parser, - "doc": DocParser, - "pdf": PDFParser, - "md": MarkdownParser, - "txt": TextParser, - # Image formats - all use the same ImageParser - "jpg": ImageParser, - "jpeg": ImageParser, - "png": ImageParser, - "gif": ImageParser, - "bmp": ImageParser, - "tiff": ImageParser, - "webp": ImageParser, - # Alternative markdown extension - "markdown": MarkdownParser, - # Spreadsheet formats - "csv": CSVParser, - "xlsx": ExcelParser, - "xls": ExcelParser, - } + self.registry = registry logger.info( - "Parser initialized with %d parsers: %s", - len(self.parsers), - ", ".join(self.parsers.keys()), + "Parser initialized with engines: %s", + ", ".join(self.registry.get_engine_names()), ) - def get_parser(self, file_type: str) -> Type[BaseParser]: - """ - Get parser class for the specified file type. - - Args: - file_type: The file extension or type identifier - - Returns: - Parser class for the file type, or None if unsupported - """ - # Look up parser by file type (case-insensitive) - parser = self.parsers.get(file_type.lower()) - if not parser: - # Raise error if file type is not supported - raise ValueError(f"Unsupported file type: {file_type}") - return parser - def parse_file( self, file_name: str, file_type: str, content: bytes, - config: ChunkingConfig, + parser_engine: Optional[str] = None, + engine_overrides: Optional[dict[str, Any]] = None, ) -> Document: - """ - Parse file content using appropriate parser based on file type. 
- - Args: - file_name: Name of the file being parsed - file_type: Type/extension of the file - content: Raw file content as bytes - config: Configuration for chunking process - - Returns: - ParseResult containing chunks and metadata, or None if parsing failed - """ - logger.info(f"Parsing file: {file_name} with type: {file_type}") + """Parse file content to markdown.""" + engine = parser_engine or "" + overrides = engine_overrides or {} logger.info( - f"Chunking config: size={config.chunk_size}, " - f"overlap={config.chunk_overlap}, " - f"multimodal={config.enable_multimodal}" + "Parsing file: %s, type: %s, engine: %s", + file_name, + file_type, + engine or "builtin", ) - # Get appropriate parser class for the file type - cls = self.get_parser(file_type) - - # Create parser instance with configuration - logger.info(f"Creating parser instance for {file_type} file") + cls = self.registry.get_parser_class(engine, file_type) + logger.info( + "Creating %s parser instance for %s file", + cls.__name__, + file_type, + ) parser = cls( file_name=file_name, file_type=file_type, - chunk_size=config.chunk_size, # Size of each text chunk - chunk_overlap=config.chunk_overlap, # Overlap between consecutive chunks - separators=config.separators, # Text separators for chunking - enable_multimodal=config.enable_multimodal, # Enable image/multimodal processing - max_image_size=1920, # Limit image size to 1920px for performance - chunking_config=config, # Pass the entire chunking config for advanced options - max_concurrent_tasks=CONFIG.image_max_concurrent, - ocr_backend=CONFIG.ocr_backend, + **overrides, ) - logger.info(f"Starting to parse file content, size: {len(content)} bytes") - # Execute the parsing process + logger.info("Starting to parse file content, size: %d bytes", len(content)) result = parser.parse(content) - # Validate parsing results and log warnings if needed if not result.content: - logger.warning(f"Parser returned empty content for file: {file_name}") - elif not 
result.chunks: - logger.warning(f"Parser returned empty chunks for file: {file_name}") - elif result.chunks[0]: - # Log first chunk size for debugging - logger.info(f"First chunk content length: {len(result.chunks[0].content)}") - logger.info(f"Parsed file {file_name}, with {len(result.chunks)} chunks") + logger.warning("Parser returned empty content for file: %s", file_name) + logger.info( + "Parsed file %s, content length=%d", file_name, len(result.content) + ) return result - def parse_url(self, url: str, title: str, config: ChunkingConfig) -> Document: - """ - Parse content from a URL using the WebParser. - - Args: - url: URL to parse - title: Title of the webpage (for metadata) - config: Configuration for chunking process - - Returns: - ParseResult containing chunks and metadata, or None if parsing failed - """ - logger.info(f"Parsing URL: {url}, title: {title}") - logger.info( - f"Chunking config: size={config.chunk_size}, " - f"overlap={config.chunk_overlap}, multimodal={config.enable_multimodal}" - ) - - # Create web parser instance with configuration - logger.info("Creating WebParser instance") - parser = WebParser( - title=title, # Webpage title for metadata - chunk_size=config.chunk_size, # Size of each text chunk - chunk_overlap=config.chunk_overlap, # Overlap between consecutive chunks - separators=config.separators, # Text separators for chunking - enable_multimodal=config.enable_multimodal, # Enable image/multimodal processing - max_image_size=1920, # Limit image size to 1920px for performance - chunking_config=config, # Pass the entire chunking config - max_concurrent_tasks=CONFIG.image_max_concurrent, - ocr_backend=CONFIG.ocr_backend, - ) + def parse_url( + self, + url: str, + title: str, + parser_engine: Optional[str] = None, + engine_overrides: Optional[dict[str, Any]] = None, + ) -> Document: + """Parse content from a URL to markdown.""" + logger.info("Parsing URL: %s, title: %s", url, title) + parser = WebParser(title=title) 
logger.info("Starting to parse URL content") - # Parse URL content (encode URL string to bytes as required by parser interface) result = parser.parse(url.encode()) - # Validate parsing results and log warnings if needed if not result.content: - logger.warning(f"Parser returned empty content for url: {url}") - elif not result.chunks: - logger.warning(f"Parser returned empty chunks for url: {url}") - elif result.chunks[0]: - # Log first chunk size for debugging - logger.info(f"First chunk content length: {len(result.chunks[0].content)}") - logger.info(f"Parsed url {url}, with {len(result.chunks)} chunks") + logger.warning("Parser returned empty content for url: %s", url) + logger.info("Parsed url %s, content length=%d", url, len(result.content)) return result diff --git a/docreader/parser/pdf_parser.py b/docreader/parser/pdf_parser.py index 7e1e6dda..f98ab3eb 100644 --- a/docreader/parser/pdf_parser.py +++ b/docreader/parser/pdf_parser.py @@ -1,6 +1,5 @@ from docreader.parser.chain_parser import FirstParser from docreader.parser.markitdown_parser import MarkitdownParser -from docreader.parser.mineru_parser import MinerUParser class PDFParser(FirstParser): @@ -13,4 +12,4 @@ class PDFParser(FirstParser): The first successful parser result will be returned. 
""" # Parser classes to try in order (chain of responsibility pattern) - _parser_cls = (MinerUParser, MarkitdownParser) + _parser_cls = (MarkitdownParser,) diff --git a/docreader/parser/registry.py b/docreader/parser/registry.py new file mode 100644 index 00000000..18580b32 --- /dev/null +++ b/docreader/parser/registry.py @@ -0,0 +1,160 @@ +import logging +from typing import Any, Callable, Dict, List, Optional, Tuple, Type + +from docreader.parser.base_parser import BaseParser +from docreader.parser.doc_parser import DocParser +from docreader.parser.docx2_parser import Docx2Parser +from docreader.parser.excel_parser import ExcelParser +from docreader.parser.image_parser import ImageParser +from docreader.parser.markdown_parser import MarkdownParser +from docreader.parser.markitdown_parser import MarkitdownParser +from docreader.parser.pdf_parser import PDFParser + +logger = logging.getLogger(__name__) + +BUILTIN_ENGINE = "builtin" + + +class ParserEngineRegistry: + """Registry for parser engines. + + Each engine maps file extensions to parser classes. + When a requested engine doesn't support a file type, the registry + falls back to the builtin engine automatically. 
+ """ + + def __init__(self): + self._engines: Dict[str, Dict[str, Type[BaseParser]]] = {} + self._descriptions: Dict[str, str] = {} + self._check_available: Dict[str, Callable[..., Tuple[bool, str]]] = {} + self._unavailable_hint: Dict[str, str] = {} + + def register( + self, + name: str, + file_types: Dict[str, Type[BaseParser]], + description: str = "", + check_available: Callable[..., Tuple[bool, str]] | None = None, + unavailable_hint: str = "", + ): + self._engines[name] = file_types + self._descriptions[name] = description + if check_available is not None: + self._check_available[name] = check_available + self._unavailable_hint[name] = unavailable_hint + logger.info( + "Registered parser engine '%s' with file types: %s", + name, + ", ".join(file_types.keys()), + ) + + def get_parser_class(self, engine: str, file_type: str) -> Type[BaseParser]: + """Resolve parser class for the given engine and file type. + + Falls back to builtin engine when the requested engine doesn't + support the file type. + """ + ft = file_type.lower() + + if engine and engine in self._engines: + cls = self._engines[engine].get(ft) + if cls: + logger.info("Using engine '%s' for file type '%s'", engine, ft) + return cls + logger.info( + "Engine '%s' does not support '%s', falling back to builtin", + engine, + ft, + ) + + builtin = self._engines.get(BUILTIN_ENGINE, {}) + cls = builtin.get(ft) + if cls: + return cls + + raise ValueError(f"Unsupported file type: {file_type}") + + def list_engines(self, overrides: Optional[Dict[str, str]] = None) -> List[Dict]: + """Return metadata for all registered engines, including availability. + + Args: + overrides: tenant-level config overrides (e.g. mineru_endpoint, mineru_api_key) + forwarded to each engine's check_available function. 
+ """ + result = [] + for name, parsers in self._engines.items(): + available = True + unavailable_reason = "" + check = self._check_available.get(name) + if check is not None: + try: + available, unavailable_reason = check(overrides) + except Exception as e: + available = False + unavailable_reason = str(e) or self._unavailable_hint.get(name, "") + if not available and not unavailable_reason: + unavailable_reason = self._unavailable_hint.get(name, "不可用") + result.append( + { + "name": name, + "description": self._descriptions.get(name, ""), + "file_types": sorted(parsers.keys()), + "available": available, + "unavailable_reason": unavailable_reason, + } + ) + return result + + def get_engine_names(self) -> List[str]: + return list(self._engines.keys()) + + +def _build_default_registry() -> ParserEngineRegistry: + """Create and populate the default registry with all known engines.""" + reg = ParserEngineRegistry() + + _image_types = { + ext: ImageParser for ext in ("jpg", "jpeg", "png", "gif", "bmp", "tiff", "webp") + } + + reg.register( + BUILTIN_ENGINE, + { + "docx": Docx2Parser, + "doc": DocParser, + "pdf": PDFParser, + "md": MarkdownParser, + "markdown": MarkdownParser, + "xlsx": ExcelParser, + "xls": ExcelParser, + **_image_types, + }, + description="内置解析引擎", + ) + + reg.register( + "markitdown", + { + "md": MarkitdownParser, + "markdown": MarkitdownParser, + "pdf": MarkitdownParser, + "docx": MarkitdownParser, + "doc": MarkitdownParser, + "pptx": MarkitdownParser, + "ppt": MarkitdownParser, + "xlsx": MarkitdownParser, + "xls": MarkitdownParser, + "csv": MarkitdownParser, + }, + description="MarkItDown 解析引擎(微软 MarkItDown 库)", + ) + + # NOTE: Engine listing is managed by Go-side engine registry + # (docparser.ListAllEngines). The Python list_engines method is kept for + # backward compatibility with the gRPC ListEngines RPC but the Go app + # no longer calls it. MinerU engines are handled natively by Go. 
+ + return reg + + +registry = _build_default_registry() diff --git a/docreader/parser/storage.py b/docreader/parser/storage.py index 20ab944c..7f62896f 100644 --- a/docreader/parser/storage.py +++ b/docreader/parser/storage.py @@ -5,96 +5,70 @@ import os import traceback import uuid from abc import ABC, abstractmethod -from typing import Dict +from typing import Dict, Optional from minio import Minio from qcloud_cos import CosConfig, CosS3Client -from docreader.config import CONFIG from docreader.utils import endecode logger = logging.getLogger(__name__) +def _cfg(storage_config: Optional[Dict], key: str, *env_keys: str, default: str = "") -> str: + """Read a value from storage_config dict, falling back to env vars.""" + if storage_config: + v = storage_config.get(key, "") + if v: + return str(v) + for ek in env_keys: + v = os.environ.get(ek, "") + if v: + return v + return default + + class Storage(ABC): """Abstract base class for object storage operations""" @abstractmethod def upload_file(self, file_path: str) -> str: - """Upload file to object storage - - Args: - file_path: File path - - Returns: - File URL - """ pass @abstractmethod def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str: - """Upload bytes to object storage - - Args: - content: Byte content to upload - file_ext: File extension - - Returns: - File URL - """ pass class CosStorage(Storage): """Tencent Cloud COS storage implementation""" - def __init__(self, storage_config=None): - """Initialize COS storage - - Args: - storage_config: Storage configuration - """ + def __init__(self, storage_config: Optional[Dict] = None): self.storage_config = storage_config self.client, self.bucket_name, self.region, self.prefix = ( self._init_cos_client() ) def _init_cos_client(self): - """Initialize Tencent Cloud COS client""" try: - # Use provided COS config if available, - # otherwise fall back to environment variables - if self.storage_config and self.storage_config.get("access_key_id") != "": 
- cos_config = self.storage_config - secret_id = cos_config.get("access_key_id") - secret_key = cos_config.get("secret_access_key") - region = cos_config.get("region") - bucket_name = cos_config.get("bucket_name") - appid = cos_config.get("app_id") - prefix = cos_config.get("path_prefix", "") - else: - # Get COS configuration from environment variables - secret_id = CONFIG.cos_secret_id - secret_key = CONFIG.cos_secret_key - region = CONFIG.cos_region - bucket_name = CONFIG.cos_bucket_name - appid = CONFIG.cos_app_id - prefix = CONFIG.cos_path_prefix - - enable_old_domain = CONFIG.cos_enable_old_domain + sc = self.storage_config + secret_id = _cfg(sc, "access_key_id", "COS_SECRET_ID") + secret_key = _cfg(sc, "secret_access_key", "COS_SECRET_KEY") + region = _cfg(sc, "region", "COS_REGION") + bucket_name = _cfg(sc, "bucket_name", "COS_BUCKET_NAME") + appid = _cfg(sc, "app_id", "COS_APP_ID") + prefix = _cfg(sc, "path_prefix", "COS_PATH_PREFIX") + enable_old_domain = os.environ.get("COS_ENABLE_OLD_DOMAIN", "").lower() in ("1", "true", "yes") if not all([secret_id, secret_key, region, bucket_name, appid]): logger.error( - "Incomplete COS configuration, missing environment variables" - f"secret_id: {secret_id}, secret_key: {secret_key}, " - f"region: {region}, bucket_name: {bucket_name}, appid: {appid}" + "Incomplete COS configuration: " + "secret_id=%s, region=%s, bucket=%s, appid=%s", + bool(secret_id), region, bucket_name, appid, ) return None, None, None, None - # Initialize COS configuration - logger.info( - f"Initializing COS client with region: {region}, bucket: {bucket_name}" - ) + logger.info("Initializing COS client: region=%s, bucket=%s", region, bucket_name) config = CosConfig( Appid=appid, Region=region, @@ -102,93 +76,50 @@ class CosStorage(Storage): SecretKey=secret_key, EnableOldDomain=enable_old_domain, ) - - # Create client client = CosS3Client(config) return client, bucket_name, region, prefix except Exception as e: - logger.error(f"Failed to 
initialize COS client: {str(e)}") + logger.error("Failed to initialize COS client: %s", e) return None, None, None, None def _get_download_url(self, bucket_name, region, object_key): - """Generate COS object URL - - Args: - bucket_name: Bucket name - region: Region - object_key: Object key - - Returns: - File URL - """ return f"https://{bucket_name}.cos.{region}.myqcloud.com/{object_key}" def upload_file(self, file_path: str) -> str: - """Upload file to Tencent Cloud COS - - Args: - file_path: File path - - Returns: - File URL - """ - logger.info(f"Uploading file to COS: {file_path}") try: if not self.client: return "" - - # Generate object key, use UUID to avoid conflicts file_ext = os.path.splitext(file_path)[1] object_key = f"{self.prefix}/images/{uuid.uuid4().hex}{file_ext}" - logger.info(f"Generated object key: {object_key}") - - # Upload file - logger.info("Attempting to upload file to COS") self.client.upload_file( Bucket=self.bucket_name, LocalFilePath=file_path, Key=object_key, ) - - # Get file URL file_url = self._get_download_url(self.bucket_name, self.region, object_key) - - logger.info(f"Successfully uploaded file to COS: {file_url}") + logger.info("COS upload_file ok: %s", file_url) return file_url - except Exception as e: - logger.error(f"Failed to upload file to COS: {str(e)}") + logger.error("COS upload_file failed: %s", e) return "" def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str: - """Upload bytes to Tencent Cloud COS - - Args: - content: Byte content to upload - file_ext: File extension - - Returns: - File URL - """ try: - logger.info(f"Uploading bytes content to COS, size: {len(content)} bytes") if not self.client: return "" - object_key = ( f"{self.prefix}/images/{uuid.uuid4().hex}{file_ext}" if self.prefix else f"images/{uuid.uuid4().hex}{file_ext}" ) - logger.info(f"Generated object key: {object_key}") self.client.put_object( Bucket=self.bucket_name, Body=content, Key=object_key ) file_url = 
self._get_download_url(self.bucket_name, self.region, object_key) - logger.info(f"Successfully uploaded bytes to COS: {file_url}") + logger.info("COS upload_bytes ok: %s", file_url) return file_url except Exception as e: - logger.error(f"Failed to upload bytes to COS: {str(e)}") + logger.error("COS upload_bytes failed: %s", e) traceback.print_exc() return "" @@ -196,67 +127,34 @@ class CosStorage(Storage): class MinioStorage(Storage): """MinIO storage implementation""" - def __init__(self, storage_config=None): - """Initialize MinIO storage - - Args: - storage_config: Storage configuration - """ + def __init__(self, storage_config: Optional[Dict] = None): self.storage_config = storage_config self.client, self.bucket_name, self.use_ssl, self.endpoint, self.path_prefix = ( self._init_minio_client() ) def _init_minio_client(self): - """Initialize MinIO client from environment variables or injected config. - - If storage_config contains valid configuration, prefer those values - to override environment variables. 
- """ try: - # Get configuration from storage_config with environment variables as fallback - # Each field can independently fall back to environment variables - access_key = ( - self.storage_config.get("access_key_id") - if self.storage_config and self.storage_config.get("access_key_id") - else CONFIG.minio_access_key_id - ) - secret_key = ( - self.storage_config.get("secret_access_key") - if self.storage_config and self.storage_config.get("secret_access_key") - else CONFIG.minio_secret_access_key - ) - bucket_name = ( - self.storage_config.get("bucket_name") - if self.storage_config and self.storage_config.get("bucket_name") - else CONFIG.minio_bucket_name - ) - path_prefix_raw = ( - self.storage_config.get("path_prefix") - if self.storage_config and self.storage_config.get("path_prefix") - else CONFIG.minio_path_prefix - ) + sc = self.storage_config + access_key = _cfg(sc, "access_key_id", "MINIO_ACCESS_KEY_ID") + secret_key = _cfg(sc, "secret_access_key", "MINIO_SECRET_ACCESS_KEY") + bucket_name = _cfg(sc, "bucket_name", "MINIO_BUCKET_NAME") + path_prefix_raw = _cfg(sc, "path_prefix", "MINIO_PATH_PREFIX") path_prefix = path_prefix_raw.strip().strip("/") if path_prefix_raw else "" - - endpoint = CONFIG.minio_endpoint - use_ssl = CONFIG.minio_use_ssl + endpoint = _cfg(sc, "endpoint", "MINIO_ENDPOINT") + use_ssl = os.environ.get("MINIO_USE_SSL", "").lower() in ("1", "true", "yes") if not all([endpoint, access_key, secret_key, bucket_name]): - logger.error( - "Incomplete MinIO configuration, missing environment variables" - ) + logger.error("Incomplete MinIO configuration") return None, None, None, None, None - # Initialize client client = Minio( endpoint, access_key=access_key, secret_key=secret_key, secure=use_ssl ) - # Ensure bucket exists found = client.bucket_exists(bucket_name) if not found: client.make_bucket(bucket_name) - # Set public read policy for the bucket policy = ( "{" '"Version":"2012-10-17",' @@ -273,51 +171,26 @@ class MinioStorage(Storage): 
return client, bucket_name, use_ssl, endpoint, path_prefix except Exception as e: - logger.error(f"Failed to initialize MinIO client: {str(e)}") + logger.error("Failed to initialize MinIO client: %s", e) return None, None, None, None, None def _get_download_url(self, object_key: str): - """Construct a public URL for MinIO object. - - If MINIO_PUBLIC_ENDPOINT is provided, use it; otherwise fallback to endpoint. - """ - # 1. Use public endpoint if provided - endpoint = CONFIG.minio_public_endpoint - if endpoint: - return f"{endpoint}/{self.bucket_name}/{object_key}" - - # 2. Use SSL if enabled - if self.use_ssl: - return f"https://{self.endpoint}/{self.bucket_name}/{object_key}" - - # 3. Use HTTP default - return f"http://{self.endpoint}/{self.bucket_name}/{object_key}" + public_endpoint = os.environ.get("MINIO_PUBLIC_ENDPOINT", "") + if public_endpoint: + return f"{public_endpoint}/{self.bucket_name}/{object_key}" + scheme = "https" if self.use_ssl else "http" + return f"{scheme}://{self.endpoint}/{self.bucket_name}/{object_key}" def upload_file(self, file_path: str) -> str: - """Upload file to MinIO - - Args: - file_path: File path - - Returns: - File URL - """ - logger.info(f"Uploading file to MinIO: {file_path}") try: if not self.client: return "" - - # Generate object key, use UUID to avoid conflicts file_name = os.path.basename(file_path) object_key = ( f"{self.path_prefix}/images/{uuid.uuid4().hex}{os.path.splitext(file_name)[1]}" if self.path_prefix else f"images/{uuid.uuid4().hex}{os.path.splitext(file_name)[1]}" ) - logger.info(f"Generated MinIO object key: {object_key}") - - # Upload file - logger.info("Attempting to upload file to MinIO") with open(file_path, "rb") as file_data: file_size = os.path.getsize(file_path) self.client.put_object( @@ -327,38 +200,22 @@ class MinioStorage(Storage): length=file_size, content_type="application/octet-stream", ) - - # Get file URL file_url = self._get_download_url(object_key) - - logger.info(f"Successfully uploaded 
file to MinIO: {file_url}") + logger.info("MinIO upload_file ok: %s", file_url) return file_url - except Exception as e: - logger.error(f"Failed to upload file to MinIO: {str(e)}") + logger.error("MinIO upload_file failed: %s", e) return "" def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str: - """Upload bytes to MinIO - - Args: - content: Byte content to upload - file_ext: File extension - - Returns: - File URL - """ try: - logger.info(f"Uploading bytes content to MinIO, size: {len(content)} bytes") if not self.client: return "" - object_key = ( f"{self.path_prefix}/images/{uuid.uuid4().hex}{file_ext}" if self.path_prefix else f"images/{uuid.uuid4().hex}{file_ext}" ) - logger.info(f"Generated MinIO object key: {object_key}") self.client.put_object( self.bucket_name or "", object_key, @@ -367,52 +224,67 @@ class MinioStorage(Storage): content_type="application/octet-stream", ) file_url = self._get_download_url(object_key) - logger.info(f"Successfully uploaded bytes to MinIO: {file_url}") + logger.info("MinIO upload_bytes ok: %s", file_url) return file_url except Exception as e: - logger.error(f"Failed to upload bytes to MinIO: {str(e)}") + logger.error("MinIO upload_bytes failed: %s", e) traceback.print_exc() return "" class LocalStorage(Storage): - """Local file system storage implementation""" + """Local file system storage implementation. - def __init__(self, storage_config: Dict[str, str] = {}): - self.storage_config = storage_config - base_dir = storage_config.get("base_dir", CONFIG.local_storage_base_dir) - self.image_dir = os.path.join(base_dir, "images") + Saves files under base_dir and returns web-accessible URL paths + (e.g. /files/images/uuid.jpg) so that the Go app can serve them. 
+ """ + + def __init__(self, storage_config: Optional[Dict] = None): + sc = storage_config or {} + self.base_dir = ( + sc.get("base_dir") + or os.environ.get("LOCAL_STORAGE_BASE_DIR", "/data/files") + ) + path_prefix = (sc.get("path_prefix") or "").strip().strip("/") + if path_prefix: + self.image_dir = os.path.join(self.base_dir, path_prefix, "images") + else: + self.image_dir = os.path.join(self.base_dir, "images") + self.url_prefix = ( + sc.get("url_prefix") + or os.environ.get("LOCAL_STORAGE_URL_PREFIX", "/files") + ) os.makedirs(self.image_dir, exist_ok=True) + def _to_url(self, fpath: str) -> str: + if self.url_prefix: + rel = os.path.relpath(fpath, self.base_dir) + return f"{self.url_prefix}/{rel}" + return fpath + def upload_file(self, file_path: str) -> str: - logger.info(f"Uploading file to local storage: {file_path}") return file_path def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str: - logger.info(f"Uploading file to local storage: {len(content)} bytes") - fname = os.path.join(self.image_dir, f"{uuid.uuid4()}{file_ext}") - with open(fname, "wb") as f: + fpath = os.path.join(self.image_dir, f"{uuid.uuid4()}{file_ext}") + with open(fpath, "wb") as f: f.write(content) - return fname + url = self._to_url(fpath) + logger.info("Local storage saved: %s -> %s", fpath, url) + return url class Base64Storage(Storage): def upload_file(self, file_path: str) -> str: - logger.info(f"Uploading file to base64 storage: {file_path}") return file_path def upload_bytes(self, content: bytes, file_ext: str = ".png") -> str: - logger.info(f"Uploading file to base64 storage: {len(content)} bytes") file_ext = file_ext.lstrip(".") return f"data:image/{file_ext};base64,{endecode.decode_image(content)}" class DummyStorage(Storage): - """Dummy storage implementation. - - It is used in tests or in environments where object storage is disabled. - All upload methods return empty string. 
- """ + """Dummy storage — all uploads return empty string.""" def upload_file(self, file_path: str) -> str: return "" @@ -421,26 +293,30 @@ class DummyStorage(Storage): return "" -def create_storage(storage_config: Dict[str, str] | None = None) -> Storage: - """Create a storage instance based on configuration or environment variables +def create_storage(storage_config: Optional[Dict[str, str]] = None) -> Storage: + """Create a storage instance based on storage_config dict. - Args: - storage_config: Storage configuration dictionary - - Returns: - Storage instance + The ``provider`` key in storage_config determines the backend: + minio, cos, local, base64. + Falls back to STORAGE_TYPE env var, then ``local``. """ - storage_type = CONFIG.storage_type + storage_type = "" if storage_config: - storage_type = str(storage_config.get("provider", storage_type)).lower() - logger.info(f"Creating {storage_type} storage instance") + provider = str(storage_config.get("provider", "")).lower().strip() + if provider and provider not in ("unspecified", "storage_provider_unspecified"): + storage_type = provider + + if not storage_type: + storage_type = os.environ.get("STORAGE_TYPE", "local").lower().strip() + + logger.info("Creating %s storage instance", storage_type) if storage_type == "minio": return MinioStorage(storage_config) elif storage_type == "cos": return CosStorage(storage_config) elif storage_type == "local": - return LocalStorage(storage_config or {}) + return LocalStorage(storage_config) elif storage_type == "base64": return Base64Storage() return DummyStorage() diff --git a/docreader/parser/text_parser.py b/docreader/parser/text_parser.py deleted file mode 100644 index 7675f173..00000000 --- a/docreader/parser/text_parser.py +++ /dev/null @@ -1,55 +0,0 @@ -import logging - -from docreader.models.document import Document -from docreader.parser.base_parser import BaseParser -from docreader.utils import endecode - -logger = logging.getLogger(__name__) - - -class 
TextParser(BaseParser): - """ - Text document parser for processing plain text files. - This parser handles text extraction and chunking from plain text documents. - """ - - def parse_into_text(self, content: bytes) -> Document: - """ - Parse text document content by decoding bytes to string. - - This is a straightforward parser that simply converts the binary content - to text using appropriate character encoding. - - Args: - content: Raw document content as bytes - - Returns: - Parsed text content as string - """ - logger.info(f"Parsing text document, content size: {len(content)} bytes") - text = endecode.decode_bytes(content) - logger.info( - f"Successfully parsed text document, extracted {len(text)} characters" - ) - return Document(content=text) - - -if __name__ == "__main__": - logger = logging.getLogger(__name__) - - # Sample text for testing - text = """## 标题1 - ![alt text](image.png) - ## 标题2 - ![alt text](image2.png) - ## 标题3 - ![alt text](image3.png)""" - logger.info(f"Test text content: {text}") - - # Define separators for text splitting - seperators = ["\n\n", "\n", "。"] - parser = TextParser(separators=seperators) - logger.info("Splitting text into units") - units = parser._split_into_units(text) - logger.info(f"Split text into {len(units)} units") - logger.info(f"Units: {units}") diff --git a/docreader/proto/docreader.pb.go b/docreader/proto/docreader.pb.go index 95199af5..5d456227 100644 --- a/docreader/proto/docreader.pb.go +++ b/docreader/proto/docreader.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.36.6 -// protoc v5.29.3 +// protoc v6.33.4 // source: docreader.proto package proto @@ -21,233 +21,18 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -// 对象存储提供方 -type StorageProvider int32 - -const ( - StorageProvider_STORAGE_PROVIDER_UNSPECIFIED StorageProvider = 0 - StorageProvider_COS StorageProvider = 1 // 腾讯云 COS - StorageProvider_MINIO StorageProvider = 2 // MinIO/S3 兼容 -) - -// Enum value maps for StorageProvider. -var ( - StorageProvider_name = map[int32]string{ - 0: "STORAGE_PROVIDER_UNSPECIFIED", - 1: "COS", - 2: "MINIO", - } - StorageProvider_value = map[string]int32{ - "STORAGE_PROVIDER_UNSPECIFIED": 0, - "COS": 1, - "MINIO": 2, - } -) - -func (x StorageProvider) Enum() *StorageProvider { - p := new(StorageProvider) - *p = x - return p -} - -func (x StorageProvider) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (StorageProvider) Descriptor() protoreflect.EnumDescriptor { - return file_docreader_proto_enumTypes[0].Descriptor() -} - -func (StorageProvider) Type() protoreflect.EnumType { - return &file_docreader_proto_enumTypes[0] -} - -func (x StorageProvider) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Use StorageProvider.Descriptor instead. 
-func (StorageProvider) EnumDescriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{0} -} - -// 通用对象存储配置,兼容 COS 与 MinIO -type StorageConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - Provider StorageProvider `protobuf:"varint,1,opt,name=provider,proto3,enum=docreader.StorageProvider" json:"provider,omitempty"` // 存储提供方 - Region string `protobuf:"bytes,2,opt,name=region,proto3" json:"region,omitempty"` // 区域(COS 使用) - BucketName string `protobuf:"bytes,3,opt,name=bucket_name,json=bucketName,proto3" json:"bucket_name,omitempty"` // 桶名 - AccessKeyId string `protobuf:"bytes,4,opt,name=access_key_id,json=accessKeyId,proto3" json:"access_key_id,omitempty"` // 访问密钥 ID(MinIO/S3 使用) - SecretAccessKey string `protobuf:"bytes,5,opt,name=secret_access_key,json=secretAccessKey,proto3" json:"secret_access_key,omitempty"` // 访问密钥 Secret(MinIO/S3 使用) - AppId string `protobuf:"bytes,6,opt,name=app_id,json=appId,proto3" json:"app_id,omitempty"` // 应用 ID(COS 使用) - PathPrefix string `protobuf:"bytes,7,opt,name=path_prefix,json=pathPrefix,proto3" json:"path_prefix,omitempty"` // 路径前缀 - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *StorageConfig) Reset() { - *x = StorageConfig{} - mi := &file_docreader_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *StorageConfig) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StorageConfig) ProtoMessage() {} - -func (x *StorageConfig) ProtoReflect() protoreflect.Message { - mi := &file_docreader_proto_msgTypes[0] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StorageConfig.ProtoReflect.Descriptor instead. 
-func (*StorageConfig) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{0} -} - -func (x *StorageConfig) GetProvider() StorageProvider { - if x != nil { - return x.Provider - } - return StorageProvider_STORAGE_PROVIDER_UNSPECIFIED -} - -func (x *StorageConfig) GetRegion() string { - if x != nil { - return x.Region - } - return "" -} - -func (x *StorageConfig) GetBucketName() string { - if x != nil { - return x.BucketName - } - return "" -} - -func (x *StorageConfig) GetAccessKeyId() string { - if x != nil { - return x.AccessKeyId - } - return "" -} - -func (x *StorageConfig) GetSecretAccessKey() string { - if x != nil { - return x.SecretAccessKey - } - return "" -} - -func (x *StorageConfig) GetAppId() string { - if x != nil { - return x.AppId - } - return "" -} - -func (x *StorageConfig) GetPathPrefix() string { - if x != nil { - return x.PathPrefix - } - return "" -} - -// VLM 配置 -type VLMConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - ModelName string `protobuf:"bytes,1,opt,name=model_name,json=modelName,proto3" json:"model_name,omitempty"` // VLM Model Name - BaseUrl string `protobuf:"bytes,2,opt,name=base_url,json=baseUrl,proto3" json:"base_url,omitempty"` // VLM Base URL - ApiKey string `protobuf:"bytes,3,opt,name=api_key,json=apiKey,proto3" json:"api_key,omitempty"` // VLM API Key - InterfaceType string `protobuf:"bytes,4,opt,name=interface_type,json=interfaceType,proto3" json:"interface_type,omitempty"` // VLM Interface Type: "ollama" or "openai" - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *VLMConfig) Reset() { - *x = VLMConfig{} - mi := &file_docreader_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *VLMConfig) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*VLMConfig) ProtoMessage() {} - -func (x *VLMConfig) ProtoReflect() protoreflect.Message { - mi := 
&file_docreader_proto_msgTypes[1] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use VLMConfig.ProtoReflect.Descriptor instead. -func (*VLMConfig) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{1} -} - -func (x *VLMConfig) GetModelName() string { - if x != nil { - return x.ModelName - } - return "" -} - -func (x *VLMConfig) GetBaseUrl() string { - if x != nil { - return x.BaseUrl - } - return "" -} - -func (x *VLMConfig) GetApiKey() string { - if x != nil { - return x.ApiKey - } - return "" -} - -func (x *VLMConfig) GetInterfaceType() string { - if x != nil { - return x.InterfaceType - } - return "" -} - type ReadConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - ChunkSize int32 `protobuf:"varint,1,opt,name=chunk_size,json=chunkSize,proto3" json:"chunk_size,omitempty"` // 分块大小 - ChunkOverlap int32 `protobuf:"varint,2,opt,name=chunk_overlap,json=chunkOverlap,proto3" json:"chunk_overlap,omitempty"` // 分块重叠 - Separators []string `protobuf:"bytes,3,rep,name=separators,proto3" json:"separators,omitempty"` // 分隔符 - EnableMultimodal bool `protobuf:"varint,4,opt,name=enable_multimodal,json=enableMultimodal,proto3" json:"enable_multimodal,omitempty"` // 多模态处理 - StorageConfig *StorageConfig `protobuf:"bytes,5,opt,name=storage_config,json=storageConfig,proto3" json:"storage_config,omitempty"` // 对象存储配置(通用) - VlmConfig *VLMConfig `protobuf:"bytes,6,opt,name=vlm_config,json=vlmConfig,proto3" json:"vlm_config,omitempty"` // VLM 配置 - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + ParserEngine string `protobuf:"bytes,1,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"` + ParserEngineOverrides map[string]string 
`protobuf:"bytes,2,rep,name=parser_engine_overrides,json=parserEngineOverrides,proto3" json:"parser_engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + ImageStorage map[string]string `protobuf:"bytes,3,rep,name=image_storage,json=imageStorage,proto3" json:"image_storage,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` // storage provider config (provider, endpoint, bucket, ...) + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ReadConfig) Reset() { *x = ReadConfig{} - mi := &file_docreader_proto_msgTypes[2] + mi := &file_docreader_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -259,7 +44,7 @@ func (x *ReadConfig) String() string { func (*ReadConfig) ProtoMessage() {} func (x *ReadConfig) ProtoReflect() protoreflect.Message { - mi := &file_docreader_proto_msgTypes[2] + mi := &file_docreader_proto_msgTypes[0] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -272,78 +57,59 @@ func (x *ReadConfig) ProtoReflect() protoreflect.Message { // Deprecated: Use ReadConfig.ProtoReflect.Descriptor instead. 
func (*ReadConfig) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{2} + return file_docreader_proto_rawDescGZIP(), []int{0} } -func (x *ReadConfig) GetChunkSize() int32 { +func (x *ReadConfig) GetParserEngine() string { if x != nil { - return x.ChunkSize + return x.ParserEngine } - return 0 + return "" } -func (x *ReadConfig) GetChunkOverlap() int32 { +func (x *ReadConfig) GetParserEngineOverrides() map[string]string { if x != nil { - return x.ChunkOverlap - } - return 0 -} - -func (x *ReadConfig) GetSeparators() []string { - if x != nil { - return x.Separators + return x.ParserEngineOverrides } return nil } -func (x *ReadConfig) GetEnableMultimodal() bool { +func (x *ReadConfig) GetImageStorage() map[string]string { if x != nil { - return x.EnableMultimodal - } - return false -} - -func (x *ReadConfig) GetStorageConfig() *StorageConfig { - if x != nil { - return x.StorageConfig + return x.ImageStorage } return nil } -func (x *ReadConfig) GetVlmConfig() *VLMConfig { - if x != nil { - return x.VlmConfig - } - return nil -} - -// 从文件读取文档请求 -type ReadFromFileRequest struct { +// Unified read request: set file_content for file mode, url for URL mode. 
+type ReadRequest struct { state protoimpl.MessageState `protogen:"open.v1"` - FileContent []byte `protobuf:"bytes,1,opt,name=file_content,json=fileContent,proto3" json:"file_content,omitempty"` // 文件内容 - FileName string `protobuf:"bytes,2,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"` // 文件名 - FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"` // 文件类型 - ReadConfig *ReadConfig `protobuf:"bytes,4,opt,name=read_config,json=readConfig,proto3" json:"read_config,omitempty"` - RequestId string `protobuf:"bytes,5,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` + FileContent []byte `protobuf:"bytes,1,opt,name=file_content,json=fileContent,proto3" json:"file_content,omitempty"` + FileName string `protobuf:"bytes,2,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"` + FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"` + Url string `protobuf:"bytes,4,opt,name=url,proto3" json:"url,omitempty"` + Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"` + Config *ReadConfig `protobuf:"bytes,6,opt,name=config,proto3" json:"config,omitempty"` + RequestId string `protobuf:"bytes,7,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } -func (x *ReadFromFileRequest) Reset() { - *x = ReadFromFileRequest{} - mi := &file_docreader_proto_msgTypes[3] +func (x *ReadRequest) Reset() { + *x = ReadRequest{} + mi := &file_docreader_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } -func (x *ReadFromFileRequest) String() string { +func (x *ReadRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*ReadFromFileRequest) ProtoMessage() {} +func (*ReadRequest) ProtoMessage() {} -func (x *ReadFromFileRequest) ProtoReflect() protoreflect.Message { - mi := 
&file_docreader_proto_msgTypes[3] +func (x *ReadRequest) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[1] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -354,143 +120,86 @@ func (x *ReadFromFileRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use ReadFromFileRequest.ProtoReflect.Descriptor instead. -func (*ReadFromFileRequest) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{3} +// Deprecated: Use ReadRequest.ProtoReflect.Descriptor instead. +func (*ReadRequest) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{1} } -func (x *ReadFromFileRequest) GetFileContent() []byte { +func (x *ReadRequest) GetFileContent() []byte { if x != nil { return x.FileContent } return nil } -func (x *ReadFromFileRequest) GetFileName() string { +func (x *ReadRequest) GetFileName() string { if x != nil { return x.FileName } return "" } -func (x *ReadFromFileRequest) GetFileType() string { +func (x *ReadRequest) GetFileType() string { if x != nil { return x.FileType } return "" } -func (x *ReadFromFileRequest) GetReadConfig() *ReadConfig { - if x != nil { - return x.ReadConfig - } - return nil -} - -func (x *ReadFromFileRequest) GetRequestId() string { - if x != nil { - return x.RequestId - } - return "" -} - -// 从URL读取文档请求 -type ReadFromURLRequest struct { - state protoimpl.MessageState `protogen:"open.v1"` - Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` // 文档URL - Title string `protobuf:"bytes,2,opt,name=title,proto3" json:"title,omitempty"` // 标题 - ReadConfig *ReadConfig `protobuf:"bytes,3,opt,name=read_config,json=readConfig,proto3" json:"read_config,omitempty"` - RequestId string `protobuf:"bytes,4,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x 
*ReadFromURLRequest) Reset() { - *x = ReadFromURLRequest{} - mi := &file_docreader_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *ReadFromURLRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*ReadFromURLRequest) ProtoMessage() {} - -func (x *ReadFromURLRequest) ProtoReflect() protoreflect.Message { - mi := &file_docreader_proto_msgTypes[4] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use ReadFromURLRequest.ProtoReflect.Descriptor instead. -func (*ReadFromURLRequest) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{4} -} - -func (x *ReadFromURLRequest) GetUrl() string { +func (x *ReadRequest) GetUrl() string { if x != nil { return x.Url } return "" } -func (x *ReadFromURLRequest) GetTitle() string { +func (x *ReadRequest) GetTitle() string { if x != nil { return x.Title } return "" } -func (x *ReadFromURLRequest) GetReadConfig() *ReadConfig { +func (x *ReadRequest) GetConfig() *ReadConfig { if x != nil { - return x.ReadConfig + return x.Config } return nil } -func (x *ReadFromURLRequest) GetRequestId() string { +func (x *ReadRequest) GetRequestId() string { if x != nil { return x.RequestId } return "" } -// 图片信息 -type Image struct { +type ImageRef struct { state protoimpl.MessageState `protogen:"open.v1"` - Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` // 图片URL - Caption string `protobuf:"bytes,2,opt,name=caption,proto3" json:"caption,omitempty"` // 图片描述 - OcrText string `protobuf:"bytes,3,opt,name=ocr_text,json=ocrText,proto3" json:"ocr_text,omitempty"` // OCR提取的文本内容 - OriginalUrl string `protobuf:"bytes,4,opt,name=original_url,json=originalUrl,proto3" json:"original_url,omitempty"` // 原始图片URL - Start int32 `protobuf:"varint,5,opt,name=start,proto3" 
json:"start,omitempty"` // 图片在文本中的开始位置 - End int32 `protobuf:"varint,6,opt,name=end,proto3" json:"end,omitempty"` // 图片在文本中的结束位置 + Filename string `protobuf:"bytes,1,opt,name=filename,proto3" json:"filename,omitempty"` + OriginalRef string `protobuf:"bytes,2,opt,name=original_ref,json=originalRef,proto3" json:"original_ref,omitempty"` + MimeType string `protobuf:"bytes,3,opt,name=mime_type,json=mimeType,proto3" json:"mime_type,omitempty"` + StorageKey string `protobuf:"bytes,4,opt,name=storage_key,json=storageKey,proto3" json:"storage_key,omitempty"` // download URL from shared storage + ImageData []byte `protobuf:"bytes,5,opt,name=image_data,json=imageData,proto3" json:"image_data,omitempty"` // inline bytes fallback unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } -func (x *Image) Reset() { - *x = Image{} - mi := &file_docreader_proto_msgTypes[5] +func (x *ImageRef) Reset() { + *x = ImageRef{} + mi := &file_docreader_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } -func (x *Image) String() string { +func (x *ImageRef) String() string { return protoimpl.X.MessageStringOf(x) } -func (*Image) ProtoMessage() {} +func (*ImageRef) ProtoMessage() {} -func (x *Image) ProtoReflect() protoreflect.Message { - mi := &file_docreader_proto_msgTypes[5] +func (x *ImageRef) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -501,141 +210,60 @@ func (x *Image) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use Image.ProtoReflect.Descriptor instead. -func (*Image) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{5} +// Deprecated: Use ImageRef.ProtoReflect.Descriptor instead. 
+func (*ImageRef) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{2} } -func (x *Image) GetUrl() string { +func (x *ImageRef) GetFilename() string { if x != nil { - return x.Url + return x.Filename } return "" } -func (x *Image) GetCaption() string { +func (x *ImageRef) GetOriginalRef() string { if x != nil { - return x.Caption + return x.OriginalRef } return "" } -func (x *Image) GetOcrText() string { +func (x *ImageRef) GetMimeType() string { if x != nil { - return x.OcrText + return x.MimeType } return "" } -func (x *Image) GetOriginalUrl() string { +func (x *ImageRef) GetStorageKey() string { if x != nil { - return x.OriginalUrl + return x.StorageKey } return "" } -func (x *Image) GetStart() int32 { +func (x *ImageRef) GetImageData() []byte { if x != nil { - return x.Start - } - return 0 -} - -func (x *Image) GetEnd() int32 { - if x != nil { - return x.End - } - return 0 -} - -type Chunk struct { - state protoimpl.MessageState `protogen:"open.v1"` - Content string `protobuf:"bytes,1,opt,name=content,proto3" json:"content,omitempty"` // 块内容 - Seq int32 `protobuf:"varint,2,opt,name=seq,proto3" json:"seq,omitempty"` // 块在文档中的次序 - Start int32 `protobuf:"varint,3,opt,name=start,proto3" json:"start,omitempty"` // 块在文档中的起始位置 - End int32 `protobuf:"varint,4,opt,name=end,proto3" json:"end,omitempty"` // 块在文档中的结束位置 - Images []*Image `protobuf:"bytes,5,rep,name=images,proto3" json:"images,omitempty"` // 块中包含的图片信息 - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *Chunk) Reset() { - *x = Chunk{} - mi := &file_docreader_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *Chunk) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Chunk) ProtoMessage() {} - -func (x *Chunk) ProtoReflect() protoreflect.Message { - mi := &file_docreader_proto_msgTypes[6] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - 
if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Chunk.ProtoReflect.Descriptor instead. -func (*Chunk) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{6} -} - -func (x *Chunk) GetContent() string { - if x != nil { - return x.Content - } - return "" -} - -func (x *Chunk) GetSeq() int32 { - if x != nil { - return x.Seq - } - return 0 -} - -func (x *Chunk) GetStart() int32 { - if x != nil { - return x.Start - } - return 0 -} - -func (x *Chunk) GetEnd() int32 { - if x != nil { - return x.End - } - return 0 -} - -func (x *Chunk) GetImages() []*Image { - if x != nil { - return x.Images + return x.ImageData } return nil } -// 从URL读取文档响应 type ReadResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Chunks []*Chunk `protobuf:"bytes,1,rep,name=chunks,proto3" json:"chunks,omitempty"` // 文档分块 - Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` // 错误信息 - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + MarkdownContent string `protobuf:"bytes,1,opt,name=markdown_content,json=markdownContent,proto3" json:"markdown_content,omitempty"` + ImageRefs []*ImageRef `protobuf:"bytes,2,rep,name=image_refs,json=imageRefs,proto3" json:"image_refs,omitempty"` + ImageDirPath string `protobuf:"bytes,3,opt,name=image_dir_path,json=imageDirPath,proto3" json:"image_dir_path,omitempty"` + Metadata map[string]string `protobuf:"bytes,4,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + Error string `protobuf:"bytes,5,opt,name=error,proto3" json:"error,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ReadResponse) Reset() { *x = ReadResponse{} - mi := &file_docreader_proto_msgTypes[7] + mi := &file_docreader_proto_msgTypes[3] ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -647,7 +275,7 @@ func (x *ReadResponse) String() string { func (*ReadResponse) ProtoMessage() {} func (x *ReadResponse) ProtoReflect() protoreflect.Message { - mi := &file_docreader_proto_msgTypes[7] + mi := &file_docreader_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -660,12 +288,33 @@ func (x *ReadResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use ReadResponse.ProtoReflect.Descriptor instead. func (*ReadResponse) Descriptor() ([]byte, []int) { - return file_docreader_proto_rawDescGZIP(), []int{7} + return file_docreader_proto_rawDescGZIP(), []int{3} } -func (x *ReadResponse) GetChunks() []*Chunk { +func (x *ReadResponse) GetMarkdownContent() string { if x != nil { - return x.Chunks + return x.MarkdownContent + } + return "" +} + +func (x *ReadResponse) GetImageRefs() []*ImageRef { + if x != nil { + return x.ImageRefs + } + return nil +} + +func (x *ReadResponse) GetImageDirPath() string { + if x != nil { + return x.ImageDirPath + } + return "" +} + +func (x *ReadResponse) GetMetadata() map[string]string { + if x != nil { + return x.Metadata } return nil } @@ -677,77 +326,351 @@ func (x *ReadResponse) GetError() string { return "" } +type ListEnginesRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + ConfigOverrides map[string]string `protobuf:"bytes,1,rep,name=config_overrides,json=configOverrides,proto3" json:"config_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ListEnginesRequest) Reset() { + *x = ListEnginesRequest{} + mi := &file_docreader_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ListEnginesRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + 
+func (*ListEnginesRequest) ProtoMessage() {} + +func (x *ListEnginesRequest) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ListEnginesRequest.ProtoReflect.Descriptor instead. +func (*ListEnginesRequest) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{4} +} + +func (x *ListEnginesRequest) GetConfigOverrides() map[string]string { + if x != nil { + return x.ConfigOverrides + } + return nil +} + +type ParserEngineInfo struct { + state protoimpl.MessageState `protogen:"open.v1"` + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Description string `protobuf:"bytes,2,opt,name=description,proto3" json:"description,omitempty"` + FileTypes []string `protobuf:"bytes,3,rep,name=file_types,json=fileTypes,proto3" json:"file_types,omitempty"` + Available bool `protobuf:"varint,4,opt,name=available,proto3" json:"available,omitempty"` + UnavailableReason string `protobuf:"bytes,5,opt,name=unavailable_reason,json=unavailableReason,proto3" json:"unavailable_reason,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ParserEngineInfo) Reset() { + *x = ParserEngineInfo{} + mi := &file_docreader_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ParserEngineInfo) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ParserEngineInfo) ProtoMessage() {} + +func (x *ParserEngineInfo) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use 
ParserEngineInfo.ProtoReflect.Descriptor instead. +func (*ParserEngineInfo) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{5} +} + +func (x *ParserEngineInfo) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *ParserEngineInfo) GetDescription() string { + if x != nil { + return x.Description + } + return "" +} + +func (x *ParserEngineInfo) GetFileTypes() []string { + if x != nil { + return x.FileTypes + } + return nil +} + +func (x *ParserEngineInfo) GetAvailable() bool { + if x != nil { + return x.Available + } + return false +} + +func (x *ParserEngineInfo) GetUnavailableReason() string { + if x != nil { + return x.UnavailableReason + } + return "" +} + +type ListEnginesResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Engines []*ParserEngineInfo `protobuf:"bytes,1,rep,name=engines,proto3" json:"engines,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ListEnginesResponse) Reset() { + *x = ListEnginesResponse{} + mi := &file_docreader_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ListEnginesResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ListEnginesResponse) ProtoMessage() {} + +func (x *ListEnginesResponse) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ListEnginesResponse.ProtoReflect.Descriptor instead. 
+func (*ListEnginesResponse) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{6} +} + +func (x *ListEnginesResponse) GetEngines() []*ParserEngineInfo { + if x != nil { + return x.Engines + } + return nil +} + +type ConvertToPDFRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + FileContent []byte `protobuf:"bytes,1,opt,name=file_content,json=fileContent,proto3" json:"file_content,omitempty"` + FileName string `protobuf:"bytes,2,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"` + FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ConvertToPDFRequest) Reset() { + *x = ConvertToPDFRequest{} + mi := &file_docreader_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ConvertToPDFRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ConvertToPDFRequest) ProtoMessage() {} + +func (x *ConvertToPDFRequest) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[7] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ConvertToPDFRequest.ProtoReflect.Descriptor instead. 
+func (*ConvertToPDFRequest) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{7} +} + +func (x *ConvertToPDFRequest) GetFileContent() []byte { + if x != nil { + return x.FileContent + } + return nil +} + +func (x *ConvertToPDFRequest) GetFileName() string { + if x != nil { + return x.FileName + } + return "" +} + +func (x *ConvertToPDFRequest) GetFileType() string { + if x != nil { + return x.FileType + } + return "" +} + +type ConvertToPDFResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + PdfContent []byte `protobuf:"bytes,1,opt,name=pdf_content,json=pdfContent,proto3" json:"pdf_content,omitempty"` + Error string `protobuf:"bytes,2,opt,name=error,proto3" json:"error,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ConvertToPDFResponse) Reset() { + *x = ConvertToPDFResponse{} + mi := &file_docreader_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ConvertToPDFResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ConvertToPDFResponse) ProtoMessage() {} + +func (x *ConvertToPDFResponse) ProtoReflect() protoreflect.Message { + mi := &file_docreader_proto_msgTypes[8] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ConvertToPDFResponse.ProtoReflect.Descriptor instead. 
+func (*ConvertToPDFResponse) Descriptor() ([]byte, []int) { + return file_docreader_proto_rawDescGZIP(), []int{8} +} + +func (x *ConvertToPDFResponse) GetPdfContent() []byte { + if x != nil { + return x.PdfContent + } + return nil +} + +func (x *ConvertToPDFResponse) GetError() string { + if x != nil { + return x.Error + } + return "" +} + var File_docreader_proto protoreflect.FileDescriptor const file_docreader_proto_rawDesc = "" + "\n" + - "\x0fdocreader.proto\x12\tdocreader\"\x88\x02\n" + - "\rStorageConfig\x126\n" + - "\bprovider\x18\x01 \x01(\x0e2\x1a.docreader.StorageProviderR\bprovider\x12\x16\n" + - "\x06region\x18\x02 \x01(\tR\x06region\x12\x1f\n" + - "\vbucket_name\x18\x03 \x01(\tR\n" + - "bucketName\x12\"\n" + - "\raccess_key_id\x18\x04 \x01(\tR\vaccessKeyId\x12*\n" + - "\x11secret_access_key\x18\x05 \x01(\tR\x0fsecretAccessKey\x12\x15\n" + - "\x06app_id\x18\x06 \x01(\tR\x05appId\x12\x1f\n" + - "\vpath_prefix\x18\a \x01(\tR\n" + - "pathPrefix\"\x85\x01\n" + - "\tVLMConfig\x12\x1d\n" + + "\x0fdocreader.proto\x12\tdocreader\"\xf4\x02\n" + "\n" + - "model_name\x18\x01 \x01(\tR\tmodelName\x12\x19\n" + - "\bbase_url\x18\x02 \x01(\tR\abaseUrl\x12\x17\n" + - "\aapi_key\x18\x03 \x01(\tR\x06apiKey\x12%\n" + - "\x0einterface_type\x18\x04 \x01(\tR\rinterfaceType\"\x93\x02\n" + - "\n" + - "ReadConfig\x12\x1d\n" + - "\n" + - "chunk_size\x18\x01 \x01(\x05R\tchunkSize\x12#\n" + - "\rchunk_overlap\x18\x02 \x01(\x05R\fchunkOverlap\x12\x1e\n" + - "\n" + - "separators\x18\x03 \x03(\tR\n" + - "separators\x12+\n" + - "\x11enable_multimodal\x18\x04 \x01(\bR\x10enableMultimodal\x12?\n" + - "\x0estorage_config\x18\x05 \x01(\v2\x18.docreader.StorageConfigR\rstorageConfig\x123\n" + - "\n" + - "vlm_config\x18\x06 \x01(\v2\x14.docreader.VLMConfigR\tvlmConfig\"\xc9\x01\n" + - "\x13ReadFromFileRequest\x12!\n" + + "ReadConfig\x12#\n" + + "\rparser_engine\x18\x01 \x01(\tR\fparserEngine\x12h\n" + + "\x17parser_engine_overrides\x18\x02 
\x03(\v20.docreader.ReadConfig.ParserEngineOverridesEntryR\x15parserEngineOverrides\x12L\n" + + "\rimage_storage\x18\x03 \x03(\v2'.docreader.ReadConfig.ImageStorageEntryR\fimageStorage\x1aH\n" + + "\x1aParserEngineOverridesEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\x1a?\n" + + "\x11ImageStorageEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xe0\x01\n" + + "\vReadRequest\x12!\n" + "\ffile_content\x18\x01 \x01(\fR\vfileContent\x12\x1b\n" + "\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" + - "\tfile_type\x18\x03 \x01(\tR\bfileType\x126\n" + - "\vread_config\x18\x04 \x01(\v2\x15.docreader.ReadConfigR\n" + - "readConfig\x12\x1d\n" + + "\tfile_type\x18\x03 \x01(\tR\bfileType\x12\x10\n" + + "\x03url\x18\x04 \x01(\tR\x03url\x12\x14\n" + + "\x05title\x18\x05 \x01(\tR\x05title\x12-\n" + + "\x06config\x18\x06 \x01(\v2\x15.docreader.ReadConfigR\x06config\x12\x1d\n" + "\n" + - "request_id\x18\x05 \x01(\tR\trequestId\"\x93\x01\n" + - "\x12ReadFromURLRequest\x12\x10\n" + - "\x03url\x18\x01 \x01(\tR\x03url\x12\x14\n" + - "\x05title\x18\x02 \x01(\tR\x05title\x126\n" + - "\vread_config\x18\x03 \x01(\v2\x15.docreader.ReadConfigR\n" + - "readConfig\x12\x1d\n" + + "request_id\x18\a \x01(\tR\trequestId\"\xa6\x01\n" + + "\bImageRef\x12\x1a\n" + + "\bfilename\x18\x01 \x01(\tR\bfilename\x12!\n" + + "\foriginal_ref\x18\x02 \x01(\tR\voriginalRef\x12\x1b\n" + + "\tmime_type\x18\x03 \x01(\tR\bmimeType\x12\x1f\n" + + "\vstorage_key\x18\x04 \x01(\tR\n" + + "storageKey\x12\x1d\n" + "\n" + - "request_id\x18\x04 \x01(\tR\trequestId\"\x99\x01\n" + - "\x05Image\x12\x10\n" + - "\x03url\x18\x01 \x01(\tR\x03url\x12\x18\n" + - "\acaption\x18\x02 \x01(\tR\acaption\x12\x19\n" + - "\bocr_text\x18\x03 \x01(\tR\aocrText\x12!\n" + - "\foriginal_url\x18\x04 \x01(\tR\voriginalUrl\x12\x14\n" + - "\x05start\x18\x05 \x01(\x05R\x05start\x12\x10\n" + - "\x03end\x18\x06 
\x01(\x05R\x03end\"\x85\x01\n" + - "\x05Chunk\x12\x18\n" + - "\acontent\x18\x01 \x01(\tR\acontent\x12\x10\n" + - "\x03seq\x18\x02 \x01(\x05R\x03seq\x12\x14\n" + - "\x05start\x18\x03 \x01(\x05R\x05start\x12\x10\n" + - "\x03end\x18\x04 \x01(\x05R\x03end\x12(\n" + - "\x06images\x18\x05 \x03(\v2\x10.docreader.ImageR\x06images\"N\n" + - "\fReadResponse\x12(\n" + - "\x06chunks\x18\x01 \x03(\v2\x10.docreader.ChunkR\x06chunks\x12\x14\n" + - "\x05error\x18\x02 \x01(\tR\x05error*G\n" + - "\x0fStorageProvider\x12 \n" + - "\x1cSTORAGE_PROVIDER_UNSPECIFIED\x10\x00\x12\a\n" + - "\x03COS\x10\x01\x12\t\n" + - "\x05MINIO\x10\x022\x9f\x01\n" + - "\tDocReader\x12I\n" + - "\fReadFromFile\x12\x1e.docreader.ReadFromFileRequest\x1a\x17.docreader.ReadResponse\"\x00\x12G\n" + - "\vReadFromURL\x12\x1d.docreader.ReadFromURLRequest\x1a\x17.docreader.ReadResponse\"\x00B5Z3github.com/Tencent/WeKnora/internal/docreader/protob\x06proto3" + "image_data\x18\x05 \x01(\fR\timageData\"\xa9\x02\n" + + "\fReadResponse\x12)\n" + + "\x10markdown_content\x18\x01 \x01(\tR\x0fmarkdownContent\x122\n" + + "\n" + + "image_refs\x18\x02 \x03(\v2\x13.docreader.ImageRefR\timageRefs\x12$\n" + + "\x0eimage_dir_path\x18\x03 \x01(\tR\fimageDirPath\x12A\n" + + "\bmetadata\x18\x04 \x03(\v2%.docreader.ReadResponse.MetadataEntryR\bmetadata\x12\x14\n" + + "\x05error\x18\x05 \x01(\tR\x05error\x1a;\n" + + "\rMetadataEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xb7\x01\n" + + "\x12ListEnginesRequest\x12]\n" + + "\x10config_overrides\x18\x01 \x03(\v22.docreader.ListEnginesRequest.ConfigOverridesEntryR\x0fconfigOverrides\x1aB\n" + + "\x14ConfigOverridesEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xb4\x01\n" + + "\x10ParserEngineInfo\x12\x12\n" + + "\x04name\x18\x01 \x01(\tR\x04name\x12 \n" + + "\vdescription\x18\x02 \x01(\tR\vdescription\x12\x1d\n" + + "\n" + + "file_types\x18\x03 
\x03(\tR\tfileTypes\x12\x1c\n" + + "\tavailable\x18\x04 \x01(\bR\tavailable\x12-\n" + + "\x12unavailable_reason\x18\x05 \x01(\tR\x11unavailableReason\"L\n" + + "\x13ListEnginesResponse\x125\n" + + "\aengines\x18\x01 \x03(\v2\x1b.docreader.ParserEngineInfoR\aengines\"r\n" + + "\x13ConvertToPDFRequest\x12!\n" + + "\ffile_content\x18\x01 \x01(\fR\vfileContent\x12\x1b\n" + + "\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" + + "\tfile_type\x18\x03 \x01(\tR\bfileType\"M\n" + + "\x14ConvertToPDFResponse\x12\x1f\n" + + "\vpdf_content\x18\x01 \x01(\fR\n" + + "pdfContent\x12\x14\n" + + "\x05error\x18\x02 \x01(\tR\x05error2\xe9\x01\n" + + "\tDocReader\x129\n" + + "\x04Read\x12\x16.docreader.ReadRequest\x1a\x17.docreader.ReadResponse\"\x00\x12N\n" + + "\vListEngines\x12\x1d.docreader.ListEnginesRequest\x1a\x1e.docreader.ListEnginesResponse\"\x00\x12Q\n" + + "\fConvertToPDF\x12\x1e.docreader.ConvertToPDFRequest\x1a\x1f.docreader.ConvertToPDFResponse\"\x00B5Z3github.com/Tencent/WeKnora/internal/docreader/protob\x06proto3" var ( file_docreader_proto_rawDescOnce sync.Once @@ -761,36 +684,41 @@ func file_docreader_proto_rawDescGZIP() []byte { return file_docreader_proto_rawDescData } -var file_docreader_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_docreader_proto_msgTypes = make([]protoimpl.MessageInfo, 8) +var file_docreader_proto_msgTypes = make([]protoimpl.MessageInfo, 13) var file_docreader_proto_goTypes = []any{ - (StorageProvider)(0), // 0: docreader.StorageProvider - (*StorageConfig)(nil), // 1: docreader.StorageConfig - (*VLMConfig)(nil), // 2: docreader.VLMConfig - (*ReadConfig)(nil), // 3: docreader.ReadConfig - (*ReadFromFileRequest)(nil), // 4: docreader.ReadFromFileRequest - (*ReadFromURLRequest)(nil), // 5: docreader.ReadFromURLRequest - (*Image)(nil), // 6: docreader.Image - (*Chunk)(nil), // 7: docreader.Chunk - (*ReadResponse)(nil), // 8: docreader.ReadResponse + (*ReadConfig)(nil), // 0: docreader.ReadConfig + (*ReadRequest)(nil), // 1: 
docreader.ReadRequest + (*ImageRef)(nil), // 2: docreader.ImageRef + (*ReadResponse)(nil), // 3: docreader.ReadResponse + (*ListEnginesRequest)(nil), // 4: docreader.ListEnginesRequest + (*ParserEngineInfo)(nil), // 5: docreader.ParserEngineInfo + (*ListEnginesResponse)(nil), // 6: docreader.ListEnginesResponse + (*ConvertToPDFRequest)(nil), // 7: docreader.ConvertToPDFRequest + (*ConvertToPDFResponse)(nil), // 8: docreader.ConvertToPDFResponse + nil, // 9: docreader.ReadConfig.ParserEngineOverridesEntry + nil, // 10: docreader.ReadConfig.ImageStorageEntry + nil, // 11: docreader.ReadResponse.MetadataEntry + nil, // 12: docreader.ListEnginesRequest.ConfigOverridesEntry } var file_docreader_proto_depIdxs = []int32{ - 0, // 0: docreader.StorageConfig.provider:type_name -> docreader.StorageProvider - 1, // 1: docreader.ReadConfig.storage_config:type_name -> docreader.StorageConfig - 2, // 2: docreader.ReadConfig.vlm_config:type_name -> docreader.VLMConfig - 3, // 3: docreader.ReadFromFileRequest.read_config:type_name -> docreader.ReadConfig - 3, // 4: docreader.ReadFromURLRequest.read_config:type_name -> docreader.ReadConfig - 6, // 5: docreader.Chunk.images:type_name -> docreader.Image - 7, // 6: docreader.ReadResponse.chunks:type_name -> docreader.Chunk - 4, // 7: docreader.DocReader.ReadFromFile:input_type -> docreader.ReadFromFileRequest - 5, // 8: docreader.DocReader.ReadFromURL:input_type -> docreader.ReadFromURLRequest - 8, // 9: docreader.DocReader.ReadFromFile:output_type -> docreader.ReadResponse - 8, // 10: docreader.DocReader.ReadFromURL:output_type -> docreader.ReadResponse - 9, // [9:11] is the sub-list for method output_type - 7, // [7:9] is the sub-list for method input_type - 7, // [7:7] is the sub-list for extension type_name - 7, // [7:7] is the sub-list for extension extendee - 0, // [0:7] is the sub-list for field type_name + 9, // 0: docreader.ReadConfig.parser_engine_overrides:type_name -> docreader.ReadConfig.ParserEngineOverridesEntry + 10, // 
1: docreader.ReadConfig.image_storage:type_name -> docreader.ReadConfig.ImageStorageEntry + 0, // 2: docreader.ReadRequest.config:type_name -> docreader.ReadConfig + 2, // 3: docreader.ReadResponse.image_refs:type_name -> docreader.ImageRef + 11, // 4: docreader.ReadResponse.metadata:type_name -> docreader.ReadResponse.MetadataEntry + 12, // 5: docreader.ListEnginesRequest.config_overrides:type_name -> docreader.ListEnginesRequest.ConfigOverridesEntry + 5, // 6: docreader.ListEnginesResponse.engines:type_name -> docreader.ParserEngineInfo + 1, // 7: docreader.DocReader.Read:input_type -> docreader.ReadRequest + 4, // 8: docreader.DocReader.ListEngines:input_type -> docreader.ListEnginesRequest + 7, // 9: docreader.DocReader.ConvertToPDF:input_type -> docreader.ConvertToPDFRequest + 3, // 10: docreader.DocReader.Read:output_type -> docreader.ReadResponse + 6, // 11: docreader.DocReader.ListEngines:output_type -> docreader.ListEnginesResponse + 8, // 12: docreader.DocReader.ConvertToPDF:output_type -> docreader.ConvertToPDFResponse + 10, // [10:13] is the sub-list for method output_type + 7, // [7:10] is the sub-list for method input_type + 7, // [7:7] is the sub-list for extension type_name + 7, // [7:7] is the sub-list for extension extendee + 0, // [0:7] is the sub-list for field type_name } func init() { file_docreader_proto_init() } @@ -803,14 +731,13 @@ func file_docreader_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_docreader_proto_rawDesc), len(file_docreader_proto_rawDesc)), - NumEnums: 1, - NumMessages: 8, + NumEnums: 0, + NumMessages: 13, NumExtensions: 0, NumServices: 1, }, GoTypes: file_docreader_proto_goTypes, DependencyIndexes: file_docreader_proto_depIdxs, - EnumInfos: file_docreader_proto_enumTypes, MessageInfos: file_docreader_proto_msgTypes, }.Build() File_docreader_proto = out.File diff --git a/docreader/proto/docreader.proto 
b/docreader/proto/docreader.proto index 44ff4ace..89b7938a 100644 --- a/docreader/proto/docreader.proto +++ b/docreader/proto/docreader.proto @@ -4,86 +4,56 @@ package docreader; option go_package = "github.com/Tencent/WeKnora/internal/docreader/proto"; -// 文档读取服务 service DocReader { - // 从文件读取文档 - rpc ReadFromFile(ReadFromFileRequest) returns (ReadResponse) {} - // 从URL读取文档 - rpc ReadFromURL(ReadFromURLRequest) returns (ReadResponse) {} -} - -// 对象存储提供方 -enum StorageProvider { - STORAGE_PROVIDER_UNSPECIFIED = 0; - COS = 1; // 腾讯云 COS - MINIO = 2; // MinIO/S3 兼容 -} - -// 通用对象存储配置,兼容 COS 与 MinIO -message StorageConfig { - StorageProvider provider = 1; // 存储提供方 - string region = 2; // 区域(COS 使用) - string bucket_name = 3; // 桶名 - string access_key_id = 4; // 访问密钥 ID(MinIO/S3 使用) - string secret_access_key = 5; // 访问密钥 Secret(MinIO/S3 使用) - string app_id = 6; // 应用 ID(COS 使用) - string path_prefix = 7; // 路径前缀 -} - -// VLM 配置 -message VLMConfig { - string model_name = 1; // VLM Model Name - string base_url = 2; // VLM Base URL - string api_key = 3; // VLM API Key - string interface_type = 4; // VLM Interface Type: "ollama" or "openai" + rpc Read(ReadRequest) returns (ReadResponse) {} + rpc ListEngines(ListEnginesRequest) returns (ListEnginesResponse) {} } message ReadConfig { - int32 chunk_size = 1; // 分块大小 - int32 chunk_overlap = 2; // 分块重叠 - repeated string separators = 3; // 分隔符 - bool enable_multimodal = 4; // 多模态处理 - StorageConfig storage_config = 5; // 对象存储配置(通用) - VLMConfig vlm_config = 6; // VLM 配置 + string parser_engine = 1; + map parser_engine_overrides = 2; + map image_storage = 3; // storage provider config (provider, endpoint, bucket, ...) } -// 从文件读取文档请求 -message ReadFromFileRequest { - bytes file_content = 1; // 文件内容 - string file_name = 2; // 文件名 - string file_type = 3; // 文件类型 - ReadConfig read_config = 4; - string request_id = 5; +// Unified read request: set file_content for file mode, url for URL mode. 
+message ReadRequest { + bytes file_content = 1; + string file_name = 2; + string file_type = 3; + string url = 4; + string title = 5; + ReadConfig config = 6; + string request_id = 7; } -// 从URL读取文档请求 -message ReadFromURLRequest { - string url = 1; // 文档URL - string title = 2; // 标题 - ReadConfig read_config = 3; - string request_id = 4; +message ImageRef { + string filename = 1; + string original_ref = 2; + string mime_type = 3; + string storage_key = 4; // download URL from shared storage + bytes image_data = 5; // inline bytes fallback } -// 图片信息 -message Image { - string url = 1; // 图片URL - string caption = 2; // 图片描述 - string ocr_text = 3; // OCR提取的文本内容 - string original_url = 4; // 原始图片URL - int32 start = 5; // 图片在文本中的开始位置 - int32 end = 6; // 图片在文本中的结束位置 -} - -message Chunk { - string content = 1; // 块内容 - int32 seq = 2; // 块在文档中的次序 - int32 start = 3; // 块在文档中的起始位置 - int32 end = 4; // 块在文档中的结束位置 - repeated Image images = 5; // 块中包含的图片信息 -} - -// 从URL读取文档响应 message ReadResponse { - repeated Chunk chunks = 1; // 文档分块 - string error = 2; // 错误信息 -} \ No newline at end of file + string markdown_content = 1; + repeated ImageRef image_refs = 2; + string image_dir_path = 3; + map metadata = 4; + string error = 5; +} + +message ListEnginesRequest { + map config_overrides = 1; +} + +message ParserEngineInfo { + string name = 1; + string description = 2; + repeated string file_types = 3; + bool available = 4; + string unavailable_reason = 5; +} + +message ListEnginesResponse { + repeated ParserEngineInfo engines = 1; +} diff --git a/docreader/proto/docreader_grpc.pb.go b/docreader/proto/docreader_grpc.pb.go index 79bd7ef3..754854a5 100644 --- a/docreader/proto/docreader_grpc.pb.go +++ b/docreader/proto/docreader_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: // - protoc-gen-go-grpc v1.5.1 -// - protoc v5.29.3 +// - protoc v6.33.4 // source: docreader.proto package proto @@ -19,20 +19,18 @@ import ( const _ = grpc.SupportPackageIsVersion9 const ( - DocReader_ReadFromFile_FullMethodName = "/docreader.DocReader/ReadFromFile" - DocReader_ReadFromURL_FullMethodName = "/docreader.DocReader/ReadFromURL" + DocReader_Read_FullMethodName = "/docreader.DocReader/Read" + DocReader_ListEngines_FullMethodName = "/docreader.DocReader/ListEngines" + DocReader_ConvertToPDF_FullMethodName = "/docreader.DocReader/ConvertToPDF" ) // DocReaderClient is the client API for DocReader service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. -// -// 文档读取服务 type DocReaderClient interface { - // 从文件读取文档 - ReadFromFile(ctx context.Context, in *ReadFromFileRequest, opts ...grpc.CallOption) (*ReadResponse, error) - // 从URL读取文档 - ReadFromURL(ctx context.Context, in *ReadFromURLRequest, opts ...grpc.CallOption) (*ReadResponse, error) + Read(ctx context.Context, in *ReadRequest, opts ...grpc.CallOption) (*ReadResponse, error) + ListEngines(ctx context.Context, in *ListEnginesRequest, opts ...grpc.CallOption) (*ListEnginesResponse, error) + ConvertToPDF(ctx context.Context, in *ConvertToPDFRequest, opts ...grpc.CallOption) (*ConvertToPDFResponse, error) } type docReaderClient struct { @@ -43,20 +41,30 @@ func NewDocReaderClient(cc grpc.ClientConnInterface) DocReaderClient { return &docReaderClient{cc} } -func (c *docReaderClient) ReadFromFile(ctx context.Context, in *ReadFromFileRequest, opts ...grpc.CallOption) (*ReadResponse, error) { +func (c *docReaderClient) Read(ctx context.Context, in *ReadRequest, opts ...grpc.CallOption) (*ReadResponse, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) out := new(ReadResponse) - err := c.cc.Invoke(ctx, DocReader_ReadFromFile_FullMethodName, in, out, cOpts...) 
+ err := c.cc.Invoke(ctx, DocReader_Read_FullMethodName, in, out, cOpts...) if err != nil { return nil, err } return out, nil } -func (c *docReaderClient) ReadFromURL(ctx context.Context, in *ReadFromURLRequest, opts ...grpc.CallOption) (*ReadResponse, error) { +func (c *docReaderClient) ListEngines(ctx context.Context, in *ListEnginesRequest, opts ...grpc.CallOption) (*ListEnginesResponse, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) - out := new(ReadResponse) - err := c.cc.Invoke(ctx, DocReader_ReadFromURL_FullMethodName, in, out, cOpts...) + out := new(ListEnginesResponse) + err := c.cc.Invoke(ctx, DocReader_ListEngines_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *docReaderClient) ConvertToPDF(ctx context.Context, in *ConvertToPDFRequest, opts ...grpc.CallOption) (*ConvertToPDFResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(ConvertToPDFResponse) + err := c.cc.Invoke(ctx, DocReader_ConvertToPDF_FullMethodName, in, out, cOpts...) if err != nil { return nil, err } @@ -66,13 +74,10 @@ func (c *docReaderClient) ReadFromURL(ctx context.Context, in *ReadFromURLReques // DocReaderServer is the server API for DocReader service. // All implementations must embed UnimplementedDocReaderServer // for forward compatibility. -// -// 文档读取服务 type DocReaderServer interface { - // 从文件读取文档 - ReadFromFile(context.Context, *ReadFromFileRequest) (*ReadResponse, error) - // 从URL读取文档 - ReadFromURL(context.Context, *ReadFromURLRequest) (*ReadResponse, error) + Read(context.Context, *ReadRequest) (*ReadResponse, error) + ListEngines(context.Context, *ListEnginesRequest) (*ListEnginesResponse, error) + ConvertToPDF(context.Context, *ConvertToPDFRequest) (*ConvertToPDFResponse, error) mustEmbedUnimplementedDocReaderServer() } @@ -83,11 +88,14 @@ type DocReaderServer interface { // pointer dereference when methods are called. 
type UnimplementedDocReaderServer struct{} -func (UnimplementedDocReaderServer) ReadFromFile(context.Context, *ReadFromFileRequest) (*ReadResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method ReadFromFile not implemented") +func (UnimplementedDocReaderServer) Read(context.Context, *ReadRequest) (*ReadResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Read not implemented") } -func (UnimplementedDocReaderServer) ReadFromURL(context.Context, *ReadFromURLRequest) (*ReadResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method ReadFromURL not implemented") +func (UnimplementedDocReaderServer) ListEngines(context.Context, *ListEnginesRequest) (*ListEnginesResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ListEngines not implemented") +} +func (UnimplementedDocReaderServer) ConvertToPDF(context.Context, *ConvertToPDFRequest) (*ConvertToPDFResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ConvertToPDF not implemented") } func (UnimplementedDocReaderServer) mustEmbedUnimplementedDocReaderServer() {} func (UnimplementedDocReaderServer) testEmbeddedByValue() {} @@ -110,38 +118,56 @@ func RegisterDocReaderServer(s grpc.ServiceRegistrar, srv DocReaderServer) { s.RegisterService(&DocReader_ServiceDesc, srv) } -func _DocReader_ReadFromFile_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ReadFromFileRequest) +func _DocReader_Read_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ReadRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(DocReaderServer).ReadFromFile(ctx, in) + return srv.(DocReaderServer).Read(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: DocReader_ReadFromFile_FullMethodName, + 
FullMethod: DocReader_Read_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(DocReaderServer).ReadFromFile(ctx, req.(*ReadFromFileRequest)) + return srv.(DocReaderServer).Read(ctx, req.(*ReadRequest)) } return interceptor(ctx, in, info, handler) } -func _DocReader_ReadFromURL_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ReadFromURLRequest) +func _DocReader_ListEngines_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ListEnginesRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(DocReaderServer).ReadFromURL(ctx, in) + return srv.(DocReaderServer).ListEngines(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: DocReader_ReadFromURL_FullMethodName, + FullMethod: DocReader_ListEngines_FullMethodName, } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(DocReaderServer).ReadFromURL(ctx, req.(*ReadFromURLRequest)) + return srv.(DocReaderServer).ListEngines(ctx, req.(*ListEnginesRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _DocReader_ConvertToPDF_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ConvertToPDFRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(DocReaderServer).ConvertToPDF(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: DocReader_ConvertToPDF_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(DocReaderServer).ConvertToPDF(ctx, req.(*ConvertToPDFRequest)) } return interceptor(ctx, in, info, handler) } @@ -154,12 +180,16 @@ var 
DocReader_ServiceDesc = grpc.ServiceDesc{ HandlerType: (*DocReaderServer)(nil), Methods: []grpc.MethodDesc{ { - MethodName: "ReadFromFile", - Handler: _DocReader_ReadFromFile_Handler, + MethodName: "Read", + Handler: _DocReader_Read_Handler, }, { - MethodName: "ReadFromURL", - Handler: _DocReader_ReadFromURL_Handler, + MethodName: "ListEngines", + Handler: _DocReader_ListEngines_Handler, + }, + { + MethodName: "ConvertToPDF", + Handler: _DocReader_ConvertToPDF_Handler, }, }, Streams: []grpc.StreamDesc{}, diff --git a/docreader/proto/docreader_pb2.py b/docreader/proto/docreader_pb2.py index 633d4a0d..59f2c320 100644 --- a/docreader/proto/docreader_pb2.py +++ b/docreader/proto/docreader_pb2.py @@ -24,7 +24,7 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x64ocreader.proto\x12\tdocreader\"\xb9\x01\n\rStorageConfig\x12,\n\x08provider\x18\x01 \x01(\x0e\x32\x1a.docreader.StorageProvider\x12\x0e\n\x06region\x18\x02 \x01(\t\x12\x13\n\x0b\x62ucket_name\x18\x03 \x01(\t\x12\x15\n\raccess_key_id\x18\x04 \x01(\t\x12\x19\n\x11secret_access_key\x18\x05 \x01(\t\x12\x0e\n\x06\x61pp_id\x18\x06 \x01(\t\x12\x13\n\x0bpath_prefix\x18\x07 \x01(\t\"Z\n\tVLMConfig\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x10\n\x08\x62\x61se_url\x18\x02 \x01(\t\x12\x0f\n\x07\x61pi_key\x18\x03 \x01(\t\x12\x16\n\x0einterface_type\x18\x04 \x01(\t\"\xc2\x01\n\nReadConfig\x12\x12\n\nchunk_size\x18\x01 \x01(\x05\x12\x15\n\rchunk_overlap\x18\x02 \x01(\x05\x12\x12\n\nseparators\x18\x03 \x03(\t\x12\x19\n\x11\x65nable_multimodal\x18\x04 \x01(\x08\x12\x30\n\x0estorage_config\x18\x05 \x01(\x0b\x32\x18.docreader.StorageConfig\x12(\n\nvlm_config\x18\x06 \x01(\x0b\x32\x14.docreader.VLMConfig\"\x91\x01\n\x13ReadFromFileRequest\x12\x14\n\x0c\x66ile_content\x18\x01 \x01(\x0c\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12*\n\x0bread_config\x18\x04 \x01(\x0b\x32\x15.docreader.ReadConfig\x12\x12\n\nrequest_id\x18\x05 
\x01(\t\"p\n\x12ReadFromURLRequest\x12\x0b\n\x03url\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12*\n\x0bread_config\x18\x03 \x01(\x0b\x32\x15.docreader.ReadConfig\x12\x12\n\nrequest_id\x18\x04 \x01(\t\"i\n\x05Image\x12\x0b\n\x03url\x18\x01 \x01(\t\x12\x0f\n\x07\x63\x61ption\x18\x02 \x01(\t\x12\x10\n\x08ocr_text\x18\x03 \x01(\t\x12\x14\n\x0coriginal_url\x18\x04 \x01(\t\x12\r\n\x05start\x18\x05 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x06 \x01(\x05\"c\n\x05\x43hunk\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x0b\n\x03seq\x18\x02 \x01(\x05\x12\r\n\x05start\x18\x03 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x04 \x01(\x05\x12 \n\x06images\x18\x05 \x03(\x0b\x32\x10.docreader.Image\"?\n\x0cReadResponse\x12 \n\x06\x63hunks\x18\x01 \x03(\x0b\x32\x10.docreader.Chunk\x12\r\n\x05\x65rror\x18\x02 \x01(\t*G\n\x0fStorageProvider\x12 \n\x1cSTORAGE_PROVIDER_UNSPECIFIED\x10\x00\x12\x07\n\x03\x43OS\x10\x01\x12\t\n\x05MINIO\x10\x02\x32\x9f\x01\n\tDocReader\x12I\n\x0cReadFromFile\x12\x1e.docreader.ReadFromFileRequest\x1a\x17.docreader.ReadResponse\"\x00\x12G\n\x0bReadFromURL\x12\x1d.docreader.ReadFromURLRequest\x1a\x17.docreader.ReadResponse\"\x00\x42\x35Z3github.com/Tencent/WeKnora/internal/docreader/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x64ocreader.proto\x12\tdocreader\"\xa9\x02\n\nReadConfig\x12\x15\n\rparser_engine\x18\x01 \x01(\t\x12Q\n\x17parser_engine_overrides\x18\x02 \x03(\x0b\x32\x30.docreader.ReadConfig.ParserEngineOverridesEntry\x12>\n\rimage_storage\x18\x03 \x03(\x0b\x32\'.docreader.ReadConfig.ImageStorageEntry\x1a<\n\x1aParserEngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x33\n\x11ImageStorageEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa0\x01\n\x0bReadRequest\x12\x14\n\x0c\x66ile_content\x18\x01 \x01(\x0c\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x0b\n\x03url\x18\x04 
\x01(\t\x12\r\n\x05title\x18\x05 \x01(\t\x12%\n\x06\x63onfig\x18\x06 \x01(\x0b\x32\x15.docreader.ReadConfig\x12\x12\n\nrequest_id\x18\x07 \x01(\t\"n\n\x08ImageRef\x12\x10\n\x08\x66ilename\x18\x01 \x01(\t\x12\x14\n\x0coriginal_ref\x18\x02 \x01(\t\x12\x11\n\tmime_type\x18\x03 \x01(\t\x12\x13\n\x0bstorage_key\x18\x04 \x01(\t\x12\x12\n\nimage_data\x18\x05 \x01(\x0c\"\xe2\x01\n\x0cReadResponse\x12\x18\n\x10markdown_content\x18\x01 \x01(\t\x12\'\n\nimage_refs\x18\x02 \x03(\x0b\x32\x13.docreader.ImageRef\x12\x16\n\x0eimage_dir_path\x18\x03 \x01(\t\x12\x37\n\x08metadata\x18\x04 \x03(\x0b\x32%.docreader.ReadResponse.MetadataEntry\x12\r\n\x05\x65rror\x18\x05 \x01(\t\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x9a\x01\n\x12ListEnginesRequest\x12L\n\x10\x63onfig_overrides\x18\x01 \x03(\x0b\x32\x32.docreader.ListEnginesRequest.ConfigOverridesEntry\x1a\x36\n\x14\x43onfigOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"x\n\x10ParserEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x12\n\nfile_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 \x01(\t\"C\n\x13ListEnginesResponse\x12,\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x1b.docreader.ParserEngineInfo\"Q\n\x13\x43onvertToPDFRequest\x12\x14\n\x0c\x66ile_content\x18\x01 \x01(\x0c\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\":\n\x14\x43onvertToPDFResponse\x12\x13\n\x0bpdf_content\x18\x01 \x01(\x0c\x12\r\n\x05\x65rror\x18\x02 
\x01(\t2\xe9\x01\n\tDocReader\x12\x39\n\x04Read\x12\x16.docreader.ReadRequest\x1a\x17.docreader.ReadResponse\"\x00\x12N\n\x0bListEngines\x12\x1d.docreader.ListEnginesRequest\x1a\x1e.docreader.ListEnginesResponse\"\x00\x12Q\n\x0c\x43onvertToPDF\x12\x1e.docreader.ConvertToPDFRequest\x1a\x1f.docreader.ConvertToPDFResponse\"\x00\x42\x35Z3github.com/Tencent/WeKnora/internal/docreader/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -32,24 +32,40 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'docreader_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: _globals['DESCRIPTOR']._loaded_options = None _globals['DESCRIPTOR']._serialized_options = b'Z3github.com/Tencent/WeKnora/internal/docreader/proto' - _globals['_STORAGEPROVIDER']._serialized_start=1042 - _globals['_STORAGEPROVIDER']._serialized_end=1113 - _globals['_STORAGECONFIG']._serialized_start=31 - _globals['_STORAGECONFIG']._serialized_end=216 - _globals['_VLMCONFIG']._serialized_start=218 - _globals['_VLMCONFIG']._serialized_end=308 - _globals['_READCONFIG']._serialized_start=311 - _globals['_READCONFIG']._serialized_end=505 - _globals['_READFROMFILEREQUEST']._serialized_start=508 - _globals['_READFROMFILEREQUEST']._serialized_end=653 - _globals['_READFROMURLREQUEST']._serialized_start=655 - _globals['_READFROMURLREQUEST']._serialized_end=767 - _globals['_IMAGE']._serialized_start=769 - _globals['_IMAGE']._serialized_end=874 - _globals['_CHUNK']._serialized_start=876 - _globals['_CHUNK']._serialized_end=975 - _globals['_READRESPONSE']._serialized_start=977 - _globals['_READRESPONSE']._serialized_end=1040 - _globals['_DOCREADER']._serialized_start=1116 - _globals['_DOCREADER']._serialized_end=1275 + _globals['_READCONFIG_PARSERENGINEOVERRIDESENTRY']._loaded_options = None + _globals['_READCONFIG_PARSERENGINEOVERRIDESENTRY']._serialized_options = b'8\001' + _globals['_READCONFIG_IMAGESTORAGEENTRY']._loaded_options = None + 
_globals['_READCONFIG_IMAGESTORAGEENTRY']._serialized_options = b'8\001' + _globals['_READRESPONSE_METADATAENTRY']._loaded_options = None + _globals['_READRESPONSE_METADATAENTRY']._serialized_options = b'8\001' + _globals['_LISTENGINESREQUEST_CONFIGOVERRIDESENTRY']._loaded_options = None + _globals['_LISTENGINESREQUEST_CONFIGOVERRIDESENTRY']._serialized_options = b'8\001' + _globals['_READCONFIG']._serialized_start=31 + _globals['_READCONFIG']._serialized_end=328 + _globals['_READCONFIG_PARSERENGINEOVERRIDESENTRY']._serialized_start=215 + _globals['_READCONFIG_PARSERENGINEOVERRIDESENTRY']._serialized_end=275 + _globals['_READCONFIG_IMAGESTORAGEENTRY']._serialized_start=277 + _globals['_READCONFIG_IMAGESTORAGEENTRY']._serialized_end=328 + _globals['_READREQUEST']._serialized_start=331 + _globals['_READREQUEST']._serialized_end=491 + _globals['_IMAGEREF']._serialized_start=493 + _globals['_IMAGEREF']._serialized_end=603 + _globals['_READRESPONSE']._serialized_start=606 + _globals['_READRESPONSE']._serialized_end=832 + _globals['_READRESPONSE_METADATAENTRY']._serialized_start=785 + _globals['_READRESPONSE_METADATAENTRY']._serialized_end=832 + _globals['_LISTENGINESREQUEST']._serialized_start=835 + _globals['_LISTENGINESREQUEST']._serialized_end=989 + _globals['_LISTENGINESREQUEST_CONFIGOVERRIDESENTRY']._serialized_start=935 + _globals['_LISTENGINESREQUEST_CONFIGOVERRIDESENTRY']._serialized_end=989 + _globals['_PARSERENGINEINFO']._serialized_start=991 + _globals['_PARSERENGINEINFO']._serialized_end=1111 + _globals['_LISTENGINESRESPONSE']._serialized_start=1113 + _globals['_LISTENGINESRESPONSE']._serialized_end=1180 + _globals['_CONVERTTOPDFREQUEST']._serialized_start=1182 + _globals['_CONVERTTOPDFREQUEST']._serialized_end=1263 + _globals['_CONVERTTOPDFRESPONSE']._serialized_start=1265 + _globals['_CONVERTTOPDFRESPONSE']._serialized_end=1323 + _globals['_DOCREADER']._serialized_start=1326 + _globals['_DOCREADER']._serialized_end=1559 # 
@@protoc_insertion_point(module_scope) diff --git a/docreader/proto/docreader_pb2.pyi b/docreader/proto/docreader_pb2.pyi index 9f7cdf65..19f96abd 100644 --- a/docreader/proto/docreader_pb2.pyi +++ b/docreader/proto/docreader_pb2.pyi @@ -1,5 +1,4 @@ from google.protobuf.internal import containers as _containers -from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from collections.abc import Iterable as _Iterable, Mapping as _Mapping @@ -7,121 +6,130 @@ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor -class StorageProvider(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - STORAGE_PROVIDER_UNSPECIFIED: _ClassVar[StorageProvider] - COS: _ClassVar[StorageProvider] - MINIO: _ClassVar[StorageProvider] -STORAGE_PROVIDER_UNSPECIFIED: StorageProvider -COS: StorageProvider -MINIO: StorageProvider - -class StorageConfig(_message.Message): - __slots__ = ("provider", "region", "bucket_name", "access_key_id", "secret_access_key", "app_id", "path_prefix") - PROVIDER_FIELD_NUMBER: _ClassVar[int] - REGION_FIELD_NUMBER: _ClassVar[int] - BUCKET_NAME_FIELD_NUMBER: _ClassVar[int] - ACCESS_KEY_ID_FIELD_NUMBER: _ClassVar[int] - SECRET_ACCESS_KEY_FIELD_NUMBER: _ClassVar[int] - APP_ID_FIELD_NUMBER: _ClassVar[int] - PATH_PREFIX_FIELD_NUMBER: _ClassVar[int] - provider: StorageProvider - region: str - bucket_name: str - access_key_id: str - secret_access_key: str - app_id: str - path_prefix: str - def __init__(self, provider: _Optional[_Union[StorageProvider, str]] = ..., region: _Optional[str] = ..., bucket_name: _Optional[str] = ..., access_key_id: _Optional[str] = ..., secret_access_key: _Optional[str] = ..., app_id: _Optional[str] = ..., path_prefix: _Optional[str] = ...) -> None: ... 
- -class VLMConfig(_message.Message): - __slots__ = ("model_name", "base_url", "api_key", "interface_type") - MODEL_NAME_FIELD_NUMBER: _ClassVar[int] - BASE_URL_FIELD_NUMBER: _ClassVar[int] - API_KEY_FIELD_NUMBER: _ClassVar[int] - INTERFACE_TYPE_FIELD_NUMBER: _ClassVar[int] - model_name: str - base_url: str - api_key: str - interface_type: str - def __init__(self, model_name: _Optional[str] = ..., base_url: _Optional[str] = ..., api_key: _Optional[str] = ..., interface_type: _Optional[str] = ...) -> None: ... - class ReadConfig(_message.Message): - __slots__ = ("chunk_size", "chunk_overlap", "separators", "enable_multimodal", "storage_config", "vlm_config") - CHUNK_SIZE_FIELD_NUMBER: _ClassVar[int] - CHUNK_OVERLAP_FIELD_NUMBER: _ClassVar[int] - SEPARATORS_FIELD_NUMBER: _ClassVar[int] - ENABLE_MULTIMODAL_FIELD_NUMBER: _ClassVar[int] - STORAGE_CONFIG_FIELD_NUMBER: _ClassVar[int] - VLM_CONFIG_FIELD_NUMBER: _ClassVar[int] - chunk_size: int - chunk_overlap: int - separators: _containers.RepeatedScalarFieldContainer[str] - enable_multimodal: bool - storage_config: StorageConfig - vlm_config: VLMConfig - def __init__(self, chunk_size: _Optional[int] = ..., chunk_overlap: _Optional[int] = ..., separators: _Optional[_Iterable[str]] = ..., enable_multimodal: bool = ..., storage_config: _Optional[_Union[StorageConfig, _Mapping]] = ..., vlm_config: _Optional[_Union[VLMConfig, _Mapping]] = ...) -> None: ... + __slots__ = ("parser_engine", "parser_engine_overrides", "image_storage") + class ParserEngineOverridesEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... 
+ class ImageStorageEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + PARSER_ENGINE_FIELD_NUMBER: _ClassVar[int] + PARSER_ENGINE_OVERRIDES_FIELD_NUMBER: _ClassVar[int] + IMAGE_STORAGE_FIELD_NUMBER: _ClassVar[int] + parser_engine: str + parser_engine_overrides: _containers.ScalarMap[str, str] + image_storage: _containers.ScalarMap[str, str] + def __init__(self, parser_engine: _Optional[str] = ..., parser_engine_overrides: _Optional[_Mapping[str, str]] = ..., image_storage: _Optional[_Mapping[str, str]] = ...) -> None: ... -class ReadFromFileRequest(_message.Message): - __slots__ = ("file_content", "file_name", "file_type", "read_config", "request_id") +class ReadRequest(_message.Message): + __slots__ = ("file_content", "file_name", "file_type", "url", "title", "config", "request_id") FILE_CONTENT_FIELD_NUMBER: _ClassVar[int] FILE_NAME_FIELD_NUMBER: _ClassVar[int] FILE_TYPE_FIELD_NUMBER: _ClassVar[int] - READ_CONFIG_FIELD_NUMBER: _ClassVar[int] + URL_FIELD_NUMBER: _ClassVar[int] + TITLE_FIELD_NUMBER: _ClassVar[int] + CONFIG_FIELD_NUMBER: _ClassVar[int] REQUEST_ID_FIELD_NUMBER: _ClassVar[int] file_content: bytes file_name: str file_type: str - read_config: ReadConfig - request_id: str - def __init__(self, file_content: _Optional[bytes] = ..., file_name: _Optional[str] = ..., file_type: _Optional[str] = ..., read_config: _Optional[_Union[ReadConfig, _Mapping]] = ..., request_id: _Optional[str] = ...) -> None: ... 
- -class ReadFromURLRequest(_message.Message): - __slots__ = ("url", "title", "read_config", "request_id") - URL_FIELD_NUMBER: _ClassVar[int] - TITLE_FIELD_NUMBER: _ClassVar[int] - READ_CONFIG_FIELD_NUMBER: _ClassVar[int] - REQUEST_ID_FIELD_NUMBER: _ClassVar[int] url: str title: str - read_config: ReadConfig + config: ReadConfig request_id: str - def __init__(self, url: _Optional[str] = ..., title: _Optional[str] = ..., read_config: _Optional[_Union[ReadConfig, _Mapping]] = ..., request_id: _Optional[str] = ...) -> None: ... + def __init__(self, file_content: _Optional[bytes] = ..., file_name: _Optional[str] = ..., file_type: _Optional[str] = ..., url: _Optional[str] = ..., title: _Optional[str] = ..., config: _Optional[_Union[ReadConfig, _Mapping]] = ..., request_id: _Optional[str] = ...) -> None: ... -class Image(_message.Message): - __slots__ = ("url", "caption", "ocr_text", "original_url", "start", "end") - URL_FIELD_NUMBER: _ClassVar[int] - CAPTION_FIELD_NUMBER: _ClassVar[int] - OCR_TEXT_FIELD_NUMBER: _ClassVar[int] - ORIGINAL_URL_FIELD_NUMBER: _ClassVar[int] - START_FIELD_NUMBER: _ClassVar[int] - END_FIELD_NUMBER: _ClassVar[int] - url: str - caption: str - ocr_text: str - original_url: str - start: int - end: int - def __init__(self, url: _Optional[str] = ..., caption: _Optional[str] = ..., ocr_text: _Optional[str] = ..., original_url: _Optional[str] = ..., start: _Optional[int] = ..., end: _Optional[int] = ...) -> None: ... 
- -class Chunk(_message.Message): - __slots__ = ("content", "seq", "start", "end", "images") - CONTENT_FIELD_NUMBER: _ClassVar[int] - SEQ_FIELD_NUMBER: _ClassVar[int] - START_FIELD_NUMBER: _ClassVar[int] - END_FIELD_NUMBER: _ClassVar[int] - IMAGES_FIELD_NUMBER: _ClassVar[int] - content: str - seq: int - start: int - end: int - images: _containers.RepeatedCompositeFieldContainer[Image] - def __init__(self, content: _Optional[str] = ..., seq: _Optional[int] = ..., start: _Optional[int] = ..., end: _Optional[int] = ..., images: _Optional[_Iterable[_Union[Image, _Mapping]]] = ...) -> None: ... +class ImageRef(_message.Message): + __slots__ = ("filename", "original_ref", "mime_type", "storage_key", "image_data") + FILENAME_FIELD_NUMBER: _ClassVar[int] + ORIGINAL_REF_FIELD_NUMBER: _ClassVar[int] + MIME_TYPE_FIELD_NUMBER: _ClassVar[int] + STORAGE_KEY_FIELD_NUMBER: _ClassVar[int] + IMAGE_DATA_FIELD_NUMBER: _ClassVar[int] + filename: str + original_ref: str + mime_type: str + storage_key: str + image_data: bytes + def __init__(self, filename: _Optional[str] = ..., original_ref: _Optional[str] = ..., mime_type: _Optional[str] = ..., storage_key: _Optional[str] = ..., image_data: _Optional[bytes] = ...) -> None: ... class ReadResponse(_message.Message): - __slots__ = ("chunks", "error") - CHUNKS_FIELD_NUMBER: _ClassVar[int] + __slots__ = ("markdown_content", "image_refs", "image_dir_path", "metadata", "error") + class MetadataEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... 
+ MARKDOWN_CONTENT_FIELD_NUMBER: _ClassVar[int] + IMAGE_REFS_FIELD_NUMBER: _ClassVar[int] + IMAGE_DIR_PATH_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] ERROR_FIELD_NUMBER: _ClassVar[int] - chunks: _containers.RepeatedCompositeFieldContainer[Chunk] + markdown_content: str + image_refs: _containers.RepeatedCompositeFieldContainer[ImageRef] + image_dir_path: str + metadata: _containers.ScalarMap[str, str] error: str - def __init__(self, chunks: _Optional[_Iterable[_Union[Chunk, _Mapping]]] = ..., error: _Optional[str] = ...) -> None: ... + def __init__(self, markdown_content: _Optional[str] = ..., image_refs: _Optional[_Iterable[_Union[ImageRef, _Mapping]]] = ..., image_dir_path: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ..., error: _Optional[str] = ...) -> None: ... + +class ListEnginesRequest(_message.Message): + __slots__ = ("config_overrides",) + class ConfigOverridesEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + CONFIG_OVERRIDES_FIELD_NUMBER: _ClassVar[int] + config_overrides: _containers.ScalarMap[str, str] + def __init__(self, config_overrides: _Optional[_Mapping[str, str]] = ...) -> None: ... 
+ +class ParserEngineInfo(_message.Message): + __slots__ = ("name", "description", "file_types", "available", "unavailable_reason") + NAME_FIELD_NUMBER: _ClassVar[int] + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] + FILE_TYPES_FIELD_NUMBER: _ClassVar[int] + AVAILABLE_FIELD_NUMBER: _ClassVar[int] + UNAVAILABLE_REASON_FIELD_NUMBER: _ClassVar[int] + name: str + description: str + file_types: _containers.RepeatedScalarFieldContainer[str] + available: bool + unavailable_reason: str + def __init__(self, name: _Optional[str] = ..., description: _Optional[str] = ..., file_types: _Optional[_Iterable[str]] = ..., available: bool = ..., unavailable_reason: _Optional[str] = ...) -> None: ... + +class ListEnginesResponse(_message.Message): + __slots__ = ("engines",) + ENGINES_FIELD_NUMBER: _ClassVar[int] + engines: _containers.RepeatedCompositeFieldContainer[ParserEngineInfo] + def __init__(self, engines: _Optional[_Iterable[_Union[ParserEngineInfo, _Mapping]]] = ...) -> None: ... + +class ConvertToPDFRequest(_message.Message): + __slots__ = ("file_content", "file_name", "file_type") + FILE_CONTENT_FIELD_NUMBER: _ClassVar[int] + FILE_NAME_FIELD_NUMBER: _ClassVar[int] + FILE_TYPE_FIELD_NUMBER: _ClassVar[int] + file_content: bytes + file_name: str + file_type: str + def __init__(self, file_content: _Optional[bytes] = ..., file_name: _Optional[str] = ..., file_type: _Optional[str] = ...) -> None: ... + +class ConvertToPDFResponse(_message.Message): + __slots__ = ("pdf_content", "error") + PDF_CONTENT_FIELD_NUMBER: _ClassVar[int] + ERROR_FIELD_NUMBER: _ClassVar[int] + pdf_content: bytes + error: str + def __init__(self, pdf_content: _Optional[bytes] = ..., error: _Optional[str] = ...) -> None: ... 
diff --git a/docreader/proto/docreader_pb2_grpc.py b/docreader/proto/docreader_pb2_grpc.py index b2c9f116..a4f1c48c 100644 --- a/docreader/proto/docreader_pb2_grpc.py +++ b/docreader/proto/docreader_pb2_grpc.py @@ -3,9 +3,9 @@ import grpc import warnings -import docreader_pb2 as docreader__pb2 +from docreader.proto import docreader_pb2 as docreader__pb2 -GRPC_GENERATED_VERSION = '1.76.0' +GRPC_GENERATED_VERSION = '1.78.0' GRPC_VERSION = grpc.__version__ _version_not_supported = False @@ -26,8 +26,7 @@ if _version_not_supported: class DocReaderStub(object): - """文档读取服务 - """ + """Missing associated documentation comment in .proto file.""" def __init__(self, channel): """Constructor. @@ -35,32 +34,40 @@ class DocReaderStub(object): Args: channel: A grpc.Channel. """ - self.ReadFromFile = channel.unary_unary( - '/docreader.DocReader/ReadFromFile', - request_serializer=docreader__pb2.ReadFromFileRequest.SerializeToString, + self.Read = channel.unary_unary( + '/docreader.DocReader/Read', + request_serializer=docreader__pb2.ReadRequest.SerializeToString, response_deserializer=docreader__pb2.ReadResponse.FromString, _registered_method=True) - self.ReadFromURL = channel.unary_unary( - '/docreader.DocReader/ReadFromURL', - request_serializer=docreader__pb2.ReadFromURLRequest.SerializeToString, - response_deserializer=docreader__pb2.ReadResponse.FromString, + self.ListEngines = channel.unary_unary( + '/docreader.DocReader/ListEngines', + request_serializer=docreader__pb2.ListEnginesRequest.SerializeToString, + response_deserializer=docreader__pb2.ListEnginesResponse.FromString, + _registered_method=True) + self.ConvertToPDF = channel.unary_unary( + '/docreader.DocReader/ConvertToPDF', + request_serializer=docreader__pb2.ConvertToPDFRequest.SerializeToString, + response_deserializer=docreader__pb2.ConvertToPDFResponse.FromString, _registered_method=True) class DocReaderServicer(object): - """文档读取服务 - """ + """Missing associated documentation comment in .proto file.""" - def 
ReadFromFile(self, request, context): - """从文件读取文档 - """ + def Read(self, request, context): + """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') - def ReadFromURL(self, request, context): - """从URL读取文档 - """ + def ListEngines(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ConvertToPDF(self, request, context): + """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') @@ -68,15 +75,20 @@ class DocReaderServicer(object): def add_DocReaderServicer_to_server(servicer, server): rpc_method_handlers = { - 'ReadFromFile': grpc.unary_unary_rpc_method_handler( - servicer.ReadFromFile, - request_deserializer=docreader__pb2.ReadFromFileRequest.FromString, + 'Read': grpc.unary_unary_rpc_method_handler( + servicer.Read, + request_deserializer=docreader__pb2.ReadRequest.FromString, response_serializer=docreader__pb2.ReadResponse.SerializeToString, ), - 'ReadFromURL': grpc.unary_unary_rpc_method_handler( - servicer.ReadFromURL, - request_deserializer=docreader__pb2.ReadFromURLRequest.FromString, - response_serializer=docreader__pb2.ReadResponse.SerializeToString, + 'ListEngines': grpc.unary_unary_rpc_method_handler( + servicer.ListEngines, + request_deserializer=docreader__pb2.ListEnginesRequest.FromString, + response_serializer=docreader__pb2.ListEnginesResponse.SerializeToString, + ), + 'ConvertToPDF': grpc.unary_unary_rpc_method_handler( + servicer.ConvertToPDF, + request_deserializer=docreader__pb2.ConvertToPDFRequest.FromString, + 
response_serializer=docreader__pb2.ConvertToPDFResponse.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -87,11 +99,10 @@ def add_DocReaderServicer_to_server(servicer, server): # This class is part of an EXPERIMENTAL API. class DocReader(object): - """文档读取服务 - """ + """Missing associated documentation comment in .proto file.""" @staticmethod - def ReadFromFile(request, + def Read(request, target, options=(), channel_credentials=None, @@ -104,8 +115,8 @@ class DocReader(object): return grpc.experimental.unary_unary( request, target, - '/docreader.DocReader/ReadFromFile', - docreader__pb2.ReadFromFileRequest.SerializeToString, + '/docreader.DocReader/Read', + docreader__pb2.ReadRequest.SerializeToString, docreader__pb2.ReadResponse.FromString, options, channel_credentials, @@ -118,7 +129,7 @@ class DocReader(object): _registered_method=True) @staticmethod - def ReadFromURL(request, + def ListEngines(request, target, options=(), channel_credentials=None, @@ -131,9 +142,36 @@ class DocReader(object): return grpc.experimental.unary_unary( request, target, - '/docreader.DocReader/ReadFromURL', - docreader__pb2.ReadFromURLRequest.SerializeToString, - docreader__pb2.ReadResponse.FromString, + '/docreader.DocReader/ListEngines', + docreader__pb2.ListEnginesRequest.SerializeToString, + docreader__pb2.ListEnginesResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def ConvertToPDF(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/docreader.DocReader/ConvertToPDF', + docreader__pb2.ConvertToPDFRequest.SerializeToString, + docreader__pb2.ConvertToPDFResponse.FromString, options, channel_credentials, insecure, diff 
--git a/docreader/scripts/generate_proto.sh b/docreader/scripts/generate_proto.sh index 4bef4078..a44501a5 100755 --- a/docreader/scripts/generate_proto.sh +++ b/docreader/scripts/generate_proto.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -x +set -ex # 设置目录 PROTO_DIR="docreader/proto" @@ -13,12 +13,16 @@ python3 -m grpc_tools.protoc -I${PROTO_DIR} \ --grpc_python_out=${PYTHON_OUT} \ ${PROTO_DIR}/docreader.proto -# 生成Go代码 -protoc -I${PROTO_DIR} --go_out=${GO_OUT} \ - --go_opt=paths=source_relative \ - --go-grpc_out=${GO_OUT} \ - --go-grpc_opt=paths=source_relative \ - ${PROTO_DIR}/docreader.proto +# 生成Go代码(仅在 protoc-gen-go 可用时执行) +if command -v protoc-gen-go &> /dev/null; then + protoc -I${PROTO_DIR} --go_out=${GO_OUT} \ + --go_opt=paths=source_relative \ + --go-grpc_out=${GO_OUT} \ + --go-grpc_opt=paths=source_relative \ + ${PROTO_DIR}/docreader.proto +else + echo "protoc-gen-go not found, skipping Go code generation" +fi # 修复Python导入问题(MacOS兼容版本) if [ "$(uname)" == "Darwin" ]; then diff --git a/docs/LITE.md b/docs/LITE.md new file mode 100644 index 00000000..deb48caf --- /dev/null +++ b/docs/LITE.md @@ -0,0 +1,165 @@ +# WeKnora Lite + +零外部依赖的单二进制部署模式。无需 Docker、PostgreSQL、Redis,适合快速体验和小规模私有部署。 + +## 架构 + +| 组件 | 标准版 | Lite 版 | +|------|--------|---------| +| 数据库 | PostgreSQL | SQLite (WAL) | +| 向量检索 | pgvector / Qdrant / ES | sqlite-vec (vec0) | +| 关键词检索 | ParadeDB BM25 / ES | SQLite FTS5 | +| 消息队列 | Redis + Asynq | 内存 SyncTaskExecutor | +| 会话存储 | Redis | 内存 | +| 流管理 | Redis / 内存 | 内存 | +| 文件存储 | MinIO / COS / 本地 | 本地 | +| 文档解析 | DocReader (gRPC) | 不可用(文本/段落导入可用)| +| 前端 | Nginx 容器 | Go 内置静态文件服务 | + +## 快速开始 + +### 方式一:Homebrew 安装(macOS / Linux,推荐) + +```bash +brew tap Tencent/weknora https://github.com/Tencent/WeKnora +brew install weknora-lite +``` + +安装完成后,推荐使用 **brew services** 以后台服务方式运行: + +```bash +brew services start weknora-lite # 启动服务(开机自动启动) +brew services info weknora-lite # 查看运行状态 +# 首次启动自动创建配置文件 ~/.config/weknora/.env.lite +# 数据存储在 ~/.local/share/weknora/ +# 访问 
http://localhost:8080 +``` + +常用服务管理命令: + +```bash +brew services stop weknora-lite # 停止服务 +brew services restart weknora-lite # 重启服务(修改配置后需重启) +brew services info weknora-lite # 查看状态 +``` + +日志位于 `$(brew --prefix)/var/log/weknora-lite.log`。 + +也可以前台直接运行: + +```bash +weknora-lite +``` + +如需修改配置(LLM 服务地址、安全密钥等): + +```bash +$EDITOR ~/.config/weknora/.env.lite +brew services restart weknora-lite # 修改配置后重启生效 +``` + +> **LLM 服务**:WeKnora Lite 需要一个 OpenAI 兼容的 LLM 服务来提供对话和 Embedding 能力。 +> 可以使用 [Ollama](https://ollama.com/)(本地)、通义千问、OpenAI 等任何兼容服务, +> 在配置文件中设置对应的地址和 API Key 即可。 + +### 方式二:下载预编译包 + +从 [GitHub Releases](https://github.com/Tencent/WeKnora/releases) 下载对应平台的 tarball: + +| 文件 | 平台 | +|------|------| +| `WeKnora-lite_*_linux_amd64.tar.gz` | Linux x86_64 | +| `WeKnora-lite_*_linux_arm64.tar.gz` | Linux ARM64 | +| `WeKnora-lite_*_darwin_amd64.tar.gz` | macOS Intel | +| `WeKnora-lite_*_darwin_arm64.tar.gz` | macOS Apple Silicon | + +```bash +# 1. 解压 +tar xzf WeKnora-lite_v0.2.0_darwin_arm64.tar.gz +cd WeKnora-lite_v0.2.0_darwin_arm64 + +# 2. 配置 +cp .env.lite.example .env.lite +# 编辑 .env.lite,配置 LLM 服务地址和安全密钥 + +# 3. 
运行 +set -a && source .env.lite && set +a +./WeKnora-lite +# 访问 http://localhost:8080 +``` + +### 方式三:从源码构建 + +前置条件:Go 1.22+(需要 CGO)、C 编译器 (gcc/clang)、Node.js 22+(前端构建)。 + +```bash +make run-lite +``` + +## 配置 + +Lite 模式通过 `.env.lite` 文件配置(模板见 `.env.lite.example`)。关键环境变量: + +```bash +DB_DRIVER=sqlite # 使用 SQLite +DB_PATH=./data/weknora.db # 数据库文件路径 +RETRIEVE_DRIVER=sqlite # SQLite 检索引擎 (FTS5 + sqlite-vec) +STORAGE_TYPE=local # 本地文件存储 +LOCAL_STORAGE_BASE_DIR=./data/files +STREAM_MANAGER_TYPE=memory # 内存流管理 +# REDIS_ADDR= # 留空 = 不使用 Redis +OLLAMA_BASE_URL=http://127.0.0.1:11434 +``` + +完整配置参见 [.env.lite.example](../.env.lite.example)。 + +## 后台运行 + +### Homebrew 用户(macOS / Linux) + +Homebrew 安装后直接使用 `brew services` 管理,详见上方「快速开始 → 方式一」。 + +### Linux systemd(tarball 安装) + +tarball 中附带 `weknora-lite.service` 模板,按需修改路径后安装: + +```bash +# 创建用户和目录 +sudo useradd -r -s /sbin/nologin weknora +sudo mkdir -p /opt/weknora/data +sudo cp -r WeKnora-lite web/ .env.lite /opt/weknora/ +sudo chown -R weknora:weknora /opt/weknora + +# 安装并启动服务 +sudo cp weknora-lite.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable --now weknora-lite + +# 管理 +sudo systemctl status weknora-lite # 查看状态 +sudo journalctl -u weknora-lite -f # 查看日志 +``` + +## 功能限制 + +与标准版相比,Lite 版有以下限制: + +- **文档解析**:不支持文件上传和 URL 导入的自动解析(PDF/Word/Excel 等)。可使用文本和段落方式手动导入。 +- **向量检索**:sqlite-vec 使用精确 KNN(非近似),适合 10 万条以下的小规模数据集。 +- **并发**:SQLite 单写者模型,高并发写入场景下性能不如 PostgreSQL。 +- **任务队列**:无持久化队列,进程重启后未完成的异步任务会丢失。 +- **知识图谱**:默认禁用 (`NEO4J_ENABLE=false`)。 +- **Agent Skills 沙箱**:默认禁用 (`WEKNORA_SANDBOX_MODE=disabled`)。 + +## 数据目录 + +默认所有数据存储在 `./data/` 目录下: + +``` +data/ +├── weknora.db # SQLite 数据库 +├── weknora.db-wal # WAL 日志 +└── files/ # 上传文件 +``` + +备份只需复制整个 `data/` 目录(建议先停止服务,避免复制到写入中的 WAL 文件导致备份不一致)。 diff --git a/frontend/nginx.conf b/frontend/nginx.conf index a1678c8c..1349d0d8 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -21,6 +21,15 @@ server { try_files $uri $uri/ /index.html; } + # 本地存储文件代理到后端服务(用于渲染 markdown 
中的图片) + location /files/ { + proxy_pass ${APP_SCHEME}://${APP_HOST}:${APP_PORT}/files/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + # API请求代理到后端服务 # APP_SCHEME 默认 http,远程 HTTPS 后端可设为 https location /api/ { diff --git a/frontend/package-lock.json b/frontend/package-lock.json index fcf04f07..ee5d804f 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -11,7 +11,9 @@ "@microsoft/fetch-event-source": "^2.0.1", "@types/dompurify": "^3.0.5", "@types/papaparse": "^5.5.0", + "@vue-office/pptx": "^1.0.1", "axios": "^1.8.4", + "docx-preview": "^0.3.7", "dompurify": "^3.2.6", "highlight.js": "^11.11.1", "marked": "^5.1.2", @@ -23,6 +25,7 @@ "tdesign-icons-vue-next": "^0.4.1", "tdesign-vue-next": "^1.17.2", "vue": "^3.5.13", + "vue-demi": "^0.14.6", "vue-i18n": "^11.1.12", "vue-router": "^4.5.0", "webpack": "^5.94.0", @@ -37,10 +40,10 @@ "@vue/tsconfig": "^0.7.0", "less": "^4.3.0", "less-loader": "^12.2.0", - "npm-run-all2": "^7.0.2", + "npm-run-all2": "^8.0.4", "typescript": "~5.8.0", "vite": "^7.2.2", - "vue-tsc": "^2.2.8" + "vue-tsc": "^3.2.5" } }, "node_modules/@ampproject/remapping": { @@ -59,7 +62,7 @@ }, "node_modules/@antfu/install-pkg": { "version": "1.1.0", - "resolved": "https://registry.npmmirror.com/@antfu/install-pkg/-/install-pkg-1.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/@antfu/install-pkg/-/install-pkg-1.1.0.tgz", "integrity": "sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ==", "license": "MIT", "dependencies": { @@ -98,7 +101,6 @@ "resolved": "https://mirrors.tencent.com/npm/@babel/core/-/core-7.28.0.tgz", "integrity": "sha512-UlLAnTPrFdNGoFtbSXwcGFQBtQZJCNjaN6hQNP3UPvuNXT1i82N26KL3dZeIpNalWywr9IuQuncaAfUaS1g6sQ==", "dev": true, - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ 
-457,13 +459,13 @@ }, "node_modules/@braintree/sanitize-url": { "version": "7.1.2", - "resolved": "https://registry.npmmirror.com/@braintree/sanitize-url/-/sanitize-url-7.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@braintree/sanitize-url/-/sanitize-url-7.1.2.tgz", "integrity": "sha512-jigsZK+sMF/cuiB7sERuo9V7N9jx+dhmHHnQyDSVdpZwVutaBu7WvNYqMDLSgFgfB30n452TP3vjDAvFC973mA==", "license": "MIT" }, "node_modules/@chevrotain/cst-dts-gen": { "version": "11.1.2", - "resolved": "https://registry.npmmirror.com/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.1.2.tgz", "integrity": "sha512-XTsjvDVB5nDZBQB8o0o/0ozNelQtn2KrUVteIHSlPd2VAV2utEb6JzyCJaJ8tGxACR4RiBNWy5uYUHX2eji88Q==", "license": "Apache-2.0", "dependencies": { @@ -474,7 +476,7 @@ }, "node_modules/@chevrotain/gast": { "version": "11.1.2", - "resolved": "https://registry.npmmirror.com/@chevrotain/gast/-/gast-11.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@chevrotain/gast/-/gast-11.1.2.tgz", "integrity": "sha512-Z9zfXR5jNZb1Hlsd/p+4XWeUFugrHirq36bKzPWDSIacV+GPSVXdk+ahVWZTwjhNwofAWg/sZg58fyucKSQx5g==", "license": "Apache-2.0", "dependencies": { @@ -484,19 +486,19 @@ }, "node_modules/@chevrotain/regexp-to-ast": { "version": "11.1.2", - "resolved": "https://registry.npmmirror.com/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.1.2.tgz", "integrity": "sha512-nMU3Uj8naWer7xpZTYJdxbAs6RIv/dxYzkYU8GSwgUtcAAlzjcPfX1w+RKRcYG8POlzMeayOQ/znfwxEGo5ulw==", "license": "Apache-2.0" }, "node_modules/@chevrotain/types": { "version": "11.1.2", - "resolved": "https://registry.npmmirror.com/@chevrotain/types/-/types-11.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@chevrotain/types/-/types-11.1.2.tgz", "integrity": "sha512-U+HFai5+zmJCkK86QsaJtoITlboZHBqrVketcO2ROv865xfCMSFpELQoz1GkX5GzME8pTa+3kbKrZHQtI0gdbw==", 
"license": "Apache-2.0" }, "node_modules/@chevrotain/utils": { "version": "11.1.2", - "resolved": "https://registry.npmmirror.com/@chevrotain/utils/-/utils-11.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@chevrotain/utils/-/utils-11.1.2.tgz", "integrity": "sha512-4mudFAQ6H+MqBTfqLmU7G1ZwRzCLfJEooL/fsF6rCX5eePMbGhoy5n4g+G4vlh2muDcsCTJtL+uKbOzWxs5LHA==", "license": "Apache-2.0" }, @@ -938,13 +940,13 @@ }, "node_modules/@iconify/types": { "version": "2.0.0", - "resolved": "https://registry.npmmirror.com/@iconify/types/-/types-2.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/@iconify/types/-/types-2.0.0.tgz", "integrity": "sha512-+wluvCrRhXrhyOmRDJ3q8mux9JkKy5SJ/v8ol2tu4FVjyYvtEzkc/3pK15ET6RKg4b4w4BmTk1+gsCUhf21Ykg==", "license": "MIT" }, "node_modules/@iconify/utils": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/@iconify/utils/-/utils-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/@iconify/utils/-/utils-3.1.0.tgz", "integrity": "sha512-Zlzem1ZXhI1iHeeERabLNzBHdOa4VhQbqAcOQaMKuTuyZCpwKbC2R4Dd0Zo3g9EAc+Y4fiarO8HIHRAth7+skw==", "license": "MIT", "dependencies": { @@ -1041,7 +1043,7 @@ }, "node_modules/@mermaid-js/parser": { "version": "1.0.0", - "resolved": "https://registry.npmmirror.com/@mermaid-js/parser/-/parser-1.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/@mermaid-js/parser/-/parser-1.0.0.tgz", "integrity": "sha512-vvK0Hi/VWndxoh03Mmz6wa1KDriSPjS2XMZL/1l19HFwygiObEEoEwSDxOqyLzzAI6J2PU3261JjTMTO7x+BPw==", "license": "MIT", "dependencies": { @@ -1135,35 +1137,37 @@ "license": "MIT" }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.45.0.tgz", - "integrity": "sha512-2o/FgACbji4tW1dzXOqAV15Eu7DdgbKsF2QKcxfG4xbh5iwU7yr5RRP5/U+0asQliSYv5M4o7BevlGIoSL0LXg==", + "version": "4.59.0", + "resolved": 
"https://mirrors.tencent.com/npm/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", + "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", "cpu": [ "arm" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "android" ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.45.0.tgz", - "integrity": "sha512-PSZ0SvMOjEAxwZeTx32eI/j5xSYtDCRxGu5k9zvzoY77xUNssZM+WV6HYBLROpY5CkXsbQjvz40fBb7WPwDqtQ==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", + "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "android" ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.45.0.tgz", - "integrity": "sha512-BA4yPIPssPB2aRAWzmqzQ3y2/KotkLyZukVB7j3psK/U3nVJdceo6qr9pLM2xN6iRP/wKfxEbOb1yrlZH6sYZg==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", + "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", "cpu": [ "arm64" ], @@ -1175,35 +1179,37 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.45.0.tgz", - "integrity": "sha512-Pr2o0lvTwsiG4HCr43Zy9xXrHspyMvsvEw4FwKYqhli4FuLE5FjcZzuQ4cfPe0iUFCvSQG6lACI0xj74FDZKRA==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", + "integrity": 
"sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", "cpu": [ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "darwin" ] }, "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.45.0.tgz", - "integrity": "sha512-lYE8LkE5h4a/+6VnnLiL14zWMPnx6wNbDG23GcYFpRW1V9hYWHAw9lBZ6ZUIrOaoK7NliF1sdwYGiVmziUF4vA==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", + "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "freebsd" ] }, "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.45.0.tgz", - "integrity": "sha512-PVQWZK9sbzpvqC9Q0GlehNNSVHR+4m7+wET+7FgSnKG3ci5nAMgGmr9mGBXzAuE5SvguCKJ6mHL6vq1JaJ/gvw==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", + "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", "cpu": [ "x64" ], @@ -1215,22 +1221,23 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.45.0.tgz", - "integrity": "sha512-hLrmRl53prCcD+YXTfNvXd776HTxNh8wPAMllusQ+amcQmtgo3V5i/nkhPN6FakW+QVLoUUr2AsbtIRPFU3xIA==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", + "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", "cpu": [ 
"arm" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.45.0.tgz", - "integrity": "sha512-XBKGSYcrkdiRRjl+8XvrUR3AosXU0NvF7VuqMsm7s5nRy+nt58ZMB19Jdp1RdqewLcaYnpk8zeVs/4MlLZEJxw==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", + "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", "cpu": [ "arm" ], @@ -1242,9 +1249,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.45.0.tgz", - "integrity": "sha512-fRvZZPUiBz7NztBE/2QnCS5AtqLVhXmUOPj9IHlfGEXkapgImf4W9+FSkL8cWqoAjozyUzqFmSc4zh2ooaeF6g==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", + "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", "cpu": [ "arm64" ], @@ -1256,35 +1263,65 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.45.0.tgz", - "integrity": "sha512-Btv2WRZOcUGi8XU80XwIvzTg4U6+l6D0V6sZTrZx214nrwxw5nAi8hysaXj/mctyClWgesyuxbeLylCBNauimg==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", + "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" ] }, - 
"node_modules/@rollup/rollup-linux-loongarch64-gnu": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.45.0.tgz", - "integrity": "sha512-Li0emNnwtUZdLwHjQPBxn4VWztcrw/h7mgLyHiEI5Z0MhpeFGlzaiBHpSNVOMB/xucjXTTcO+dhv469Djr16KA==", + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", + "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", "cpu": [ "loong64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" ] }, - "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.45.0.tgz", - "integrity": "sha512-sB8+pfkYx2kvpDCfd63d5ScYT0Fz1LO6jIb2zLZvmK9ob2D8DeVqrmBDE0iDK8KlBVmsTNzrjr3G1xV4eUZhSw==", + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", + "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", + "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.59.0", + "resolved": 
"https://mirrors.tencent.com/npm/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", + "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", "cpu": [ "ppc64" ], @@ -1296,9 +1333,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.45.0.tgz", - "integrity": "sha512-5GQ6PFhh7E6jQm70p1aW05G2cap5zMOvO0se5JMecHeAdj5ZhWEHbJ4hiKpfi1nnnEdTauDXxPgXae/mqjow9w==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", + "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", "cpu": [ "riscv64" ], @@ -1310,22 +1347,23 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.45.0.tgz", - "integrity": "sha512-N/euLsBd1rekWcuduakTo/dJw6U6sBP3eUq+RXM9RNfPuWTvG2w/WObDkIvJ2KChy6oxZmOSC08Ak2OJA0UiAA==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", + "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", "cpu": [ "riscv64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.45.0.tgz", - "integrity": "sha512-2l9sA7d7QdikL0xQwNMO3xURBUNEWyHVHfAsHsUdq+E/pgLTUcCE+gih5PCdmyHmfTDeXUWVhqL0WZzg0nua3g==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", + 
"integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", "cpu": [ "s390x" ], @@ -1337,9 +1375,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.45.0.tgz", - "integrity": "sha512-XZdD3fEEQcwG2KrJDdEQu7NrHonPxxaV0/w2HpvINBdcqebz1aL+0vM2WFJq4DeiAVT6F5SUQas65HY5JDqoPw==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", + "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", "cpu": [ "x64" ], @@ -1351,9 +1389,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.45.0.tgz", - "integrity": "sha512-7ayfgvtmmWgKWBkCGg5+xTQ0r5V1owVm67zTrsEY1008L5ro7mCyGYORomARt/OquB9KY7LpxVBZes+oSniAAQ==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", + "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", "cpu": [ "x64" ], @@ -1364,10 +1402,38 @@ "linux" ] }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", + "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", + "integrity": 
"sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.45.0.tgz", - "integrity": "sha512-B+IJgcBnE2bm93jEW5kHisqvPITs4ddLOROAcOc/diBgrEiQJJ6Qcjby75rFSmH5eMGrqJryUgJDhrfj942apQ==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", + "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", "cpu": [ "arm64" ], @@ -1379,9 +1445,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.45.0.tgz", - "integrity": "sha512-+CXwwG66g0/FpWOnP/v1HnrGVSOygK/osUbu3wPRy8ECXjoYKjRAyfxYpDQOfghC5qPJYLPH0oN4MCOjwgdMug==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", + "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", "cpu": [ "ia32" ], @@ -1392,10 +1458,24 @@ "win32" ] }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", + "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.45.0", - "resolved": 
"https://mirrors.tencent.com/npm/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.45.0.tgz", - "integrity": "sha512-SRf1cytG7wqcHVLrBc9VtPK4pU5wxiB/lNIkNmW2ApKXIg+RpqwHfsaEK+e7eH4A1BpI6BX/aBWXxZCIrJg3uA==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", "cpu": [ "x64" ], @@ -1415,7 +1495,7 @@ }, "node_modules/@types/d3": { "version": "7.4.3", - "resolved": "https://registry.npmmirror.com/@types/d3/-/d3-7.4.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3/-/d3-7.4.3.tgz", "integrity": "sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww==", "license": "MIT", "dependencies": { @@ -1453,13 +1533,13 @@ }, "node_modules/@types/d3-array": { "version": "3.2.2", - "resolved": "https://registry.npmmirror.com/@types/d3-array/-/d3-array-3.2.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-array/-/d3-array-3.2.2.tgz", "integrity": "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw==", "license": "MIT" }, "node_modules/@types/d3-axis": { "version": "3.0.6", - "resolved": "https://registry.npmmirror.com/@types/d3-axis/-/d3-axis-3.0.6.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-axis/-/d3-axis-3.0.6.tgz", "integrity": "sha512-pYeijfZuBd87T0hGn0FO1vQ/cgLk6E1ALJjfkC0oJ8cbwkZl3TpgS8bVBLZN+2jjGgg38epgxb2zmoGtSfvgMw==", "license": "MIT", "dependencies": { @@ -1468,7 +1548,7 @@ }, "node_modules/@types/d3-brush": { "version": "3.0.6", - "resolved": "https://registry.npmmirror.com/@types/d3-brush/-/d3-brush-3.0.6.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-brush/-/d3-brush-3.0.6.tgz", "integrity": "sha512-nH60IZNNxEcrh6L1ZSMNA28rj27ut/2ZmI3r96Zd+1jrZD++zD3LsMIjWlvg4AYrHn/Pqz4CF3veCxGjtbqt7A==", "license": "MIT", "dependencies": 
{ @@ -1477,19 +1557,19 @@ }, "node_modules/@types/d3-chord": { "version": "3.0.6", - "resolved": "https://registry.npmmirror.com/@types/d3-chord/-/d3-chord-3.0.6.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-chord/-/d3-chord-3.0.6.tgz", "integrity": "sha512-LFYWWd8nwfwEmTZG9PfQxd17HbNPksHBiJHaKuY1XeqscXacsS2tyoo6OdRsjf+NQYeB6XrNL3a25E3gH69lcg==", "license": "MIT" }, "node_modules/@types/d3-color": { "version": "3.1.3", - "resolved": "https://registry.npmmirror.com/@types/d3-color/-/d3-color-3.1.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-color/-/d3-color-3.1.3.tgz", "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", "license": "MIT" }, "node_modules/@types/d3-contour": { "version": "3.0.6", - "resolved": "https://registry.npmmirror.com/@types/d3-contour/-/d3-contour-3.0.6.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-contour/-/d3-contour-3.0.6.tgz", "integrity": "sha512-BjzLgXGnCWjUSYGfH1cpdo41/hgdWETu4YxpezoztawmqsvCeep+8QGfiY6YbDvfgHz/DkjeIkkZVJavB4a3rg==", "license": "MIT", "dependencies": { @@ -1499,19 +1579,19 @@ }, "node_modules/@types/d3-delaunay": { "version": "6.0.4", - "resolved": "https://registry.npmmirror.com/@types/d3-delaunay/-/d3-delaunay-6.0.4.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-delaunay/-/d3-delaunay-6.0.4.tgz", "integrity": "sha512-ZMaSKu4THYCU6sV64Lhg6qjf1orxBthaC161plr5KuPHo3CNm8DTHiLw/5Eq2b6TsNP0W0iJrUOFscY6Q450Hw==", "license": "MIT" }, "node_modules/@types/d3-dispatch": { "version": "3.0.7", - "resolved": "https://registry.npmmirror.com/@types/d3-dispatch/-/d3-dispatch-3.0.7.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-dispatch/-/d3-dispatch-3.0.7.tgz", "integrity": "sha512-5o9OIAdKkhN1QItV2oqaE5KMIiXAvDWBDPrD85e58Qlz1c1kI/J0NcqbEG88CoTwJrYe7ntUCVfeUl2UJKbWgA==", "license": "MIT" }, "node_modules/@types/d3-drag": { "version": "3.0.7", - "resolved": 
"https://registry.npmmirror.com/@types/d3-drag/-/d3-drag-3.0.7.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-drag/-/d3-drag-3.0.7.tgz", "integrity": "sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==", "license": "MIT", "dependencies": { @@ -1520,19 +1600,19 @@ }, "node_modules/@types/d3-dsv": { "version": "3.0.7", - "resolved": "https://registry.npmmirror.com/@types/d3-dsv/-/d3-dsv-3.0.7.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-dsv/-/d3-dsv-3.0.7.tgz", "integrity": "sha512-n6QBF9/+XASqcKK6waudgL0pf/S5XHPPI8APyMLLUHd8NqouBGLsU8MgtO7NINGtPBtk9Kko/W4ea0oAspwh9g==", "license": "MIT" }, "node_modules/@types/d3-ease": { "version": "3.0.2", - "resolved": "https://registry.npmmirror.com/@types/d3-ease/-/d3-ease-3.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-ease/-/d3-ease-3.0.2.tgz", "integrity": "sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==", "license": "MIT" }, "node_modules/@types/d3-fetch": { "version": "3.0.7", - "resolved": "https://registry.npmmirror.com/@types/d3-fetch/-/d3-fetch-3.0.7.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-fetch/-/d3-fetch-3.0.7.tgz", "integrity": "sha512-fTAfNmxSb9SOWNB9IoG5c8Hg6R+AzUHDRlsXsDZsNp6sxAEOP0tkP3gKkNSO/qmHPoBFTxNrjDprVHDQDvo5aA==", "license": "MIT", "dependencies": { @@ -1541,19 +1621,19 @@ }, "node_modules/@types/d3-force": { "version": "3.0.10", - "resolved": "https://registry.npmmirror.com/@types/d3-force/-/d3-force-3.0.10.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-force/-/d3-force-3.0.10.tgz", "integrity": "sha512-ZYeSaCF3p73RdOKcjj+swRlZfnYpK1EbaDiYICEEp5Q6sUiqFaFQ9qgoshp5CzIyyb/yD09kD9o2zEltCexlgw==", "license": "MIT" }, "node_modules/@types/d3-format": { "version": "3.0.4", - "resolved": "https://registry.npmmirror.com/@types/d3-format/-/d3-format-3.0.4.tgz", + "resolved": 
"https://mirrors.tencent.com/npm/@types/d3-format/-/d3-format-3.0.4.tgz", "integrity": "sha512-fALi2aI6shfg7vM5KiR1wNJnZ7r6UuggVqtDA+xiEdPZQwy/trcQaHnwShLuLdta2rTymCNpxYTiMZX/e09F4g==", "license": "MIT" }, "node_modules/@types/d3-geo": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/@types/d3-geo/-/d3-geo-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-geo/-/d3-geo-3.1.0.tgz", "integrity": "sha512-856sckF0oP/diXtS4jNsiQw/UuK5fQG8l/a9VVLeSouf1/PPbBE1i1W852zVwKwYCBkFJJB7nCFTbk6UMEXBOQ==", "license": "MIT", "dependencies": { @@ -1562,13 +1642,13 @@ }, "node_modules/@types/d3-hierarchy": { "version": "3.1.7", - "resolved": "https://registry.npmmirror.com/@types/d3-hierarchy/-/d3-hierarchy-3.1.7.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-hierarchy/-/d3-hierarchy-3.1.7.tgz", "integrity": "sha512-tJFtNoYBtRtkNysX1Xq4sxtjK8YgoWUNpIiUee0/jHGRwqvzYxkq0hGVbbOGSz+JgFxxRu4K8nb3YpG3CMARtg==", "license": "MIT" }, "node_modules/@types/d3-interpolate": { "version": "3.0.4", - "resolved": "https://registry.npmmirror.com/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz", "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==", "license": "MIT", "dependencies": { @@ -1577,31 +1657,31 @@ }, "node_modules/@types/d3-path": { "version": "3.1.1", - "resolved": "https://registry.npmmirror.com/@types/d3-path/-/d3-path-3.1.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-path/-/d3-path-3.1.1.tgz", "integrity": "sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg==", "license": "MIT" }, "node_modules/@types/d3-polygon": { "version": "3.0.2", - "resolved": "https://registry.npmmirror.com/@types/d3-polygon/-/d3-polygon-3.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-polygon/-/d3-polygon-3.0.2.tgz", 
"integrity": "sha512-ZuWOtMaHCkN9xoeEMr1ubW2nGWsp4nIql+OPQRstu4ypeZ+zk3YKqQT0CXVe/PYqrKpZAi+J9mTs05TKwjXSRA==", "license": "MIT" }, "node_modules/@types/d3-quadtree": { "version": "3.0.6", - "resolved": "https://registry.npmmirror.com/@types/d3-quadtree/-/d3-quadtree-3.0.6.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-quadtree/-/d3-quadtree-3.0.6.tgz", "integrity": "sha512-oUzyO1/Zm6rsxKRHA1vH0NEDG58HrT5icx/azi9MF1TWdtttWl0UIUsjEQBBh+SIkrpd21ZjEv7ptxWys1ncsg==", "license": "MIT" }, "node_modules/@types/d3-random": { "version": "3.0.3", - "resolved": "https://registry.npmmirror.com/@types/d3-random/-/d3-random-3.0.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-random/-/d3-random-3.0.3.tgz", "integrity": "sha512-Imagg1vJ3y76Y2ea0871wpabqp613+8/r0mCLEBfdtqC7xMSfj9idOnmBYyMoULfHePJyxMAw3nWhJxzc+LFwQ==", "license": "MIT" }, "node_modules/@types/d3-scale": { "version": "4.0.9", - "resolved": "https://registry.npmmirror.com/@types/d3-scale/-/d3-scale-4.0.9.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-scale/-/d3-scale-4.0.9.tgz", "integrity": "sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw==", "license": "MIT", "dependencies": { @@ -1610,19 +1690,19 @@ }, "node_modules/@types/d3-scale-chromatic": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/@types/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", "integrity": "sha512-iWMJgwkK7yTRmWqRB5plb1kadXyQ5Sj8V/zYlFGMUBbIPKQScw+Dku9cAAMgJG+z5GYDoMjWGLVOvjghDEFnKQ==", "license": "MIT" }, "node_modules/@types/d3-selection": { "version": "3.0.11", - "resolved": "https://registry.npmmirror.com/@types/d3-selection/-/d3-selection-3.0.11.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-selection/-/d3-selection-3.0.11.tgz", "integrity": 
"sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==", "license": "MIT" }, "node_modules/@types/d3-shape": { "version": "3.1.8", - "resolved": "https://registry.npmmirror.com/@types/d3-shape/-/d3-shape-3.1.8.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-shape/-/d3-shape-3.1.8.tgz", "integrity": "sha512-lae0iWfcDeR7qt7rA88BNiqdvPS5pFVPpo5OfjElwNaT2yyekbM0C9vK+yqBqEmHr6lDkRnYNoTBYlAgJa7a4w==", "license": "MIT", "dependencies": { @@ -1631,25 +1711,25 @@ }, "node_modules/@types/d3-time": { "version": "3.0.4", - "resolved": "https://registry.npmmirror.com/@types/d3-time/-/d3-time-3.0.4.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-time/-/d3-time-3.0.4.tgz", "integrity": "sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==", "license": "MIT" }, "node_modules/@types/d3-time-format": { "version": "4.0.3", - "resolved": "https://registry.npmmirror.com/@types/d3-time-format/-/d3-time-format-4.0.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-time-format/-/d3-time-format-4.0.3.tgz", "integrity": "sha512-5xg9rC+wWL8kdDj153qZcsJ0FWiFt0J5RB6LYUNZjwSnesfblqrI/bJ1wBdJ8OQfncgbJG5+2F+qfqnqyzYxyg==", "license": "MIT" }, "node_modules/@types/d3-timer": { "version": "3.0.2", - "resolved": "https://registry.npmmirror.com/@types/d3-timer/-/d3-timer-3.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-timer/-/d3-timer-3.0.2.tgz", "integrity": "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==", "license": "MIT" }, "node_modules/@types/d3-transition": { "version": "3.0.9", - "resolved": "https://registry.npmmirror.com/@types/d3-transition/-/d3-transition-3.0.9.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-transition/-/d3-transition-3.0.9.tgz", "integrity": "sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==", "license": "MIT", 
"dependencies": { @@ -1658,7 +1738,7 @@ }, "node_modules/@types/d3-zoom": { "version": "3.0.8", - "resolved": "https://registry.npmmirror.com/@types/d3-zoom/-/d3-zoom-3.0.8.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/d3-zoom/-/d3-zoom-3.0.8.tgz", "integrity": "sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==", "license": "MIT", "dependencies": { @@ -1702,7 +1782,7 @@ }, "node_modules/@types/geojson": { "version": "7946.0.16", - "resolved": "https://registry.npmmirror.com/@types/geojson/-/geojson-7946.0.16.tgz", + "resolved": "https://mirrors.tencent.com/npm/@types/geojson/-/geojson-7946.0.16.tgz", "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==", "license": "MIT" }, @@ -1818,34 +1898,51 @@ "license": "MIT" }, "node_modules/@volar/language-core": { - "version": "2.4.15", - "resolved": "https://mirrors.tencent.com/npm/@volar/language-core/-/language-core-2.4.15.tgz", - "integrity": "sha512-3VHw+QZU0ZG9IuQmzT68IyN4hZNd9GchGPhbD9+pa8CVv7rnoOZwo7T8weIbrRmihqy3ATpdfXFnqRrfPVK6CA==", + "version": "2.4.28", + "resolved": "https://mirrors.tencent.com/npm/@volar/language-core/-/language-core-2.4.28.tgz", + "integrity": "sha512-w4qhIJ8ZSitgLAkVay6AbcnC7gP3glYM3fYwKV3srj8m494E3xtrCv6E+bWviiK/8hs6e6t1ij1s2Endql7vzQ==", "dev": true, "license": "MIT", "dependencies": { - "@volar/source-map": "2.4.15" + "@volar/source-map": "2.4.28" } }, "node_modules/@volar/source-map": { - "version": "2.4.15", - "resolved": "https://mirrors.tencent.com/npm/@volar/source-map/-/source-map-2.4.15.tgz", - "integrity": "sha512-CPbMWlUN6hVZJYGcU/GSoHu4EnCHiLaXI9n8c9la6RaI9W5JHX+NqG+GSQcB0JdC2FIBLdZJwGsfKyBB71VlTg==", + "version": "2.4.28", + "resolved": "https://mirrors.tencent.com/npm/@volar/source-map/-/source-map-2.4.28.tgz", + "integrity": "sha512-yX2BDBqJkRXfKw8my8VarTyjv48QwxdJtvRgUpNE5erCsgEUdI2DsLbpa+rOQVAJYshY99szEcRDmyHbF10ggQ==", "dev": true, "license": "MIT" }, 
"node_modules/@volar/typescript": { - "version": "2.4.15", - "resolved": "https://mirrors.tencent.com/npm/@volar/typescript/-/typescript-2.4.15.tgz", - "integrity": "sha512-2aZ8i0cqPGjXb4BhkMsPYDkkuc2ZQ6yOpqwAuNwUoncELqoy5fRgOQtLR9gB0g902iS0NAkvpIzs27geVyVdPg==", + "version": "2.4.28", + "resolved": "https://mirrors.tencent.com/npm/@volar/typescript/-/typescript-2.4.28.tgz", + "integrity": "sha512-Ja6yvWrbis2QtN4ClAKreeUZPVYMARDYZl9LMEv1iQ1QdepB6wn0jTRxA9MftYmYa4DQ4k/DaSZpFPUfxl8giw==", "dev": true, "license": "MIT", "dependencies": { - "@volar/language-core": "2.4.15", + "@volar/language-core": "2.4.28", "path-browserify": "^1.0.1", "vscode-uri": "^3.0.8" } }, + "node_modules/@vue-office/pptx": { + "version": "1.0.1", + "resolved": "https://mirrors.tencent.com/npm/@vue-office/pptx/-/pptx-1.0.1.tgz", + "integrity": "sha512-+V7Kctzl6f6+Yk4NaD/wQGRIkqLWcowe0jEhPexWQb8Oilbzt1OyhWRWcMsxNDTdrgm6aMLP+0/tmw27cxddMg==", + "hasInstallScript": true, + "license": "MIT", + "peerDependencies": { + "@vue/composition-api": "^1.7.1", + "vue": "^2.0.0 || >=3.0.0", + "vue-demi": "^0.14.6" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, "node_modules/@vue/babel-helper-vue-transform-on": { "version": "1.4.0", "resolved": "https://mirrors.tencent.com/npm/@vue/babel-helper-vue-transform-on/-/babel-helper-vue-transform-on-1.4.0.tgz", @@ -1944,17 +2041,6 @@ "@vue/shared": "3.5.17" } }, - "node_modules/@vue/compiler-vue2": { - "version": "2.7.16", - "resolved": "https://mirrors.tencent.com/npm/@vue/compiler-vue2/-/compiler-vue2-2.7.16.tgz", - "integrity": "sha512-qYC3Psj9S/mfu9uVi5WvNZIzq+xnXMhOwbTFKKDD7b1lhpnn71jXSFdTQ+WsIEk0ONCd7VV2IMm7ONl6tbQ86A==", - "dev": true, - "license": "MIT", - "dependencies": { - "de-indent": "^1.0.2", - "he": "^1.2.0" - } - }, "node_modules/@vue/devtools-api": { "version": "7.7.7", "resolved": "https://mirrors.tencent.com/npm/@vue/devtools-api/-/devtools-api-7.7.7.tgz", @@ -1989,27 +2075,19 @@ } }, 
"node_modules/@vue/language-core": { - "version": "2.2.12", - "resolved": "https://mirrors.tencent.com/npm/@vue/language-core/-/language-core-2.2.12.tgz", - "integrity": "sha512-IsGljWbKGU1MZpBPN+BvPAdr55YPkj2nB/TBNGNC32Vy2qLG25DYu/NBN2vNtZqdRbTRjaoYrahLrToim2NanA==", + "version": "3.2.5", + "resolved": "https://mirrors.tencent.com/npm/@vue/language-core/-/language-core-3.2.5.tgz", + "integrity": "sha512-d3OIxN/+KRedeM5wQ6H6NIpwS3P5gC9nmyaHgBk+rO6dIsjY+tOh4UlPpiZbAh3YtLdCGEX4M16RmsBqPmJV+g==", "dev": true, + "license": "MIT", "dependencies": { - "@volar/language-core": "2.4.15", + "@volar/language-core": "2.4.28", "@vue/compiler-dom": "^3.5.0", - "@vue/compiler-vue2": "^2.7.16", "@vue/shared": "^3.5.0", - "alien-signals": "^1.0.3", - "minimatch": "^9.0.3", + "alien-signals": "^3.0.0", "muggle-string": "^0.4.1", - "path-browserify": "^1.0.1" - }, - "peerDependencies": { - "typescript": "*" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } + "path-browserify": "^1.0.1", + "picomatch": "^4.0.2" } }, "node_modules/@vue/reactivity": { @@ -2233,7 +2311,6 @@ "version": "8.15.0", "resolved": "https://mirrors.tencent.com/npm/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2254,11 +2331,9 @@ } }, "node_modules/ajv": { - "version": "8.17.1", - "resolved": "https://mirrors.tencent.com/npm/ajv/-/ajv-8.17.1.tgz", - "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", - "license": "MIT", - "peer": true, + "version": "8.18.0", + "resolved": "https://mirrors.tencent.com/npm/ajv/-/ajv-8.18.0.tgz", + "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -2274,6 +2349,7 @@ "version": "2.1.1", "resolved": 
"https://mirrors.tencent.com/npm/ajv-formats/-/ajv-formats-2.1.1.tgz", "integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==", + "license": "MIT", "dependencies": { "ajv": "^8.0.0" }, @@ -2298,9 +2374,9 @@ } }, "node_modules/alien-signals": { - "version": "1.0.13", - "resolved": "https://mirrors.tencent.com/npm/alien-signals/-/alien-signals-1.0.13.tgz", - "integrity": "sha512-OGj9yyTnJEttvzhTUWuscOvtqxq5vrhF7vL9oS0xJ2mK0ItPYP1/y+vCFebfxoEyAz0++1AIwJ5CMr+Fk3nDmg==", + "version": "3.1.2", + "resolved": "https://mirrors.tencent.com/npm/alien-signals/-/alien-signals-3.1.2.tgz", + "integrity": "sha512-d9dYqZTS90WLiU0I5c6DHj/HcKkF8ZyGN3G5x8wSbslulz70KOxaqCT0hQCo9KOyhVqzqGojvNdJXoTumZOtcw==", "dev": true, "license": "MIT" }, @@ -2319,24 +2395,30 @@ "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://mirrors.tencent.com/npm/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" }, "node_modules/axios": { - "version": "1.13.2", - "resolved": "https://mirrors.tencent.com/npm/axios/-/axios-1.13.2.tgz", - "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", - "license": "MIT", + "version": "1.13.5", + "resolved": "https://mirrors.tencent.com/npm/axios/-/axios-1.13.5.tgz", + "integrity": "sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==", "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.4", + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", "proxy-from-env": "^1.1.0" } }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://mirrors.tencent.com/npm/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": 
"sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true + "node_modules/baseline-browser-mapping": { + "version": "2.10.0", + "resolved": "https://mirrors.tencent.com/npm/baseline-browser-mapping/-/baseline-browser-mapping-2.10.0.tgz", + "integrity": "sha512-lIyg0szRfYbiy67j9KN8IyeD7q7hcmqnJ1ddWmNt19ItGpNN64mnllmxUNFIOdOm6by97jlL6wfpTTJrmnjWAA==", + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.cjs" + }, + "engines": { + "node": ">=6.0.0" + } }, "node_modules/birpc": { "version": "2.5.0", @@ -2347,19 +2429,10 @@ "url": "https://github.com/sponsors/antfu" } }, - "node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://mirrors.tencent.com/npm/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "dependencies": { - "balanced-match": "^1.0.0" - } - }, "node_modules/browserslist": { - "version": "4.25.1", - "resolved": "https://mirrors.tencent.com/npm/browserslist/-/browserslist-4.25.1.tgz", - "integrity": "sha512-KGj0KoOMXLpSNkkEI6Z6mShmQy0bc1I+T7K9N81k4WWMrfz+6fQ6es80B/YLAeRoKvjYE1YSHHOW1qe9xIVzHw==", + "version": "4.28.1", + "resolved": "https://mirrors.tencent.com/npm/browserslist/-/browserslist-4.28.1.tgz", + "integrity": "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==", "funding": [ { "type": "opencollective", @@ -2374,12 +2447,12 @@ "url": "https://github.com/sponsors/ai" } ], - "peer": true, "dependencies": { - "caniuse-lite": "^1.0.30001726", - "electron-to-chromium": "^1.5.173", - "node-releases": "^2.0.19", - "update-browserslist-db": "^1.1.3" + "baseline-browser-mapping": "^2.9.0", + "caniuse-lite": "^1.0.30001759", + "electron-to-chromium": "^1.5.263", + "node-releases": "^2.0.27", + "update-browserslist-db": "^1.2.0" }, "bin": { "browserslist": "cli.js" @@ -2398,6 +2471,7 @@ 
"version": "1.0.2", "resolved": "https://mirrors.tencent.com/npm/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" @@ -2407,9 +2481,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001727", - "resolved": "https://mirrors.tencent.com/npm/caniuse-lite/-/caniuse-lite-1.0.30001727.tgz", - "integrity": "sha512-pB68nIHmbN6L/4C6MH1DokyR3bYqFwjaSs/sWDHGj4CTcFtQUQMuJftVwWkXq7mNWOybD3KhUv3oWHoGxgP14Q==", + "version": "1.0.30001774", + "resolved": "https://mirrors.tencent.com/npm/caniuse-lite/-/caniuse-lite-1.0.30001774.tgz", + "integrity": "sha512-DDdwPGz99nmIEv216hKSgLD+D4ikHQHjBC/seF98N9CPqRX4M5mSxT9eTV6oyisnJcuzxtZy4n17yKKQYmYQOA==", "funding": [ { "type": "opencollective", @@ -2423,15 +2497,12 @@ "type": "github", "url": "https://github.com/sponsors/ai" } - ], - "license": "CC-BY-4.0" + ] }, "node_modules/chevrotain": { "version": "11.1.2", - "resolved": "https://registry.npmmirror.com/chevrotain/-/chevrotain-11.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/chevrotain/-/chevrotain-11.1.2.tgz", "integrity": "sha512-opLQzEVriiH1uUQ4Kctsd49bRoFDXGGSC4GUqj7pGyxM3RehRhvTlZJc1FL/Flew2p5uwxa1tUDWKzI4wNM8pg==", - "license": "Apache-2.0", - "peer": true, "dependencies": { "@chevrotain/cst-dts-gen": "11.1.2", "@chevrotain/gast": "11.1.2", @@ -2443,7 +2514,7 @@ }, "node_modules/chevrotain-allstar": { "version": "0.3.1", - "resolved": "https://registry.npmmirror.com/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz", "integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==", "license": "MIT", "dependencies": { @@ -2481,7 +2552,7 @@ }, "node_modules/confbox": { "version": "0.1.8", - "resolved": 
"https://registry.npmmirror.com/confbox/-/confbox-0.1.8.tgz", + "resolved": "https://mirrors.tencent.com/npm/confbox/-/confbox-0.1.8.tgz", "integrity": "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==", "license": "MIT" }, @@ -2504,9 +2575,15 @@ "url": "https://github.com/sponsors/mesqueeb" } }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://mirrors.tencent.com/npm/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", + "license": "MIT" + }, "node_modules/cose-base": { "version": "1.0.3", - "resolved": "https://registry.npmmirror.com/cose-base/-/cose-base-1.0.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/cose-base/-/cose-base-1.0.3.tgz", "integrity": "sha512-s9whTXInMSgAp/NVXVNuVxVKzGH2qck3aQlVHxDCdAEPgtMKwc4Wq6/QKhgdEdgbLSi9rBTAcPoRa6JpiG4ksg==", "license": "MIT", "dependencies": { @@ -2556,17 +2633,16 @@ }, "node_modules/cytoscape": { "version": "3.33.1", - "resolved": "https://registry.npmmirror.com/cytoscape/-/cytoscape-3.33.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/cytoscape/-/cytoscape-3.33.1.tgz", "integrity": "sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10" } }, "node_modules/cytoscape-cose-bilkent": { "version": "4.1.0", - "resolved": "https://registry.npmmirror.com/cytoscape-cose-bilkent/-/cytoscape-cose-bilkent-4.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/cytoscape-cose-bilkent/-/cytoscape-cose-bilkent-4.1.0.tgz", "integrity": "sha512-wgQlVIUJF13Quxiv5e1gstZ08rnZj2XaLHGoFMYXz7SkNfCDOOteKBE6SYRfA9WxxI/iBc3ajfDoc6hb/MRAHQ==", "license": "MIT", "dependencies": { @@ -2578,7 +2654,7 @@ }, "node_modules/cytoscape-fcose": { "version": "2.2.0", - "resolved": "https://registry.npmmirror.com/cytoscape-fcose/-/cytoscape-fcose-2.2.0.tgz", + 
"resolved": "https://mirrors.tencent.com/npm/cytoscape-fcose/-/cytoscape-fcose-2.2.0.tgz", "integrity": "sha512-ki1/VuRIHFCzxWNrsshHYPs6L7TvLu3DL+TyIGEsRcvVERmxokbf5Gdk7mFxZnTdiGtnA4cfSmjZJMviqSuZrQ==", "license": "MIT", "dependencies": { @@ -2590,7 +2666,7 @@ }, "node_modules/cytoscape-fcose/node_modules/cose-base": { "version": "2.2.0", - "resolved": "https://registry.npmmirror.com/cose-base/-/cose-base-2.2.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/cose-base/-/cose-base-2.2.0.tgz", "integrity": "sha512-AzlgcsCbUMymkADOJtQm3wO9S3ltPfYOFD5033keQn9NJzIbtnZj+UdBJe7DYml/8TdbtHJW3j58SOnKhWY/5g==", "license": "MIT", "dependencies": { @@ -2599,13 +2675,13 @@ }, "node_modules/cytoscape-fcose/node_modules/layout-base": { "version": "2.0.1", - "resolved": "https://registry.npmmirror.com/layout-base/-/layout-base-2.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/layout-base/-/layout-base-2.0.1.tgz", "integrity": "sha512-dp3s92+uNI1hWIpPGH3jK2kxE2lMjdXdr+DH8ynZHpd6PUlH6x6cbuXnoMmiNumznqaNO31xu9e79F0uuZ0JFg==", "license": "MIT" }, "node_modules/d3": { "version": "7.9.0", - "resolved": "https://registry.npmmirror.com/d3/-/d3-7.9.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3/-/d3-7.9.0.tgz", "integrity": "sha512-e1U46jVP+w7Iut8Jt8ri1YsPOvFpg46k+K8TpCb0P+zjCkjkPnV7WzfDJzMHy1LnA+wj5pLT1wjO901gLXeEhA==", "license": "ISC", "dependencies": { @@ -2646,7 +2722,7 @@ }, "node_modules/d3-array": { "version": "3.2.4", - "resolved": "https://registry.npmmirror.com/d3-array/-/d3-array-3.2.4.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-array/-/d3-array-3.2.4.tgz", "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", "license": "ISC", "dependencies": { @@ -2658,7 +2734,7 @@ }, "node_modules/d3-axis": { "version": "3.0.0", - "resolved": "https://registry.npmmirror.com/d3-axis/-/d3-axis-3.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-axis/-/d3-axis-3.0.0.tgz", "integrity": 
"sha512-IH5tgjV4jE/GhHkRV0HiVYPDtvfjHQlQfJHs0usq7M30XcSBvOotpmH1IgkcXsO/5gEQZD43B//fc7SRT5S+xw==", "license": "ISC", "engines": { @@ -2667,7 +2743,7 @@ }, "node_modules/d3-brush": { "version": "3.0.0", - "resolved": "https://registry.npmmirror.com/d3-brush/-/d3-brush-3.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-brush/-/d3-brush-3.0.0.tgz", "integrity": "sha512-ALnjWlVYkXsVIGlOsuWH1+3udkYFI48Ljihfnh8FZPF2QS9o+PzGLBslO0PjzVoHLZ2KCVgAM8NVkXPJB2aNnQ==", "license": "ISC", "dependencies": { @@ -2683,7 +2759,7 @@ }, "node_modules/d3-chord": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-chord/-/d3-chord-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-chord/-/d3-chord-3.0.1.tgz", "integrity": "sha512-VE5S6TNa+j8msksl7HwjxMHDM2yNK3XCkusIlpX5kwauBfXuyLAtNg9jCp/iHH61tgI4sb6R/EIMWCqEIdjT/g==", "license": "ISC", "dependencies": { @@ -2695,7 +2771,7 @@ }, "node_modules/d3-color": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/d3-color/-/d3-color-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-color/-/d3-color-3.1.0.tgz", "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", "license": "ISC", "engines": { @@ -2704,7 +2780,7 @@ }, "node_modules/d3-contour": { "version": "4.0.2", - "resolved": "https://registry.npmmirror.com/d3-contour/-/d3-contour-4.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-contour/-/d3-contour-4.0.2.tgz", "integrity": "sha512-4EzFTRIikzs47RGmdxbeUvLWtGedDUNkTcmzoeyg4sP/dvCexO47AaQL7VKy/gul85TOxw+IBgA8US2xwbToNA==", "license": "ISC", "dependencies": { @@ -2716,7 +2792,7 @@ }, "node_modules/d3-delaunay": { "version": "6.0.4", - "resolved": "https://registry.npmmirror.com/d3-delaunay/-/d3-delaunay-6.0.4.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-delaunay/-/d3-delaunay-6.0.4.tgz", "integrity": "sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==", 
"license": "ISC", "dependencies": { @@ -2728,7 +2804,7 @@ }, "node_modules/d3-dispatch": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-dispatch/-/d3-dispatch-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-dispatch/-/d3-dispatch-3.0.1.tgz", "integrity": "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==", "license": "ISC", "engines": { @@ -2737,7 +2813,7 @@ }, "node_modules/d3-drag": { "version": "3.0.0", - "resolved": "https://registry.npmmirror.com/d3-drag/-/d3-drag-3.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-drag/-/d3-drag-3.0.0.tgz", "integrity": "sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg==", "license": "ISC", "dependencies": { @@ -2750,7 +2826,7 @@ }, "node_modules/d3-dsv": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-dsv/-/d3-dsv-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-dsv/-/d3-dsv-3.0.1.tgz", "integrity": "sha512-UG6OvdI5afDIFP9w4G0mNq50dSOsXHJaRE8arAS5o9ApWnIElp8GZw1Dun8vP8OyHOZ/QJUKUJwxiiCCnUwm+Q==", "license": "ISC", "dependencies": { @@ -2775,7 +2851,7 @@ }, "node_modules/d3-dsv/node_modules/commander": { "version": "7.2.0", - "resolved": "https://registry.npmmirror.com/commander/-/commander-7.2.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/commander/-/commander-7.2.0.tgz", "integrity": "sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==", "license": "MIT", "engines": { @@ -2784,7 +2860,7 @@ }, "node_modules/d3-ease": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-ease/-/d3-ease-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-ease/-/d3-ease-3.0.1.tgz", "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", "license": "BSD-3-Clause", "engines": { @@ -2793,7 +2869,7 @@ }, "node_modules/d3-fetch": { "version": 
"3.0.1", - "resolved": "https://registry.npmmirror.com/d3-fetch/-/d3-fetch-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-fetch/-/d3-fetch-3.0.1.tgz", "integrity": "sha512-kpkQIM20n3oLVBKGg6oHrUchHM3xODkTzjMoj7aWQFq5QEM+R6E4WkzT5+tojDY7yjez8KgCBRoj4aEr99Fdqw==", "license": "ISC", "dependencies": { @@ -2805,7 +2881,7 @@ }, "node_modules/d3-force": { "version": "3.0.0", - "resolved": "https://registry.npmmirror.com/d3-force/-/d3-force-3.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-force/-/d3-force-3.0.0.tgz", "integrity": "sha512-zxV/SsA+U4yte8051P4ECydjD/S+qeYtnaIyAs9tgHCqfguma/aAQDjo85A9Z6EKhBirHRJHXIgJUlffT4wdLg==", "license": "ISC", "dependencies": { @@ -2819,7 +2895,7 @@ }, "node_modules/d3-format": { "version": "3.1.2", - "resolved": "https://registry.npmmirror.com/d3-format/-/d3-format-3.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-format/-/d3-format-3.1.2.tgz", "integrity": "sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==", "license": "ISC", "engines": { @@ -2828,7 +2904,7 @@ }, "node_modules/d3-geo": { "version": "3.1.1", - "resolved": "https://registry.npmmirror.com/d3-geo/-/d3-geo-3.1.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-geo/-/d3-geo-3.1.1.tgz", "integrity": "sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q==", "license": "ISC", "dependencies": { @@ -2840,7 +2916,7 @@ }, "node_modules/d3-hierarchy": { "version": "3.1.2", - "resolved": "https://registry.npmmirror.com/d3-hierarchy/-/d3-hierarchy-3.1.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-hierarchy/-/d3-hierarchy-3.1.2.tgz", "integrity": "sha512-FX/9frcub54beBdugHjDCdikxThEqjnR93Qt7PvQTOHxyiNCAlvMrHhclk3cD5VeAaq9fxmfRp+CnWw9rEMBuA==", "license": "ISC", "engines": { @@ -2849,7 +2925,7 @@ }, "node_modules/d3-interpolate": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-interpolate/-/d3-interpolate-3.0.1.tgz", + 
"resolved": "https://mirrors.tencent.com/npm/d3-interpolate/-/d3-interpolate-3.0.1.tgz", "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", "license": "ISC", "dependencies": { @@ -2861,7 +2937,7 @@ }, "node_modules/d3-path": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/d3-path/-/d3-path-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-path/-/d3-path-3.1.0.tgz", "integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==", "license": "ISC", "engines": { @@ -2870,7 +2946,7 @@ }, "node_modules/d3-polygon": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-polygon/-/d3-polygon-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-polygon/-/d3-polygon-3.0.1.tgz", "integrity": "sha512-3vbA7vXYwfe1SYhED++fPUQlWSYTTGmFmQiany/gdbiWgU/iEyQzyymwL9SkJjFFuCS4902BSzewVGsHHmHtXg==", "license": "ISC", "engines": { @@ -2879,7 +2955,7 @@ }, "node_modules/d3-quadtree": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-quadtree/-/d3-quadtree-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-quadtree/-/d3-quadtree-3.0.1.tgz", "integrity": "sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==", "license": "ISC", "engines": { @@ -2888,7 +2964,7 @@ }, "node_modules/d3-random": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-random/-/d3-random-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-random/-/d3-random-3.0.1.tgz", "integrity": "sha512-FXMe9GfxTxqd5D6jFsQ+DJ8BJS4E/fT5mqqdjovykEB2oFbTMDVdg1MGFxfQW+FBOGoB++k8swBrgwSHT1cUXQ==", "license": "ISC", "engines": { @@ -2897,7 +2973,7 @@ }, "node_modules/d3-sankey": { "version": "0.12.3", - "resolved": "https://registry.npmmirror.com/d3-sankey/-/d3-sankey-0.12.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-sankey/-/d3-sankey-0.12.3.tgz", "integrity": 
"sha512-nQhsBRmM19Ax5xEIPLMY9ZmJ/cDvd1BG3UVvt5h3WRxKg5zGRbvnteTyWAbzeSvlh3tW7ZEmq4VwR5mB3tutmQ==", "license": "BSD-3-Clause", "dependencies": { @@ -2907,7 +2983,7 @@ }, "node_modules/d3-sankey/node_modules/d3-array": { "version": "2.12.1", - "resolved": "https://registry.npmmirror.com/d3-array/-/d3-array-2.12.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-array/-/d3-array-2.12.1.tgz", "integrity": "sha512-B0ErZK/66mHtEsR1TkPEEkwdy+WDesimkM5gpZr5Dsg54BiTA5RXtYW5qTLIAcekaS9xfZrzBLF/OAkB3Qn1YQ==", "license": "BSD-3-Clause", "dependencies": { @@ -2916,13 +2992,13 @@ }, "node_modules/d3-sankey/node_modules/d3-path": { "version": "1.0.9", - "resolved": "https://registry.npmmirror.com/d3-path/-/d3-path-1.0.9.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-path/-/d3-path-1.0.9.tgz", "integrity": "sha512-VLaYcn81dtHVTjEHd8B+pbe9yHWpXKZUC87PzoFmsFrJqgFwDe/qxfp5MlfsfM1V5E/iVt0MmEbWQ7FVIXh/bg==", "license": "BSD-3-Clause" }, "node_modules/d3-sankey/node_modules/d3-shape": { "version": "1.3.7", - "resolved": "https://registry.npmmirror.com/d3-shape/-/d3-shape-1.3.7.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-shape/-/d3-shape-1.3.7.tgz", "integrity": "sha512-EUkvKjqPFUAZyOlhY5gzCxCeI0Aep04LwIRpsZ/mLFelJiUfnK56jo5JMDSE7yyP2kLSb6LtF+S5chMk7uqPqw==", "license": "BSD-3-Clause", "dependencies": { @@ -2931,13 +3007,13 @@ }, "node_modules/d3-sankey/node_modules/internmap": { "version": "1.0.1", - "resolved": "https://registry.npmmirror.com/internmap/-/internmap-1.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/internmap/-/internmap-1.0.1.tgz", "integrity": "sha512-lDB5YccMydFBtasVtxnZ3MRBHuaoE8GKsppq+EchKL2U4nK/DmEpPHNH8MZe5HkMtpSiTSOZwfN0tzYjO/lJEw==", "license": "ISC" }, "node_modules/d3-scale": { "version": "4.0.2", - "resolved": "https://registry.npmmirror.com/d3-scale/-/d3-scale-4.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-scale/-/d3-scale-4.0.2.tgz", "integrity": 
"sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", "license": "ISC", "dependencies": { @@ -2953,7 +3029,7 @@ }, "node_modules/d3-scale-chromatic": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", "integrity": "sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==", "license": "ISC", "dependencies": { @@ -2966,17 +3042,16 @@ }, "node_modules/d3-selection": { "version": "3.0.0", - "resolved": "https://registry.npmmirror.com/d3-selection/-/d3-selection-3.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } }, "node_modules/d3-shape": { "version": "3.2.0", - "resolved": "https://registry.npmmirror.com/d3-shape/-/d3-shape-3.2.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-shape/-/d3-shape-3.2.0.tgz", "integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==", "license": "ISC", "dependencies": { @@ -2988,7 +3063,7 @@ }, "node_modules/d3-time": { "version": "3.1.0", - "resolved": "https://registry.npmmirror.com/d3-time/-/d3-time-3.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-time/-/d3-time-3.1.0.tgz", "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", "license": "ISC", "dependencies": { @@ -3000,7 +3075,7 @@ }, "node_modules/d3-time-format": { "version": "4.1.0", - "resolved": "https://registry.npmmirror.com/d3-time-format/-/d3-time-format-4.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-time-format/-/d3-time-format-4.1.0.tgz", "integrity": 
"sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", "license": "ISC", "dependencies": { @@ -3012,7 +3087,7 @@ }, "node_modules/d3-timer": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-timer/-/d3-timer-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-timer/-/d3-timer-3.0.1.tgz", "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", "license": "ISC", "engines": { @@ -3021,7 +3096,7 @@ }, "node_modules/d3-transition": { "version": "3.0.1", - "resolved": "https://registry.npmmirror.com/d3-transition/-/d3-transition-3.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-transition/-/d3-transition-3.0.1.tgz", "integrity": "sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w==", "license": "ISC", "dependencies": { @@ -3040,7 +3115,7 @@ }, "node_modules/d3-zoom": { "version": "3.0.0", - "resolved": "https://registry.npmmirror.com/d3-zoom/-/d3-zoom-3.0.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/d3-zoom/-/d3-zoom-3.0.0.tgz", "integrity": "sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==", "license": "ISC", "dependencies": { @@ -3056,7 +3131,7 @@ }, "node_modules/dagre-d3-es": { "version": "7.0.13", - "resolved": "https://registry.npmmirror.com/dagre-d3-es/-/dagre-d3-es-7.0.13.tgz", + "resolved": "https://mirrors.tencent.com/npm/dagre-d3-es/-/dagre-d3-es-7.0.13.tgz", "integrity": "sha512-efEhnxpSuwpYOKRm/L5KbqoZmNNukHa/Flty4Wp62JRvgH2ojwVgPgdYyr4twpieZnyRDdIH7PY2mopX26+j2Q==", "license": "MIT", "dependencies": { @@ -3066,16 +3141,10 @@ }, "node_modules/dayjs": { "version": "1.11.19", - "resolved": "https://registry.npmmirror.com/dayjs/-/dayjs-1.11.19.tgz", + "resolved": "https://mirrors.tencent.com/npm/dayjs/-/dayjs-1.11.19.tgz", "integrity": 
"sha512-t5EcLVS6QPBNqM2z8fakk/NKel+Xzshgt8FFKAn+qwlD1pzZWxh0nVCrvFK7ZDb6XucZeF9z8C7CBWTRIVApAw==", "license": "MIT" }, - "node_modules/de-indent": { - "version": "1.0.2", - "resolved": "https://mirrors.tencent.com/npm/de-indent/-/de-indent-1.0.2.tgz", - "integrity": "sha512-e/1zu3xH5MQryN2zdVaF0OrdNLUbvWxzMbi+iNA6Bky7l1RoP8a2fIbRocyHclXt/arDrrR6lL3TqFD9pMQTsg==", - "dev": true - }, "node_modules/debug": { "version": "4.4.1", "resolved": "https://mirrors.tencent.com/npm/debug/-/debug-4.4.1.tgz", @@ -3095,7 +3164,7 @@ }, "node_modules/delaunator": { "version": "5.0.1", - "resolved": "https://registry.npmmirror.com/delaunator/-/delaunator-5.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/delaunator/-/delaunator-5.0.1.tgz", "integrity": "sha512-8nvh+XBe96aCESrGOqMp/84b13H9cdKbG5P2ejQCh4d4sK9RL4371qou9drQjMhvnPmhWl5hnmqbEE0fXr9Xnw==", "license": "ISC", "dependencies": { @@ -3111,6 +3180,15 @@ "node": ">=0.4.0" } }, + "node_modules/docx-preview": { + "version": "0.3.7", + "resolved": "https://mirrors.tencent.com/npm/docx-preview/-/docx-preview-0.3.7.tgz", + "integrity": "sha512-Lav69CTA/IYZPJTsKH7oYeoZjyg96N0wEJMNslGJnZJ+dMUZK85Lt5ASC79yUlD48ecWjuv+rkcmFt6EVPV0Xg==", + "license": "Apache-2.0", + "dependencies": { + "jszip": ">=3.0.0" + } + }, "node_modules/dompurify": { "version": "3.2.6", "resolved": "https://mirrors.tencent.com/npm/dompurify/-/dompurify-3.2.6.tgz", @@ -3124,7 +3202,6 @@ "version": "1.0.1", "resolved": "https://mirrors.tencent.com/npm/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", @@ -3135,17 +3212,19 @@ } }, "node_modules/electron-to-chromium": { - "version": "1.5.183", - "resolved": "https://mirrors.tencent.com/npm/electron-to-chromium/-/electron-to-chromium-1.5.183.tgz", - "integrity": 
"sha512-vCrDBYjQCAEefWGjlK3EpoSKfKbT10pR4XXPdn65q7snuNOZnthoVpBfZPykmDapOKfoD+MMIPG8ZjKyyc9oHA==" + "version": "1.5.302", + "resolved": "https://mirrors.tencent.com/npm/electron-to-chromium/-/electron-to-chromium-1.5.302.tgz", + "integrity": "sha512-sM6HAN2LyK82IyPBpznDRqlTQAtuSaO+ShzFiWTvoMJLHyZ+Y39r8VMfHzwbU8MVBzQ4Wdn85+wlZl2TLGIlwg==", + "license": "ISC" }, "node_modules/enhanced-resolve": { - "version": "5.18.2", - "resolved": "https://mirrors.tencent.com/npm/enhanced-resolve/-/enhanced-resolve-5.18.2.tgz", - "integrity": "sha512-6Jw4sE1maoRJo3q8MsSIn2onJFbLTOjY9hlx4DZXmOKvLRd1Ok2kXmAGXaafL2+ijsJZ1ClYbl/pmqr9+k4iUQ==", + "version": "5.19.0", + "resolved": "https://mirrors.tencent.com/npm/enhanced-resolve/-/enhanced-resolve-5.19.0.tgz", + "integrity": "sha512-phv3E1Xl4tQOShqSte26C7Fl84EwUdZsyOuSSk9qtAGyyQs2s3jJzComh+Abf4g187lUUAvH+H26omrqia2aGg==", + "license": "MIT", "dependencies": { "graceful-fs": "^4.2.4", - "tapable": "^2.2.0" + "tapable": "^2.3.0" }, "engines": { "node": ">=10.13.0" @@ -3189,14 +3268,16 @@ "version": "1.3.0", "resolved": "https://mirrors.tencent.com/npm/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", "engines": { "node": ">= 0.4" } }, "node_modules/es-module-lexer": { - "version": "1.7.0", - "resolved": "https://mirrors.tencent.com/npm/es-module-lexer/-/es-module-lexer-1.7.0.tgz", - "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==" + "version": "2.0.0", + "resolved": "https://mirrors.tencent.com/npm/es-module-lexer/-/es-module-lexer-2.0.0.tgz", + "integrity": "sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==", + "license": "MIT" }, "node_modules/es-object-atoms": { "version": "1.1.1", @@ -3334,9 +3415,9 @@ "license": "MIT" }, "node_modules/fast-uri": { - "version": "3.0.6", - "resolved": 
"https://mirrors.tencent.com/npm/fast-uri/-/fast-uri-3.0.6.tgz", - "integrity": "sha512-Atfo14OibSv5wAp4VWNsFYE1AchQRTv9cBGWET4pZWHzYshFSS9NQI6I57rdKn9croWVMbYFbLhJ+yJvmZIIHw==", + "version": "3.1.0", + "resolved": "https://mirrors.tencent.com/npm/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", "funding": [ { "type": "github", @@ -3368,16 +3449,15 @@ } }, "node_modules/follow-redirects": { - "version": "1.15.9", - "resolved": "https://mirrors.tencent.com/npm/follow-redirects/-/follow-redirects-1.15.9.tgz", - "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", + "version": "1.15.11", + "resolved": "https://mirrors.tencent.com/npm/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", "funding": [ { "type": "individual", "url": "https://github.com/sponsors/RubenVerborgh" } ], - "license": "MIT", "engines": { "node": ">=4.0" }, @@ -3388,9 +3468,9 @@ } }, "node_modules/form-data": { - "version": "4.0.4", - "resolved": "https://mirrors.tencent.com/npm/form-data/-/form-data-4.0.4.tgz", - "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", + "version": "4.0.5", + "resolved": "https://mirrors.tencent.com/npm/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", @@ -3438,7 +3518,6 @@ "version": "1.3.0", "resolved": "https://mirrors.tencent.com/npm/get-intrinsic/-/get-intrinsic-1.3.0.tgz", "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": 
"^1.0.1", @@ -3481,6 +3560,7 @@ "version": "1.2.0", "resolved": "https://mirrors.tencent.com/npm/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -3496,7 +3576,7 @@ }, "node_modules/hachure-fill": { "version": "0.5.2", - "resolved": "https://registry.npmmirror.com/hachure-fill/-/hachure-fill-0.5.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/hachure-fill/-/hachure-fill-0.5.2.tgz", "integrity": "sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==", "license": "MIT" }, @@ -3513,7 +3593,6 @@ "version": "1.1.0", "resolved": "https://mirrors.tencent.com/npm/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -3547,16 +3626,6 @@ "node": ">= 0.4" } }, - "node_modules/he": { - "version": "1.2.0", - "resolved": "https://mirrors.tencent.com/npm/he/-/he-1.2.0.tgz", - "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", - "dev": true, - "license": "MIT", - "bin": { - "he": "bin/he" - } - }, "node_modules/highlight.js": { "version": "11.11.1", "resolved": "https://registry.npmmirror.com/highlight.js/-/highlight.js-11.11.1.tgz", @@ -3596,9 +3665,21 @@ "node": ">=0.10.0" } }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://mirrors.tencent.com/npm/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", + "license": "MIT" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://mirrors.tencent.com/npm/inherits/-/inherits-2.0.4.tgz", + "integrity": 
"sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, "node_modules/internmap": { "version": "2.0.3", - "resolved": "https://registry.npmmirror.com/internmap/-/internmap-2.0.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/internmap/-/internmap-2.0.3.tgz", "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", "license": "ISC", "engines": { @@ -3612,6 +3693,12 @@ "dev": true, "license": "MIT" }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://mirrors.tencent.com/npm/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", + "license": "MIT" + }, "node_modules/isexe": { "version": "3.1.1", "resolved": "https://mirrors.tencent.com/npm/isexe/-/isexe-3.1.1.tgz", @@ -3625,6 +3712,7 @@ "version": "27.5.1", "resolved": "https://mirrors.tencent.com/npm/jest-worker/-/jest-worker-27.5.1.tgz", "integrity": "sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==", + "license": "MIT", "dependencies": { "@types/node": "*", "merge-stream": "^2.0.0", @@ -3680,9 +3768,21 @@ "node": ">=6" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://mirrors.tencent.com/npm/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "license": "(MIT OR GPL-3.0-or-later)", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, "node_modules/katex": { "version": "0.16.33", - "resolved": "https://registry.npmmirror.com/katex/-/katex-0.16.33.tgz", + "resolved": "https://mirrors.tencent.com/npm/katex/-/katex-0.16.33.tgz", "integrity": "sha512-q3N5u+1sY9Bu7T4nlXoiRBXWfwSefNGoKeOwekV+gw0cAXQlz2Ww6BLcmBxVDeXBMUDQv6fK5bcNaJLxob3ZQA==", "funding": 
[ "https://opencollective.com/katex", @@ -3698,7 +3798,7 @@ }, "node_modules/katex/node_modules/commander": { "version": "8.3.0", - "resolved": "https://registry.npmmirror.com/commander/-/commander-8.3.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/commander/-/commander-8.3.0.tgz", "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==", "license": "MIT", "engines": { @@ -3707,12 +3807,12 @@ }, "node_modules/khroma": { "version": "2.1.0", - "resolved": "https://registry.npmmirror.com/khroma/-/khroma-2.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/khroma/-/khroma-2.1.0.tgz", "integrity": "sha512-Ls993zuzfayK269Svk9hzpeGUKob/sIgZzyHYdjQoAdQetRKpOLj+k/QQQ/6Qi0Yz65mlROrfd+Ev+1+7dz9Kw==" }, "node_modules/langium": { "version": "4.2.1", - "resolved": "https://registry.npmmirror.com/langium/-/langium-4.2.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/langium/-/langium-4.2.1.tgz", "integrity": "sha512-zu9QWmjpzJcomzdJQAHgDVhLGq5bLosVak1KVa40NzQHXfqr4eAHupvnPOVXEoLkg6Ocefvf/93d//SB7du4YQ==", "license": "MIT", "dependencies": { @@ -3729,7 +3829,7 @@ }, "node_modules/layout-base": { "version": "1.0.2", - "resolved": "https://registry.npmmirror.com/layout-base/-/layout-base-1.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/layout-base/-/layout-base-1.0.2.tgz", "integrity": "sha512-8h2oVEZNktL4BH2JCOI90iD1yXwL6iNW7KcCKT2QZgQJR2vbqDsldCTPRU9NifTCqHZci57XvQQ15YTu+sTYPg==", "license": "MIT" }, @@ -3738,7 +3838,6 @@ "resolved": "https://mirrors.tencent.com/npm/less/-/less-4.3.0.tgz", "integrity": "sha512-X9RyH9fvemArzfdP8Pi3irr7lor2Ok4rOttDXBhlwDg+wKQsXOXgHWduAJE1EsF7JJx0w0bcO6BC6tCKKYnXKA==", "dev": true, - "peer": true, "dependencies": { "copy-anything": "^2.0.1", "parse-node-version": "^1.0.1", @@ -3786,20 +3885,32 @@ } } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://mirrors.tencent.com/npm/lie/-/lie-3.3.0.tgz", + "integrity": 
"sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "license": "MIT", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/loader-runner": { - "version": "4.3.0", - "resolved": "https://mirrors.tencent.com/npm/loader-runner/-/loader-runner-4.3.0.tgz", - "integrity": "sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==", + "version": "4.3.1", + "resolved": "https://mirrors.tencent.com/npm/loader-runner/-/loader-runner-4.3.1.tgz", + "integrity": "sha512-IWqP2SCPhyVFTBtRcgMHdzlf9ul25NwaFx4wCEH/KjAXuuHY4yNjvPXsBokp8jCB936PyWRaPKUNh8NvylLp2Q==", "license": "MIT", "engines": { "node": ">=6.11.5" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" } }, "node_modules/lodash-es": { "version": "4.17.23", - "resolved": "https://registry.npmmirror.com/lodash-es/-/lodash-es-4.17.23.tgz", - "integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==", - "license": "MIT" + "resolved": "https://mirrors.tencent.com/npm/lodash-es/-/lodash-es-4.17.23.tgz", + "integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==" }, "node_modules/lru-cache": { "version": "5.1.1", @@ -3882,7 +3993,7 @@ }, "node_modules/mermaid": { "version": "11.12.3", - "resolved": "https://registry.npmmirror.com/mermaid/-/mermaid-11.12.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/mermaid/-/mermaid-11.12.3.tgz", "integrity": "sha512-wN5ZSgJQIC+CHJut9xaKWsknLxaFBwCPwPkGTSUYrTiHORWvpT8RxGk849HPnpUAQ+/9BPRqYb80jTpearrHzQ==", "license": "MIT", "dependencies": { @@ -3910,7 +4021,7 @@ }, "node_modules/mermaid/node_modules/marked": { "version": "16.4.2", - "resolved": "https://registry.npmmirror.com/marked/-/marked-16.4.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/marked/-/marked-16.4.2.tgz", "integrity": 
"sha512-TI3V8YYWvkVf3KJe1dRkpnjs68JUPyEa5vjKrp1XEEJUAOaQc+Qj+L1qWbPd0SJuAdQkFU0h73sXXqwDYxsiDA==", "license": "MIT", "bin": { @@ -3954,21 +4065,6 @@ "node": ">= 0.6" } }, - "node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://mirrors.tencent.com/npm/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/mitt": { "version": "3.0.1", "resolved": "https://mirrors.tencent.com/npm/mitt/-/mitt-3.0.1.tgz", @@ -3976,7 +4072,7 @@ }, "node_modules/mlly": { "version": "1.8.0", - "resolved": "https://registry.npmmirror.com/mlly/-/mlly-1.8.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/mlly/-/mlly-1.8.0.tgz", "integrity": "sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g==", "license": "MIT", "dependencies": { @@ -4039,9 +4135,9 @@ "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, "node_modules/node-releases": { - "version": "2.0.19", - "resolved": "https://mirrors.tencent.com/npm/node-releases/-/node-releases-2.0.19.tgz", - "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==", + "version": "2.0.27", + "resolved": "https://mirrors.tencent.com/npm/node-releases/-/node-releases-2.0.27.tgz", + "integrity": "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==", "license": "MIT" }, "node_modules/npm-normalize-package-bin": { @@ -4055,16 +4151,16 @@ } }, "node_modules/npm-run-all2": { - "version": "7.0.2", - "resolved": "https://mirrors.tencent.com/npm/npm-run-all2/-/npm-run-all2-7.0.2.tgz", - "integrity": 
"sha512-7tXR+r9hzRNOPNTvXegM+QzCuMjzUIIq66VDunL6j60O4RrExx32XUhlrS7UK4VcdGw5/Wxzb3kfNcFix9JKDA==", + "version": "8.0.4", + "resolved": "https://mirrors.tencent.com/npm/npm-run-all2/-/npm-run-all2-8.0.4.tgz", + "integrity": "sha512-wdbB5My48XKp2ZfJUlhnLVihzeuA1hgBnqB2J9ahV77wLS+/YAJAlN8I+X3DIFIPZ3m5L7nplmlbhNiFDmXRDA==", "dev": true, "license": "MIT", "dependencies": { "ansi-styles": "^6.2.1", "cross-spawn": "^7.0.6", "memorystream": "^0.3.1", - "minimatch": "^9.0.0", + "picomatch": "^4.0.2", "pidtree": "^0.6.0", "read-package-json-fast": "^4.0.0", "shell-quote": "^1.7.3", @@ -4077,13 +4173,13 @@ "run-s": "bin/run-s/index.js" }, "engines": { - "node": "^18.17.0 || >=20.5.0", - "npm": ">= 9" + "node": "^20.5.0 || >=22.0.0", + "npm": ">= 10" } }, "node_modules/package-manager-detector": { "version": "1.6.0", - "resolved": "https://registry.npmmirror.com/package-manager-detector/-/package-manager-detector-1.6.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/package-manager-detector/-/package-manager-detector-1.6.0.tgz", "integrity": "sha512-61A5ThoTiDG/C8s8UMZwSorAGwMJ0ERVGj2OjoW5pAalsNOg15+iQiPzrLJ4jhZ1HJzmC2PIHT2oEiH3R5fzNA==", "license": "MIT" }, @@ -4103,6 +4199,12 @@ "@pagefind/windows-x64": "1.3.0" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://mirrors.tencent.com/npm/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "license": "(MIT AND Zlib)" + }, "node_modules/papaparse": { "version": "5.5.3", "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.5.3.tgz", @@ -4123,11 +4225,12 @@ "version": "1.0.1", "resolved": "https://mirrors.tencent.com/npm/path-browserify/-/path-browserify-1.0.1.tgz", "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/path-data-parser": { "version": "0.1.0", - "resolved": 
"https://registry.npmmirror.com/path-data-parser/-/path-data-parser-0.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/path-data-parser/-/path-data-parser-0.1.0.tgz", "integrity": "sha512-NOnmBpt5Y2RWbuv0LMzsayp3lVylAHLPUTut412ZA3l+C4uw4ZVkQbjShYCQ8TCpUMdPapr4YjUqLYD6v68j+w==", "license": "MIT" }, @@ -4142,7 +4245,7 @@ }, "node_modules/pathe": { "version": "2.0.3", - "resolved": "https://registry.npmmirror.com/pathe/-/pathe-2.0.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/pathe/-/pathe-2.0.3.tgz", "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", "license": "MIT" }, @@ -4162,7 +4265,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -4216,7 +4318,7 @@ }, "node_modules/pkg-types": { "version": "1.3.1", - "resolved": "https://registry.npmmirror.com/pkg-types/-/pkg-types-1.3.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/pkg-types/-/pkg-types-1.3.1.tgz", "integrity": "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==", "license": "MIT", "dependencies": { @@ -4227,13 +4329,13 @@ }, "node_modules/points-on-curve": { "version": "0.2.0", - "resolved": "https://registry.npmmirror.com/points-on-curve/-/points-on-curve-0.2.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/points-on-curve/-/points-on-curve-0.2.0.tgz", "integrity": "sha512-0mYKnYYe9ZcqMCWhUjItv/oHjvgEsfKvnUTg8sAtnHr3GVy7rGkXCb6d5cSyqrWqL4k81b9CPg3urd+T7aop3A==", "license": "MIT" }, "node_modules/points-on-path": { "version": "0.2.1", - "resolved": "https://registry.npmmirror.com/points-on-path/-/points-on-path-0.2.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/points-on-path/-/points-on-path-0.2.1.tgz", "integrity": "sha512-25ClnWWuw7JbWZcgqY/gJ4FQWadKxGWk+3kR/7kD0tCaDtPPMj7oHu2ToLaVhfpnHrZzYby2w6tUA0eOIuUg8g==", 
"license": "MIT", "dependencies": { @@ -4268,6 +4370,12 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://mirrors.tencent.com/npm/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", + "license": "MIT" + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://mirrors.tencent.com/npm/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -4302,10 +4410,32 @@ "node": "^18.17.0 || >=20.5.0" } }, + "node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://mirrors.tencent.com/npm/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/readable-stream/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://mirrors.tencent.com/npm/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, "node_modules/require-from-string": { "version": "2.0.2", "resolved": "https://mirrors.tencent.com/npm/require-from-string/-/require-from-string-2.0.2.tgz", "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", "engines": { "node": ">=0.10.0" } @@ -4318,16 +4448,15 @@ }, "node_modules/robust-predicates": { "version": "3.0.2", - "resolved": "https://registry.npmmirror.com/robust-predicates/-/robust-predicates-3.0.2.tgz", + "resolved": 
"https://mirrors.tencent.com/npm/robust-predicates/-/robust-predicates-3.0.2.tgz", "integrity": "sha512-IXgzBWvWQwE6PrDI05OvmXUIruQTcoMDzRsOd5CDvHCVLcLHMTSYvOK5Cm46kWqlV3yAbuSpBZdJ5oP5OUoStg==", "license": "Unlicense" }, "node_modules/rollup": { - "version": "4.45.0", - "resolved": "https://mirrors.tencent.com/npm/rollup/-/rollup-4.45.0.tgz", - "integrity": "sha512-WLjEcJRIo7i3WDDgOIJqVI2d+lAC3EwvOGy+Xfq6hs+GQuAA4Di/H72xmXkOhrIWFg2PFYSKZYfH0f4vfKXN4A==", + "version": "4.59.0", + "resolved": "https://mirrors.tencent.com/npm/rollup/-/rollup-4.59.0.tgz", + "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", "dev": true, - "license": "MIT", "dependencies": { "@types/estree": "1.0.8" }, @@ -4339,32 +4468,37 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.45.0", - "@rollup/rollup-android-arm64": "4.45.0", - "@rollup/rollup-darwin-arm64": "4.45.0", - "@rollup/rollup-darwin-x64": "4.45.0", - "@rollup/rollup-freebsd-arm64": "4.45.0", - "@rollup/rollup-freebsd-x64": "4.45.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.45.0", - "@rollup/rollup-linux-arm-musleabihf": "4.45.0", - "@rollup/rollup-linux-arm64-gnu": "4.45.0", - "@rollup/rollup-linux-arm64-musl": "4.45.0", - "@rollup/rollup-linux-loongarch64-gnu": "4.45.0", - "@rollup/rollup-linux-powerpc64le-gnu": "4.45.0", - "@rollup/rollup-linux-riscv64-gnu": "4.45.0", - "@rollup/rollup-linux-riscv64-musl": "4.45.0", - "@rollup/rollup-linux-s390x-gnu": "4.45.0", - "@rollup/rollup-linux-x64-gnu": "4.45.0", - "@rollup/rollup-linux-x64-musl": "4.45.0", - "@rollup/rollup-win32-arm64-msvc": "4.45.0", - "@rollup/rollup-win32-ia32-msvc": "4.45.0", - "@rollup/rollup-win32-x64-msvc": "4.45.0", + "@rollup/rollup-android-arm-eabi": "4.59.0", + "@rollup/rollup-android-arm64": "4.59.0", + "@rollup/rollup-darwin-arm64": "4.59.0", + "@rollup/rollup-darwin-x64": "4.59.0", + "@rollup/rollup-freebsd-arm64": "4.59.0", + 
"@rollup/rollup-freebsd-x64": "4.59.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", + "@rollup/rollup-linux-arm-musleabihf": "4.59.0", + "@rollup/rollup-linux-arm64-gnu": "4.59.0", + "@rollup/rollup-linux-arm64-musl": "4.59.0", + "@rollup/rollup-linux-loong64-gnu": "4.59.0", + "@rollup/rollup-linux-loong64-musl": "4.59.0", + "@rollup/rollup-linux-ppc64-gnu": "4.59.0", + "@rollup/rollup-linux-ppc64-musl": "4.59.0", + "@rollup/rollup-linux-riscv64-gnu": "4.59.0", + "@rollup/rollup-linux-riscv64-musl": "4.59.0", + "@rollup/rollup-linux-s390x-gnu": "4.59.0", + "@rollup/rollup-linux-x64-gnu": "4.59.0", + "@rollup/rollup-linux-x64-musl": "4.59.0", + "@rollup/rollup-openbsd-x64": "4.59.0", + "@rollup/rollup-openharmony-arm64": "4.59.0", + "@rollup/rollup-win32-arm64-msvc": "4.59.0", + "@rollup/rollup-win32-ia32-msvc": "4.59.0", + "@rollup/rollup-win32-x64-gnu": "4.59.0", + "@rollup/rollup-win32-x64-msvc": "4.59.0", "fsevents": "~2.3.2" } }, "node_modules/roughjs": { "version": "4.6.6", - "resolved": "https://registry.npmmirror.com/roughjs/-/roughjs-4.6.6.tgz", + "resolved": "https://mirrors.tencent.com/npm/roughjs/-/roughjs-4.6.6.tgz", "integrity": "sha512-ZUz/69+SYpFN/g/lUlo2FXcIjRkSu3nDarreVdGGndHEBJ6cXPdKguS8JGxwj5HA5xIbVKSmLgr5b3AWxtRfvQ==", "license": "MIT", "dependencies": { @@ -4376,7 +4510,7 @@ }, "node_modules/rw": { "version": "1.3.3", - "resolved": "https://registry.npmmirror.com/rw/-/rw-1.3.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/rw/-/rw-1.3.3.tgz", "integrity": "sha512-PdhdWy89SiZogBLaw42zdeqtRJ//zFd2PgQavcICDUgJT5oW10QCRKbJ6bg4r0/UY2M6BWd5tkxuGFRvCkgfHQ==", "license": "BSD-3-Clause" }, @@ -4397,7 +4531,8 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "license": "MIT" }, "node_modules/safer-buffer": { "version": "2.1.2", @@ -4414,9 +4549,9 @@ "optional": true }, "node_modules/schema-utils": { - "version": "4.3.2", - "resolved": "https://mirrors.tencent.com/npm/schema-utils/-/schema-utils-4.3.2.tgz", - 
"integrity": "sha512-Gn/JaSk/Mt9gYubxTtSn/QCV4em9mpAPiR1rqy/Ocu19u/G9J5WWdNoUT4SiV6mFC3y6cxyFcFwdzPM3FgxGAQ==", + "version": "4.3.3", + "resolved": "https://mirrors.tencent.com/npm/schema-utils/-/schema-utils-4.3.3.tgz", + "integrity": "sha512-eflK8wEtyOE6+hsaRVPxvUKYCpRgzLqDTb8krvAsRIwOGlHoSgYLgBXoubGgLd2fT41/OUYdb48v4k4WWHQurA==", "license": "MIT", "dependencies": { "@types/json-schema": "^7.0.9", @@ -4446,10 +4581,17 @@ "version": "6.0.2", "resolved": "https://mirrors.tencent.com/npm/serialize-javascript/-/serialize-javascript-6.0.2.tgz", "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", + "license": "BSD-3-Clause", "dependencies": { "randombytes": "^2.1.0" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://mirrors.tencent.com/npm/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://mirrors.tencent.com/npm/shebang-command/-/shebang-command-2.0.0.tgz", @@ -4524,9 +4666,24 @@ "node": ">=0.10.0" } }, + "node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://mirrors.tencent.com/npm/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, + "node_modules/string_decoder/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://mirrors.tencent.com/npm/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, "node_modules/stylis": { "version": "4.3.6", - "resolved": "https://registry.npmmirror.com/stylis/-/stylis-4.3.6.tgz", + "resolved": 
"https://mirrors.tencent.com/npm/stylis/-/stylis-4.3.6.tgz", "integrity": "sha512-yQ3rwFWRfwNUY7H5vpU0wfdkNSnvnJinhF9830Swlaxl03zsOjCfmX0ugac+3LtK0lYSgwL/KXc8oYL3mG4YFQ==", "license": "MIT" }, @@ -4583,9 +4740,9 @@ } }, "node_modules/swiper": { - "version": "12.0.3", - "resolved": "https://registry.npmjs.org/swiper/-/swiper-12.0.3.tgz", - "integrity": "sha512-BHd6U1VPEIksrXlyXjMmRWO0onmdNPaTAFduzqR3pgjvi7KfmUCAm/0cj49u2D7B0zNjMw02TSeXfinC1hDCXg==", + "version": "12.1.2", + "resolved": "https://mirrors.tencent.com/npm/swiper/-/swiper-12.1.2.tgz", + "integrity": "sha512-4gILrI3vXZqoZh71I1PALqukCFgk+gpOwe1tOvz5uE9kHtl2gTDzmYflYCwWvR4LOvCrJi6UEEU+gnuW5BtkgQ==", "funding": [ { "type": "patreon", @@ -4602,11 +4759,16 @@ } }, "node_modules/tapable": { - "version": "2.2.2", - "resolved": "https://mirrors.tencent.com/npm/tapable/-/tapable-2.2.2.tgz", - "integrity": "sha512-Re10+NauLTMCudc7T5WLFLAwDhQ0JWdrMK+9B2M8zR5hRExKmsRDCBA7/aV/pNJFltmBFO5BAMlQFi/vq3nKOg==", + "version": "2.3.0", + "resolved": "https://mirrors.tencent.com/npm/tapable/-/tapable-2.3.0.tgz", + "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", + "license": "MIT", "engines": { "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" } }, "node_modules/tdesign-icons-vue-next": { @@ -4667,9 +4829,9 @@ } }, "node_modules/terser-webpack-plugin": { - "version": "5.3.14", - "resolved": "https://mirrors.tencent.com/npm/terser-webpack-plugin/-/terser-webpack-plugin-5.3.14.tgz", - "integrity": "sha512-vkZjpUjb6OMS7dhV+tILUW6BhpDR7P2L/aQSAv+Uwk+m8KATX9EccViHTJR2qDtACKPIYndLGCyl3FMo+r2LMw==", + "version": "5.3.16", + "resolved": "https://mirrors.tencent.com/npm/terser-webpack-plugin/-/terser-webpack-plugin-5.3.16.tgz", + "integrity": "sha512-h9oBFCWrq78NyWWVcSwZarJkZ01c2AyGrzs1crmHZO3QUg9D61Wu4NPjBy69n7JqylFF5y+CsUZYmYEIZ3mR+Q==", "dependencies": { "@jridgewell/trace-mapping": "^0.3.25", "jest-worker": 
"^27.4.5", @@ -4707,7 +4869,7 @@ }, "node_modules/tinyexec": { "version": "1.0.2", - "resolved": "https://registry.npmmirror.com/tinyexec/-/tinyexec-1.0.2.tgz", + "resolved": "https://mirrors.tencent.com/npm/tinyexec/-/tinyexec-1.0.2.tgz", "integrity": "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg==", "license": "MIT", "engines": { @@ -4733,7 +4895,7 @@ }, "node_modules/ts-dedent": { "version": "2.2.0", - "resolved": "https://registry.npmmirror.com/ts-dedent/-/ts-dedent-2.2.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/ts-dedent/-/ts-dedent-2.2.0.tgz", "integrity": "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==", "license": "MIT", "engines": { @@ -4752,7 +4914,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -4763,7 +4924,7 @@ }, "node_modules/ufo": { "version": "1.6.3", - "resolved": "https://registry.npmmirror.com/ufo/-/ufo-1.6.3.tgz", + "resolved": "https://mirrors.tencent.com/npm/ufo/-/ufo-1.6.3.tgz", "integrity": "sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==", "license": "MIT" }, @@ -4773,9 +4934,9 @@ "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==" }, "node_modules/update-browserslist-db": { - "version": "1.1.3", - "resolved": "https://mirrors.tencent.com/npm/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz", - "integrity": "sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==", + "version": "1.2.3", + "resolved": "https://mirrors.tencent.com/npm/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", + "integrity": 
"sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==", "funding": [ { "type": "opencollective", @@ -4790,6 +4951,7 @@ "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "dependencies": { "escalade": "^3.2.0", "picocolors": "^1.1.1" @@ -4801,9 +4963,15 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://mirrors.tencent.com/npm/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/uuid": { "version": "11.1.0", - "resolved": "https://registry.npmmirror.com/uuid/-/uuid-11.1.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/uuid/-/uuid-11.1.0.tgz", "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", "funding": [ "https://github.com/sponsors/broofa", @@ -4829,7 +4997,6 @@ "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -4901,7 +5068,7 @@ }, "node_modules/vscode-jsonrpc": { "version": "8.2.0", - "resolved": "https://registry.npmmirror.com/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "resolved": "https://mirrors.tencent.com/npm/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", "license": "MIT", "engines": { @@ -4910,7 +5077,7 @@ }, "node_modules/vscode-languageserver": { "version": "9.0.1", - "resolved": "https://registry.npmmirror.com/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz", + "resolved": "https://mirrors.tencent.com/npm/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz", "integrity": 
"sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==", "license": "MIT", "dependencies": { @@ -4922,7 +5089,7 @@ }, "node_modules/vscode-languageserver-protocol": { "version": "3.17.5", - "resolved": "https://registry.npmmirror.com/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "resolved": "https://mirrors.tencent.com/npm/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", "license": "MIT", "dependencies": { @@ -4932,28 +5099,26 @@ }, "node_modules/vscode-languageserver-textdocument": { "version": "1.0.12", - "resolved": "https://registry.npmmirror.com/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz", + "resolved": "https://mirrors.tencent.com/npm/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz", "integrity": "sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==", "license": "MIT" }, "node_modules/vscode-languageserver-types": { "version": "3.17.5", - "resolved": "https://registry.npmmirror.com/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "resolved": "https://mirrors.tencent.com/npm/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==", "license": "MIT" }, "node_modules/vscode-uri": { "version": "3.1.0", "resolved": "https://mirrors.tencent.com/npm/vscode-uri/-/vscode-uri-3.1.0.tgz", - "integrity": "sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ==", - "license": "MIT" + "integrity": "sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ==" }, "node_modules/vue": { "version": "3.5.17", "resolved": 
"https://mirrors.tencent.com/npm/vue/-/vue-3.5.17.tgz", "integrity": "sha512-LbHV3xPN9BeljML+Xctq4lbz2lVHCR6DtbpTf5XIO6gugpXUN49j2QQPcMj086r9+AkJ0FfUT8xjulKKBkkr9g==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.17", "@vue/compiler-sfc": "3.5.17", @@ -4970,6 +5135,32 @@ } } }, + "node_modules/vue-demi": { + "version": "0.14.6", + "resolved": "https://mirrors.tencent.com/npm/vue-demi/-/vue-demi-0.14.6.tgz", + "integrity": "sha512-8QA7wrYSHKaYgUxDA5ZC24w+eHm3sYCbp0EzcDwKqN3p6HqtTCGR/GVsPyZW92unff4UlcSh++lmqDWN3ZIq4w==", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "vue-demi-fix": "bin/vue-demi-fix.js", + "vue-demi-switch": "bin/vue-demi-switch.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "@vue/composition-api": "^1.0.0-rc.1", + "vue": "^3.0.0-0 || ^2.6.0" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, "node_modules/vue-i18n": { "version": "11.1.12", "resolved": "https://mirrors.tencent.com/npm/vue-i18n/-/vue-i18n-11.1.12.tgz", @@ -5016,13 +5207,13 @@ "license": "MIT" }, "node_modules/vue-tsc": { - "version": "2.2.12", - "resolved": "https://mirrors.tencent.com/npm/vue-tsc/-/vue-tsc-2.2.12.tgz", - "integrity": "sha512-P7OP77b2h/Pmk+lZdJ0YWs+5tJ6J2+uOQPo7tlBnY44QqQSPYvS0qVT4wqDJgwrZaLe47etJLLQRFia71GYITw==", + "version": "3.2.5", + "resolved": "https://mirrors.tencent.com/npm/vue-tsc/-/vue-tsc-3.2.5.tgz", + "integrity": "sha512-/htfTCMluQ+P2FISGAooul8kO4JMheOTCbCy4M6dYnYYjqLe3BExZudAua6MSIKSFYQtFOYAll7XobYwcpokGA==", "dev": true, "dependencies": { - "@volar/typescript": "2.4.15", - "@vue/language-core": "2.2.12" + "@volar/typescript": "2.4.28", + "@vue/language-core": "3.2.5" }, "bin": { "vue-tsc": "bin/vue-tsc.js" @@ -5032,9 +5223,10 @@ } }, "node_modules/watchpack": { - "version": "2.4.4", - "resolved": "https://mirrors.tencent.com/npm/watchpack/-/watchpack-2.4.4.tgz", - 
"integrity": "sha512-c5EGNOiyxxV5qmTtAB7rbiXxi1ooX1pQKMLX/MIabJjRA0SJBQOjKF+KSVfHkr9U1cADPon0mRiVe/riyaiDUA==", + "version": "2.5.1", + "resolved": "https://mirrors.tencent.com/npm/watchpack/-/watchpack-2.5.1.tgz", + "integrity": "sha512-Zn5uXdcFNIA1+1Ei5McRd+iRzfhENPCe7LeABkJtNulSxjma+l7ltNx55BWZkRlwRnpOgHqxnjyaDgJnNXnqzg==", + "license": "MIT", "dependencies": { "glob-to-regexp": "^0.4.1", "graceful-fs": "^4.1.2" @@ -5044,9 +5236,10 @@ } }, "node_modules/webpack": { - "version": "5.100.1", - "resolved": "https://mirrors.tencent.com/npm/webpack/-/webpack-5.100.1.tgz", - "integrity": "sha512-YJB/ESPUe2Locd0NKXmw72Dx8fZQk1gTzI6rc9TAT4+Sypbnhl8jd8RywB1bDsDF9Dy1RUR7gn3q/ZJTd0OZZg==", + "version": "5.105.2", + "resolved": "https://mirrors.tencent.com/npm/webpack/-/webpack-5.105.2.tgz", + "integrity": "sha512-dRXm0a2qcHPUBEzVk8uph0xWSjV/xZxenQQbLwnwP7caQCYpqG1qddwlyEkIDkYn0K8tvmcrZ+bOrzoQ3HxCDw==", + "license": "MIT", "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -5056,22 +5249,22 @@ "@webassemblyjs/wasm-parser": "^1.14.1", "acorn": "^8.15.0", "acorn-import-phases": "^1.0.3", - "browserslist": "^4.24.0", + "browserslist": "^4.28.1", "chrome-trace-event": "^1.0.2", - "enhanced-resolve": "^5.17.2", - "es-module-lexer": "^1.2.1", + "enhanced-resolve": "^5.19.0", + "es-module-lexer": "^2.0.0", "eslint-scope": "5.1.1", "events": "^3.2.0", "glob-to-regexp": "^0.4.1", "graceful-fs": "^4.2.11", "json-parse-even-better-errors": "^2.3.1", - "loader-runner": "^4.2.0", + "loader-runner": "^4.3.1", "mime-types": "^2.1.27", "neo-async": "^2.6.2", - "schema-utils": "^4.3.2", - "tapable": "^2.1.1", - "terser-webpack-plugin": "^5.3.11", - "watchpack": "^2.4.1", + "schema-utils": "^4.3.3", + "tapable": "^2.3.0", + "terser-webpack-plugin": "^5.3.16", + "watchpack": "^2.5.1", "webpack-sources": "^3.3.3" }, "bin": { diff --git a/frontend/package.json b/frontend/package.json index 9f441932..7c24bf67 100644 --- a/frontend/package.json +++ 
b/frontend/package.json @@ -15,7 +15,9 @@ "@microsoft/fetch-event-source": "^2.0.1", "@types/dompurify": "^3.0.5", "@types/papaparse": "^5.5.0", + "@vue-office/pptx": "^1.0.1", "axios": "^1.8.4", + "docx-preview": "^0.3.7", "dompurify": "^3.2.6", "highlight.js": "^11.11.1", "marked": "^5.1.2", @@ -27,6 +29,7 @@ "tdesign-icons-vue-next": "^0.4.1", "tdesign-vue-next": "^1.17.2", "vue": "^3.5.13", + "vue-demi": "^0.14.6", "vue-i18n": "^11.1.12", "vue-router": "^4.5.0", "webpack": "^5.94.0", @@ -41,10 +44,10 @@ "@vue/tsconfig": "^0.7.0", "less": "^4.3.0", "less-loader": "^12.2.0", - "npm-run-all2": "^7.0.2", + "npm-run-all2": "^8.0.4", "typescript": "~5.8.0", "vite": "^7.2.2", - "vue-tsc": "^2.2.8" + "vue-tsc": "^3.2.5" }, "overrides": { "lightningcss": "none", diff --git a/frontend/src/api/initialization/index.ts b/frontend/src/api/initialization/index.ts index 0ab23ced..070bbe32 100644 --- a/frontend/src/api/initialization/index.ts +++ b/frontend/src/api/initialization/index.ts @@ -82,24 +82,13 @@ export interface KBModelConfigRequest { chunkSize: number chunkOverlap: number separators: string[] + parserEngineRules?: { file_types: string[]; engine: string }[] } multimodal: { enabled: boolean - storageType?: 'cos' | 'minio' - cos?: { - secretId: string - secretKey: string - region: string - bucketName: string - appId: string - pathPrefix: string - } - minio?: { - bucketName: string - useSSL: boolean - pathPrefix: string - } } + /** 存储引擎选择:"local" | "minio" | "cos",影响文档上传与文档内图片存储 */ + storageProvider?: string nodeExtract: { enabled: boolean text: string diff --git a/frontend/src/api/knowledge-base/index.ts b/frontend/src/api/knowledge-base/index.ts index f382abbb..b75b278a 100644 --- a/frontend/src/api/knowledge-base/index.ts +++ b/frontend/src/api/knowledge-base/index.ts @@ -106,6 +106,10 @@ export function downKnowledgeDetails(id: string) { return getDown(`/api/v1/knowledge/${id}/download`); } +export function previewKnowledgeFile(id: string) { + return 
getDown(`/api/v1/knowledge/${id}/preview`); +} + /** @param idsQueryString - query string with ids (e.g. ids=xxx&ids=yyy) */ export function batchQueryKnowledge(idsQueryString: string, kbId?: string, agentId?: string) { let qs = idsQueryString; diff --git a/frontend/src/api/system/index.ts b/frontend/src/api/system/index.ts index 24c5e1c5..1f00abd3 100644 --- a/frontend/src/api/system/index.ts +++ b/frontend/src/api/system/index.ts @@ -1,7 +1,8 @@ -import { get, put } from '@/utils/request' +import { get, post, put } from '@/utils/request' export interface SystemInfo { version: string + edition?: string commit_id?: string build_time?: string go_version?: string @@ -110,3 +111,115 @@ export interface ListMinioBucketsResponse { export function listMinioBuckets(): Promise<{ data: ListMinioBucketsResponse }> { return get('/api/v1/system/minio/buckets') } + +export interface ParserEngineInfo { + Name: string + Description: string + FileTypes: string[] + Available?: boolean + UnavailableReason?: string +} + +/** 解析引擎配置(引擎相关存租户;docreader 地址由环境变量配置) */ +export interface ParserEngineConfig { + docreader_addr?: string + docreader_transport?: string + mineru_endpoint?: string + mineru_api_key?: string + mineru_api_base_url?: string + // MinerU 自建参数 + mineru_model?: string + mineru_enable_formula?: boolean | null + mineru_enable_table?: boolean | null + mineru_enable_ocr?: boolean | null + mineru_language?: string + // MinerU 云 API 参数 + mineru_cloud_model?: string + mineru_cloud_enable_formula?: boolean | null + mineru_cloud_enable_table?: boolean | null + mineru_cloud_enable_ocr?: boolean | null + mineru_cloud_language?: string +} + +export interface ParserEnginesResponse { + data: ParserEngineInfo[] + docreader_addr?: string + /** 连接方式:grpc | http,由服务端环境/配置决定 */ + docreader_transport?: string + connected?: boolean +} + +export function getParserEngines(): Promise { + return get('/api/v1/system/parser-engines') +} + +/** 使用当前填写的参数检测引擎可用性(不保存),用于填写新参数后即时测试 */ +export function 
checkParserEngines(config: ParserEngineConfig): Promise { + return post('/api/v1/system/parser-engines/check', config) +} + +export function getParserEngineConfig(): Promise<{ data: ParserEngineConfig }> { + return get('/api/v1/tenants/kv/parser-engine-config') +} + +export function updateParserEngineConfig(config: ParserEngineConfig): Promise<{ data: ParserEngineConfig }> { + return put('/api/v1/tenants/kv/parser-engine-config', config) +} + +export function reconnectDocReader(addr: string): Promise { + return post('/api/v1/system/docreader/reconnect', { addr }) +} + +// ---- 存储引擎配置(租户级,供文档/图片存储与 docreader 使用) ---- + +export interface StorageEngineConfig { + default_provider: string // "local" | "minio" | "cos" + local?: { path_prefix: string } + minio?: { mode: string; endpoint: string; access_key_id: string; secret_access_key: string; bucket_name: string; use_ssl: boolean; path_prefix: string } + cos?: { + secret_id: string + secret_key: string + region: string + bucket_name: string + app_id: string + path_prefix: string + } +} + +export interface StorageEngineStatusItem { + name: string + available: boolean + description: string +} + +export interface GetStorageEngineStatusResponse { + engines: StorageEngineStatusItem[] + minio_env_available: boolean +} + +export function getStorageEngineConfig(): Promise<{ data: StorageEngineConfig }> { + return get('/api/v1/tenants/kv/storage-engine-config') +} + +export function updateStorageEngineConfig(config: StorageEngineConfig): Promise<{ data: StorageEngineConfig }> { + return put('/api/v1/tenants/kv/storage-engine-config', config) +} + +export function getStorageEngineStatus(): Promise<{ data: GetStorageEngineStatusResponse }> { + return get('/api/v1/system/storage-engine-status') +} + +export interface StorageCheckRequest { + provider: string // "minio" | "cos" + minio?: StorageEngineConfig['minio'] + cos?: StorageEngineConfig['cos'] +} + +export interface StorageCheckResponse { + ok: boolean + message: string +} + 
+export function checkStorageEngine(req: StorageCheckRequest): Promise<{ data: StorageCheckResponse }> { + return post('/api/v1/system/storage-engine-check', req) +} diff --git a/frontend/src/assets/img/agent.svg b/frontend/src/assets/img/agent.svg index 3b94a604..67b2ed27 100644 --- a/frontend/src/assets/img/agent.svg +++ b/frontend/src/assets/img/agent.svg @@ -1,5 +1,5 @@ - - - + + + diff --git a/frontend/src/assets/img/organization-grey.svg b/frontend/src/assets/img/organization-grey.svg index 626df6ed..0a98218f 100644 --- a/frontend/src/assets/img/organization-grey.svg +++ b/frontend/src/assets/img/organization-grey.svg @@ -1,3 +1,3 @@ - + diff --git a/frontend/src/assets/img/organization.svg b/frontend/src/assets/img/organization.svg index dc78f882..bd5efed6 100644 --- a/frontend/src/assets/img/organization.svg +++ b/frontend/src/assets/img/organization.svg @@ -1,3 +1,3 @@ - + diff --git a/frontend/src/assets/img/prefixIcon-green.svg b/frontend/src/assets/img/prefixIcon-green.svg index b83c674c..a8e23a5d 100644 --- a/frontend/src/assets/img/prefixIcon-green.svg +++ b/frontend/src/assets/img/prefixIcon-green.svg @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/frontend/src/assets/img/prefixIcon-grey.svg b/frontend/src/assets/img/prefixIcon-grey.svg index af434f50..70f52da2 100644 --- a/frontend/src/assets/img/prefixIcon-grey.svg +++ b/frontend/src/assets/img/prefixIcon-grey.svg @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/frontend/src/assets/img/prefixIcon.svg b/frontend/src/assets/img/prefixIcon.svg index e38907f0..cbdf487b 100644 --- a/frontend/src/assets/img/prefixIcon.svg +++ b/frontend/src/assets/img/prefixIcon.svg @@ -1,4 +1,4 @@ - - - + + + \ No newline at end of file diff --git a/frontend/src/assets/img/zhishiku-green.svg b/frontend/src/assets/img/zhishiku-green.svg index c61f8503..4df4f291 100644 --- a/frontend/src/assets/img/zhishiku-green.svg +++ b/frontend/src/assets/img/zhishiku-green.svg @@ -1,3 +1,3 @@ - - + + \ No 
newline at end of file diff --git a/frontend/src/assets/img/zhishiku.svg b/frontend/src/assets/img/zhishiku.svg index a590d03f..d88a2e3f 100644 --- a/frontend/src/assets/img/zhishiku.svg +++ b/frontend/src/assets/img/zhishiku.svg @@ -1,3 +1,3 @@ - - + + \ No newline at end of file diff --git a/frontend/src/components/Input-field.vue b/frontend/src/components/Input-field.vue index 920b5c12..231b3764 100644 --- a/frontend/src/components/Input-field.vue +++ b/frontend/src/components/Input-field.vue @@ -97,6 +97,9 @@ const agentKBSelectionMode = computed(() => { return currentAgentConfig.value?.kb_selection_mode || 'all'; }); +// 共享智能体下的知识库列表(来自 listKnowledgeBases(agent_id)),用于已选知识库展示与 org 角标 +const sharedAgentKbList = ref>([]); + // 当智能体改变时,模型、网络搜索、可@知识库列表均跟随新智能体配置 // 知识库:用新智能体配置的列表替换当前选中,使已选与可@列表一致(含共享智能体) watch([selectedAgentId, agentKnowledgeBases, agentKBSelectionMode], ([newAgentId, newAgentKbs, newKbMode], [oldAgentId]) => { @@ -205,9 +208,6 @@ const sharedAgentOrgName = computed(() => { return shared?.org_name || shared?.shared_by_username || ''; }); -// 共享智能体下的知识库列表(来自 listKnowledgeBases(agent_id)),用于已选知识库展示与 org 角标 -const sharedAgentKbList = ref>([]); - const props = defineProps({ isReplying: { type: Boolean, diff --git a/frontend/src/components/doc-content.vue b/frontend/src/components/doc-content.vue index 6cc28979..de189e97 100644 --- a/frontend/src/components/doc-content.vue +++ b/frontend/src/components/doc-content.vue @@ -10,6 +10,7 @@ import { MessagePlugin, DialogPlugin } from "tdesign-vue-next"; import { sanitizeHTML, safeMarkdownToHTML, createSafeImage, isValidImageURL } from '@/utils/security'; import { openMermaidFullscreen } from '@/utils/mermaidViewer'; import { useI18n } from 'vue-i18n'; +import DocumentPreview from '@/components/document-preview.vue'; const { t } = useI18n(); @@ -57,10 +58,9 @@ let doc = null; let down = ref() let mdContentWrap = ref() let url = ref('') -// 视图模式:chunks / original / merged -const viewMode = ref<'chunks' | 
'original' | 'merged'>('merged'); -const originalContent = ref(''); -const loadingOriginal = ref(false); +// 视图模式:chunks / merged / preview +// file 类型默认「预览」,URL / 手动创建 默认「全文」 +const viewMode = ref<'chunks' | 'merged' | 'preview'>('merged'); // 合并后的文档内容 const mergedContent = ref(''); @@ -212,6 +212,31 @@ watch(() => props.details?.md, (newChunks) => { } }, { immediate: true, deep: true }); +const previewSupportedTypes = new Set([ + 'pdf', 'docx', 'pptx', 'ppt', 'xlsx', 'xls', 'csv', + 'jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp', 'tiff', 'svg', + 'txt', 'md', 'markdown', 'json', 'xml', 'html', 'css', 'js', 'ts', + 'py', 'java', 'go', 'cpp', 'c', 'h', 'sh', 'yaml', 'yml', + 'ini', 'conf', 'log', 'sql', 'rs', 'rb', 'php', 'swift', 'kt', + 'scala', 'r', 'lua', 'pl', 'toml', +]); + +const canPreview = (): boolean => { + if (props.details?.type !== 'file') return false; + const ft = props.details?.file_type?.toLowerCase(); + return !!ft && previewSupportedTypes.has(ft); +}; + +// 当文档详情加载完成时,file 类型自动切换到「预览」 +watch(() => props.details?.id, (newId) => { + if (!newId) return; + if (props.details?.type === 'file' && canPreview()) { + viewMode.value = 'preview'; + } else { + viewMode.value = 'merged'; + } +}); + const isTextFile = (fileType?: string): boolean => { if (!fileType) return false; const textTypes = ['txt', 'md', 'markdown', 'json', 'xml', 'html', 'css', 'js', 'ts', 'py', 'java', 'go', 'cpp', 'c', 'h', 'sh', 'yaml', 'yml', 'ini', 'conf', 'log']; @@ -222,25 +247,6 @@ const isMarkdownFile = (fileType?: string): boolean => { const markdownTypes = ['md', 'markdown']; return markdownTypes.includes(fileType.toLowerCase()); }; -const loadOriginalContent = async () => { - if (!props.details.id || !props.details.type || props.details.type !== 'file') return; - const fileType = props.details.file_type?.toLowerCase(); - if (!isTextFile(fileType)) { - MessagePlugin.warning(t('knowledgeBase.originalFileNotSupported') || '该文件类型不支持原文件展示,请下载查看'); - return; - } - 
loadingOriginal.value = true; - try { - const blob = await downKnowledgeDetails(props.details.id); - const text = await blob.text(); - originalContent.value = text; - } catch (error: any) { - console.error('Failed to load original content:', error); - MessagePlugin.error(error?.message || t('knowledgeBase.loadOriginalFailed') || '加载原文件内容失败'); - } finally { - loadingOriginal.value = false; - } -}; watch(() => props.details.md, (newVal) => { nextTick(async () => { const images = mdContentWrap.value.querySelectorAll('img.markdown-image'); @@ -346,7 +352,6 @@ const handleClose = () => { emit("closeDoc", false); doc.scrollTop = 0; viewMode.value = 'merged'; - originalContent.value = ''; }; // 获取显示标题 @@ -622,6 +627,17 @@ const handleDetailsScroll = () => { {{ getTimeLabel() }}:{{ details.time }}
+ {{ $t('preview.tab') || '预览' }} + + { > {{ $t('knowledgeBase.viewChunks') || '分块' }} -
@@ -709,6 +724,16 @@ const handleDetailsScroll = () => { + +
+ +
+