mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
Problem:
The hard-coded Doris vector function implementation (cosine_distance_approximate with
UNIQUE KEY ANN tables) fails on SelectDB 4.0.2-rc01 and other Doris builds lacking that
specific function support. Users had no way to adapt without code changes.
Root cause:
The code assumed that all Doris deployments support the same vector function API, but different
builds (Doris OSS, SelectDB, Doris Cloud) ship with different function variants and table
key constraints. No capability detection or user configuration existed.
Solution:
Implement DORIS_COMPAT_MODE environment variable with three modes:
* auto (default/recommended): probe Doris server on first use to detect available vector
functions; prefer inner_product_duplicate (modern Doris 4.0+), fall back to legacy
(older builds lacking inner_product_approximate)
* legacy: hard-set to cosine_distance_approximate + UNIQUE KEY (for older Doris/SelectDB
builds without inner_product_approximate support)
* inner_product_duplicate: hard-set to inner_product_approximate + DUPLICATE KEY
(for modern Doris 4.0+ and current SelectDB with normalized embeddings)
Implementation details:
- add compat.go with one-time mode resolution (sync.Once) and capability probing
- inspect existing weknora_embeddings_* table DDL via SHOW CREATE TABLE to detect and
enforce schema compatibility; prevents silent mismatches
- fail fast with clear error message when configured mode does not match existing tables,
with explicit remediation steps (recreate tables or change env var)
- branch all query paths (inner_product_approximate vs cosine_distance_approximate),
DDL generation (DUPLICATE KEY vs UNIQUE KEY), write paths (embed normalization),
and chunk updates (Stream Load vs read-modify-write) by resolved compat mode
- add comprehensive repository tests for mode selection, auto-detection, and mismatch
scenarios; all tests pass
- expose DORIS_COMPAT_MODE in docker-compose.yml with auto as default
- document in .env.example with clear mode decision guidance
- log all mode decisions (requested, detected, probed, final) at INFO/WARN level
Key guarantee:
⚠️ DORIS_COMPAT_MODE is NOT interchangeable after embedding tables are created.
The app rejects mode switches that conflict with the existing table layout, preventing
silent data mismatches and query failures.
780 lines
30 KiB
YAML
780 lines
30 KiB
YAML
services:
|
||
# Web UI container. Its NGINX proxies backend traffic to the `app` service
# (or to a remote App when APP_HOST / APP_BACKEND_PORT / APP_SCHEME are set).
frontend:
  container_name: WeKnora-frontend
  image: wechatopenai/weknora-ui:${WEKNORA_VERSION:-latest}
  build:
    context: ./frontend
    args:
      - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
  restart: unless-stopped
  networks: [WeKnora-network]
  ports:
    - "${FRONTEND_PORT:-80}:80"
  # NOTE: If using a remote App backend, comment out or remove the depends_on
  # block below and set APP_HOST/APP_BACKEND_PORT/APP_SCHEME in your .env file.
  depends_on:
    app:
      condition: service_healthy
  environment:
    - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
    - APP_HOST=${APP_HOST:-app}
    # APP_BACKEND_PORT: the port NGINX proxies to (default 8080).
    # For local deployment this is the App container's listening port,
    # independent of host-mapped APP_PORT.
    # For remote deployment, set this to the remote App's service port.
    - APP_PORT=${APP_BACKEND_PORT:-8080}
    - APP_SCHEME=${APP_SCHEME:-http}
|
||
|
||
# WeKnora backend. Waits for redis, postgres and docreader; almost every knob
# is forwarded from the host environment / .env file.
app:
  container_name: WeKnora-app
  image: wechatopenai/weknora-app:${WEKNORA_VERSION:-latest}
  build:
    context: .
    dockerfile: docker/Dockerfile.app
    args:
      - APK_MIRROR_ARG=${APK_MIRROR_ARG:-}
  restart: unless-stopped
  networks: [WeKnora-network]
  extra_hosts:
    - "host.docker.internal:host-gateway"
  ports:
    - "${APP_PORT:-8080}:8080"
  depends_on:
    redis:
      condition: service_started
    postgres:
      condition: service_healthy
    docreader:
      condition: service_healthy
  volumes:
    - data-files:/data/files
    - docreader-tmp:/tmp/docreader:ro
    - ./config/config.yaml:/app/config/config.yaml
    # Optional: mount custom skills directory (allows adding skills without rebuilding image)
    - ./skills/preloaded:/app/skills/preloaded
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:8080/health
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 60s
  environment:
    - LOG_LEVEL=${LOG_LEVEL:-}
    - COS_SECRET_ID=${COS_SECRET_ID:-}
    - COS_SECRET_KEY=${COS_SECRET_KEY:-}
    - COS_REGION=${COS_REGION:-}
    - COS_BUCKET_NAME=${COS_BUCKET_NAME:-}
    - COS_APP_ID=${COS_APP_ID:-}
    - COS_PATH_PREFIX=${COS_PATH_PREFIX:-}
    - COS_ENABLE_OLD_DOMAIN=${COS_ENABLE_OLD_DOMAIN:-}
    - GIN_MODE=${GIN_MODE:-release}
    - DISABLE_REGISTRATION=${DISABLE_REGISTRATION:-false}
    - DB_DRIVER=postgres
    - DB_HOST=postgres
    - DB_PORT=5432
    - DB_USER=${DB_USER:-}
    - DB_PASSWORD=${DB_PASSWORD:-}
    - DB_NAME=${DB_NAME:-}
    - TZ=${TZ:-Asia/Shanghai}
    - WEKNORA_LANGUAGE=${WEKNORA_LANGUAGE:-zh-CN}
    - OTEL_EXPORTER_OTLP_ENDPOINT=jaeger:4317
    - OTEL_SERVICE_NAME=WeKnora
    - OTEL_TRACES_EXPORTER=otlp
    - OTEL_METRICS_EXPORTER=none
    - OTEL_LOGS_EXPORTER=none
    - OTEL_PROPAGATORS=tracecontext,baggage
    # ========== Langfuse (optional observability) ==========
    # Enabled automatically when LANGFUSE_PUBLIC_KEY + LANGFUSE_SECRET_KEY are set.
    # See docs/Langfuse集成.md for the full list of tuning knobs.
    - LANGFUSE_ENABLED=${LANGFUSE_ENABLED:-}
    - LANGFUSE_HOST=${LANGFUSE_HOST:-https://cloud.langfuse.com}
    - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY:-}
    - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY:-}
    - LANGFUSE_RELEASE=${LANGFUSE_RELEASE:-}
    - LANGFUSE_ENVIRONMENT=${LANGFUSE_ENVIRONMENT:-}
    - LANGFUSE_FLUSH_AT=${LANGFUSE_FLUSH_AT:-}
    - LANGFUSE_FLUSH_INTERVAL=${LANGFUSE_FLUSH_INTERVAL:-}
    - LANGFUSE_QUEUE_SIZE=${LANGFUSE_QUEUE_SIZE:-}
    - LANGFUSE_REQUEST_TIMEOUT=${LANGFUSE_REQUEST_TIMEOUT:-}
    - LANGFUSE_SAMPLE_RATE=${LANGFUSE_SAMPLE_RATE:-}
    - LANGFUSE_DEBUG=${LANGFUSE_DEBUG:-}
    # ---- Retrieval backends ----
    - RETRIEVE_DRIVER=${RETRIEVE_DRIVER:-}
    - ELASTICSEARCH_ADDR=${ELASTICSEARCH_ADDR:-}
    - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME:-}
    - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-}
    - ELASTICSEARCH_INDEX=${ELASTICSEARCH_INDEX:-}
    - QDRANT_HOST=qdrant
    - QDRANT_PORT=${QDRANT_PORT:-6334}
    - QDRANT_COLLECTION=${QDRANT_COLLECTION:-weknora_embeddings}
    - QDRANT_API_KEY=${QDRANT_API_KEY:-}
    - QDRANT_USE_TLS=${QDRANT_USE_TLS:-false}
    - MILVUS_ADDRESS=milvus:19530
    - MILVUS_COLLECTION=${MILVUS_COLLECTION:-weknora_embeddings}
    - MILVUS_METRIC_TYPE=${MILVUS_METRIC_TYPE:-IP}
    - DOCREADER_ADDR=${DOCREADER_ADDR:-docreader:50051}
    - DOCREADER_TRANSPORT=${DOCREADER_TRANSPORT:-grpc}
    # docreader gRPC TLS / authentication (client side)
    - GRPC_TLS_ENABLED=${GRPC_TLS_ENABLED:-false}
    - GRPC_TLS_CERT=${GRPC_TLS_CERT:-}
    - GRPC_TLS_KEY=${GRPC_TLS_KEY:-}
    - GRPC_TLS_CA=${GRPC_TLS_CA:-}
    - GRPC_TLS_SERVER_NAME=${GRPC_TLS_SERVER_NAME:-}
    - GRPC_AUTH_TOKEN=${GRPC_AUTH_TOKEN:-}
    - WEAVIATE_HOST=${WEAVIATE_HOST:-weaviate:8080}
    - WEAVIATE_GRPC_ADDRESS=${WEAVIATE_GRPC_ADDRESS:-weaviate:50051}
    - WEAVIATE_SCHEME=${WEAVIATE_SCHEME:-http}
    - WEAVIATE_AUTH_ENABLED=${WEAVIATE_AUTH_ENABLED:-false}
    - WEAVIATE_API_KEY=${WEAVIATE_API_KEY:-}
    - DORIS_ADDR=${DORIS_ADDR:-doris-fe:9030}
    - DORIS_HTTP_PORT=${DORIS_HTTP_PORT:-8030}
    - DORIS_DATABASE=${DORIS_DATABASE:-weknora}
    - DORIS_USERNAME=${DORIS_USERNAME:-root}
    - DORIS_PASSWORD=${DORIS_PASSWORD:-}
    - DORIS_TABLE_PREFIX=${DORIS_TABLE_PREFIX:-weknora_embeddings}
    # DORIS_COMPAT_MODE: auto (default) | legacy | inner_product_duplicate.
    # Selects the Doris vector function / table key layout. It must not be
    # switched once embedding tables exist; see .env.example for guidance.
    - DORIS_COMPAT_MODE=${DORIS_COMPAT_MODE:-auto}
    # ---- Object / file storage ----
    - STORAGE_TYPE=${STORAGE_TYPE:-}
    - LOCAL_STORAGE_BASE_DIR=${LOCAL_STORAGE_BASE_DIR:-}
    - AUTO_RECOVER_DIRTY=${AUTO_RECOVER_DIRTY:-true}
    - MINIO_ENDPOINT=${MINIO_ENDPOINT:-minio:9000}
    - MINIO_ACCESS_KEY_ID=${MINIO_ACCESS_KEY_ID:-minioadmin}
    - MINIO_SECRET_ACCESS_KEY=${MINIO_SECRET_ACCESS_KEY:-minioadmin}
    - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME:-}
    - OBS_ENDPOINT=${OBS_ENDPOINT:-}
    - OBS_REGION=${OBS_REGION:-}
    - OBS_ACCESS_KEY=${OBS_ACCESS_KEY:-}
    - OBS_SECRET_KEY=${OBS_SECRET_KEY:-}
    - OBS_BUCKET_NAME=${OBS_BUCKET_NAME:-}
    - OBS_PATH_PREFIX=${OBS_PATH_PREFIX:-}
    - OBS_PROXY_DOMAIN=${OBS_PROXY_DOMAIN:-}
    - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
    - STREAM_MANAGER_TYPE=${STREAM_MANAGER_TYPE:-}
    - REDIS_ADDR=redis:6379
    - REDIS_USERNAME=${REDIS_USERNAME:-}
    - REDIS_PASSWORD=${REDIS_PASSWORD:-}
    - REDIS_DB=${REDIS_DB:-}
    - REDIS_PREFIX=${REDIS_PREFIX:-}
    - ENABLE_GRAPH_RAG=${ENABLE_GRAPH_RAG:-}
    - NEO4J_ENABLE=${NEO4J_ENABLE:-}
    - NEO4J_URI=bolt://neo4j:7687
    - NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j}
    - NEO4J_PASSWORD=${NEO4J_PASSWORD:-password}
    - TENANT_AES_KEY=${TENANT_AES_KEY:-}
    - SYSTEM_AES_KEY=${SYSTEM_AES_KEY:-}
    - SSRF_WHITELIST=${SSRF_WHITELIST:-}
    # Whitelist of image hosts whose original URL is kept
    # (comma-separated; not rewritten to provider://)
    - IMAGE_HOST_KEEP_URL=${IMAGE_HOST_KEEP_URL:-}
    # Always allow the optional searxng sidecar (compose service hostname);
    # merged on top of SSRF_WHITELIST so user overrides don't clobber it.
    - SSRF_WHITELIST_EXTRA=${SSRF_WHITELIST_EXTRA:-searxng}
    - CONCURRENCY_POOL_SIZE=${CONCURRENCY_POOL_SIZE:-5}
    - JWT_SECRET=${JWT_SECRET:-}
    # Crypto: master key and salt used for AES-256 encryption of sensitive
    # fields such as AppSecret. If unset, they are generated automatically and
    # persisted to the data-files volume (/data/files/.crypto_state.json);
    # on restart they are restored from that file so already-encrypted data
    # can still be decrypted.
    - CRYPTO_MASTER_KEY=${CRYPTO_MASTER_KEY:-}
    - CRYPTO_SALT=${CRYPTO_SALT:-}
    # File size limit (in MB)
    - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
    # Agent Skills Sandbox
    - WEKNORA_SANDBOX_MODE=${WEKNORA_SANDBOX_MODE:-docker}
    - WEKNORA_SANDBOX_TIMEOUT=${WEKNORA_SANDBOX_TIMEOUT:-60}
    - WEKNORA_SANDBOX_DOCKER_IMAGE=${WEKNORA_SANDBOX_DOCKER_IMAGE:-wechatopenai/weknora-sandbox:${WEKNORA_VERSION:-latest}}
    # Agent LLM call timeout
    - WEKNORA_AGENT_LLM_TIMEOUT=${WEKNORA_AGENT_LLM_TIMEOUT:-}
    - WEKNORA_AGENT_TOOL_APPROVAL_TIMEOUT=${WEKNORA_AGENT_TOOL_APPROVAL_TIMEOUT:-}
    # Tenant / RBAC (see docs/rbac.md and the comments in .env.example)
    # - WEKNORA_TENANT_ENABLE_RBAC: enforce tenant role authorization (true / false), default true
    # - WEKNORA_TENANT_MAX_OWNED_PER_USER: max tenants a single non-super-admin may self-create
    #   >0 enforces the limit; =0 uses the handler default; <0 disables the limit
    #   (not recommended for shared deployments)
    - WEKNORA_TENANT_ENABLE_RBAC=${WEKNORA_TENANT_ENABLE_RBAC:-}
    - WEKNORA_TENANT_MAX_OWNED_PER_USER=${WEKNORA_TENANT_MAX_OWNED_PER_USER:-}
    - APK_MIRROR_ARG=${APK_MIRROR_ARG:-}
|
||
|
||
# Sandbox image: exists only so compose can build/pull it; it is not a
# long-running service. The app launches this image on demand (docker run)
# when executing Skills and discards the container afterwards.
sandbox:
  container_name: WeKnora-sandbox
  image: wechatopenai/weknora-sandbox:${WEKNORA_VERSION:-latest}
  build:
    context: .
    dockerfile: docker/Dockerfile.sandbox
  # Exit immediately: nothing should keep running here.
  command:
    - "true"
  restart: "no"
  profiles: [full]
|
||
|
||
# Document parsing service, consumed by the app over gRPC on port 50051.
docreader:
  container_name: WeKnora-docreader
  image: wechatopenai/weknora-docreader:${WEKNORA_VERSION:-latest}
  build:
    context: .
    dockerfile: docker/Dockerfile.docreader
    args:
      - APT_MIRROR=${APT_MIRROR:-}
  restart: unless-stopped
  networks: [WeKnora-network]
  extra_hosts:
    - "host.docker.internal:host-gateway"
  # docreader gRPC ships without auth/TLS and is only consumed by the app
  # container over the WeKnora-network (DOCREADER_ADDR defaults to
  # docreader:50051), so we don't publish 50051 to the host. `expose` keeps
  # the port reachable inside the compose network for documentation only.
  # If you need to call docreader from outside the host (e.g. for debugging),
  # add a `ports:` entry locally, preferably bound to 127.0.0.1.
  expose:
    - "50051"
  volumes:
    - docreader-tmp:/tmp/docreader
  healthcheck:
    test:
      - CMD
      - grpc_health_probe
      - -addr=localhost:50051
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 60s
  environment:
    - DOCREADER_IMAGE_OUTPUT_DIR=/tmp/docreader
    - MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-}
    - DOCREADER_MARKITDOWN_MAX_WORKERS=${DOCREADER_MARKITDOWN_MAX_WORKERS:-1}
    - DOCREADER_PDF_RENDER_MAX_WORKERS=${DOCREADER_PDF_RENDER_MAX_WORKERS:-1}
    - DOCREADER_PDF_RENDER_DPI=${DOCREADER_PDF_RENDER_DPI:-200}
    - DOCREADER_PDF_JPEG_QUALITY=${DOCREADER_PDF_JPEG_QUALITY:-90}
    # gRPC TLS / auth token (server side of the app <-> docreader link)
    - GRPC_TLS_ENABLED=${GRPC_TLS_ENABLED:-false}
    - GRPC_TLS_CERT=${GRPC_TLS_CERT:-}
    - GRPC_TLS_KEY=${GRPC_TLS_KEY:-}
    - GRPC_TLS_CA=${GRPC_TLS_CA:-}
    - GRPC_AUTH_TOKEN=${GRPC_AUTH_TOKEN:-}
    - OBS_ENDPOINT=${OBS_ENDPOINT:-}
    - OBS_REGION=${OBS_REGION:-}
    - OBS_ACCESS_KEY=${OBS_ACCESS_KEY:-}
    - OBS_SECRET_KEY=${OBS_SECRET_KEY:-}
    - OBS_BUCKET_NAME=${OBS_BUCKET_NAME:-}
    - OBS_PATH_PREFIX=${OBS_PATH_PREFIX:-}
    - OBS_PROXY_DOMAIN=${OBS_PROXY_DOMAIN:-}
|
||
|
||
# PostgreSQL (ParadeDB image), the primary database of the app.
# Credentials and database name come from DB_USER / DB_PASSWORD / DB_NAME.
postgres:
  container_name: WeKnora-postgres
  image: paradedb/paradedb:v0.22.2-pg17
  restart: unless-stopped
  # Give the server time to exit gracefully on shutdown.
  stop_grace_period: 1m
  networks: [WeKnora-network]
  volumes:
    - postgres-data:/var/lib/postgresql/data
  environment:
    - POSTGRES_USER=${DB_USER}
    - POSTGRES_PASSWORD=${DB_PASSWORD}
    - POSTGRES_DB=${DB_NAME}
  healthcheck:
    test:
      - CMD-SHELL
      - pg_isready -U ${DB_USER}
    interval: 10s      # longer probe interval
    timeout: 10s       # longer probe timeout
    retries: 3         # fewer retries so failures surface sooner
    start_period: 30s  # extra time for the initial startup
|
||
|
||
# Redis with append-only persistence enabled, reachable only on the compose
# network (no host port is published).
redis:
  image: redis:7.0-alpine
  container_name: WeKnora-redis
  # Exec (list) form instead of a single command string:
  #  - the password is passed as exactly one argument, so whitespace or quote
  #    characters in REDIS_PASSWORD are not word-split;
  #  - an empty/unset REDIS_PASSWORD yields `--requirepass ""` (no password,
  #    matching the empty default the app service uses) instead of a bare
  #    `--requirepass` that makes redis-server abort with a config error and
  #    crash-loop under `restart: always`.
  command:
    - redis-server
    - --appendonly
    - "yes"
    - --requirepass
    - "${REDIS_PASSWORD}"
  restart: always
  networks:
    - WeKnora-network
|
||
|
||
# One-shot init container: seeds the searxng_config named volume with a copy
# of the read-only template settings.yml before searxng starts.
# This is needed because the searxng entrypoint sed-replaces `ultrasecretkey`
# in /etc/searxng/settings.yml in place; bind-mounting the repo file directly
# would write the resolved SEARXNG_SECRET back into the working tree (and
# prevent re-templating on subsequent restarts).
searxng-init:
  container_name: WeKnora-searxng-init
  image: busybox:1.36
  restart: "no"
  networks: [WeKnora-network]
  profiles: [searxng, full]
  volumes:
    - ./docker/searxng/settings.yml:/template/settings.yml:ro
    - searxng_config:/etc/searxng
  command:
    - sh
    - -c
    - "cp /template/settings.yml /etc/searxng/settings.yml && chmod 0644 /etc/searxng/settings.yml"
|
||
|
||
# Optional SearXNG sidecar; starts only after searxng-init has populated the
# config volume.
searxng:
  container_name: WeKnora-searxng
  image: searxng/searxng:latest
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [searxng, full]
  depends_on:
    searxng-init:
      condition: service_completed_successfully
  # Drop every capability, then add back only the three listed below.
  cap_drop: [ALL]
  cap_add: [CHOWN, SETGID, SETUID]
  # Bind to loopback by default; override SEARXNG_BIND=0.0.0.0 only after
  # rotating SEARXNG_SECRET and (optionally) re-enabling limiter in settings.yml.
  ports:
    - "${SEARXNG_BIND:-127.0.0.1}:${SEARXNG_PORT:-8888}:8080"
  volumes:
    - searxng_config:/etc/searxng
  environment:
    - SEARXNG_BASE_URL=http://localhost:${SEARXNG_PORT:-8888}/
    - INSTANCE_NAME=weknora-searxng
    # Default value lets `docker compose --profile searxng up` work zero-config.
    # Override via .env (e.g. `openssl rand -hex 32`) before exposing the
    # instance beyond the default 127.0.0.1 bind, since secret_key signs
    # image-proxy URLs and a shared default would be guessable by anyone.
    - SEARXNG_SECRET=${SEARXNG_SECRET:-weknora-default-searxng-secret-rotate-before-exposing-publicly}
|
||
|
||
# Optional MinIO object store (S3 API on 9000, web console on 9001).
minio:
  container_name: WeKnora-minio
  image: minio/minio:RELEASE.2025-09-07T16-13-09Z
  command: server --console-address ":9001" /data
  networks: [WeKnora-network]
  profiles: [minio, full]
  ports:
    - "${MINIO_PORT:-9000}:9000"
    - "${MINIO_CONSOLE_PORT:-9001}:9001"
  volumes:
    - minio_data:/data
  environment:
    # Root credentials share their variables with the app's MINIO_* settings.
    - MINIO_ROOT_USER=${MINIO_ACCESS_KEY_ID:-minioadmin}
    - MINIO_ROOT_PASSWORD=${MINIO_SECRET_ACCESS_KEY:-minioadmin}
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:9000/minio/health/live
    interval: 30s
    timeout: 20s
    retries: 3
|
||
|
||
# Optional Jaeger all-in-one tracing backend (the app exports OTLP traces to
# jaeger:4317).
jaeger:
  container_name: WeKnora-jaeger
  image: jaegertracing/all-in-one:1.76.0
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [jaeger, full]
  environment:
    - COLLECTOR_OTLP_ENABLED=true
    - COLLECTOR_ZIPKIN_HOST_PORT=:9411
  volumes:
    - jaeger_data:/var/lib/jaeger  # persist Jaeger data
  ports:
    - "6831:6831/udp"  # Jaeger Thrift receiver
    - "6832:6832/udp"  # Jaeger Thrift receiver (compact)
    - "5778:5778"      # configuration port
    - "16686:16686"    # web UI
    - "4317:4317"      # OTLP gRPC receiver
    - "4318:4318"      # OTLP HTTP receiver
    - "14250:14250"    # model receiver port
    - "14268:14268"    # Jaeger HTTP receiver
    - "9411:9411"      # Zipkin-compatible port
|
||
|
||
# Optional Neo4j graph database with the APOC plugin (the app connects via
# bolt://neo4j:7687).
neo4j:
  image: neo4j:2025.10.1
  container_name: WeKnora-neo4j
  volumes:
    - neo4j-data:/data
  environment:
    - NEO4J_AUTH=${NEO4J_USERNAME:-neo4j}/${NEO4J_PASSWORD:-password}
    - NEO4J_apoc_export_file_enabled=true
    - NEO4J_apoc_import_file_enabled=true
    - NEO4J_apoc_import_file_use__neo4j__config=true
    # NEO4J_PLUGINS is the variable documented for Neo4j 5+ images;
    # NEO4JLABS_PLUGINS is the legacy (pre-5.0) name.
    - NEO4J_PLUGINS=["apoc"]
  ports:
    - "7474:7474"
    - "7687:7687"
  restart: always
  networks:
    - WeKnora-network
  profiles:
    - neo4j
    - full
|
||
|
||
# Optional Qdrant vector store. The app is configured with QDRANT_PORT
# defaulting to 6334; 6333 is published separately as QDRANT_REST_PORT.
qdrant:
  container_name: WeKnora-qdrant
  image: qdrant/qdrant:v1.16.2
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [qdrant, full]
  volumes:
    - qdrant_data:/qdrant/storage
  ports:
    - "${QDRANT_REST_PORT:-6333}:6333"
    - "${QDRANT_PORT:-6334}:6334"
|
||
|
||
# Optional Milvus standalone vector store with embedded etcd and local
# storage. Only in the `milvus` profile (not part of `full`).
milvus:
  container_name: WeKnora-milvus
  image: milvusdb/milvus:v2.6.11
  command:
    - milvus
    - run
    - standalone
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [milvus]
  security_opt:
    - seccomp:unconfined
  ports:
    - "19530:19530"
    - "9091:9091"
  volumes:
    - milvus_data:/var/lib/milvus
  environment:
    - ETCD_USE_EMBED=true
    - ETCD_DATA_DIR=/var/lib/milvus/etcd
    - COMMON_STORAGETYPE=local
    - DEPLOY_MODE=STANDALONE
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:9091/healthz
    interval: 30s
    timeout: 20s
    retries: 3
    start_period: 90s
|
||
|
||
# Optional single-node Weaviate vector store with anonymous access enabled.
# Only in the `weaviate` profile (not part of `full`).
weaviate:
  container_name: WeKnora-weaviate
  image: semitechnologies/weaviate:1.28.4
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [weaviate]
  # Host ports differ from the container ports (8080 HTTP, 50051 gRPC).
  ports:
    - "9035:8080"
    - "50052:50051"
  volumes:
    - weaviate_data:/var/lib/weaviate
  environment:
    - PERSISTENCE_DATA_PATH=/var/lib/weaviate
    - CLUSTER_HOSTNAME=node1
    - DEFAULT_VECTORIZER_MODULE=none
    - ENABLE_MODULES=none
    - AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true
    - CLUSTER_GOSSIP_BIND_PORT=7000
    - CLUSTER_DATA_BIND_PORT=7001
    - RAFT_BOOTSTRAP_EXPECT=1
|
||
|
||
# ---------------------------------------------------------------------------
# Apache Doris 4.1 (single-instance FE + BE standalone deployment,
# opt-in via `--profile doris`)
#
# Version requirement: must be >= 3.0 (HNSW ANN index,
# cosine_distance_approximate, Stream Load partial_columns). This compose
# file uses the official stable 4.1 tag.
#
# Ports:
#   - 9030 (FE MySQL): WeKnora's main read/write path and SQL queries.
#   - 8030 (FE HTTP):  Stream Load partial-update entry point.
#   - 8040 (BE):       BE heartbeat and data transfer; the FE redirects
#                      stream loads to the BE.
#
# Enable with:
#   docker compose --profile doris up -d
#
# Data is persisted in the doris_fe_meta / doris_fe_log and
# doris_be_storage / doris_be_log volumes.
# ---------------------------------------------------------------------------
doris-fe:
  container_name: WeKnora-doris-fe
  image: apache/doris:fe-4.1.0
  hostname: doris-fe
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [doris]
  environment:
    - FE_SERVERS=fe1:doris-fe:9010
    - FE_ID=1
  ports:
    - "${DORIS_FE_HTTP_PORT:-8030}:8030"
    - "${DORIS_FE_MYSQL_PORT:-9030}:9030"
  volumes:
    - doris_fe_meta:/opt/apache-doris/fe/doris-meta
    - doris_fe_log:/opt/apache-doris/fe/log
|
||
|
||
# Doris backend (BE) node; registers with the doris-fe frontend listed in
# FE_SERVERS and is started after it.
doris-be:
  container_name: WeKnora-doris-be
  image: apache/doris:be-4.1.0
  hostname: doris-be
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [doris]
  depends_on: [doris-fe]
  environment:
    - FE_SERVERS=fe1:doris-fe:9010
    - BE_ADDR=doris-be:9050
  ports:
    - "${DORIS_BE_HTTP_PORT:-8040}:8040"
  volumes:
    - doris_be_storage:/opt/apache-doris/be/storage
    - doris_be_log:/opt/apache-doris/be/log
|
||
|
||
# Optional Dex service, configured from ./misc/dex-config.yaml.
dex:
  container_name: dex
  image: dexidp/dex:latest
  command:
    - dex
    - serve
    - /etc/dex/config.yaml
  profiles: [dex, full]
  ports:
    - "5556:5556"
  volumes:
    - ./misc/dex-config.yaml:/etc/dex/config.yaml
|
||
|
||
# ---------------------------------------------------------------------------
# Self-hosted Langfuse observability stack (opt-in via `--profile langfuse`)
#
# To keep resource usage minimal, Langfuse reuses WeKnora's existing
# postgres and redis:
#   - postgres: a separate "langfuse" database is created in WeKnora-postgres
#   - redis:    DB 1 of WeKnora-redis is reused (WeKnora itself uses DB 0)
# The only additions are Langfuse itself + ClickHouse (OLAP) + a dedicated
# MinIO (event/media S3).
#
# Enable with:
#   docker compose --profile langfuse up -d
# Once enabled:
#   1. Langfuse UI: http://localhost:3000 (on first visit, register an admin
#      and generate keys under Settings → API Keys)
#   2. In .env, change LANGFUSE_HOST to http://langfuse-web:3000
#      (container-to-container traffic) and fill in the freshly generated
#      LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY
#   3. Restart the app container: docker compose up -d app
# The default passwords are placeholders; for production deployments always
# regenerate them with openssl rand!
# ---------------------------------------------------------------------------

# One-shot init job: creates the langfuse database inside the existing
# WeKnora-postgres (idempotent; skipped when the database already exists).
# Reuses the ParadeDB image of WeKnora-postgres so no extra postgres image
# has to be pulled.
langfuse-db-init:
  container_name: WeKnora-langfuse-db-init
  image: paradedb/paradedb:v0.22.2-pg17
  restart: "no"
  networks: [WeKnora-network]
  profiles: [langfuse, full]
  depends_on:
    postgres:
      condition: service_healthy
  environment:
    PGPASSWORD: ${DB_PASSWORD}
  # ${LANGFUSE_DB_NAME:-langfuse} / ${DB_USER} are resolved to literals by
  # compose before the script reaches the shell; any variable the shell itself
  # should expand (currently none) must be escaped as $$.
  entrypoint:
    - sh
    - -c
  command:
    - |
      set -e
      echo "[langfuse-db-init] ensuring database '${LANGFUSE_DB_NAME:-langfuse}' exists in WeKnora-postgres..."
      # 先刷新现有库的 collation(镜像 ICU 2.36 与宿主 2.41 不匹配时必须做),否则 CREATE DATABASE 会失败
      psql -h postgres -U ${DB_USER} -d postgres -v ON_ERROR_STOP=0 -c "ALTER DATABASE template1 REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true
      psql -h postgres -U ${DB_USER} -d postgres -v ON_ERROR_STOP=0 -c "ALTER DATABASE postgres REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true
      # 幂等创建:已存在则跳过;不存在则从 template0 克隆(template0 永远不会有 collation 漂移)
      if psql -h postgres -U ${DB_USER} -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname='${LANGFUSE_DB_NAME:-langfuse}'" | grep -q 1; then
        echo "[langfuse-db-init] database '${LANGFUSE_DB_NAME:-langfuse}' already exists, skipping."
      else
        psql -h postgres -U ${DB_USER} -d postgres -v ON_ERROR_STOP=1 -c "CREATE DATABASE \"${LANGFUSE_DB_NAME:-langfuse}\" TEMPLATE template0;"
        echo "[langfuse-db-init] database '${LANGFUSE_DB_NAME:-langfuse}' created."
      fi
      echo "[langfuse-db-init] done."
|
||
|
||
# ClickHouse instance dedicated to Langfuse; runs as uid:gid 101:101.
langfuse-clickhouse:
  container_name: WeKnora-langfuse-clickhouse
  image: clickhouse/clickhouse-server:24.8
  user: "101:101"
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [langfuse, full]
  environment:
    CLICKHOUSE_DB: default
    CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse}
    CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse}
  volumes:
    - langfuse_clickhouse_data:/var/lib/clickhouse
    - langfuse_clickhouse_logs:/var/log/clickhouse-server
  healthcheck:
    # Healthy once the HTTP ping endpoint on 8123 answers.
    test: wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1
    start_period: 10s
    interval: 5s
    timeout: 5s
    retries: 10
|
||
|
||
# MinIO instance dedicated to Langfuse (event / media object storage).
langfuse-minio:
  container_name: WeKnora-langfuse-minio
  image: minio/minio:RELEASE.2025-09-07T16-13-09Z
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [langfuse, full]
  entrypoint: sh
  # Create the `langfuse` bucket directory before the server starts, so
  # Langfuse works out of the box.
  command: -c 'mkdir -p /data/langfuse && minio server --address ":9000" --console-address ":9001" /data'
  environment:
    MINIO_ROOT_USER: ${LANGFUSE_MINIO_USER:-langfuseminio}
    MINIO_ROOT_PASSWORD: ${LANGFUSE_MINIO_PASSWORD:-langfuseminiosecret}
  volumes:
    - langfuse_minio_data:/data
  ports:
    # 9100: S3 API (browsers connect to it directly when uploading media, so
    # it must be published)
    - "${LANGFUSE_MINIO_S3_PORT:-9100}:9000"
    # 9101: MinIO console (optional, handy for troubleshooting)
    - "${LANGFUSE_MINIO_CONSOLE_PORT:-9101}:9001"
  healthcheck:
    test:
      - CMD
      - mc
      - ready
      - local
    interval: 5s
    timeout: 10s
    retries: 5
|
||
|
||
# Langfuse background worker. Also defines the &langfuse-depends-on and
# &langfuse-env anchors that langfuse-web reuses.
langfuse-worker:
  container_name: WeKnora-langfuse-worker
  image: langfuse/langfuse-worker:3
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [langfuse, full]
  depends_on: &langfuse-depends-on
    # WeKnora's existing postgres/redis are reused, so the langfuse database
    # has to be created first.
    langfuse-db-init:
      condition: service_completed_successfully
    redis:
      condition: service_started
    langfuse-clickhouse:
      condition: service_healthy
    langfuse-minio:
      condition: service_healthy
  # Wrapper entrypoint: URL-encodes DB_PASSWORD / REDIS_PASSWORD at container
  # start, so characters such as '@' / '#' in DB_PASSWORD do not make Prisma
  # fail to parse DATABASE_URL (P1013).
  # The Langfuse image is Node.js based, so node's encodeURIComponent is used
  # directly and no external dependency is needed.
  # Note: overriding entrypoint in compose clears the image's default CMD,
  # hence the original command is hard-coded in the final exec of the wrapper.
  entrypoint:
    - /bin/sh
    - -ec
    - |
      _enc() { node -e 'process.stdout.write(encodeURIComponent(process.argv[1]))' "$$1"; }
      DU=$$(_enc "$$_LF_DB_USER")
      DP=$$(_enc "$$_LF_DB_PASSWORD")
      RP=$$(_enc "$$_LF_REDIS_PASSWORD")
      export DATABASE_URL="postgresql://$$DU:$$DP@postgres:5432/$$_LF_DB_NAME"
      export REDIS_CONNECTION_STRING="redis://:$$RP@redis:6379/$$_LF_REDIS_DB"
      unset _LF_DB_USER _LF_DB_PASSWORD _LF_REDIS_PASSWORD
      exec dumb-init -- ./worker/entrypoint.sh node worker/dist/index.js
  environment: &langfuse-env
    # Raw credentials (not URL-encoded); the entrypoint wrapper reads them and
    # assembles DATABASE_URL / REDIS_CONNECTION_STRING.
    _LF_DB_USER: ${DB_USER}
    _LF_DB_PASSWORD: ${DB_PASSWORD}
    _LF_DB_NAME: ${LANGFUSE_DB_NAME:-langfuse}
    _LF_REDIS_PASSWORD: ${REDIS_PASSWORD}
    _LF_REDIS_DB: ${LANGFUSE_REDIS_DB:-1}
    # SALT / ENCRYPTION_KEY must be regenerated for production:
    #   SALT:           openssl rand -base64 32
    #   ENCRYPTION_KEY: openssl rand -hex 32
    SALT: ${LANGFUSE_SALT:-weknora-langfuse-dev-salt-change-me}
    ENCRYPTION_KEY: ${LANGFUSE_ENCRYPTION_KEY:-0000000000000000000000000000000000000000000000000000000000000000}
    NEXTAUTH_URL: ${LANGFUSE_NEXTAUTH_URL:-http://localhost:3000}
    NEXTAUTH_SECRET: ${LANGFUSE_NEXTAUTH_SECRET:-weknora-langfuse-dev-nextauth-secret-change-me}
    TELEMETRY_ENABLED: ${LANGFUSE_TELEMETRY_ENABLED:-false}
    LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES: "false"
    # ClickHouse connection
    CLICKHOUSE_URL: http://langfuse-clickhouse:8123
    CLICKHOUSE_MIGRATION_URL: clickhouse://langfuse-clickhouse:9000
    CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse}
    CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse}
    CLICKHOUSE_CLUSTER_ENABLED: "false"
    # Event uploads use the in-network MinIO endpoint
    LANGFUSE_S3_EVENT_UPLOAD_BUCKET: langfuse
    LANGFUSE_S3_EVENT_UPLOAD_REGION: auto
    LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_USER:-langfuseminio}
    LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_PASSWORD:-langfuseminiosecret}
    LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT: http://langfuse-minio:9000
    LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE: "true"
    LANGFUSE_S3_EVENT_UPLOAD_PREFIX: events/
    LANGFUSE_S3_MEDIA_UPLOAD_BUCKET: langfuse
    LANGFUSE_S3_MEDIA_UPLOAD_REGION: auto
    LANGFUSE_S3_MEDIA_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_USER:-langfuseminio}
    LANGFUSE_S3_MEDIA_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_PASSWORD:-langfuseminiosecret}
    # External endpoint for media uploads (the browser connects directly);
    # defaults to the host-mapped port 9100.
    LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT: ${LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT:-http://localhost:9100}
    LANGFUSE_S3_MEDIA_UPLOAD_FORCE_PATH_STYLE: "true"
    LANGFUSE_S3_MEDIA_UPLOAD_PREFIX: media/
|
||
|
||
# Langfuse web UI / API. Reuses the dependency list and base environment
# anchored on langfuse-worker.
langfuse-web:
  container_name: WeKnora-langfuse-web
  image: langfuse/langfuse:3
  restart: unless-stopped
  networks: [WeKnora-network]
  profiles: [langfuse, full]
  depends_on: *langfuse-depends-on
  ports:
    - "${LANGFUSE_WEB_PORT:-3000}:3000"
  # Same wrapper entrypoint as the worker; the web image's original start
  # command (see CMD in the image's Dockerfile) is hard-coded at the end.
  entrypoint:
    - /bin/sh
    - -ec
    - |
      _enc() { node -e 'process.stdout.write(encodeURIComponent(process.argv[1]))' "$$1"; }
      DU=$$(_enc "$$_LF_DB_USER")
      DP=$$(_enc "$$_LF_DB_PASSWORD")
      RP=$$(_enc "$$_LF_REDIS_PASSWORD")
      export DATABASE_URL="postgresql://$$DU:$$DP@postgres:5432/$$_LF_DB_NAME"
      export REDIS_CONNECTION_STRING="redis://:$$RP@redis:6379/$$_LF_REDIS_DB"
      unset _LF_DB_USER _LF_DB_PASSWORD _LF_REDIS_PASSWORD
      if [ -n "$$NEXT_PUBLIC_LANGFUSE_CLOUD_REGION" ]; then
        exec dumb-init -- ./web/entrypoint.sh node --import dd-trace/initialize.mjs ./web/server.js --keepAliveTimeout 110000
      else
        exec dumb-init -- ./web/entrypoint.sh node ./web/server.js --keepAliveTimeout 110000
      fi
  environment:
    <<: *langfuse-env
    # Optional: auto-create the organization, project and admin user on first
    # start (when filled in, no manual registration in the UI is needed).
    LANGFUSE_INIT_ORG_ID: ${LANGFUSE_INIT_ORG_ID:-}
    LANGFUSE_INIT_ORG_NAME: ${LANGFUSE_INIT_ORG_NAME:-}
    LANGFUSE_INIT_PROJECT_ID: ${LANGFUSE_INIT_PROJECT_ID:-}
    LANGFUSE_INIT_PROJECT_NAME: ${LANGFUSE_INIT_PROJECT_NAME:-}
    LANGFUSE_INIT_PROJECT_PUBLIC_KEY: ${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-}
    LANGFUSE_INIT_PROJECT_SECRET_KEY: ${LANGFUSE_INIT_PROJECT_SECRET_KEY:-}
    LANGFUSE_INIT_USER_EMAIL: ${LANGFUSE_INIT_USER_EMAIL:-}
    LANGFUSE_INIT_USER_NAME: ${LANGFUSE_INIT_USER_NAME:-}
    LANGFUSE_INIT_USER_PASSWORD: ${LANGFUSE_INIT_USER_PASSWORD:-}
|
||
|
||
# Bridge network that most services in this file attach to.
networks:
  WeKnora-network:
    driver: bridge
|
||
|
||
# Named volumes (default driver), grouped by the service that mounts them.
volumes:
  # core services
  postgres-data:
  data-files:
  docreader-tmp:
  # optional infrastructure
  searxng_config:
  minio_data:
  jaeger_data:
  neo4j-data:
  # optional vector stores
  qdrant_data:
  milvus_data:
  weaviate_data:
  doris_fe_meta:
  doris_fe_log:
  doris_be_storage:
  doris_be_log:
  # Langfuse stack
  langfuse_clickhouse_data:
  langfuse_clickhouse_logs:
  langfuse_minio_data:
|