Files
WeKnora/docker-compose.dev.yml
wizardchen 492e92580b feat(observability): integrate Langfuse for LLM token tracking and tracing
Closes #620 #497. Add opt-in Langfuse observability covering all five
model types (chat, embedding, rerank, VLM, ASR) with HTTP-request-scoped
traces and Docker Compose support (both cloud and self-hosted).

Core package internal/tracing/langfuse:
- HTTP client with batched async ingestion (non-blocking in request path)
- Sampling, environment / release tagging, and graceful fallback when
  LANGFUSE_* env vars are absent (wrappers become no-ops)
- Gin middleware opens one trace per traced request and finishes it after
  the handler chain returns, attaching method / path / user / session
- Trace context is stored under a typed key exported from internal/types
  so logger.CloneContext can preserve it across handler / goroutine
  boundaries (otherwise each LLM call auto-created an orphan trace,
  fragmenting one request into many)

Per-model generation wrappers (opt-in via NewChat/NewEmbedder/...):
- chat: captures prompt, streaming output, token usage + TTFT
- embedding: approximates tokens when the provider omits usage
- rerank: previews query/docs, summarizes results to keep payload small
- vlm: records image count and total bytes, never uploads raw pixels
- asr: records file size and audio duration, never uploads audio bytes

Async title generation (GenerateTitleAsync) now forwards the trace key
into the goroutine so title calls appear under the parent chat trace.

Docker Compose:
- LANGFUSE_* env passthrough on the `app` service for cloud deployments
- Optional `langfuse` profile spins up a self-hosted Langfuse stack that
  reuses WeKnora's existing PostgreSQL (separate database via an idempotent
  init container that fixes ICU collation drift) and Redis (separate DB
  number), adding only ClickHouse, MinIO, web and worker containers
- web/worker entrypoints URL-encode DB_PASSWORD / REDIS_PASSWORD at start
  to avoid Prisma P1013 when passwords contain @ / # / etc.

Docs: docs/Langfuse集成.md covers cloud vs self-hosted, per-model usage
strategy, code map, and resource footprint.
2026-04-24 10:29:19 +08:00

420 lines
14 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Development environment: starts only the infrastructure services; app and frontend run locally on the host.
services:
# 只启动依赖的基础设施服务
postgres:
  # PostgreSQL for local development, using the ParadeDB image (pg17 build).
  image: paradedb/paradedb:v0.22.2-pg17
  container_name: WeKnora-postgres-dev
  ports:
    - "${DB_PORT:-5432}:5432"
  environment:
    - POSTGRES_USER=${DB_USER}
    - POSTGRES_PASSWORD=${DB_PASSWORD}
    - POSTGRES_DB=${DB_NAME}
  volumes:
    - postgres-data-dev:/var/lib/postgresql/data
  networks:
    - WeKnora-network-dev
  healthcheck:
    # Probe the database that is actually created. Without -d, pg_isready
    # targets a database named after the user, and the server logs a failed
    # connection attempt on every probe when DB_NAME differs from DB_USER.
    # `$$` is Compose's escape for a literal `$`, so the variables are expanded
    # by the container shell from the environment set above; the `:-` fallback
    # keeps the probe valid when POSTGRES_DB is empty.
    test:
      - CMD-SHELL
      - 'pg_isready -U "$$POSTGRES_USER" -d "$${POSTGRES_DB:-$$POSTGRES_USER}"'
    interval: 10s
    timeout: 10s
    retries: 3
    start_period: 30s
  restart: unless-stopped
  # Give PostgreSQL up to one minute to shut down before the container is killed.
  stop_grace_period: 1m
redis:
  # Redis 7.0 with append-only persistence and password authentication.
  image: redis:7.0-alpine
  container_name: WeKnora-redis-dev
  ports:
    - "${REDIS_PORT:-6379}:6379"
  volumes:
    - redis_data_dev:/data
  # List (exec) form: each element is passed as exactly one argv entry, so a
  # REDIS_PASSWORD containing whitespace or quote characters is not word-split
  # the way it is when the whole command is given as a single string.
  # "yes" is quoted so YAML does not read it as a boolean.
  command:
    - redis-server
    - "--appendonly"
    - "yes"
    - "--requirepass"
    - "${REDIS_PASSWORD}"
  restart: always
  networks:
    - WeKnora-network-dev
minio:
  # MinIO object storage; only started with the `minio` or `full` profile.
  image: minio/minio:latest
  container_name: WeKnora-minio-dev
  ports:
    - "${MINIO_PORT:-9000}:9000"
    - "${MINIO_CONSOLE_PORT:-9001}:9001"
  environment:
    - MINIO_ROOT_USER=${MINIO_ACCESS_KEY_ID:-minioadmin}
    - MINIO_ROOT_PASSWORD=${MINIO_SECRET_ACCESS_KEY:-minioadmin}
  command: server --console-address ":9001" /data
  volumes:
    - minio_data_dev:/data
  healthcheck:
    # Recent minio/minio images do not include curl, so a curl-based probe
    # fails to execute and the container is reported unhealthy. Use the
    # bundled MinIO client instead, matching the langfuse-minio service.
    test: ["CMD", "mc", "ready", "local"]
    interval: 30s
    timeout: 20s
    retries: 3
  networks:
    - WeKnora-network-dev
  profiles:
    - minio
    - full
qdrant:
  # Qdrant vector store; only started with the `qdrant` or `full` profile.
  container_name: WeKnora-qdrant-dev
  image: qdrant/qdrant:v1.16.2
  profiles: [qdrant, full]
  restart: unless-stopped
  networks: [WeKnora-network-dev]
  volumes:
    - qdrant_data_dev:/qdrant/storage
  ports:
    # REST port (QDRANT_REST_PORT on the host side).
    - "${QDRANT_REST_PORT:-6333}:6333"
    # Second published port (QDRANT_PORT on the host side).
    - "${QDRANT_PORT:-6334}:6334"
milvus:
  # Milvus started as `milvus run standalone`, configured for embedded etcd
  # and local storage; only started with the `milvus` or `full` profile.
  container_name: WeKnora-milvus-dev
  image: milvusdb/milvus:v2.6.11
  profiles: [milvus, full]
  restart: unless-stopped
  security_opt:
    - seccomp:unconfined
  command:
    - milvus
    - run
    - standalone
  environment:
    ETCD_USE_EMBED: "true"
    ETCD_DATA_DIR: /var/lib/milvus/etcd
    COMMON_STORAGETYPE: local
    DEPLOY_MODE: STANDALONE
  ports:
    - "${MILVUS_PORT:-19530}:19530"
    # Port 9091 is also the one probed by the healthcheck below.
    - "${MILVUS_HEALTH_PORT:-9091}:9091"
  volumes:
    - milvus_data_dev:/var/lib/milvus
  networks: [WeKnora-network-dev]
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:9091/healthz
    start_period: 90s
    interval: 30s
    timeout: 20s
    retries: 3
neo4j:
  # Neo4j graph database with the APOC plugin; only started with the `neo4j`
  # or `full` profile.
  image: neo4j:latest
  container_name: WeKnora-neo4j-dev
  volumes:
    - neo4j-data-dev:/data
  environment:
    - NEO4J_AUTH=${NEO4J_USERNAME:-neo4j}/${NEO4J_PASSWORD:-password}
    - NEO4J_apoc_export_file_enabled=true
    - NEO4J_apoc_import_file_enabled=true
    - NEO4J_apoc_import_file_use__neo4j__config=true
    # NEO4JLABS_PLUGINS was renamed to NEO4J_PLUGINS in Neo4j 5.0. Because the
    # image tag is `latest`, use the current name rather than the legacy alias.
    - NEO4J_PLUGINS=["apoc"]
  ports:
    - "7474:7474"
    - "7687:7687"
  restart: always
  networks:
    - WeKnora-network-dev
  profiles:
    - neo4j
    - full
# Sandbox image: used only for build/pull, not a long-running service. When the
# local app executes Skills it starts this image on demand via `docker run` and
# releases it as soon as the run is finished.
sandbox:
  container_name: WeKnora-sandbox-dev
  image: wechatopenai/weknora-sandbox:${WEKNORA_VERSION:-latest}
  build:
    dockerfile: docker/Dockerfile.sandbox
    context: .
  profiles: [full]
  # The container exits immediately and is never restarted.
  command:
    - "true"
  restart: "no"
docreader:
  # Document reader service, built from docker/Dockerfile.docreader and
  # published on port 50051 (the same address the gRPC health probe checks).
  container_name: WeKnora-docreader-dev
  image: wechatopenai/weknora-docreader:${WEKNORA_VERSION:-latest}
  build:
    dockerfile: docker/Dockerfile.docreader
    context: .
  restart: unless-stopped
  networks: [WeKnora-network-dev]
  # Lets the container reach services on the Docker host by name.
  extra_hosts:
    - "host.docker.internal:host-gateway"
  ports:
    - "${DOCREADER_PORT:-50051}:50051"
  volumes:
    - docreader-tmp-dev:/tmp/docreader
  environment:
    DOCREADER_IMAGE_OUTPUT_DIR: /tmp/docreader
    # Passed through from the host; empty when unset.
    MINERU_ENDPOINT: "${MINERU_ENDPOINT:-}"
    MAX_FILE_SIZE_MB: "${MAX_FILE_SIZE_MB:-}"
  healthcheck:
    test:
      - CMD
      - grpc_health_probe
      - "-addr=localhost:50051"
    start_period: 60s
    interval: 30s
    timeout: 10s
    retries: 3
jaeger:
  # Jaeger all-in-one; only started with the `jaeger` or `full` profile.
  container_name: WeKnora-jaeger-dev
  image: jaegertracing/all-in-one:latest
  profiles: [jaeger, full]
  restart: unless-stopped
  networks: [WeKnora-network-dev]
  environment:
    COLLECTOR_OTLP_ENABLED: "true"
    COLLECTOR_ZIPKIN_HOST_PORT: ":9411"
  volumes:
    - jaeger_data_dev:/var/lib/jaeger
  # Every port is published on the same host port number.
  ports:
    - "6831:6831/udp"
    - "6832:6832/udp"
    - "5778:5778"
    - "16686:16686"
    - "4317:4317"
    - "4318:4318"
    - "14250:14250"
    - "14268:14268"
    # Matches COLLECTOR_ZIPKIN_HOST_PORT above.
    - "9411:9411"
dex:
  # Dex (dexidp/dex) serving the configuration mounted from ./misc;
  # only started with the `dex` or `full` profile.
  container_name: WeKnora-dex-dev
  image: dexidp/dex:latest
  profiles: [dex, full]
  volumes:
    - ./misc/dex-config.yaml:/etc/dex/config.yaml
  command:
    - dex
    - serve
    - /etc/dex/config.yaml
  ports:
    - "5556:5556"
# ---------------------------------------------------------------------------
# Self-hosted Langfuse stack (dev counterpart of the production setup)
#
# Usage:
#   docker compose -f docker-compose.dev.yml --profile langfuse up -d
#
# Environment variables needed by the local app (go run):
#   export LANGFUSE_HOST=http://localhost:3000
#   export LANGFUSE_PUBLIC_KEY=pk-lf-xxx
#   export LANGFUSE_SECRET_KEY=sk-lf-xxx
#
# Reuses the existing dev postgres (a separate langfuse database) and redis (DB 1).
# Adds: clickhouse, minio, web, worker + a one-shot db-init; same layout as the production version.
# ---------------------------------------------------------------------------
# Reuses the ParadeDB image already used by dev, so no extra postgres image is pulled.
langfuse-db-init:
  # One-shot job: makes sure the Langfuse database exists on the shared dev
  # postgres service, then exits (restart: "no").
  image: paradedb/paradedb:v0.22.2-pg17
  container_name: WeKnora-langfuse-db-init-dev
  # Wait until postgres passes its healthcheck before running psql against it.
  depends_on:
    postgres:
      condition: service_healthy
  environment:
    # Picked up by psql so the script does not have to pass a password.
    PGPASSWORD: ${DB_PASSWORD}
  # ${LANGFUSE_DB_NAME:-langfuse} / ${DB_USER} are resolved to literal values by compose before the script is handed to the shell.
  entrypoint: ["sh", "-c"]
  # Script outline:
  #   1. Best-effort collation version refresh on template1 and postgres
  #      (errors are discarded via `|| true`).
  #   2. If pg_database already lists the target database, skip creation.
  #   3. Otherwise create it from template0, aborting on error (ON_ERROR_STOP=1).
  command:
    - |
      set -e
      echo "[langfuse-db-init] ensuring database '${LANGFUSE_DB_NAME:-langfuse}' exists..."
      # 先刷新现有库的 collation镜像 ICU 2.36 与宿主 2.41 不匹配时必须做),否则 CREATE DATABASE 会失败
      psql -h postgres -U ${DB_USER} -d postgres -v ON_ERROR_STOP=0 -c "ALTER DATABASE template1 REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true
      psql -h postgres -U ${DB_USER} -d postgres -v ON_ERROR_STOP=0 -c "ALTER DATABASE postgres REFRESH COLLATION VERSION;" >/dev/null 2>&1 || true
      # 幂等创建:已存在则跳过;不存在则从 template0 克隆template0 永远不会有 collation 漂移)
      if psql -h postgres -U ${DB_USER} -d postgres -tAc "SELECT 1 FROM pg_database WHERE datname='${LANGFUSE_DB_NAME:-langfuse}'" | grep -q 1; then
      echo "[langfuse-db-init] database '${LANGFUSE_DB_NAME:-langfuse}' already exists, skipping."
      else
      psql -h postgres -U ${DB_USER} -d postgres -v ON_ERROR_STOP=1 -c "CREATE DATABASE \"${LANGFUSE_DB_NAME:-langfuse}\" TEMPLATE template0;"
      echo "[langfuse-db-init] database '${LANGFUSE_DB_NAME:-langfuse}' created."
      fi
      echo "[langfuse-db-init] done."
  networks:
    - WeKnora-network-dev
  restart: "no"
  profiles:
    - langfuse
    - full
langfuse-clickhouse:
  # ClickHouse server for the self-hosted Langfuse stack; only started with
  # the `langfuse` or `full` profile. No ports are published to the host.
  container_name: WeKnora-langfuse-clickhouse-dev
  image: clickhouse/clickhouse-server:24.8
  profiles: [langfuse, full]
  restart: unless-stopped
  networks: [WeKnora-network-dev]
  # Run as a fixed uid:gid.
  user: "101:101"
  environment:
    CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse}
    CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse}
    CLICKHOUSE_DB: default
  volumes:
    - langfuse_clickhouse_data_dev:/var/lib/clickhouse
    - langfuse_clickhouse_logs_dev:/var/log/clickhouse-server
  healthcheck:
    # Shell-form probe against the HTTP ping endpoint.
    test:
      - CMD-SHELL
      - wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1
    start_period: 10s
    interval: 5s
    timeout: 5s
    retries: 10
langfuse-minio:
  # Dedicated MinIO instance for Langfuse uploads; only started with the
  # `langfuse` or `full` profile.
  container_name: WeKnora-langfuse-minio-dev
  image: minio/minio:RELEASE.2025-09-07T16-13-09Z
  profiles: [langfuse, full]
  restart: unless-stopped
  networks: [WeKnora-network-dev]
  # Run through a shell so the `langfuse` directory is created under /data
  # before the server starts.
  entrypoint:
    - sh
  command:
    - "-c"
    - 'mkdir -p /data/langfuse && minio server --address ":9000" --console-address ":9001" /data'
  environment:
    MINIO_ROOT_USER: ${LANGFUSE_MINIO_USER:-langfuseminio}
    MINIO_ROOT_PASSWORD: ${LANGFUSE_MINIO_PASSWORD:-langfuseminiosecret}
  ports:
    # S3 API on host port 9100 by default.
    - "${LANGFUSE_MINIO_S3_PORT:-9100}:9000"
    # Console on host port 9101 by default.
    - "${LANGFUSE_MINIO_CONSOLE_PORT:-9101}:9001"
  volumes:
    - langfuse_minio_data_dev:/data
  healthcheck:
    test:
      - CMD
      - mc
      - ready
      - local
    interval: 5s
    timeout: 10s
    retries: 5
langfuse-worker:
  # Langfuse worker container (langfuse/langfuse-worker, tag 3); only started
  # with the `langfuse` or `full` profile.
  image: langfuse/langfuse-worker:3
  container_name: WeKnora-langfuse-worker-dev
  restart: unless-stopped
  # Startup prerequisites. Anchored so langfuse-web can reuse the same mapping.
  depends_on: &langfuse-dev-depends-on
    langfuse-db-init:
      condition: service_completed_successfully
    redis:
      condition: service_started
    langfuse-clickhouse:
      condition: service_healthy
    langfuse-minio:
      condition: service_healthy
  # The wrapper entrypoint URL-encodes DB_PASSWORD / REDIS_PASSWORD before assembling the URLs,
  # so passwords containing '@' / '#' and similar characters do not make Prisma fail with P1013.
  # Note: overriding entrypoint in compose clears the image's default CMD, so the original command is hard-coded on the last line.
  # `$$` is compose's escape for a literal `$`: these expansions are performed
  # by the container shell, not by compose.
  entrypoint:
    - /bin/sh
    - -ec
    - |
      _enc() { node -e 'process.stdout.write(encodeURIComponent(process.argv[1]))' "$$1"; }
      DU=$$(_enc "$$_LF_DB_USER")
      DP=$$(_enc "$$_LF_DB_PASSWORD")
      RP=$$(_enc "$$_LF_REDIS_PASSWORD")
      export DATABASE_URL="postgresql://$$DU:$$DP@postgres:5432/$$_LF_DB_NAME"
      export REDIS_CONNECTION_STRING="redis://:$$RP@redis:6379/$$_LF_REDIS_DB"
      unset _LF_DB_USER _LF_DB_PASSWORD _LF_REDIS_PASSWORD
      exec dumb-init -- ./worker/entrypoint.sh node worker/dist/index.js
  # Shared Langfuse environment. Anchored so langfuse-web can merge it.
  environment: &langfuse-dev-env
    # Raw credentials (not URL-encoded); read and assembled by the entrypoint wrapper.
    _LF_DB_USER: ${DB_USER}
    _LF_DB_PASSWORD: ${DB_PASSWORD}
    _LF_DB_NAME: ${LANGFUSE_DB_NAME:-langfuse}
    _LF_REDIS_PASSWORD: ${REDIS_PASSWORD}
    _LF_REDIS_DB: ${LANGFUSE_REDIS_DB:-1}
    # Placeholder defaults for local development; override via the
    # corresponding LANGFUSE_* variables.
    SALT: ${LANGFUSE_SALT:-weknora-langfuse-dev-salt-change-me}
    ENCRYPTION_KEY: ${LANGFUSE_ENCRYPTION_KEY:-0000000000000000000000000000000000000000000000000000000000000000}
    NEXTAUTH_URL: ${LANGFUSE_NEXTAUTH_URL:-http://localhost:3000}
    NEXTAUTH_SECRET: ${LANGFUSE_NEXTAUTH_SECRET:-weknora-langfuse-dev-nextauth-secret-change-me}
    TELEMETRY_ENABLED: ${LANGFUSE_TELEMETRY_ENABLED:-false}
    LANGFUSE_ENABLE_EXPERIMENTAL_FEATURES: "false"
    # ClickHouse is addressed by its compose service name, with the same
    # user/password defaults as the langfuse-clickhouse service.
    CLICKHOUSE_URL: http://langfuse-clickhouse:8123
    CLICKHOUSE_MIGRATION_URL: clickhouse://langfuse-clickhouse:9000
    CLICKHOUSE_USER: ${LANGFUSE_CLICKHOUSE_USER:-clickhouse}
    CLICKHOUSE_PASSWORD: ${LANGFUSE_CLICKHOUSE_PASSWORD:-clickhouse}
    CLICKHOUSE_CLUSTER_ENABLED: "false"
    # Event uploads go to the `langfuse` bucket on langfuse-minio over the compose network.
    LANGFUSE_S3_EVENT_UPLOAD_BUCKET: langfuse
    LANGFUSE_S3_EVENT_UPLOAD_REGION: auto
    LANGFUSE_S3_EVENT_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_USER:-langfuseminio}
    LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_PASSWORD:-langfuseminiosecret}
    LANGFUSE_S3_EVENT_UPLOAD_ENDPOINT: http://langfuse-minio:9000
    LANGFUSE_S3_EVENT_UPLOAD_FORCE_PATH_STYLE: "true"
    LANGFUSE_S3_EVENT_UPLOAD_PREFIX: events/
    # Media uploads use the same bucket, but the endpoint defaults to the
    # host-published MinIO port (9100) instead of the in-network address.
    # NOTE(review): presumably because media URLs are opened from the browser — confirm.
    LANGFUSE_S3_MEDIA_UPLOAD_BUCKET: langfuse
    LANGFUSE_S3_MEDIA_UPLOAD_REGION: auto
    LANGFUSE_S3_MEDIA_UPLOAD_ACCESS_KEY_ID: ${LANGFUSE_MINIO_USER:-langfuseminio}
    LANGFUSE_S3_MEDIA_UPLOAD_SECRET_ACCESS_KEY: ${LANGFUSE_MINIO_PASSWORD:-langfuseminiosecret}
    LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT: ${LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT:-http://localhost:9100}
    LANGFUSE_S3_MEDIA_UPLOAD_FORCE_PATH_STYLE: "true"
    LANGFUSE_S3_MEDIA_UPLOAD_PREFIX: media/
  networks:
    - WeKnora-network-dev
  profiles:
    - langfuse
    - full
langfuse-web:
  # Langfuse web container (langfuse/langfuse, tag 3), published on host port
  # 3000 by default; only started with the `langfuse` or `full` profile.
  image: langfuse/langfuse:3
  container_name: WeKnora-langfuse-web-dev
  restart: unless-stopped
  # Same startup prerequisites as langfuse-worker (alias of its anchor).
  depends_on: *langfuse-dev-depends-on
  ports:
    - "${LANGFUSE_WEB_PORT:-3000}:3000"
  # Wrapper entrypoint: URL-encodes the raw DB / Redis credentials, exports
  # DATABASE_URL and REDIS_CONNECTION_STRING, unsets the raw values, then
  # execs the web server. `$$` is compose's escape for a literal `$`.
  # When NEXT_PUBLIC_LANGFUSE_CLOUD_REGION is non-empty, node is started with
  # dd-trace preloaded; otherwise the server is started without it.
  entrypoint:
    - /bin/sh
    - -ec
    - |
      _enc() { node -e 'process.stdout.write(encodeURIComponent(process.argv[1]))' "$$1"; }
      DU=$$(_enc "$$_LF_DB_USER")
      DP=$$(_enc "$$_LF_DB_PASSWORD")
      RP=$$(_enc "$$_LF_REDIS_PASSWORD")
      export DATABASE_URL="postgresql://$$DU:$$DP@postgres:5432/$$_LF_DB_NAME"
      export REDIS_CONNECTION_STRING="redis://:$$RP@redis:6379/$$_LF_REDIS_DB"
      unset _LF_DB_USER _LF_DB_PASSWORD _LF_REDIS_PASSWORD
      if [ -n "$$NEXT_PUBLIC_LANGFUSE_CLOUD_REGION" ]; then
      exec dumb-init -- ./web/entrypoint.sh node --import dd-trace/initialize.mjs ./web/server.js --keepAliveTimeout 110000
      else
      exec dumb-init -- ./web/entrypoint.sh node ./web/server.js --keepAliveTimeout 110000
      fi
  environment:
    # Start from the environment shared with langfuse-worker, then add the
    # web-only variables below.
    <<: *langfuse-dev-env
    # Passed through from the host; each is empty when unset.
    # NOTE(review): presumably Langfuse's initial org / project / user
    # provisioning settings — confirm against the Langfuse docs.
    LANGFUSE_INIT_ORG_ID: ${LANGFUSE_INIT_ORG_ID:-}
    LANGFUSE_INIT_ORG_NAME: ${LANGFUSE_INIT_ORG_NAME:-}
    LANGFUSE_INIT_PROJECT_ID: ${LANGFUSE_INIT_PROJECT_ID:-}
    LANGFUSE_INIT_PROJECT_NAME: ${LANGFUSE_INIT_PROJECT_NAME:-}
    LANGFUSE_INIT_PROJECT_PUBLIC_KEY: ${LANGFUSE_INIT_PROJECT_PUBLIC_KEY:-}
    LANGFUSE_INIT_PROJECT_SECRET_KEY: ${LANGFUSE_INIT_PROJECT_SECRET_KEY:-}
    LANGFUSE_INIT_USER_EMAIL: ${LANGFUSE_INIT_USER_EMAIL:-}
    LANGFUSE_INIT_USER_NAME: ${LANGFUSE_INIT_USER_NAME:-}
    LANGFUSE_INIT_USER_PASSWORD: ${LANGFUSE_INIT_USER_PASSWORD:-}
  networks:
    - WeKnora-network-dev
  profiles:
    - langfuse
    - full
networks:
  # Bridge network that the dev services in this file attach to.
  WeKnora-network-dev: {driver: bridge}
volumes:
  # Named volumes with default settings, one per persistent data directory.
  # Core services
  postgres-data-dev: {}
  redis_data_dev: {}
  docreader-tmp-dev: {}
  # Optional profile services
  minio_data_dev: {}
  qdrant_data_dev: {}
  milvus_data_dev: {}
  neo4j-data-dev: {}
  jaeger_data_dev: {}
  # Self-hosted Langfuse stack
  langfuse_clickhouse_data_dev: {}
  langfuse_clickhouse_logs_dev: {}
  langfuse_minio_data_dev: {}