diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 972d7ef8..e768f2b7 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -119,6 +119,61 @@ services: - qdrant - full + # OpenSearch k-NN (Phase 3 driver). Single-node dev profile with the + # security plugin disabled → plain HTTP on :9200, no auth/TLS. The image + # bundles the opensearch-knn plugin. For production use a secured, + # multi-node cluster. See docs/dev/opensearch-integration-test.md. + opensearch: + image: opensearchproject/opensearch:3.3.2 + container_name: WeKnora-opensearch-dev + environment: + - discovery.type=single-node + # dev only: plain HTTP on :9200, no TLS/auth. The entrypoint script + # honours DISABLE_SECURITY_PLUGIN (env var) to skip both the demo + # install and the OPENSEARCH_INITIAL_ADMIN_PASSWORD requirement. + - DISABLE_SECURITY_PLUGIN=true + - DISABLE_INSTALL_DEMO_CONFIG=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m + - bootstrap.memory_lock=true + ulimits: + memlock: + soft: -1 + hard: -1 + ports: + - "${OPENSEARCH_PORT:-9200}:9200" + volumes: + - opensearch_data_dev:/usr/share/opensearch/data + networks: + - WeKnora-network-dev + restart: unless-stopped + profiles: + - opensearch + - full + # Also a member of opensearch-ui so the Dashboards depends_on resolves + # when only that profile is active (`--profile opensearch-ui up`). + - opensearch-ui + + # Optional UI for visual index/mapping/query inspection. Decoupled from the + # "opensearch" / "full" profiles so the heavy Dashboards container is never + # forced up alongside the cluster — the driver e2e is fully curl-verifiable + # against :9200. Start it on demand with `--profile opensearch-ui up -d` + # (depends_on pulls the cluster in automatically). 
+ opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:3.3.0 + container_name: WeKnora-opensearch-dashboards-dev + environment: + - OPENSEARCH_HOSTS=["http://opensearch:9200"] + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true + ports: + - "${OPENSEARCH_DASHBOARDS_PORT:-5601}:5601" + networks: + - WeKnora-network-dev + depends_on: + - opensearch + restart: unless-stopped + profiles: + - opensearch-ui + milvus: image: milvusdb/milvus:v2.6.11 container_name: WeKnora-milvus-dev @@ -468,6 +523,7 @@ volumes: neo4j-data-dev: jaeger_data_dev: qdrant_data_dev: + opensearch_data_dev: milvus_data_dev: docreader-tmp-dev: langfuse_clickhouse_data_dev: diff --git a/docs/dev/opensearch-integration-test.md b/docs/dev/opensearch-integration-test.md new file mode 100644 index 00000000..0edd5f1c --- /dev/null +++ b/docs/dev/opensearch-integration-test.md @@ -0,0 +1,109 @@ +# OpenSearch k-NN driver — local integration test + +This guide brings up a single-node OpenSearch cluster and exercises the +OpenSearch retrieve engine end to end. The driver lives in +`internal/application/repository/retriever/opensearch/`. + +## 1. Start a dev cluster + +```bash +docker compose -f docker-compose.dev.yml --profile opensearch up -d +``` + +This starts: + +- `opensearch` on `http://localhost:9200` — single-node, **security plugin + disabled** (plain HTTP, no auth/TLS). The image bundles the + `opensearch-knn` plugin. + +> **OpenSearch Dashboards is optional** and lives in a separate +> `opensearch-ui` profile, so it is *not* started by `--profile opensearch`. +> The whole integration test below is curl-verifiable against `:9200`. 
If you +> want the web UI (Dev Tools console / visual index inspection), start it on +> demand: +> +> ```bash +> docker compose -f docker-compose.dev.yml --profile opensearch-ui up -d +> # opensearch-dashboards on http://localhost:5601 (depends_on pulls the cluster in) +> ``` + +Verify: + +```bash +curl -s localhost:9200 | jq '.version.distribution, .version.number' +# "opensearch" "3.3.2" +curl -s 'localhost:9200/_cat/plugins?format=json' | jq -r '.[].component' | grep opensearch-knn +``` + +> Production clusters must enable the security plugin (TLS + auth). The dev +> profile disables it only to keep local setup trivial. When connecting to a +> secured cluster, set `username` / `password` and — for self-signed certs in +> dev only — `insecure_skip_verify=true`. + +## 2. Register the store + +### Option A — DB store (UI / API) + +`POST /api/v1/vector-stores`: + +```json +{ + "name": "opensearch-local", + "engine_type": "opensearch", + "connection_config": { "addr": "http://localhost:9200" }, + "index_config": { + "number_of_shards": 1, + "number_of_replicas": 0, + "hnsw_m": 16, + "hnsw_ef_construction": 100, + "knn_engine": "lucene" + } +} +``` + +`CreateStore` runs the connection probe (version + k-NN plugin) before +persisting; a bad address / unsupported version / missing plugin is rejected +with `400`. + +### Option B — env store + +```bash +export RETRIEVE_DRIVER=opensearch +export OPENSEARCH_ADDR=http://localhost:9200 +# export OPENSEARCH_USERNAME / OPENSEARCH_PASSWORD for a secured cluster +# export OPENSEARCH_INSECURE_SKIP_VERIFY=true # self-signed dev TLS only +``` + +## 3. Single-node note (important) + +On a single-node cluster, any index created with `number_of_replicas >= 1` +leaves its replica shard **unassigned**, so the index health goes **Yellow**. +Yellow does **not** block reads or writes — it is safe for local testing — but +to keep the cluster Green set **`number_of_replicas: 0`** at store +registration (as in the Option A example above). 
The driver default is `1` +(it assumes a ≥2-node cluster). + +## 4. Exercise the flow + +1. Bind a knowledge base to the store and ingest a few documents. +2. Confirm the per-dimension index appears: + `curl -s 'localhost:9200/_cat/indices?v' | grep weknora` + (e.g. `weknora__768` + alias, plus `weknora__keywords`). +3. Run a retrieval query against the bound KB and confirm hits come back. +4. Copy the KB to another KB and confirm the docs are reindexed + (`opensearch.reindex_executed` audit event). +5. Toggle chunk enabled-status / tag and confirm `_update_by_query` applies it. + +## 5. Tear down + +```bash +docker compose -f docker-compose.dev.yml --profile opensearch --profile opensearch-ui down -v +``` + +## Scope notes + +- Large-batch async reindex / delete (task polling) is a follow-up; the sync + paths handle typical KB sizes (pagination is bounded by `max_result_window`, + default 10000). +- Native `hybrid` query + search pipeline is out of scope — fusion stays at the + service layer (RRF). diff --git a/frontend/src/api/vector-store.ts b/frontend/src/api/vector-store.ts index f9aeafe2..dc164c51 100644 --- a/frontend/src/api/vector-store.ts +++ b/frontend/src/api/vector-store.ts @@ -29,6 +29,16 @@ export interface FieldSchema { sensitive?: boolean description?: string default?: any + // Inclusive bounds for number fields (omitempty on the backend). When + // absent the UI falls back to per-field heuristics (isReplicaField). + min?: number + max?: number + // Closed value set for string fields (e.g. knn_engine ∈ lucene|faiss). + // When non-empty the UI renders a select instead of a free-text input. + enum?: string[] + // Marks a field that cannot change after store creation. Informational + // for now (edit mode is fully read-only); kept for forward use. 
+ immutable?: boolean } // ===== API Functions ===== diff --git a/frontend/src/i18n/locales/en-US.ts b/frontend/src/i18n/locales/en-US.ts index 80d5669f..67c0d4d5 100755 --- a/frontend/src/i18n/locales/en-US.ts +++ b/frontend/src/i18n/locales/en-US.ts @@ -1107,11 +1107,17 @@ export default { shards_num: 'Shards', replica_number: 'In-memory Replicas', desired_shard_count: 'Shard Count', + insecure_skip_verify: 'Skip TLS Verification', + hnsw_m: 'HNSW M (graph degree)', + hnsw_ef_construction: 'HNSW ef_construction', + hnsw_ef_search: 'HNSW ef_search', + knn_engine: 'k-NN Engine', }, envTag: 'DEFAULT', testConnection: 'Test Connection', testing: 'Testing...', immutableNotice: 'Engine type, connection, and index settings cannot be changed after creation.\nTo change these, delete and recreate.', + insecureSkipVerifyWarning: 'Disabling TLS certificate verification exposes the connection to man-in-the-middle attacks. Use only for self-signed development clusters — never in production.', validation: { nameRequired: 'Name is required', engineTypeRequired: 'Engine type is required', diff --git a/frontend/src/i18n/locales/ko-KR.ts b/frontend/src/i18n/locales/ko-KR.ts index a24e8651..587faefe 100755 --- a/frontend/src/i18n/locales/ko-KR.ts +++ b/frontend/src/i18n/locales/ko-KR.ts @@ -967,11 +967,17 @@ export default { shards_num: "샤드 수", replica_number: "인메모리 레플리카", desired_shard_count: "샤드 수", + insecure_skip_verify: "TLS 인증서 검증 생략", + hnsw_m: "HNSW M (그래프 차수)", + hnsw_ef_construction: "HNSW ef_construction", + hnsw_ef_search: "HNSW ef_search", + knn_engine: "k-NN 엔진", }, envTag: "DEFAULT", testConnection: "연결 테스트", testing: "테스트 중...", immutableNotice: "엔진 타입, 연결 정보, 인덱스 설정은 생성 후 변경할 수 없습니다.\n변경이 필요하면 삭제 후 다시 생성하세요.", + insecureSkipVerifyWarning: "TLS 인증서 검증을 끄면 중간자 공격에 노출됩니다. 
자체 서명 인증서를 쓰는 개발 클러스터에서만 사용하고, 운영 환경에서는 절대 사용하지 마세요.", validation: { nameRequired: "이름은 필수입니다", engineTypeRequired: "엔진 타입은 필수입니다", diff --git a/frontend/src/i18n/locales/ru-RU.ts b/frontend/src/i18n/locales/ru-RU.ts index 09bb042b..4579c790 100755 --- a/frontend/src/i18n/locales/ru-RU.ts +++ b/frontend/src/i18n/locales/ru-RU.ts @@ -1019,11 +1019,17 @@ export default { shards_num: 'Шарды', replica_number: 'Реплики в памяти', desired_shard_count: 'Количество шардов', + insecure_skip_verify: 'Пропустить проверку TLS', + hnsw_m: 'HNSW M (степень графа)', + hnsw_ef_construction: 'HNSW ef_construction', + hnsw_ef_search: 'HNSW ef_search', + knn_engine: 'Движок k-NN', }, envTag: 'DEFAULT', testConnection: 'Тест подключения', testing: 'Тестирование...', immutableNotice: 'Тип движка, подключение и настройки индекса нельзя изменить после создания.\nДля изменения удалите и создайте заново.', + insecureSkipVerifyWarning: 'Отключение проверки сертификата TLS делает соединение уязвимым для атак «человек посередине». 
Используйте только для dev-кластеров с самоподписанными сертификатами — никогда в продакшене.', validation: { nameRequired: 'Название обязательно', engineTypeRequired: 'Тип движка обязателен', diff --git a/frontend/src/i18n/locales/zh-CN.ts b/frontend/src/i18n/locales/zh-CN.ts index 0f0888d0..20009942 100755 --- a/frontend/src/i18n/locales/zh-CN.ts +++ b/frontend/src/i18n/locales/zh-CN.ts @@ -965,11 +965,17 @@ export default { shards_num: "分片数", replica_number: "内存副本数", desired_shard_count: "分片数", + insecure_skip_verify: "跳过 TLS 证书校验", + hnsw_m: "HNSW M(图度数)", + hnsw_ef_construction: "HNSW ef_construction", + hnsw_ef_search: "HNSW ef_search", + knn_engine: "k-NN 引擎", }, envTag: "DEFAULT", testConnection: "测试连接", testing: "测试中...", immutableNotice: "创建后无法更改引擎类型、连接和索引设置。\n如需更改,请删除后重新创建。", + insecureSkipVerifyWarning: "关闭 TLS 证书校验会使连接面临中间人攻击风险。仅可用于自签名证书的开发集群,切勿在生产环境使用。", validation: { nameRequired: "名称为必填项", engineTypeRequired: "引擎类型为必填项", diff --git a/frontend/src/views/settings/VectorStoreSettings.vue b/frontend/src/views/settings/VectorStoreSettings.vue index 39149fcf..88774ecd 100644 --- a/frontend/src/views/settings/VectorStoreSettings.vue +++ b/frontend/src/views/settings/VectorStoreSettings.vue @@ -165,10 +165,15 @@ :label="fieldLabel(field.name)" :name="`connection_config.${field.name}`" > - +
+ +
+ {{ t('vectorStoreSettings.insecureSkipVerifyWarning') }} +
+