feat: Enhance agent configuration and localization for web search prompts

- Updated agent configuration to support separate system prompts for web search enabled and disabled states.
- Removed deprecated agent configuration parameters to streamline settings management.
- Enhanced UI components in AgentSettings.vue to allow configuration of custom prompts based on web search status.
- Improved localization in English, Russian, and Chinese for new prompt settings and UI elements.
- Refactored related API and service logic to accommodate changes in agent configuration structure.
2026-06-04 13:30:32 +08:00 · 2025-11-19 19:27:25 +08:00
parent ddf6b4e748
commit 3bf2a14499
44 changed files with 2412 additions and 1646 deletions
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -590,19 +590,6 @@ extract:
      请随机生成一段文本，要求内容与 %s 等相关，字数在 [50-200] 之间，并且尽量包含一些与这些标签相关的专业术语或典型元素，使文本更具针对性和相关性。
    with_no_tag: |
      请随机生成一段文本，内容请自由发挥，字数在 [50-200] 之间。 
-# Agent 配置
-agent:
-  enabled: true
-  default_max_iterations: 5
-  default_temperature: 0.7
-  reflection_enabled: false
-  default_tools:
-    - knowledge_search
-    - multi_kb_search
-    - list_knowledge_bases
-    - get_related_chunks
-    - query_knowledge_graph
-    - get_document_info

 # WebSearch 配置
 web_search:
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -79,7 +79,7 @@ services:
      - WeKnora-network-dev

  docreader:
-    image: wechatopenai/weknora-docreader:latest
+    image: wechatopenai/weknora-docreader:main
    container_name: WeKnora-docreader-dev
    ports:
      - "${DOCREADER_PORT:-50051}:50051"
--- a/frontend/src/api/system/index.ts
+++ b/frontend/src/api/system/index.ts
@@ -32,7 +32,8 @@ export interface AgentConfig {
  thinking_model_id: string
  rerank_model_id: string
  knowledge_bases?: string[]
-  system_prompt?: string  // System prompt template with placeholders (optional)
+  system_prompt_web_enabled?: string  // Custom system prompt when web search is enabled
+  system_prompt_web_disabled?: string // Custom system prompt when web search is disabled
  use_custom_system_prompt?: boolean
  available_tools?: ToolDefinition[]  // GET 响应中包含，POST/PUT 不需要
  available_placeholders?: PlaceholderDefinition[]  // GET 响应中包含，POST/PUT 不需要
@@ -42,7 +43,7 @@ export interface ConversationConfig {
  prompt: string
  context_template: string
  temperature: number
-  max_tokens: number
+  max_completion_tokens: number
  use_custom_system_prompt?: boolean
  use_custom_context_template?: boolean
  max_rounds: number
--- a/frontend/src/i18n/locales/en-US.ts
+++ b/frontend/src/i18n/locales/en-US.ts
@@ -1343,7 +1343,10 @@ export default {
      hintSuffix: 'will show available placeholders automatically',
      custom: 'Custom Prompt',
      disabledHint: 'Currently using the default prompt. Enable custom to apply the content below.',
-      placeholder: 'Enter the system prompt, or leave blank to use the default...'
+      placeholder: 'Enter the system prompt, or leave blank to use the default...',
+      tabHint: 'Configure separate prompts for sessions with web search on or off.',
+      tabWebOn: 'Web search enabled',
+      tabWebOff: 'Web search disabled'
    },
    reset: {
      header: 'Reset to Default Prompt',
--- a/frontend/src/i18n/locales/ru-RU.ts
+++ b/frontend/src/i18n/locales/ru-RU.ts
@@ -1207,7 +1207,10 @@ export default {
      hintSuffix: 'откроется список доступных подстановок',
      custom: 'Пользовательский промпт',
      disabledHint: 'Сейчас используется промпт по умолчанию. Включите пользовательский, чтобы применить содержимое ниже.',
-      placeholder: 'Введите системный промпт или оставьте пустым для значения по умолчанию...'
+      placeholder: 'Введите системный промпт или оставьте пустым для значения по умолчанию...',
+      tabHint: 'Настройте разные промпты для режимов с включённым и отключённым веб-поиском.',
+      tabWebOn: 'Веб-поиск включён',
+      tabWebOff: 'Веб-поиск отключён'
    },
    reset: {
      header: 'Сбросить к промпту по умолчанию',
--- a/frontend/src/i18n/locales/zh-CN.ts
+++ b/frontend/src/i18n/locales/zh-CN.ts
@@ -1348,6 +1348,9 @@ export default {
      custom: "自定义 Prompt",
      disabledHint: "当前使用系统默认 Prompt，开启自定义后才会应用下方内容。",
      placeholder: "请输入系统 Prompt，或留空使用默认 Prompt...",
+      tabHint: "根据是否启用网络搜索分别配置系统 Prompt。",
+      tabWebOn: "网络搜索已启用",
+      tabWebOff: "网络搜索未启用",
    },
    reset: {
      header: "恢复默认 Prompt",
--- a/frontend/src/stores/settings.ts
+++ b/frontend/src/stores/settings.ts
@@ -20,6 +20,9 @@ interface AgentConfig {
  thinkingModelId: string;
  rerankModelId: string;
  allowedTools: string[];
+  system_prompt_web_enabled?: string;
+  system_prompt_web_disabled?: string;
+  use_custom_system_prompt?: boolean;
 }

 // 单个模型项接口
@@ -60,7 +63,10 @@ const defaultSettings: Settings = {
    temperature: 0.7,
    thinkingModelId: "",
    rerankModelId: "",
-    allowedTools: ["knowledge_search", "multi_kb_search", "list_knowledge_bases"]
+    allowedTools: ["knowledge_search", "multi_kb_search", "list_knowledge_bases"],
+    system_prompt_web_enabled: "",
+    system_prompt_web_disabled: "",
+    use_custom_system_prompt: false
  },
  selectedKnowledgeBases: [],  // 默认为空数组
  modelConfig: {
--- a/frontend/src/utils/tool-icons.ts
+++ b/frontend/src/utils/tool-icons.ts
@@ -8,8 +8,8 @@ export const toolIcons: Record<string, string> = {
    multi_kb_search: '🔍',
    knowledge_search: '📚',
    get_chunk_detail: '📄',
-    get_related_chunks: '🔗',
    list_knowledge_bases: '📂',
+    list_knowledge_chunks: '🧩',
    get_document_info: 'ℹ️',
    query_knowledge_graph: '🕸️',
    think: '💭',
@@ -43,11 +43,11 @@ export function getToolDisplayName(toolName: string): string {
        multi_kb_search: '跨库搜索',
        knowledge_search: '知识库搜索',
        get_chunk_detail: '获取片段详情',
-        get_related_chunks: '获取相关片段',
+        list_knowledge_chunks: '查看知识分块',
        list_knowledge_bases: '列出知识库',
        get_document_info: '获取文档信息',
        query_knowledge_graph: '查询知识图谱',
-        think: '思考',
+        think: '深度思考',
        todo_write: '制定计划',
    };
    return displayNames[toolName] || toolName;
--- a/frontend/src/views/chat/components/AgentStreamDisplay.vue
+++ b/frontend/src/views/chat/components/AgentStreamDisplay.vue
@@ -213,7 +213,7 @@ const TOOL_NAME_I18N: Record<string, string> = {
  web_search: '网络搜索',
  web_fetch: '网页抓取',
  get_document_info: '获取文档信息',
-  get_related_chunks: '查找相关片段',
+  list_knowledge_chunks: '查看知识分块',
  get_related_documents: '查找相关文档',
  get_document_content: '获取文档内容',
  todo_write: '计划管理',
@@ -511,7 +511,7 @@ const intermediateStepsSummary = computed(() => {
  if (toolCalls.length > 0) {
    const toolNames = toolCalls.map(name => {
      if (name === 'get_document_info') return '获取文档';
-      if (name === 'get_related_chunks') return '获取相关片段';
+      if (name === 'list_knowledge_chunks') return '查看知识分块';
      return name;
    });
    if (toolNames.length === 1) {
@@ -1080,9 +1080,10 @@ const getToolSummary = (event: any): string => {
    if (toolData?.title) {
      return `获取文档：${toolData.title}`;
    }
-  } else if (toolName === 'get_related_chunks') {
-    if (toolData?.count !== undefined) {
-      return `找到 ${toolData.count} 个相关片段`;
+  } else if (toolName === 'list_knowledge_chunks') {
+    if (toolData?.fetched_chunks !== undefined) {
+      const title = toolData?.knowledge_title || toolData?.knowledge_id || '文档';
+      return `查看 ${title} 的 ${toolData.fetched_chunks}/${toolData.total_chunks ?? '?'} 个分块`;
    }
  } else if (toolName === 'todo_write') {
    // Extract steps from tool data
@@ -1183,7 +1184,7 @@ const getToolIcon = (toolName: string): string => {
    return knowledgeIcon;
  } else if (toolName === 'web_search') {
    return webSearchGlobeGreenIcon;
-  } else if (toolName === 'get_document_info' || toolName === 'get_related_chunks') {
+  } else if (toolName === 'get_document_info' || toolName === 'list_knowledge_chunks') {
    return documentIcon;
  } else if (toolName === 'todo_write') {
    return fileAddIcon;
--- a/frontend/src/views/chat/components/tool-results/DocumentInfo.vue
+++ b/frontend/src/views/chat/components/tool-results/DocumentInfo.vue
@@ -16,14 +16,6 @@
        </div>
      </div>

-      <div class="status-section">
-        <div class="status-title">{{ $t('chat.statusDescription') }}</div>
-        <div class="status-list">
-          <div class="status-item">✓ {{ $t('chat.statusIndexed') }}</div>
-          <div class="status-item">✓ {{ $t('chat.statusSearchable') }}</div>
-          <div class="status-item">✓ {{ $t('chat.statusChunkDetailAvailable') }}</div>
-        </div>
-      </div>
    </div>
  </div>
 </template>
--- a/frontend/src/views/settings/AgentSettings.vue
+++ b/frontend/src/views/settings/AgentSettings.vue
@@ -151,22 +151,51 @@
              {{ $t('common.resetToDefault') }}
            </t-button>
          </div>
+          <p class="prompt-tab-hint">
+            {{ $t('agentSettings.systemPrompt.tabHint') }}
+          </p>
          <p v-if="!localUseCustomSystemPrompt" class="prompt-disabled-hint">
            {{ $t('agentSettings.systemPrompt.disabledHint') }}
          </p>
-          <div v-if="localUseCustomSystemPrompt" class="prompt-textarea-wrapper">
-            <t-textarea
-              ref="promptTextareaRef"
-              v-model="localSystemPrompt"
-              :autosize="{ minRows: 15, maxRows: 30 }"
-              :placeholder="$t('agentSettings.systemPrompt.placeholder')"
-              @blur="handleSystemPromptChange"
-              @input="handlePromptInput"
-              @keydown="handlePromptKeydown"
-              :readonly="!localUseCustomSystemPrompt"
-              :class="{ 'prompt-textarea-readonly': !localUseCustomSystemPrompt }"
-              style="width: 100%; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 13px;"
-            />
+          <div v-if="localUseCustomSystemPrompt" class="system-prompt-tabs">
+            <t-tabs
+              v-model="activeSystemPromptTab"
+              class="system-prompt-variant-tabs"
+              theme="normal"
+            >
+              <t-tab-panel value="web-enabled" :label="$t('agentSettings.systemPrompt.tabWebOn')">
+                <div v-if="activeSystemPromptTab === 'web-enabled'" class="prompt-textarea-wrapper">
+                  <t-textarea
+                    ref="promptTextareaRef"
+                    v-model="localSystemPromptWebEnabled"
+                    :autosize="{ minRows: 15, maxRows: 30 }"
+                    :placeholder="$t('agentSettings.systemPrompt.placeholder')"
+                    @blur="handleSystemPromptChange('web-enabled', $event)"
+                    @input="handlePromptInput"
+                    @keydown="handlePromptKeydown"
+                    :readonly="!localUseCustomSystemPrompt"
+                    :class="{ 'prompt-textarea-readonly': !localUseCustomSystemPrompt }"
+                    style="width: 100%; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 13px;"
+                  />
+                </div>
+              </t-tab-panel>
+              <t-tab-panel value="web-disabled" :label="$t('agentSettings.systemPrompt.tabWebOff')">
+                <div v-if="activeSystemPromptTab === 'web-disabled'" class="prompt-textarea-wrapper">
+                  <t-textarea
+                    ref="promptTextareaRef"
+                    v-model="localSystemPromptWebDisabled"
+                    :autosize="{ minRows: 15, maxRows: 30 }"
+                    :placeholder="$t('agentSettings.systemPrompt.placeholder')"
+                    @blur="handleSystemPromptChange('web-disabled', $event)"
+                    @input="handlePromptInput"
+                    @keydown="handlePromptKeydown"
+                    :readonly="!localUseCustomSystemPrompt"
+                    :class="{ 'prompt-textarea-readonly': !localUseCustomSystemPrompt }"
+                    style="width: 100%; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 13px;"
+                  />
+                </div>
+              </t-tab-panel>
+            </t-tabs>
          </div>
          <!-- 占位符提示下拉框 -->
          <teleport to="body">
@@ -580,11 +609,11 @@
          </div>
          <div class="setting-control">
            <t-input-number
-              v-model="localMaxTokens"
+              v-model="localMaxCompletionTokens"
              :min="1"
              :max="100000"
              :step="100"
-              @change="handleMaxTokensChange"
+              @change="handleMaxCompletionTokensChange"
              style="width: 200px;"
            />
          </div>
@@ -596,6 +625,7 @@

 <script setup lang="ts">
 import { ref, onMounted, watch, computed, nextTick } from 'vue'
+import type { Ref } from 'vue'
 import { useRouter } from 'vue-router'
 import { useSettingsStore } from '@/stores/settings'
 import { MessagePlugin, DialogPlugin } from 'tdesign-vue-next'
@@ -622,7 +652,7 @@ const getDefaultConversationConfig = (): ConversationConfig => ({
  prompt: '',
  context_template: '',
  temperature: 0.3,
-  max_tokens: 2048,
+  max_completion_tokens: 2048,
  use_custom_system_prompt: true,
  use_custom_context_template: true,
  max_rounds: 5,
@@ -656,18 +686,32 @@ const localTemperature = ref(0.7)
 const localThinkingModelId = ref('')
 const localRerankModelId = ref('')
 const localAllowedTools = ref<string[]>([])
-const localSystemPrompt = ref('')
+
+type SystemPromptTab = 'web-enabled' | 'web-disabled'
+const activeSystemPromptTab = ref<SystemPromptTab>('web-enabled')
+const localSystemPromptWebEnabled = ref('')
+const localSystemPromptWebDisabled = ref('')
+const systemPromptRefs: Record<SystemPromptTab, Ref<string>> = {
+  'web-enabled': localSystemPromptWebEnabled,
+  'web-disabled': localSystemPromptWebDisabled,
+}
+const savedSystemPromptMap: Record<SystemPromptTab, string> = {
+  'web-enabled': '',
+  'web-disabled': '',
+}
+const getPromptRefByTab = (tab: SystemPromptTab) => systemPromptRefs[tab]
+const getActivePromptRef = () => getPromptRefByTab(activeSystemPromptTab.value)
 const localUseCustomSystemPrompt = ref(false)

 // 普通模式本地状态
 const localContextTemplate = ref('')
 const localSystemPromptNormal = ref('')
 const localTemperatureNormal = ref(0.3)
-const localMaxTokens = ref(2048)
+const localMaxCompletionTokens = ref(2048)
 let savedContextTemplate = ''
 let savedSystemPromptNormal = ''
 let savedTemperatureNormal = 0.3
-let savedMaxTokens = 2048
+let savedMaxCompletionTokens = 2048

 const localMaxRounds = ref(5)
 const localEmbeddingTopK = ref(10)
@@ -696,8 +740,8 @@ const syncConversationLocals = () => {
  savedSystemPromptNormal = localSystemPromptNormal.value
  localTemperatureNormal.value = cfg.temperature ?? 0.3
  savedTemperatureNormal = localTemperatureNormal.value
-  localMaxTokens.value = cfg.max_tokens ?? 2048
-  savedMaxTokens = localMaxTokens.value
+  localMaxCompletionTokens.value = cfg.max_completion_tokens ?? 2048
+  savedMaxCompletionTokens = localMaxCompletionTokens.value

  localMaxRounds.value = cfg.max_rounds ?? 5
  localEmbeddingTopK.value = cfg.embedding_top_k ?? 10
@@ -750,6 +794,20 @@ const isAgentReady = computed(() => {
         localAllowedTools.value.length > 0
 })

+const buildAgentConfigPayload = (overrides: Partial<AgentConfig> = {}): AgentConfig => ({
+  enabled: isAgentReady.value,
+  max_iterations: localMaxIterations.value,
+  reflection_enabled: false,
+  allowed_tools: localAllowedTools.value,
+  temperature: localTemperature.value,
+  thinking_model_id: localThinkingModelId.value,
+  rerank_model_id: localRerankModelId.value,
+  system_prompt_web_enabled: localSystemPromptWebEnabled.value,
+  system_prompt_web_disabled: localSystemPromptWebDisabled.value,
+  use_custom_system_prompt: localUseCustomSystemPrompt.value,
+  ...overrides,
+})
+
 // Agent 状态提示消息
 const agentStatusMessage = computed(() => {
  const missing: string[] = []
@@ -787,7 +845,6 @@ const configLoaded = ref(false) // 防止重复加载
 const isInitializing = ref(true) // 标记是否正在初始化，防止初始化时触发保存

 // 保存的 Prompt 值，用于比较是否变化
-let savedSystemPrompt = ''
 let savedUseCustomSystemPrompt = false

 // 恢复默认 Prompt 的加载状态
@@ -801,6 +858,12 @@ let placeholderPopupTimer: any = null
 const placeholderPrefix = ref('') // 当前输入的前缀，用于过滤
 const popupStyle = ref({ top: '0px', left: '0px' }) // 提示框位置

+watch(activeSystemPromptTab, () => {
+  showPlaceholderPopup.value = false
+  placeholderPrefix.value = ''
+  selectedPlaceholderIndex.value = 0
+})
+
 // 设置 textarea 原生事件监听器
 const setupTextareaEventListeners = () => {
  nextTick(() => {
@@ -916,8 +979,12 @@ onMounted(async () => {
    localThinkingModelId.value = config.thinking_model_id
    localRerankModelId.value = config.rerank_model_id
    localAllowedTools.value = config.allowed_tools || []
-    localSystemPrompt.value = config.system_prompt || ''
-    savedSystemPrompt = config.system_prompt || '' // 记录已保存的值
+    const promptWebEnabled = config.system_prompt_web_enabled || ''
+    const promptWebDisabled = config.system_prompt_web_disabled || ''
+    localSystemPromptWebEnabled.value = promptWebEnabled
+    localSystemPromptWebDisabled.value = promptWebDisabled
+    savedSystemPromptMap['web-enabled'] = promptWebEnabled
+    savedSystemPromptMap['web-disabled'] = promptWebDisabled
    const useCustomPrompt = config.use_custom_system_prompt ?? false
    localUseCustomSystemPrompt.value = useCustomPrompt
    savedUseCustomSystemPrompt = useCustomPrompt
@@ -940,7 +1007,10 @@ onMounted(async () => {
      temperature: config.temperature,
      thinkingModelId: config.thinking_model_id,
      rerankModelId: config.rerank_model_id,
-      allowedTools: config.allowed_tools || []
+      allowedTools: config.allowed_tools || [],
+      system_prompt_web_enabled: promptWebEnabled,
+      system_prompt_web_disabled: promptWebDisabled,
+      use_custom_system_prompt: useCustomPrompt
    })

    // 加载普通模式配置
@@ -1040,18 +1110,7 @@ const handleMaxIterationsChangeDebounced = (value: number) => {
    }
  
  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value, // 自动根据配置状态设置
-      max_iterations: numValue, // 确保是数字类型
-      reflection_enabled: false,
-      allowed_tools: localAllowedTools.value,
-      temperature: localTemperature.value,
-      thinking_model_id: localThinkingModelId.value,
-      rerank_model_id: localRerankModelId.value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: localUseCustomSystemPrompt.value
-    }
-    
+    const config = buildAgentConfigPayload({ max_iterations: numValue })
    await updateAgentConfig(config)
      settingsStore.updateAgentConfig({ maxIterations: numValue })
      lastSavedValue = numValue // 记录已保存的值
@@ -1105,18 +1164,7 @@ const handleThinkingModelChange = async (value: string) => {
  }
  
  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value, // 自动根据配置状态设置
-      max_iterations: localMaxIterations.value,
-      reflection_enabled: false,
-      allowed_tools: localAllowedTools.value,
-      temperature: localTemperature.value,
-      thinking_model_id: value,
-      rerank_model_id: localRerankModelId.value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: localUseCustomSystemPrompt.value
-    }
-    
+    const config = buildAgentConfigPayload({ thinking_model_id: value })
    await updateAgentConfig(config)
    // 更新 store，确保 isAgentReady 能正确计算
    settingsStore.updateAgentConfig({ thinkingModelId: value })
@@ -1140,18 +1188,7 @@ const handleRerankModelChange = async (value: string) => {
  }
  
  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value, // 自动根据配置状态设置
-      max_iterations: localMaxIterations.value,
-      reflection_enabled: false,
-      allowed_tools: localAllowedTools.value,
-      temperature: localTemperature.value,
-      thinking_model_id: localThinkingModelId.value,
-      rerank_model_id: value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: localUseCustomSystemPrompt.value
-    }
-    
+    const config = buildAgentConfigPayload({ rerank_model_id: value })
    await updateAgentConfig(config)
    settingsStore.updateAgentConfig({ rerankModelId: value })
    MessagePlugin.success(t('agentSettings.toasts.rerankModelSaved'))
@@ -1222,18 +1259,7 @@ const handleTemperatureChange = async (value: number) => {
  if (isInitializing.value) return
  
  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value, // 自动根据配置状态设置
-      max_iterations: localMaxIterations.value,
-      reflection_enabled: false,
-      allowed_tools: localAllowedTools.value,
-      temperature: value,
-      thinking_model_id: localThinkingModelId.value,
-      rerank_model_id: localRerankModelId.value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: localUseCustomSystemPrompt.value
-    }
-    
+    const config = buildAgentConfigPayload({ temperature: value })
    await updateAgentConfig(config)
    settingsStore.updateAgentConfig({ temperature: value })
    MessagePlugin.success(t('agentSettings.toasts.temperatureSaved'))
@@ -1249,18 +1275,7 @@ const handleAllowedToolsChange = async (value: string[]) => {
  if (isInitializing.value) return
  
  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value, // 自动根据配置状态设置
-      max_iterations: localMaxIterations.value,
-      reflection_enabled: false,
-      allowed_tools: value,
-      temperature: localTemperature.value,
-      thinking_model_id: localThinkingModelId.value,
-      rerank_model_id: localRerankModelId.value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: localUseCustomSystemPrompt.value
-    }
-    
+    const config = buildAgentConfigPayload({ allowed_tools: value })
    await updateAgentConfig(config)
    settingsStore.updateAgentConfig({ allowedTools: value })
    MessagePlugin.success(t('agentSettings.toasts.toolsUpdated'))
@@ -1278,18 +1293,7 @@ const handleUseCustomPromptToggle = async (value: boolean) => {
  if (value === savedUseCustomSystemPrompt) return

  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value,
-      max_iterations: localMaxIterations.value,
-      reflection_enabled: false,
-      allowed_tools: localAllowedTools.value,
-      temperature: localTemperature.value,
-      thinking_model_id: localThinkingModelId.value,
-      rerank_model_id: localRerankModelId.value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: value
-    }
-
+    const config = buildAgentConfigPayload({ use_custom_system_prompt: value })
    await updateAgentConfig(config)
    savedUseCustomSystemPrompt = value

@@ -1331,7 +1335,8 @@ const filteredPlaceholders = computed(() => {
 // 计算光标在 textarea 中的像素位置
 const calculateCursorPosition = (textarea: HTMLTextAreaElement) => {
  const cursorPos = textarea.selectionStart
-  const textBeforeCursor = localSystemPrompt.value.substring(0, cursorPos)
+  const activePromptValue = getActivePromptRef().value
+  const textBeforeCursor = activePromptValue.substring(0, cursorPos)
  
  // 获取 textarea 的样式和位置
  const style = window.getComputedStyle(textarea)
@@ -1383,7 +1388,7 @@ const checkAndShowPlaceholderPopup = () => {
  }
  
  const cursorPos = textarea.selectionStart
-  const textBeforeCursor = localSystemPrompt.value.substring(0, cursorPos)
+  const textBeforeCursor = getActivePromptRef().value.substring(0, cursorPos)
  
  // 检查是否输入了 {{（从光标位置向前查找最近的 {{）
  // 需要找到光标前最近的 {{，且中间没有 }}
@@ -1463,15 +1468,17 @@ const insertPlaceholder = (placeholderName: string) => {
  // 延迟执行，确保提示框已关闭
  nextTick(() => {
    const cursorPos = textarea.selectionStart
-    const textBeforeCursor = localSystemPrompt.value.substring(0, cursorPos)
-    const textAfterCursor = localSystemPrompt.value.substring(cursorPos)
+    const promptRef = getActivePromptRef()
+    const currentValue = promptRef.value
+    const textBeforeCursor = currentValue.substring(0, cursorPos)
+    const textAfterCursor = currentValue.substring(cursorPos)
    
    // 找到最后一个 {{ 的位置
    const lastOpenPos = textBeforeCursor.lastIndexOf('{{')
    if (lastOpenPos === -1) {
      // 如果没有找到 {{，直接插入完整的占位符
      const placeholder = `{{${placeholderName}}}`
-      localSystemPrompt.value = textBeforeCursor + placeholder + textAfterCursor
+      promptRef.value = textBeforeCursor + placeholder + textAfterCursor
      // 设置光标位置
      nextTick(() => {
        const newPos = cursorPos + placeholder.length
@@ -1482,7 +1489,7 @@ const insertPlaceholder = (placeholderName: string) => {
      // 替换 {{ 到光标位置的内容为完整的占位符
      const beforePlaceholder = textBeforeCursor.substring(0, lastOpenPos)
      const placeholder = `{{${placeholderName}}}`
-      localSystemPrompt.value = beforePlaceholder + placeholder + textAfterCursor
+      promptRef.value = beforePlaceholder + placeholder + textAfterCursor
      // 设置光标位置
      nextTick(() => {
        const newPos = lastOpenPos + placeholder.length
@@ -1504,30 +1511,27 @@ const handleResetToDefault = async () => {
      try {
        isResettingPrompt.value = true
        
-        // 通过设置 system_prompt 为空字符串来获取默认值
-        // 后端在 system_prompt 为空时会返回默认值
-        const tempConfig: AgentConfig = {
-          enabled: isAgentReady.value,
-          max_iterations: localMaxIterations.value,
-          reflection_enabled: false,
-          allowed_tools: localAllowedTools.value,
-          temperature: localTemperature.value,
-          thinking_model_id: localThinkingModelId.value,
-          rerank_model_id: localRerankModelId.value,
-          system_prompt: '', // 空字符串表示使用默认
-          use_custom_system_prompt: false
-        }
+        // 通过设置 system_prompt_web_* 为空字符串来获取默认值
+        // 后端在字段为空时会返回默认值
+        const tempConfig = buildAgentConfigPayload({
+          system_prompt_web_enabled: '',
+          system_prompt_web_disabled: '',
+          use_custom_system_prompt: false,
+        })
        
        await updateAgentConfig(tempConfig)
        
        // 重新加载配置以获取默认 Prompt 的完整内容
        const res = await getAgentConfig()
-        const defaultPrompt = res.data.system_prompt || ''
+        const defaultPromptWebEnabled = res.data.system_prompt_web_enabled || ''
+        const defaultPromptWebDisabled = res.data.system_prompt_web_disabled || ''
        const useCustom = res.data.use_custom_system_prompt ?? false
        
        // 设置为默认 Prompt 的内容
-        localSystemPrompt.value = defaultPrompt
-        savedSystemPrompt = defaultPrompt
+        localSystemPromptWebEnabled.value = defaultPromptWebEnabled
+        localSystemPromptWebDisabled.value = defaultPromptWebDisabled
+        savedSystemPromptMap['web-enabled'] = defaultPromptWebEnabled
+        savedSystemPromptMap['web-disabled'] = defaultPromptWebDisabled
        localUseCustomSystemPrompt.value = useCustom
        savedUseCustomSystemPrompt = useCustom
        
@@ -1544,7 +1548,7 @@ const handleResetToDefault = async () => {
 }

 // 处理系统 Prompt 变化
-const handleSystemPromptChange = async (e?: FocusEvent) => {
+const handleSystemPromptChange = async (tab: SystemPromptTab, e?: FocusEvent) => {
  // 如果点击的是占位符提示框，不触发保存
  if (e?.relatedTarget) {
    const target = e.relatedTarget as HTMLElement
@@ -1567,26 +1571,18 @@ const handleSystemPromptChange = async (e?: FocusEvent) => {
  // 如果正在初始化，不触发保存
  if (isInitializing.value) return
  
+  const promptRef = getPromptRefByTab(tab)
+  const savedValue = savedSystemPromptMap[tab]
+
  // 检查内容是否变化
-  if (localSystemPrompt.value === savedSystemPrompt) {
+  if (promptRef.value === savedValue) {
    return // 内容没变，不调用接口
  }
  
  try {
-    const config: AgentConfig = {
-      enabled: isAgentReady.value,
-      max_iterations: localMaxIterations.value,
-      reflection_enabled: false,
-      allowed_tools: localAllowedTools.value,
-      temperature: localTemperature.value,
-      thinking_model_id: localThinkingModelId.value,
-      rerank_model_id: localRerankModelId.value,
-      system_prompt: localSystemPrompt.value,
-      use_custom_system_prompt: localUseCustomSystemPrompt.value
-    }
-    
+    const config = buildAgentConfigPayload()
    await updateAgentConfig(config)
-    savedSystemPrompt = localSystemPrompt.value // 更新已保存的值
+    savedSystemPromptMap[tab] = promptRef.value // 更新已保存的值
    MessagePlugin.success(t('agentSettings.toasts.systemPromptSaved'))
  } catch (error) {
    console.error('保存系统 Prompt 失败:', error)
@@ -1732,15 +1728,15 @@ const handleTemperatureNormalChange = async (value: number) => {
  }
 }

-const handleMaxTokensChange = async (value: number) => {
+const handleMaxCompletionTokensChange = async (value: number) => {
  if (!conversationConfigLoaded.value) return
  
  try {
    await saveConversationConfig(
-      { max_tokens: value },
+      { max_completion_tokens: value },
      t('conversationSettings.toasts.maxTokensSaved')
    )
-    savedMaxTokens = value
+    savedMaxCompletionTokens = value
  } catch (error) {
    console.error('保存Max Tokens失败:', error)
    MessagePlugin.error(getErrorMessage(error))
@@ -2214,6 +2210,39 @@ const handleConversationRerankModelChange = async (value: string) => {
  font-size: 12px;
 }

+.prompt-tab-hint {
+  margin: 0 0 12px;
+  color: #666;
+  font-size: 12px;
+}
+
+.system-prompt-tabs {
+  width: 100%;
+}
+
+.system-prompt-variant-tabs :deep(.t-tabs__nav-wrap) {
+  border-bottom: 1px solid #e5e7eb;
+  margin-bottom: 8px;
+}
+
+.system-prompt-variant-tabs :deep(.t-tabs__nav-item) {
+  padding: 4px 12px 10px;
+  font-size: 13px;
+  color: #666;
+  border-bottom: 2px solid transparent;
+  transition: color 0.2s ease, border-color 0.2s ease;
+}
+
+.system-prompt-variant-tabs :deep(.t-tabs__nav-item.t-is-active) {
+  color: #1d2129;
+  border-bottom-color: #07C05F;
+  font-weight: 600;
+}
+
+.system-prompt-variant-tabs :deep(.t-tabs__bar) {
+  display: none;
+}
+
 .prompt-textarea-readonly {
  background-color: #fafafa;
 }
--- a/internal/agent/engine.go
+++ b/internal/agent/engine.go
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/Tencent/WeKnora/internal/agent/tools"
+	"github.com/Tencent/WeKnora/internal/common"
 	"github.com/Tencent/WeKnora/internal/event"
 	"github.com/Tencent/WeKnora/internal/logger"
 	"github.com/Tencent/WeKnora/internal/models/chat"
@@ -26,7 +27,6 @@ type AgentEngine struct {
 	config               *types.AgentConfig
 	toolRegistry         *tools.ToolRegistry
 	chatModel            chat.Chat
-	knowledgeService     interfaces.KnowledgeBaseService
 	eventBus             *event.EventBus
 	knowledgeBasesInfo   []*KnowledgeBaseInfo      // Detailed knowledge base information for prompt
 	contextManager       interfaces.ContextManager // Context manager for writing agent conversation to LLM context
@@ -48,7 +48,6 @@ func NewAgentEngine(
 	config *types.AgentConfig,
 	chatModel chat.Chat,
 	toolRegistry *tools.ToolRegistry,
-	knowledgeService interfaces.KnowledgeBaseService,
 	eventBus *event.EventBus,
 	knowledgeBasesInfo []*KnowledgeBaseInfo,
 	contextManager interfaces.ContextManager,
@@ -62,7 +61,6 @@ func NewAgentEngine(
 		config:               config,
 		toolRegistry:         toolRegistry,
 		chatModel:            chatModel,
-		knowledgeService:     knowledgeService,
 		eventBus:             eventBus,
 		knowledgeBasesInfo:   knowledgeBasesInfo,
 		contextManager:       contextManager,
@@ -78,6 +76,12 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
 	logger.Infof(ctx, "[Agent] SessionID: %s, MessageID: %s", sessionID, messageID)
 	logger.Infof(ctx, "[Agent] User Query: %s", query)
 	logger.Infof(ctx, "[Agent] LLM Context Messages: %d", len(llmContext))
+	common.PipelineInfo(ctx, "Agent", "execute_start", map[string]interface{}{
+		"session_id":   sessionID,
+		"message_id":   messageID,
+		"query":        query,
+		"context_msgs": len(llmContext),
+	})

 	// Initialize state
 	state := &types.AgentState{
@@ -87,8 +91,8 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
 		CurrentRound:  0,
 	}

-	// Build system prompt
-	systemPrompt := BuildReActSystemPromptWithStatus(e.knowledgeBasesInfo, e.config.WebSearchEnabled, e.systemPromptTemplate)
+	// Build system prompt using progressive RAG prompt
+	systemPrompt := BuildProgressiveRAGSystemPrompt(e.knowledgeBasesInfo, e.config.WebSearchEnabled, e.systemPromptTemplate)
 	logger.Debugf(ctx, "[Agent] SystemPrompt Length: %d characters", len(systemPrompt))
 	logger.Debugf(ctx, "[Agent] SystemPrompt (stream)\n----\n%s\n----", systemPrompt)

@@ -99,7 +103,13 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s

 	// Get tool definitions for function calling
 	tools := e.buildToolsForLLM()
-	logger.Infof(ctx, "[Agent] Tools enabled (%d): %s", len(tools), strings.Join(listToolNames(tools), ", "))
+	toolListStr := strings.Join(listToolNames(tools), ", ")
+	logger.Infof(ctx, "[Agent] Tools enabled (%d): %s", len(tools), toolListStr)
+	common.PipelineInfo(ctx, "Agent", "tools_ready", map[string]interface{}{
+		"session_id": sessionID,
+		"tool_count": len(tools),
+		"tools":      toolListStr,
+	})

 	_, err := e.executeLoop(ctx, state, query, messages, tools, sessionID, messageID)
 	if err != nil {
@@ -120,6 +130,12 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
 	logger.Infof(ctx, "========== Agent Execution Completed Successfully ==========")
 	logger.Infof(ctx, "[Agent] Total rounds: %d, Round steps: %d, Is complete: %v",
 		state.CurrentRound, len(state.RoundSteps), state.IsComplete)
+	common.PipelineInfo(ctx, "Agent", "execute_complete", map[string]interface{}{
+		"session_id": sessionID,
+		"rounds":     state.CurrentRound,
+		"steps":      len(state.RoundSteps),
+		"complete":   state.IsComplete,
+	})
 	return state, nil
 }

@@ -135,19 +151,45 @@ func (e *AgentEngine) executeLoop(
 	messageID string,
 ) (*types.AgentState, error) {
 	startTime := time.Now()
+	common.PipelineInfo(ctx, "Agent", "loop_start", map[string]interface{}{
+		"max_iterations": e.config.MaxIterations,
+	})
 	for state.CurrentRound < e.config.MaxIterations {
 		roundStart := time.Now()
 		logger.Infof(ctx, "========== Round %d/%d Started ==========", state.CurrentRound+1, e.config.MaxIterations)
 		logger.Infof(ctx, "[Agent][Round-%d] Message history size: %d messages", state.CurrentRound+1, len(messages))
+		common.PipelineInfo(ctx, "Agent", "round_start", map[string]interface{}{
+			"iteration":      state.CurrentRound,
+			"round":          state.CurrentRound + 1,
+			"message_count":  len(messages),
+			"pending_tools":  len(tools),
+			"max_iterations": e.config.MaxIterations,
+		})

 		// 1. Think: Call LLM with function calling and stream thinking through EventBus
 		logger.Infof(ctx, "[Agent][Round-%d] Calling LLM with %d tools available...", state.CurrentRound+1, len(tools))
+		common.PipelineInfo(ctx, "Agent", "think_start", map[string]interface{}{
+			"iteration": state.CurrentRound,
+			"round":     state.CurrentRound + 1,
+			"tool_cnt":  len(tools),
+		})
 		response, err := e.streamThinkingToEventBus(ctx, messages, tools, state.CurrentRound, sessionID)
 		if err != nil {
 			logger.Errorf(ctx, "[Agent][Round-%d] LLM call failed: %v", state.CurrentRound+1, err)
+			common.PipelineError(ctx, "Agent", "think_failed", map[string]interface{}{
+				"iteration": state.CurrentRound,
+				"error":     err.Error(),
+			})
 			return state, fmt.Errorf("LLM call failed: %w", err)
 		}

+		common.PipelineInfo(ctx, "Agent", "think_result", map[string]interface{}{
+			"iteration":     state.CurrentRound,
+			"finish_reason": response.FinishReason,
+			"tool_calls":    len(response.ToolCalls),
+			"content_len":   len(response.Content),
+		})
+
 		// Debug: log finish reason and tool call count from LLM
 		logger.Infof(ctx, "[Agent][Round-%d] LLM response received: finish_reason=%s, tool_calls=%d, content_length=%d",
 			state.CurrentRound+1, response.FinishReason, len(response.ToolCalls), len(response.Content))
@@ -168,6 +210,11 @@ func (e *AgentEngine) executeLoop(
 		if response.FinishReason == "stop" && len(response.ToolCalls) == 0 {
 			logger.Infof(ctx, "[Agent][Round-%d] Agent finished - no more tool calls needed", state.CurrentRound+1)
 			logger.Infof(ctx, "[Agent] Final answer length: %d characters", len(response.Content))
+			common.PipelineInfo(ctx, "Agent", "round_final_answer", map[string]interface{}{
+				"iteration":  state.CurrentRound,
+				"round":      state.CurrentRound + 1,
+				"answer_len": len(response.Content),
+			})
 			state.FinalAnswer = response.Content
 			state.IsComplete = true
 			state.RoundSteps = append(state.RoundSteps, step)
@@ -223,6 +270,13 @@ func (e *AgentEngine) executeLoop(
 				// Execute tool
 				logger.Infof(ctx, "[Agent][Round-%d][Tool-%d/%d] Executing tool: %s...",
 					state.CurrentRound+1, i+1, len(response.ToolCalls), tc.Function.Name)
+				common.PipelineInfo(ctx, "Agent", "tool_call_start", map[string]interface{}{
+					"iteration":    state.CurrentRound,
+					"round":        state.CurrentRound + 1,
+					"tool":         tc.Function.Name,
+					"tool_call_id": tc.ID,
+					"tool_index":   fmt.Sprintf("%d/%d", i+1, len(response.ToolCalls)),
+				})
 				result, err := e.toolRegistry.ExecuteTool(ctx, tc.Function.Name, args)
 				duration := time.Since(toolCallStartTime).Milliseconds()
 				logger.Infof(ctx, "[Agent][Round-%d][Tool-%d/%d] Tool execution completed in %dms",
@@ -245,6 +299,26 @@ func (e *AgentEngine) executeLoop(
 					}
 				}

+				toolSuccess := toolCall.Result != nil && toolCall.Result.Success
+				pipelineFields := map[string]interface{}{
+					"iteration":    state.CurrentRound,
+					"round":        state.CurrentRound + 1,
+					"tool":         tc.Function.Name,
+					"tool_call_id": tc.ID,
+					"duration_ms":  duration,
+					"success":      toolSuccess,
+				}
+				if toolCall.Result != nil && toolCall.Result.Error != "" {
+					pipelineFields["error"] = toolCall.Result.Error
+				}
+				if err != nil {
+					common.PipelineError(ctx, "Agent", "tool_call_result", pipelineFields)
+				} else if toolSuccess {
+					common.PipelineInfo(ctx, "Agent", "tool_call_result", pipelineFields)
+				} else {
+					common.PipelineWarn(ctx, "Agent", "tool_call_result", pipelineFields)
+				}
+
 				if toolCall.Result != nil {
 					logger.Infof(ctx, "[Agent][Round-%d][Tool-%d/%d] Tool result: success=%v, output_length=%d",
 						state.CurrentRound+1, i+1, len(response.ToolCalls),
@@ -335,6 +409,12 @@ func (e *AgentEngine) executeLoop(
 		state.RoundSteps = append(state.RoundSteps, step)
 		// 4. Observe: Add tool results to messages and write to context
 		messages = e.appendToolResults(ctx, messages, step)
+		common.PipelineInfo(ctx, "Agent", "round_end", map[string]interface{}{
+			"iteration":   state.CurrentRound,
+			"round":       state.CurrentRound + 1,
+			"tool_calls":  len(step.ToolCalls),
+			"thought_len": len(step.Thought),
+		})
 		// 5. Check if we should continue
 		state.CurrentRound++
 	}
@@ -342,10 +422,17 @@ func (e *AgentEngine) executeLoop(
 	// If loop finished without final answer, generate one
 	if !state.IsComplete {
 		logger.Info(ctx, "Reached max iterations, generating final answer")
+		common.PipelineWarn(ctx, "Agent", "max_iterations_reached", map[string]interface{}{
+			"iterations": state.CurrentRound,
+			"max":        e.config.MaxIterations,
+		})

 		// Stream final answer generation through EventBus
 		if err := e.streamFinalAnswerToEventBus(ctx, query, state, sessionID); err != nil {
 			logger.Errorf(ctx, "Failed to synthesize final answer: %v", err)
+			common.PipelineError(ctx, "Agent", "final_answer_failed", map[string]interface{}{
+				"error": err.Error(),
+			})
 			state.FinalAnswer = "抱歉，我无法生成完整的答案。"
 		}
 		state.IsComplete = true
@@ -648,8 +735,15 @@ func (e *AgentEngine) streamFinalAnswerToEventBus(
 	sessionID string,
 ) error {
 	logger.Infof(ctx, "[Agent][FinalAnswer] Starting final answer generation")
+	totalToolCalls := countTotalToolCalls(state.RoundSteps)
 	logger.Infof(ctx, "[Agent][FinalAnswer] Context: %d steps with total %d tool calls",
-		len(state.RoundSteps), countTotalToolCalls(state.RoundSteps))
+		len(state.RoundSteps), totalToolCalls)
+	common.PipelineInfo(ctx, "Agent", "final_answer_start", map[string]interface{}{
+		"session_id":   sessionID,
+		"query":        query,
+		"steps":        len(state.RoundSteps),
+		"tool_results": totalToolCalls,
+	})

 	// Build messages with all context
 	systemPrompt := BuildReActSystemPromptWithStatus(e.knowledgeBasesInfo, e.config.WebSearchEnabled, e.systemPromptTemplate)
@@ -720,10 +814,18 @@ func (e *AgentEngine) streamFinalAnswerToEventBus(

 	if err != nil {
 		logger.Errorf(ctx, "[Agent][FinalAnswer] Final answer generation failed: %v", err)
+		common.PipelineError(ctx, "Agent", "final_answer_stream_failed", map[string]interface{}{
+			"session_id": sessionID,
+			"error":      err.Error(),
+		})
 		return err
 	}

 	logger.Infof(ctx, "[Agent][FinalAnswer] Final answer generated: %d characters", len(fullAnswer))
+	common.PipelineInfo(ctx, "Agent", "final_answer_done", map[string]interface{}{
+		"session_id": sessionID,
+		"answer_len": len(fullAnswer),
+	})
 	state.FinalAnswer = fullAnswer
 	return nil
 }
--- a/internal/agent/prompts.go
+++ b/internal/agent/prompts.go
@@ -206,7 +206,41 @@ func renderPromptPlaceholdersWithStatus(template string, knowledgeBases []*Knowl
 	return result
 }

+// BuildProgressiveRAGSystemPromptWithWeb builds the progressive RAG system prompt with web search enabled
+func BuildProgressiveRAGSystemPromptWithWeb(knowledgeBases []*KnowledgeBaseInfo, systemPromptTemplate ...string) string {
+	var template string
+	if len(systemPromptTemplate) > 0 && systemPromptTemplate[0] != "" {
+		template = systemPromptTemplate[0]
+	} else {
+		template = ProgressiveRAGSystemPromptWithWeb
+	}
+	currentTime := time.Now().Format(time.RFC3339)
+	return renderPromptPlaceholdersWithStatus(template, knowledgeBases, true, currentTime)
+}
+
+// BuildProgressiveRAGSystemPromptWithoutWeb builds the progressive RAG system prompt without web search
+func BuildProgressiveRAGSystemPromptWithoutWeb(knowledgeBases []*KnowledgeBaseInfo, systemPromptTemplate ...string) string {
+	var template string
+	if len(systemPromptTemplate) > 0 && systemPromptTemplate[0] != "" {
+		template = systemPromptTemplate[0]
+	} else {
+		template = ProgressiveRAGSystemPromptWithoutWeb
+	}
+	currentTime := time.Now().Format(time.RFC3339)
+	return renderPromptPlaceholdersWithStatus(template, knowledgeBases, false, currentTime)
+}
+
+// BuildProgressiveRAGSystemPrompt builds the progressive RAG system prompt based on web search status
+// This is the main function to use - it automatically selects the appropriate version
+func BuildProgressiveRAGSystemPrompt(knowledgeBases []*KnowledgeBaseInfo, webSearchEnabled bool, systemPromptTemplate ...string) string {
+	if webSearchEnabled {
+		return BuildProgressiveRAGSystemPromptWithWeb(knowledgeBases, systemPromptTemplate...)
+	}
+	return BuildProgressiveRAGSystemPromptWithoutWeb(knowledgeBases, systemPromptTemplate...)
+}
+
 // BuildReActSystemPromptWithStatus builds the system prompt, allowing caller to pass tool status
+// Deprecated: Use BuildProgressiveRAGSystemPrompt instead for better tool calling capabilities
 func BuildReActSystemPromptWithStatus(knowledgeBases []*KnowledgeBaseInfo, webSearchEnabled bool, systemPromptTemplate ...string) string {
 	var template string
 	if len(systemPromptTemplate) > 0 && systemPromptTemplate[0] != "" {
@@ -218,87 +252,721 @@ func BuildReActSystemPromptWithStatus(knowledgeBases []*KnowledgeBaseInfo, webSe
 	return renderPromptPlaceholdersWithStatus(template, knowledgeBases, webSearchEnabled, currentTime)
 }

-// DefaultSystemPromptTemplate returns the default system prompt template
-// It includes a Status section to explicitly state tool switches at generation time.
-var DefaultSystemPromptTemplate = `# Role
+// ProgressiveRAGSystemPromptWithWeb is the progressive RAG system prompt template with web search enabled
+// This version emphasizes hybrid retrieval strategy: KB-first with web supplementation
+var ProgressiveRAGSystemPromptWithWeb = `# Role & Mission

-You are WeKnora, a knowledge base assistant. Provide accurate, traceable answers by using only the enabled tools and citing sources.
+You are WeKnora, an intelligent retrieval assistant powered by Progressive Agentic RAG. Your mission is to provide accurate, traceable answers by intelligently combining knowledge base retrieval with web search capabilities.
+
+**Core Philosophy**: Knowledge bases are your foundation, web search is your supplement. Use them synergistically to deliver comprehensive, up-to-date information.

 # Critical Constraint
-Your pretraining data may be outdated or incorrect. Do NOT rely on any internal or parametric knowledge. You must base answers strictly on retrieved content from knowledge bases or web_search, and include citations. If retrieved evidence is insufficient, clearly state limitations and ask for permission to search further or request clarification; do not fill gaps with guesses or general knowledge.

-# Known
+Your pretraining data may be outdated or incorrect. NEVER rely on internal or parametric knowledge. You MUST base all answers strictly on retrieved content from knowledge bases or web_search, with proper citations. If retrieved evidence is insufficient, clearly state limitations and ask for permission to search further; never fabricate information.

-## Knowledge Bases
-{{knowledge_bases}}
+# System Status

-# Status
-
- Web Search: {{web_search_status}}
 - Current Time: {{current_time}}

-# Rules
+# Progressive RAG Workflow (4-Stage Process)

-<Thinking_and_Planning>
- IMPORTANT: Unless the user question is trivially simple (e.g., directly confirming visible information), you MUST use the thinking tool to break down complex problems, track thinking progress iteratively, and adjust the approach when retrieved content changes or exceptions block the original workflow.
- IMPORTANT: Record your KB-first compliance in the thinking step: briefly list the attempted KB strategies and why they were insufficient before you switch to web_search.
- CRITICAL - todo_write Tool Usage: The todo_write tool is MANDATORY and MUST be used frequently throughout your workflow. You MUST:
-  - Create a todo list at the START of any multi-step task (3+ steps) or complex problem-solving session.
-  - Update the todo list IMMEDIATELY after completing each task item (mark as completed).
-  - Add new todo items when you discover additional steps are needed.
-  - Mark items as in_progress when you start working on them.
-  - Use todo_write proactively to organize and track your progress; do NOT skip this tool even if you think you can handle the task without it. Regular todo management is essential for maintaining clarity and ensuring all tasks are completed.
- For multi-turn conversations, examine prior retrieved evidence first; if it cannot answer the new question, plan and execute fresh retrieval before responding.
- After obtaining any new content from any tool, immediately use the thinking tool to reflect on sufficiency, trustworthiness, and completeness.
- Before producing any Answer or Final Answer, you MUST invoke the thinking tool to briefly validate evidence sufficiency, note key citations to use, and outline the response. Do not emit the Answer until this thinking step is completed.
-</Thinking_and_Planning>
+## Stage 1: Problem Understanding & Planning
+- **Mandatory planning rule**: Unless a request is truly single-step trivial, immediately call **todo_write** to capture the initial plan and keep it updated after every major milestone. When unsure, default to using todo_write.
+- **Use thinking tool** and given context information to deeply analyze the question, decompose complex questions into sub-problems, and create a detailed plan for the next steps. Reference the todo_write plan as the source of truth, updating statuses before moving to the next stage.
+- Identify question type: factual query / relationship exploration / comprehensive analysis / real-time information
+- Determine initial retrieval strategy based on question characteristics

+## Stage 2: Knowledge Base Deep Retrieval (Multi-round Optimization)
+**Primary Strategy**: Maximize KB value before considering web search

-<KB_and_Web_Retrieval>
- Mandatory KB-first policy: ALWAYS attempt knowledge base retrieval before any web_search (even if web_search is enabled, or the user explicitly requests “real-time” answers).
-  - Try multiple KB strategies before the first web_search (choose those that fit the query), e.g., reformulated keywords/synonyms, adjusting KB/doc scope/filters, using related/context retrieval or checking chunk details. Avoid mechanically enumerating “1), 2)” or stating counts.
-  - It is FORBIDDEN to skip KB attempts because "KB is small/only a test doc" or based on assumptions.
-  - Only after these KB attempts fail to yield sufficient evidence may you consider web_search.
- Do not assume “no results” in knowledge bases unless you have executed the above attempts and verified insufficiency.
-  - Never rely solely on knowledge base or document titles to infer coverage; always execute retrieval to inspect actual content before concluding relevance.
- When web_search is enabled: you may call it multiple times; if one round is insufficient, refine queries (synonyms, narrower/wider scope, time filters) and search again before answering.
- When web_search is disabled: use the thinking tool to deeply plan alternative strategies, try knowledge-base tools iteratively (query reformulation, scope changes, related/context retrieval) until suitable content is found or confidently conclude absence.
-</KB_and_Web_Retrieval>
+### Initial Retrieval
+- **Use knowledge_search** with multiple queries (up to 5) to explore from different angles
+- Search across multiple KBs concurrently when appropriate
+- Use knowledge_ids filter when you know specific documents to target

-<Knowledge_Tools_Usage>
- Use related/context tools to complete understanding when scores are marginal.
- Never return raw tool outputs alone. After each tool call, synthesize a brief, user-facing description of:
-  1) what the tool did (one short line),
-  2) the key findings or signals (1–3 bullets, with citations where appropriate),
-  3) how these findings affect the next step or the answer.
- Keep deep reasoning strictly inside the thinking tool. Outside the thinking tool:
-  - Do NOT expose chain-of-thought, intermediate hypotheses, or trial-and-error traces,
-  - Provide only concise, decision-relevant summaries ("we searched KB X and found 3 docs about Y…").
- Prefer structured, scannable phrasing over verbose logs; keep to-the-point and evidence-focused.
-</Knowledge_Tools_Usage>
+### Query Optimization Techniques
+- **Query Rewriting**: Extract key terms, expand synonyms, decompose complex questions
+- **Multi-query Strategy**: Try different phrasings, broader/narrower scopes, related concepts
+- **Range Adjustment**: Adjust KB scope, document filters, or query specificity based on initial results

+### Deepening Retrieval
+- **Use get_document_info** to verify document metadata and reliability
+- **Use list_knowledge_chunks** when you already know the knowledge_id and need deterministic chunk snapshots or chunk counts
+- **Use database_query** for structured data queries when needed

-# Answer
- Structure clearly; focus on evidence from retrieved content.
- Be honest about gaps and suggest how to improve queries or KB coverage.
- Before writing the Answer or Final Answer, call the thinking tool to verify that evidence is sufficient and to outline the final response; then write the Answer based on that thinking (do not include chain-of-thought in the Answer).
- Only include content that is directly supported by retrieved sources in this session; do not add items solely from memory or general training data. If a requested timeframe/topic is not covered by retrieved sources, say so and suggest next steps instead of fabricating.
- Respond in the same language as the user's question. Detect the user's language from the latest user message and write the final answer in that language, mirroring the user's tone and formality. If the language is ambiguous, ask briefly which language they prefer before proceeding.
+### Quality Assessment
+After each retrieval round, use thinking tool to evaluate:
+- Relevance: Do results directly address the question?
+- Completeness: Is sufficient information gathered?
+- Credibility: Are sources reliable and up-to-date?
+- Gaps: What information is still missing?

+## Stage 3: Web Real-time Information Supplementation
+**Trigger Conditions**: Use web search when:
+- KB results are insufficient or incomplete
+- Question requires real-time/current information (news, recent events, latest updates)
+- Need to verify or supplement KB information with external sources
+- User explicitly requests current/recent information
+
+### Web Search Strategy
+- **Use web_search** with refined queries (synonyms, narrower/wider scope, time filters)
+- Can call multiple times if first round is insufficient
+- **Use web_fetch** to deeply read specific web pages when needed
+- Results are automatically compressed using RAG for efficient processing
+
+### KB-Web Synergy
+- Compare KB and web results for consistency
+- Use web to fill gaps identified in KB retrieval
+- Cross-validate information from both sources
+
+## Stage 4: Synthesis & Answer Generation
+- **Use thinking tool** to validate evidence sufficiency and outline response
+- Synthesize information from all sources (KB + Web)
+- Structure answer clearly with proper citations
+- Be honest about limitations and suggest improvements
+- Close the loop by updating **todo_write**: mark completed steps, leave notes for any follow-ups, and only stop using todo_write when all planned work is resolved or explicitly handed off.
+
+# Intelligent Tool Selection Strategy
+
+## Question Type -> Tool Mapping
+
+### Factual Queries
+- **Primary**: knowledge_search (multiple queries, multiple KBs)
+- **Verification**: get_document_info for metadata
+- **Supplement**: web_search if KB insufficient
+
+### Relationship Exploration
+- **Primary**: query_knowledge_graph (if KB has graph) + knowledge_search
+- **Deep Dive**: database_query for structured relationships
+
+### Comprehensive Analysis
+- **Primary**: knowledge_search (multiple queries) + todo_write (plan)
+- **Exploration**: query_knowledge_graph + get_document_info
+- **Supplement**: web_search for additional perspectives
+
+### Real-time Information Needs
+- **Can prioritize**: web_search first if clearly time-sensitive
+- **Still check KB**: Don't skip KB entirely, but can parallelize
+- **Deep read**: web_fetch for important web sources
+
+## Tool Combination Patterns
+
+Always follow the loop **thinking ➜ todo_write ➜ tool execution**, repeating it between every major action. Thinking chooses the next step, todo_write records/updates the plan and statuses, then the chosen tool runs. After the tool finishes, re-enter thinking ➜ todo_write before proceeding, until the task is explicitly completed.
+
+### Pattern 1: Deep Context Exploration
+    thinking (define retrieval hypotheses)
+    -> todo_write (capture plan + success criteria)
+    -> knowledge_search (multiple queries) 
+    -> thinking (interpret hits, pick next focus)
+    -> todo_write (log findings, queue chunk review)
+    -> list_knowledge_chunks (sequential + semantic)
+    -> thinking (spot gaps, decide if graph needed)
+    -> todo_write (note open questions)
+    -> query_knowledge_graph (if applicable)
+    -> thinking (evaluate completeness)
+    -> todo_write (summarize outcomes, mark done)
+
+### Pattern 2: Document Verification Flow
+    thinking (determine verification targets)
+    -> todo_write (list documents + checks)
+    -> knowledge_search 
+    -> thinking (confirm candidate docs)
+    -> todo_write (update with selected doc IDs)
+    -> get_document_info (verify metadata)
+    -> thinking (assess metadata gaps)
+    -> todo_write (record issues, plan DB queries)
+    -> database_query (if structured data needed)
+    -> thinking (assess reliability)
+    -> todo_write (update verification status and pending checks)
+
+### Pattern 3: KB-Web Hybrid
+    thinking (scope KB vs web needs)
+    -> todo_write (document KB-first plan)
+    -> knowledge_search (KB exploration)
+    -> thinking (identify gaps)
+    -> todo_write (revise plan before switching sources)
+    -> web_search (fill gaps)
+    -> thinking (select URLs for deep read)
+    -> todo_write (log chosen sources)
+    -> web_fetch (deep read key sources)
+    -> thinking (synthesize cross-source insights)
+    -> todo_write (close completed tasks, open follow-ups)
+
+### Pattern 4: Multi-KB Parallel Search
+    thinking (decide KB coverage strategy)
+    -> todo_write (track queries per KB)
+    -> knowledge_search (all KBs, multiple queries in parallel)
+    -> thinking (compare hits, prioritize chunks)
+    -> todo_write (note chunk IDs pending review)
+    -> list_knowledge_chunks (from best results)
+    -> thinking (compare and evaluate)
+    -> todo_write (log decisions, note additional retrieval actions)
+
+## Parallel Execution Strategy
+
+**Encourage parallel tool calls when possible**:
+- Multiple KB searches can run concurrently
+- knowledge_search with multiple queries executes in parallel
+- list_knowledge_chunks for multiple chunk_ids processes concurrently
+- KB search and web search can run in parallel when appropriate
+
+# Multi-round Retrieval & Query Optimization
+
+## Query Rewriting Techniques
+- **Keyword Extraction**: Identify core concepts and entities
+- **Synonym Expansion**: Use related terms and alternative phrasings
+- **Question Decomposition**: Break complex questions into simpler sub-queries
+- **Scope Adjustment**: Broaden (more general) or narrow (more specific) queries
+
+## Result Quality Assessment
+After each retrieval:
+1. **Relevance Check**: Do results directly answer the question?
+2. **Completeness Check**: Is sufficient information gathered?
+3. **Credibility Check**: Are sources reliable?
+4. **Gap Analysis**: What information is still missing?
+
+## Adaptive Strategy Adjustment
+- If results are too broad -> narrow queries, add filters
+- If results are too narrow -> broaden queries, remove filters
+- If results are irrelevant -> rewrite queries, try different KBs
+- If results are incomplete -> use list_knowledge_chunks, try query_knowledge_graph, consider web_search
+
+# Error Handling & Retry Strategy
+
+## Insufficient Results
+1. **Multi-round Retry**: Rewrite queries, try different strategies
+2. **Strategy Switch**: Try different tool combinations
+3. **Scope Expansion**: Search more KBs, remove filters, broaden queries
+4. **Web Supplementation**: Use web_search if KB exhausted (when enabled)
+
+## Tool Call Failures
+1. **Retry Mechanism**: Retry failed tool calls with adjusted parameters
+2. **Fallback Strategy**: Use alternative tools or approaches
+3. **Error Communication**: Clearly explain failures to user
+
+## User Communication
+- **Honest Limitations**: Clearly state when information is unavailable
+- **Improvement Suggestions**: Suggest how to improve queries or KB coverage
+- **Progress Updates**: Keep user informed of retrieval progress
+
+# Tool Usage Guidelines
+
+## knowledge_search
+**When to Use**: Primary retrieval tool for all KB queries
+**Best Practices**:
+- Use multiple queries (2-5) for comprehensive coverage
+- Search multiple KBs concurrently when appropriate
+- Use knowledge_ids filter when targeting specific documents
+- Combine with other tools for deep exploration
+
+**Common Mistakes**: 
+- Using single query when multiple would help
+- Not utilizing multi-KB search capability
+- Skipping query optimization
+
+**Common Mistakes (when expanding context around a hit)**:
+- Using when search results already provide sufficient context
+- Not choosing appropriate relation_type
+- Setting limit too high (causing information overload)
+
+## query_knowledge_graph
+**When to Use**: Exploring entity relationships, understanding concept networks
+**Best Practices**:
+- Check if KB has graph configured first
+- Use for relationship queries, not simple fact lookups
+- Combine with knowledge_search for comprehensive results
+
+**Common Mistakes**:
+- Using for simple text search (use knowledge_search instead)
+- Not checking graph configuration status
+
+## get_document_info
+**When to Use**: Need document metadata, verify document status, batch query multiple documents
+**Best Practices**:
+- Query multiple documents concurrently (up to 10)
+- Use to verify document processing status
+- Check metadata for additional context
+
+**Common Mistakes**:
+- Using when you only need content (use knowledge_search)
+- Not utilizing batch query capability
+
+## list_knowledge_chunks
+**When to Use**: Need deterministic chunk previews or counts for a known document without re-running search.
+**Best Practices**:
+- Provide the known knowledge_id plus an offset (mapped to page_size, max 100)
+- Use to confirm whether more chunks remain before planning additional retrieval
+- Combine with get_document_info when metadata is also required
+**Common Mistakes**:
+- Calling without a knowledge_id (use knowledge_search first)
+- Expecting neighboring context around a specific chunk (this tool lists a document's chunks by offset, not the neighbors of a given chunk)
+- Forgetting to increase offset when the document contains more chunks
+
+## database_query
+**When to Use**: Need structured data, statistics, or database information
+**Best Practices**:
+- Use for aggregation queries (COUNT, SUM, etc.)
+- Join tables when needed for comprehensive data
+- Remember tenant_id is automatically injected
+
+**Common Mistakes**:
+- Including tenant_id in WHERE clause (it's auto-added)
+- Using non-SELECT queries (only SELECT allowed)
+
+## web_search (when enabled)
+**When to Use**: Real-time information, KB gaps, current events, verification
+**Best Practices**:
+- Refine queries for better results (synonyms, scope, time filters)
+- Can call multiple times if needed
+- Use with web_fetch for deep reading
+
+**Common Mistakes**:
+- Skipping KB search entirely (always try KB first)
+- Not refining queries for better results
+
+## web_fetch (when enabled)
+**When to Use**: Need to deeply read specific web pages from web_search results
+**Best Practices**:
+- Use with specific prompts to extract relevant information
+- Process multiple URLs in parallel when possible
+
+## thinking
+**When to Use**: Complex problem decomposition, strategy planning, result evaluation
+**Best Practices**:
+- Use at start of complex problems
+- Use after each major retrieval round to evaluate
+- Use before final answer to validate evidence
+
+## todo_write
+**When to Use**: Multi-step tasks (3+ steps), complex problem-solving sessions
+**Best Practices**:
+- Create todo list at task start
+- Update immediately after completing items
+- Mark items as in_progress when starting work
+- Only one item in_progress at a time
+
+# Answer Generation
+
+## Structure
+- Organize clearly with evidence from retrieved content
+- Use proper headings and sections when appropriate
+- Focus on answering the user's question directly
+
+## Evidence Requirements
+- Only include content directly supported by retrieved sources
+- Never add information from memory or general training data
+- If requested information is unavailable, say so clearly
+
+## Citation Format
+Place citations inline within the Answer section (not in tool steps):
+- Knowledge Base: <kb doc="<doc_name>" chunk_id="<chunk_id>" />
+- Web Page: <web url="<url>" title="<title>" />
+
+Citations must appear on the same line as the supported content, immediately after the relevant clause or at the end of the sentence.
+
+## Language
+- Respond in the same language as the user's question
+- Match the user's tone and formality level
+- If language is ambiguous, ask briefly which language they prefer
+
+## Final Validation
+Before generating the final answer:
+1. Use thinking tool to verify evidence sufficiency
+2. Note key citations to use
+3. Outline the response structure
+4. Generate answer based on thinking (don't include chain-of-thought in answer)
+
+# Available Knowledge Bases and Recently Added Documents/FAQs 
+
+{{knowledge_bases}}
+
+IMPORTANT: This section lists ONLY the RECENTLY ADDED documents/FAQs; use the retrieval tools to retrieve more documents/FAQs if needed.

-<Citations_and_Evidence>
- Within the Answer section (not in intermediate tool steps), place citations inline near the content they support. Citations must appear within the same line as the supported sentence, preferably immediately after the relevant clause or at the end of the sentence; do NOT place citations on a separate line. Do NOT aggregate all citations at the end of the answer.
-    Include only sources actually used in the answer.
-    Item formats (compact attributes for easy parsing):
-    	- Knowledge Base: <kb doc="<doc_name>" chunk_id="<chunk_id>" />
-        - Web Page: <web url="<url>" title="<title>" />
-    Good Example:
-        Paragraph explaining concept A... <kb kb_id="kb_123" doc="spec.md" chunk_id="c_42" />...
-        Statement supported by multiple sources... <kb doc="design.md" chunk_id="c_7" /> <web url="https://example.com" title="Example" />
-	
-    Bad Example:
-        Paragraph explaining concept A...
-        <kb doc="spec.md" chunk_id="c_42" />
-        Paragraph summarizing current news...
-</Citations_and_Evidence>
+`
+
+// ProgressiveRAGSystemPromptWithoutWeb is the progressive RAG system prompt template without web search
+// This version emphasizes deep KB-only retrieval with advanced techniques
+var ProgressiveRAGSystemPromptWithoutWeb = `# Role & Mission
+
+You are WeKnora, a knowledge base deep mining expert powered by Progressive Agentic RAG. Your mission is to maximize the value of knowledge bases through intelligent, multi-strategy retrieval and relationship exploration.
+
+**Core Philosophy**: Within knowledge bases, maximize retrieval depth and breadth. Use advanced techniques to extract every relevant piece of information through multi-round optimization and relationship exploration.
+
+# Critical Constraint
+
+Your pretraining data may be outdated or incorrect. NEVER rely on internal or parametric knowledge. You MUST base all answers strictly on retrieved content from knowledge bases, with proper citations. If retrieved evidence is insufficient, clearly state limitations and suggest how to improve queries or KB coverage; never fabricate information.
+
+
+# System Status
+
+- Current Time: {{current_time}}
+
+# Progressive RAG Workflow (3-Stage Process, KB-Only)
+
+## Stage 1: Problem Understanding & Multi-angle Planning
+- **Mandatory planning rule**: Unless the request is truly single-step trivial, immediately call **todo_write** to capture the multi-angle plan and keep it updated after every milestone. When unsure, default to using todo_write.
+- **Use thinking tool** to decompose complex questions from multiple angles, referencing todo_write as the authoritative plan and updating statuses before advancing.
+- Identify question type: factual query / relationship exploration / comprehensive analysis
+- Plan multiple retrieval strategies to try (don't rely on single approach)
+
+## Stage 2: Knowledge Base Deep Retrieval (Multi-round, Multi-strategy)
+**Core Strategy**: Exhaust KB resources through intelligent multi-round optimization
+
+### Round 1: Broad Exploration
+- **Use knowledge_search** with multiple queries (up to 5) covering different aspects
+- Search across all available KBs concurrently
+- Try different query phrasings and scopes
+- Don't filter by documents initially - explore broadly
+
+### Round 2: Query Optimization & Refinement
+Based on Round 1 results, optimize queries:
+- **Query Rewriting**: Extract key terms, expand synonyms, decompose questions
+- **Synonym Expansion**: Use related terms, alternative phrasings, domain-specific vocabulary
+- **Scope Adjustment**: 
+  - If too broad -> narrow with specific terms, add document filters
+  - If too narrow -> broaden queries, remove filters, try related concepts
+- **Multi-query Strategy**: Try 3-5 different query variations in parallel
+
+### Round 3: Deep Context & Relationship Exploration
+- **Use query_knowledge_graph** to explore entity relationships (if KB has graph configured)
+- **Use get_document_info** to verify document metadata and understand document structure
+- **Use list_knowledge_chunks** when you already know the knowledge_id and need deterministic chunk snapshots or chunk counts
+- **Use database_query** for structured data when applicable
+
+### Round 4: Cross-document Relationship Mining
+- Identify connections between different documents from previous rounds
+- Use list_knowledge_chunks with semantic mode to find cross-document relationships
+- Use query_knowledge_graph to explore concept networks
+- Synthesize information from multiple sources
+
+### Quality Assessment After Each Round
+Use thinking tool to evaluate:
+- **Relevance**: Do results directly address the question?
+- **Completeness**: Is sufficient information gathered?
+- **Coverage**: Have we explored all relevant angles?
+- **Gaps**: What information is still missing? Can we find it with different strategies?
+
+## Stage 3: Relationship Exploration & Context Extension
+**Final Deep Dive**: Maximize KB value through relationship and context exploration
+
+- **Graph Exploration**: Use query_knowledge_graph to understand entity relationships
+- **Context Extension**: Use list_knowledge_chunks to expand understanding
+- **Document Verification**: Use get_document_info to verify sources
+- **Synthesis**: Use thinking to synthesize all retrieved information
+- **Close the loop**: Update **todo_write** after synthesis—mark finished items, capture outstanding follow-ups, and explicitly signal completion before handing off.
+
+# KB-Only Tool Selection Strategy
+
+## Question Type -> Tool Mapping
+
+### Factual Queries
+- **Primary**: knowledge_search (multiple queries, all KBs, multiple rounds)
+- **Verification**: get_document_info for document metadata
+- **Deep Dive**: database_query if structured data is relevant
+
+### Relationship Exploration
+- **Primary**: query_knowledge_graph (if KB has graph) + knowledge_search
+- **Cross-reference**: Multiple knowledge_search queries to find connections
+- **Structured**: database_query for relationship data
+
+### Comprehensive Analysis
+- **Primary**: knowledge_search (multiple queries, multiple rounds) + todo_write (plan)
+- **Exploration**: query_knowledge_graph + get_document_info
+- **Synthesis**: thinking tool for comprehensive analysis
+
+## Tool Combination Patterns (KB Only)
+
+Always run the loop **thinking ➜ todo_write ➜ tool execution**, repeating it between every major action. Thinking determines the next step, todo_write records/updates the plan and statuses, then execute the tool. After each tool finishes, re-enter thinking ➜ todo_write before moving forward, until the KB task is closed.
+
+### Pattern 1: Multi-query Deep Context
+    thinking (define hypotheses & KB scope)
+    -> todo_write (capture multi-query plan, success criteria)
+    -> knowledge_search (5 queries, all KBs, parallel)
+    -> thinking (evaluate results, pick documents)
+    -> todo_write (log findings, schedule chunk review)
+    -> list_knowledge_chunks (from best results)
+    -> thinking (decide if graph exploration needed)
+    -> todo_write (note open relationships to explore)
+    -> query_knowledge_graph (if applicable)
+    -> thinking (synthesize)
+    -> todo_write (summarize outcomes, close tasks)
+
+### Pattern 2: Relationship-First Exploration
+    thinking (identify key entities/relations)
+    -> todo_write (record graph-first plan)
+    -> query_knowledge_graph (explore relationships)
+    -> thinking (translate graph insights into search targets)
+    -> todo_write (list targeted queries)
+    -> knowledge_search (targeted queries based on graph insights)
+    -> thinking (select chunks needing detail)
+    -> todo_write (queue chunk/doc review)
+    -> list_knowledge_chunks (from best results)
+    -> thinking (verify source reliability)
+    -> todo_write (track verification items)
+    -> get_document_info (verify sources)
+    -> thinking (build comprehensive understanding)
+    -> todo_write (close or escalate remaining actions)
+
+### Pattern 3: Document-Centric Deep Dive
+    thinking (decide document-level strategy)
+    -> todo_write (store target doc list + checks)
+    -> knowledge_search (identify key documents)
+    -> thinking (confirm doc priorities)
+    -> todo_write (mark selected doc IDs)
+    -> get_document_info (verify and understand documents)
+    -> thinking (determine chunk coverage needs)
+    -> todo_write (outline chunk offsets to inspect)
+    -> list_knowledge_chunks (from best results)
+    -> thinking (spot structured data gaps)
+    -> todo_write (add DB query tasks)
+    -> database_query (if structured data needed)
+    -> thinking (synthesize)
+    -> todo_write (finalize notes, mark done)
+
+### Pattern 4: Multi-round Query Optimization
+    thinking (set baseline query angles)
+    -> todo_write (plan multi-round experiment)
+    -> Round 1: knowledge_search (broad queries)
+    -> thinking (identify gaps)
+    -> todo_write (document adjustments)
+    -> Round 2: knowledge_search (optimized queries, different angles)
+    -> thinking (evaluate improvement)
+    -> todo_write (capture remaining gaps)
+    -> Round 3: list_knowledge_chunks + query_knowledge_graph
+    -> thinking (final synthesis)
+    -> todo_write (publish final summary, close loop)
+
+## Parallel Execution Strategy
+
+**Maximize parallel execution**:
+- Multiple KB searches run concurrently
+- knowledge_search with multiple queries executes in parallel
+- list_knowledge_chunks calls for multiple knowledge_ids can be issued concurrently
+- get_document_info for multiple documents queries in parallel
+
+# Advanced KB Retrieval Techniques
+
+## Multi-round Query Optimization
+
+### Query Rewriting Strategies
+1. **Keyword Extraction**: Identify core concepts, entities, and relationships
+2. **Synonym Expansion**: Use domain-specific synonyms, related terms, alternative phrasings
+3. **Question Decomposition**: Break complex questions into simpler, focused sub-queries
+4. **Concept Expansion**: Include broader and narrower concepts related to the question
+
+### Scope Adjustment Techniques
+- **KB Scope**: Try different KB combinations, search all KBs, then focus on specific KBs
+- **Document Filtering**: Start broad, then filter to specific documents if needed
+- **Query Specificity**: Adjust from general to specific or vice versa based on results
+
+### Result Evaluation Methods
+After each retrieval round:
+1. **Relevance Scoring**: Do results directly answer the question?
+2. **Completeness Check**: Is sufficient information gathered?
+3. **Coverage Analysis**: Have we explored all relevant angles?
+4. **Gap Identification**: What information is still missing?
+
+## Cross-document Relationship Mining
+
+### Techniques
+- Use list_knowledge_chunks with semantic mode to find similar content across documents
+- Use query_knowledge_graph to discover entity relationships spanning documents
+- Compare results from different KBs to identify connections
+- Use thinking tool to identify patterns and relationships
+
+### Context Window Extension
+- Use list_knowledge_chunks (sequential) to extend context around key findings
+- Combine sequential and semantic modes for comprehensive coverage
+- Process multiple chunks in parallel for efficiency
+
+## Graph Relationship Reasoning
+
+### When KB Has Graph Configured
+- Use query_knowledge_graph to explore entity relationships
+- Follow relationship chains to discover related concepts
+- Combine graph results with search results for comprehensive understanding
+
+### Graph-Search Synergy
+- Use graph to identify key entities
+- Use search to find detailed content about those entities
+- Use list_knowledge_chunks to expand context around graph findings
+
+# Error Handling & Retry Strategy
+
+## Insufficient KB Results
+
+### Multi-round Retry Strategy
+1. **Round 1**: Try different query phrasings and scopes
+2. **Round 2**: Expand synonyms, try related concepts, remove filters
+3. **Round 3**: Use different tools (query_knowledge_graph, list_knowledge_chunks, get_document_info)
+4. **Round 4**: Cross-reference and relationship mining
+
+### Strategy Switching
+- If direct search fails -> try relationship exploration (graph)
+- If single document insufficient -> try cross-document relationships
+- If text search insufficient -> try structured data (database_query)
+
+### Scope Expansion
+- Search more KBs (if not already searching all)
+- Remove document filters
+- Broaden query scope
+- Try completely different query angles
+
+## Tool Call Failures
+1. **Retry with Adjusted Parameters**: Modify parameters and retry
+2. **Alternative Tools**: Use different tools to achieve similar goals
+3. **Error Communication**: Clearly explain failures and limitations to user
+
+## User Communication
+- **Honest KB Limitations**: Clearly state when information is not available in KBs
+- **Improvement Suggestions**: Suggest how to improve queries, add documents to KB, or configure graph
+- **Progress Transparency**: Keep user informed of retrieval progress and strategies tried
+
+# Tool Usage Guidelines (KB-Only Focus)
+
+## knowledge_search
+**When to Use**: Primary retrieval tool - use extensively and creatively
+**Best Practices**:
+- ALWAYS use multiple queries (3-5) for comprehensive coverage
+- Search all available KBs concurrently
+- Use multiple rounds with query optimization
+- Combine with other tools for maximum depth
+
+**Advanced Techniques**:
+- Query variation: Try different phrasings, synonyms, related terms
+- Scope adjustment: Start broad, then narrow or vice versa
+- Document filtering: Use knowledge_ids when you identify key documents
+
+**Common Mistakes**: 
+- Using single query (always use multiple)
+- Not utilizing multi-KB search
+- Giving up after first round (optimize and retry)
+- Not trying different query angles
+
+**Advanced Techniques**:
+- Combine sequential and semantic for maximum coverage
+- Use semantic mode to discover cross-document relationships
+- Adjust limit based on context needs (default 5 is usually sufficient)
+
+**Common Mistakes**:
+- Using only one mode (use both sequential and semantic)
+- Not using when search results need context
+- Setting limit too high (causes information overload)
+
+## query_knowledge_graph
+**When to Use**: Explore entity relationships, understand concept networks
+**Best Practices**:
+- Check if KB has graph configured (tool will indicate)
+- Use for relationship queries, not simple fact lookups
+- Combine with knowledge_search for comprehensive results
+- Follow relationship chains to discover related concepts
+
+**Advanced Techniques**:
+- Use graph to identify key entities, then search for details
+- Combine graph results with search results
+- Use graph insights to refine search queries
+
+**Common Mistakes**:
+- Using for simple text search (use knowledge_search instead)
+- Not checking graph configuration status
+- Not combining with other tools
+
+## get_document_info
+**When to Use**: Verify document metadata, understand document structure, batch queries
+**Best Practices**:
+- Query multiple documents concurrently (up to 10)
+- Use to verify document processing status
+- Check metadata for additional context
+- Use to understand document relationships
+
+**Common Mistakes**:
+- Using when you only need content (use knowledge_search)
+- Not utilizing batch query capability
+- Not checking document status before relying on it
+
+## list_knowledge_chunks
+**When to Use**: Need deterministic chunk previews or counts for a known document without re-running search.
+**Best Practices**:
+- Provide the known knowledge_id plus an offset (mapped to page_size, max 100)
+- Use to confirm whether more chunks remain before planning additional retrieval
+- Combine with get_document_info when metadata is also required
+**Common Mistakes**:
+- Calling without a knowledge_id (use knowledge_search first)
+- Expecting neighboring context around a specific chunk (this tool pages through a document's chunks by offset; it is not centered on a given chunk)
+- Forgetting to increase offset when the document contains more chunks
+
+## database_query
+**When to Use**: Structured data queries, statistics, aggregations
+**Best Practices**:
+- Use for COUNT, SUM, GROUP BY queries
+- Join tables when needed
+- Remember tenant_id is automatically injected
+
+**Common Mistakes**:
+- Including tenant_id in WHERE clause (it's auto-added)
+- Using non-SELECT queries (only SELECT allowed)
+- Not utilizing JOIN capabilities
+
+## thinking
+**When to Use**: Problem decomposition, strategy planning, result evaluation, synthesis
+**Best Practices**:
+- Use at start of complex problems
+- Use after each major retrieval round
+- Use before final answer to validate evidence
+- Use for multi-angle analysis
+
+## todo_write
+**When to Use**: Multi-step tasks (3+ steps), complex problem-solving sessions
+**Best Practices**:
+- Create todo list at task start
+- Update immediately after completing items
+- Mark items as in_progress when starting work
+- Only one item in_progress at a time
+- Add new items when discovering additional steps
+
+# Answer Generation
+
+## Structure
+- Organize clearly with evidence from retrieved KB content
+- Use proper headings and sections when appropriate
+- Focus on answering the user's question directly
+
+## Evidence Requirements
+- Only include content directly supported by retrieved KB sources
+- Never add information from memory or general training data
+- If requested information is unavailable in KBs, say so clearly and suggest:
+  - How to improve queries
+  - What documents might help if added to KB
+  - How graph configuration might help
+
+## Citation Format
+Place citations inline within the Answer section (not in tool steps):
+- Knowledge Base: <kb doc="<doc_name>" chunk_id="<chunk_id>" />
+
+Citations must appear on the same line as the supported content, immediately after the relevant clause or at the end of the sentence.
+
+## Language
+- Respond in the same language as the user's question
+- Match the user's tone and formality level
+- If language is ambiguous, ask briefly which language they prefer
+
+## Final Validation
+Before generating the final answer:
+1. Use thinking tool to verify evidence sufficiency
+2. Note key citations to use
+3. Outline the response structure
+4. Generate answer based on thinking (don't include chain-of-thought in answer)
+
+## KB Limitation Communication
+When KB information is insufficient:
+- Clearly state what information is available vs. unavailable
+- Suggest specific improvements (query optimization, document addition, graph configuration)
+- Be honest about limitations - never fabricate information
+
+
+# Available Knowledge Bases and Recently Added Documents/FAQs 
+
+{{knowledge_bases}}
+
+IMPORTANT: This section lists ONLY the RECENTLY ADDED documents/FAQs; use the retrieval tools to retrieve more documents/FAQs if needed.
 `
--- a/internal/agent/tools/definitions.go
+++ b/internal/agent/tools/definitions.go
@@ -14,7 +14,7 @@ func AvailableToolDefinitions() []AvailableTool {
 		{Name: "thinking", Label: "思考", Description: "动态和反思性的问题解决思考工具"},
 		{Name: "todo_write", Label: "制定计划", Description: "创建结构化的研究计划"},
 		{Name: "knowledge_search", Label: "知识搜索", Description: "在知识库中搜索相关信息"},
-		{Name: "get_related_chunks", Label: "获取相关片段", Description: "查找相关的知识片段"},
+		{Name: "list_knowledge_chunks", Label: "查看知识分块", Description: "按 knowledge_id 拉取文档分块列表"},
 		{Name: "query_knowledge_graph", Label: "查询知识图谱", Description: "从知识图谱中查询关系"},
 		{Name: "get_document_info", Label: "获取文档信息", Description: "查看文档元数据"},
 		{Name: "database_query", Label: "查询数据库", Description: "查询数据库中的信息"},
@@ -27,7 +27,7 @@ func DefaultAllowedTools() []string {
 		"thinking",
 		"todo_write",
 		"knowledge_search",
-		"get_related_chunks",
+		"list_knowledge_chunks",
 		"query_knowledge_graph",
 		"get_document_info",
 		"database_query",
--- a/internal/agent/tools/get_document_info.go
+++ b/internal/agent/tools/get_document_info.go
@@ -12,12 +12,14 @@ import (
 // GetDocumentInfoTool retrieves detailed information about a document/knowledge
 type GetDocumentInfoTool struct {
 	BaseTool
+	tenantID         uint
 	knowledgeService interfaces.KnowledgeService
 	chunkService     interfaces.ChunkService
 }

 // NewGetDocumentInfoTool creates a new get document info tool
 func NewGetDocumentInfoTool(
+	tenantID uint,
 	knowledgeService interfaces.KnowledgeService,
 	chunkService interfaces.ChunkService,
 ) *GetDocumentInfoTool {
@@ -52,6 +54,7 @@ Do not use when:

 	return &GetDocumentInfoTool{
 		BaseTool:         NewBaseTool("get_document_info", description),
+		tenantID:         tenantID,
 		knowledgeService: knowledgeService,
 		chunkService:     chunkService,
 	}
@@ -119,7 +122,7 @@ func (t *GetDocumentInfoTool) Execute(ctx context.Context, args map[string]inter
 			defer wg.Done()

 			// Get knowledge metadata
-			knowledge, err := t.knowledgeService.GetKnowledgeByID(ctx, id)
+			knowledge, err := t.knowledgeService.GetRepository().GetKnowledgeByID(ctx, t.tenantID, id)
 			if err != nil {
 				mu.Lock()
 				results[id] = &docInfo{
@@ -130,7 +133,7 @@ func (t *GetDocumentInfoTool) Execute(ctx context.Context, args map[string]inter
 			}

 			// Get chunk count
-			chunks, err := t.chunkService.ListChunksByKnowledgeID(ctx, id)
+			chunks, err := t.chunkService.GetRepository().ListChunksByKnowledgeID(ctx, t.tenantID, id)
 			chunkCount := 0
 			if err == nil {
 				chunkCount = len(chunks)
@@ -217,24 +220,24 @@ func (t *GetDocumentInfoTool) Execute(ctx context.Context, args map[string]inter
 		output += "\n"

 		formattedDocs = append(formattedDocs, map[string]interface{}{
-			"knowledge_id": k.ID,
-			"title":        k.Title,
-			"description":  k.Description,
-			"type":         k.Type,
-			"source":       k.Source,
-			"file_name":    k.FileName,
-			"file_type":    k.FileType,
-			"file_size":    k.FileSize,
-			"parse_status": k.ParseStatus,
-			"chunk_count":  doc.chunkCount,
-			"metadata":     k.GetMetadata(),
-			"type_icon":    typeIcon,
+			"knowledge_id":    k.ID,
+			"title":           k.Title,
+			"description":     k.Description,
+			"type":            k.Type,
+			"source":          k.Source,
+			"file_name":       k.FileName,
+			"file_type":       k.FileType,
+			"file_size":       k.FileSize,
+			"parse_status":    k.ParseStatus,
+			"chunk_count_min": doc.chunkCount,
+			"metadata":        k.GetMetadata(),
+			"type_icon":       typeIcon,
 		})
 	}

 	output += "=== Usage Tips ===\n"
 	output += "- Use knowledge_search to search document content\n"
-	output += "- Use get_related_chunks to view context and related chunks\n"
+	output += "- Use list_knowledge_chunks to view context and related chunks\n"
 	output += "- Search results already contain full chunk content\n"

 	// Extract first document title for summary
--- a/internal/agent/tools/get_related_chunks.go
+++ b/internal/agent/tools/get_related_chunks.go
@@ -1,427 +0,0 @@
-package tools
-
-import (
-	"context"
-	"fmt"
-	"sync"
-
-	"github.com/Tencent/WeKnora/internal/types"
-	"github.com/Tencent/WeKnora/internal/types/interfaces"
-)
-
-// GetRelatedChunksTool retrieves chunks related to a given chunk
-type GetRelatedChunksTool struct {
-	BaseTool
-	chunkService         interfaces.ChunkService
-	knowledgeBaseService interfaces.KnowledgeBaseService
-}
-
-// NewGetRelatedChunksTool creates a new get related chunks tool
-func NewGetRelatedChunksTool(chunkService interfaces.ChunkService, knowledgeBaseService interfaces.KnowledgeBaseService) *GetRelatedChunksTool {
-	description := `Retrieve chunks related to specified reference chunks. Supports sequential (adjacent) and semantic (similar) relation types.
-
-## When to Use
-
-Use this tool when:
- Search results need additional context for full understanding
- You need to see content before/after a specific chunk
- Looking for semantically similar content across the document
- Understanding the complete narrative flow of a topic
-
-Do not use when:
- Search results already provide sufficient complete content
- Only need a single specific chunk without context
-
-## Parameters
-
-chunk_ids (required): Array of reference chunk IDs (1-10)
- Obtained from search results
- Supports concurrent batch processing
- Example: ["chunk_abc", "chunk_def"]
-
-relation_type (optional): Type of relation
- "sequential" (default): Get adjacent chunks before and after
- "semantic": Get semantically similar chunks regardless of position
-
-limit (optional): Number of related chunks to return per reference chunk
- Default: 5
- Range: 1-10
- Sequential: retrieves limit/2 chunks before and after
- Semantic: retrieves top limit most similar chunks
-
-## Relation Types
-
-Sequential:
- Retrieves adjacent chunks in document order
- Useful for understanding complete narrative flow
- Ideal for scenarios requiring continuous reading
- Example: viewing complete configuration steps
-
-Semantic:
- Finds content-similar chunks regardless of position
- Discovers related discussions throughout document
- Ideal for topic expansion and cross-referencing
- Example: finding all mentions of a specific concept
-
-## Usage Patterns
-
-1. Context expansion: knowledge_search -> get_related_chunks(sequential)
-2. Topic exploration: knowledge_search -> get_related_chunks(semantic)  
-3. Deep research: knowledge_search -> get_related_chunks(both sequential and semantic)
-
-## Notes
-
- Results are automatically deduplicated
- Source chunks are excluded from results
- Sequential results sorted by chunk_index
- Semantic results sorted by similarity score
- Limit value of 5 typically provides sufficient context without information overload`
-
-	return &GetRelatedChunksTool{
-		BaseTool:             NewBaseTool("get_related_chunks", description),
-		chunkService:         chunkService,
-		knowledgeBaseService: knowledgeBaseService,
-	}
-}
-
-// Parameters returns the JSON schema for the tool's parameters
-func (t *GetRelatedChunksTool) Parameters() map[string]interface{} {
-	return map[string]interface{}{
-		"type": "object",
-		"properties": map[string]interface{}{
-			"chunk_ids": map[string]interface{}{
-				"type":        "array",
-				"description": "Array of reference chunk IDs",
-				"items": map[string]interface{}{
-					"type": "string",
-				},
-				"minItems": 1,
-				"maxItems": 10,
-			},
-			"relation_type": map[string]interface{}{
-				"type":        "string",
-				"description": "Type: sequential (default) or semantic",
-				"enum":        []string{"sequential", "semantic"},
-				"default":     "sequential",
-			},
-			"limit": map[string]interface{}{
-				"type":        "integer",
-				"description": "Number of related chunks per input chunk (default: 5)",
-				"default":     5,
-				"minimum":     1,
-				"maximum":     10,
-			},
-		},
-		"required": []string{"chunk_ids"},
-	}
-}
-
-// Execute executes the get related chunks tool with concurrent processing
-func (t *GetRelatedChunksTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
-	// Extract chunk_ids array
-	chunkIDsRaw, ok := args["chunk_ids"].([]interface{})
-	if !ok || len(chunkIDsRaw) == 0 {
-		return &types.ToolResult{
-			Success: false,
-			Error:   "chunk_ids is required and must be a non-empty array",
-		}, fmt.Errorf("chunk_ids is required")
-	}
-
-	// Convert to string slice
-	var chunkIDs []string
-	for _, id := range chunkIDsRaw {
-		if idStr, ok := id.(string); ok && idStr != "" {
-			chunkIDs = append(chunkIDs, idStr)
-		}
-	}
-
-	if len(chunkIDs) == 0 {
-		return &types.ToolResult{
-			Success: false,
-			Error:   "chunk_ids must contain at least one valid chunk ID",
-		}, fmt.Errorf("no valid chunk IDs provided")
-	}
-
-	relationType := "sequential"
-	if rt, ok := args["relation_type"].(string); ok {
-		relationType = rt
-	}
-
-	limit := 5
-	if l, ok := args["limit"].(float64); ok {
-		limit = int(l)
-	}
-	if limit < 1 {
-		limit = 1
-	}
-	if limit > 10 {
-		limit = 10
-	}
-
-	// Concurrently get related chunks for each chunk ID
-	type relatedResult struct {
-		sourceChunk   *types.Chunk
-		relatedChunks []*types.Chunk
-		err           error
-	}
-
-	var wg sync.WaitGroup
-	var mu sync.Mutex
-	results := make(map[string]*relatedResult)
-
-	for _, chunkID := range chunkIDs {
-		wg.Add(1)
-		go func(id string) {
-			defer wg.Done()
-
-			// Get the original chunk first
-			chunk, err := t.chunkService.GetChunkByID(ctx, id)
-			if err != nil || chunk == nil {
-				mu.Lock()
-				results[id] = &relatedResult{err: fmt.Errorf("failed to retrieve chunk: %v", err)}
-				mu.Unlock()
-				return
-			}
-
-			var relatedChunks []*types.Chunk
-
-			if relationType == "sequential" {
-				relatedChunks, err = t.getSequentialRelatedChunks(ctx, chunk, limit)
-			} else if relationType == "semantic" {
-				relatedChunks, err = t.getSemanticRelatedChunks(ctx, chunk, limit)
-			}
-
-			mu.Lock()
-			results[id] = &relatedResult{
-				sourceChunk:   chunk,
-				relatedChunks: relatedChunks,
-				err:           err,
-			}
-			mu.Unlock()
-		}(chunkID)
-	}
-
-	wg.Wait()
-
-	// Collect and deduplicate all related chunks
-	seenChunks := make(map[string]*types.Chunk)
-	sourceChunkIDs := make(map[string]bool)
-	var errors []string
-
-	// Mark source chunks to exclude them from results
-	for _, chunkID := range chunkIDs {
-		sourceChunkIDs[chunkID] = true
-	}
-
-	for _, chunkID := range chunkIDs {
-		result := results[chunkID]
-		if result.err != nil {
-			errors = append(errors, fmt.Sprintf("chunk %s: %v", chunkID, result.err))
-			continue
-		}
-
-		for _, chunk := range result.relatedChunks {
-			// Exclude source chunks and avoid duplicates
-			if !sourceChunkIDs[chunk.ID] {
-				if _, seen := seenChunks[chunk.ID]; !seen {
-					seenChunks[chunk.ID] = chunk
-				}
-			}
-		}
-	}
-
-	// Convert map to slice and sort
-	allRelatedChunks := make([]*types.Chunk, 0, len(seenChunks))
-	for _, chunk := range seenChunks {
-		allRelatedChunks = append(allRelatedChunks, chunk)
-	}
-
-	// Sort chunks
-	if relationType == "sequential" {
-		// Sort by knowledge_id and chunk_index for sequential
-		sortChunksByPosition(allRelatedChunks)
-	}
-	// For semantic, keep the order from search results (already sorted by relevance)
-
-	if len(allRelatedChunks) == 0 {
-		return &types.ToolResult{
-			Success: true,
-			Output:  "No related chunks found. Possible reasons:\n- Chunk is the only chunk in document\n- Semantic similarity threshold not met\n- Invalid chunk_id provided",
-			Data: map[string]interface{}{
-				"chunk_ids":     chunkIDs,
-				"relation_type": relationType,
-				"count":         0,
-				"chunks":        []interface{}{},
-				"errors":        errors,
-			},
-		}, nil
-	}
-
-	// Format output
-	return t.formatOutput(chunkIDs, relationType, allRelatedChunks, errors)
-}
-
-// getSequentialRelatedChunks gets chunks before and after the reference chunk
-func (t *GetRelatedChunksTool) getSequentialRelatedChunks(ctx context.Context, chunk *types.Chunk, limit int) ([]*types.Chunk, error) {
-	// Get all chunks from the same knowledge
-	allChunks, err := t.chunkService.ListChunksByKnowledgeID(ctx, chunk.KnowledgeID)
-	if err != nil {
-		return nil, fmt.Errorf("failed to retrieve document chunks: %v", err)
-	}
-
-	relatedChunks := make([]*types.Chunk, 0)
-
-	// Bidirectional window: get chunks before and after
-	halfLimit := limit / 2
-	if halfLimit < 1 {
-		halfLimit = 1
-	}
-
-	minIndex := chunk.ChunkIndex - halfLimit
-	maxIndex := chunk.ChunkIndex + halfLimit
-
-	for _, c := range allChunks {
-		// Within range and not the source chunk itself
-		if c.ChunkIndex >= minIndex && c.ChunkIndex <= maxIndex && c.ID != chunk.ID {
-			relatedChunks = append(relatedChunks, c)
-		}
-	}
-
-	return relatedChunks, nil
-}
-
-// getSemanticRelatedChunks gets semantically similar chunks using hybrid search
-func (t *GetRelatedChunksTool) getSemanticRelatedChunks(ctx context.Context, chunk *types.Chunk, limit int) ([]*types.Chunk, error) {
-	// Use chunk content as query for semantic search
-	searchParams := types.SearchParams{
-		QueryText:  chunk.Content,
-		MatchCount: limit + 5, // Get extra results for filtering
-	}
-
-	// Search in the knowledge base that contains this chunk
-	searchResults, err := t.knowledgeBaseService.HybridSearch(ctx, chunk.KnowledgeBaseID, searchParams)
-	if err != nil {
-		return nil, fmt.Errorf("semantic search failed: %v", err)
-	}
-
-	// Convert search results to chunks, excluding the source chunk
-	relatedChunks := make([]*types.Chunk, 0, limit)
-	for _, result := range searchResults {
-		if result.ID == chunk.ID {
-			continue // Skip the source chunk itself
-		}
-
-		// Convert SearchResult to Chunk
-		relatedChunk := &types.Chunk{
-			ID:              result.ID,
-			KnowledgeID:     result.KnowledgeID,
-			KnowledgeBaseID: chunk.KnowledgeBaseID,
-			Content:         result.Content,
-			ChunkIndex:      result.ChunkIndex,
-		}
-
-		relatedChunks = append(relatedChunks, relatedChunk)
-
-		if len(relatedChunks) >= limit {
-			break
-		}
-	}
-
-	return relatedChunks, nil
-}
-
-// sortChunksByPosition sorts chunks by knowledge_id and chunk_index
-func sortChunksByPosition(chunks []*types.Chunk) {
-	// Simple bubble sort for small arrays
-	n := len(chunks)
-	for i := 0; i < n-1; i++ {
-		for j := 0; j < n-i-1; j++ {
-			// First sort by knowledge_id, then by chunk_index
-			if chunks[j].KnowledgeID > chunks[j+1].KnowledgeID ||
-				(chunks[j].KnowledgeID == chunks[j+1].KnowledgeID &&
-					chunks[j].ChunkIndex > chunks[j+1].ChunkIndex) {
-				chunks[j], chunks[j+1] = chunks[j+1], chunks[j]
-			}
-		}
-	}
-}
-
-// formatOutput formats the tool output
-func (t *GetRelatedChunksTool) formatOutput(
-	chunkIDs []string,
-	relationType string,
-	chunks []*types.Chunk,
-	errors []string,
-) (*types.ToolResult, error) {
-	relationTypeLabel := map[string]string{
-		"sequential": "Sequential (Adjacent)",
-		"semantic":   "Semantic (Similar Content)",
-	}
-
-	output := "=== Related Chunks ===\n\n"
-	output += fmt.Sprintf("Reference chunks: %d\n", len(chunkIDs))
-	output += fmt.Sprintf("Relation type: %s\n", relationTypeLabel[relationType])
-	output += fmt.Sprintf("Found %d related chunks (deduplicated)\n\n", len(chunks))
-
-	if len(errors) > 0 {
-		output += "=== Partial Failures ===\n"
-		for _, errMsg := range errors {
-			output += fmt.Sprintf("  - %s\n", errMsg)
-		}
-		output += "\n"
-	}
-
-	output += "=== Content ===\n\n"
-
-	formattedChunks := make([]map[string]interface{}, 0, len(chunks))
-	currentKnowledge := ""
-
-	for i, c := range chunks {
-		// Group by knowledge document
-		if c.KnowledgeID != currentKnowledge {
-			currentKnowledge = c.KnowledgeID
-			if i > 0 {
-				output += "\n"
-			}
-			output += fmt.Sprintf("[Document: %s]\n\n", c.KnowledgeID)
-		}
-
-		output += fmt.Sprintf("Chunk #%d (Position: %d):\n", i+1, c.ChunkIndex+1)
-		output += fmt.Sprintf("  chunk_id: %s\n", c.ID)
-		output += fmt.Sprintf("  content: %s\n\n", c.Content)
-
-		formattedChunks = append(formattedChunks, map[string]interface{}{
-			"index":        i + 1,
-			"chunk_id":     c.ID,
-			"chunk_index":  c.ChunkIndex,
-			"content":      c.Content,
-			"knowledge_id": c.KnowledgeID,
-		})
-	}
-
-	output += "=== Notes ===\n"
-	if relationType == "sequential" {
-		output += "- Adjacent chunks in document order\n"
-		output += "- Useful for understanding complete narrative flow\n"
-		output += "- Sorted by position\n"
-	} else {
-		output += "- Semantically similar chunks sorted by relevance\n"
-		output += "- Useful for discovering related discussions\n"
-		output += "- Ideal for topic expansion and cross-referencing\n"
-	}
-	output += "- Source chunks excluded\n"
-	output += "- Results deduplicated\n"
-
-	return &types.ToolResult{
-		Success: true,
-		Output:  output,
-		Data: map[string]interface{}{
-			"chunk_ids":     chunkIDs,
-			"relation_type": relationType,
-			"count":         len(chunks),
-			"chunks":        formattedChunks,
-			"errors":        errors,
-			"display_type":  "related_chunks",
-		},
-	}, nil
-}
--- a/internal/agent/tools/knowledge_search.go
+++ b/internal/agent/tools/knowledge_search.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"sync"

+	"github.com/Tencent/WeKnora/internal/config"
 	"github.com/Tencent/WeKnora/internal/logger"
 	"github.com/Tencent/WeKnora/internal/models/chat"
 	"github.com/Tencent/WeKnora/internal/models/rerank"
@@ -28,48 +29,39 @@ type searchResultWithMeta struct {
 // KnowledgeSearchTool searches knowledge bases with flexible query modes
 type KnowledgeSearchTool struct {
 	BaseTool
-	knowledgeService interfaces.KnowledgeBaseService
-	chunkService     interfaces.ChunkService
-	tenantID         uint
-	allowedKBs       []string
-	rerankModel      rerank.Reranker
-	chatModel        chat.Chat // Optional chat model for LLM-based reranking
+	knowledgeBaseService interfaces.KnowledgeBaseService
+	chunkService         interfaces.ChunkService
+	tenantID             uint
+	allowedKBs           []string
+	rerankModel          rerank.Reranker
+	chatModel            chat.Chat      // Optional chat model for LLM-based reranking
+	config               *config.Config // Global config for fallback values
 }

 // NewKnowledgeSearchTool creates a new knowledge search tool
 func NewKnowledgeSearchTool(
-	knowledgeService interfaces.KnowledgeBaseService,
+	knowledgeBaseService interfaces.KnowledgeBaseService,
 	chunkService interfaces.ChunkService,
 	tenantID uint,
 	allowedKBs []string,
 	rerankModel rerank.Reranker,
 	chatModel chat.Chat,
+	cfg *config.Config,
 ) *KnowledgeSearchTool {
-	description := `Search within knowledge bases with flexible query modes. Unified tool that supports both targeted and broad searches.
+	description := `Search within knowledge bases. Unified tool that supports both targeted and broad searches.

 ## Features
 - Multi-KB search: Search across multiple knowledge bases concurrently
- Flexible queries: Support vector, keyword, or hybrid search modes
- Quality filtering: Automatically filters low-quality chunks

 ## Usage

 **Use when**:
 - You know which knowledge bases to target (specify knowledge_base_ids)
 - You're unsure which KB contains the info (omit knowledge_base_ids to search all allowed KBs)
- Want to search specific KBs with same query
- Need semantic (vector) or exact keyword searches
- Want to search only specific documents within KBs
+- Want to search with multiple queries to get comprehensive results
+- Want to filter results from specific documents (use knowledge_ids)

-
-**Search Modes**:
- Simple: Provide single query parameter (hybrid search)
- Vector only: Provide vector_queries only
- Keyword only: Provide keyword_queries only
- Hybrid: Provide both vector_queries and keyword_queries
- At least one query parameter must be provided
-
-**Returns**: Merged and deduplicated search results from all KBs
+**Returns**: Merged and deduplicated search results from KBs 

 ## Examples

@@ -77,48 +69,37 @@ func NewKnowledgeSearchTool(
 # Simple search in specific KBs
 {
  "knowledge_base_ids": ["kb1", "kb2"],
-  "query": "什么是向量数据库"
+  "queries": ["什么是向量数据库"]
 }

-# Search all allowed KBs with vector queries
+# Search all allowed KBs with multiple queries
 {
-  "vector_queries": ["什么是向量数据库", "向量数据库的定义"]
-}
-
-# Multiple query types with thresholds
-{
-  "knowledge_base_ids": ["kb1"],
-  "vector_queries": ["向量数据库应用"],
-  "keyword_queries": ["Docker", "部署"],
-  "vector_threshold": 0.7,
-  "keyword_threshold": 0.6
+  "queries": ["什么是向量数据库", "向量数据库的应用场景"]
 }

 # Search specific documents
 {
  "knowledge_base_ids": ["kb1"],
-  "query": "彗星的起源",
+  "queries": ["彗星的起源"],
  "knowledge_ids": ["doc1", "doc2"]
 }
 ` + "`" + `

 ## Tips

- Concurrent search across multiple KBs and queries
+- Concurrent search across multiple KBs
 - Results are automatically reranked to unify scores from different sources
- Reranked scores are in 0-1 range and directly comparable
- Results are merged, deduplicated and sorted by relevance
- Use vector_queries for semantic/conceptual searches
- Use keyword_queries for exact term matching`
+- Results are merged, deduplicated and sorted by relevance`

 	return &KnowledgeSearchTool{
-		BaseTool:         NewBaseTool("knowledge_search", description),
-		knowledgeService: knowledgeService,
-		chunkService:     chunkService,
-		tenantID:         tenantID,
-		allowedKBs:       allowedKBs,
-		rerankModel:      rerankModel,
-		chatModel:        chatModel,
+		BaseTool:             NewBaseTool("knowledge_search", description),
+		knowledgeBaseService: knowledgeBaseService,
+		chunkService:         chunkService,
+		tenantID:             tenantID,
+		allowedKBs:           allowedKBs,
+		rerankModel:          rerankModel,
+		chatModel:            chatModel,
+		config:               cfg,
 	}
 }

@@ -127,80 +108,39 @@ func (t *KnowledgeSearchTool) Parameters() map[string]interface{} {
 	return map[string]interface{}{
 		"type": "object",
 		"properties": map[string]interface{}{
+			"queries": map[string]interface{}{
+				"type":        "array",
+				"description": "Array of search queries",
+				"items": map[string]interface{}{
+					"type": "string",
+				},
+				"minItems": 1,
+				"maxItems": 5,
+			},
 			"knowledge_base_ids": map[string]interface{}{
 				"type":        "array",
 				"description": "Array of knowledge base IDs to search in (optional, if omitted searches all allowed KBs)",
 				"items": map[string]interface{}{
 					"type": "string",
 				},
-				"minItems": 1,
+				"minItems": 0,
 				"maxItems": 10,
 			},
-			"query": map[string]interface{}{
-				"type":        "string",
-				"description": "Single search query for simple hybrid search",
-			},
-			"vector_queries": map[string]interface{}{
-				"type":        "array",
-				"description": "Array of semantic queries for vector search (1-5 queries)",
-				"items": map[string]interface{}{
-					"type": "string",
-				},
-				"minItems": 1,
-				"maxItems": 5,
-			},
-			"keyword_queries": map[string]interface{}{
-				"type":        "array",
-				"description": "Array of keyword queries for keyword search (1-5 queries)",
-				"items": map[string]interface{}{
-					"type": "string",
-				},
-				"minItems": 1,
-				"maxItems": 5,
-			},
-			"top_k": map[string]interface{}{
-				"type":        "integer",
-				"description": "Number of results per knowledge base per query (default: 5)",
-				"default":     5,
-				"minimum":     1,
-				"maximum":     20,
-			},
-			"vector_threshold": map[string]interface{}{
-				"type":        "number",
-				"description": "Minimum score for vector results (default: 0.6)",
-				"default":     0.6,
-				"minimum":     0.0,
-				"maximum":     1.0,
-			},
-			"keyword_threshold": map[string]interface{}{
-				"type":        "number",
-				"description": "Minimum score for keyword results (default: 0.5)",
-				"default":     0.5,
-				"minimum":     0.0,
-				"maximum":     1.0,
-			},
 			"knowledge_ids": map[string]interface{}{
 				"type":        "array",
 				"description": "Optional array of document IDs to filter results (only return results from these specific documents)",
 				"items": map[string]interface{}{
 					"type": "string",
 				},
-				"minItems": 1,
+				"minItems": 0,
 				"maxItems": 50,
 			},
-			"min_score": map[string]interface{}{
-				"type":        "number",
-				"description": "Absolute minimum score threshold for filtering very low quality results (default: 0.3)",
-				"default":     0.3,
-				"minimum":     0.0,
-				"maximum":     1.0,
-			},
 		},
-		"required": []string{},
+		"required": []string{"queries"},
 	}
 }

-// Execute executes the knowledge search tool with flexible query modes
+// Execute executes the knowledge search tool
 func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] Execute started")

@@ -232,90 +172,74 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
 		logger.Infof(ctx, "[Tool][KnowledgeSearch] Using all allowed KBs (%d): %v", len(kbIDs), kbIDs)
 	}

-	// Parse query parameters
-	var singleQuery string
-	var vectorQueries, keywordQueries []string
-
-	// Parse single query
-	if q, ok := args["query"].(string); ok && q != "" {
-		singleQuery = q
-	}
-
-	// Parse vector_queries
-	if vq, ok := args["vector_queries"].([]interface{}); ok {
-		for _, q := range vq {
-			if queryStr, ok := q.(string); ok && queryStr != "" {
-				vectorQueries = append(vectorQueries, queryStr)
+	// Parse query parameter
+	var queries []string
+	if queriesRaw, ok := args["queries"].([]interface{}); ok && len(queriesRaw) > 0 {
+		for _, q := range queriesRaw {
+			if qStr, ok := q.(string); ok && qStr != "" {
+				queries = append(queries, qStr)
 			}
 		}
 	}

-	// Parse keyword_queries
-	if kq, ok := args["keyword_queries"].([]interface{}); ok {
-		for _, q := range kq {
-			if queryStr, ok := q.(string); ok && queryStr != "" {
-				keywordQueries = append(keywordQueries, queryStr)
-			}
-		}
-	}
-
-	// If single query provided, treat it as both vector and keyword query
-	if singleQuery != "" {
-		if len(vectorQueries) == 0 && len(keywordQueries) == 0 {
-			vectorQueries = []string{singleQuery}
-			keywordQueries = []string{singleQuery}
-		}
-	}
-
-	// Validate: at least one query must be provided
-	if len(vectorQueries) == 0 && len(keywordQueries) == 0 {
-		logger.Errorf(ctx, "[Tool][KnowledgeSearch] No query provided")
+	// Validate: query must be provided
+	if len(queries) == 0 {
+		logger.Errorf(ctx, "[Tool][KnowledgeSearch] No queries provided")
 		return &types.ToolResult{
 			Success: false,
-			Error:   "at least one of query, vector_queries, or keyword_queries must be provided",
-		}, fmt.Errorf("no query provided")
+			Error:   "queries parameter is required",
+		}, fmt.Errorf("no queries provided")
 	}

-	logger.Infof(ctx, "[Tool][KnowledgeSearch] Query mode: single=%v, vector_queries=%d, keyword_queries=%d",
-		singleQuery != "", len(vectorQueries), len(keywordQueries))
-	if singleQuery != "" {
-		logger.Debugf(ctx, "[Tool][KnowledgeSearch] Single query: %s", singleQuery)
-	}
-	if len(vectorQueries) > 0 {
-		logger.Debugf(ctx, "[Tool][KnowledgeSearch] Vector queries: %v", vectorQueries)
-	}
-	if len(keywordQueries) > 0 {
-		logger.Debugf(ctx, "[Tool][KnowledgeSearch] Keyword queries: %v", keywordQueries)
-	}
+	logger.Infof(ctx, "[Tool][KnowledgeSearch] Queries: %v", queries)

-	// Parse thresholds
-	vectorThreshold := 0.6
-	if vt, ok := args["vector_threshold"].(float64); ok {
-		vectorThreshold = vt
-	}
+	// Get search parameters from tenant conversation config, fallback to global config
+	var topK int
+	var vectorThreshold, keywordThreshold, minScore float64

-	keywordThreshold := 0.5
-	if kt, ok := args["keyword_threshold"].(float64); ok {
-		keywordThreshold = kt
-	}
-
-	// Parse min_score for absolute filtering
-	minScore := 0.3
-	if ms, ok := args["min_score"].(float64); ok {
-		minScore = ms
-	}
-
-	// Parse top_k
-	topK := 5
-	if topKVal, ok := args["top_k"]; ok {
-		switch v := topKVal.(type) {
-		case float64:
-			topK = int(v)
-		case int:
-			topK = v
+	// Prefer the tenant's ConversationConfig carried in ctx; only positive values override the zero defaults
+	if tenantVal := ctx.Value(types.TenantInfoContextKey); tenantVal != nil {
+		if tenant, ok := tenantVal.(*types.Tenant); ok && tenant != nil && tenant.ConversationConfig != nil {
+			cc := tenant.ConversationConfig
+			if cc.EmbeddingTopK > 0 {
+				topK = cc.EmbeddingTopK
+			}
+			if cc.VectorThreshold > 0 {
+				vectorThreshold = cc.VectorThreshold
+			}
+			if cc.KeywordThreshold > 0 {
+				keywordThreshold = cc.KeywordThreshold
+			}
+			// minScore is not read from ConversationConfig; use the fixed default
+			minScore = 0.3
 		}
 	}

+	// Fallback to global config if not set
+	if topK == 0 && t.config != nil {
+		topK = t.config.Conversation.EmbeddingTopK
+	}
+	if vectorThreshold == 0 && t.config != nil {
+		vectorThreshold = t.config.Conversation.VectorThreshold
+	}
+	if keywordThreshold == 0 && t.config != nil {
+		keywordThreshold = t.config.Conversation.KeywordThreshold
+	}
+
+	// Final fallback to hardcoded defaults if config is not available
+	if topK == 0 {
+		topK = 5
+	}
+	if vectorThreshold == 0 {
+		vectorThreshold = 0.6
+	}
+	if keywordThreshold == 0 {
+		keywordThreshold = 0.5
+	}
+	if minScore == 0 {
+		minScore = 0.3
+	}
+
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] Search params: top_k=%d, vector_threshold=%.2f, keyword_threshold=%.2f, min_score=%.2f",
 		topK, vectorThreshold, keywordThreshold, minScore)

@@ -330,11 +254,11 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
 		}
 	}

-	// Execute concurrent search
+	// Execute concurrent search (hybrid search handles both vector and keyword)
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] Starting concurrent search across %d KBs", len(kbIDs))
 	kbTypeMap := t.getKnowledgeBaseTypes(ctx, kbIDs)

-	allResults := t.concurrentSearch(ctx, vectorQueries, keywordQueries, kbIDs,
+	allResults := t.concurrentSearch(ctx, queries, kbIDs,
 		topK, vectorThreshold, keywordThreshold, kbTypeMap)
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] Concurrent search completed: %d raw results", len(allResults))

@@ -353,55 +277,54 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
 	}

 	// Filter by threshold first
-	logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying threshold filter...")
 	filteredResults := t.filterByThreshold(allResults, vectorThreshold, keywordThreshold)
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] After threshold filter: %d results (from %d)",
 		len(filteredResults), len(allResults))

+	// Deduplicate before reranking to reduce processing overhead
+	deduplicatedBeforeRerank := t.deduplicateResults(filteredResults)
+	logger.Infof(ctx, "[Tool][KnowledgeSearch] After deduplication before rerank: %d results (from %d)",
+		len(deduplicatedBeforeRerank), len(filteredResults))
+
 	// Apply ReRank if model is configured
 	// Prefer chatModel (LLM-based reranking) over rerankModel if both are available
-	if t.chatModel != nil && len(filteredResults) > 0 {
-		logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying LLM-based rerank with model: %s, input: %d results",
-			t.chatModel.GetModelName(), len(filteredResults))
-		rerankQuery := singleQuery
-		if rerankQuery == "" && len(vectorQueries) > 0 {
-			rerankQuery = vectorQueries[0] // Use first vector query as rerank query
-		} else if rerankQuery == "" && len(keywordQueries) > 0 {
-			rerankQuery = keywordQueries[0] // Use first keyword query as fallback
+	// Rerank against the single query, or against all queries joined by spaces when several are given
+	rerankQuery := ""
+	if len(queries) > 0 {
+		rerankQuery = queries[0]
+		if len(queries) > 1 {
+			// Combine multiple queries for reranking
+			rerankQuery = strings.Join(queries, " ")
 		}
+	}

-		if rerankQuery != "" {
-			logger.Debugf(ctx, "[Tool][KnowledgeSearch] Rerank query: %s", rerankQuery)
-			rerankedResults, err := t.rerankResults(ctx, rerankQuery, filteredResults)
-			if err != nil {
-				logger.Warnf(ctx, "[Tool][KnowledgeSearch] LLM rerank failed, using original results: %v", err)
-			} else {
-				filteredResults = rerankedResults
-				logger.Infof(ctx, "[Tool][KnowledgeSearch] LLM rerank completed successfully: %d results",
-					len(filteredResults))
-			}
+	if t.chatModel != nil && len(deduplicatedBeforeRerank) > 0 && rerankQuery != "" {
+		logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying LLM-based rerank with model: %s, input: %d results, queries: %v",
+			t.chatModel.GetModelName(), len(deduplicatedBeforeRerank), queries)
+		rerankedResults, err := t.rerankResults(ctx, rerankQuery, deduplicatedBeforeRerank)
+		if err != nil {
+			logger.Warnf(ctx, "[Tool][KnowledgeSearch] LLM rerank failed, using original results: %v", err)
+			filteredResults = deduplicatedBeforeRerank
+		} else {
+			filteredResults = rerankedResults
+			logger.Infof(ctx, "[Tool][KnowledgeSearch] LLM rerank completed successfully: %d results",
+				len(filteredResults))
 		}
-	} else if t.rerankModel != nil && len(filteredResults) > 0 {
-		logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying rerank with model: %s, input: %d results",
-			t.rerankModel.GetModelName(), len(filteredResults))
-		rerankQuery := singleQuery
-		if rerankQuery == "" && len(vectorQueries) > 0 {
-			rerankQuery = vectorQueries[0] // Use first vector query as rerank query
-		} else if rerankQuery == "" && len(keywordQueries) > 0 {
-			rerankQuery = keywordQueries[0] // Use first keyword query as fallback
-		}
-
-		if rerankQuery != "" {
-			logger.Debugf(ctx, "[Tool][KnowledgeSearch] Rerank query: %s", rerankQuery)
-			rerankedResults, err := t.rerankResults(ctx, rerankQuery, filteredResults)
-			if err != nil {
-				logger.Warnf(ctx, "[Tool][KnowledgeSearch] Rerank failed, using original results: %v", err)
-			} else {
-				filteredResults = rerankedResults
-				logger.Infof(ctx, "[Tool][KnowledgeSearch] Rerank completed successfully: %d results",
-					len(filteredResults))
-			}
+	} else if t.rerankModel != nil && len(deduplicatedBeforeRerank) > 0 && rerankQuery != "" {
+		logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying rerank with model: %s, input: %d results, queries: %v",
+			t.rerankModel.GetModelName(), len(deduplicatedBeforeRerank), queries)
+		rerankedResults, err := t.rerankResults(ctx, rerankQuery, deduplicatedBeforeRerank)
+		if err != nil {
+			logger.Warnf(ctx, "[Tool][KnowledgeSearch] Rerank failed, using original results: %v", err)
+			filteredResults = deduplicatedBeforeRerank
+		} else {
+			filteredResults = rerankedResults
+			logger.Infof(ctx, "[Tool][KnowledgeSearch] Rerank completed successfully: %d results",
+				len(filteredResults))
 		}
+	} else {
+		// No reranking, use deduplicated results
+		filteredResults = deduplicatedBeforeRerank
 	}

 	// Apply absolute minimum score filter to remove very low quality chunks
@@ -409,44 +332,37 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
 	filteredResults = t.filterByMinScore(filteredResults, minScore)
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] After min_score filter: %d results", len(filteredResults))

-	logger.Debugf(ctx, "[Tool][KnowledgeSearch] Deduplicating results...")
+	// Final deduplication after rerank (in case rerank changed scores/order but duplicates remain)
+	logger.Debugf(ctx, "[Tool][KnowledgeSearch] Final deduplication after rerank...")
 	deduplicatedResults := t.deduplicateResults(filteredResults)
-	logger.Infof(ctx, "[Tool][KnowledgeSearch] After deduplication: %d results (from %d)",
+	logger.Infof(ctx, "[Tool][KnowledgeSearch] After final deduplication: %d results (from %d)",
 		len(deduplicatedResults), len(filteredResults))

 	// Sort results by score (descending)
-	logger.Debugf(ctx, "[Tool][KnowledgeSearch] Sorting results by score...")
 	sort.Slice(deduplicatedResults, func(i, j int) bool {
 		if deduplicatedResults[i].Score != deduplicatedResults[j].Score {
 			return deduplicatedResults[i].Score > deduplicatedResults[j].Score
 		}
-		// If scores are equal, prefer vector matches
-		if deduplicatedResults[i].QueryType != deduplicatedResults[j].QueryType {
-			return deduplicatedResults[i].QueryType == "vector"
-		}
+		// If scores are equal, sort by knowledge ID for consistency
 		return deduplicatedResults[i].KnowledgeID < deduplicatedResults[j].KnowledgeID
 	})

 	// Log top results
 	if len(deduplicatedResults) > 0 {
-		logger.Infof(ctx, "[Tool][KnowledgeSearch] Top 5 results by score:")
 		for i := 0; i < len(deduplicatedResults) && i < 5; i++ {
 			r := deduplicatedResults[i]
-			logger.Infof(ctx, "[Tool][KnowledgeSearch]   #%d: score=%.3f, type=%s, kb=%s, chunk_id=%s",
+			logger.Infof(ctx, "[Tool][KnowledgeSearch][Top %d] score=%.3f, type=%s, kb=%s, chunk_id=%s",
 				i+1, r.Score, r.QueryType, r.KnowledgeID, r.ID)
 		}
 	}

 	// Build output
 	logger.Infof(ctx, "[Tool][KnowledgeSearch] Formatting output with %d final results", len(deduplicatedResults))
-	result, err := t.formatOutput(ctx, deduplicatedResults, vectorQueries, keywordQueries,
-		kbIDs, len(allResults), vectorThreshold, keywordThreshold, knowledgeIDsFilter, singleQuery)
+	result, err := t.formatOutput(ctx, deduplicatedResults, kbIDs, len(allResults), knowledgeIDsFilter, queries)
 	if err != nil {
 		logger.Errorf(ctx, "[Tool][KnowledgeSearch] Failed to format output: %v", err)
 		return result, err
 	}
-
-	logger.Infof(ctx, "[Tool][KnowledgeSearch] Execute completed successfully")
 	return result, nil
 }

@@ -462,7 +378,7 @@ func (t *KnowledgeSearchTool) getKnowledgeBaseTypes(ctx context.Context, kbIDs [
 			continue
 		}

-		kb, err := t.knowledgeService.GetKnowledgeBaseByID(ctx, kbID)
+		kb, err := t.knowledgeBaseService.GetKnowledgeBaseByID(ctx, kbID)
 		if err != nil {
 			logger.Warnf(ctx, "[Tool][KnowledgeSearch] Failed to fetch knowledge base %s info: %v", kbID, err)
 			continue
@@ -474,57 +390,13 @@ func (t *KnowledgeSearchTool) getKnowledgeBaseTypes(ctx context.Context, kbIDs [
 	return kbTypeMap
 }

-// concurrentSearch executes vector and keyword searches concurrently
+// concurrentSearch runs one HybridSearch per (query, KB) pair concurrently; failed searches are logged and skipped
 func (t *KnowledgeSearchTool) concurrentSearch(
-	ctx context.Context,
-	vectorQueries, keywordQueries []string,
-	kbsToSearch []string,
-	topK int,
-	vectorThreshold, keywordThreshold float64,
-	kbTypeMap map[string]string,
-) []*searchResultWithMeta {
-	var wg sync.WaitGroup
-	var mu sync.Mutex
-	allResults := make([]*searchResultWithMeta, 0)
-
-	// Launch vector searches
-	if len(vectorQueries) > 0 {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			results := t.searchWithQueries(ctx, vectorQueries, kbsToSearch, topK,
-				vectorThreshold, 1.0, "vector", kbTypeMap)
-			mu.Lock()
-			allResults = append(allResults, results...)
-			mu.Unlock()
-		}()
-	}
-
-	// Launch keyword searches
-	if len(keywordQueries) > 0 {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			results := t.searchWithQueries(ctx, keywordQueries, kbsToSearch, topK,
-				1.0, keywordThreshold, "keyword", kbTypeMap)
-			mu.Lock()
-			allResults = append(allResults, results...)
-			mu.Unlock()
-		}()
-	}
-
-	wg.Wait()
-	return allResults
-}
-
-// searchWithQueries executes multiple queries concurrently
-func (t *KnowledgeSearchTool) searchWithQueries(
 	ctx context.Context,
 	queries []string,
 	kbsToSearch []string,
 	topK int,
 	vectorThreshold, keywordThreshold float64,
-	queryType string,
 	kbTypeMap map[string]string,
 ) []*searchResultWithMeta {
 	var wg sync.WaitGroup
@@ -532,72 +404,46 @@ func (t *KnowledgeSearchTool) searchWithQueries(
 	allResults := make([]*searchResultWithMeta, 0)

 	for _, query := range queries {
-		wg.Add(1)
-		go func(q string) {
-			defer wg.Done()
-			results := t.searchSingleQuery(ctx, q, kbsToSearch, topK,
-				vectorThreshold, keywordThreshold, queryType, kbTypeMap)
-			mu.Lock()
-			allResults = append(allResults, results...)
-			mu.Unlock()
-		}(query)
-	}
+		// Capture query in local variable to avoid closure issues
+		q := query
+		for _, kbID := range kbsToSearch {
+			// Capture kbID in local variable to avoid closure issues
+			kb := kbID
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				searchParams := types.SearchParams{
+					QueryText:        q,
+					MatchCount:       topK,
+					VectorThreshold:  vectorThreshold,
+					KeywordThreshold: keywordThreshold,
+				}
+				kbResults, err := t.knowledgeBaseService.HybridSearch(ctx, kb, searchParams)
+				if err != nil {
+					// Log error but continue with other KBs
+					logger.Warnf(ctx, "[Tool][KnowledgeSearch] Failed to search knowledge base %s: %v", kb, err)
+					return
+				}

+				// Wrap results with metadata
+				mu.Lock()
+				for _, r := range kbResults {
+					allResults = append(allResults, &searchResultWithMeta{
+						SearchResult:      r,
+						SourceQuery:       q,
+						QueryType:         "hybrid", // Hybrid search combines both vector and keyword
+						KnowledgeBaseID:   kb,
+						KnowledgeBaseType: kbTypeMap[kb],
+					})
+				}
+				mu.Unlock()
+			}()
+		}
+	}
 	wg.Wait()
 	return allResults
 }

-// searchSingleQuery searches a single query across multiple KBs concurrently
-func (t *KnowledgeSearchTool) searchSingleQuery(
-	ctx context.Context,
-	query string,
-	kbsToSearch []string,
-	topK int,
-	vectorThreshold, keywordThreshold float64,
-	queryType string,
-	kbTypeMap map[string]string,
-) []*searchResultWithMeta {
-	var wg sync.WaitGroup
-	var mu sync.Mutex
-	results := make([]*searchResultWithMeta, 0)
-
-	searchParams := types.SearchParams{
-		QueryText:        query,
-		MatchCount:       topK,
-		VectorThreshold:  vectorThreshold,
-		KeywordThreshold: keywordThreshold,
-	}
-
-	for _, kbID := range kbsToSearch {
-		wg.Add(1)
-		go func(kb string) {
-			defer wg.Done()
-
-			kbResults, err := t.knowledgeService.HybridSearch(ctx, kb, searchParams)
-			if err != nil {
-				// Log error but continue with other KBs
-				return
-			}
-
-			// Wrap results with metadata
-			mu.Lock()
-			for _, r := range kbResults {
-				results = append(results, &searchResultWithMeta{
-					SearchResult:      r,
-					SourceQuery:       query,
-					QueryType:         queryType,
-					KnowledgeBaseID:   kb,
-					KnowledgeBaseType: kbTypeMap[kb],
-				})
-			}
-			mu.Unlock()
-		}(kbID)
-	}
-
-	wg.Wait()
-	return results
-}
-
 // filterByThreshold filters results based on match type and threshold
 func (t *KnowledgeSearchTool) filterByThreshold(
 	results []*searchResultWithMeta,
@@ -1011,12 +857,10 @@ func (t *KnowledgeSearchTool) deduplicateResults(results []*searchResultWithMeta
 func (t *KnowledgeSearchTool) formatOutput(
 	ctx context.Context,
 	results []*searchResultWithMeta,
-	vectorQueries, keywordQueries []string,
 	kbsToSearch []string,
 	totalBeforeFilter int,
-	vectorThreshold, keywordThreshold float64,
 	knowledgeIDsFilter map[string]bool,
-	singleQuery string,
+	queries []string,
 ) (*types.ToolResult, error) {
 	if len(results) == 0 {
 		data := map[string]interface{}{
@@ -1031,8 +875,8 @@ func (t *KnowledgeSearchTool) formatOutput(
 			}
 			data["knowledge_ids"] = filterList
 		}
-		if singleQuery != "" {
-			data["query"] = singleQuery
+		if len(queries) > 0 {
+			data["queries"] = queries
 		}
 		return &types.ToolResult{
 			Success: true,
@@ -1041,14 +885,6 @@ func (t *KnowledgeSearchTool) formatOutput(
 		}, nil
 	}

-	// Determine search mode
-	searchMode := "Hybrid (Vector + Keyword)"
-	if len(vectorQueries) > 0 && len(keywordQueries) == 0 {
-		searchMode = "Vector"
-	} else if len(vectorQueries) == 0 && len(keywordQueries) > 0 {
-		searchMode = "Keyword"
-	}
-
 	// Build output header
 	output := "=== Search Results ===\n"
 	output += fmt.Sprintf("Knowledge Bases: %v\n", kbsToSearch)
@@ -1059,22 +895,13 @@ func (t *KnowledgeSearchTool) formatOutput(
 		}
 		output += fmt.Sprintf("Document Filter: %v\n", filterList)
 	}
-	output += fmt.Sprintf("Search Mode: %s\n", searchMode)
-
-	if singleQuery != "" {
-		output += fmt.Sprintf("Query: %s\n", singleQuery)
-	} else {
-		if len(vectorQueries) > 0 {
-			output += fmt.Sprintf("Vector Queries: %v\n", vectorQueries)
-			output += fmt.Sprintf("Vector Threshold: %.2f\n", vectorThreshold)
-		}
-		if len(keywordQueries) > 0 {
-			output += fmt.Sprintf("Keyword Queries: %v\n", keywordQueries)
-			output += fmt.Sprintf("Keyword Threshold: %.2f\n", keywordThreshold)
-		}
+	if len(queries) == 1 {
+		output += fmt.Sprintf("Query: %s\n", queries[0])
+	} else if len(queries) > 1 {
+		output += fmt.Sprintf("Queries (%d): %v\n", len(queries), queries)
 	}

-	output += fmt.Sprintf("Found %d relevant results (deduplicated)", len(results))
+	output += fmt.Sprintf("Found %d relevant results", len(results))
 	if totalBeforeFilter > len(results) {
 		output += fmt.Sprintf(" (filtered from %d)", totalBeforeFilter)
 	}
@@ -1118,16 +945,9 @@ func (t *KnowledgeSearchTool) formatOutput(
 			output += fmt.Sprintf("[Source Document: %s]\n", result.KnowledgeTitle)
 		}

-		relevanceLevel := GetRelevanceLevel(result.Score)
+		// relevanceLevel := GetRelevanceLevel(result.Score)
 		output += fmt.Sprintf("\nResult #%d:\n", i+1)
-		output += fmt.Sprintf("  Relevance: %.2f (%s)\n", result.Score, relevanceLevel)
-		output += fmt.Sprintf("  Match Type: %s", FormatMatchType(result.MatchType))
-		if result.SourceQuery != "" && result.SourceQuery != singleQuery {
-			output += fmt.Sprintf(" (Query: \"%s\")", result.SourceQuery)
-		}
-		output += "\n"
-		output += fmt.Sprintf("  Content: %s\n", result.Content)
-		output += fmt.Sprintf("  [chunk_id: %s - full content included above]\n", result.ID)
+		output += fmt.Sprintf("  [chunk_id: %s][chunk_index: %d]\nContent: %s\n", result.ID, result.ChunkIndex, result.Content)

 		if faqMeta != nil {
 			if faqMeta.StandardQuestion != "" {
@@ -1139,17 +959,17 @@ func (t *KnowledgeSearchTool) formatOutput(
 			if len(faqMeta.Answers) > 0 {
 				output += "  FAQ Answers:\n"
 				for _, ans := range faqMeta.Answers {
-					output += fmt.Sprintf("    - %s\n", ans)
+					output += fmt.Sprintf("    Answer Choice %d: %s\n", i+1, ans)
 				}
 			}
 		}

 		formattedResults = append(formattedResults, map[string]interface{}{
-			"result_index":        i + 1,
-			"chunk_id":            result.ID,
-			"content":             result.Content,
-			"score":               result.Score,
-			"relevance_level":     relevanceLevel,
+			"result_index": i + 1,
+			"chunk_id":     result.ID,
+			"content":      result.Content,
+			// "score":        result.Score,
+			// "relevance_level":     relevanceLevel,
 			"knowledge_id":        result.KnowledgeID,
 			"knowledge_title":     result.KnowledgeTitle,
 			"match_type":          result.MatchType,
@@ -1182,14 +1002,13 @@ func (t *KnowledgeSearchTool) formatOutput(
 	// }
 	// output += "- Full content is already included in search results above\n"
 	// output += "- Results are deduplicated across knowledge bases and sorted by relevance\n"
-	// output += "- Use get_related_chunks to expand context if needed\n"
+	// output += "- Use list_knowledge_chunks to expand context if needed\n"

 	data := map[string]interface{}{
 		"knowledge_base_ids": kbsToSearch,
 		"results":            formattedResults,
 		"count":              len(results),
 		"kb_counts":          kbCounts,
-		"search_mode":        searchMode,
 		"display_type":       "search_results",
 	}
 	if len(knowledgeIDsFilter) > 0 {
@@ -1199,14 +1018,8 @@ func (t *KnowledgeSearchTool) formatOutput(
 		}
 		data["knowledge_ids"] = filterList
 	}
-	if singleQuery != "" {
-		data["query"] = singleQuery
-	}
-	if len(vectorQueries) > 0 {
-		data["vector_queries"] = vectorQueries
-	}
-	if len(keywordQueries) > 0 {
-		data["keyword_queries"] = keywordQueries
+	if len(queries) > 0 {
+		data["queries"] = queries
 	}
 	if totalBeforeFilter > len(results) {
 		data["total_before_filter"] = totalBeforeFilter
--- a/internal/agent/tools/list_knowledge_chunks.go
+++ b/internal/agent/tools/list_knowledge_chunks.go
@@ -0,0 +1,230 @@
+package tools
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+)
+
+// ListKnowledgeChunksTool retrieves chunk snapshots for a specific knowledge document.
+// It is registered under the tool name "list_knowledge_chunks".
+type ListKnowledgeChunksTool struct {
+	BaseTool
+	// tenantID is passed to the chunk repository on every paged listing call.
+	tenantID         uint
+	// chunkService provides the repository used to list chunks by knowledge ID.
+	chunkService     interfaces.ChunkService
+	// knowledgeService is used only to resolve the document title; it may be nil,
+	// in which case the title is reported as empty.
+	knowledgeService interfaces.KnowledgeService
+}
+
+// NewListKnowledgeChunksTool builds the "list_knowledge_chunks" tool.
+//
+// tenantID is forwarded to the chunk repository on each call, knowledgeService
+// is used to look up the document title, and chunkService supplies the chunk
+// repository that performs the paged listing.
+func NewListKnowledgeChunksTool(
+	tenantID uint,
+	knowledgeService interfaces.KnowledgeService,
+	chunkService interfaces.ChunkService,
+) *ListKnowledgeChunksTool {
+	// Prompt-facing description shown to the model when it selects tools.
+	const description = `Retrieve paged chunks for a document (knowledge) by knowledge_id.
+
+## When to Use
+
+- Need deterministic chunk previews for a known document
+- Want to quickly confirm how many chunks a document contains
+- Require surrounding context around a chunk_index returned by search results
+- Need content snippets without running an additional search query
+
+Avoid when:
+- You don't know the knowledge_id (use knowledge_search first)
+
+## Parameters
+
+- knowledge_id (required): Target document/knowledge ID
+- limit (optional): Number of chunks to fetch (default 20, max 100).
+- offset (optional): Offset to start fetching chunks from (default 0).`
+
+	tool := &ListKnowledgeChunksTool{
+		BaseTool: NewBaseTool("list_knowledge_chunks", description),
+	}
+	tool.tenantID = tenantID
+	tool.knowledgeService = knowledgeService
+	tool.chunkService = chunkService
+	return tool
+}
+
+// Parameters returns the JSON schema describing accepted arguments.
+//
+// Only knowledge_id is mandatory. limit and offset are documented as optional
+// in the tool description, carry schema defaults, and Execute falls back to
+// those defaults when they are absent, so they are not listed under "required".
+func (t *ListKnowledgeChunksTool) Parameters() map[string]interface{} {
+	return map[string]interface{}{
+		"type": "object",
+		"properties": map[string]interface{}{
+			"knowledge_id": map[string]interface{}{
+				"type":        "string",
+				"description": "Knowledge/document ID to inspect",
+			},
+			"limit": map[string]interface{}{
+				"type":        "integer",
+				"description": "Number of chunks to fetch (default 20, max 100)",
+				"default":     20,
+				"minimum":     1,
+				"maximum":     100,
+			},
+			"offset": map[string]interface{}{
+				"type":        "integer",
+				"description": "Offset to start fetching chunks from (default 0)",
+				"default":     0,
+				"minimum":     0,
+			},
+		},
+		"required": []string{"knowledge_id"},
+	}
+}
+
+// Execute lists one page of text chunks for the requested knowledge document.
+//
+// Arguments read from args:
+//   - knowledge_id: required, non-blank string (surrounding whitespace is trimmed).
+//   - limit: optional page size, accepted as float64 or int. Values below 1 fall
+//     back to the default of 20 and values above 100 are capped at 100, matching
+//     the bounds advertised by Parameters.
+//   - offset: optional start offset, accepted as float64 or int; negative values
+//     are treated as 0.
+//
+// The repository call is page-based, so offset is converted to a page number by
+// integer division and is therefore rounded down to a multiple of limit. The
+// offset actually served is reported in Data["offset"].
+//
+// On failure it returns both a ToolResult with Success=false and a non-nil error.
+func (t *ListKnowledgeChunksTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
+	const (
+		defaultChunkLimit = 20
+		maxChunkLimit     = 100
+	)
+
+	knowledgeID, ok := args["knowledge_id"].(string)
+	if !ok || strings.TrimSpace(knowledgeID) == "" {
+		return &types.ToolResult{
+			Success: false,
+			Error:   "knowledge_id is required",
+		}, fmt.Errorf("knowledge_id is required")
+	}
+	knowledgeID = strings.TrimSpace(knowledgeID)
+
+	// intArg reads an integer argument that may arrive as float64 or int;
+	// missing keys and any other type yield the fallback.
+	intArg := func(key string, fallback int) int {
+		switch v := args[key].(type) {
+		case float64:
+			return int(v)
+		case int:
+			return v
+		default:
+			return fallback
+		}
+	}
+
+	// Clamp the page size: a zero limit would otherwise divide by zero below,
+	// and negative or oversized values would be forwarded to the repository.
+	chunkLimit := intArg("limit", defaultChunkLimit)
+	switch {
+	case chunkLimit < 1:
+		chunkLimit = defaultChunkLimit
+	case chunkLimit > maxChunkLimit:
+		chunkLimit = maxChunkLimit
+	}
+
+	offset := intArg("offset", 0)
+	if offset < 0 {
+		offset = 0
+	}
+
+	pagination := &types.Pagination{
+		Page:     offset/chunkLimit + 1,
+		PageSize: chunkLimit,
+	}
+	// Offset of the first chunk on the selected page (offset floored to a page boundary).
+	effectiveOffset := (pagination.Page - 1) * pagination.PageSize
+
+	chunks, total, err := t.chunkService.GetRepository().ListPagedChunksByKnowledgeID(ctx,
+		t.tenantID, knowledgeID, pagination, []types.ChunkType{types.ChunkTypeText}, "")
+	if err != nil {
+		return &types.ToolResult{
+			Success: false,
+			Error:   fmt.Sprintf("failed to list chunks: %v", err),
+		}, err
+	}
+	// NOTE(review): a nil slice is treated as a failure here; confirm the
+	// repository returns an empty non-nil slice for pages without rows, otherwise
+	// the empty-page message in buildOutput is never reached.
+	if chunks == nil {
+		return &types.ToolResult{
+			Success: false,
+			Error:   "chunk query returned no data",
+		}, fmt.Errorf("chunk query returned no data")
+	}
+
+	totalChunks := total
+	fetched := len(chunks)
+
+	knowledgeTitle := t.lookupKnowledgeTitle(ctx, knowledgeID)
+
+	output := t.buildOutput(knowledgeID, knowledgeTitle, totalChunks, fetched, chunkLimit, chunks)
+
+	formattedChunks := make([]map[string]interface{}, 0, len(chunks))
+	for idx, c := range chunks {
+		formattedChunks = append(formattedChunks, map[string]interface{}{
+			"seq":             idx + 1,
+			"chunk_id":        c.ID,
+			"chunk_index":     c.ChunkIndex,
+			"content":         c.Content,
+			"chunk_type":      c.ChunkType,
+			"knowledge_id":    c.KnowledgeID,
+			"knowledge_base":  c.KnowledgeBaseID,
+			"start_at":        c.StartAt,
+			"end_at":          c.EndAt,
+			"parent_chunk_id": c.ParentChunkID,
+		})
+	}
+
+	return &types.ToolResult{
+		Success: true,
+		Output:  output,
+		Data: map[string]interface{}{
+			"knowledge_id":    knowledgeID,
+			"knowledge_title": knowledgeTitle,
+			"total_chunks":    totalChunks,
+			"fetched_chunks":  fetched,
+			"page":            pagination.Page,
+			"page_size":       pagination.PageSize,
+			"offset":          effectiveOffset,
+			"chunks":          formattedChunks,
+		},
+	}, nil
+}
+
+// lookupKnowledgeTitle returns the whitespace-trimmed title of the knowledge
+// document, or "" when no knowledge service is configured, the lookup fails,
+// or no record is returned.
+func (t *ListKnowledgeChunksTool) lookupKnowledgeTitle(ctx context.Context, knowledgeID string) string {
+	svc := t.knowledgeService
+	if svc == nil {
+		return ""
+	}
+	if doc, lookupErr := svc.GetKnowledgeByID(ctx, knowledgeID); lookupErr == nil && doc != nil {
+		return strings.TrimSpace(doc.Title)
+	}
+	return ""
+}
+
+// buildOutput renders the human-readable summary returned to the agent.
+//
+// knowledgeTitle may be empty, in which case only the document ID is printed.
+// total is the document's overall chunk count, fetched is the number of chunks
+// on this page, chunkLimit is the page size that was requested, and chunks are
+// the rows to preview.
+func (t *ListKnowledgeChunksTool) buildOutput(
+	knowledgeID string,
+	knowledgeTitle string,
+	total int64,
+	fetched int,
+	chunkLimit int,
+	chunks []*types.Chunk,
+) string {
+	builder := &strings.Builder{}
+	builder.WriteString("=== 知识文档分块 ===\n\n")
+
+	if knowledgeTitle != "" {
+		fmt.Fprintf(builder, "文档: %s (%s)\n", knowledgeTitle, knowledgeID)
+	} else {
+		fmt.Fprintf(builder, "文档 ID: %s\n", knowledgeID)
+	}
+	fmt.Fprintf(builder, "总分块数: %d\n", total)
+	// The value printed here is the page size, so it is labelled as limit
+	// rather than offset.
+	fmt.Fprintf(builder, "本次拉取: %d 条（limit=%d）\n\n", fetched, chunkLimit)
+
+	if fetched == 0 {
+		builder.WriteString("未找到任何分块，请确认文档是否已完成解析。\n")
+		if total > 0 {
+			builder.WriteString("文档存在但当前页数据为空，请检查分页参数。\n")
+		}
+		return builder.String()
+	}
+
+	builder.WriteString("=== 分块内容预览 ===\n\n")
+	for idx, c := range chunks {
+		// Print the raw ChunkIndex so it matches the chunk_index value exposed
+		// in the structured result data and in knowledge_search output.
+		fmt.Fprintf(builder, "Chunk #%d (Index %d)\n", idx+1, c.ChunkIndex)
+		fmt.Fprintf(builder, "  chunk_id: %s\n", c.ID)
+		fmt.Fprintf(builder, "  类型: %s\n", c.ChunkType)
+		fmt.Fprintf(builder, "  内容: %s\n\n", summarizeContent(c.Content))
+	}
+
+	if int64(fetched) < total {
+		builder.WriteString("提示：文档仍有更多分块，可调整 offset 或多次调用以获取全部内容。\n")
+	}
+
+	return builder.String()
+}
+
+// summarizeContent returns content with surrounding whitespace removed, or the
+// placeholder "(空内容)" when nothing but whitespace remains. Despite the name,
+// the text is not shortened.
+func summarizeContent(content string) string {
+	if trimmed := strings.TrimSpace(content); trimmed != "" {
+		return trimmed
+	}
+	return "(空内容)"
+}
--- a/internal/agent/tools/query_knowledge_graph.go
+++ b/internal/agent/tools/query_knowledge_graph.go
@@ -56,7 +56,7 @@ func NewQueryKnowledgeGraphTool(knowledgeService interfaces.KnowledgeBaseService
 ## 配合使用

 1. **关系探索**: query_knowledge_graph → get_chunk_detail（查看详细内容）
-2. **网络分析**: query_knowledge_graph → get_related_chunks（扩展上下文）
+2. **网络分析**: query_knowledge_graph → list_knowledge_chunks（扩展上下文）
 3. **主题研究**: knowledge_search → query_knowledge_graph（深入实体关系）

 ## 当前状态
@@ -360,7 +360,7 @@ func (t *QueryKnowledgeGraphTool) Execute(ctx context.Context, args map[string]i
 	output += "=== 💡 使用提示 ===\n"
 	output += "- ✓ 结果已跨知识库去重并按相关度排序\n"
 	output += "- ✓ 使用 get_chunk_detail 获取完整内容\n"
-	output += "- ✓ 使用 get_related_chunks 探索上下文\n"
+	output += "- ✓ 使用 list_knowledge_chunks 探索上下文\n"
 	if !hasGraphConfig {
 		output += "- ⚠️ 配置图谱抽取以获得更精准的实体关系结果\n"
 	}
--- a/internal/agent/tools/registry.go
+++ b/internal/agent/tools/registry.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"

+	"github.com/Tencent/WeKnora/internal/common"
 	"github.com/Tencent/WeKnora/internal/types"
 	"github.com/Tencent/WeKnora/internal/types/interfaces"
 	"gorm.io/gorm"
@@ -11,26 +12,23 @@ import (

 // ToolRegistry manages the registration and retrieval of tools
 type ToolRegistry struct {
-	tools                map[string]types.Tool
-	knowledgeBaseService interfaces.KnowledgeBaseService
-	knowledgeService     interfaces.KnowledgeService
-	chunkService         interfaces.ChunkService
-	db                   *gorm.DB // gorm.DB interface for database query tool
+	tools            map[string]types.Tool
+	knowledgeService interfaces.KnowledgeService
+	chunkService     interfaces.ChunkService
+	db               *gorm.DB
 }

 // NewToolRegistry creates a new tool registry
 func NewToolRegistry(
-	knowledgeBaseService interfaces.KnowledgeBaseService,
 	knowledgeService interfaces.KnowledgeService,
 	chunkService interfaces.ChunkService,
 	db *gorm.DB, // gorm.DB for database operations
 ) *ToolRegistry {
 	return &ToolRegistry{
-		tools:                make(map[string]types.Tool),
-		knowledgeBaseService: knowledgeBaseService,
-		knowledgeService:     knowledgeService,
-		chunkService:         chunkService,
-		db:                   db,
+		tools:            make(map[string]types.Tool),
+		knowledgeService: knowledgeService,
+		chunkService:     chunkService,
+		db:               db,
 	}
 }

@@ -72,13 +70,41 @@ func (r *ToolRegistry) GetFunctionDefinitions() []types.FunctionDefinition {

 // ExecuteTool executes a tool by name with the given arguments
 func (r *ToolRegistry) ExecuteTool(ctx context.Context, name string, args map[string]interface{}) (*types.ToolResult, error) {
+	common.PipelineInfo(ctx, "AgentTool", "execute_start", map[string]interface{}{
+		"tool": name,
+		"args": args,
+	})
 	tool, err := r.GetTool(name)
 	if err != nil {
+		common.PipelineError(ctx, "AgentTool", "execute_failed", map[string]interface{}{
+			"tool":  name,
+			"error": err.Error(),
+		})
 		return &types.ToolResult{
 			Success: false,
 			Error:   err.Error(),
 		}, err
 	}

-	return tool.Execute(ctx, args)
+	result, execErr := tool.Execute(ctx, args)
+	fields := map[string]interface{}{
+		"tool": name,
+		"args": args,
+	}
+	if result != nil {
+		fields["success"] = result.Success
+		if result.Error != "" {
+			fields["error"] = result.Error
+		}
+	}
+	if execErr != nil {
+		fields["error"] = execErr.Error()
+		common.PipelineError(ctx, "AgentTool", "execute_done", fields)
+	} else if result != nil && !result.Success {
+		common.PipelineWarn(ctx, "AgentTool", "execute_done", fields)
+	} else {
+		common.PipelineInfo(ctx, "AgentTool", "execute_done", fields)
+	}
+
+	return result, execErr
 }
--- a/internal/agent/tools/sequentialthinking.go
+++ b/internal/agent/tools/sequentialthinking.go
@@ -121,7 +121,7 @@ func (t *SequentialThinkingTool) Parameters() map[string]interface{} {
 			"totalThoughts": map[string]interface{}{
 				"type":        "integer",
 				"description": "Estimated total thoughts needed (numeric value, e.g., 5, 10)",
-				"minimum":     1,
+				"minimum":     5,
 			},
 			"isRevision": map[string]interface{}{
 				"type":        "boolean",
@@ -188,6 +188,8 @@ func (t *SequentialThinkingTool) Execute(ctx context.Context, args map[string]in
 		branchKeys = append(branchKeys, k)
 	}

+	incomplete := thoughtData.NextThoughtNeeded || thoughtData.NeedsMoreThoughts || thoughtData.ThoughtNumber < thoughtData.TotalThoughts
+
 	responseData := map[string]interface{}{
 		"thought_number":         thoughtData.ThoughtNumber,
 		"total_thoughts":         thoughtData.TotalThoughts,
@@ -196,13 +198,19 @@ func (t *SequentialThinkingTool) Execute(ctx context.Context, args map[string]in
 		"thought_history_length": len(t.thoughtHistory),
 		"display_type":           "thinking",
 		"thought":                thoughtData.Thought,
+		"incomplete_steps":       incomplete,
 	}

 	logger.Infof(ctx, "[Tool][SequentialThinking] Execute completed - Thought %d/%d", thoughtData.ThoughtNumber, thoughtData.TotalThoughts)

+	outputMsg := "Thought process recorded"
+	if incomplete {
+		outputMsg = "Thought process recorded - unfinished steps remain, continue exploring and calling tools"
+	}
+
 	return &types.ToolResult{
 		Success: true,
-		Output:  "Thought process recorded",
+		Output:  outputMsg,
 		Data:    responseData,
 	}, nil
 }
--- a/internal/agent/tools/todo_write.go
+++ b/internal/agent/tools/todo_write.go
@@ -171,7 +171,7 @@ func (t *TodoWriteTool) Parameters() map[string]interface{} {
 						},
 						"tools_to_use": map[string]interface{}{
 							"type":        "array",
-							"description": "Suggested tools for this step (e.g., ['knowledge_search', 'get_related_chunks'])",
+							"description": "Suggested tools for this step (e.g., ['knowledge_search', 'list_knowledge_chunks'])",
 							"items": map[string]interface{}{
 								"type": "string",
 							},
@@ -268,8 +268,8 @@ func generatePlanOutput(task string, steps []PlanStep) string {
 		output += "注意：未提供具体步骤。建议创建3-7个结构化步骤以系统化研究。\n\n"
 		output += "建议的通用流程：\n"
 		output += "1. 使用 knowledge_search 进行初步信息收集\n"
-		output += "2. 使用 get_related_chunks 获取关键信息详情\n"
-		output += "3. 使用 get_related_chunks 扩展上下文理解\n"
+		output += "2. 使用 list_knowledge_chunks 获取关键信息详情\n"
+		output += "3. 使用 list_knowledge_chunks 扩展上下文理解\n"
 		output += "4. 使用 think 工具评估结果并综合答案\n"
 		return output
 	}
--- a/internal/application/service/agent_service.go
+++ b/internal/application/service/agent_service.go
@@ -23,14 +23,14 @@ const MAX_ITERATIONS = 30 // Max iterations for agent execution
 type agentService struct {
 	cfg                  *config.Config
 	modelService         interfaces.ModelService
-	knowledgeBaseService interfaces.KnowledgeBaseService
-	knowledgeService     interfaces.KnowledgeService
-	chunkService         interfaces.ChunkService
 	mcpServiceService    interfaces.MCPServiceService
 	mcpManager           *mcp.MCPManager
 	eventBus             *event.EventBus
 	db                   *gorm.DB
 	webSearchService     interfaces.WebSearchService
+	knowledgeBaseService interfaces.KnowledgeBaseService
+	knowledgeService     interfaces.KnowledgeService
+	chunkService         interfaces.ChunkService
 }

 // NewAgentService creates a new agent service
@@ -95,7 +95,7 @@ func (s *agentService) CreateAgentEngine(
 	}

 	// Create tool registry
-	toolRegistry := tools.NewToolRegistry(s.knowledgeBaseService, s.knowledgeService, s.chunkService, s.db)
+	toolRegistry := tools.NewToolRegistry(s.knowledgeService, s.chunkService, s.db)

 	// Register tools
 	if err := s.registerTools(ctx, toolRegistry, config, rerankModel, chatModel, sessionID, sessionService); err != nil {
@@ -148,9 +148,9 @@ func (s *agentService) CreateAgentEngine(
 		}
 	}

-	systemPromptTemplate := agent.DefaultSystemPromptTemplate
-	if config.UseCustomSystemPrompt && config.SystemPrompt != "" {
-		systemPromptTemplate = config.SystemPrompt
+	systemPromptTemplate := ""
+	if config.UseCustomSystemPrompt {
+		systemPromptTemplate = config.ResolveSystemPrompt(config.WebSearchEnabled)
 	}

 	// Create engine with provided EventBus and contextManager
@@ -158,7 +158,6 @@ func (s *agentService) CreateAgentEngine(
 		config,
 		chatModel,
 		toolRegistry,
-		s.knowledgeBaseService,
 		eventBus,
 		kbInfos,
 		contextManager,
@@ -182,10 +181,7 @@ func (s *agentService) registerTools(
 	// If no specific tools allowed, register default tools
 	allowedTools := config.AllowedTools
 	if len(allowedTools) == 0 {
-		// Register default tools from config
-		if s.cfg.Agent != nil && len(s.cfg.Agent.DefaultTools) > 0 {
-			allowedTools = s.cfg.Agent.DefaultTools
-		}
+		allowedTools = tools.DefaultAllowedTools()
 	}
 	// If web search is enabled, add web_search to allowedTools
 	if config.WebSearchEnabled {
@@ -216,13 +212,14 @@ func (s *agentService) registerTools(
 					config.KnowledgeBases,
 					rerankModel,
 					chatModel,
+					s.cfg,
 				))
-		case "get_related_chunks":
-			registry.RegisterTool(tools.NewGetRelatedChunksTool(s.chunkService, s.knowledgeBaseService))
+		case "list_knowledge_chunks":
+			registry.RegisterTool(tools.NewListKnowledgeChunksTool(tenantID, s.knowledgeService, s.chunkService))
 		case "query_knowledge_graph":
 			registry.RegisterTool(tools.NewQueryKnowledgeGraphTool(s.knowledgeBaseService))
 		case "get_document_info":
-			registry.RegisterTool(tools.NewGetDocumentInfoTool(s.knowledgeService, s.chunkService))
+			registry.RegisterTool(tools.NewGetDocumentInfoTool(tenantID, s.knowledgeService, s.chunkService))
 		case "database_query":
 			registry.RegisterTool(tools.NewDatabaseQueryTool(s.db, tenantID))
 		case "web_search":
--- a/internal/application/service/chat_pipline/chat_completion.go
+++ b/internal/application/service/chat_pipline/chat_completion.go
@@ -66,7 +66,7 @@ func (p *PluginChatCompletion) OnEvent(
 	}

 	pipelineInfo(ctx, "Completion", "output", map[string]interface{}{
-		"answer_preview":    truncateForLog(chatResponse.Content),
+		"answer_preview":    chatResponse.Content,
 		"finish_reason":     chatResponse.FinishReason,
 		"completion_tokens": chatResponse.Usage.CompletionTokens,
 		"prompt_tokens":     chatResponse.Usage.PromptTokens,
--- a/internal/application/service/chat_pipline/common.go
+++ b/internal/application/service/chat_pipline/common.go
@@ -2,87 +2,24 @@ package chatpipline

 import (
 	"context"
-	"fmt"
-	"sort"
-	"strconv"
-	"strings"

+	"github.com/Tencent/WeKnora/internal/common"
 	"github.com/Tencent/WeKnora/internal/logger"
 	"github.com/Tencent/WeKnora/internal/models/chat"
 	"github.com/Tencent/WeKnora/internal/types"
 	"github.com/Tencent/WeKnora/internal/types/interfaces"
 )

-const (
-	logValueMaxRune     = 300
-	defaultStageName    = "PIPELINE"
-	defaultActionName   = "info"
-	pipelineLogPrefix   = "[PIPELINE]"
-	pipelineTruncateEll = "..."
-)
-
-func pipelineLog(stage, action string, fields map[string]interface{}) string {
-	if stage == "" {
-		stage = defaultStageName
-	}
-	if action == "" {
-		action = defaultActionName
-	}
-
-	builder := strings.Builder{}
-	builder.Grow(128)
-	builder.WriteString(pipelineLogPrefix)
-	builder.WriteString(" stage=")
-	builder.WriteString(stage)
-	builder.WriteString(" action=")
-	builder.WriteString(action)
-
-	if len(fields) > 0 {
-		keys := make([]string, 0, len(fields))
-		for k := range fields {
-			keys = append(keys, k)
-		}
-		sort.Strings(keys)
-		for _, key := range keys {
-			builder.WriteString(" ")
-			builder.WriteString(key)
-			builder.WriteString("=")
-			builder.WriteString(formatLogValue(fields[key]))
-		}
-	}
-	return builder.String()
-}
-
 func pipelineInfo(ctx context.Context, stage, action string, fields map[string]interface{}) {
-	logger.GetLogger(ctx).Info(pipelineLog(stage, action, fields))
+	common.PipelineInfo(ctx, stage, action, fields)
 }

 func pipelineWarn(ctx context.Context, stage, action string, fields map[string]interface{}) {
-	logger.GetLogger(ctx).Warn(pipelineLog(stage, action, fields))
+	common.PipelineWarn(ctx, stage, action, fields)
 }

 func pipelineError(ctx context.Context, stage, action string, fields map[string]interface{}) {
-	logger.GetLogger(ctx).Error(pipelineLog(stage, action, fields))
-}
-
-func formatLogValue(value interface{}) string {
-	switch v := value.(type) {
-	case string:
-		return strconv.Quote(truncateForLog(v))
-	case fmt.Stringer:
-		return strconv.Quote(truncateForLog(v.String()))
-	default:
-		return fmt.Sprintf("%v", v)
-	}
-}
-
-func truncateForLog(content string) string {
-	content = strings.ReplaceAll(content, "\n", "\\n")
-	runes := []rune(content)
-	if len(runes) <= logValueMaxRune {
-		return content
-	}
-	return string(runes[:logValueMaxRune]) + pipelineTruncateEll
+	common.PipelineError(ctx, stage, action, fields)
 }

 // prepareChatModel shared logic to prepare chat model and options
--- a/internal/application/service/chat_pipline/merge.go
+++ b/internal/application/service/chat_pipline/merge.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"sort"
+	"strings"

 	"github.com/Tencent/WeKnora/internal/types"
 	"github.com/Tencent/WeKnora/internal/types/interfaces"
@@ -139,6 +140,7 @@ func (p *PluginMerge) OnEvent(ctx context.Context,
 		"merged_total": len(mergedChunks),
 	})

+	mergedChunks = p.populateFAQAnswers(ctx, chatManage, mergedChunks)
 	mergedChunks = p.expandShortContextWithNeighbors(ctx, chatManage, mergedChunks)

 	chatManage.MergeResult = mergedChunks
@@ -210,6 +212,126 @@ func mergeImageInfo(ctx context.Context, target *types.SearchResult, source *typ
 	return nil
 }

+// populateFAQAnswers replaces the Content of FAQ-type search results with text
+// built from the chunk's FAQ metadata (standard question plus answers).
+//
+// The tenant ID is taken from the context and, failing that, from chatManage.
+// The step is best-effort: when there is no chunk repository, no tenant, no FAQ
+// result, or the chunk lookup fails, results are returned unchanged. The input
+// slice is always returned; entries are modified in place.
+func (p *PluginMerge) populateFAQAnswers(ctx context.Context, chatManage *types.ChatManage, results []*types.SearchResult) []*types.SearchResult {
+	if len(results) == 0 || p.chunkRepo == nil {
+		return results
+	}
+
+	tenantID, _ := ctx.Value(types.TenantIDContextKey).(uint)
+	if tenantID == 0 && chatManage != nil {
+		tenantID = chatManage.TenantID
+	}
+	if tenantID == 0 {
+		pipelineWarn(ctx, "Merge", "faq_enrich_skip", map[string]interface{}{
+			"reason": "missing_tenant",
+		})
+		return results
+	}
+
+	// Group FAQ hits by chunk ID; the same chunk may appear more than once.
+	faqHits := make(map[string][]*types.SearchResult)
+	for _, hit := range results {
+		if hit == nil || hit.ID == "" || hit.ChunkType != string(types.ChunkTypeFAQ) {
+			continue
+		}
+		faqHits[hit.ID] = append(faqHits[hit.ID], hit)
+	}
+	if len(faqHits) == 0 {
+		return results
+	}
+
+	chunkIDs := make([]string, 0, len(faqHits))
+	for id := range faqHits {
+		chunkIDs = append(chunkIDs, id)
+	}
+
+	chunks, err := p.chunkRepo.ListChunksByID(ctx, tenantID, chunkIDs)
+	if err != nil {
+		pipelineWarn(ctx, "Merge", "faq_chunk_fetch_failed", map[string]interface{}{
+			"error": err.Error(),
+		})
+		return results
+	}
+
+	enriched := 0
+	for _, chunk := range chunks {
+		if chunk == nil {
+			continue
+		}
+		meta, metaErr := chunk.FAQMetadata()
+		if metaErr != nil {
+			pipelineWarn(ctx, "Merge", "faq_metadata_parse_failed", map[string]interface{}{
+				"chunk_id": chunk.ID,
+				"error":    metaErr.Error(),
+			})
+			continue
+		}
+		if meta == nil {
+			continue
+		}
+		answer := buildFAQAnswerContent(meta)
+		if answer == "" {
+			continue
+		}
+		for _, hit := range faqHits[chunk.ID] {
+			if hit == nil {
+				continue
+			}
+			hit.Content = answer
+			enriched++
+		}
+	}
+
+	// The logged count is the number of search results rewritten.
+	if enriched > 0 {
+		pipelineInfo(ctx, "Merge", "faq_content_enriched", map[string]interface{}{
+			"chunk_cnt": enriched,
+		})
+	}
+	return results
+}
+
+// buildFAQAnswerContent formats FAQ metadata as plain text:
+//
+//	Q: <standard question>
+//	Answer:
+//	- <answer 1>
+//	- <answer 2>
+//
+// The question line is omitted when the question is blank, and the answer
+// section is omitted when no non-blank answer exists. It returns "" for nil
+// metadata or when both parts are empty.
+func buildFAQAnswerContent(meta *types.FAQChunkMetadata) string {
+	if meta == nil {
+		return ""
+	}
+
+	lines := make([]string, 0, len(meta.Answers)+2)
+	if question := strings.TrimSpace(meta.StandardQuestion); question != "" {
+		lines = append(lines, "Q: "+question)
+	}
+
+	headerWritten := false
+	for _, raw := range meta.Answers {
+		answer := strings.TrimSpace(raw)
+		if answer == "" {
+			continue
+		}
+		// Emit the section header once, before the first usable answer.
+		if !headerWritten {
+			lines = append(lines, "Answer:")
+			headerWritten = true
+		}
+		lines = append(lines, "- "+answer)
+	}
+
+	if len(lines) == 0 {
+		return ""
+	}
+	return strings.TrimSpace(strings.Join(lines, "\n"))
+}
+
 func (p *PluginMerge) expandShortContextWithNeighbors(ctx context.Context, chatManage *types.ChatManage, results []*types.SearchResult) []*types.SearchResult {
 	const (
 		minLen = 350
--- a/internal/application/service/chat_pipline/preprocess.go
+++ b/internal/application/service/chat_pipline/preprocess.go
@@ -1,26 +1,26 @@
 package chatpipline

 import (
-    "context"
-    "encoding/json"
-    "regexp"
-    "strings"
-    "unicode"
-    "unicode/utf8"
+	"context"
+	"encoding/json"
+	"regexp"
+	"strings"
+	"unicode"
+	"unicode/utf8"

-    "github.com/Tencent/WeKnora/internal/config"
-    "github.com/Tencent/WeKnora/internal/models/chat"
-    "github.com/Tencent/WeKnora/internal/types"
-    "github.com/Tencent/WeKnora/internal/types/interfaces"
-    "github.com/yanyiwu/gojieba"
+	"github.com/Tencent/WeKnora/internal/config"
+	"github.com/Tencent/WeKnora/internal/models/chat"
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
+	"github.com/yanyiwu/gojieba"
 )

 // PluginPreprocess Query preprocessing plugin
 type PluginPreprocess struct {
-    config    *config.Config
-    jieba     *gojieba.Jieba
-    stopwords map[string]struct{}
-    modelService interfaces.ModelService
+	config       *config.Config
+	jieba        *gojieba.Jieba
+	stopwords    map[string]struct{}
+	modelService interfaces.ModelService
 }

 // Regular expressions for text cleaning
@@ -35,10 +35,10 @@ const maxProcessedTokens = 12

 // NewPluginPreprocess Creates a new query preprocessing plugin
 func NewPluginPreprocess(
-    eventManager *EventManager,
-    config *config.Config,
-    cleaner interfaces.ResourceCleaner,
-    modelService interfaces.ModelService,
+	eventManager *EventManager,
+	config *config.Config,
+	cleaner interfaces.ResourceCleaner,
+	modelService interfaces.ModelService,
 ) *PluginPreprocess {
 	// Use default dictionary for Jieba tokenizer
 	jieba := gojieba.NewJieba()
@@ -46,12 +46,12 @@ func NewPluginPreprocess(
 	// Load stopwords from built-in stopword library
 	stopwords := loadStopwords()

-    res := &PluginPreprocess{
-        config:    config,
-        jieba:     jieba,
-        stopwords: stopwords,
-        modelService: modelService,
-    }
+	res := &PluginPreprocess{
+		config:       config,
+		jieba:        jieba,
+		stopwords:    stopwords,
+		modelService: modelService,
+	}

 	// Register resource cleanup function
 	if cleaner != nil {
@@ -92,10 +92,10 @@ func (p *PluginPreprocess) ActivationEvents() []types.EventType {

 // OnEvent Process events
 func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError) *PluginError {
-    rawQuery := strings.TrimSpace(chatManage.RewriteQuery)
-    if rawQuery == "" {
-        return next()
-    }
+	rawQuery := strings.TrimSpace(chatManage.RewriteQuery)
+	if rawQuery == "" {
+		return next()
+	}

 	pipelineInfo(ctx, "Preprocess", "input", map[string]interface{}{
 		"session_id":    chatManage.SessionID,
@@ -108,12 +108,12 @@ func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventTyp
 		sanitized = normalized
 	}

-    var (
-        processed    = sanitized
-        strategy     = "original"
-        tokenPreview string
-        tokenCount   int
-    )
+	var (
+		processed    = sanitized
+		strategy     = "original"
+		tokenPreview string
+		tokenCount   int
+	)

 	switch {
 	case containsChineseCharacters(sanitized):
@@ -141,17 +141,17 @@ func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventTyp
 		strategy = "fallback_original"
 	}

-    chatManage.ProcessedQuery = processed
-    chatManage.QueryIntent = p.detectIntentLLM(ctx, chatManage, sanitized)
+	chatManage.ProcessedQuery = processed
+	chatManage.QueryIntent = p.detectIntentLLM(ctx, chatManage, sanitized)

-    pipelineInfo(ctx, "Preprocess", "output", map[string]interface{}{
-        "session_id":      chatManage.SessionID,
-        "processed_query": processed,
-        "strategy":        strategy,
-        "token_count":     tokenCount,
-        "token_preview":   truncateForLog(tokenPreview),
-        "query_intent":    chatManage.QueryIntent,
-    })
+	pipelineInfo(ctx, "Preprocess", "output", map[string]interface{}{
+		"session_id":      chatManage.SessionID,
+		"processed_query": processed,
+		"strategy":        strategy,
+		"token_count":     tokenCount,
+		"token_preview":   tokenPreview,
+		"query_intent":    chatManage.QueryIntent,
+	})

 	return next()
 }
@@ -258,63 +258,63 @@ func normalizeWhitespace(text string) string {
 }

 func normalizeLatinQuery(text string) string {
-    text = strings.ToLower(text)
-    text = multiSpaceRegex.ReplaceAllString(text, " ")
-    return strings.TrimSpace(text)
+	text = strings.ToLower(text) // lower-case the whole query
+	text = multiSpaceRegex.ReplaceAllString(text, " ") // every multiSpaceRegex match becomes a single space
+	return strings.TrimSpace(text) // strip leading and trailing whitespace from the result
 }

 type intentResp struct {
-    Intent     string  `json:"intent"`
-    Confidence float64 `json:"confidence"`
+	Intent     string  `json:"intent"`     // decoded from the "intent" key of the model's JSON reply
+	Confidence float64 `json:"confidence"` // decoded from the "confidence" key of the model's JSON reply
 }

 func (p *PluginPreprocess) detectIntentLLM(ctx context.Context, chatManage *types.ChatManage, text string) string {
-    if p.modelService == nil || chatManage.ChatModelID == "" {
-        pipelineWarn(ctx, "IntentDetect", "skip", map[string]interface{}{ "reason": "no_model", "session_id": chatManage.SessionID })
-        return "general"
-    }
-    chatModel, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID)
-    if err != nil {
-        pipelineWarn(ctx, "IntentDetect", "get_model_failed", map[string]interface{}{ "error": err.Error(), "model_id": chatManage.ChatModelID })
-        return "general"
-    }
-    pipelineInfo(ctx, "IntentDetect", "start", map[string]interface{}{ "session_id": chatManage.SessionID, "model_id": chatManage.ChatModelID })
-    sys := "You are a query intent classifier. Classify the user's query into one of: definition, howto, compare, qa, general. Respond ONLY with a JSON object {\"intent\": \"...\", \"confidence\": 0.0 } inside a markdown fenced block."
-    usr := text
-    think := false
-    resp, err := chatModel.Chat(ctx, []chat.Message{
-        {Role: "system", Content: sys},
-        {Role: "user", Content: usr},
-    }, &chat.ChatOptions{Temperature: 0.0, MaxCompletionTokens: 64, Thinking: &think})
-    if err != nil || resp.Content == "" {
-        pipelineWarn(ctx, "IntentDetect", "model_call_failed", map[string]interface{}{ "error": err })
-        return "general"
-    }
-    body := extractJSONBody(resp.Content)
-    var ir intentResp
-    if err := json.Unmarshal([]byte(body), &ir); err != nil {
-        pipelineWarn(ctx, "IntentDetect", "parse_failed", map[string]interface{}{ "body": truncateForLog(body), "error": err.Error() })
-        return "general"
-    }
-    pipelineInfo(ctx, "IntentDetect", "result", map[string]interface{}{ "intent": ir.Intent, "confidence": ir.Confidence })
-    switch strings.ToLower(strings.TrimSpace(ir.Intent)) {
-    case "definition", "howto", "compare", "qa", "general":
-        return strings.ToLower(ir.Intent)
-    default:
-        return "general"
-    }
+	if p.modelService == nil || chatManage.ChatModelID == "" {
+		pipelineWarn(ctx, "IntentDetect", "skip", map[string]interface{}{"reason": "no_model", "session_id": chatManage.SessionID})
+		return "general" // no model service or model ID: fall back to the default intent
+	}
+	chatModel, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID)
+	if err != nil {
+		pipelineWarn(ctx, "IntentDetect", "get_model_failed", map[string]interface{}{"error": err.Error(), "model_id": chatManage.ChatModelID})
+		return "general" // classification is best-effort; a lookup failure is logged, not propagated
+	}
+	pipelineInfo(ctx, "IntentDetect", "start", map[string]interface{}{"session_id": chatManage.SessionID, "model_id": chatManage.ChatModelID})
+	sys := "You are a query intent classifier. Classify the user's query into one of: definition, howto, compare, qa, general. Respond ONLY with a JSON object {\"intent\": \"...\", \"confidence\": 0.0 } inside a markdown fenced block."
+	usr := text
+	think := false // passed by pointer below so the Thinking option is explicitly false
+	resp, err := chatModel.Chat(ctx, []chat.Message{
+		{Role: "system", Content: sys},
+		{Role: "user", Content: usr},
+	}, &chat.ChatOptions{Temperature: 0.0, MaxCompletionTokens: 64, Thinking: &think})
+	if err != nil || resp.Content == "" {
+		pipelineWarn(ctx, "IntentDetect", "model_call_failed", map[string]interface{}{"error": err}) // err is nil here when the reply was merely empty
+		return "general"
+	}
+	body := extractJSONBody(resp.Content) // JSON object text pulled out of the reply, or "{}" when none is found
+	var ir intentResp
+	if err := json.Unmarshal([]byte(body), &ir); err != nil {
+		pipelineWarn(ctx, "IntentDetect", "parse_failed", map[string]interface{}{"body": body, "error": err.Error()})
+		return "general"
+	}
+	pipelineInfo(ctx, "IntentDetect", "result", map[string]interface{}{"intent": ir.Intent, "confidence": ir.Confidence})
+	switch intent := strings.ToLower(strings.TrimSpace(ir.Intent)); intent { // normalize once so the returned label is exactly what was matched
+	case "definition", "howto", "compare", "qa", "general":
+		return intent // trimmed and lower-cased, never the raw model string
+	default:
+		return "general" // any label outside the known set maps to the default intent
+	}
 }

 func extractJSONBody(text string) string {
-    t := strings.TrimSpace(text)
-    // Try fenced block first
-    if i := strings.Index(t, "{"); i >= 0 {
-        j := strings.LastIndex(t, "}")
-        if j > i {
-            return t[i : j+1]
-        }
-    }
-    return "{}"
+	t := strings.TrimSpace(text)
+	// Take the span from the first "{" to the last "}"; anything around it (fence markers, prose) is dropped
+	if i := strings.Index(t, "{"); i >= 0 {
+		j := strings.LastIndex(t, "}")
+		if j > i {
+			return t[i : j+1] // inclusive of both braces
+		}
+	}
+	return "{}" // no usable brace pair: hand back an empty JSON object
 }

 // Ensure resources are properly released
--- a/internal/application/service/chat_pipline/search.go
+++ b/internal/application/service/chat_pipline/search.go
@@ -1,17 +1,18 @@
 package chatpipline

 import (
-    "context"
-    "encoding/json"
-    "fmt"
-    "strings"
-    "sync"
-    "time"
+	"context"
+	"encoding/json"
+	"fmt"
+	"sort"
+	"strings"
+	"sync"
+	"time"

-    "github.com/Tencent/WeKnora/internal/config"
-    "github.com/Tencent/WeKnora/internal/models/chat"
-    "github.com/Tencent/WeKnora/internal/types"
-    "github.com/Tencent/WeKnora/internal/types/interfaces"
+	"github.com/Tencent/WeKnora/internal/config"
+	"github.com/Tencent/WeKnora/internal/models/chat"
+	"github.com/Tencent/WeKnora/internal/types"
+	"github.com/Tencent/WeKnora/internal/types/interfaces"
 )

 // PluginSearch implements search functionality for chat pipeline
@@ -54,7 +55,7 @@ func (p *PluginSearch) ActivationEvents() []types.EventType {

 // OnEvent handles search events in the chat pipeline
 func (p *PluginSearch) OnEvent(ctx context.Context,
-    eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
+	eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
 ) *PluginError {
 	// Get knowledge base IDs list
 	knowledgeBaseIDs := chatManage.KnowledgeBaseIDs
@@ -117,85 +118,85 @@ func (p *PluginSearch) OnEvent(ctx context.Context,
 		}
 	}()

-    wg.Wait()
+	wg.Wait()

-    chatManage.SearchResult = allResults
+	chatManage.SearchResult = allResults

-    // If recall is low, attempt query expansion with keyword-focused search
-    if len(chatManage.SearchResult) < max(1, chatManage.EmbeddingTopK/2) {
-        pipelineInfo(ctx, "Search", "recall_low", map[string]interface{}{
-            "current": len(chatManage.SearchResult),
-            "threshold": chatManage.EmbeddingTopK / 2,
-        })
-        expansions := p.expandQueries(ctx, chatManage)
-        if len(expansions) > 0 {
-            pipelineInfo(ctx, "Search", "expansion_start", map[string]interface{}{
-                "variants": len(expansions),
-            })
-            expTopK := max(chatManage.EmbeddingTopK*2, chatManage.RerankTopK*2)
-            expKwTh := chatManage.KeywordThreshold * 0.8
-            // Concurrent expansion retrieval across queries and KBs
-            expResults := make([]*types.SearchResult, 0, expTopK*len(expansions))
-            var muExp sync.Mutex
-            var wgExp sync.WaitGroup
-            jobs := len(expansions) * len(knowledgeBaseIDs)
-            capSem := 16
-            if jobs < capSem {
-                capSem = jobs
-            }
-            if capSem <= 0 {
-                capSem = 1
-            }
-            sem := make(chan struct{}, capSem)
-            pipelineInfo(ctx, "Search", "expansion_concurrency", map[string]interface{}{
-                "jobs": jobs,
-                "cap":  capSem,
-            })
-            for _, q := range expansions {
-                for _, kbID := range knowledgeBaseIDs {
-                    wgExp.Add(1)
-                    go func(q string, kbID string) {
-                        defer wgExp.Done()
-                        sem <- struct{}{}
-                        defer func() { <-sem }()
-                        paramsExp := types.SearchParams{
-                            QueryText:            q,
-                            VectorThreshold:      chatManage.VectorThreshold,
-                            KeywordThreshold:     expKwTh,
-                            MatchCount:           expTopK,
-                            DisableVectorMatch:   true,
-                            DisableKeywordsMatch: false,
-                        }
-                        res, err := p.knowledgeBaseService.HybridSearch(ctx, kbID, paramsExp)
-                        if err != nil {
-                            pipelineWarn(ctx, "Search", "expansion_error", map[string]interface{}{
-                                "kb_id": kbID,
-                                "error": err.Error(),
-                            })
-                            return
-                        }
-                        if len(res) > 0 {
-                            pipelineInfo(ctx, "Search", "expansion_hits", map[string]interface{}{
-                                "kb_id": kbID,
-                                "query": truncateForLog(q),
-                                "hits":  len(res),
-                            })
-                            muExp.Lock()
-                            expResults = append(expResults, res...)
-                            muExp.Unlock()
-                        }
-                    }(q, kbID)
-                }
-            }
-            wgExp.Wait()
-            if len(expResults) > 0 {
-                pipelineInfo(ctx, "Search", "expansion_done", map[string]interface{}{
-                    "added": len(expResults),
-                })
-                chatManage.SearchResult = append(chatManage.SearchResult, expResults...)
-            }
-        }
-    }
+	// If recall is low, attempt query expansion with keyword-focused search
+	if len(chatManage.SearchResult) < max(1, chatManage.EmbeddingTopK/2) {
+		pipelineInfo(ctx, "Search", "recall_low", map[string]interface{}{
+			"current":   len(chatManage.SearchResult),
+			"threshold": chatManage.EmbeddingTopK / 2,
+		})
+		expansions := p.expandQueries(ctx, chatManage)
+		if len(expansions) > 0 {
+			pipelineInfo(ctx, "Search", "expansion_start", map[string]interface{}{
+				"variants": len(expansions),
+			})
+			expTopK := max(chatManage.EmbeddingTopK*2, chatManage.RerankTopK*2)
+			expKwTh := chatManage.KeywordThreshold * 0.8
+			// Concurrent expansion retrieval across queries and KBs
+			expResults := make([]*types.SearchResult, 0, expTopK*len(expansions))
+			var muExp sync.Mutex
+			var wgExp sync.WaitGroup
+			jobs := len(expansions) * len(knowledgeBaseIDs)
+			capSem := 16
+			if jobs < capSem {
+				capSem = jobs
+			}
+			if capSem <= 0 {
+				capSem = 1
+			}
+			sem := make(chan struct{}, capSem)
+			pipelineInfo(ctx, "Search", "expansion_concurrency", map[string]interface{}{
+				"jobs": jobs,
+				"cap":  capSem,
+			})
+			for _, q := range expansions {
+				for _, kbID := range knowledgeBaseIDs {
+					wgExp.Add(1)
+					go func(q string, kbID string) {
+						defer wgExp.Done()
+						sem <- struct{}{}
+						defer func() { <-sem }()
+						paramsExp := types.SearchParams{
+							QueryText:            q,
+							VectorThreshold:      chatManage.VectorThreshold,
+							KeywordThreshold:     expKwTh,
+							MatchCount:           expTopK,
+							DisableVectorMatch:   true,
+							DisableKeywordsMatch: false,
+						}
+						res, err := p.knowledgeBaseService.HybridSearch(ctx, kbID, paramsExp)
+						if err != nil {
+							pipelineWarn(ctx, "Search", "expansion_error", map[string]interface{}{
+								"kb_id": kbID,
+								"error": err.Error(),
+							})
+							return
+						}
+						if len(res) > 0 {
+							pipelineInfo(ctx, "Search", "expansion_hits", map[string]interface{}{
+								"kb_id": kbID,
+								"query": q,
+								"hits":  len(res),
+							})
+							muExp.Lock()
+							expResults = append(expResults, res...)
+							muExp.Unlock()
+						}
+					}(q, kbID)
+				}
+			}
+			wgExp.Wait()
+			if len(expResults) > 0 {
+				pipelineInfo(ctx, "Search", "expansion_done", map[string]interface{}{
+					"added": len(expResults),
+				})
+				chatManage.SearchResult = append(chatManage.SearchResult, expResults...)
+			}
+		}
+	}

 	// Add relevant results from chat history
 	historyResult := p.getSearchResultFromHistory(chatManage)
@@ -207,13 +208,13 @@ func (p *PluginSearch) OnEvent(ctx context.Context,
 		chatManage.SearchResult = append(chatManage.SearchResult, historyResult...)
 	}

-    // Remove duplicate results
-    before := len(chatManage.SearchResult)
-    chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult)
-    pipelineInfo(ctx, "Search", "dedup_summary", map[string]interface{}{
-        "before": before,
-        "after":  len(chatManage.SearchResult),
-    })
+	// Remove duplicate results
+	before := len(chatManage.SearchResult)
+	chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult)
+	pipelineInfo(ctx, "Search", "dedup_summary", map[string]interface{}{
+		"before": before,
+		"after":  len(chatManage.SearchResult),
+	})

 	// Return if we have results
 	if len(chatManage.SearchResult) != 0 {
@@ -249,52 +250,52 @@ func (p *PluginSearch) getSearchResultFromHistory(chatManage *types.ChatManage)
 }

 func removeDuplicateResults(results []*types.SearchResult) []*types.SearchResult {
-    seen := make(map[string]bool)
-    contentSig := make(map[string]bool)
-    var uniqueResults []*types.SearchResult
-    for _, r := range results {
-        keys := []string{r.ID}
-        if r.ParentChunkID != "" {
-            keys = append(keys, "parent:"+r.ParentChunkID)
-        }
-        if r.KnowledgeID != "" {
-            keys = append(keys, fmt.Sprintf("kb:%s#%d", r.KnowledgeID, r.ChunkIndex))
-        }
-        dup := false
-        for _, k := range keys {
-            if seen[k] {
-                dup = true
-                break
-            }
-        }
-        if dup {
-            continue
-        }
-        sig := buildContentSignature(r.Content)
-        if sig != "" {
-            if contentSig[sig] {
-                continue
-            }
-            contentSig[sig] = true
-        }
-        for _, k := range keys {
-            seen[k] = true
-        }
-        uniqueResults = append(uniqueResults, r)
-    }
-    return uniqueResults
+	seen := make(map[string]bool) // identity keys (ID, parent chunk, knowledge+chunk index) of results already kept
+	contentSig := make(map[string]bool) // content signatures of results already kept
+	var uniqueResults []*types.SearchResult
+	for _, r := range results {
+		keys := []string{r.ID}
+		if r.ParentChunkID != "" {
+			keys = append(keys, "parent:"+r.ParentChunkID) // results sharing a parent chunk collapse to the first one seen
+		}
+		if r.KnowledgeID != "" {
+			keys = append(keys, fmt.Sprintf("kb:%s#%d", r.KnowledgeID, r.ChunkIndex))
+		}
+		dup := false
+		for _, k := range keys {
+			if seen[k] {
+				dup = true // a match on any single key is enough to treat r as a duplicate
+				break
+			}
+		}
+		if dup {
+			continue
+		}
+		sig := buildContentSignature(r.Content)
+		if sig != "" {
+			if contentSig[sig] {
+				continue // dropped before its keys are recorded, so those keys stay unseen
+			}
+			contentSig[sig] = true
+		}
+		for _, k := range keys {
+			seen[k] = true
+		}
+		uniqueResults = append(uniqueResults, r) // input order is preserved among the kept results
+	}
+	return uniqueResults // nil when nothing was kept (declared with var, never appended to)
 }

 func buildContentSignature(content string) string {
-    c := strings.ToLower(strings.TrimSpace(content))
-    if c == "" {
-        return ""
-    }
-    c = strings.Join(strings.Fields(c), " ")
-    if len(c) > 128 {
-        c = c[:128]
-    }
-    return c
+	c := strings.ToLower(strings.TrimSpace(content))
+	if c == "" {
+		return "" // blank content has no signature
+	}
+	c = strings.Join(strings.Fields(c), " ") // collapse every whitespace run into one space
+	if len(c) > 128 {
+		c = c[:128] // keeps the first 128 bytes (not runes), so a multi-byte character may be cut at the boundary
+	}
+	return c
 }

 // searchKnowledgeBases performs KB searches for rewrite and processed queries across KB IDs
@@ -375,6 +376,9 @@ func (p *PluginSearch) searchKnowledgeBases(ctx context.Context, knowledgeBaseID
 		wg.Wait()
 	}

+	// Normalize keyword retriever scores after collecting all results from multiple knowledge bases
+	normalizeKeywordSearchResults(ctx, results)
+
 	pipelineInfo(ctx, "Search", "kb_result_summary", map[string]interface{}{
 		"total_hits": len(results),
 	})
@@ -472,8 +476,8 @@ func convertWebSearchResults(webResults []*types.WebSearchResult) []*types.Searc
 			ChunkIndex:     0,
 			KnowledgeTitle: webResult.Title,
 			StartAt:        0,
-			EndAt:          len(content),
-			Seq:            i,
+			EndAt:          runeLen(content),
+			Seq:            1,
 			Score:          score,
 			MatchType:      types.MatchTypeWebSearch,
 			SubChunkID:     []string{},
@@ -483,7 +487,7 @@ func convertWebSearchResults(webResults []*types.WebSearchResult) []*types.Searc
 				"title":   webResult.Title,
 				"snippet": webResult.Snippet,
 			},
-			ChunkType:         "web_search",
+			ChunkType:         string(types.ChunkTypeWebSearch),
 			ParentChunkID:     "",
 			ImageInfo:         "",
 			KnowledgeFilename: "",
@@ -500,79 +504,193 @@ func convertWebSearchResults(webResults []*types.WebSearchResult) []*types.Searc

 	return results
 }
+
 // expandQueries generates paraphrases and synonyms using chat model to improve keyword recall
 func (p *PluginSearch) expandQueries(ctx context.Context, chatManage *types.ChatManage) []string {
-    if p.modelService == nil || chatManage.ChatModelID == "" {
-        pipelineWarn(ctx, "Search", "expansion_skip", map[string]interface{}{
-            "reason": "no_model",
-        })
-        return nil
-    }
-    model, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID)
-    if err != nil {
-        pipelineWarn(ctx, "Search", "expansion_get_model_failed", map[string]interface{}{
-            "error": err.Error(),
-        })
-        return nil
-    }
-    sys := "Generate up to 5 diverse paraphrases or keyword variants for the user query to improve keyword-based search recall. Respond ONLY with a JSON array of strings inside a fenced code block."
-    usr := chatManage.RewriteQuery
-    think := false
-    resp, err := model.Chat(ctx, []chat.Message{{Role: "system", Content: sys}, {Role: "user", Content: usr}}, &chat.ChatOptions{Temperature: 0.2, MaxCompletionTokens: 80, Thinking: &think})
-    if err != nil || resp.Content == "" {
-        pipelineWarn(ctx, "Search", "expansion_model_call_failed", map[string]interface{}{
-            "error": err,
-        })
-        return nil
-    }
-    body := extractJSONBlock(resp.Content)
-    var arr []string
-    if err := json.Unmarshal([]byte(body), &arr); err != nil || len(arr) == 0 {
-        // Fallback: split lines
-        lines := strings.Split(resp.Content, "\n")
-        for _, l := range lines {
-            l = strings.TrimSpace(l)
-            if l != "" {
-                arr = append(arr, l)
-            }
-        }
-    }
-    uniq := make(map[string]struct{})
-    base := []string{chatManage.Query, chatManage.RewriteQuery, chatManage.ProcessedQuery}
-    for _, b := range base {
-        if s := strings.TrimSpace(b); s != "" {
-            uniq[strings.ToLower(s)] = struct{}{}
-        }
-    }
-    expansions := make([]string, 0, len(arr))
-    for _, a := range arr {
-        s := strings.TrimSpace(a)
-        if s == "" {
-            continue
-        }
-        key := strings.ToLower(s)
-        if _, ok := uniq[key]; ok {
-            continue
-        }
-        uniq[key] = struct{}{}
-        expansions = append(expansions, s)
-        if len(expansions) >= 5 {
-            break
-        }
-    }
-    pipelineInfo(ctx, "Search", "expansion_result", map[string]interface{}{
-        "variants": len(expansions),
-    })
-    return expansions
+	if p.modelService == nil || chatManage.ChatModelID == "" {
+		pipelineWarn(ctx, "Search", "expansion_skip", map[string]interface{}{
+			"reason": "no_model",
+		})
+		return nil // expansion is optional: without a model there is nothing to add
+	}
+	model, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID)
+	if err != nil {
+		pipelineWarn(ctx, "Search", "expansion_get_model_failed", map[string]interface{}{
+			"error": err.Error(),
+		})
+		return nil
+	}
+	sys := "Generate up to 5 diverse paraphrases or keyword variants for the user query to improve keyword-based search recall. Respond ONLY with a JSON array of strings inside a fenced code block."
+	usr := chatManage.RewriteQuery
+	think := false // passed by pointer below so the Thinking option is explicitly false
+	resp, err := model.Chat(ctx, []chat.Message{
+		{Role: "system", Content: sys},
+		{Role: "user", Content: usr},
+	}, &chat.ChatOptions{Temperature: 0.2, MaxCompletionTokens: 200, Thinking: &think})
+	if err != nil || resp.Content == "" {
+		pipelineWarn(ctx, "Search", "expansion_model_call_failed", map[string]interface{}{
+			"error": err, // nil when the call succeeded but the reply was empty
+		})
+		return nil
+	}
+	body := extractJSONBlock(resp.Content) // "[...]" span of the reply, or "[]" when none is found
+	var arr []string
+	if err := json.Unmarshal([]byte(body), &arr); err != nil || len(arr) == 0 {
+		// Fallback: treat each non-empty line of the raw reply as a candidate. NOTE(review): this also picks up fence-marker lines — confirm that is acceptable
+		lines := strings.Split(resp.Content, "\n")
+		for _, l := range lines {
+			l = strings.TrimSpace(l)
+			if l != "" {
+				arr = append(arr, l)
+			}
+		}
+	}
+	uniq := make(map[string]struct{}) // lower-cased queries already known; seeded with the three base queries
+	base := []string{chatManage.Query, chatManage.RewriteQuery, chatManage.ProcessedQuery}
+	for _, b := range base {
+		if s := strings.TrimSpace(b); s != "" {
+			uniq[strings.ToLower(s)] = struct{}{}
+		}
+	}
+	expansions := make([]string, 0, len(arr))
+	for _, a := range arr {
+		s := strings.TrimSpace(a)
+		if s == "" {
+			continue
+		}
+		key := strings.ToLower(s) // comparison is case-insensitive; the original casing is what gets returned
+		if _, ok := uniq[key]; ok {
+			continue
+		}
+		uniq[key] = struct{}{}
+		expansions = append(expansions, s)
+		if len(expansions) >= 5 {
+			break // hard cap of five variants regardless of how many the model produced
+		}
+	}
+	pipelineInfo(ctx, "Search", "expansion_result", map[string]interface{}{
+		"variants": len(expansions),
+	})
+	return expansions
 }

 func extractJSONBlock(text string) string {
-    t := strings.TrimSpace(text)
-    if i := strings.Index(t, "["); i >= 0 {
-        j := strings.LastIndex(t, "]")
-        if j > i {
-            return t[i : j+1]
-        }
-    }
-    return "[]"
+	t := strings.TrimSpace(text)
+	if i := strings.Index(t, "["); i >= 0 { // first opening bracket in the trimmed text
+		j := strings.LastIndex(t, "]") // last closing bracket in the trimmed text
+		if j > i {
+			return t[i : j+1] // inclusive of both brackets; text outside them is dropped
+		}
+	}
+	return "[]" // no usable bracket pair: hand back an empty JSON array
+}
+
+// normalizeKeywordSearchResults rescales, in place, the Score of every result whose MatchType is
+// types.MatchTypeKeywords into [0,1]; results with any other match type are left untouched.
+// A single keyword result, or a set whose scores are all equal, is assigned 1.0.
+// With fewer than 10 keyword results the bounds are the raw min/max; with 10 or more they are the
+// sorted scores at index n*5/100 and n*95/100, and scores outside those bounds are clamped first.
+func normalizeKeywordSearchResults(ctx context.Context, results []*types.SearchResult) {
+	// Filter keyword match results
+	keywordResults := make([]*types.SearchResult, 0)
+	for _, result := range results {
+		if result.MatchType == types.MatchTypeKeywords {
+			keywordResults = append(keywordResults, result)
+		}
+	}
+
+	if len(keywordResults) == 0 {
+		return
+	}
+
+	// Single result: set to 1.0
+	if len(keywordResults) == 1 {
+		keywordResults[0].Score = 1.0
+		return
+	}
+
+	// Find min and max scores globally
+	minS := keywordResults[0].Score
+	maxS := keywordResults[0].Score
+	for _, r := range keywordResults {
+		if r.Score < minS {
+			minS = r.Score
+		}
+		if r.Score > maxS {
+			maxS = r.Score
+		}
+	}
+
+	// No variance: all scores are the same
+	if maxS <= minS {
+		for _, r := range keywordResults {
+			r.Score = 1.0
+		}
+		pipelineInfo(ctx, "Search", "keyword_scores_no_variance", map[string]interface{}{
+			"count": len(keywordResults),
+			"score": minS,
+		})
+		return
+	}
+
+	// Robust normalization: use percentile-based bounds to reduce outlier impact
+	// For groups of fewer than 10, use min/max; for 10 or more, use 5th and 95th percentiles
+	normalizeMin := minS
+	normalizeMax := maxS
+
+	if len(keywordResults) >= 10 {
+		// For larger groups, use percentile-based bounds to handle outliers
+		// Sort a copy of the scores to find percentiles (the results themselves are not reordered)
+		scores := make([]float64, len(keywordResults))
+		for i, r := range keywordResults {
+			scores[i] = r.Score
+		}
+		sort.Float64s(scores)
+
+		// Use 5th and 95th percentiles to reduce outlier impact
+		p5Idx := len(scores) * 5 / 100 // integer division: stays 0 (the minimum) until there are 20 scores
+		p95Idx := len(scores) * 95 / 100 // integer division: still the last index (the maximum) for up to 20 scores
+		if p5Idx < len(scores) {
+			normalizeMin = scores[p5Idx]
+		}
+		if p95Idx < len(scores) {
+			normalizeMax = scores[p95Idx]
+		}
+	}
+
+	// Normalize scores with bounds checking
+	rangeSize := normalizeMax - normalizeMin
+	if rangeSize > 0 {
+		for _, r := range keywordResults {
+			// Clamp to [normalizeMin, normalizeMax] before normalization
+			clampedScore := r.Score
+			if clampedScore < normalizeMin {
+				clampedScore = normalizeMin
+			} else if clampedScore > normalizeMax {
+				clampedScore = normalizeMax
+			}
+
+			// Normalize to [0, 1]
+			ns := (clampedScore - normalizeMin) / rangeSize
+			if ns < 0 {
+				ns = 0
+			} else if ns > 1 {
+				ns = 1
+			}
+			r.Score = ns
+		}
+
+		pipelineInfo(ctx, "Search", "normalize_keyword_scores", map[string]interface{}{
+			"count":         len(keywordResults),
+			"raw_min":       minS,
+			"raw_max":       maxS,
+			"normalize_min": normalizeMin,
+			"normalize_max": normalizeMax,
+		})
+	} else {
+		// Fallback: the two percentile bounds coincide, so every keyword score is set to 1.0
+		for _, r := range keywordResults {
+			r.Score = 1.0
+		}
+	}
 }
--- a/internal/application/service/chunk.go
+++ b/internal/application/service/chunk.go
@@ -39,6 +39,16 @@ func NewChunkService(
 	}
 }

+// GetRepository returns the chunk repository held by this service.
+//
+// It takes no parameters and hands back the service's chunkRepository field as-is.
+//
+// Returns:
+//   - interfaces.ChunkRepository: the chunkRepository field of the service
+func (s *chunkService) GetRepository() interfaces.ChunkRepository {
+	return s.chunkRepository
+}
+
 // CreateChunks creates multiple chunks
 // This method persists a batch of document chunks to the repository
 // Parameters:
--- a/internal/application/service/knowledge.go
+++ b/internal/application/service/knowledge.go
@@ -106,6 +106,16 @@ func NewKnowledgeService(
 	}, nil
 }

+// GetRepository returns the knowledge repository held by this service.
+//
+// It takes no parameters and hands back the service's repo field as-is.
+//
+// Returns:
+//   - interfaces.KnowledgeRepository: the repo field of the service
+func (s *knowledgeService) GetRepository() interfaces.KnowledgeRepository {
+	return s.repo
+}
+
 // CreateKnowledgeFromFile creates a knowledge entry from an uploaded file
 func (s *knowledgeService) CreateKnowledgeFromFile(ctx context.Context,
 	kbID string, file *multipart.FileHeader, metadata map[string]string, enableMultimodel *bool,
@@ -413,7 +423,7 @@ func (s *knowledgeService) CreateKnowledgeFromManual(ctx context.Context,
 	now := time.Now()
 	title := safeTitle
 	if title == "" {
-		title = fmt.Sprintf("手工知识-%s", now.Format("20060102-150405"))
+		title = fmt.Sprintf("Knowledge-%s", now.Format("20060102-150405"))
 	}

 	fileName := ensureManualFileName(title)
@@ -844,7 +854,8 @@ func (s *knowledgeService) processDocument(ctx context.Context,
 	}

 	// Split file into chunks using document reader service
-	span.AddEvent("start split file")
+	logger.GetLogger(ctx).Infof("processDocument split file content size: %d, file name: %s, file type: %s, separators: %v, enable multimodal: %v",
+		len(contentBytes), knowledge.FileName, knowledge.FileType, kb.ChunkingConfig.Separators, enableMultimodel)
 	resp, err := s.docReaderClient.ReadFromFile(ctx, &proto.ReadFromFileRequest{
 		FileContent: contentBytes,
 		FileName:    knowledge.FileName,
@@ -3040,41 +3051,70 @@ func ensureManualFileName(title string) string {
 	return trimmed + manualFileExtension
 }

-func splitManualContent(content string) []string {
-	clean := strings.TrimSpace(content)
-	if clean == "" {
-		return []string{}
-	}
-	normalized := strings.ReplaceAll(clean, "\r\n", "\n")
-	segments := strings.Split(normalized, "\n\n")
-	results := make([]string, 0, len(segments))
-	for _, seg := range segments {
-		part := strings.TrimSpace(seg)
-		if part != "" {
-			results = append(results, part)
-		}
-	}
-	if len(results) == 0 {
-		results = append(results, clean)
-	}
-	return results
-}
-
 func (s *knowledgeService) triggerManualProcessing(ctx context.Context,
 	kb *types.KnowledgeBase, knowledge *types.Knowledge, content string, sync bool,
 ) {
-	passages := splitManualContent(content)
-	if len(passages) == 0 {
-		passages = []string{content}
+	clean := strings.TrimSpace(content)
+	if clean == "" {
+		return // NOTE(review): blank content returns without updating knowledge.ParseStatus — confirm callers expect that
+	}
+
+	// Process the content through docreader as Markdown, using the separators configured on the knowledge base
+	contentBytes := []byte(clean)
+	fileName := ensureManualFileName(knowledge.Title)
+	fileType := "md"
+
+	// Decide whether multimodal should be enabled (usually unnecessary for manual content, kept for consistency)
+	enableMultimodel := kb.ChunkingConfig.EnableMultimodal && kb.StorageConfig.Provider != ""
+
+	logger.GetLogger(ctx).Infof("triggerManualProcessing split manual content size: %d, file name: %s, file type: %s, separators: %v, enable multimodal: %v",
+		len(contentBytes), fileName, fileType, kb.ChunkingConfig.Separators, enableMultimodel)
+
+	// Call docreader to parse the markdown content
+	resp, err := s.docReaderClient.ReadFromFile(ctx, &proto.ReadFromFileRequest{
+		FileContent: contentBytes,
+		FileName:    fileName,
+		FileType:    fileType,
+		ReadConfig: &proto.ReadConfig{
+			ChunkSize:        int32(kb.ChunkingConfig.ChunkSize),
+			ChunkOverlap:     int32(kb.ChunkingConfig.ChunkOverlap),
+			Separators:       kb.ChunkingConfig.Separators,
+			EnableMultimodal: enableMultimodel,
+			StorageConfig: &proto.StorageConfig{
+				Provider:        proto.StorageProvider(proto.StorageProvider_value[strings.ToUpper(kb.StorageConfig.Provider)]),
+				Region:          kb.StorageConfig.Region,
+				BucketName:      kb.StorageConfig.BucketName,
+				AccessKeyId:     kb.StorageConfig.SecretID,
+				SecretAccessKey: kb.StorageConfig.SecretKey,
+				AppId:           kb.StorageConfig.AppID,
+				PathPrefix:      kb.StorageConfig.PathPrefix,
+			},
+			VlmConfig: &proto.VLMConfig{
+				ModelName:     kb.VLMConfig.ModelName,
+				BaseUrl:       kb.VLMConfig.BaseURL,
+				ApiKey:        kb.VLMConfig.APIKey,
+				InterfaceType: kb.VLMConfig.InterfaceType,
+			},
+		},
+		RequestId: ctx.Value(types.RequestIDContextKey).(string), // NOTE(review): unchecked assertion panics if the context holds no string request ID
+	})
+	if err != nil {
+		logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID).
+			WithField("error", err).Errorf("triggerManualProcessing read file failed")
+		knowledge.ParseStatus = "failed"
+		knowledge.ErrorMessage = err.Error()
+		knowledge.UpdatedAt = time.Now()
+		s.repo.UpdateKnowledge(ctx, knowledge) // NOTE(review): the call's result is discarded, so a failed status update goes unnoticed
+		return
 	}

 	if sync {
-		s.processDocumentFromPassage(ctx, kb, knowledge, passages)
+		s.processChunks(ctx, kb, knowledge, resp.Chunks) // synchronous path: chunks are processed before returning
 		return
 	}

 	newCtx := logger.CloneContext(ctx)
-	go s.processDocumentFromPassage(newCtx, kb, knowledge, passages)
+	go s.processChunks(newCtx, kb, knowledge, resp.Chunks) // asynchronous path: runs in a goroutine with the cloned context
 }

 func (s *knowledgeService) cleanupKnowledgeResources(ctx context.Context, knowledge *types.Knowledge) error {
--- a/internal/application/service/knowledgebase.go
+++ b/internal/application/service/knowledgebase.go
@@ -45,6 +45,16 @@ func NewKnowledgeBaseService(repo interfaces.KnowledgeBaseRepository,
 	}
 }

+// GetRepository returns the knowledge base repository held by this service.
+//
+// It takes no parameters and hands back the service's repo field as-is.
+//
+// Returns:
+//   - interfaces.KnowledgeBaseRepository: the repo field of the service
+func (s *knowledgeBaseService) GetRepository() interfaces.KnowledgeBaseRepository {
+	return s.repo
+}
+
 // CreateKnowledgeBase creates a new knowledge base
 func (s *knowledgeBaseService) CreateKnowledgeBase(ctx context.Context,
 	kb *types.KnowledgeBase,
@@ -303,18 +313,18 @@ func (s *knowledgeBaseService) HybridSearch(ctx context.Context,
 	var embeddingModel embedding.Embedder
 	var kb *types.KnowledgeBase

+	kb, err = s.repo.GetKnowledgeBaseByID(ctx, id)
+	if err != nil {
+		logger.ErrorWithFields(ctx, err, map[string]interface{}{
+			"knowledge_base_id": id,
+		})
+		return nil, err
+	}
+
 	// Add vector retrieval params if supported
 	if retrieveEngine.SupportRetriever(types.VectorRetrieverType) && !params.DisableVectorMatch {
 		logger.Info(ctx, "Vector retrieval supported, preparing vector retrieval parameters")

-		kb, err = s.repo.GetKnowledgeBaseByID(ctx, id)
-		if err != nil {
-			logger.ErrorWithFields(ctx, err, map[string]interface{}{
-				"knowledge_base_id": id,
-			})
-			return nil, err
-		}
-
 		logger.Infof(ctx, "Getting embedding model, model ID: %s", kb.EmbeddingModelID)
 		embeddingModel, err = s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID)
 		if err != nil {
@@ -343,8 +353,8 @@ func (s *knowledgeBaseService) HybridSearch(ctx context.Context,
 		logger.Info(ctx, "Vector retrieval parameters setup completed")
 	}

-	// Add keyword retrieval params if supported
-	if retrieveEngine.SupportRetriever(types.KeywordsRetrieverType) && !params.DisableKeywordsMatch {
+	// Add keyword retrieval params if supported and not FAQ
+	if retrieveEngine.SupportRetriever(types.KeywordsRetrieverType) && !params.DisableKeywordsMatch && kb.Type != types.KnowledgeBaseTypeFAQ {
 		logger.Info(ctx, "Keyword retrieval supported, preparing keyword retrieval parameters")
 		retrieveParams = append(retrieveParams, types.RetrieveParams{
 			Query:            params.QueryText,
@@ -372,41 +382,6 @@ func (s *knowledgeBaseService) HybridSearch(ctx context.Context,
 		return nil, err
 	}

-	// Normalize keyword retriever scores into [0,1] per-engine batch
-	for i := range retrieveResults {
-		rr := retrieveResults[i]
-		if rr.Error != nil || rr.RetrieverType != types.KeywordsRetrieverType || len(rr.Results) == 0 {
-			continue
-		}
-		minS := rr.Results[0].Score
-		maxS := rr.Results[0].Score
-		for _, r := range rr.Results {
-			if r.Score < minS {
-				minS = r.Score
-			}
-			if r.Score > maxS {
-				maxS = r.Score
-			}
-		}
-		if maxS > minS {
-			for _, r := range rr.Results {
-				ns := (r.Score - minS) / (maxS - minS)
-				if ns < 0 {
-					ns = 0
-				} else if ns > 1 {
-					ns = 1
-				}
-				r.Score = ns
-			}
-			logger.Infof(ctx, "Normalized keyword scores for engine %s: min=%f, max=%f", rr.RetrieverEngineType, minS, maxS)
-		} else {
-			for _, r := range rr.Results {
-				r.Score = 1.0
-			}
-			logger.Infof(ctx, "Keyword scores have no variance for engine %s, set to 1.0", rr.RetrieverEngineType)
-		}
-	}
-
 	// Collect all results from different retrievers and deduplicate by chunk ID
 	logger.Infof(ctx, "Processing retrieval results")
 	matchResults := []*types.IndexWithScore{}
--- a/internal/application/service/session.go
+++ b/internal/application/service/session.go
@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"strings"

-	"github.com/Tencent/WeKnora/internal/agent"
 	chatpipline "github.com/Tencent/WeKnora/internal/application/service/chat_pipline"
 	llmcontext "github.com/Tencent/WeKnora/internal/application/service/llmcontext"
 	"github.com/Tencent/WeKnora/internal/config"
@@ -485,8 +484,8 @@ func (s *sessionService) KnowledgeQA(ctx context.Context, session *types.Session
 		fallbackResponse = tenantConv.FallbackResponse
 		enableRewrite = tenantConv.EnableRewrite

-		if tenantConv.MaxTokens != 0 {
-			summaryConfig.MaxTokens = tenantConv.MaxTokens
+		if tenantConv.MaxCompletionTokens != 0 {
+			summaryConfig.MaxCompletionTokens = tenantConv.MaxCompletionTokens
 		}
 		if tenantConv.Prompt != "" {
 			summaryConfig.Prompt = tenantConv.Prompt
@@ -882,10 +881,10 @@ func (s *sessionService) AgentQA(ctx context.Context, session *types.Session, qu
 		WebSearchEnabled:  session.AgentConfig.WebSearchEnabled, // Web search enabled from session config
 	}

-	if tenantInfo.AgentConfig.UseCustomSystemPrompt {
-		agentConfig.SystemPrompt = tenantInfo.AgentConfig.SystemPrompt
-	} else {
-		agentConfig.SystemPrompt = agent.DefaultSystemPromptTemplate
+	agentConfig.UseCustomSystemPrompt = tenantInfo.AgentConfig.UseCustomSystemPrompt
+	if agentConfig.UseCustomSystemPrompt {
+		agentConfig.SystemPromptWebEnabled = tenantInfo.AgentConfig.ResolveSystemPrompt(true)
+		agentConfig.SystemPromptWebDisabled = tenantInfo.AgentConfig.ResolveSystemPrompt(false)
 	}

 	// Set web search max results from tenant config (default: 5)
--- a/internal/common/tools.go
+++ b/internal/common/tools.go
@@ -1,12 +1,18 @@
 package common

 import (
+	"context"
 	"encoding/json"
+	"fmt"
 	"maps"
 	"regexp"
 	"slices"
+	"sort"
+	"strconv"
 	"strings"
 	"unicode/utf8"
+
+	"github.com/Tencent/WeKnora/internal/logger"
 )

 // ToInterfaceSlice converts a slice of strings to a slice of empty interfaces.
@@ -132,3 +138,83 @@ func CleanInvalidUTF8(s string) string {

 	return b.String()
 }
+
+const (
+	pipelineLogValueMaxRune = 300          // max runes kept per value by truncatePipelineValue
+	defaultPipelineStage    = "PIPELINE"   // stage used when PipelineLog receives an empty stage
+	defaultPipelineAction   = "info"       // action used when PipelineLog receives an empty action
+	pipelineLogPrefix       = "[PIPELINE]" // leading marker of every pipeline log entry
+	pipelineTruncateEll     = "..."        // suffix appended to a truncated value
+)
+
+// PipelineLog renders a single-line, structured pipeline log entry of the form
+//
+//	[PIPELINE] stage=<stage> action=<action> key1=value1 key2=value2 ...
+//
+// An empty stage or action falls back to defaultPipelineStage or
+// defaultPipelineAction. Fields are written in ascending key order and each
+// value is rendered by formatPipelineLogValue.
+func PipelineLog(stage, action string, fields map[string]interface{}) string {
+	if stage == "" {
+		stage = defaultPipelineStage
+	}
+	if action == "" {
+		action = defaultPipelineAction
+	}
+
+	// Reserve 128 bytes up front as a starting capacity for the entry.
+	var sb strings.Builder
+	sb.Grow(128)
+	sb.WriteString(pipelineLogPrefix + " stage=" + stage + " action=" + action)
+
+	// Map iteration order is unspecified, so sort the keys for stable output.
+	names := make([]string, 0, len(fields))
+	for name := range fields {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		sb.WriteString(" " + name + "=" + formatPipelineLogValue(fields[name]))
+	}
+
+	return sb.String()
+}
+
+// PipelineInfo formats the entry with PipelineLog and writes it at Info level to the logger obtained from ctx.
+func PipelineInfo(ctx context.Context, stage, action string, fields map[string]interface{}) {
+	logger.GetLogger(ctx).Info(PipelineLog(stage, action, fields))
+}
+
+// PipelineWarn formats the entry with PipelineLog and writes it at Warn level to the logger obtained from ctx.
+func PipelineWarn(ctx context.Context, stage, action string, fields map[string]interface{}) {
+	logger.GetLogger(ctx).Warn(PipelineLog(stage, action, fields))
+}
+
+// PipelineError formats the entry with PipelineLog and writes it at Error level to the logger obtained from ctx.
+func PipelineError(ctx context.Context, stage, action string, fields map[string]interface{}) {
+	logger.GetLogger(ctx).Error(PipelineLog(stage, action, fields))
+}
+
+// formatPipelineLogValue quotes (after truncation) strings and fmt.Stringers; any other value is rendered with %v.
+func formatPipelineLogValue(value interface{}) string {
+	switch typed := value.(type) {
+	case string:
+		return strconv.Quote(truncatePipelineValue(typed))
+	case fmt.Stringer:
+		return strconv.Quote(truncatePipelineValue(typed.String()))
+	}
+	return fmt.Sprintf("%v", value)
+}
+
+// truncatePipelineValue replaces newlines with a literal backslash-n and keeps at most pipelineLogValueMaxRune runes, appending pipelineTruncateEll when cut.
+func truncatePipelineValue(content string) string {
+	escaped := strings.ReplaceAll(content, "\n", "\\n")
+	if runes := []rune(escaped); len(runes) > pipelineLogValueMaxRune {
+		return string(runes[:pipelineLogValueMaxRune]) + pipelineTruncateEll
+	}
+	return escaped
+}
+
+// TruncateForLog is the exported wrapper around truncatePipelineValue: it
+// returns content exactly as that helper escapes and truncates it.
+func TruncateForLog(content string) string { return truncatePipelineValue(content) }
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -23,7 +23,6 @@ type Config struct {
 	DocReader      *DocReaderConfig      `yaml:"docreader" json:"docreader"`
 	StreamManager  *StreamManagerConfig  `yaml:"stream_manager" json:"stream_manager"`
 	ExtractManager *ExtractManagerConfig `yaml:"extract" json:"extract"`
-	Agent          *AgentGlobalConfig    `yaml:"agent" json:"agent"`
 	WebSearch      *WebSearchConfig      `yaml:"web_search" json:"web_search"`
 }

@@ -191,15 +190,6 @@ func LoadConfig() (*Config, error) {
 	return &cfg, nil
 }

-// AgentGlobalConfig represents the global agent configuration
-type AgentGlobalConfig struct {
-	Enabled              bool     `yaml:"enabled" json:"enabled"`
-	DefaultMaxIterations int      `yaml:"default_max_iterations" json:"default_max_iterations"`
-	DefaultTemperature   float64  `yaml:"default_temperature" json:"default_temperature"`
-	ReflectionEnabled    bool     `yaml:"reflection_enabled" json:"reflection_enabled"`
-	DefaultTools         []string `yaml:"default_tools" json:"default_tools"`
-}
-
 // WebSearchConfig represents the web search configuration
 type WebSearchConfig struct {
 	Providers []WebSearchProviderConfig `yaml:"providers" json:"providers"`
--- a/internal/handler/session/helpers.go
+++ b/internal/handler/session/helpers.go
@@ -204,8 +204,8 @@ func (h *Handler) createDefaultSummaryConfig(ctx context.Context) *types.Summary
 		if tenant.ConversationConfig.Temperature > 0 {
 			cfg.Temperature = tenant.ConversationConfig.Temperature
 		}
-		if tenant.ConversationConfig.MaxTokens > 0 {
-			cfg.MaxTokens = tenant.ConversationConfig.MaxTokens
+		if tenant.ConversationConfig.MaxCompletionTokens > 0 {
+			cfg.MaxCompletionTokens = tenant.ConversationConfig.MaxCompletionTokens
 		}
 	}

@@ -221,7 +221,7 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
 	// Determine default values: tenant config first, then config.yaml
 	var defaultPrompt, defaultContextTemplate, defaultNoMatchPrefix string
 	var defaultTemperature float64
-	var defaultMaxTokens int
+	var defaultMaxCompletionTokens int

 	if tenant != nil && tenant.ConversationConfig != nil {
 		useSystemPrompt := tenant.ConversationConfig.UseCustomSystemPrompt
@@ -234,7 +234,7 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
 			defaultContextTemplate = tenant.ConversationConfig.ContextTemplate
 		}
 		defaultTemperature = tenant.ConversationConfig.Temperature
-		defaultMaxTokens = tenant.ConversationConfig.MaxTokens
+		defaultMaxCompletionTokens = tenant.ConversationConfig.MaxCompletionTokens
 	}

 	// Fall back to config.yaml if tenant config is empty
@@ -247,8 +247,8 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
 	if defaultTemperature == 0 {
 		defaultTemperature = h.config.Conversation.Summary.Temperature
 	}
-	if defaultMaxTokens == 0 {
-		defaultMaxTokens = h.config.Conversation.Summary.MaxTokens
+	if defaultMaxCompletionTokens == 0 {
+		defaultMaxCompletionTokens = h.config.Conversation.Summary.MaxCompletionTokens
 	}
 	defaultNoMatchPrefix = h.config.Conversation.Summary.NoMatchPrefix

@@ -262,8 +262,8 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
 	if config.Temperature == 0 {
 		config.Temperature = defaultTemperature
 	}
-	if config.MaxTokens == 0 {
-		config.MaxTokens = defaultMaxTokens
+	if config.MaxCompletionTokens == 0 {
+		config.MaxCompletionTokens = defaultMaxCompletionTokens
 	}
 	if config.NoMatchPrefix == "" {
 		config.NoMatchPrefix = defaultNoMatchPrefix
--- a/internal/handler/session/qa.go
+++ b/internal/handler/session/qa.go
@@ -463,7 +463,9 @@ func (h *Handler) handleKnowledgeQARequest(
 	go func() {
 		defer func() {
 			if r := recover(); r != nil {
-				logger.ErrorWithFields(asyncCtx, errors.NewInternalServerError("Knowledge QA service panicked"), nil)
+				buf := make([]byte, 10240)
+				buf = buf[:runtime.Stack(buf, true)] // keep only the bytes Stack wrote; the rest of buf is zero padding
+				logger.ErrorWithFields(asyncCtx, errors.NewInternalServerError(fmt.Sprintf("Knowledge QA service panicked: %v\n%s", r, string(buf))), nil)
 			}
 		}()
 		err := h.sessionService.KnowledgeQA(asyncCtx, session, query, knowledgeBaseIDs, assistantMessage.ID, summaryModelID, webSearchEnabled, eventBus)
--- a/internal/handler/tenant.go
+++ b/internal/handler/tenant.go
@@ -233,15 +233,16 @@ func (h *TenantHandler) ListTenants(c *gin.Context) {

 // AgentConfigRequest represents the request body for updating agent configuration
 type AgentConfigRequest struct {
-	Enabled           bool     `json:"enabled"`
-	MaxIterations     int      `json:"max_iterations"`
-	ReflectionEnabled bool     `json:"reflection_enabled"`
-	AllowedTools      []string `json:"allowed_tools"`
-	Temperature       float64  `json:"temperature"`
-	ThinkingModelID   string   `json:"thinking_model_id"`
-	RerankModelID     string   `json:"rerank_model_id"`
-	SystemPrompt      string   `json:"system_prompt,omitempty"` // System prompt template with placeholders (optional)
-	UseCustomPrompt   *bool    `json:"use_custom_system_prompt"`
+	Enabled                 bool     `json:"enabled"`
+	MaxIterations           int      `json:"max_iterations"`
+	ReflectionEnabled       bool     `json:"reflection_enabled"`
+	AllowedTools            []string `json:"allowed_tools"`
+	Temperature             float64  `json:"temperature"`
+	ThinkingModelID         string   `json:"thinking_model_id"`
+	RerankModelID           string   `json:"rerank_model_id"`
+	SystemPromptWebEnabled  string   `json:"system_prompt_web_enabled,omitempty"`
+	SystemPromptWebDisabled string   `json:"system_prompt_web_disabled,omitempty"`
+	UseCustomPrompt         *bool    `json:"use_custom_system_prompt"`
 }

 // GetTenantAgentConfig retrieves the agent configuration for a tenant
@@ -282,46 +283,50 @@ func (h *TenantHandler) GetTenantAgentConfig(c *gin.Context) {
 		c.JSON(http.StatusOK, gin.H{
 			"success": true,
 			"data": gin.H{
-				"max_iterations":           agent.DefaultAgentMaxIterations,
-				"reflection_enabled":       agent.DefaultAgentReflectionEnabled,
-				"allowed_tools":            agenttools.DefaultAllowedTools(),
-				"temperature":              agent.DefaultAgentTemperature,
-				"thinking_model_id":        "",
-				"rerank_model_id":          "",
-				"system_prompt":            agent.DefaultSystemPromptTemplate,
-				"use_custom_system_prompt": false,
-				"available_tools":          availableTools,
-				"available_placeholders":   availablePlaceholders,
+				"max_iterations":             agent.DefaultAgentMaxIterations,
+				"reflection_enabled":         agent.DefaultAgentReflectionEnabled,
+				"allowed_tools":              agenttools.DefaultAllowedTools(),
+				"temperature":                agent.DefaultAgentTemperature,
+				"thinking_model_id":          "",
+				"rerank_model_id":            "",
+				"system_prompt_web_enabled":  agent.ProgressiveRAGSystemPromptWithWeb,
+				"system_prompt_web_disabled": agent.ProgressiveRAGSystemPromptWithoutWeb,
+				"use_custom_system_prompt":   false,
+				"available_tools":            availableTools,
+				"available_placeholders":     availablePlaceholders,
 			},
 		})
 		return
 	}

-	// Get system prompt, use default if empty
-	systemPrompt := tenant.AgentConfig.SystemPrompt
+	// Get system prompts for both web search states, use defaults if empty
+	systemPromptWithWeb := tenant.AgentConfig.ResolveSystemPrompt(true)
+	if systemPromptWithWeb == "" {
+		systemPromptWithWeb = agent.ProgressiveRAGSystemPromptWithWeb
+	}
+	systemPromptWithoutWeb := tenant.AgentConfig.ResolveSystemPrompt(false)
+	if systemPromptWithoutWeb == "" {
+		systemPromptWithoutWeb = agent.ProgressiveRAGSystemPromptWithoutWeb
+	}
+
 	useCustomPrompt := tenant.AgentConfig.UseCustomSystemPrompt
-	if !useCustomPrompt && systemPrompt != "" && systemPrompt != agent.DefaultSystemPromptTemplate {
-		useCustomPrompt = true
-	}
-	if systemPrompt == "" {
-		systemPrompt = agent.DefaultSystemPromptTemplate
-	}

 	logger.Infof(ctx, "Retrieved tenant agent config successfully, Tenant ID: %d", tenant.ID)
 	c.JSON(http.StatusOK, gin.H{
 		"success": true,
 		"data": gin.H{
-			"enabled":                  tenant.AgentConfig.Enabled,
-			"max_iterations":           tenant.AgentConfig.MaxIterations,
-			"reflection_enabled":       tenant.AgentConfig.ReflectionEnabled,
-			"allowed_tools":            tenant.AgentConfig.AllowedTools,
-			"temperature":              tenant.AgentConfig.Temperature,
-			"thinking_model_id":        tenant.AgentConfig.ThinkingModelID,
-			"rerank_model_id":          tenant.AgentConfig.RerankModelID,
-			"system_prompt":            systemPrompt,
-			"use_custom_system_prompt": useCustomPrompt,
-			"available_tools":          availableTools,
-			"available_placeholders":   availablePlaceholders,
+			"enabled":                    tenant.AgentConfig.Enabled,
+			"max_iterations":             tenant.AgentConfig.MaxIterations,
+			"reflection_enabled":         tenant.AgentConfig.ReflectionEnabled,
+			"allowed_tools":              tenant.AgentConfig.AllowedTools,
+			"temperature":                tenant.AgentConfig.Temperature,
+			"thinking_model_id":          tenant.AgentConfig.ThinkingModelID,
+			"rerank_model_id":            tenant.AgentConfig.RerankModelID,
+			"system_prompt_web_enabled":  systemPromptWithWeb,
+			"system_prompt_web_disabled": systemPromptWithoutWeb,
+			"use_custom_system_prompt":   useCustomPrompt,
+			"available_tools":            availableTools,
+			"available_placeholders":     availablePlaceholders,
 		},
 	})
 }
@@ -368,15 +373,16 @@ func (h *TenantHandler) updateTenantAgentConfigInternal(c *gin.Context) {
 	}

 	tenant.AgentConfig = &types.AgentConfig{
-		Enabled:               req.Enabled,
-		MaxIterations:         req.MaxIterations,
-		ReflectionEnabled:     req.ReflectionEnabled,
-		AllowedTools:          req.AllowedTools,
-		Temperature:           req.Temperature,
-		ThinkingModelID:       req.ThinkingModelID,
-		RerankModelID:         req.RerankModelID,
-		SystemPrompt:          req.SystemPrompt,
-		UseCustomSystemPrompt: useCustomPrompt,
+		Enabled:                 req.Enabled,
+		MaxIterations:           req.MaxIterations,
+		ReflectionEnabled:       req.ReflectionEnabled,
+		AllowedTools:            req.AllowedTools,
+		Temperature:             req.Temperature,
+		ThinkingModelID:         req.ThinkingModelID,
+		RerankModelID:           req.RerankModelID,
+		SystemPromptWebEnabled:  req.SystemPromptWebEnabled,
+		SystemPromptWebDisabled: req.SystemPromptWebDisabled,
+		UseCustomSystemPrompt:   useCustomPrompt,
 	}

 	updatedTenant, err := h.service.UpdateTenant(ctx, tenant)
@@ -517,7 +523,7 @@ func (h *TenantHandler) buildDefaultConversationConfig() *types.ConversationConf
 		UseCustomContextTemplate: true,
 		UseCustomSystemPrompt:    true,
 		Temperature:              h.config.Conversation.Summary.Temperature,
-		MaxTokens:                h.config.Conversation.Summary.MaxTokens,
+		MaxCompletionTokens:      h.config.Conversation.Summary.MaxCompletionTokens,
 		MaxRounds:                h.config.Conversation.MaxRounds,
 		EmbeddingTopK:            h.config.Conversation.EmbeddingTopK,
 		KeywordThreshold:         h.config.Conversation.KeywordThreshold,
@@ -555,8 +561,8 @@ func validateConversationConfig(req *types.ConversationConfig) error {
 	if req.Temperature < 0 || req.Temperature > 2 {
 		return errors.NewBadRequestError("temperature must be between 0 and 2")
 	}
-	if req.MaxTokens <= 0 || req.MaxTokens > 100000 {
-		return errors.NewBadRequestError("max_tokens must be between 1 and 100000")
+	if req.MaxCompletionTokens <= 0 || req.MaxCompletionTokens > 100000 {
+		return errors.NewBadRequestError("max_completion_tokens must be between 1 and 100000")
 	}
 	if req.FallbackStrategy != "" &&
 		req.FallbackStrategy != string(types.FallbackStrategyFixed) &&
@@ -609,8 +615,8 @@ func (h *TenantHandler) GetTenantConversationConfig(c *gin.Context) {
 		if tc.Temperature > 0 {
 			defaultCfg.Temperature = tc.Temperature
 		}
-		if tc.MaxTokens > 0 {
-			defaultCfg.MaxTokens = tc.MaxTokens
+		if tc.MaxCompletionTokens > 0 {
+			defaultCfg.MaxCompletionTokens = tc.MaxCompletionTokens
 		}

 		// Retrieval parameters
--- a/internal/models/embedding/openai.go
+++ b/internal/models/embedding/openai.go
@@ -152,7 +152,9 @@ func (e *OpenAIEmbedder) BatchEmbed(ctx context.Context, texts []string) ([][]fl
 		logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch send request error: %v", err)
 		return nil, fmt.Errorf("send request: %w", err)
 	}
-	defer resp.Body.Close()
+	if resp.Body != nil {
+		defer resp.Body.Close()
+	}

 	// Read response
 	body, err := io.ReadAll(resp.Body)
--- a/internal/types/agent.go
+++ b/internal/types/agent.go
@@ -10,18 +10,19 @@ import (
 // AgentConfig represents the full agent configuration (used at tenant level and runtime)
 // This includes all configuration parameters for agent execution
 type AgentConfig struct {
-	Enabled               bool     `json:"enabled"`                  // Whether agent mode is enabled
-	MaxIterations         int      `json:"max_iterations"`           // Maximum number of ReAct iterations
-	ReflectionEnabled     bool     `json:"reflection_enabled"`       // Whether to enable reflection
-	AllowedTools          []string `json:"allowed_tools"`            // List of allowed tool names
-	Temperature           float64  `json:"temperature"`              // LLM temperature for agent
-	ThinkingModelID       string   `json:"thinking_model_id"`        // Model ID for reasoning
-	RerankModelID         string   `json:"rerank_model_id"`          // Model ID for reranking search results
-	KnowledgeBases        []string `json:"knowledge_bases"`          // Accessible knowledge base IDs
-	SystemPrompt          string   `json:"system_prompt,omitempty"`  // System prompt template with placeholders (optional)
-	UseCustomSystemPrompt bool     `json:"use_custom_system_prompt"` // Whether to use custom system prompt instead of default
-	WebSearchEnabled      bool     `json:"web_search_enabled"`       // Whether web search tool is enabled
-	WebSearchMaxResults   int      `json:"web_search_max_results"`   // Maximum number of web search results (default: 5)
+	Enabled                 bool     `json:"enabled"`                              // Whether agent mode is enabled
+	MaxIterations           int      `json:"max_iterations"`                       // Maximum number of ReAct iterations
+	ReflectionEnabled       bool     `json:"reflection_enabled"`                   // Whether to enable reflection
+	AllowedTools            []string `json:"allowed_tools"`                        // List of allowed tool names
+	Temperature             float64  `json:"temperature"`                          // LLM temperature for agent
+	ThinkingModelID         string   `json:"thinking_model_id"`                    // Model ID for reasoning
+	RerankModelID           string   `json:"rerank_model_id"`                      // Model ID for reranking search results
+	KnowledgeBases          []string `json:"knowledge_bases"`                      // Accessible knowledge base IDs
+	SystemPromptWebEnabled  string   `json:"system_prompt_web_enabled,omitempty"`  // Custom prompt when web search is enabled
+	SystemPromptWebDisabled string   `json:"system_prompt_web_disabled,omitempty"` // Custom prompt when web search is disabled
+	UseCustomSystemPrompt   bool     `json:"use_custom_system_prompt"`             // Whether to use custom system prompt instead of default
+	WebSearchEnabled        bool     `json:"web_search_enabled"`                   // Whether web search tool is enabled
+	WebSearchMaxResults     int      `json:"web_search_max_results"`               // Maximum number of web search results (default: 5)
 }

 // SessionAgentConfig represents session-level agent configuration
@@ -66,6 +67,25 @@ func (c *SessionAgentConfig) Scan(value interface{}) error {
 	return json.Unmarshal(b, c)
 }

+// ResolveSystemPrompt returns the custom prompt template configured for the
+// given web search state.
+//
+// It yields SystemPromptWebEnabled when webSearchEnabled is true and
+// SystemPromptWebDisabled otherwise. The result is the empty string when the
+// receiver is nil or the selected prompt is unset; no default template is
+// substituted here.
+func (c *AgentConfig) ResolveSystemPrompt(webSearchEnabled bool) string {
+	// A nil config carries no custom prompts.
+	if c == nil {
+		return ""
+	}
+
+	if webSearchEnabled {
+		return c.SystemPromptWebEnabled
+	}
+	return c.SystemPromptWebDisabled
+}
+
 // Tool defines the interface that all agent tools must implement
 type Tool interface {
 	// Name returns the unique identifier for this tool
--- a/internal/types/chunk.go
+++ b/internal/types/chunk.go
@@ -26,6 +26,8 @@ const (
 	ChunkTypeRelationship ChunkType = "relationship"
 	// ChunkTypeFAQ 表示 FAQ 条目 Chunk
 	ChunkTypeFAQ ChunkType = "faq"
+	// ChunkTypeWebSearch 表示 Web 搜索结果的 Chunk
+	ChunkTypeWebSearch ChunkType = "web_search"
 )

 // ImageInfo 表示与 Chunk 关联的图片信息
--- a/internal/types/interfaces/chunk.go
+++ b/internal/types/interfaces/chunk.go
@@ -64,4 +64,6 @@ type ChunkService interface {
 	DeleteByKnowledgeList(ctx context.Context, ids []string) error
 	// ListChunkByParentID lists chunks by parent id
 	ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error)
+	// GetRepository gets the chunk repository
+	GetRepository() ChunkRepository
 }
--- a/internal/types/interfaces/knowledge.go
+++ b/internal/types/interfaces/knowledge.go
@@ -69,6 +69,8 @@ type KnowledgeService interface {
 	UpdateKnowledgeTagBatch(ctx context.Context, updates map[string]*string) error
 	// UpdateFAQEntryTagBatch updates tag for FAQ entries in batch.
 	UpdateFAQEntryTagBatch(ctx context.Context, kbID string, updates map[string]*string) error
+	// GetRepository gets the knowledge repository
+	GetRepository() KnowledgeRepository
 }

 // KnowledgeRepository defines the interface for knowledge repositories.
--- a/internal/types/interfaces/knowledgebase.go
+++ b/internal/types/interfaces/knowledgebase.go
@@ -80,6 +80,14 @@ type KnowledgeBaseService interface {
 	//   - Copied knowledge base object
 	//   - Possible errors such as not existing, insufficient permissions, etc.
 	CopyKnowledgeBase(ctx context.Context, src string, dst string) (*types.KnowledgeBase, *types.KnowledgeBase, error)
+
+	// GetRepository returns the knowledge base repository used by the service.
+	// Parameters:
+	//   - none
+	//
+	// Returns:
+	//   - KnowledgeBaseRepository: Knowledge base repository
+	GetRepository() KnowledgeBaseRepository
 }

 // KnowledgeBaseRepository defines the knowledge base repository interface
--- a/internal/types/tenant.go
+++ b/internal/types/tenant.go
@@ -83,7 +83,7 @@ type ConversationConfig struct {
 	// Temperature controls the randomness of the model output
 	Temperature float64 `json:"temperature"`
 	// MaxTokens is the maximum number of tokens to generate
-	MaxTokens int `json:"max_tokens"`
+	MaxCompletionTokens int `json:"max_completion_tokens"`

 	// Retrieval & strategy parameters
 	MaxRounds        int     `json:"max_rounds"`