feat: Enhance agent configuration and localization for web search prompts

- Updated agent configuration to support separate system prompts for web search enabled and disabled states.
- Removed deprecated agent configuration parameters to streamline settings management.
- Enhanced UI components in AgentSettings.vue to allow configuration of custom prompts based on web search status.
- Improved localization in English, Russian, and Chinese for new prompt settings and UI elements.
- Refactored related API and service logic to accommodate changes in agent configuration structure.
This commit is contained in:
wizardchen
2025-11-19 19:27:25 +08:00
parent ddf6b4e748
commit 3bf2a14499
44 changed files with 2412 additions and 1646 deletions

View File

@@ -590,19 +590,6 @@ extract:
请随机生成一段文本,要求内容与 %s 等相关,字数在 [50-200] 之间,并且尽量包含一些与这些标签相关的专业术语或典型元素,使文本更具针对性和相关性。
with_no_tag: |
请随机生成一段文本,内容请自由发挥,字数在 [50-200] 之间。
# Agent 配置
agent:
enabled: true
default_max_iterations: 5
default_temperature: 0.7
reflection_enabled: false
default_tools:
- knowledge_search
- multi_kb_search
- list_knowledge_bases
- get_related_chunks
- query_knowledge_graph
- get_document_info
# WebSearch 配置
web_search:

View File

@@ -79,7 +79,7 @@ services:
- WeKnora-network-dev
docreader:
image: wechatopenai/weknora-docreader:latest
image: wechatopenai/weknora-docreader:main
container_name: WeKnora-docreader-dev
ports:
- "${DOCREADER_PORT:-50051}:50051"

View File

@@ -32,7 +32,8 @@ export interface AgentConfig {
thinking_model_id: string
rerank_model_id: string
knowledge_bases?: string[]
system_prompt?: string // System prompt template with placeholders (optional)
system_prompt_web_enabled?: string // Custom system prompt when web search is enabled
system_prompt_web_disabled?: string // Custom system prompt when web search is disabled
use_custom_system_prompt?: boolean
available_tools?: ToolDefinition[] // GET 响应中包含POST/PUT 不需要
available_placeholders?: PlaceholderDefinition[] // GET 响应中包含POST/PUT 不需要
@@ -42,7 +43,7 @@ export interface ConversationConfig {
prompt: string
context_template: string
temperature: number
max_tokens: number
max_completion_tokens: number
use_custom_system_prompt?: boolean
use_custom_context_template?: boolean
max_rounds: number

View File

@@ -1343,7 +1343,10 @@ export default {
hintSuffix: 'will show available placeholders automatically',
custom: 'Custom Prompt',
disabledHint: 'Currently using the default prompt. Enable custom to apply the content below.',
placeholder: 'Enter the system prompt, or leave blank to use the default...'
placeholder: 'Enter the system prompt, or leave blank to use the default...',
tabHint: 'Configure separate prompts for sessions with web search on or off.',
tabWebOn: 'Web search enabled',
tabWebOff: 'Web search disabled'
},
reset: {
header: 'Reset to Default Prompt',

View File

@@ -1207,7 +1207,10 @@ export default {
hintSuffix: 'откроется список доступных подстановок',
custom: 'Пользовательский промпт',
disabledHint: 'Сейчас используется промпт по умолчанию. Включите пользовательский, чтобы применить содержимое ниже.',
placeholder: 'Введите системный промпт или оставьте пустым для значения по умолчанию...'
placeholder: 'Введите системный промпт или оставьте пустым для значения по умолчанию...',
tabHint: 'Настройте разные промпты для режимов с включённым и отключённым веб-поиском.',
tabWebOn: 'Веб-поиск включён',
tabWebOff: 'Веб-поиск отключён'
},
reset: {
header: 'Сбросить к промпту по умолчанию',

View File

@@ -1348,6 +1348,9 @@ export default {
custom: "自定义 Prompt",
disabledHint: "当前使用系统默认 Prompt开启自定义后才会应用下方内容。",
placeholder: "请输入系统 Prompt或留空使用默认 Prompt...",
tabHint: "根据是否启用网络搜索分别配置系统 Prompt。",
tabWebOn: "网络搜索已启用",
tabWebOff: "网络搜索未启用",
},
reset: {
header: "恢复默认 Prompt",

View File

@@ -20,6 +20,9 @@ interface AgentConfig {
thinkingModelId: string;
rerankModelId: string;
allowedTools: string[];
system_prompt_web_enabled?: string;
system_prompt_web_disabled?: string;
use_custom_system_prompt?: boolean;
}
// 单个模型项接口
@@ -60,7 +63,10 @@ const defaultSettings: Settings = {
temperature: 0.7,
thinkingModelId: "",
rerankModelId: "",
allowedTools: ["knowledge_search", "multi_kb_search", "list_knowledge_bases"]
allowedTools: ["knowledge_search", "multi_kb_search", "list_knowledge_bases"],
system_prompt_web_enabled: "",
system_prompt_web_disabled: "",
use_custom_system_prompt: false
},
selectedKnowledgeBases: [], // 默认为空数组
modelConfig: {

View File

@@ -8,8 +8,8 @@ export const toolIcons: Record<string, string> = {
multi_kb_search: '🔍',
knowledge_search: '📚',
get_chunk_detail: '📄',
get_related_chunks: '🔗',
list_knowledge_bases: '📂',
list_knowledge_chunks: '🧩',
get_document_info: '',
query_knowledge_graph: '🕸️',
think: '💭',
@@ -43,11 +43,11 @@ export function getToolDisplayName(toolName: string): string {
multi_kb_search: '跨库搜索',
knowledge_search: '知识库搜索',
get_chunk_detail: '获取片段详情',
get_related_chunks: '获取相关片段',
list_knowledge_chunks: '查看知识分块',
list_knowledge_bases: '列出知识库',
get_document_info: '获取文档信息',
query_knowledge_graph: '查询知识图谱',
think: '思考',
think: '深度思考',
todo_write: '制定计划',
};
return displayNames[toolName] || toolName;

View File

@@ -213,7 +213,7 @@ const TOOL_NAME_I18N: Record<string, string> = {
web_search: '网络搜索',
web_fetch: '网页抓取',
get_document_info: '获取文档信息',
get_related_chunks: '查找相关片段',
list_knowledge_chunks: '查看知识分块',
get_related_documents: '查找相关文档',
get_document_content: '获取文档内容',
todo_write: '计划管理',
@@ -511,7 +511,7 @@ const intermediateStepsSummary = computed(() => {
if (toolCalls.length > 0) {
const toolNames = toolCalls.map(name => {
if (name === 'get_document_info') return '获取文档';
if (name === 'get_related_chunks') return '获取相关片段';
if (name === 'list_knowledge_chunks') return '查看知识分块';
return name;
});
if (toolNames.length === 1) {
@@ -1080,9 +1080,10 @@ const getToolSummary = (event: any): string => {
if (toolData?.title) {
return `获取文档:${toolData.title}`;
}
} else if (toolName === 'get_related_chunks') {
if (toolData?.count !== undefined) {
return `找到 ${toolData.count} 个相关片段`;
} else if (toolName === 'list_knowledge_chunks') {
if (toolData?.fetched_chunks !== undefined) {
const title = toolData?.knowledge_title || toolData?.knowledge_id || '文档';
return `查看 ${title}${toolData.fetched_chunks}/${toolData.total_chunks ?? '?'} 个分块`;
}
} else if (toolName === 'todo_write') {
// Extract steps from tool data
@@ -1183,7 +1184,7 @@ const getToolIcon = (toolName: string): string => {
return knowledgeIcon;
} else if (toolName === 'web_search') {
return webSearchGlobeGreenIcon;
} else if (toolName === 'get_document_info' || toolName === 'get_related_chunks') {
} else if (toolName === 'get_document_info' || toolName === 'list_knowledge_chunks') {
return documentIcon;
} else if (toolName === 'todo_write') {
return fileAddIcon;

View File

@@ -16,14 +16,6 @@
</div>
</div>
<div class="status-section">
<div class="status-title">{{ $t('chat.statusDescription') }}</div>
<div class="status-list">
<div class="status-item"> {{ $t('chat.statusIndexed') }}</div>
<div class="status-item"> {{ $t('chat.statusSearchable') }}</div>
<div class="status-item"> {{ $t('chat.statusChunkDetailAvailable') }}</div>
</div>
</div>
</div>
</div>
</template>

View File

@@ -151,22 +151,51 @@
{{ $t('common.resetToDefault') }}
</t-button>
</div>
<p class="prompt-tab-hint">
{{ $t('agentSettings.systemPrompt.tabHint') }}
</p>
<p v-if="!localUseCustomSystemPrompt" class="prompt-disabled-hint">
{{ $t('agentSettings.systemPrompt.disabledHint') }}
</p>
<div v-if="localUseCustomSystemPrompt" class="prompt-textarea-wrapper">
<t-textarea
ref="promptTextareaRef"
v-model="localSystemPrompt"
:autosize="{ minRows: 15, maxRows: 30 }"
:placeholder="$t('agentSettings.systemPrompt.placeholder')"
@blur="handleSystemPromptChange"
@input="handlePromptInput"
@keydown="handlePromptKeydown"
:readonly="!localUseCustomSystemPrompt"
:class="{ 'prompt-textarea-readonly': !localUseCustomSystemPrompt }"
style="width: 100%; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 13px;"
/>
<div v-if="localUseCustomSystemPrompt" class="system-prompt-tabs">
<t-tabs
v-model="activeSystemPromptTab"
class="system-prompt-variant-tabs"
theme="normal"
>
<t-tab-panel value="web-enabled" :label="$t('agentSettings.systemPrompt.tabWebOn')">
<div v-if="activeSystemPromptTab === 'web-enabled'" class="prompt-textarea-wrapper">
<t-textarea
ref="promptTextareaRef"
v-model="localSystemPromptWebEnabled"
:autosize="{ minRows: 15, maxRows: 30 }"
:placeholder="$t('agentSettings.systemPrompt.placeholder')"
@blur="handleSystemPromptChange('web-enabled', $event)"
@input="handlePromptInput"
@keydown="handlePromptKeydown"
:readonly="!localUseCustomSystemPrompt"
:class="{ 'prompt-textarea-readonly': !localUseCustomSystemPrompt }"
style="width: 100%; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 13px;"
/>
</div>
</t-tab-panel>
<t-tab-panel value="web-disabled" :label="$t('agentSettings.systemPrompt.tabWebOff')">
<div v-if="activeSystemPromptTab === 'web-disabled'" class="prompt-textarea-wrapper">
<t-textarea
ref="promptTextareaRef"
v-model="localSystemPromptWebDisabled"
:autosize="{ minRows: 15, maxRows: 30 }"
:placeholder="$t('agentSettings.systemPrompt.placeholder')"
@blur="handleSystemPromptChange('web-disabled', $event)"
@input="handlePromptInput"
@keydown="handlePromptKeydown"
:readonly="!localUseCustomSystemPrompt"
:class="{ 'prompt-textarea-readonly': !localUseCustomSystemPrompt }"
style="width: 100%; font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; font-size: 13px;"
/>
</div>
</t-tab-panel>
</t-tabs>
</div>
<!-- 占位符提示下拉框 -->
<teleport to="body">
@@ -580,11 +609,11 @@
</div>
<div class="setting-control">
<t-input-number
v-model="localMaxTokens"
v-model="localMaxCompletionTokens"
:min="1"
:max="100000"
:step="100"
@change="handleMaxTokensChange"
@change="handleMaxCompletionTokensChange"
style="width: 200px;"
/>
</div>
@@ -596,6 +625,7 @@
<script setup lang="ts">
import { ref, onMounted, watch, computed, nextTick } from 'vue'
import type { Ref } from 'vue'
import { useRouter } from 'vue-router'
import { useSettingsStore } from '@/stores/settings'
import { MessagePlugin, DialogPlugin } from 'tdesign-vue-next'
@@ -622,7 +652,7 @@ const getDefaultConversationConfig = (): ConversationConfig => ({
prompt: '',
context_template: '',
temperature: 0.3,
max_tokens: 2048,
max_completion_tokens: 2048,
use_custom_system_prompt: true,
use_custom_context_template: true,
max_rounds: 5,
@@ -656,18 +686,32 @@ const localTemperature = ref(0.7)
const localThinkingModelId = ref('')
const localRerankModelId = ref('')
const localAllowedTools = ref<string[]>([])
const localSystemPrompt = ref('')
// Identifies which system-prompt variant is being edited: the prompt used when
// web search is enabled, or the prompt used when it is disabled.
type SystemPromptTab = 'web-enabled' | 'web-disabled'
// Currently selected prompt tab; starts on the web-search-enabled variant.
const activeSystemPromptTab = ref<SystemPromptTab>('web-enabled')
// Editable text of each prompt variant (bound to the matching textarea via v-model).
const localSystemPromptWebEnabled = ref('')
const localSystemPromptWebDisabled = ref('')
// Lookup from tab id to the ref that holds that tab's prompt text.
const systemPromptRefs: Record<SystemPromptTab, Ref<string>> = {
'web-enabled': localSystemPromptWebEnabled,
'web-disabled': localSystemPromptWebDisabled,
}
// Last saved prompt text per tab. This is a plain (non-reactive) object: it is only
// compared against the local refs to decide whether a save request is needed.
const savedSystemPromptMap: Record<SystemPromptTab, string> = {
'web-enabled': '',
'web-disabled': '',
}
// Returns the ref holding the prompt text for the given tab.
const getPromptRefByTab = (tab: SystemPromptTab) => systemPromptRefs[tab]
// Returns the ref holding the prompt text for whichever tab is currently active.
const getActivePromptRef = () => getPromptRefByTab(activeSystemPromptTab.value)
const localUseCustomSystemPrompt = ref(false)
// 普通模式本地状态
const localContextTemplate = ref('')
const localSystemPromptNormal = ref('')
const localTemperatureNormal = ref(0.3)
const localMaxTokens = ref(2048)
const localMaxCompletionTokens = ref(2048)
let savedContextTemplate = ''
let savedSystemPromptNormal = ''
let savedTemperatureNormal = 0.3
let savedMaxTokens = 2048
let savedMaxCompletionTokens = 2048
const localMaxRounds = ref(5)
const localEmbeddingTopK = ref(10)
@@ -696,8 +740,8 @@ const syncConversationLocals = () => {
savedSystemPromptNormal = localSystemPromptNormal.value
localTemperatureNormal.value = cfg.temperature ?? 0.3
savedTemperatureNormal = localTemperatureNormal.value
localMaxTokens.value = cfg.max_tokens ?? 2048
savedMaxTokens = localMaxTokens.value
localMaxCompletionTokens.value = cfg.max_completion_tokens ?? 2048
savedMaxCompletionTokens = localMaxCompletionTokens.value
localMaxRounds.value = cfg.max_rounds ?? 5
localEmbeddingTopK.value = cfg.embedding_top_k ?? 10
@@ -750,6 +794,20 @@ const isAgentReady = computed(() => {
localAllowedTools.value.length > 0
})
/**
 * Assemble the complete agent configuration payload from the current local form state.
 *
 * All fields are read from the component's local refs at call time. `enabled` mirrors
 * `isAgentReady`, and `reflection_enabled` is sent as `false` unless overridden.
 *
 * @param overrides - Fields that take precedence over the values read from local state.
 * @returns The merged configuration object to send when saving.
 */
const buildAgentConfigPayload = (overrides: Partial<AgentConfig> = {}): AgentConfig => {
  // Snapshot of the values currently held in the form.
  const fromLocalState = {
    enabled: isAgentReady.value,
    max_iterations: localMaxIterations.value,
    reflection_enabled: false,
    allowed_tools: localAllowedTools.value,
    temperature: localTemperature.value,
    thinking_model_id: localThinkingModelId.value,
    rerank_model_id: localRerankModelId.value,
    system_prompt_web_enabled: localSystemPromptWebEnabled.value,
    system_prompt_web_disabled: localSystemPromptWebDisabled.value,
    use_custom_system_prompt: localUseCustomSystemPrompt.value,
  }

  // Overrides are spread last so that caller-supplied fields win.
  return { ...fromLocalState, ...overrides }
}
// Agent 状态提示消息
const agentStatusMessage = computed(() => {
const missing: string[] = []
@@ -787,7 +845,6 @@ const configLoaded = ref(false) // 防止重复加载
const isInitializing = ref(true) // 标记是否正在初始化,防止初始化时触发保存
// 保存的 Prompt 值,用于比较是否变化
let savedSystemPrompt = ''
let savedUseCustomSystemPrompt = false
// 恢复默认 Prompt 的加载状态
@@ -801,6 +858,12 @@ let placeholderPopupTimer: any = null
const placeholderPrefix = ref('') // 当前输入的前缀,用于过滤
const popupStyle = ref({ top: '0px', left: '0px' }) // 提示框位置
// When the user switches between prompt tabs, discard any in-progress placeholder
// completion: hide the popup, clear the typed filter prefix, and reset the
// highlighted entry to the first item.
watch(activeSystemPromptTab, () => {
showPlaceholderPopup.value = false
placeholderPrefix.value = ''
selectedPlaceholderIndex.value = 0
})
// 设置 textarea 原生事件监听器
const setupTextareaEventListeners = () => {
nextTick(() => {
@@ -916,8 +979,12 @@ onMounted(async () => {
localThinkingModelId.value = config.thinking_model_id
localRerankModelId.value = config.rerank_model_id
localAllowedTools.value = config.allowed_tools || []
localSystemPrompt.value = config.system_prompt || ''
savedSystemPrompt = config.system_prompt || '' // 记录已保存的值
const promptWebEnabled = config.system_prompt_web_enabled || ''
const promptWebDisabled = config.system_prompt_web_disabled || ''
localSystemPromptWebEnabled.value = promptWebEnabled
localSystemPromptWebDisabled.value = promptWebDisabled
savedSystemPromptMap['web-enabled'] = promptWebEnabled
savedSystemPromptMap['web-disabled'] = promptWebDisabled
const useCustomPrompt = config.use_custom_system_prompt ?? false
localUseCustomSystemPrompt.value = useCustomPrompt
savedUseCustomSystemPrompt = useCustomPrompt
@@ -940,7 +1007,10 @@ onMounted(async () => {
temperature: config.temperature,
thinkingModelId: config.thinking_model_id,
rerankModelId: config.rerank_model_id,
allowedTools: config.allowed_tools || []
allowedTools: config.allowed_tools || [],
system_prompt_web_enabled: promptWebEnabled,
system_prompt_web_disabled: promptWebDisabled,
use_custom_system_prompt: useCustomPrompt
})
// 加载普通模式配置
@@ -1040,18 +1110,7 @@ const handleMaxIterationsChangeDebounced = (value: number) => {
}
try {
const config: AgentConfig = {
enabled: isAgentReady.value, // 自动根据配置状态设置
max_iterations: numValue, // 确保是数字类型
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: localTemperature.value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: localRerankModelId.value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: localUseCustomSystemPrompt.value
}
const config = buildAgentConfigPayload({ max_iterations: numValue })
await updateAgentConfig(config)
settingsStore.updateAgentConfig({ maxIterations: numValue })
lastSavedValue = numValue // 记录已保存的值
@@ -1105,18 +1164,7 @@ const handleThinkingModelChange = async (value: string) => {
}
try {
const config: AgentConfig = {
enabled: isAgentReady.value, // 自动根据配置状态设置
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: localTemperature.value,
thinking_model_id: value,
rerank_model_id: localRerankModelId.value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: localUseCustomSystemPrompt.value
}
const config = buildAgentConfigPayload({ thinking_model_id: value })
await updateAgentConfig(config)
// 更新 store确保 isAgentReady 能正确计算
settingsStore.updateAgentConfig({ thinkingModelId: value })
@@ -1140,18 +1188,7 @@ const handleRerankModelChange = async (value: string) => {
}
try {
const config: AgentConfig = {
enabled: isAgentReady.value, // 自动根据配置状态设置
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: localTemperature.value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: localUseCustomSystemPrompt.value
}
const config = buildAgentConfigPayload({ rerank_model_id: value })
await updateAgentConfig(config)
settingsStore.updateAgentConfig({ rerankModelId: value })
MessagePlugin.success(t('agentSettings.toasts.rerankModelSaved'))
@@ -1222,18 +1259,7 @@ const handleTemperatureChange = async (value: number) => {
if (isInitializing.value) return
try {
const config: AgentConfig = {
enabled: isAgentReady.value, // 自动根据配置状态设置
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: localRerankModelId.value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: localUseCustomSystemPrompt.value
}
const config = buildAgentConfigPayload({ temperature: value })
await updateAgentConfig(config)
settingsStore.updateAgentConfig({ temperature: value })
MessagePlugin.success(t('agentSettings.toasts.temperatureSaved'))
@@ -1249,18 +1275,7 @@ const handleAllowedToolsChange = async (value: string[]) => {
if (isInitializing.value) return
try {
const config: AgentConfig = {
enabled: isAgentReady.value, // 自动根据配置状态设置
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: value,
temperature: localTemperature.value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: localRerankModelId.value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: localUseCustomSystemPrompt.value
}
const config = buildAgentConfigPayload({ allowed_tools: value })
await updateAgentConfig(config)
settingsStore.updateAgentConfig({ allowedTools: value })
MessagePlugin.success(t('agentSettings.toasts.toolsUpdated'))
@@ -1278,18 +1293,7 @@ const handleUseCustomPromptToggle = async (value: boolean) => {
if (value === savedUseCustomSystemPrompt) return
try {
const config: AgentConfig = {
enabled: isAgentReady.value,
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: localTemperature.value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: localRerankModelId.value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: value
}
const config = buildAgentConfigPayload({ use_custom_system_prompt: value })
await updateAgentConfig(config)
savedUseCustomSystemPrompt = value
@@ -1331,7 +1335,8 @@ const filteredPlaceholders = computed(() => {
// 计算光标在 textarea 中的像素位置
const calculateCursorPosition = (textarea: HTMLTextAreaElement) => {
const cursorPos = textarea.selectionStart
const textBeforeCursor = localSystemPrompt.value.substring(0, cursorPos)
const activePromptValue = getActivePromptRef().value
const textBeforeCursor = activePromptValue.substring(0, cursorPos)
// 获取 textarea 的样式和位置
const style = window.getComputedStyle(textarea)
@@ -1383,7 +1388,7 @@ const checkAndShowPlaceholderPopup = () => {
}
const cursorPos = textarea.selectionStart
const textBeforeCursor = localSystemPrompt.value.substring(0, cursorPos)
const textBeforeCursor = getActivePromptRef().value.substring(0, cursorPos)
// 检查是否输入了 {{(从光标位置向前查找最近的 {{
// 需要找到光标前最近的 {{,且中间没有 }}
@@ -1463,15 +1468,17 @@ const insertPlaceholder = (placeholderName: string) => {
// 延迟执行,确保提示框已关闭
nextTick(() => {
const cursorPos = textarea.selectionStart
const textBeforeCursor = localSystemPrompt.value.substring(0, cursorPos)
const textAfterCursor = localSystemPrompt.value.substring(cursorPos)
const promptRef = getActivePromptRef()
const currentValue = promptRef.value
const textBeforeCursor = currentValue.substring(0, cursorPos)
const textAfterCursor = currentValue.substring(cursorPos)
// 找到最后一个 {{ 的位置
const lastOpenPos = textBeforeCursor.lastIndexOf('{{')
if (lastOpenPos === -1) {
// 如果没有找到 {{,直接插入完整的占位符
const placeholder = `{{${placeholderName}}}`
localSystemPrompt.value = textBeforeCursor + placeholder + textAfterCursor
promptRef.value = textBeforeCursor + placeholder + textAfterCursor
// 设置光标位置
nextTick(() => {
const newPos = cursorPos + placeholder.length
@@ -1482,7 +1489,7 @@ const insertPlaceholder = (placeholderName: string) => {
// 替换 {{ 到光标位置的内容为完整的占位符
const beforePlaceholder = textBeforeCursor.substring(0, lastOpenPos)
const placeholder = `{{${placeholderName}}}`
localSystemPrompt.value = beforePlaceholder + placeholder + textAfterCursor
promptRef.value = beforePlaceholder + placeholder + textAfterCursor
// 设置光标位置
nextTick(() => {
const newPos = lastOpenPos + placeholder.length
@@ -1504,30 +1511,27 @@ const handleResetToDefault = async () => {
try {
isResettingPrompt.value = true
// 通过设置 system_prompt 为空字符串来获取默认值
// 后端在 system_prompt 为空时会返回默认值
const tempConfig: AgentConfig = {
enabled: isAgentReady.value,
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: localTemperature.value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: localRerankModelId.value,
system_prompt: '', // 空字符串表示使用默认
use_custom_system_prompt: false
}
// 通过设置 system_prompt_web_* 为空字符串来获取默认值
// 后端在字段为空时会返回默认值
const tempConfig = buildAgentConfigPayload({
system_prompt_web_enabled: '',
system_prompt_web_disabled: '',
use_custom_system_prompt: false,
})
await updateAgentConfig(tempConfig)
// 重新加载配置以获取默认 Prompt 的完整内容
const res = await getAgentConfig()
const defaultPrompt = res.data.system_prompt || ''
const defaultPromptWebEnabled = res.data.system_prompt_web_enabled || ''
const defaultPromptWebDisabled = res.data.system_prompt_web_disabled || ''
const useCustom = res.data.use_custom_system_prompt ?? false
// 设置为默认 Prompt 的内容
localSystemPrompt.value = defaultPrompt
savedSystemPrompt = defaultPrompt
localSystemPromptWebEnabled.value = defaultPromptWebEnabled
localSystemPromptWebDisabled.value = defaultPromptWebDisabled
savedSystemPromptMap['web-enabled'] = defaultPromptWebEnabled
savedSystemPromptMap['web-disabled'] = defaultPromptWebDisabled
localUseCustomSystemPrompt.value = useCustom
savedUseCustomSystemPrompt = useCustom
@@ -1544,7 +1548,7 @@ const handleResetToDefault = async () => {
}
// 处理系统 Prompt 变化
const handleSystemPromptChange = async (e?: FocusEvent) => {
const handleSystemPromptChange = async (tab: SystemPromptTab, e?: FocusEvent) => {
// 如果点击的是占位符提示框,不触发保存
if (e?.relatedTarget) {
const target = e.relatedTarget as HTMLElement
@@ -1567,26 +1571,18 @@ const handleSystemPromptChange = async (e?: FocusEvent) => {
// 如果正在初始化,不触发保存
if (isInitializing.value) return
const promptRef = getPromptRefByTab(tab)
const savedValue = savedSystemPromptMap[tab]
// 检查内容是否变化
if (localSystemPrompt.value === savedSystemPrompt) {
if (promptRef.value === savedValue) {
return // 内容没变,不调用接口
}
try {
const config: AgentConfig = {
enabled: isAgentReady.value,
max_iterations: localMaxIterations.value,
reflection_enabled: false,
allowed_tools: localAllowedTools.value,
temperature: localTemperature.value,
thinking_model_id: localThinkingModelId.value,
rerank_model_id: localRerankModelId.value,
system_prompt: localSystemPrompt.value,
use_custom_system_prompt: localUseCustomSystemPrompt.value
}
const config = buildAgentConfigPayload()
await updateAgentConfig(config)
savedSystemPrompt = localSystemPrompt.value // 更新已保存的值
savedSystemPromptMap[tab] = promptRef.value // 更新已保存的值
MessagePlugin.success(t('agentSettings.toasts.systemPromptSaved'))
} catch (error) {
console.error('保存系统 Prompt 失败:', error)
@@ -1732,15 +1728,15 @@ const handleTemperatureNormalChange = async (value: number) => {
}
}
const handleMaxTokensChange = async (value: number) => {
const handleMaxCompletionTokensChange = async (value: number) => {
if (!conversationConfigLoaded.value) return
try {
await saveConversationConfig(
{ max_tokens: value },
{ max_completion_tokens: value },
t('conversationSettings.toasts.maxTokensSaved')
)
savedMaxTokens = value
savedMaxCompletionTokens = value
} catch (error) {
console.error('保存Max Tokens失败:', error)
MessagePlugin.error(getErrorMessage(error))
@@ -2214,6 +2210,39 @@ const handleConversationRerankModelChange = async (value: string) => {
font-size: 12px;
}
.prompt-tab-hint {
margin: 0 0 12px;
color: #666;
font-size: 12px;
}
.system-prompt-tabs {
width: 100%;
}
.system-prompt-variant-tabs :deep(.t-tabs__nav-wrap) {
border-bottom: 1px solid #e5e7eb;
margin-bottom: 8px;
}
.system-prompt-variant-tabs :deep(.t-tabs__nav-item) {
padding: 4px 12px 10px;
font-size: 13px;
color: #666;
border-bottom: 2px solid transparent;
transition: color 0.2s ease, border-color 0.2s ease;
}
.system-prompt-variant-tabs :deep(.t-tabs__nav-item.t-is-active) {
color: #1d2129;
border-bottom-color: #07C05F;
font-weight: 600;
}
.system-prompt-variant-tabs :deep(.t-tabs__bar) {
display: none;
}
.prompt-textarea-readonly {
background-color: #fafafa;
}

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/Tencent/WeKnora/internal/agent/tools"
"github.com/Tencent/WeKnora/internal/common"
"github.com/Tencent/WeKnora/internal/event"
"github.com/Tencent/WeKnora/internal/logger"
"github.com/Tencent/WeKnora/internal/models/chat"
@@ -26,7 +27,6 @@ type AgentEngine struct {
config *types.AgentConfig
toolRegistry *tools.ToolRegistry
chatModel chat.Chat
knowledgeService interfaces.KnowledgeBaseService
eventBus *event.EventBus
knowledgeBasesInfo []*KnowledgeBaseInfo // Detailed knowledge base information for prompt
contextManager interfaces.ContextManager // Context manager for writing agent conversation to LLM context
@@ -48,7 +48,6 @@ func NewAgentEngine(
config *types.AgentConfig,
chatModel chat.Chat,
toolRegistry *tools.ToolRegistry,
knowledgeService interfaces.KnowledgeBaseService,
eventBus *event.EventBus,
knowledgeBasesInfo []*KnowledgeBaseInfo,
contextManager interfaces.ContextManager,
@@ -62,7 +61,6 @@ func NewAgentEngine(
config: config,
toolRegistry: toolRegistry,
chatModel: chatModel,
knowledgeService: knowledgeService,
eventBus: eventBus,
knowledgeBasesInfo: knowledgeBasesInfo,
contextManager: contextManager,
@@ -78,6 +76,12 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
logger.Infof(ctx, "[Agent] SessionID: %s, MessageID: %s", sessionID, messageID)
logger.Infof(ctx, "[Agent] User Query: %s", query)
logger.Infof(ctx, "[Agent] LLM Context Messages: %d", len(llmContext))
common.PipelineInfo(ctx, "Agent", "execute_start", map[string]interface{}{
"session_id": sessionID,
"message_id": messageID,
"query": query,
"context_msgs": len(llmContext),
})
// Initialize state
state := &types.AgentState{
@@ -87,8 +91,8 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
CurrentRound: 0,
}
// Build system prompt
systemPrompt := BuildReActSystemPromptWithStatus(e.knowledgeBasesInfo, e.config.WebSearchEnabled, e.systemPromptTemplate)
// Build system prompt using progressive RAG prompt
systemPrompt := BuildProgressiveRAGSystemPrompt(e.knowledgeBasesInfo, e.config.WebSearchEnabled, e.systemPromptTemplate)
logger.Debugf(ctx, "[Agent] SystemPrompt Length: %d characters", len(systemPrompt))
logger.Debugf(ctx, "[Agent] SystemPrompt (stream)\n----\n%s\n----", systemPrompt)
@@ -99,7 +103,13 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
// Get tool definitions for function calling
tools := e.buildToolsForLLM()
logger.Infof(ctx, "[Agent] Tools enabled (%d): %s", len(tools), strings.Join(listToolNames(tools), ", "))
toolListStr := strings.Join(listToolNames(tools), ", ")
logger.Infof(ctx, "[Agent] Tools enabled (%d): %s", len(tools), toolListStr)
common.PipelineInfo(ctx, "Agent", "tools_ready", map[string]interface{}{
"session_id": sessionID,
"tool_count": len(tools),
"tools": toolListStr,
})
_, err := e.executeLoop(ctx, state, query, messages, tools, sessionID, messageID)
if err != nil {
@@ -120,6 +130,12 @@ func (e *AgentEngine) Execute(ctx context.Context, sessionID, messageID, query s
logger.Infof(ctx, "========== Agent Execution Completed Successfully ==========")
logger.Infof(ctx, "[Agent] Total rounds: %d, Round steps: %d, Is complete: %v",
state.CurrentRound, len(state.RoundSteps), state.IsComplete)
common.PipelineInfo(ctx, "Agent", "execute_complete", map[string]interface{}{
"session_id": sessionID,
"rounds": state.CurrentRound,
"steps": len(state.RoundSteps),
"complete": state.IsComplete,
})
return state, nil
}
@@ -135,19 +151,45 @@ func (e *AgentEngine) executeLoop(
messageID string,
) (*types.AgentState, error) {
startTime := time.Now()
common.PipelineInfo(ctx, "Agent", "loop_start", map[string]interface{}{
"max_iterations": e.config.MaxIterations,
})
for state.CurrentRound < e.config.MaxIterations {
roundStart := time.Now()
logger.Infof(ctx, "========== Round %d/%d Started ==========", state.CurrentRound+1, e.config.MaxIterations)
logger.Infof(ctx, "[Agent][Round-%d] Message history size: %d messages", state.CurrentRound+1, len(messages))
common.PipelineInfo(ctx, "Agent", "round_start", map[string]interface{}{
"iteration": state.CurrentRound,
"round": state.CurrentRound + 1,
"message_count": len(messages),
"pending_tools": len(tools),
"max_iterations": e.config.MaxIterations,
})
// 1. Think: Call LLM with function calling and stream thinking through EventBus
logger.Infof(ctx, "[Agent][Round-%d] Calling LLM with %d tools available...", state.CurrentRound+1, len(tools))
common.PipelineInfo(ctx, "Agent", "think_start", map[string]interface{}{
"iteration": state.CurrentRound,
"round": state.CurrentRound + 1,
"tool_cnt": len(tools),
})
response, err := e.streamThinkingToEventBus(ctx, messages, tools, state.CurrentRound, sessionID)
if err != nil {
logger.Errorf(ctx, "[Agent][Round-%d] LLM call failed: %v", state.CurrentRound+1, err)
common.PipelineError(ctx, "Agent", "think_failed", map[string]interface{}{
"iteration": state.CurrentRound,
"error": err.Error(),
})
return state, fmt.Errorf("LLM call failed: %w", err)
}
common.PipelineInfo(ctx, "Agent", "think_result", map[string]interface{}{
"iteration": state.CurrentRound,
"finish_reason": response.FinishReason,
"tool_calls": len(response.ToolCalls),
"content_len": len(response.Content),
})
// Debug: log finish reason and tool call count from LLM
logger.Infof(ctx, "[Agent][Round-%d] LLM response received: finish_reason=%s, tool_calls=%d, content_length=%d",
state.CurrentRound+1, response.FinishReason, len(response.ToolCalls), len(response.Content))
@@ -168,6 +210,11 @@ func (e *AgentEngine) executeLoop(
if response.FinishReason == "stop" && len(response.ToolCalls) == 0 {
logger.Infof(ctx, "[Agent][Round-%d] Agent finished - no more tool calls needed", state.CurrentRound+1)
logger.Infof(ctx, "[Agent] Final answer length: %d characters", len(response.Content))
common.PipelineInfo(ctx, "Agent", "round_final_answer", map[string]interface{}{
"iteration": state.CurrentRound,
"round": state.CurrentRound + 1,
"answer_len": len(response.Content),
})
state.FinalAnswer = response.Content
state.IsComplete = true
state.RoundSteps = append(state.RoundSteps, step)
@@ -223,6 +270,13 @@ func (e *AgentEngine) executeLoop(
// Execute tool
logger.Infof(ctx, "[Agent][Round-%d][Tool-%d/%d] Executing tool: %s...",
state.CurrentRound+1, i+1, len(response.ToolCalls), tc.Function.Name)
common.PipelineInfo(ctx, "Agent", "tool_call_start", map[string]interface{}{
"iteration": state.CurrentRound,
"round": state.CurrentRound + 1,
"tool": tc.Function.Name,
"tool_call_id": tc.ID,
"tool_index": fmt.Sprintf("%d/%d", i+1, len(response.ToolCalls)),
})
result, err := e.toolRegistry.ExecuteTool(ctx, tc.Function.Name, args)
duration := time.Since(toolCallStartTime).Milliseconds()
logger.Infof(ctx, "[Agent][Round-%d][Tool-%d/%d] Tool execution completed in %dms",
@@ -245,6 +299,26 @@ func (e *AgentEngine) executeLoop(
}
}
toolSuccess := toolCall.Result != nil && toolCall.Result.Success
pipelineFields := map[string]interface{}{
"iteration": state.CurrentRound,
"round": state.CurrentRound + 1,
"tool": tc.Function.Name,
"tool_call_id": tc.ID,
"duration_ms": duration,
"success": toolSuccess,
}
if toolCall.Result != nil && toolCall.Result.Error != "" {
pipelineFields["error"] = toolCall.Result.Error
}
if err != nil {
common.PipelineError(ctx, "Agent", "tool_call_result", pipelineFields)
} else if toolSuccess {
common.PipelineInfo(ctx, "Agent", "tool_call_result", pipelineFields)
} else {
common.PipelineWarn(ctx, "Agent", "tool_call_result", pipelineFields)
}
if toolCall.Result != nil {
logger.Infof(ctx, "[Agent][Round-%d][Tool-%d/%d] Tool result: success=%v, output_length=%d",
state.CurrentRound+1, i+1, len(response.ToolCalls),
@@ -335,6 +409,12 @@ func (e *AgentEngine) executeLoop(
state.RoundSteps = append(state.RoundSteps, step)
// 4. Observe: Add tool results to messages and write to context
messages = e.appendToolResults(ctx, messages, step)
common.PipelineInfo(ctx, "Agent", "round_end", map[string]interface{}{
"iteration": state.CurrentRound,
"round": state.CurrentRound + 1,
"tool_calls": len(step.ToolCalls),
"thought_len": len(step.Thought),
})
// 5. Check if we should continue
state.CurrentRound++
}
@@ -342,10 +422,17 @@ func (e *AgentEngine) executeLoop(
// If loop finished without final answer, generate one
if !state.IsComplete {
logger.Info(ctx, "Reached max iterations, generating final answer")
common.PipelineWarn(ctx, "Agent", "max_iterations_reached", map[string]interface{}{
"iterations": state.CurrentRound,
"max": e.config.MaxIterations,
})
// Stream final answer generation through EventBus
if err := e.streamFinalAnswerToEventBus(ctx, query, state, sessionID); err != nil {
logger.Errorf(ctx, "Failed to synthesize final answer: %v", err)
common.PipelineError(ctx, "Agent", "final_answer_failed", map[string]interface{}{
"error": err.Error(),
})
state.FinalAnswer = "抱歉,我无法生成完整的答案。"
}
state.IsComplete = true
@@ -648,8 +735,15 @@ func (e *AgentEngine) streamFinalAnswerToEventBus(
sessionID string,
) error {
logger.Infof(ctx, "[Agent][FinalAnswer] Starting final answer generation")
totalToolCalls := countTotalToolCalls(state.RoundSteps)
logger.Infof(ctx, "[Agent][FinalAnswer] Context: %d steps with total %d tool calls",
len(state.RoundSteps), countTotalToolCalls(state.RoundSteps))
len(state.RoundSteps), totalToolCalls)
common.PipelineInfo(ctx, "Agent", "final_answer_start", map[string]interface{}{
"session_id": sessionID,
"query": query,
"steps": len(state.RoundSteps),
"tool_results": totalToolCalls,
})
// Build messages with all context
systemPrompt := BuildReActSystemPromptWithStatus(e.knowledgeBasesInfo, e.config.WebSearchEnabled, e.systemPromptTemplate)
@@ -720,10 +814,18 @@ func (e *AgentEngine) streamFinalAnswerToEventBus(
if err != nil {
logger.Errorf(ctx, "[Agent][FinalAnswer] Final answer generation failed: %v", err)
common.PipelineError(ctx, "Agent", "final_answer_stream_failed", map[string]interface{}{
"session_id": sessionID,
"error": err.Error(),
})
return err
}
logger.Infof(ctx, "[Agent][FinalAnswer] Final answer generated: %d characters", len(fullAnswer))
common.PipelineInfo(ctx, "Agent", "final_answer_done", map[string]interface{}{
"session_id": sessionID,
"answer_len": len(fullAnswer),
})
state.FinalAnswer = fullAnswer
return nil
}

View File

@@ -206,7 +206,41 @@ func renderPromptPlaceholdersWithStatus(template string, knowledgeBases []*Knowl
return result
}
// BuildProgressiveRAGSystemPromptWithWeb renders the progressive RAG system prompt
// for the case where web search is enabled.
//
// systemPromptTemplate is an optional override: when its first element is present
// and non-empty it is used as the template; otherwise the built-in
// ProgressiveRAGSystemPromptWithWeb template is used. Placeholders are filled by
// renderPromptPlaceholdersWithStatus with the web-search flag set to true and the
// current time formatted as RFC 3339.
func BuildProgressiveRAGSystemPromptWithWeb(knowledgeBases []*KnowledgeBaseInfo, systemPromptTemplate ...string) string {
	// Start from the built-in template and only replace it with a usable override.
	chosen := ProgressiveRAGSystemPromptWithWeb
	if len(systemPromptTemplate) > 0 {
		if custom := systemPromptTemplate[0]; custom != "" {
			chosen = custom
		}
	}

	now := time.Now().Format(time.RFC3339)
	return renderPromptPlaceholdersWithStatus(chosen, knowledgeBases, true, now)
}
// BuildProgressiveRAGSystemPromptWithoutWeb renders the progressive RAG system prompt
// for the case where web search is disabled.
//
// When the caller supplies a non-empty first systemPromptTemplate value, that value
// is rendered; in every other case the built-in ProgressiveRAGSystemPromptWithoutWeb
// template is rendered. Rendering is delegated to renderPromptPlaceholdersWithStatus
// with the web-search flag set to false and an RFC 3339 timestamp of the current time.
func BuildProgressiveRAGSystemPromptWithoutWeb(knowledgeBases []*KnowledgeBaseInfo, systemPromptTemplate ...string) string {
	hasOverride := len(systemPromptTemplate) > 0 && systemPromptTemplate[0] != ""

	var promptSource string
	switch {
	case hasOverride:
		promptSource = systemPromptTemplate[0]
	default:
		promptSource = ProgressiveRAGSystemPromptWithoutWeb
	}

	timestamp := time.Now().Format(time.RFC3339)
	return renderPromptPlaceholdersWithStatus(promptSource, knowledgeBases, false, timestamp)
}
// BuildProgressiveRAGSystemPrompt is the main entry point for building the progressive
// RAG system prompt. It dispatches on webSearchEnabled: true selects
// BuildProgressiveRAGSystemPromptWithWeb, false selects
// BuildProgressiveRAGSystemPromptWithoutWeb. The knowledge bases and any optional
// template override are forwarded unchanged to the selected builder.
func BuildProgressiveRAGSystemPrompt(knowledgeBases []*KnowledgeBaseInfo, webSearchEnabled bool, systemPromptTemplate ...string) string {
	// Pick the builder first, then make a single forwarding call.
	build := BuildProgressiveRAGSystemPromptWithoutWeb
	if webSearchEnabled {
		build = BuildProgressiveRAGSystemPromptWithWeb
	}
	return build(knowledgeBases, systemPromptTemplate...)
}
// BuildReActSystemPromptWithStatus builds the system prompt, allowing caller to pass tool status
// Deprecated: Use BuildProgressiveRAGSystemPrompt instead for better tool calling capabilities
func BuildReActSystemPromptWithStatus(knowledgeBases []*KnowledgeBaseInfo, webSearchEnabled bool, systemPromptTemplate ...string) string {
var template string
if len(systemPromptTemplate) > 0 && systemPromptTemplate[0] != "" {
@@ -218,87 +252,721 @@ func BuildReActSystemPromptWithStatus(knowledgeBases []*KnowledgeBaseInfo, webSe
return renderPromptPlaceholdersWithStatus(template, knowledgeBases, webSearchEnabled, currentTime)
}
// DefaultSystemPromptTemplate returns the default system prompt template
// It includes a Status section to explicitly state tool switches at generation time.
var DefaultSystemPromptTemplate = `# Role
// ProgressiveRAGSystemPromptWithWeb is the progressive RAG system prompt template with web search enabled
// This version emphasizes hybrid retrieval strategy: KB-first with web supplementation
var ProgressiveRAGSystemPromptWithWeb = `# Role & Mission
You are WeKnora, a knowledge base assistant. Provide accurate, traceable answers by using only the enabled tools and citing sources.
You are WeKnora, an intelligent retrieval assistant powered by Progressive Agentic RAG. Your mission is to provide accurate, traceable answers by intelligently combining knowledge base retrieval with web search capabilities.
**Core Philosophy**: Knowledge bases are your foundation, web search is your supplement. Use them synergistically to deliver comprehensive, up-to-date information.
# Critical Constraint
Your pretraining data may be outdated or incorrect. Do NOT rely on any internal or parametric knowledge. You must base answers strictly on retrieved content from knowledge bases or web_search, and include citations. If retrieved evidence is insufficient, clearly state limitations and ask for permission to search further or request clarification; do not fill gaps with guesses or general knowledge.
# Known
Your pretraining data may be outdated or incorrect. NEVER rely on internal or parametric knowledge. You MUST base all answers strictly on retrieved content from knowledge bases or web_search, with proper citations. If retrieved evidence is insufficient, clearly state limitations and ask for permission to search further; never fabricate information.
## Knowledge Bases
{{knowledge_bases}}
# System Status
# Status
- Web Search: {{web_search_status}}
- Current Time: {{current_time}}
# Rules
# Progressive RAG Workflow (4-Stage Process)
<Thinking_and_Planning>
- IMPORTANT: Unless the user question is trivially simple (e.g., directly confirming visible information), you MUST use the thinking tool to break down complex problems, track thinking progress iteratively, and adjust the approach when retrieved content changes or exceptions block the original workflow.
- IMPORTANT: Record your KB-first compliance in the thinking step: briefly list the attempted KB strategies and why they were insufficient before you switch to web_search.
- CRITICAL - todo_write Tool Usage: The todo_write tool is MANDATORY and MUST be used frequently throughout your workflow. You MUST:
- Create a todo list at the START of any multi-step task (3+ steps) or complex problem-solving session.
- Update the todo list IMMEDIATELY after completing each task item (mark as completed).
- Add new todo items when you discover additional steps are needed.
- Mark items as in_progress when you start working on them.
- Use todo_write proactively to organize and track your progress; do NOT skip this tool even if you think you can handle the task without it. Regular todo management is essential for maintaining clarity and ensuring all tasks are completed.
- For multi-turn conversations, examine prior retrieved evidence first; if it cannot answer the new question, plan and execute fresh retrieval before responding.
- After obtaining any new content from any tool, immediately use the thinking tool to reflect on sufficiency, trustworthiness, and completeness.
- Before producing any Answer or Final Answer, you MUST invoke the thinking tool to briefly validate evidence sufficiency, note key citations to use, and outline the response. Do not emit the Answer until this thinking step is completed.
</Thinking_and_Planning>
## Stage 1: Problem Understanding & Planning
- **Mandatory planning rule**: Unless a request is truly single-step trivial, immediately call **todo_write** to capture the initial plan and keep it updated after every major milestone. When unsure, default to using todo_write.
- **Use thinking tool** together with the given context information to deeply analyze the question, decompose complex questions into sub-problems, and create a detailed plan for the next steps. Reference the todo_write plan as the source of truth, updating statuses before moving to the next stage.
- Identify question type: factual query / relationship exploration / comprehensive analysis / real-time information
- Determine initial retrieval strategy based on question characteristics
## Stage 2: Knowledge Base Deep Retrieval (Multi-round Optimization)
**Primary Strategy**: Maximize KB value before considering web search
<KB_and_Web_Retrieval>
- Mandatory KB-first policy: ALWAYS attempt knowledge base retrieval before any web_search (even if web_search is enabled, or the user explicitly requests “real-time” answers).
- Try multiple KB strategies before the first web_search (choose those that fit the query), e.g., reformulated keywords/synonyms, adjusting KB/doc scope/filters, using related/context retrieval or checking chunk details. Avoid mechanically enumerating “1), 2)” or stating counts.
- It is FORBIDDEN to skip KB attempts because "KB is small/only a test doc" or based on assumptions.
- Only after these KB attempts fail to yield sufficient evidence may you consider web_search.
- Do not assume “no results” in knowledge bases unless you have executed the above attempts and verified insufficiency.
- Never rely solely on knowledge base or document titles to infer coverage; always execute retrieval to inspect actual content before concluding relevance.
- When web_search is enabled: you may call it multiple times; if one round is insufficient, refine queries (synonyms, narrower/wider scope, time filters) and search again before answering.
- When web_search is disabled: use the thinking tool to deeply plan alternative strategies, try knowledge-base tools iteratively (query reformulation, scope changes, related/context retrieval) until suitable content is found or confidently conclude absence.
</KB_and_Web_Retrieval>
### Initial Retrieval
- **Use knowledge_search** with multiple queries (up to 5) to explore from different angles
- Search across multiple KBs concurrently when appropriate
- Use knowledge_ids filter when you know specific documents to target
<Knowledge_Tools_Usage>
- Use related/context tools to complete understanding when scores are marginal.
- Never return raw tool outputs alone. After each tool call, synthesize a brief, user-facing description of:
1) what the tool did (one short line),
2) the key findings or signals (1–3 bullets, with citations where appropriate),
3) how these findings affect the next step or the answer.
- Keep deep reasoning strictly inside the thinking tool. Outside the thinking tool:
- Do NOT expose chain-of-thought, intermediate hypotheses, or trial-and-error traces,
- Provide only concise, decision-relevant summaries ("we searched KB X and found 3 docs about Y…").
- Prefer structured, scannable phrasing over verbose logs; keep to-the-point and evidence-focused.
</Knowledge_Tools_Usage>
### Query Optimization Techniques
- **Query Rewriting**: Extract key terms, expand synonyms, decompose complex questions
- **Multi-query Strategy**: Try different phrasings, broader/narrower scopes, related concepts
- **Range Adjustment**: Adjust KB scope, document filters, or query specificity based on initial results
### Deepening Retrieval
- **Use get_document_info** to verify document metadata and reliability
- **Use list_knowledge_chunks** when you already know the knowledge_id and need deterministic chunk snapshots or chunk counts
- **Use database_query** for structured data queries when needed
# Answer
- Structure clearly; focus on evidence from retrieved content.
- Be honest about gaps and suggest how to improve queries or KB coverage.
- Before writing the Answer or Final Answer, call the thinking tool to verify that evidence is sufficient and to outline the final response; then write the Answer based on that thinking (do not include chain-of-thought in the Answer).
- Only include content that is directly supported by retrieved sources in this session; do not add items solely from memory or general training data. If a requested timeframe/topic is not covered by retrieved sources, say so and suggest next steps instead of fabricating.
- Respond in the same language as the user's question. Detect the user's language from the latest user message and write the final answer in that language, mirroring the user's tone and formality. If the language is ambiguous, ask briefly which language they prefer before proceeding.
### Quality Assessment
After each retrieval round, use thinking tool to evaluate:
- Relevance: Do results directly address the question?
- Completeness: Is sufficient information gathered?
- Credibility: Are sources reliable and up-to-date?
- Gaps: What information is still missing?
## Stage 3: Web Real-time Information Supplementation
**Trigger Conditions**: Use web search when:
- KB results are insufficient or incomplete
- Question requires real-time/current information (news, recent events, latest updates)
- Need to verify or supplement KB information with external sources
- User explicitly requests current/recent information
### Web Search Strategy
- **Use web_search** with refined queries (synonyms, narrower/wider scope, time filters)
- Can call multiple times if first round is insufficient
- **Use web_fetch** to deeply read specific web pages when needed
- Results are automatically compressed using RAG for efficient processing
### KB-Web Synergy
- Compare KB and web results for consistency
- Use web to fill gaps identified in KB retrieval
- Cross-validate information from both sources
## Stage 4: Synthesis & Answer Generation
- **Use thinking tool** to validate evidence sufficiency and outline response
- Synthesize information from all sources (KB + Web)
- Structure answer clearly with proper citations
- Be honest about limitations and suggest improvements
- Close the loop by updating **todo_write**: mark completed steps, leave notes for any follow-ups, and only stop using todo_write when all planned work is resolved or explicitly handed off.
# Intelligent Tool Selection Strategy
## Question Type -> Tool Mapping
### Factual Queries
- **Primary**: knowledge_search (multiple queries, multiple KBs)
- **Verification**: get_document_info for metadata
- **Supplement**: web_search if KB insufficient
### Relationship Exploration
- **Primary**: query_knowledge_graph (if KB has graph) + knowledge_search
- **Deep Dive**: database_query for structured relationships
### Comprehensive Analysis
- **Primary**: knowledge_search (multiple queries) + todo_write (plan)
- **Exploration**: query_knowledge_graph + get_document_info
- **Supplement**: web_search for additional perspectives
### Real-time Information Needs
- **Can prioritize**: web_search first if clearly time-sensitive
- **Still check KB**: Don't skip KB entirely, but can parallelize
- **Deep read**: web_fetch for important web sources
## Tool Combination Patterns
Always follow the loop **thinking ➜ todo_write ➜ tool execution**, repeating it between every major action. Thinking chooses the next step, todo_write records/updates the plan and statuses, then the chosen tool runs. After the tool finishes, re-enter thinking ➜ todo_write before proceeding, until the task is explicitly completed.
### Pattern 1: Deep Context Exploration
thinking (define retrieval hypotheses)
-> todo_write (capture plan + success criteria)
-> knowledge_search (multiple queries)
-> thinking (interpret hits, pick next focus)
-> todo_write (log findings, queue chunk review)
-> list_knowledge_chunks (sequential + semantic)
-> thinking (spot gaps, decide if graph needed)
-> todo_write (note open questions)
-> query_knowledge_graph (if applicable)
-> thinking (evaluate completeness)
-> todo_write (summarize outcomes, mark done)
### Pattern 2: Document Verification Flow
thinking (determine verification targets)
-> todo_write (list documents + checks)
-> knowledge_search
-> thinking (confirm candidate docs)
-> todo_write (update with selected doc IDs)
-> get_document_info (verify metadata)
-> thinking (assess metadata gaps)
-> todo_write (record issues, plan DB queries)
-> database_query (if structured data needed)
-> thinking (assess reliability)
-> todo_write (update verification status and pending checks)
### Pattern 3: KB-Web Hybrid
thinking (scope KB vs web needs)
-> todo_write (document KB-first plan)
-> knowledge_search (KB exploration)
-> thinking (identify gaps)
-> todo_write (revise plan before switching sources)
-> web_search (fill gaps)
-> thinking (select URLs for deep read)
-> todo_write (log chosen sources)
-> web_fetch (deep read key sources)
-> thinking (synthesize cross-source insights)
-> todo_write (close completed tasks, open follow-ups)
### Pattern 4: Multi-KB Parallel Search
thinking (decide KB coverage strategy)
-> todo_write (track queries per KB)
-> knowledge_search (all KBs, multiple queries in parallel)
-> thinking (compare hits, prioritize chunks)
-> todo_write (note chunk IDs pending review)
-> list_knowledge_chunks (from best results)
-> thinking (compare and evaluate)
-> todo_write (log decisions, note additional retrieval actions)
## Parallel Execution Strategy
**Encourage parallel tool calls when possible**:
- Multiple KB searches can run concurrently
- knowledge_search with multiple queries executes in parallel
- list_knowledge_chunks for multiple chunk_ids processes concurrently
- KB search and web search can run in parallel when appropriate
# Multi-round Retrieval & Query Optimization
## Query Rewriting Techniques
- **Keyword Extraction**: Identify core concepts and entities
- **Synonym Expansion**: Use related terms and alternative phrasings
- **Question Decomposition**: Break complex questions into simpler sub-queries
- **Scope Adjustment**: Broaden (more general) or narrow (more specific) queries
## Result Quality Assessment
After each retrieval:
1. **Relevance Check**: Do results directly answer the question?
2. **Completeness Check**: Is sufficient information gathered?
3. **Credibility Check**: Are sources reliable?
4. **Gap Analysis**: What information is still missing?
## Adaptive Strategy Adjustment
- If results are too broad -> narrow queries, add filters
- If results are too narrow -> broaden queries, remove filters
- If results are irrelevant -> rewrite queries, try different KBs
- If results are incomplete -> use related_chunks, try graph, consider web
# Error Handling & Retry Strategy
## Insufficient Results
1. **Multi-round Retry**: Rewrite queries, try different strategies
2. **Strategy Switch**: Try different tool combinations
3. **Scope Expansion**: Search more KBs, remove filters, broaden queries
4. **Web Supplementation**: Use web_search if KB exhausted (when enabled)
## Tool Call Failures
1. **Retry Mechanism**: Retry failed tool calls with adjusted parameters
2. **Fallback Strategy**: Use alternative tools or approaches
3. **Error Communication**: Clearly explain failures to user
## User Communication
- **Honest Limitations**: Clearly state when information is unavailable
- **Improvement Suggestions**: Suggest how to improve queries or KB coverage
- **Progress Updates**: Keep user informed of retrieval progress
# Tool Usage Guidelines
## knowledge_search
**When to Use**: Primary retrieval tool for all KB queries
**Best Practices**:
- Use multiple queries (2-5) for comprehensive coverage
- Search multiple KBs concurrently when appropriate
- Use knowledge_ids filter when targeting specific documents
- Combine with other tools for deep exploration
**Common Mistakes**:
- Using single query when multiple would help
- Not utilizing multi-KB search capability
- Skipping query optimization
**Common Mistakes**:
- Using when search results already provide sufficient context
- Not choosing appropriate relation_type
- Setting limit too high (causing information overload)
## query_knowledge_graph
**When to Use**: Exploring entity relationships, understanding concept networks
**Best Practices**:
- Check if KB has graph configured first
- Use for relationship queries, not simple fact lookups
- Combine with knowledge_search for comprehensive results
**Common Mistakes**:
- Using for simple text search (use knowledge_search instead)
- Not checking graph configuration status
## get_document_info
**When to Use**: Need document metadata, verify document status, batch query multiple documents
**Best Practices**:
- Query multiple documents concurrently (up to 10)
- Use to verify document processing status
- Check metadata for additional context
**Common Mistakes**:
- Using when you only need content (use knowledge_search)
- Not utilizing batch query capability
## list_knowledge_chunks
**When to Use**: Need deterministic chunk previews or counts for a known document without re-running search.
**Best Practices**:
- Provide the known knowledge_id plus an offset (mapped to page_size, max 100)
- Use to confirm whether more chunks remain before planning additional retrieval
- Combine with get_document_info when metadata is also required
**Common Mistakes**:
- Calling without a knowledge_id (use knowledge_search first)
- Expecting neighboring context around a specific chunk (this tool pages through a known document's chunks by offset; it does not look up the neighbors of a given chunk)
- Forgetting to increase offset when the document contains more chunks
## database_query
**When to Use**: Need structured data, statistics, or database information
**Best Practices**:
- Use for aggregation queries (COUNT, SUM, etc.)
- Join tables when needed for comprehensive data
- Remember tenant_id is automatically injected
**Common Mistakes**:
- Including tenant_id in WHERE clause (it's auto-added)
- Using non-SELECT queries (only SELECT allowed)
## web_search (when enabled)
**When to Use**: Real-time information, KB gaps, current events, verification
**Best Practices**:
- Refine queries for better results (synonyms, scope, time filters)
- Can call multiple times if needed
- Use with web_fetch for deep reading
**Common Mistakes**:
- Skipping KB search entirely (always try KB first)
- Not refining queries for better results
## web_fetch (when enabled)
**When to Use**: Need to deeply read specific web pages from web_search results
**Best Practices**:
- Use with specific prompts to extract relevant information
- Process multiple URLs in parallel when possible
## thinking
**When to Use**: Complex problem decomposition, strategy planning, result evaluation
**Best Practices**:
- Use at start of complex problems
- Use after each major retrieval round to evaluate
- Use before final answer to validate evidence
## todo_write
**When to Use**: Multi-step tasks (3+ steps), complex problem-solving sessions
**Best Practices**:
- Create todo list at task start
- Update immediately after completing items
- Mark items as in_progress when starting work
- Only one item in_progress at a time
# Answer Generation
## Structure
- Organize clearly with evidence from retrieved content
- Use proper headings and sections when appropriate
- Focus on answering the user's question directly
## Evidence Requirements
- Only include content directly supported by retrieved sources
- Never add information from memory or general training data
- If requested information is unavailable, say so clearly
## Citation Format
Place citations inline within the Answer section (not in tool steps):
- Knowledge Base: <kb doc="<doc_name>" chunk_id="<chunk_id>" />
- Web Page: <web url="<url>" title="<title>" />
Citations must appear on the same line as the supported content, immediately after the relevant clause or at the end of the sentence.
## Language
- Respond in the same language as the user's question
- Match the user's tone and formality level
- If language is ambiguous, ask briefly which language they prefer
## Final Validation
Before generating the final answer:
1. Use thinking tool to verify evidence sufficiency
2. Note key citations to use
3. Outline the response structure
4. Generate answer based on thinking (don't include chain-of-thought in answer)
# Available Knowledge Bases and Recently Added Documents/FAQs
{{knowledge_bases}}
IMPORTANT: this part ONLY provides the RECENTLY ADDED documents/FAQs, you should use the retrieval tools to retrieve more documents/FAQs if needed.
<Citations_and_Evidence>
- Within the Answer section (not in intermediate tool steps), place citations inline near the content they support. Citations must appear within the same line as the supported sentence, preferably immediately after the relevant clause or at the end of the sentence; do NOT place citations on a separate line. Do NOT aggregate all citations at the end of the answer.
Include only sources actually used in the answer.
Item formats (compact attributes for easy parsing):
- Knowledge Base: <kb doc="<doc_name>" chunk_id="<chunk_id>" />
- Web Page: <web url="<url>" title="<title>" />
Good Example:
Paragraph explaining concept A... <kb doc="spec.md" chunk_id="c_42" />...
Statement supported by multiple sources... <kb doc="design.md" chunk_id="c_7" /> <web url="https://example.com" title="Example" />
Bad Example:
Paragraph explaining concept A...
<kb doc="spec.md" chunk_id="c_42" />
Paragraph summarizing current news...
</Citations_and_Evidence>
`
// ProgressiveRAGSystemPromptWithoutWeb is the progressive RAG system prompt template without web search.
// This version emphasizes deep KB-only retrieval with advanced techniques.
//
// The template contains two placeholders, {{current_time}} and {{knowledge_bases}}.
// Tool names mentioned in the prompt must match the registered tool names; the prompt
// refers to list_knowledge_chunks (lookup by knowledge_id with an offset) and must not
// describe the sequential/semantic modes of the removed get_related_chunks tool.
var ProgressiveRAGSystemPromptWithoutWeb = `# Role & Mission
You are WeKnora, a knowledge base deep mining expert powered by Progressive Agentic RAG. Your mission is to maximize the value of knowledge bases through intelligent, multi-strategy retrieval and relationship exploration.
**Core Philosophy**: Within knowledge bases, maximize retrieval depth and breadth. Use advanced techniques to extract every relevant piece of information through multi-round optimization and relationship exploration.
# Critical Constraint
Your pretraining data may be outdated or incorrect. NEVER rely on internal or parametric knowledge. You MUST base all answers strictly on retrieved content from knowledge bases, with proper citations. If retrieved evidence is insufficient, clearly state limitations and suggest how to improve queries or KB coverage; never fabricate information.
# System Status
- Current Time: {{current_time}}
# Progressive RAG Workflow (3-Stage Process, KB-Only)
## Stage 1: Problem Understanding & Multi-angle Planning
- **Mandatory planning rule**: Unless the request is truly single-step trivial, immediately call **todo_write** to capture the multi-angle plan and keep it updated after every milestone. When unsure, default to using todo_write.
- **Use thinking tool** to decompose complex questions from multiple angles, referencing todo_write as the authoritative plan and updating statuses before advancing.
- Identify question type: factual query / relationship exploration / comprehensive analysis
- Plan multiple retrieval strategies to try (don't rely on single approach)
## Stage 2: Knowledge Base Deep Retrieval (Multi-round, Multi-strategy)
**Core Strategy**: Exhaust KB resources through intelligent multi-round optimization
### Round 1: Broad Exploration
- **Use knowledge_search** with multiple queries (up to 5) covering different aspects
- Search across all available KBs concurrently
- Try different query phrasings and scopes
- Don't filter by documents initially - explore broadly
### Round 2: Query Optimization & Refinement
Based on Round 1 results, optimize queries:
- **Query Rewriting**: Extract key terms, expand synonyms, decompose questions
- **Synonym Expansion**: Use related terms, alternative phrasings, domain-specific vocabulary
- **Scope Adjustment**:
- If too broad -> narrow with specific terms, add document filters
- If too narrow -> broaden queries, remove filters, try related concepts
- **Multi-query Strategy**: Try 3-5 different query variations in parallel
### Round 3: Deep Context & Relationship Exploration
- **Use query_knowledge_graph** to explore entity relationships (if KB has graph configured)
- **Use get_document_info** to verify document metadata and understand document structure
- **Use list_knowledge_chunks** when you already know the knowledge_id and need deterministic chunk snapshots or chunk counts
- **Use database_query** for structured data when applicable
### Round 4: Cross-document Relationship Mining
- Identify connections between different documents from previous rounds
- Use list_knowledge_chunks on the key documents identified so far and compare their content to find cross-document relationships
- Use query_knowledge_graph to explore concept networks
- Synthesize information from multiple sources
### Quality Assessment After Each Round
Use thinking tool to evaluate:
- **Relevance**: Do results directly address the question?
- **Completeness**: Is sufficient information gathered?
- **Coverage**: Have we explored all relevant angles?
- **Gaps**: What information is still missing? Can we find it with different strategies?
## Stage 3: Relationship Exploration & Context Extension
**Final Deep Dive**: Maximize KB value through relationship and context exploration
- **Graph Exploration**: Use query_knowledge_graph to understand entity relationships
- **Context Extension**: Use list_knowledge_chunks to expand understanding
- **Document Verification**: Use get_document_info to verify sources
- **Synthesis**: Use thinking to synthesize all retrieved information
- **Close the loop**: Update **todo_write** after synthesis—mark finished items, capture outstanding follow-ups, and explicitly signal completion before handing off.
# KB-Only Tool Selection Strategy
## Question Type -> Tool Mapping
### Factual Queries
- **Primary**: knowledge_search (multiple queries, all KBs, multiple rounds)
- **Verification**: get_document_info for document metadata
- **Deep Dive**: database_query if structured data is relevant
### Relationship Exploration
- **Primary**: query_knowledge_graph (if KB has graph) + knowledge_search
- **Cross-reference**: Multiple knowledge_search queries to find connections
- **Structured**: database_query for relationship data
### Comprehensive Analysis
- **Primary**: knowledge_search (multiple queries, multiple rounds) + todo_write (plan)
- **Exploration**: query_knowledge_graph + get_document_info
- **Synthesis**: thinking tool for comprehensive analysis
## Tool Combination Patterns (KB Only)
Always run the loop **thinking ➜ todo_write ➜ tool execution**, repeating it between every major action. Thinking determines the next step, todo_write records/updates the plan and statuses, then execute the tool. After each tool finishes, re-enter thinking ➜ todo_write before moving forward, until the KB task is closed.
### Pattern 1: Multi-query Deep Context
thinking (define hypotheses & KB scope)
-> todo_write (capture multi-query plan, success criteria)
-> knowledge_search (5 queries, all KBs, parallel)
-> thinking (evaluate results, pick documents)
-> todo_write (log findings, schedule chunk review)
-> list_knowledge_chunks (from best results)
-> thinking (decide if graph exploration needed)
-> todo_write (note open relationships to explore)
-> query_knowledge_graph (if applicable)
-> thinking (synthesize)
-> todo_write (summarize outcomes, close tasks)
### Pattern 2: Relationship-First Exploration
thinking (identify key entities/relations)
-> todo_write (record graph-first plan)
-> query_knowledge_graph (explore relationships)
-> thinking (translate graph insights into search targets)
-> todo_write (list targeted queries)
-> knowledge_search (targeted queries based on graph insights)
-> thinking (select chunks needing detail)
-> todo_write (queue chunk/doc review)
-> list_knowledge_chunks (from best results)
-> thinking (verify source reliability)
-> todo_write (track verification items)
-> get_document_info (verify sources)
-> thinking (build comprehensive understanding)
-> todo_write (close or escalate remaining actions)
### Pattern 3: Document-Centric Deep Dive
thinking (decide document-level strategy)
-> todo_write (store target doc list + checks)
-> knowledge_search (identify key documents)
-> thinking (confirm doc priorities)
-> todo_write (mark selected doc IDs)
-> get_document_info (verify and understand documents)
-> thinking (determine chunk coverage needs)
-> todo_write (outline chunk offsets to inspect)
-> list_knowledge_chunks (from best results)
-> thinking (spot structured data gaps)
-> todo_write (add DB query tasks)
-> database_query (if structured data needed)
-> thinking (synthesize)
-> todo_write (finalize notes, mark done)
### Pattern 4: Multi-round Query Optimization
thinking (set baseline query angles)
-> todo_write (plan multi-round experiment)
-> Round 1: knowledge_search (broad queries)
-> thinking (identify gaps)
-> todo_write (document adjustments)
-> Round 2: knowledge_search (optimized queries, different angles)
-> thinking (evaluate improvement)
-> todo_write (capture remaining gaps)
-> Round 3: list_knowledge_chunks + query_knowledge_graph
-> thinking (final synthesis)
-> todo_write (publish final summary, close loop)
## Parallel Execution Strategy
**Maximize parallel execution**:
- Multiple KB searches run concurrently
- knowledge_search with multiple queries executes in parallel
- list_knowledge_chunks for multiple knowledge_ids processes concurrently
- get_document_info for multiple documents queries in parallel
# Advanced KB Retrieval Techniques
## Multi-round Query Optimization
### Query Rewriting Strategies
1. **Keyword Extraction**: Identify core concepts, entities, and relationships
2. **Synonym Expansion**: Use domain-specific synonyms, related terms, alternative phrasings
3. **Question Decomposition**: Break complex questions into simpler, focused sub-queries
4. **Concept Expansion**: Include broader and narrower concepts related to the question
### Scope Adjustment Techniques
- **KB Scope**: Try different KB combinations, search all KBs, then focus on specific KBs
- **Document Filtering**: Start broad, then filter to specific documents if needed
- **Query Specificity**: Adjust from general to specific or vice versa based on results
### Result Evaluation Methods
After each retrieval round:
1. **Relevance Scoring**: Do results directly answer the question?
2. **Completeness Check**: Is sufficient information gathered?
3. **Coverage Analysis**: Have we explored all relevant angles?
4. **Gap Identification**: What information is still missing?
## Cross-document Relationship Mining
### Techniques
- Use knowledge_search with varied phrasings to find similar content across documents
- Use query_knowledge_graph to discover entity relationships spanning documents
- Compare results from different KBs to identify connections
- Use thinking tool to identify patterns and relationships
### Context Window Extension
- Use list_knowledge_chunks (paging with offset) to extend context around key findings
- Combine chunk listing with targeted knowledge_search queries for comprehensive coverage
- Process multiple chunks in parallel for efficiency
## Graph Relationship Reasoning
### When KB Has Graph Configured
- Use query_knowledge_graph to explore entity relationships
- Follow relationship chains to discover related concepts
- Combine graph results with search results for comprehensive understanding
### Graph-Search Synergy
- Use graph to identify key entities
- Use search to find detailed content about those entities
- Use list_knowledge_chunks to expand context around graph findings
# Error Handling & Retry Strategy
## Insufficient KB Results
### Multi-round Retry Strategy
1. **Round 1**: Try different query phrasings and scopes
2. **Round 2**: Expand synonyms, try related concepts, remove filters
3. **Round 3**: Use different tools (query_knowledge_graph, list_knowledge_chunks, get_document_info)
4. **Round 4**: Cross-reference and relationship mining
### Strategy Switching
- If direct search fails -> try relationship exploration (graph)
- If single document insufficient -> try cross-document relationships
- If text search insufficient -> try structured data (database_query)
### Scope Expansion
- Search more KBs (if not already searching all)
- Remove document filters
- Broaden query scope
- Try completely different query angles
## Tool Call Failures
1. **Retry with Adjusted Parameters**: Modify parameters and retry
2. **Alternative Tools**: Use different tools to achieve similar goals
3. **Error Communication**: Clearly explain failures and limitations to user
## User Communication
- **Honest KB Limitations**: Clearly state when information is not available in KBs
- **Improvement Suggestions**: Suggest how to improve queries, add documents to KB, or configure graph
- **Progress Transparency**: Keep user informed of retrieval progress and strategies tried
# Tool Usage Guidelines (KB-Only Focus)
## knowledge_search
**When to Use**: Primary retrieval tool - use extensively and creatively
**Best Practices**:
- ALWAYS use multiple queries (3-5) for comprehensive coverage
- Search all available KBs concurrently
- Use multiple rounds with query optimization
- Combine with other tools for maximum depth
**Advanced Techniques**:
- Query variation: Try different phrasings, synonyms, related terms
- Scope adjustment: Start broad, then narrow or vice versa
- Document filtering: Use knowledge_ids when you identify key documents
**Common Mistakes**:
- Using single query (always use multiple)
- Not utilizing multi-KB search
- Giving up after first round (optimize and retry)
- Not trying different query angles
## query_knowledge_graph
**When to Use**: Explore entity relationships, understand concept networks
**Best Practices**:
- Check if KB has graph configured (tool will indicate)
- Use for relationship queries, not simple fact lookups
- Combine with knowledge_search for comprehensive results
- Follow relationship chains to discover related concepts
**Advanced Techniques**:
- Use graph to identify key entities, then search for details
- Combine graph results with search results
- Use graph insights to refine search queries
**Common Mistakes**:
- Using for simple text search (use knowledge_search instead)
- Not checking graph configuration status
- Not combining with other tools
## get_document_info
**When to Use**: Verify document metadata, understand document structure, batch queries
**Best Practices**:
- Query multiple documents concurrently (up to 10)
- Use to verify document processing status
- Check metadata for additional context
- Use to understand document relationships
**Common Mistakes**:
- Using when you only need content (use knowledge_search)
- Not utilizing batch query capability
- Not checking document status before relying on it
## list_knowledge_chunks
**When to Use**: Need deterministic chunk previews or counts for a known document without re-running search.
**Best Practices**:
- Provide the known knowledge_id plus an offset (mapped to page_size, max 100)
- Use to confirm whether more chunks remain before planning additional retrieval
- Combine with get_document_info when metadata is also required
**Common Mistakes**:
- Calling without a knowledge_id (use knowledge_search first)
- Expecting neighboring context around a specific chunk (page through the document with offset, or use knowledge_search for targeted content)
- Forgetting to increase offset when the document contains more chunks
## database_query
**When to Use**: Structured data queries, statistics, aggregations
**Best Practices**:
- Use for COUNT, SUM, GROUP BY queries
- Join tables when needed
- Remember tenant_id is automatically injected
**Common Mistakes**:
- Including tenant_id in WHERE clause (it's auto-added)
- Using non-SELECT queries (only SELECT allowed)
- Not utilizing JOIN capabilities
## thinking
**When to Use**: Problem decomposition, strategy planning, result evaluation, synthesis
**Best Practices**:
- Use at start of complex problems
- Use after each major retrieval round
- Use before final answer to validate evidence
- Use for multi-angle analysis
## todo_write
**When to Use**: Multi-step tasks (3+ steps), complex problem-solving sessions
**Best Practices**:
- Create todo list at task start
- Update immediately after completing items
- Mark items as in_progress when starting work
- Only one item in_progress at a time
- Add new items when discovering additional steps
# Answer Generation
## Structure
- Organize clearly with evidence from retrieved KB content
- Use proper headings and sections when appropriate
- Focus on answering the user's question directly
## Evidence Requirements
- Only include content directly supported by retrieved KB sources
- Never add information from memory or general training data
- If requested information is unavailable in KBs, say so clearly and suggest:
- How to improve queries
- What documents might help if added to KB
- How graph configuration might help
## Citation Format
Place citations inline within the Answer section (not in tool steps):
- Knowledge Base: <kb doc="<doc_name>" chunk_id="<chunk_id>" />
Citations must appear on the same line as the supported content, immediately after the relevant clause or at the end of the sentence.
## Language
- Respond in the same language as the user's question
- Match the user's tone and formality level
- If language is ambiguous, ask briefly which language they prefer
## Final Validation
Before generating the final answer:
1. Use thinking tool to verify evidence sufficiency
2. Note key citations to use
3. Outline the response structure
4. Generate answer based on thinking (don't include chain-of-thought in answer)
## KB Limitation Communication
When KB information is insufficient:
- Clearly state what information is available vs. unavailable
- Suggest specific improvements (query optimization, document addition, graph configuration)
- Be honest about limitations - never fabricate information
# Available Knowledge Bases and Recently Added Documents/FAQs
{{knowledge_bases}}
IMPORTANT: this part ONLY provides the RECENTLY ADDED documents/FAQs, you should use the retrieval tools to retrieve more documents/FAQs if needed.
`

View File

@@ -14,7 +14,7 @@ func AvailableToolDefinitions() []AvailableTool {
{Name: "thinking", Label: "思考", Description: "动态和反思性的问题解决思考工具"},
{Name: "todo_write", Label: "制定计划", Description: "创建结构化的研究计划"},
{Name: "knowledge_search", Label: "知识搜索", Description: "在知识库中搜索相关信息"},
{Name: "get_related_chunks", Label: "获取相关片段", Description: "查找相关的知识片段"},
{Name: "list_knowledge_chunks", Label: "查看知识分块", Description: "按 knowledge_id 拉取文档分块列表"},
{Name: "query_knowledge_graph", Label: "查询知识图谱", Description: "从知识图谱中查询关系"},
{Name: "get_document_info", Label: "获取文档信息", Description: "查看文档元数据"},
{Name: "database_query", Label: "查询数据库", Description: "查询数据库中的信息"},
@@ -27,7 +27,7 @@ func DefaultAllowedTools() []string {
"thinking",
"todo_write",
"knowledge_search",
"get_related_chunks",
"list_knowledge_chunks",
"query_knowledge_graph",
"get_document_info",
"database_query",

View File

@@ -12,12 +12,14 @@ import (
// GetDocumentInfoTool retrieves detailed information about a document/knowledge
type GetDocumentInfoTool struct {
BaseTool
tenantID uint
knowledgeService interfaces.KnowledgeService
chunkService interfaces.ChunkService
}
// NewGetDocumentInfoTool creates a new get document info tool
func NewGetDocumentInfoTool(
tenantID uint,
knowledgeService interfaces.KnowledgeService,
chunkService interfaces.ChunkService,
) *GetDocumentInfoTool {
@@ -52,6 +54,7 @@ Do not use when:
return &GetDocumentInfoTool{
BaseTool: NewBaseTool("get_document_info", description),
tenantID: tenantID,
knowledgeService: knowledgeService,
chunkService: chunkService,
}
@@ -119,7 +122,7 @@ func (t *GetDocumentInfoTool) Execute(ctx context.Context, args map[string]inter
defer wg.Done()
// Get knowledge metadata
knowledge, err := t.knowledgeService.GetKnowledgeByID(ctx, id)
knowledge, err := t.knowledgeService.GetRepository().GetKnowledgeByID(ctx, t.tenantID, id)
if err != nil {
mu.Lock()
results[id] = &docInfo{
@@ -130,7 +133,7 @@ func (t *GetDocumentInfoTool) Execute(ctx context.Context, args map[string]inter
}
// Get chunk count
chunks, err := t.chunkService.ListChunksByKnowledgeID(ctx, id)
chunks, err := t.chunkService.GetRepository().ListChunksByKnowledgeID(ctx, t.tenantID, id)
chunkCount := 0
if err == nil {
chunkCount = len(chunks)
@@ -217,24 +220,24 @@ func (t *GetDocumentInfoTool) Execute(ctx context.Context, args map[string]inter
output += "\n"
formattedDocs = append(formattedDocs, map[string]interface{}{
"knowledge_id": k.ID,
"title": k.Title,
"description": k.Description,
"type": k.Type,
"source": k.Source,
"file_name": k.FileName,
"file_type": k.FileType,
"file_size": k.FileSize,
"parse_status": k.ParseStatus,
"chunk_count": doc.chunkCount,
"metadata": k.GetMetadata(),
"type_icon": typeIcon,
"knowledge_id": k.ID,
"title": k.Title,
"description": k.Description,
"type": k.Type,
"source": k.Source,
"file_name": k.FileName,
"file_type": k.FileType,
"file_size": k.FileSize,
"parse_status": k.ParseStatus,
"chunk_count_min": doc.chunkCount,
"metadata": k.GetMetadata(),
"type_icon": typeIcon,
})
}
output += "=== Usage Tips ===\n"
output += "- Use knowledge_search to search document content\n"
output += "- Use get_related_chunks to view context and related chunks\n"
output += "- Use list_knowledge_chunks to view context and related chunks\n"
output += "- Search results already contain full chunk content\n"
// Extract first document title for summary

View File

@@ -1,427 +0,0 @@
package tools
import (
"context"
"fmt"
"sync"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
)
// GetRelatedChunksTool retrieves chunks related to a given chunk.
// It embeds BaseTool and relies on two services:
//   - chunkService: looks up the reference chunk and lists the chunks of its document
//     (used for the "sequential" relation type);
//   - knowledgeBaseService: runs a hybrid search in the chunk's knowledge base
//     (used for the "semantic" relation type).
type GetRelatedChunksTool struct {
BaseTool
chunkService interfaces.ChunkService
knowledgeBaseService interfaces.KnowledgeBaseService
}
// NewGetRelatedChunksTool builds the "get_related_chunks" tool.
// The description below is the text handed to NewBaseTool together with the tool name;
// the two services are stored on the tool for use by Execute.
func NewGetRelatedChunksTool(chunkService interfaces.ChunkService, knowledgeBaseService interfaces.KnowledgeBaseService) *GetRelatedChunksTool {
	const toolName = "get_related_chunks"

	description := `Retrieve chunks related to specified reference chunks. Supports sequential (adjacent) and semantic (similar) relation types.
## When to Use
Use this tool when:
- Search results need additional context for full understanding
- You need to see content before/after a specific chunk
- Looking for semantically similar content across the document
- Understanding the complete narrative flow of a topic
Do not use when:
- Search results already provide sufficient complete content
- Only need a single specific chunk without context
## Parameters
chunk_ids (required): Array of reference chunk IDs (1-10)
- Obtained from search results
- Supports concurrent batch processing
- Example: ["chunk_abc", "chunk_def"]
relation_type (optional): Type of relation
- "sequential" (default): Get adjacent chunks before and after
- "semantic": Get semantically similar chunks regardless of position
limit (optional): Number of related chunks to return per reference chunk
- Default: 5
- Range: 1-10
- Sequential: retrieves limit/2 chunks before and after
- Semantic: retrieves top limit most similar chunks
## Relation Types
Sequential:
- Retrieves adjacent chunks in document order
- Useful for understanding complete narrative flow
- Ideal for scenarios requiring continuous reading
- Example: viewing complete configuration steps
Semantic:
- Finds content-similar chunks regardless of position
- Discovers related discussions throughout document
- Ideal for topic expansion and cross-referencing
- Example: finding all mentions of a specific concept
## Usage Patterns
1. Context expansion: knowledge_search -> get_related_chunks(sequential)
2. Topic exploration: knowledge_search -> get_related_chunks(semantic)
3. Deep research: knowledge_search -> get_related_chunks(both sequential and semantic)
## Notes
- Results are automatically deduplicated
- Source chunks are excluded from results
- Sequential results sorted by chunk_index
- Semantic results sorted by similarity score
- Limit value of 5 typically provides sufficient context without information overload`

	tool := &GetRelatedChunksTool{
		chunkService:         chunkService,
		knowledgeBaseService: knowledgeBaseService,
	}
	tool.BaseTool = NewBaseTool(toolName, description)
	return tool
}
// Parameters describes the tool's arguments as a JSON-schema object:
// a required "chunk_ids" string array (1-10 items), an optional "relation_type"
// enum defaulting to "sequential", and an optional integer "limit" (1-10, default 5).
func (t *GetRelatedChunksTool) Parameters() map[string]interface{} {
	chunkIDsSchema := map[string]interface{}{
		"type":        "array",
		"description": "Array of reference chunk IDs",
		"items": map[string]interface{}{
			"type": "string",
		},
		"minItems": 1,
		"maxItems": 10,
	}

	relationTypeSchema := map[string]interface{}{
		"type":        "string",
		"description": "Type: sequential (default) or semantic",
		"enum":        []string{"sequential", "semantic"},
		"default":     "sequential",
	}

	limitSchema := map[string]interface{}{
		"type":        "integer",
		"description": "Number of related chunks per input chunk (default: 5)",
		"default":     5,
		"minimum":     1,
		"maximum":     10,
	}

	schema := map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"chunk_ids":     chunkIDsSchema,
			"relation_type": relationTypeSchema,
			"limit":         limitSchema,
		},
		"required": []string{"chunk_ids"},
	}
	return schema
}
// Execute runs the get_related_chunks tool, processing the reference chunks concurrently.
//
// Arguments (read from args):
//   - "chunk_ids": required non-empty array; non-string and empty entries are ignored.
//   - "relation_type": optional, "sequential" (default) or "semantic". Any other value
//     is rejected with an error instead of silently yielding an empty result.
//   - "limit": optional number, clamped to the range [1, 10]; defaults to 5.
//
// Per-chunk failures do not abort the call; they are reported in the "errors" entry of
// the result data. Related chunks are deduplicated and the reference chunks themselves
// are excluded. Sequential results are sorted by knowledge_id and chunk_index; semantic
// results keep the order in which they were found (reference chunks in input order,
// each in the order returned by the search).
func (t *GetRelatedChunksTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
	// Extract chunk_ids array
	chunkIDsRaw, ok := args["chunk_ids"].([]interface{})
	if !ok || len(chunkIDsRaw) == 0 {
		return &types.ToolResult{
			Success: false,
			Error:   "chunk_ids is required and must be a non-empty array",
		}, fmt.Errorf("chunk_ids is required")
	}

	// Convert to string slice, dropping anything that is not a non-empty string
	var chunkIDs []string
	for _, id := range chunkIDsRaw {
		if idStr, ok := id.(string); ok && idStr != "" {
			chunkIDs = append(chunkIDs, idStr)
		}
	}
	if len(chunkIDs) == 0 {
		return &types.ToolResult{
			Success: false,
			Error:   "chunk_ids must contain at least one valid chunk ID",
		}, fmt.Errorf("no valid chunk IDs provided")
	}

	relationType := "sequential"
	if rt, ok := args["relation_type"].(string); ok && rt != "" {
		relationType = rt
	}
	// Reject unknown relation types up front: without this check neither retrieval
	// branch would run and the caller would get a misleading "no related chunks" answer.
	if relationType != "sequential" && relationType != "semantic" {
		return &types.ToolResult{
			Success: false,
			Error:   fmt.Sprintf("unsupported relation_type %q: must be \"sequential\" or \"semantic\"", relationType),
		}, fmt.Errorf("unsupported relation_type: %s", relationType)
	}

	// JSON numbers arrive as float64; clamp to the documented 1-10 range
	limit := 5
	if l, ok := args["limit"].(float64); ok {
		limit = int(l)
	}
	if limit < 1 {
		limit = 1
	}
	if limit > 10 {
		limit = 10
	}

	// Concurrently get related chunks for each chunk ID
	type relatedResult struct {
		sourceChunk   *types.Chunk
		relatedChunks []*types.Chunk
		err           error
	}

	var wg sync.WaitGroup
	var mu sync.Mutex // guards results
	results := make(map[string]*relatedResult)

	for _, chunkID := range chunkIDs {
		wg.Add(1)
		go func(id string) {
			defer wg.Done()

			// Get the original chunk first
			chunk, err := t.chunkService.GetChunkByID(ctx, id)
			if err != nil || chunk == nil {
				lookupErr := fmt.Errorf("chunk not found")
				if err != nil {
					lookupErr = fmt.Errorf("failed to retrieve chunk: %v", err)
				}
				mu.Lock()
				results[id] = &relatedResult{err: lookupErr}
				mu.Unlock()
				return
			}

			var relatedChunks []*types.Chunk
			if relationType == "sequential" {
				relatedChunks, err = t.getSequentialRelatedChunks(ctx, chunk, limit)
			} else {
				relatedChunks, err = t.getSemanticRelatedChunks(ctx, chunk, limit)
			}

			mu.Lock()
			results[id] = &relatedResult{
				sourceChunk:   chunk,
				relatedChunks: relatedChunks,
				err:           err,
			}
			mu.Unlock()
		}(chunkID)
	}
	wg.Wait()

	// Mark source chunks to exclude them from results
	sourceChunkIDs := make(map[string]bool, len(chunkIDs))
	for _, chunkID := range chunkIDs {
		sourceChunkIDs[chunkID] = true
	}

	// Collect and deduplicate all related chunks. The slice (not the set) carries the
	// result order: ranging over a map would randomize it and lose the relevance order
	// of semantic results.
	seenChunks := make(map[string]struct{})
	allRelatedChunks := make([]*types.Chunk, 0)
	var errMsgs []string
	for _, chunkID := range chunkIDs {
		result := results[chunkID]
		if result == nil {
			continue
		}
		if result.err != nil {
			errMsgs = append(errMsgs, fmt.Sprintf("chunk %s: %v", chunkID, result.err))
			continue
		}
		for _, chunk := range result.relatedChunks {
			// Exclude source chunks and avoid duplicates
			if chunk == nil || sourceChunkIDs[chunk.ID] {
				continue
			}
			if _, seen := seenChunks[chunk.ID]; seen {
				continue
			}
			seenChunks[chunk.ID] = struct{}{}
			allRelatedChunks = append(allRelatedChunks, chunk)
		}
	}

	if relationType == "sequential" {
		// Sort by knowledge_id and chunk_index for sequential
		sortChunksByPosition(allRelatedChunks)
	}
	// For semantic, the collection order above is kept.

	if len(allRelatedChunks) == 0 {
		return &types.ToolResult{
			Success: true,
			Output:  "No related chunks found. Possible reasons:\n- Chunk is the only chunk in document\n- Semantic similarity threshold not met\n- Invalid chunk_id provided",
			Data: map[string]interface{}{
				"chunk_ids":     chunkIDs,
				"relation_type": relationType,
				"count":         0,
				"chunks":        []interface{}{},
				"errors":        errMsgs,
			},
		}, nil
	}

	// Format output
	return t.formatOutput(chunkIDs, relationType, allRelatedChunks, errMsgs)
}
// getSequentialRelatedChunks returns the chunks of the same knowledge document whose
// chunk index lies within limit/2 (at least 1) positions of the reference chunk,
// excluding the reference chunk itself. The chunks are returned in the order the
// chunk service lists them.
func (t *GetRelatedChunksTool) getSequentialRelatedChunks(ctx context.Context, chunk *types.Chunk, limit int) ([]*types.Chunk, error) {
	siblings, err := t.chunkService.ListChunksByKnowledgeID(ctx, chunk.KnowledgeID)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve document chunks: %v", err)
	}

	// Symmetric window around the reference chunk
	radius := limit / 2
	if radius < 1 {
		radius = 1
	}
	lowest, highest := chunk.ChunkIndex-radius, chunk.ChunkIndex+radius

	neighbors := make([]*types.Chunk, 0)
	for _, candidate := range siblings {
		if candidate.ID == chunk.ID {
			continue // never return the reference chunk
		}
		if candidate.ChunkIndex < lowest || candidate.ChunkIndex > highest {
			continue // outside the window
		}
		neighbors = append(neighbors, candidate)
	}
	return neighbors, nil
}
// getSemanticRelatedChunks runs a hybrid search in the reference chunk's knowledge base,
// using the chunk's own content as the query, and converts the hits into chunks.
// The reference chunk is skipped and collection stops once limit chunks are gathered;
// hits are kept in the order the search returned them.
func (t *GetRelatedChunksTool) getSemanticRelatedChunks(ctx context.Context, chunk *types.Chunk, limit int) ([]*types.Chunk, error) {
	// Ask for a few extra matches so that skipping the reference chunk still leaves
	// enough candidates.
	hits, err := t.knowledgeBaseService.HybridSearch(ctx, chunk.KnowledgeBaseID, types.SearchParams{
		QueryText:  chunk.Content,
		MatchCount: limit + 5,
	})
	if err != nil {
		return nil, fmt.Errorf("semantic search failed: %v", err)
	}

	similar := make([]*types.Chunk, 0, limit)
	for _, hit := range hits {
		if hit.ID == chunk.ID {
			continue // the reference chunk matches itself
		}
		// Only the fields below are copied from the search hit; the knowledge base ID
		// is taken from the reference chunk.
		similar = append(similar, &types.Chunk{
			ID:              hit.ID,
			KnowledgeID:     hit.KnowledgeID,
			KnowledgeBaseID: chunk.KnowledgeBaseID,
			Content:         hit.Content,
			ChunkIndex:      hit.ChunkIndex,
		})
		if len(similar) >= limit {
			break
		}
	}
	return similar, nil
}
// sortChunksByPosition orders chunks in place by KnowledgeID, then by ChunkIndex.
// The sort is stable: chunks that compare equal keep their relative order.
func sortChunksByPosition(chunks []*types.Chunk) {
	// comesAfter reports whether a must be placed after b.
	comesAfter := func(a, b *types.Chunk) bool {
		if a.KnowledgeID != b.KnowledgeID {
			return a.KnowledgeID > b.KnowledgeID
		}
		return a.ChunkIndex > b.ChunkIndex
	}

	// Insertion sort; the slices handled here are small.
	for i := 1; i < len(chunks); i++ {
		current := chunks[i]
		j := i - 1
		for ; j >= 0 && comesAfter(chunks[j], current); j-- {
			chunks[j+1] = chunks[j]
		}
		chunks[j+1] = current
	}
}
// formatOutput renders the related chunks both as human-readable text (Output) and as
// structured data (Data). The text lists a summary header, any per-chunk failures,
// the chunks grouped under a "[Document: <knowledge_id>]" heading whenever the
// knowledge ID changes, and closing notes that depend on the relation type.
func (t *GetRelatedChunksTool) formatOutput(
	chunkIDs []string,
	relationType string,
	chunks []*types.Chunk,
	errors []string,
) (*types.ToolResult, error) {
	// Human-readable label; unknown relation types yield an empty label.
	var label string
	switch relationType {
	case "sequential":
		label = "Sequential (Adjacent)"
	case "semantic":
		label = "Semantic (Similar Content)"
	}

	text := "=== Related Chunks ===\n\n"
	text += fmt.Sprintf("Reference chunks: %d\n", len(chunkIDs))
	text += fmt.Sprintf("Relation type: %s\n", label)
	text += fmt.Sprintf("Found %d related chunks (deduplicated)\n\n", len(chunks))

	if len(errors) > 0 {
		text += "=== Partial Failures ===\n"
		for _, failure := range errors {
			text += fmt.Sprintf(" - %s\n", failure)
		}
		text += "\n"
	}

	text += "=== Content ===\n\n"

	items := make([]map[string]interface{}, 0, len(chunks))
	lastKnowledgeID := ""
	for pos, chunk := range chunks {
		// Start a new document heading whenever the knowledge ID changes
		if chunk.KnowledgeID != lastKnowledgeID {
			lastKnowledgeID = chunk.KnowledgeID
			if pos > 0 {
				text += "\n"
			}
			text += fmt.Sprintf("[Document: %s]\n\n", chunk.KnowledgeID)
		}

		ordinal := pos + 1
		text += fmt.Sprintf("Chunk #%d (Position: %d):\n", ordinal, chunk.ChunkIndex+1)
		text += fmt.Sprintf(" chunk_id: %s\n", chunk.ID)
		text += fmt.Sprintf(" content: %s\n\n", chunk.Content)

		items = append(items, map[string]interface{}{
			"index":        ordinal,
			"chunk_id":     chunk.ID,
			"chunk_index":  chunk.ChunkIndex,
			"content":      chunk.Content,
			"knowledge_id": chunk.KnowledgeID,
		})
	}

	text += "=== Notes ===\n"
	switch relationType {
	case "sequential":
		text += "- Adjacent chunks in document order\n"
		text += "- Useful for understanding complete narrative flow\n"
		text += "- Sorted by position\n"
	default:
		text += "- Semantically similar chunks sorted by relevance\n"
		text += "- Useful for discovering related discussions\n"
		text += "- Ideal for topic expansion and cross-referencing\n"
	}
	text += "- Source chunks excluded\n"
	text += "- Results deduplicated\n"

	data := map[string]interface{}{
		"chunk_ids":     chunkIDs,
		"relation_type": relationType,
		"count":         len(chunks),
		"chunks":        items,
		"errors":        errors,
		"display_type":  "related_chunks",
	}
	return &types.ToolResult{
		Success: true,
		Output:  text,
		Data:    data,
	}, nil
}

View File

@@ -9,6 +9,7 @@ import (
"strings"
"sync"
"github.com/Tencent/WeKnora/internal/config"
"github.com/Tencent/WeKnora/internal/logger"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/models/rerank"
@@ -28,48 +29,39 @@ type searchResultWithMeta struct {
// KnowledgeSearchTool searches knowledge bases with flexible query modes
type KnowledgeSearchTool struct {
BaseTool
knowledgeService interfaces.KnowledgeBaseService
chunkService interfaces.ChunkService
tenantID uint
allowedKBs []string
rerankModel rerank.Reranker
chatModel chat.Chat // Optional chat model for LLM-based reranking
knowledgeBaseService interfaces.KnowledgeBaseService
chunkService interfaces.ChunkService
tenantID uint
allowedKBs []string
rerankModel rerank.Reranker
chatModel chat.Chat // Optional chat model for LLM-based reranking
config *config.Config // Global config for fallback values
}
// NewKnowledgeSearchTool creates a new knowledge search tool
func NewKnowledgeSearchTool(
knowledgeService interfaces.KnowledgeBaseService,
knowledgeBaseService interfaces.KnowledgeBaseService,
chunkService interfaces.ChunkService,
tenantID uint,
allowedKBs []string,
rerankModel rerank.Reranker,
chatModel chat.Chat,
cfg *config.Config,
) *KnowledgeSearchTool {
description := `Search within knowledge bases with flexible query modes. Unified tool that supports both targeted and broad searches.
description := `Search within knowledge bases. Unified tool that supports both targeted and broad searches.
## Features
- Multi-KB search: Search across multiple knowledge bases concurrently
- Flexible queries: Support vector, keyword, or hybrid search modes
- Quality filtering: Automatically filters low-quality chunks
## Usage
**Use when**:
- You know which knowledge bases to target (specify knowledge_base_ids)
- You're unsure which KB contains the info (omit knowledge_base_ids to search all allowed KBs)
- Want to search specific KBs with same query
- Need semantic (vector) or exact keyword searches
- Want to search only specific documents within KBs
- Want to search with multiple queries to get comprehensive results
- Want to filter results from specific documents (use knowledge_ids)
**Search Modes**:
- Simple: Provide single query parameter (hybrid search)
- Vector only: Provide vector_queries only
- Keyword only: Provide keyword_queries only
- Hybrid: Provide both vector_queries and keyword_queries
- At least one query parameter must be provided
**Returns**: Merged and deduplicated search results from all KBs
**Returns**: Merged and deduplicated search results from KBs
## Examples
@@ -77,48 +69,37 @@ func NewKnowledgeSearchTool(
# Simple search in specific KBs
{
"knowledge_base_ids": ["kb1", "kb2"],
"query": "什么是向量数据库"
"queries": ["什么是向量数据库"]
}
# Search all allowed KBs with vector queries
# Search all allowed KBs with multiple queries
{
"vector_queries": ["什么是向量数据库", "向量数据库的定义"]
}
# Multiple query types with thresholds
{
"knowledge_base_ids": ["kb1"],
"vector_queries": ["向量数据库应用"],
"keyword_queries": ["Docker", "部署"],
"vector_threshold": 0.7,
"keyword_threshold": 0.6
"queries": ["什么是向量数据库", "向量数据库的应用场景"]
}
# Search specific documents
{
"knowledge_base_ids": ["kb1"],
"query": "彗星的起源",
"queries": ["彗星的起源"],
"knowledge_ids": ["doc1", "doc2"]
}
` + "`" + `
## Tips
- Concurrent search across multiple KBs and queries
- Concurrent search across multiple KBs
- Results are automatically reranked to unify scores from different sources
- Reranked scores are in 0-1 range and directly comparable
- Results are merged, deduplicated and sorted by relevance
- Use vector_queries for semantic/conceptual searches
- Use keyword_queries for exact term matching`
- Results are merged, deduplicated and sorted by relevance`
return &KnowledgeSearchTool{
BaseTool: NewBaseTool("knowledge_search", description),
knowledgeService: knowledgeService,
chunkService: chunkService,
tenantID: tenantID,
allowedKBs: allowedKBs,
rerankModel: rerankModel,
chatModel: chatModel,
BaseTool: NewBaseTool("knowledge_search", description),
knowledgeBaseService: knowledgeBaseService,
chunkService: chunkService,
tenantID: tenantID,
allowedKBs: allowedKBs,
rerankModel: rerankModel,
chatModel: chatModel,
config: cfg,
}
}
@@ -127,80 +108,39 @@ func (t *KnowledgeSearchTool) Parameters() map[string]interface{} {
return map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"queries": map[string]interface{}{
"type": "array",
"description": "Array of search queries",
"items": map[string]interface{}{
"type": "string",
},
"minItems": 1,
"maxItems": 5,
},
"knowledge_base_ids": map[string]interface{}{
"type": "array",
"description": "Array of knowledge base IDs to search in (optional, if omitted searches all allowed KBs)",
"items": map[string]interface{}{
"type": "string",
},
"minItems": 1,
"minItems": 0,
"maxItems": 10,
},
"query": map[string]interface{}{
"type": "string",
"description": "Single search query for simple hybrid search",
},
"vector_queries": map[string]interface{}{
"type": "array",
"description": "Array of semantic queries for vector search (1-5 queries)",
"items": map[string]interface{}{
"type": "string",
},
"minItems": 1,
"maxItems": 5,
},
"keyword_queries": map[string]interface{}{
"type": "array",
"description": "Array of keyword queries for keyword search (1-5 queries)",
"items": map[string]interface{}{
"type": "string",
},
"minItems": 1,
"maxItems": 5,
},
"top_k": map[string]interface{}{
"type": "integer",
"description": "Number of results per knowledge base per query (default: 5)",
"default": 5,
"minimum": 1,
"maximum": 20,
},
"vector_threshold": map[string]interface{}{
"type": "number",
"description": "Minimum score for vector results (default: 0.6)",
"default": 0.6,
"minimum": 0.0,
"maximum": 1.0,
},
"keyword_threshold": map[string]interface{}{
"type": "number",
"description": "Minimum score for keyword results (default: 0.5)",
"default": 0.5,
"minimum": 0.0,
"maximum": 1.0,
},
"knowledge_ids": map[string]interface{}{
"type": "array",
"description": "Optional array of document IDs to filter results (only return results from these specific documents)",
"items": map[string]interface{}{
"type": "string",
},
"minItems": 1,
"minItems": 0,
"maxItems": 50,
},
"min_score": map[string]interface{}{
"type": "number",
"description": "Absolute minimum score threshold for filtering very low quality results (default: 0.3)",
"default": 0.3,
"minimum": 0.0,
"maximum": 1.0,
},
},
"required": []string{},
"required": []string{"queries"},
}
}
// Execute executes the knowledge search tool with flexible query modes
// Execute executes the knowledge search tool
func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
logger.Infof(ctx, "[Tool][KnowledgeSearch] Execute started")
@@ -232,90 +172,74 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
logger.Infof(ctx, "[Tool][KnowledgeSearch] Using all allowed KBs (%d): %v", len(kbIDs), kbIDs)
}
// Parse query parameters
var singleQuery string
var vectorQueries, keywordQueries []string
// Parse single query
if q, ok := args["query"].(string); ok && q != "" {
singleQuery = q
}
// Parse vector_queries
if vq, ok := args["vector_queries"].([]interface{}); ok {
for _, q := range vq {
if queryStr, ok := q.(string); ok && queryStr != "" {
vectorQueries = append(vectorQueries, queryStr)
// Parse query parameter
var queries []string
if queriesRaw, ok := args["queries"].([]interface{}); ok && len(queriesRaw) > 0 {
for _, q := range queriesRaw {
if qStr, ok := q.(string); ok && qStr != "" {
queries = append(queries, qStr)
}
}
}
// Parse keyword_queries
if kq, ok := args["keyword_queries"].([]interface{}); ok {
for _, q := range kq {
if queryStr, ok := q.(string); ok && queryStr != "" {
keywordQueries = append(keywordQueries, queryStr)
}
}
}
// If single query provided, treat it as both vector and keyword query
if singleQuery != "" {
if len(vectorQueries) == 0 && len(keywordQueries) == 0 {
vectorQueries = []string{singleQuery}
keywordQueries = []string{singleQuery}
}
}
// Validate: at least one query must be provided
if len(vectorQueries) == 0 && len(keywordQueries) == 0 {
logger.Errorf(ctx, "[Tool][KnowledgeSearch] No query provided")
// Validate: query must be provided
if len(queries) == 0 {
logger.Errorf(ctx, "[Tool][KnowledgeSearch] No queries provided")
return &types.ToolResult{
Success: false,
Error: "at least one of query, vector_queries, or keyword_queries must be provided",
}, fmt.Errorf("no query provided")
Error: "queries parameter is required",
}, fmt.Errorf("no queries provided")
}
logger.Infof(ctx, "[Tool][KnowledgeSearch] Query mode: single=%v, vector_queries=%d, keyword_queries=%d",
singleQuery != "", len(vectorQueries), len(keywordQueries))
if singleQuery != "" {
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Single query: %s", singleQuery)
}
if len(vectorQueries) > 0 {
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Vector queries: %v", vectorQueries)
}
if len(keywordQueries) > 0 {
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Keyword queries: %v", keywordQueries)
}
logger.Infof(ctx, "[Tool][KnowledgeSearch] Queries: %v", queries)
// Parse thresholds
vectorThreshold := 0.6
if vt, ok := args["vector_threshold"].(float64); ok {
vectorThreshold = vt
}
// Get search parameters from tenant conversation config, fallback to global config
var topK int
var vectorThreshold, keywordThreshold, minScore float64
keywordThreshold := 0.5
if kt, ok := args["keyword_threshold"].(float64); ok {
keywordThreshold = kt
}
// Parse min_score for absolute filtering
minScore := 0.3
if ms, ok := args["min_score"].(float64); ok {
minScore = ms
}
// Parse top_k
topK := 5
if topKVal, ok := args["top_k"]; ok {
switch v := topKVal.(type) {
case float64:
topK = int(v)
case int:
topK = v
// Try to get from tenant conversation config
if tenantVal := ctx.Value(types.TenantInfoContextKey); tenantVal != nil {
if tenant, ok := tenantVal.(*types.Tenant); ok && tenant != nil && tenant.ConversationConfig != nil {
cc := tenant.ConversationConfig
if cc.EmbeddingTopK > 0 {
topK = cc.EmbeddingTopK
}
if cc.VectorThreshold > 0 {
vectorThreshold = cc.VectorThreshold
}
if cc.KeywordThreshold > 0 {
keywordThreshold = cc.KeywordThreshold
}
// minScore is not in ConversationConfig, use default or config
minScore = 0.3
}
}
// Fallback to global config if not set
if topK == 0 && t.config != nil {
topK = t.config.Conversation.EmbeddingTopK
}
if vectorThreshold == 0 && t.config != nil {
vectorThreshold = t.config.Conversation.VectorThreshold
}
if keywordThreshold == 0 && t.config != nil {
keywordThreshold = t.config.Conversation.KeywordThreshold
}
// Final fallback to hardcoded defaults if config is not available
if topK == 0 {
topK = 5
}
if vectorThreshold == 0 {
vectorThreshold = 0.6
}
if keywordThreshold == 0 {
keywordThreshold = 0.5
}
if minScore == 0 {
minScore = 0.3
}
logger.Infof(ctx, "[Tool][KnowledgeSearch] Search params: top_k=%d, vector_threshold=%.2f, keyword_threshold=%.2f, min_score=%.2f",
topK, vectorThreshold, keywordThreshold, minScore)
@@ -330,11 +254,11 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
}
}
// Execute concurrent search
// Execute concurrent search (hybrid search handles both vector and keyword)
logger.Infof(ctx, "[Tool][KnowledgeSearch] Starting concurrent search across %d KBs", len(kbIDs))
kbTypeMap := t.getKnowledgeBaseTypes(ctx, kbIDs)
allResults := t.concurrentSearch(ctx, vectorQueries, keywordQueries, kbIDs,
allResults := t.concurrentSearch(ctx, queries, kbIDs,
topK, vectorThreshold, keywordThreshold, kbTypeMap)
logger.Infof(ctx, "[Tool][KnowledgeSearch] Concurrent search completed: %d raw results", len(allResults))
@@ -353,55 +277,54 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
}
// Filter by threshold first
logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying threshold filter...")
filteredResults := t.filterByThreshold(allResults, vectorThreshold, keywordThreshold)
logger.Infof(ctx, "[Tool][KnowledgeSearch] After threshold filter: %d results (from %d)",
len(filteredResults), len(allResults))
// Deduplicate before reranking to reduce processing overhead
deduplicatedBeforeRerank := t.deduplicateResults(filteredResults)
logger.Infof(ctx, "[Tool][KnowledgeSearch] After deduplication before rerank: %d results (from %d)",
len(deduplicatedBeforeRerank), len(filteredResults))
// Apply ReRank if model is configured
// Prefer chatModel (LLM-based reranking) over rerankModel if both are available
if t.chatModel != nil && len(filteredResults) > 0 {
logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying LLM-based rerank with model: %s, input: %d results",
t.chatModel.GetModelName(), len(filteredResults))
rerankQuery := singleQuery
if rerankQuery == "" && len(vectorQueries) > 0 {
rerankQuery = vectorQueries[0] // Use first vector query as rerank query
} else if rerankQuery == "" && len(keywordQueries) > 0 {
rerankQuery = keywordQueries[0] // Use first keyword query as fallback
// Use first query for reranking (or combine all queries if needed)
rerankQuery := ""
if len(queries) > 0 {
rerankQuery = queries[0]
if len(queries) > 1 {
// Combine multiple queries for reranking
rerankQuery = strings.Join(queries, " ")
}
}
if rerankQuery != "" {
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Rerank query: %s", rerankQuery)
rerankedResults, err := t.rerankResults(ctx, rerankQuery, filteredResults)
if err != nil {
logger.Warnf(ctx, "[Tool][KnowledgeSearch] LLM rerank failed, using original results: %v", err)
} else {
filteredResults = rerankedResults
logger.Infof(ctx, "[Tool][KnowledgeSearch] LLM rerank completed successfully: %d results",
len(filteredResults))
}
if t.chatModel != nil && len(deduplicatedBeforeRerank) > 0 && rerankQuery != "" {
logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying LLM-based rerank with model: %s, input: %d results, queries: %v",
t.chatModel.GetModelName(), len(deduplicatedBeforeRerank), queries)
rerankedResults, err := t.rerankResults(ctx, rerankQuery, deduplicatedBeforeRerank)
if err != nil {
logger.Warnf(ctx, "[Tool][KnowledgeSearch] LLM rerank failed, using original results: %v", err)
filteredResults = deduplicatedBeforeRerank
} else {
filteredResults = rerankedResults
logger.Infof(ctx, "[Tool][KnowledgeSearch] LLM rerank completed successfully: %d results",
len(filteredResults))
}
} else if t.rerankModel != nil && len(filteredResults) > 0 {
logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying rerank with model: %s, input: %d results",
t.rerankModel.GetModelName(), len(filteredResults))
rerankQuery := singleQuery
if rerankQuery == "" && len(vectorQueries) > 0 {
rerankQuery = vectorQueries[0] // Use first vector query as rerank query
} else if rerankQuery == "" && len(keywordQueries) > 0 {
rerankQuery = keywordQueries[0] // Use first keyword query as fallback
}
if rerankQuery != "" {
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Rerank query: %s", rerankQuery)
rerankedResults, err := t.rerankResults(ctx, rerankQuery, filteredResults)
if err != nil {
logger.Warnf(ctx, "[Tool][KnowledgeSearch] Rerank failed, using original results: %v", err)
} else {
filteredResults = rerankedResults
logger.Infof(ctx, "[Tool][KnowledgeSearch] Rerank completed successfully: %d results",
len(filteredResults))
}
} else if t.rerankModel != nil && len(deduplicatedBeforeRerank) > 0 && rerankQuery != "" {
logger.Infof(ctx, "[Tool][KnowledgeSearch] Applying rerank with model: %s, input: %d results, queries: %v",
t.rerankModel.GetModelName(), len(deduplicatedBeforeRerank), queries)
rerankedResults, err := t.rerankResults(ctx, rerankQuery, deduplicatedBeforeRerank)
if err != nil {
logger.Warnf(ctx, "[Tool][KnowledgeSearch] Rerank failed, using original results: %v", err)
filteredResults = deduplicatedBeforeRerank
} else {
filteredResults = rerankedResults
logger.Infof(ctx, "[Tool][KnowledgeSearch] Rerank completed successfully: %d results",
len(filteredResults))
}
} else {
// No reranking, use deduplicated results
filteredResults = deduplicatedBeforeRerank
}
// Apply absolute minimum score filter to remove very low quality chunks
@@ -409,44 +332,37 @@ func (t *KnowledgeSearchTool) Execute(ctx context.Context, args map[string]inter
filteredResults = t.filterByMinScore(filteredResults, minScore)
logger.Infof(ctx, "[Tool][KnowledgeSearch] After min_score filter: %d results", len(filteredResults))
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Deduplicating results...")
// Final deduplication after rerank (in case rerank changed scores/order but duplicates remain)
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Final deduplication after rerank...")
deduplicatedResults := t.deduplicateResults(filteredResults)
logger.Infof(ctx, "[Tool][KnowledgeSearch] After deduplication: %d results (from %d)",
logger.Infof(ctx, "[Tool][KnowledgeSearch] After final deduplication: %d results (from %d)",
len(deduplicatedResults), len(filteredResults))
// Sort results by score (descending)
logger.Debugf(ctx, "[Tool][KnowledgeSearch] Sorting results by score...")
sort.Slice(deduplicatedResults, func(i, j int) bool {
if deduplicatedResults[i].Score != deduplicatedResults[j].Score {
return deduplicatedResults[i].Score > deduplicatedResults[j].Score
}
// If scores are equal, prefer vector matches
if deduplicatedResults[i].QueryType != deduplicatedResults[j].QueryType {
return deduplicatedResults[i].QueryType == "vector"
}
// If scores are equal, sort by knowledge ID for consistency
return deduplicatedResults[i].KnowledgeID < deduplicatedResults[j].KnowledgeID
})
// Log top results
if len(deduplicatedResults) > 0 {
logger.Infof(ctx, "[Tool][KnowledgeSearch] Top 5 results by score:")
for i := 0; i < len(deduplicatedResults) && i < 5; i++ {
r := deduplicatedResults[i]
logger.Infof(ctx, "[Tool][KnowledgeSearch] #%d: score=%.3f, type=%s, kb=%s, chunk_id=%s",
logger.Infof(ctx, "[Tool][KnowledgeSearch][Top %d] score=%.3f, type=%s, kb=%s, chunk_id=%s",
i+1, r.Score, r.QueryType, r.KnowledgeID, r.ID)
}
}
// Build output
logger.Infof(ctx, "[Tool][KnowledgeSearch] Formatting output with %d final results", len(deduplicatedResults))
result, err := t.formatOutput(ctx, deduplicatedResults, vectorQueries, keywordQueries,
kbIDs, len(allResults), vectorThreshold, keywordThreshold, knowledgeIDsFilter, singleQuery)
result, err := t.formatOutput(ctx, deduplicatedResults, kbIDs, len(allResults), knowledgeIDsFilter, queries)
if err != nil {
logger.Errorf(ctx, "[Tool][KnowledgeSearch] Failed to format output: %v", err)
return result, err
}
logger.Infof(ctx, "[Tool][KnowledgeSearch] Execute completed successfully")
return result, nil
}
@@ -462,7 +378,7 @@ func (t *KnowledgeSearchTool) getKnowledgeBaseTypes(ctx context.Context, kbIDs [
continue
}
kb, err := t.knowledgeService.GetKnowledgeBaseByID(ctx, kbID)
kb, err := t.knowledgeBaseService.GetKnowledgeBaseByID(ctx, kbID)
if err != nil {
logger.Warnf(ctx, "[Tool][KnowledgeSearch] Failed to fetch knowledge base %s info: %v", kbID, err)
continue
@@ -474,57 +390,13 @@ func (t *KnowledgeSearchTool) getKnowledgeBaseTypes(ctx context.Context, kbIDs [
return kbTypeMap
}
// concurrentSearch executes vector and keyword searches concurrently
// concurrentSearch executes hybrid search across multiple KBs concurrently
func (t *KnowledgeSearchTool) concurrentSearch(
ctx context.Context,
vectorQueries, keywordQueries []string,
kbsToSearch []string,
topK int,
vectorThreshold, keywordThreshold float64,
kbTypeMap map[string]string,
) []*searchResultWithMeta {
var wg sync.WaitGroup
var mu sync.Mutex
allResults := make([]*searchResultWithMeta, 0)
// Launch vector searches
if len(vectorQueries) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
results := t.searchWithQueries(ctx, vectorQueries, kbsToSearch, topK,
vectorThreshold, 1.0, "vector", kbTypeMap)
mu.Lock()
allResults = append(allResults, results...)
mu.Unlock()
}()
}
// Launch keyword searches
if len(keywordQueries) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
results := t.searchWithQueries(ctx, keywordQueries, kbsToSearch, topK,
1.0, keywordThreshold, "keyword", kbTypeMap)
mu.Lock()
allResults = append(allResults, results...)
mu.Unlock()
}()
}
wg.Wait()
return allResults
}
// searchWithQueries executes multiple queries concurrently
func (t *KnowledgeSearchTool) searchWithQueries(
ctx context.Context,
queries []string,
kbsToSearch []string,
topK int,
vectorThreshold, keywordThreshold float64,
queryType string,
kbTypeMap map[string]string,
) []*searchResultWithMeta {
var wg sync.WaitGroup
@@ -532,72 +404,46 @@ func (t *KnowledgeSearchTool) searchWithQueries(
allResults := make([]*searchResultWithMeta, 0)
for _, query := range queries {
wg.Add(1)
go func(q string) {
defer wg.Done()
results := t.searchSingleQuery(ctx, q, kbsToSearch, topK,
vectorThreshold, keywordThreshold, queryType, kbTypeMap)
mu.Lock()
allResults = append(allResults, results...)
mu.Unlock()
}(query)
}
// Capture query in local variable to avoid closure issues
q := query
for _, kbID := range kbsToSearch {
// Capture kbID in local variable to avoid closure issues
kb := kbID
wg.Add(1)
go func() {
defer wg.Done()
searchParams := types.SearchParams{
QueryText: q,
MatchCount: topK,
VectorThreshold: vectorThreshold,
KeywordThreshold: keywordThreshold,
}
kbResults, err := t.knowledgeBaseService.HybridSearch(ctx, kb, searchParams)
if err != nil {
// Log error but continue with other KBs
logger.Warnf(ctx, "[Tool][KnowledgeSearch] Failed to search knowledge base %s: %v", kb, err)
return
}
// Wrap results with metadata
mu.Lock()
for _, r := range kbResults {
allResults = append(allResults, &searchResultWithMeta{
SearchResult: r,
SourceQuery: q,
QueryType: "hybrid", // Hybrid search combines both vector and keyword
KnowledgeBaseID: kb,
KnowledgeBaseType: kbTypeMap[kb],
})
}
mu.Unlock()
}()
}
}
wg.Wait()
return allResults
}
// searchSingleQuery searches a single query across multiple KBs concurrently
func (t *KnowledgeSearchTool) searchSingleQuery(
ctx context.Context,
query string,
kbsToSearch []string,
topK int,
vectorThreshold, keywordThreshold float64,
queryType string,
kbTypeMap map[string]string,
) []*searchResultWithMeta {
var wg sync.WaitGroup
var mu sync.Mutex
results := make([]*searchResultWithMeta, 0)
searchParams := types.SearchParams{
QueryText: query,
MatchCount: topK,
VectorThreshold: vectorThreshold,
KeywordThreshold: keywordThreshold,
}
for _, kbID := range kbsToSearch {
wg.Add(1)
go func(kb string) {
defer wg.Done()
kbResults, err := t.knowledgeService.HybridSearch(ctx, kb, searchParams)
if err != nil {
// Log error but continue with other KBs
return
}
// Wrap results with metadata
mu.Lock()
for _, r := range kbResults {
results = append(results, &searchResultWithMeta{
SearchResult: r,
SourceQuery: query,
QueryType: queryType,
KnowledgeBaseID: kb,
KnowledgeBaseType: kbTypeMap[kb],
})
}
mu.Unlock()
}(kbID)
}
wg.Wait()
return results
}
// filterByThreshold filters results based on match type and threshold
func (t *KnowledgeSearchTool) filterByThreshold(
results []*searchResultWithMeta,
@@ -1011,12 +857,10 @@ func (t *KnowledgeSearchTool) deduplicateResults(results []*searchResultWithMeta
func (t *KnowledgeSearchTool) formatOutput(
ctx context.Context,
results []*searchResultWithMeta,
vectorQueries, keywordQueries []string,
kbsToSearch []string,
totalBeforeFilter int,
vectorThreshold, keywordThreshold float64,
knowledgeIDsFilter map[string]bool,
singleQuery string,
queries []string,
) (*types.ToolResult, error) {
if len(results) == 0 {
data := map[string]interface{}{
@@ -1031,8 +875,8 @@ func (t *KnowledgeSearchTool) formatOutput(
}
data["knowledge_ids"] = filterList
}
if singleQuery != "" {
data["query"] = singleQuery
if len(queries) > 0 {
data["queries"] = queries
}
return &types.ToolResult{
Success: true,
@@ -1041,14 +885,6 @@ func (t *KnowledgeSearchTool) formatOutput(
}, nil
}
// Determine search mode
searchMode := "Hybrid (Vector + Keyword)"
if len(vectorQueries) > 0 && len(keywordQueries) == 0 {
searchMode = "Vector"
} else if len(vectorQueries) == 0 && len(keywordQueries) > 0 {
searchMode = "Keyword"
}
// Build output header
output := "=== Search Results ===\n"
output += fmt.Sprintf("Knowledge Bases: %v\n", kbsToSearch)
@@ -1059,22 +895,13 @@ func (t *KnowledgeSearchTool) formatOutput(
}
output += fmt.Sprintf("Document Filter: %v\n", filterList)
}
output += fmt.Sprintf("Search Mode: %s\n", searchMode)
if singleQuery != "" {
output += fmt.Sprintf("Query: %s\n", singleQuery)
} else {
if len(vectorQueries) > 0 {
output += fmt.Sprintf("Vector Queries: %v\n", vectorQueries)
output += fmt.Sprintf("Vector Threshold: %.2f\n", vectorThreshold)
}
if len(keywordQueries) > 0 {
output += fmt.Sprintf("Keyword Queries: %v\n", keywordQueries)
output += fmt.Sprintf("Keyword Threshold: %.2f\n", keywordThreshold)
}
if len(queries) == 1 {
output += fmt.Sprintf("Query: %s\n", queries[0])
} else if len(queries) > 1 {
output += fmt.Sprintf("Queries (%d): %v\n", len(queries), queries)
}
output += fmt.Sprintf("Found %d relevant results (deduplicated)", len(results))
output += fmt.Sprintf("Found %d relevant results", len(results))
if totalBeforeFilter > len(results) {
output += fmt.Sprintf(" (filtered from %d)", totalBeforeFilter)
}
@@ -1118,16 +945,9 @@ func (t *KnowledgeSearchTool) formatOutput(
output += fmt.Sprintf("[Source Document: %s]\n", result.KnowledgeTitle)
}
relevanceLevel := GetRelevanceLevel(result.Score)
// relevanceLevel := GetRelevanceLevel(result.Score)
output += fmt.Sprintf("\nResult #%d:\n", i+1)
output += fmt.Sprintf(" Relevance: %.2f (%s)\n", result.Score, relevanceLevel)
output += fmt.Sprintf(" Match Type: %s", FormatMatchType(result.MatchType))
if result.SourceQuery != "" && result.SourceQuery != singleQuery {
output += fmt.Sprintf(" (Query: \"%s\")", result.SourceQuery)
}
output += "\n"
output += fmt.Sprintf(" Content: %s\n", result.Content)
output += fmt.Sprintf(" [chunk_id: %s - full content included above]\n", result.ID)
output += fmt.Sprintf(" [chunk_id: %s][chunk_index: %d]\nContent: %s\n", result.ID, result.ChunkIndex, result.Content)
if faqMeta != nil {
if faqMeta.StandardQuestion != "" {
@@ -1139,17 +959,17 @@ func (t *KnowledgeSearchTool) formatOutput(
if len(faqMeta.Answers) > 0 {
output += " FAQ Answers:\n"
for _, ans := range faqMeta.Answers {
output += fmt.Sprintf(" - %s\n", ans)
output += fmt.Sprintf(" Answer Choice %d: %s\n", i+1, ans)
}
}
}
formattedResults = append(formattedResults, map[string]interface{}{
"result_index": i + 1,
"chunk_id": result.ID,
"content": result.Content,
"score": result.Score,
"relevance_level": relevanceLevel,
"result_index": i + 1,
"chunk_id": result.ID,
"content": result.Content,
// "score": result.Score,
// "relevance_level": relevanceLevel,
"knowledge_id": result.KnowledgeID,
"knowledge_title": result.KnowledgeTitle,
"match_type": result.MatchType,
@@ -1182,14 +1002,13 @@ func (t *KnowledgeSearchTool) formatOutput(
// }
// output += "- Full content is already included in search results above\n"
// output += "- Results are deduplicated across knowledge bases and sorted by relevance\n"
// output += "- Use get_related_chunks to expand context if needed\n"
// output += "- Use list_knowledge_chunks to expand context if needed\n"
data := map[string]interface{}{
"knowledge_base_ids": kbsToSearch,
"results": formattedResults,
"count": len(results),
"kb_counts": kbCounts,
"search_mode": searchMode,
"display_type": "search_results",
}
if len(knowledgeIDsFilter) > 0 {
@@ -1199,14 +1018,8 @@ func (t *KnowledgeSearchTool) formatOutput(
}
data["knowledge_ids"] = filterList
}
if singleQuery != "" {
data["query"] = singleQuery
}
if len(vectorQueries) > 0 {
data["vector_queries"] = vectorQueries
}
if len(keywordQueries) > 0 {
data["keyword_queries"] = keywordQueries
if len(queries) > 0 {
data["queries"] = queries
}
if totalBeforeFilter > len(results) {
data["total_before_filter"] = totalBeforeFilter

View File

@@ -0,0 +1,230 @@
package tools
import (
"context"
"fmt"
"strings"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
)
// ListKnowledgeChunksTool retrieves chunk snapshots for a specific knowledge document.
// It is constructed by NewListKnowledgeChunksTool and registered under the
// tool name "list_knowledge_chunks".
type ListKnowledgeChunksTool struct {
// BaseTool is embedded; the constructor initialises it with the tool name and description.
BaseTool
// tenantID is forwarded to the chunk repository on every paged query.
tenantID uint
// chunkService provides the repository used to list chunks by knowledge ID.
chunkService interfaces.ChunkService
// knowledgeService is used only to look up the document title for display.
// It may be nil, in which case the title is reported as empty.
knowledgeService interfaces.KnowledgeService
}
// NewListKnowledgeChunksTool builds the "list_knowledge_chunks" tool for one tenant.
//
// knowledgeService is used to resolve document titles and chunkService to page
// through chunks. The description below is the usage text attached to the
// tool via NewBaseTool.
func NewListKnowledgeChunksTool(
	tenantID uint,
	knowledgeService interfaces.KnowledgeService,
	chunkService interfaces.ChunkService,
) *ListKnowledgeChunksTool {
	const toolName = "list_knowledge_chunks"

	usage := `Retrieve paged chunks for a document (knowledge) by knowledge_id.
## When to Use
- Need deterministic chunk previews for a known document
- Want to quickly confirm how many chunks a document contains
- Require surrounding context around a chunk_index returned by search results
- Need content snippets without running an additional search query
Avoid when:
- You don't know the knowledge_id (use knowledge_search first)
## Parameters
- knowledge_id (required): Target document/knowledge ID
- limit (optional): Number of chunks to fetch (default 20, max 100).
- offset (optional): Offset to start fetching chunks from (default 0).`

	tool := new(ListKnowledgeChunksTool)
	tool.BaseTool = NewBaseTool(toolName, usage)
	tool.tenantID = tenantID
	tool.chunkService = chunkService
	tool.knowledgeService = knowledgeService
	return tool
}
// Parameters returns the JSON-schema-style description of the arguments
// accepted by Execute: knowledge_id (string), limit (integer, 1-100,
// default 20) and offset (integer, >= 0, default 0).
//
// NOTE(review): all three properties are listed as required even though the
// tool description calls limit and offset optional and Execute falls back to
// defaults when they are absent — confirm whether this is intentional.
func (t *ListKnowledgeChunksTool) Parameters() map[string]interface{} {
	knowledgeIDProp := map[string]interface{}{
		"type":        "string",
		"description": "Knowledge/document ID to inspect",
	}

	limitProp := map[string]interface{}{
		"type":        "integer",
		"description": "Number of chunks to fetch (default 20, max 100)",
		"default":     20,
		"minimum":     1,
		"maximum":     100,
	}

	offsetProp := map[string]interface{}{
		"type":        "integer",
		"description": "Offset to start fetching chunks from (default 0)",
		"default":     0,
		"minimum":     0,
	}

	schema := map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"knowledge_id": knowledgeIDProp,
			"limit":        limitProp,
			"offset":       offsetProp,
		},
		"required": []string{"knowledge_id", "limit", "offset"},
	}
	return schema
}
// Execute lists one page of text chunks for the document named by
// args["knowledge_id"] and returns them both as a formatted report (Output)
// and as structured data (Data).
//
// Arguments read from args:
//   - knowledge_id: required, non-blank string; surrounding whitespace is trimmed.
//   - limit: optional number, default 20. Values below 1 fall back to the
//     default and values above 100 are capped at 100, matching the schema
//     returned by Parameters.
//   - offset: optional number, default 0; negative values are treated as 0.
//
// The repository is queried by page, so the offset is converted to the
// 1-based page that contains it (offset/limit + 1). An offset that is not a
// multiple of limit is therefore rounded down to the start of its page.
//
// On every failure path the returned ToolResult has Success=false and a
// non-nil error is returned alongside it.
func (t *ListKnowledgeChunksTool) Execute(ctx context.Context, args map[string]interface{}) (*types.ToolResult, error) {
	const (
		defaultChunkLimit = 20
		maxChunkLimit     = 100
	)

	knowledgeID, ok := args["knowledge_id"].(string)
	if !ok || strings.TrimSpace(knowledgeID) == "" {
		return &types.ToolResult{
			Success: false,
			Error:   "knowledge_id is required",
		}, fmt.Errorf("knowledge_id is required")
	}
	knowledgeID = strings.TrimSpace(knowledgeID)

	// intArg reads an integer-valued argument. It accepts float64 (what
	// encoding/json produces for numbers) and int; a missing key or any other
	// type yields the fallback.
	intArg := func(key string, fallback int) int {
		switch v := args[key].(type) {
		case float64:
			return int(v)
		case int:
			return v
		default:
			return fallback
		}
	}

	// Clamp the limit before it is used as a divisor: a zero limit would
	// otherwise panic with an integer divide by zero, and a negative one would
	// produce a meaningless page number.
	chunkLimit := intArg("limit", defaultChunkLimit)
	if chunkLimit < 1 {
		chunkLimit = defaultChunkLimit
	} else if chunkLimit > maxChunkLimit {
		chunkLimit = maxChunkLimit
	}

	offset := intArg("offset", 0)
	if offset < 0 {
		offset = 0
	}

	pagination := &types.Pagination{
		Page:     offset/chunkLimit + 1,
		PageSize: chunkLimit,
	}

	// Only text chunks are requested. The trailing empty string is passed
	// through unchanged — presumably an unused filter; confirm against the
	// repository signature.
	chunks, total, err := t.chunkService.GetRepository().ListPagedChunksByKnowledgeID(ctx,
		t.tenantID, knowledgeID, pagination, []types.ChunkType{types.ChunkTypeText}, "")
	if err != nil {
		return &types.ToolResult{
			Success: false,
			Error:   fmt.Sprintf("failed to list chunks: %v", err),
		}, err
	}
	// A nil slice (as opposed to an empty one) is treated as a failed query.
	if chunks == nil {
		return &types.ToolResult{
			Success: false,
			Error:   "chunk query returned no data",
		}, fmt.Errorf("chunk query returned no data")
	}

	totalChunks := total
	fetched := len(chunks)
	knowledgeTitle := t.lookupKnowledgeTitle(ctx, knowledgeID)
	output := t.buildOutput(knowledgeID, knowledgeTitle, totalChunks, fetched, chunkLimit, chunks)

	formattedChunks := make([]map[string]interface{}, 0, len(chunks))
	for idx, c := range chunks {
		formattedChunks = append(formattedChunks, map[string]interface{}{
			"seq":             idx + 1, // 1-based position within this page
			"chunk_id":        c.ID,
			"chunk_index":     c.ChunkIndex,
			"content":         c.Content,
			"chunk_type":      c.ChunkType,
			"knowledge_id":    c.KnowledgeID,
			"knowledge_base":  c.KnowledgeBaseID,
			"start_at":        c.StartAt,
			"end_at":          c.EndAt,
			"parent_chunk_id": c.ParentChunkID,
		})
	}

	return &types.ToolResult{
		Success: true,
		Output:  output,
		Data: map[string]interface{}{
			"knowledge_id":    knowledgeID,
			"knowledge_title": knowledgeTitle,
			"total_chunks":    totalChunks,
			"fetched_chunks":  fetched,
			"page":            pagination.Page,
			"page_size":       pagination.PageSize,
			"chunks":          formattedChunks,
		},
	}, nil
}
// lookupKnowledgeTitle resolves the title of the knowledge document identified
// by knowledgeID.
//
// It returns the whitespace-trimmed title. An empty string is returned when no
// knowledge service is configured, when the lookup returns an error, or when
// the lookup yields no document; the error itself is not propagated.
func (t *ListKnowledgeChunksTool) lookupKnowledgeTitle(ctx context.Context, knowledgeID string) string {
	if t.knowledgeService != nil {
		doc, lookupErr := t.knowledgeService.GetKnowledgeByID(ctx, knowledgeID)
		if lookupErr == nil && doc != nil {
			return strings.TrimSpace(doc.Title)
		}
	}
	return ""
}
// buildOutput renders the human-readable text summary of one page of chunks.
//
// Parameters:
//   - knowledgeID: ID of the document, always printed in the header.
//   - knowledgeTitle: document title; when empty only the ID is printed.
//   - total: total number of chunks reported for the document.
//   - fetched: number of chunks contained in this page.
//   - chunkLimit: page size that was requested for this page.
//   - chunks: the chunks to preview, listed in slice order.
//
// The returned string has a header block, followed either by a single
// explanatory line when the page is empty or by one preview entry per chunk.
// A trailing hint is appended when fewer chunks were fetched than exist.
func (t *ListKnowledgeChunksTool) buildOutput(
	knowledgeID string,
	knowledgeTitle string,
	total int64,
	fetched int,
	chunkLimit int,
	chunks []*types.Chunk,
) string {
	builder := &strings.Builder{}
	builder.WriteString("=== 知识文档分块 ===\n\n")
	if knowledgeTitle != "" {
		fmt.Fprintf(builder, "文档: %s (%s)\n", knowledgeTitle, knowledgeID)
	} else {
		fmt.Fprintf(builder, "文档 ID: %s\n", knowledgeID)
	}
	fmt.Fprintf(builder, "总分块数: %d\n", total)
	// The second value is the page size, so it is labeled "limit"; it was
	// previously printed under an "offset" label, which misreported it.
	fmt.Fprintf(builder, "本次拉取: %d 条 (limit=%d)\n\n", fetched, chunkLimit)

	if fetched == 0 {
		// Report exactly one cause: an empty page of an existing document is a
		// pagination problem, not a parsing problem.
		if total > 0 {
			builder.WriteString("文档存在但当前页数据为空,请检查分页参数。\n")
		} else {
			builder.WriteString("未找到任何分块,请确认文档是否已完成解析。\n")
		}
		return builder.String()
	}

	builder.WriteString("=== 分块内容预览 ===\n\n")
	for idx, c := range chunks {
		// Both the running number and the chunk index are shown 1-based.
		fmt.Fprintf(builder, "Chunk #%d (Index %d)\n", idx+1, c.ChunkIndex+1)
		fmt.Fprintf(builder, " chunk_id: %s\n", c.ID)
		fmt.Fprintf(builder, " 类型: %s\n", c.ChunkType)
		fmt.Fprintf(builder, " 内容: %s\n\n", summarizeContent(c.Content))
	}
	if int64(fetched) < total {
		builder.WriteString("提示:文档仍有更多分块,可调整 offset 或多次调用以获取全部内容。\n")
	}
	return builder.String()
}
// summarizeContent returns content with leading and trailing whitespace
// removed, or a fixed placeholder when nothing but whitespace is left.
func summarizeContent(content string) string {
	if text := strings.TrimSpace(content); text != "" {
		return text
	}
	return "(空内容)"
}

View File

@@ -56,7 +56,7 @@ func NewQueryKnowledgeGraphTool(knowledgeService interfaces.KnowledgeBaseService
## 配合使用
1. **关系探索**: query_knowledge_graph → get_chunk_detail查看详细内容
2. **网络分析**: query_knowledge_graph → get_related_chunks扩展上下文
2. **网络分析**: query_knowledge_graph → list_knowledge_chunks扩展上下文
3. **主题研究**: knowledge_search → query_knowledge_graph深入实体关系
## 当前状态
@@ -360,7 +360,7 @@ func (t *QueryKnowledgeGraphTool) Execute(ctx context.Context, args map[string]i
output += "=== 💡 使用提示 ===\n"
output += "- ✓ 结果已跨知识库去重并按相关度排序\n"
output += "- ✓ 使用 get_chunk_detail 获取完整内容\n"
output += "- ✓ 使用 get_related_chunks 探索上下文\n"
output += "- ✓ 使用 list_knowledge_chunks 探索上下文\n"
if !hasGraphConfig {
output += "- ⚠️ 配置图谱抽取以获得更精准的实体关系结果\n"
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"github.com/Tencent/WeKnora/internal/common"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
"gorm.io/gorm"
@@ -11,26 +12,23 @@ import (
// ToolRegistry manages the registration and retrieval of tools
type ToolRegistry struct {
tools map[string]types.Tool
knowledgeBaseService interfaces.KnowledgeBaseService
knowledgeService interfaces.KnowledgeService
chunkService interfaces.ChunkService
db *gorm.DB // gorm.DB interface for database query tool
tools map[string]types.Tool
knowledgeService interfaces.KnowledgeService
chunkService interfaces.ChunkService
db *gorm.DB
}
// NewToolRegistry creates a new tool registry
func NewToolRegistry(
knowledgeBaseService interfaces.KnowledgeBaseService,
knowledgeService interfaces.KnowledgeService,
chunkService interfaces.ChunkService,
db *gorm.DB, // gorm.DB for database operations
) *ToolRegistry {
return &ToolRegistry{
tools: make(map[string]types.Tool),
knowledgeBaseService: knowledgeBaseService,
knowledgeService: knowledgeService,
chunkService: chunkService,
db: db,
tools: make(map[string]types.Tool),
knowledgeService: knowledgeService,
chunkService: chunkService,
db: db,
}
}
@@ -72,13 +70,41 @@ func (r *ToolRegistry) GetFunctionDefinitions() []types.FunctionDefinition {
// ExecuteTool executes a tool by name with the given arguments
func (r *ToolRegistry) ExecuteTool(ctx context.Context, name string, args map[string]interface{}) (*types.ToolResult, error) {
common.PipelineInfo(ctx, "AgentTool", "execute_start", map[string]interface{}{
"tool": name,
"args": args,
})
tool, err := r.GetTool(name)
if err != nil {
common.PipelineError(ctx, "AgentTool", "execute_failed", map[string]interface{}{
"tool": name,
"error": err.Error(),
})
return &types.ToolResult{
Success: false,
Error: err.Error(),
}, err
}
return tool.Execute(ctx, args)
result, execErr := tool.Execute(ctx, args)
fields := map[string]interface{}{
"tool": name,
"args": args,
}
if result != nil {
fields["success"] = result.Success
if result.Error != "" {
fields["error"] = result.Error
}
}
if execErr != nil {
fields["error"] = execErr.Error()
common.PipelineError(ctx, "AgentTool", "execute_done", fields)
} else if result != nil && !result.Success {
common.PipelineWarn(ctx, "AgentTool", "execute_done", fields)
} else {
common.PipelineInfo(ctx, "AgentTool", "execute_done", fields)
}
return result, execErr
}

View File

@@ -121,7 +121,7 @@ func (t *SequentialThinkingTool) Parameters() map[string]interface{} {
"totalThoughts": map[string]interface{}{
"type": "integer",
"description": "Estimated total thoughts needed (numeric value, e.g., 5, 10)",
"minimum": 1,
"minimum": 5,
},
"isRevision": map[string]interface{}{
"type": "boolean",
@@ -188,6 +188,8 @@ func (t *SequentialThinkingTool) Execute(ctx context.Context, args map[string]in
branchKeys = append(branchKeys, k)
}
incomplete := thoughtData.NextThoughtNeeded || thoughtData.NeedsMoreThoughts || thoughtData.ThoughtNumber < thoughtData.TotalThoughts
responseData := map[string]interface{}{
"thought_number": thoughtData.ThoughtNumber,
"total_thoughts": thoughtData.TotalThoughts,
@@ -196,13 +198,19 @@ func (t *SequentialThinkingTool) Execute(ctx context.Context, args map[string]in
"thought_history_length": len(t.thoughtHistory),
"display_type": "thinking",
"thought": thoughtData.Thought,
"incomplete_steps": incomplete,
}
logger.Infof(ctx, "[Tool][SequentialThinking] Execute completed - Thought %d/%d", thoughtData.ThoughtNumber, thoughtData.TotalThoughts)
outputMsg := "Thought process recorded"
if incomplete {
outputMsg = "Thought process recorded - unfinished steps remain, continue exploring and calling tools"
}
return &types.ToolResult{
Success: true,
Output: "Thought process recorded",
Output: outputMsg,
Data: responseData,
}, nil
}

View File

@@ -171,7 +171,7 @@ func (t *TodoWriteTool) Parameters() map[string]interface{} {
},
"tools_to_use": map[string]interface{}{
"type": "array",
"description": "Suggested tools for this step (e.g., ['knowledge_search', 'get_related_chunks'])",
"description": "Suggested tools for this step (e.g., ['knowledge_search', 'list_knowledge_chunks'])",
"items": map[string]interface{}{
"type": "string",
},
@@ -268,8 +268,8 @@ func generatePlanOutput(task string, steps []PlanStep) string {
output += "注意未提供具体步骤。建议创建3-7个结构化步骤以系统化研究。\n\n"
output += "建议的通用流程:\n"
output += "1. 使用 knowledge_search 进行初步信息收集\n"
output += "2. 使用 get_related_chunks 获取关键信息详情\n"
output += "3. 使用 get_related_chunks 扩展上下文理解\n"
output += "2. 使用 list_knowledge_chunks 获取关键信息详情\n"
output += "3. 使用 list_knowledge_chunks 扩展上下文理解\n"
output += "4. 使用 think 工具评估结果并综合答案\n"
return output
}

View File

@@ -23,14 +23,14 @@ const MAX_ITERATIONS = 30 // Max iterations for agent execution
type agentService struct {
cfg *config.Config
modelService interfaces.ModelService
knowledgeBaseService interfaces.KnowledgeBaseService
knowledgeService interfaces.KnowledgeService
chunkService interfaces.ChunkService
mcpServiceService interfaces.MCPServiceService
mcpManager *mcp.MCPManager
eventBus *event.EventBus
db *gorm.DB
webSearchService interfaces.WebSearchService
knowledgeBaseService interfaces.KnowledgeBaseService
knowledgeService interfaces.KnowledgeService
chunkService interfaces.ChunkService
}
// NewAgentService creates a new agent service
@@ -95,7 +95,7 @@ func (s *agentService) CreateAgentEngine(
}
// Create tool registry
toolRegistry := tools.NewToolRegistry(s.knowledgeBaseService, s.knowledgeService, s.chunkService, s.db)
toolRegistry := tools.NewToolRegistry(s.knowledgeService, s.chunkService, s.db)
// Register tools
if err := s.registerTools(ctx, toolRegistry, config, rerankModel, chatModel, sessionID, sessionService); err != nil {
@@ -148,9 +148,9 @@ func (s *agentService) CreateAgentEngine(
}
}
systemPromptTemplate := agent.DefaultSystemPromptTemplate
if config.UseCustomSystemPrompt && config.SystemPrompt != "" {
systemPromptTemplate = config.SystemPrompt
systemPromptTemplate := ""
if config.UseCustomSystemPrompt {
systemPromptTemplate = config.ResolveSystemPrompt(config.WebSearchEnabled)
}
// Create engine with provided EventBus and contextManager
@@ -158,7 +158,6 @@ func (s *agentService) CreateAgentEngine(
config,
chatModel,
toolRegistry,
s.knowledgeBaseService,
eventBus,
kbInfos,
contextManager,
@@ -182,10 +181,7 @@ func (s *agentService) registerTools(
// If no specific tools allowed, register default tools
allowedTools := config.AllowedTools
if len(allowedTools) == 0 {
// Register default tools from config
if s.cfg.Agent != nil && len(s.cfg.Agent.DefaultTools) > 0 {
allowedTools = s.cfg.Agent.DefaultTools
}
allowedTools = tools.DefaultAllowedTools()
}
// If web search is enabled, add web_search to allowedTools
if config.WebSearchEnabled {
@@ -216,13 +212,14 @@ func (s *agentService) registerTools(
config.KnowledgeBases,
rerankModel,
chatModel,
s.cfg,
))
case "get_related_chunks":
registry.RegisterTool(tools.NewGetRelatedChunksTool(s.chunkService, s.knowledgeBaseService))
case "list_knowledge_chunks":
registry.RegisterTool(tools.NewListKnowledgeChunksTool(tenantID, s.knowledgeService, s.chunkService))
case "query_knowledge_graph":
registry.RegisterTool(tools.NewQueryKnowledgeGraphTool(s.knowledgeBaseService))
case "get_document_info":
registry.RegisterTool(tools.NewGetDocumentInfoTool(s.knowledgeService, s.chunkService))
registry.RegisterTool(tools.NewGetDocumentInfoTool(tenantID, s.knowledgeService, s.chunkService))
case "database_query":
registry.RegisterTool(tools.NewDatabaseQueryTool(s.db, tenantID))
case "web_search":

View File

@@ -66,7 +66,7 @@ func (p *PluginChatCompletion) OnEvent(
}
pipelineInfo(ctx, "Completion", "output", map[string]interface{}{
"answer_preview": truncateForLog(chatResponse.Content),
"answer_preview": chatResponse.Content,
"finish_reason": chatResponse.FinishReason,
"completion_tokens": chatResponse.Usage.CompletionTokens,
"prompt_tokens": chatResponse.Usage.PromptTokens,

View File

@@ -2,87 +2,24 @@ package chatpipline
import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"github.com/Tencent/WeKnora/internal/common"
"github.com/Tencent/WeKnora/internal/logger"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
)
const (
logValueMaxRune = 300
defaultStageName = "PIPELINE"
defaultActionName = "info"
pipelineLogPrefix = "[PIPELINE]"
pipelineTruncateEll = "..."
)
func pipelineLog(stage, action string, fields map[string]interface{}) string {
if stage == "" {
stage = defaultStageName
}
if action == "" {
action = defaultActionName
}
builder := strings.Builder{}
builder.Grow(128)
builder.WriteString(pipelineLogPrefix)
builder.WriteString(" stage=")
builder.WriteString(stage)
builder.WriteString(" action=")
builder.WriteString(action)
if len(fields) > 0 {
keys := make([]string, 0, len(fields))
for k := range fields {
keys = append(keys, k)
}
sort.Strings(keys)
for _, key := range keys {
builder.WriteString(" ")
builder.WriteString(key)
builder.WriteString("=")
builder.WriteString(formatLogValue(fields[key]))
}
}
return builder.String()
}
func pipelineInfo(ctx context.Context, stage, action string, fields map[string]interface{}) {
logger.GetLogger(ctx).Info(pipelineLog(stage, action, fields))
common.PipelineInfo(ctx, stage, action, fields)
}
func pipelineWarn(ctx context.Context, stage, action string, fields map[string]interface{}) {
logger.GetLogger(ctx).Warn(pipelineLog(stage, action, fields))
common.PipelineWarn(ctx, stage, action, fields)
}
func pipelineError(ctx context.Context, stage, action string, fields map[string]interface{}) {
logger.GetLogger(ctx).Error(pipelineLog(stage, action, fields))
}
func formatLogValue(value interface{}) string {
switch v := value.(type) {
case string:
return strconv.Quote(truncateForLog(v))
case fmt.Stringer:
return strconv.Quote(truncateForLog(v.String()))
default:
return fmt.Sprintf("%v", v)
}
}
func truncateForLog(content string) string {
content = strings.ReplaceAll(content, "\n", "\\n")
runes := []rune(content)
if len(runes) <= logValueMaxRune {
return content
}
return string(runes[:logValueMaxRune]) + pipelineTruncateEll
common.PipelineError(ctx, stage, action, fields)
}
// prepareChatModel shared logic to prepare chat model and options

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"sort"
"strings"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
@@ -139,6 +140,7 @@ func (p *PluginMerge) OnEvent(ctx context.Context,
"merged_total": len(mergedChunks),
})
mergedChunks = p.populateFAQAnswers(ctx, chatManage, mergedChunks)
mergedChunks = p.expandShortContextWithNeighbors(ctx, chatManage, mergedChunks)
chatManage.MergeResult = mergedChunks
@@ -210,6 +212,126 @@ func mergeImageInfo(ctx context.Context, target *types.SearchResult, source *typ
return nil
}
// populateFAQAnswers rewrites the Content of FAQ-type search results with a
// question/answer text built from the FAQ metadata of the matching chunks.
//
// Results are modified in place and the same slice is always returned. Nothing
// is changed when there are no results, no chunk repository, no tenant ID
// (neither in ctx nor in chatManage), no FAQ results, or when the chunk lookup
// fails. Chunks whose metadata cannot be parsed, is absent, or produces empty
// content are skipped individually.
func (p *PluginMerge) populateFAQAnswers(ctx context.Context, chatManage *types.ChatManage, results []*types.SearchResult) []*types.SearchResult {
	if len(results) == 0 || p.chunkRepo == nil {
		return results
	}

	// The tenant ID from the context wins; the chat state is the fallback.
	tenantID, _ := ctx.Value(types.TenantIDContextKey).(uint)
	if tenantID == 0 && chatManage != nil {
		tenantID = chatManage.TenantID
	}
	if tenantID == 0 {
		pipelineWarn(ctx, "Merge", "faq_enrich_skip", map[string]interface{}{
			"reason": "missing_tenant",
		})
		return results
	}

	// Group FAQ results by chunk ID; several results may share one chunk.
	faqByChunk := make(map[string][]*types.SearchResult)
	for _, res := range results {
		if res != nil && res.ID != "" && res.ChunkType == string(types.ChunkTypeFAQ) {
			faqByChunk[res.ID] = append(faqByChunk[res.ID], res)
		}
	}
	if len(faqByChunk) == 0 {
		return results
	}

	ids := make([]string, 0, len(faqByChunk))
	for chunkID := range faqByChunk {
		ids = append(ids, chunkID)
	}

	chunks, err := p.chunkRepo.ListChunksByID(ctx, tenantID, ids)
	if err != nil {
		pipelineWarn(ctx, "Merge", "faq_chunk_fetch_failed", map[string]interface{}{
			"error": err.Error(),
		})
		return results
	}

	enriched := 0
	for _, chunk := range chunks {
		if chunk == nil {
			continue
		}
		meta, metaErr := chunk.FAQMetadata()
		if metaErr != nil {
			pipelineWarn(ctx, "Merge", "faq_metadata_parse_failed", map[string]interface{}{
				"chunk_id": chunk.ID,
				"error":    metaErr.Error(),
			})
			continue
		}
		if meta == nil {
			continue
		}
		answer := buildFAQAnswerContent(meta)
		if answer == "" {
			continue
		}
		// Every grouped entry is non-nil: nil results were filtered out above.
		for _, res := range faqByChunk[chunk.ID] {
			res.Content = answer
			enriched++
		}
	}

	if enriched > 0 {
		pipelineInfo(ctx, "Merge", "faq_content_enriched", map[string]interface{}{
			"chunk_cnt": enriched,
		})
	}
	return results
}
// buildFAQAnswerContent formats FAQ metadata as plain text.
//
// The output is a "Q: <question>" line when the trimmed standard question is
// non-empty, followed by an "Answer:" line and one "- <answer>" line per
// non-blank answer. It returns an empty string when meta is nil or when
// neither a question nor any answer remains after trimming.
func buildFAQAnswerContent(meta *types.FAQChunkMetadata) string {
	if meta == nil {
		return ""
	}

	lines := make([]string, 0, len(meta.Answers)+2)
	if q := strings.TrimSpace(meta.StandardQuestion); q != "" {
		lines = append(lines, "Q: "+q)
	}

	headerWritten := false
	for _, raw := range meta.Answers {
		answer := strings.TrimSpace(raw)
		if answer == "" {
			continue
		}
		// The section header is emitted once, just before the first real answer.
		if !headerWritten {
			lines = append(lines, "Answer:")
			headerWritten = true
		}
		lines = append(lines, "- "+answer)
	}

	// Every element is already trimmed and non-empty, so the joined text has
	// no leading or trailing whitespace.
	return strings.Join(lines, "\n")
}
func (p *PluginMerge) expandShortContextWithNeighbors(ctx context.Context, chatManage *types.ChatManage, results []*types.SearchResult) []*types.SearchResult {
const (
minLen = 350

View File

@@ -1,26 +1,26 @@
package chatpipline
import (
"context"
"encoding/json"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"context"
"encoding/json"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/Tencent/WeKnora/internal/config"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
"github.com/yanyiwu/gojieba"
"github.com/Tencent/WeKnora/internal/config"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
"github.com/yanyiwu/gojieba"
)
// PluginPreprocess Query preprocessing plugin
type PluginPreprocess struct {
config *config.Config
jieba *gojieba.Jieba
stopwords map[string]struct{}
modelService interfaces.ModelService
config *config.Config
jieba *gojieba.Jieba
stopwords map[string]struct{}
modelService interfaces.ModelService
}
// Regular expressions for text cleaning
@@ -35,10 +35,10 @@ const maxProcessedTokens = 12
// NewPluginPreprocess Creates a new query preprocessing plugin
func NewPluginPreprocess(
eventManager *EventManager,
config *config.Config,
cleaner interfaces.ResourceCleaner,
modelService interfaces.ModelService,
eventManager *EventManager,
config *config.Config,
cleaner interfaces.ResourceCleaner,
modelService interfaces.ModelService,
) *PluginPreprocess {
// Use default dictionary for Jieba tokenizer
jieba := gojieba.NewJieba()
@@ -46,12 +46,12 @@ func NewPluginPreprocess(
// Load stopwords from built-in stopword library
stopwords := loadStopwords()
res := &PluginPreprocess{
config: config,
jieba: jieba,
stopwords: stopwords,
modelService: modelService,
}
res := &PluginPreprocess{
config: config,
jieba: jieba,
stopwords: stopwords,
modelService: modelService,
}
// Register resource cleanup function
if cleaner != nil {
@@ -92,10 +92,10 @@ func (p *PluginPreprocess) ActivationEvents() []types.EventType {
// OnEvent Process events
func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError) *PluginError {
rawQuery := strings.TrimSpace(chatManage.RewriteQuery)
if rawQuery == "" {
return next()
}
rawQuery := strings.TrimSpace(chatManage.RewriteQuery)
if rawQuery == "" {
return next()
}
pipelineInfo(ctx, "Preprocess", "input", map[string]interface{}{
"session_id": chatManage.SessionID,
@@ -108,12 +108,12 @@ func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventTyp
sanitized = normalized
}
var (
processed = sanitized
strategy = "original"
tokenPreview string
tokenCount int
)
var (
processed = sanitized
strategy = "original"
tokenPreview string
tokenCount int
)
switch {
case containsChineseCharacters(sanitized):
@@ -141,17 +141,17 @@ func (p *PluginPreprocess) OnEvent(ctx context.Context, eventType types.EventTyp
strategy = "fallback_original"
}
chatManage.ProcessedQuery = processed
chatManage.QueryIntent = p.detectIntentLLM(ctx, chatManage, sanitized)
chatManage.ProcessedQuery = processed
chatManage.QueryIntent = p.detectIntentLLM(ctx, chatManage, sanitized)
pipelineInfo(ctx, "Preprocess", "output", map[string]interface{}{
"session_id": chatManage.SessionID,
"processed_query": processed,
"strategy": strategy,
"token_count": tokenCount,
"token_preview": truncateForLog(tokenPreview),
"query_intent": chatManage.QueryIntent,
})
pipelineInfo(ctx, "Preprocess", "output", map[string]interface{}{
"session_id": chatManage.SessionID,
"processed_query": processed,
"strategy": strategy,
"token_count": tokenCount,
"token_preview": tokenPreview,
"query_intent": chatManage.QueryIntent,
})
return next()
}
@@ -258,63 +258,63 @@ func normalizeWhitespace(text string) string {
}
func normalizeLatinQuery(text string) string {
text = strings.ToLower(text)
text = multiSpaceRegex.ReplaceAllString(text, " ")
return strings.TrimSpace(text)
text = strings.ToLower(text)
text = multiSpaceRegex.ReplaceAllString(text, " ")
return strings.TrimSpace(text)
}
type intentResp struct {
Intent string `json:"intent"`
Confidence float64 `json:"confidence"`
Intent string `json:"intent"`
Confidence float64 `json:"confidence"`
}
// detectIntentLLM asks the session's chat model to classify text into one of
// the intents "definition", "howto", "compare", "qa" or "general".
//
// The returned value is always one of those five lower-case labels. "general"
// is the fallback whenever classification is not possible: no model service or
// model ID, the model cannot be loaded, the call fails or returns empty
// content, the reply cannot be parsed as JSON, or the reported intent is not
// one of the known labels.
func (p *PluginPreprocess) detectIntentLLM(ctx context.Context, chatManage *types.ChatManage, text string) string {
	const fallbackIntent = "general"

	if p.modelService == nil || chatManage.ChatModelID == "" {
		pipelineWarn(ctx, "IntentDetect", "skip", map[string]interface{}{"reason": "no_model", "session_id": chatManage.SessionID})
		return fallbackIntent
	}
	chatModel, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID)
	if err != nil {
		pipelineWarn(ctx, "IntentDetect", "get_model_failed", map[string]interface{}{"error": err.Error(), "model_id": chatManage.ChatModelID})
		return fallbackIntent
	}
	pipelineInfo(ctx, "IntentDetect", "start", map[string]interface{}{"session_id": chatManage.SessionID, "model_id": chatManage.ChatModelID})

	sys := "You are a query intent classifier. Classify the user's query into one of: definition, howto, compare, qa, general. Respond ONLY with a JSON object {\"intent\": \"...\", \"confidence\": 0.0 } inside a markdown fenced block."
	think := false
	resp, err := chatModel.Chat(ctx, []chat.Message{
		{Role: "system", Content: sys},
		{Role: "user", Content: text},
	}, &chat.ChatOptions{Temperature: 0.0, MaxCompletionTokens: 64, Thinking: &think})
	if err != nil {
		pipelineWarn(ctx, "IntentDetect", "model_call_failed", map[string]interface{}{"error": err.Error()})
		return fallbackIntent
	}
	if resp.Content == "" {
		// Logged separately so the entry carries a reason instead of a nil error.
		pipelineWarn(ctx, "IntentDetect", "model_call_failed", map[string]interface{}{"error": "empty response"})
		return fallbackIntent
	}

	body := extractJSONBody(resp.Content)
	var ir intentResp
	if err := json.Unmarshal([]byte(body), &ir); err != nil {
		pipelineWarn(ctx, "IntentDetect", "parse_failed", map[string]interface{}{"body": body, "error": err.Error()})
		return fallbackIntent
	}
	pipelineInfo(ctx, "IntentDetect", "result", map[string]interface{}{"intent": ir.Intent, "confidence": ir.Confidence})

	// Normalize once and return the normalized value, so that the label that
	// is matched is also the label that is returned (no stray whitespace).
	intent := strings.ToLower(strings.TrimSpace(ir.Intent))
	switch intent {
	case "definition", "howto", "compare", "qa", "general":
		return intent
	default:
		return fallbackIntent
	}
}
func extractJSONBody(text string) string {
t := strings.TrimSpace(text)
// Try fenced block first
if i := strings.Index(t, "{"); i >= 0 {
j := strings.LastIndex(t, "}")
if j > i {
return t[i : j+1]
}
}
return "{}"
t := strings.TrimSpace(text)
// Try fenced block first
if i := strings.Index(t, "{"); i >= 0 {
j := strings.LastIndex(t, "}")
if j > i {
return t[i : j+1]
}
}
return "{}"
}
// Ensure resources are properly released

View File

@@ -1,17 +1,18 @@
package chatpipline
import (
"context"
"encoding/json"
"fmt"
"strings"
"sync"
"time"
"context"
"encoding/json"
"fmt"
"sort"
"strings"
"sync"
"time"
"github.com/Tencent/WeKnora/internal/config"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
"github.com/Tencent/WeKnora/internal/config"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
)
// PluginSearch implements search functionality for chat pipeline
@@ -54,7 +55,7 @@ func (p *PluginSearch) ActivationEvents() []types.EventType {
// OnEvent handles search events in the chat pipeline
func (p *PluginSearch) OnEvent(ctx context.Context,
eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
) *PluginError {
// Get knowledge base IDs list
knowledgeBaseIDs := chatManage.KnowledgeBaseIDs
@@ -117,85 +118,85 @@ func (p *PluginSearch) OnEvent(ctx context.Context,
}
}()
wg.Wait()
wg.Wait()
chatManage.SearchResult = allResults
chatManage.SearchResult = allResults
// If recall is low, attempt query expansion with keyword-focused search
if len(chatManage.SearchResult) < max(1, chatManage.EmbeddingTopK/2) {
pipelineInfo(ctx, "Search", "recall_low", map[string]interface{}{
"current": len(chatManage.SearchResult),
"threshold": chatManage.EmbeddingTopK / 2,
})
expansions := p.expandQueries(ctx, chatManage)
if len(expansions) > 0 {
pipelineInfo(ctx, "Search", "expansion_start", map[string]interface{}{
"variants": len(expansions),
})
expTopK := max(chatManage.EmbeddingTopK*2, chatManage.RerankTopK*2)
expKwTh := chatManage.KeywordThreshold * 0.8
// Concurrent expansion retrieval across queries and KBs
expResults := make([]*types.SearchResult, 0, expTopK*len(expansions))
var muExp sync.Mutex
var wgExp sync.WaitGroup
jobs := len(expansions) * len(knowledgeBaseIDs)
capSem := 16
if jobs < capSem {
capSem = jobs
}
if capSem <= 0 {
capSem = 1
}
sem := make(chan struct{}, capSem)
pipelineInfo(ctx, "Search", "expansion_concurrency", map[string]interface{}{
"jobs": jobs,
"cap": capSem,
})
for _, q := range expansions {
for _, kbID := range knowledgeBaseIDs {
wgExp.Add(1)
go func(q string, kbID string) {
defer wgExp.Done()
sem <- struct{}{}
defer func() { <-sem }()
paramsExp := types.SearchParams{
QueryText: q,
VectorThreshold: chatManage.VectorThreshold,
KeywordThreshold: expKwTh,
MatchCount: expTopK,
DisableVectorMatch: true,
DisableKeywordsMatch: false,
}
res, err := p.knowledgeBaseService.HybridSearch(ctx, kbID, paramsExp)
if err != nil {
pipelineWarn(ctx, "Search", "expansion_error", map[string]interface{}{
"kb_id": kbID,
"error": err.Error(),
})
return
}
if len(res) > 0 {
pipelineInfo(ctx, "Search", "expansion_hits", map[string]interface{}{
"kb_id": kbID,
"query": truncateForLog(q),
"hits": len(res),
})
muExp.Lock()
expResults = append(expResults, res...)
muExp.Unlock()
}
}(q, kbID)
}
}
wgExp.Wait()
if len(expResults) > 0 {
pipelineInfo(ctx, "Search", "expansion_done", map[string]interface{}{
"added": len(expResults),
})
chatManage.SearchResult = append(chatManage.SearchResult, expResults...)
}
}
}
// If recall is low, attempt query expansion with keyword-focused search
if len(chatManage.SearchResult) < max(1, chatManage.EmbeddingTopK/2) {
pipelineInfo(ctx, "Search", "recall_low", map[string]interface{}{
"current": len(chatManage.SearchResult),
"threshold": chatManage.EmbeddingTopK / 2,
})
expansions := p.expandQueries(ctx, chatManage)
if len(expansions) > 0 {
pipelineInfo(ctx, "Search", "expansion_start", map[string]interface{}{
"variants": len(expansions),
})
expTopK := max(chatManage.EmbeddingTopK*2, chatManage.RerankTopK*2)
expKwTh := chatManage.KeywordThreshold * 0.8
// Concurrent expansion retrieval across queries and KBs
expResults := make([]*types.SearchResult, 0, expTopK*len(expansions))
var muExp sync.Mutex
var wgExp sync.WaitGroup
jobs := len(expansions) * len(knowledgeBaseIDs)
capSem := 16
if jobs < capSem {
capSem = jobs
}
if capSem <= 0 {
capSem = 1
}
sem := make(chan struct{}, capSem)
pipelineInfo(ctx, "Search", "expansion_concurrency", map[string]interface{}{
"jobs": jobs,
"cap": capSem,
})
for _, q := range expansions {
for _, kbID := range knowledgeBaseIDs {
wgExp.Add(1)
go func(q string, kbID string) {
defer wgExp.Done()
sem <- struct{}{}
defer func() { <-sem }()
paramsExp := types.SearchParams{
QueryText: q,
VectorThreshold: chatManage.VectorThreshold,
KeywordThreshold: expKwTh,
MatchCount: expTopK,
DisableVectorMatch: true,
DisableKeywordsMatch: false,
}
res, err := p.knowledgeBaseService.HybridSearch(ctx, kbID, paramsExp)
if err != nil {
pipelineWarn(ctx, "Search", "expansion_error", map[string]interface{}{
"kb_id": kbID,
"error": err.Error(),
})
return
}
if len(res) > 0 {
pipelineInfo(ctx, "Search", "expansion_hits", map[string]interface{}{
"kb_id": kbID,
"query": q,
"hits": len(res),
})
muExp.Lock()
expResults = append(expResults, res...)
muExp.Unlock()
}
}(q, kbID)
}
}
wgExp.Wait()
if len(expResults) > 0 {
pipelineInfo(ctx, "Search", "expansion_done", map[string]interface{}{
"added": len(expResults),
})
chatManage.SearchResult = append(chatManage.SearchResult, expResults...)
}
}
}
// Add relevant results from chat history
historyResult := p.getSearchResultFromHistory(chatManage)
@@ -207,13 +208,13 @@ func (p *PluginSearch) OnEvent(ctx context.Context,
chatManage.SearchResult = append(chatManage.SearchResult, historyResult...)
}
// Remove duplicate results
before := len(chatManage.SearchResult)
chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult)
pipelineInfo(ctx, "Search", "dedup_summary", map[string]interface{}{
"before": before,
"after": len(chatManage.SearchResult),
})
// Remove duplicate results
before := len(chatManage.SearchResult)
chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult)
pipelineInfo(ctx, "Search", "dedup_summary", map[string]interface{}{
"before": before,
"after": len(chatManage.SearchResult),
})
// Return if we have results
if len(chatManage.SearchResult) != 0 {
@@ -249,52 +250,52 @@ func (p *PluginSearch) getSearchResultFromHistory(chatManage *types.ChatManage)
}
func removeDuplicateResults(results []*types.SearchResult) []*types.SearchResult {
seen := make(map[string]bool)
contentSig := make(map[string]bool)
var uniqueResults []*types.SearchResult
for _, r := range results {
keys := []string{r.ID}
if r.ParentChunkID != "" {
keys = append(keys, "parent:"+r.ParentChunkID)
}
if r.KnowledgeID != "" {
keys = append(keys, fmt.Sprintf("kb:%s#%d", r.KnowledgeID, r.ChunkIndex))
}
dup := false
for _, k := range keys {
if seen[k] {
dup = true
break
}
}
if dup {
continue
}
sig := buildContentSignature(r.Content)
if sig != "" {
if contentSig[sig] {
continue
}
contentSig[sig] = true
}
for _, k := range keys {
seen[k] = true
}
uniqueResults = append(uniqueResults, r)
}
return uniqueResults
seen := make(map[string]bool)
contentSig := make(map[string]bool)
var uniqueResults []*types.SearchResult
for _, r := range results {
keys := []string{r.ID}
if r.ParentChunkID != "" {
keys = append(keys, "parent:"+r.ParentChunkID)
}
if r.KnowledgeID != "" {
keys = append(keys, fmt.Sprintf("kb:%s#%d", r.KnowledgeID, r.ChunkIndex))
}
dup := false
for _, k := range keys {
if seen[k] {
dup = true
break
}
}
if dup {
continue
}
sig := buildContentSignature(r.Content)
if sig != "" {
if contentSig[sig] {
continue
}
contentSig[sig] = true
}
for _, k := range keys {
seen[k] = true
}
uniqueResults = append(uniqueResults, r)
}
return uniqueResults
}
func buildContentSignature(content string) string {
c := strings.ToLower(strings.TrimSpace(content))
if c == "" {
return ""
}
c = strings.Join(strings.Fields(c), " ")
if len(c) > 128 {
c = c[:128]
}
return c
c := strings.ToLower(strings.TrimSpace(content))
if c == "" {
return ""
}
c = strings.Join(strings.Fields(c), " ")
if len(c) > 128 {
c = c[:128]
}
return c
}
// searchKnowledgeBases performs KB searches for rewrite and processed queries across KB IDs
@@ -375,6 +376,9 @@ func (p *PluginSearch) searchKnowledgeBases(ctx context.Context, knowledgeBaseID
wg.Wait()
}
// Normalize keyword retriever scores after collecting all results from multiple knowledge bases
normalizeKeywordSearchResults(ctx, results)
pipelineInfo(ctx, "Search", "kb_result_summary", map[string]interface{}{
"total_hits": len(results),
})
@@ -472,8 +476,8 @@ func convertWebSearchResults(webResults []*types.WebSearchResult) []*types.Searc
ChunkIndex: 0,
KnowledgeTitle: webResult.Title,
StartAt: 0,
EndAt: len(content),
Seq: i,
EndAt: runeLen(content),
Seq: 1,
Score: score,
MatchType: types.MatchTypeWebSearch,
SubChunkID: []string{},
@@ -483,7 +487,7 @@ func convertWebSearchResults(webResults []*types.WebSearchResult) []*types.Searc
"title": webResult.Title,
"snippet": webResult.Snippet,
},
ChunkType: "web_search",
ChunkType: string(types.ChunkTypeWebSearch),
ParentChunkID: "",
ImageInfo: "",
KnowledgeFilename: "",
@@ -500,79 +504,193 @@ func convertWebSearchResults(webResults []*types.WebSearchResult) []*types.Searc
return results
}
// expandQueries asks the chat model for paraphrases and keyword variants of the
// rewritten query so that keyword-based retrieval can recall more documents.
//
// It returns nil when no model service or chat model ID is configured, when
// the model cannot be loaded, or when the model call fails or returns empty
// content. Otherwise it returns at most 5 variants, trimmed, de-duplicated
// case-insensitively, and excluding anything equal to the original, rewritten
// or processed query.
//
// The model is asked for a JSON array. If the reply cannot be parsed as a
// non-empty JSON array (for example because it was truncated by the token
// limit), the reply is split into lines instead; lines that are only markdown
// or JSON scaffolding are discarded (see cleanExpansionLine).
func (p *PluginSearch) expandQueries(ctx context.Context, chatManage *types.ChatManage) []string {
	if p.modelService == nil || chatManage.ChatModelID == "" {
		pipelineWarn(ctx, "Search", "expansion_skip", map[string]interface{}{
			"reason": "no_model",
		})
		return nil
	}
	model, err := p.modelService.GetChatModel(ctx, chatManage.ChatModelID)
	if err != nil {
		pipelineWarn(ctx, "Search", "expansion_get_model_failed", map[string]interface{}{
			"error": err.Error(),
		})
		return nil
	}

	sys := "Generate up to 5 diverse paraphrases or keyword variants for the user query to improve keyword-based search recall. Respond ONLY with a JSON array of strings inside a fenced code block."
	usr := chatManage.RewriteQuery
	think := false
	resp, err := model.Chat(ctx, []chat.Message{
		{Role: "system", Content: sys},
		{Role: "user", Content: usr},
	}, &chat.ChatOptions{Temperature: 0.2, MaxCompletionTokens: 200, Thinking: &think})
	if err != nil || resp.Content == "" {
		pipelineWarn(ctx, "Search", "expansion_model_call_failed", map[string]interface{}{
			"error": err,
		})
		return nil
	}

	body := extractJSONBlock(resp.Content)
	var arr []string
	if err := json.Unmarshal([]byte(body), &arr); err != nil || len(arr) == 0 {
		// Fallback: treat each line of the reply as a candidate, but drop code
		// fences, lone brackets and JSON punctuation so that scaffolding is
		// never sent to the retriever as a query.
		for _, l := range strings.Split(resp.Content, "\n") {
			if v := cleanExpansionLine(l); v != "" {
				arr = append(arr, v)
			}
		}
	}

	// Seed the de-duplication set with the queries that are searched anyway.
	uniq := make(map[string]struct{})
	base := []string{chatManage.Query, chatManage.RewriteQuery, chatManage.ProcessedQuery}
	for _, b := range base {
		if s := strings.TrimSpace(b); s != "" {
			uniq[strings.ToLower(s)] = struct{}{}
		}
	}

	expansions := make([]string, 0, len(arr))
	for _, a := range arr {
		s := strings.TrimSpace(a)
		if s == "" {
			continue
		}
		key := strings.ToLower(s)
		if _, ok := uniq[key]; ok {
			continue
		}
		uniq[key] = struct{}{}
		expansions = append(expansions, s)
		if len(expansions) >= 5 {
			break
		}
	}
	pipelineInfo(ctx, "Search", "expansion_result", map[string]interface{}{
		"variants": len(expansions),
	})
	return expansions
}

// cleanExpansionLine reduces one line of a model reply to a bare query string.
// It returns "" for blank lines, markdown code-fence lines and lines that
// consist only of JSON array brackets; otherwise it strips a trailing comma
// and surrounding double quotes.
func cleanExpansionLine(line string) string {
	s := strings.TrimSpace(line)
	if s == "" || strings.HasPrefix(s, "```") {
		return ""
	}
	s = strings.TrimSpace(strings.TrimSuffix(s, ","))
	s = strings.TrimSpace(strings.Trim(s, "\""))
	switch s {
	case "", "[", "]", "[]":
		return ""
	}
	return s
}
// extractJSONBlock returns the substring of text that spans from the first
// '[' to the last ']' (both inclusive), which lets a JSON array be pulled out
// of surrounding prose or a fenced code block.
//
// When text has no '[' or no ']' after the first '[', the literal "[]" is
// returned so that callers can always hand the result to a JSON decoder. The
// substring is not validated as JSON.
func extractJSONBlock(text string) string {
	t := strings.TrimSpace(text)
	if i := strings.Index(t, "["); i >= 0 {
		j := strings.LastIndex(t, "]")
		if j > i {
			return t[i : j+1]
		}
	}
	return "[]"
}
// normalizeKeywordSearchResults rescales, in place, the Score of every result
// whose MatchType is types.MatchTypeKeywords so that it lies in [0,1]. Results
// of any other match type are not touched.
//
// Rules applied to the keyword results as one group:
//   - no keyword result: nothing happens;
//   - exactly one: its score becomes 1.0;
//   - all scores equal: every score becomes 1.0;
//   - fewer than 10 results: linear min/max scaling;
//   - 10 or more results: the scaling bounds are the values at the 5% and 95%
//     positions of the sorted scores, and scores outside the bounds are
//     clamped to them first. If those two bounds coincide, every score
//     becomes 1.0.
func normalizeKeywordSearchResults(ctx context.Context, results []*types.SearchResult) {
	// Pick out the keyword hits; they share pointers with the input slice, so
	// writing to them updates the caller's results.
	var hits []*types.SearchResult
	for _, item := range results {
		if item.MatchType == types.MatchTypeKeywords {
			hits = append(hits, item)
		}
	}

	// assignAll overwrites every keyword score with the same value.
	assignAll := func(v float64) {
		for _, h := range hits {
			h.Score = v
		}
	}

	switch len(hits) {
	case 0:
		return
	case 1:
		hits[0].Score = 1.0
		return
	}

	// Global raw bounds across all keyword hits.
	rawLow, rawHigh := hits[0].Score, hits[0].Score
	for _, h := range hits {
		if h.Score < rawLow {
			rawLow = h.Score
		}
		if h.Score > rawHigh {
			rawHigh = h.Score
		}
	}
	if rawHigh <= rawLow {
		assignAll(1.0)
		pipelineInfo(ctx, "Search", "keyword_scores_no_variance", map[string]interface{}{
			"count": len(hits),
			"score": rawLow,
		})
		return
	}

	// For larger groups, replace the raw bounds with percentile positions of
	// the sorted scores so that a few extreme values do not dominate.
	lower, upper := rawLow, rawHigh
	if total := len(hits); total >= 10 {
		ordered := make([]float64, 0, total)
		for _, h := range hits {
			ordered = append(ordered, h.Score)
		}
		sort.Float64s(ordered)
		if idx := total * 5 / 100; idx < total {
			lower = ordered[idx]
		}
		if idx := total * 95 / 100; idx < total {
			upper = ordered[idx]
		}
	}

	span := upper - lower
	if !(span > 0) {
		// The percentile bounds collapsed onto a single value.
		assignAll(1.0)
		return
	}

	for _, h := range hits {
		// Clamp into [lower, upper] before scaling.
		v := h.Score
		switch {
		case v < lower:
			v = lower
		case v > upper:
			v = upper
		}
		scaled := (v - lower) / span
		switch {
		case scaled < 0:
			scaled = 0
		case scaled > 1:
			scaled = 1
		}
		h.Score = scaled
	}
	pipelineInfo(ctx, "Search", "normalize_keyword_scores", map[string]interface{}{
		"count":         len(hits),
		"raw_min":       rawLow,
		"raw_max":       rawHigh,
		"normalize_min": lower,
		"normalize_max": upper,
	})
}

View File

@@ -39,6 +39,16 @@ func NewChunkService(
}
}
// GetRepository returns the chunk repository held by this service.
// The method takes no arguments.
//
// Returns:
//   - interfaces.ChunkRepository: the service's chunkRepository field
func (s *chunkService) GetRepository() interfaces.ChunkRepository {
return s.chunkRepository
}
// CreateChunks creates multiple chunks
// This method persists a batch of document chunks to the repository
// Parameters:

View File

@@ -106,6 +106,16 @@ func NewKnowledgeService(
}, nil
}
// GetRepository returns the knowledge repository held by this service.
// The method takes no arguments.
//
// Returns:
//   - interfaces.KnowledgeRepository: the service's repo field
func (s *knowledgeService) GetRepository() interfaces.KnowledgeRepository {
return s.repo
}
// CreateKnowledgeFromFile creates a knowledge entry from an uploaded file
func (s *knowledgeService) CreateKnowledgeFromFile(ctx context.Context,
kbID string, file *multipart.FileHeader, metadata map[string]string, enableMultimodel *bool,
@@ -413,7 +423,7 @@ func (s *knowledgeService) CreateKnowledgeFromManual(ctx context.Context,
now := time.Now()
title := safeTitle
if title == "" {
title = fmt.Sprintf("手工知识-%s", now.Format("20060102-150405"))
title = fmt.Sprintf("Knowledge-%s", now.Format("20060102-150405"))
}
fileName := ensureManualFileName(title)
@@ -844,7 +854,8 @@ func (s *knowledgeService) processDocument(ctx context.Context,
}
// Split file into chunks using document reader service
span.AddEvent("start split file")
logger.GetLogger(ctx).Infof("processDocument split file content size: %d, file name: %s, file type: %s, separators: %v, enable multimodal: %v",
len(contentBytes), knowledge.FileName, knowledge.FileType, kb.ChunkingConfig.Separators, enableMultimodel)
resp, err := s.docReaderClient.ReadFromFile(ctx, &proto.ReadFromFileRequest{
FileContent: contentBytes,
FileName: knowledge.FileName,
@@ -3040,41 +3051,70 @@ func ensureManualFileName(title string) string {
return trimmed + manualFileExtension
}
// splitManualContent breaks manually entered text into passages, one per
// paragraph. Windows line endings are converted to "\n", paragraphs are
// separated by a blank line ("\n\n"), and each passage is trimmed; blank
// passages are dropped. Blank input yields an empty, non-nil slice.
func splitManualContent(content string) []string {
	trimmed := strings.TrimSpace(content)
	if len(trimmed) == 0 {
		return []string{}
	}

	paragraphs := strings.Split(strings.ReplaceAll(trimmed, "\r\n", "\n"), "\n\n")
	passages := make([]string, 0, len(paragraphs))
	for i := range paragraphs {
		if p := strings.TrimSpace(paragraphs[i]); p != "" {
			passages = append(passages, p)
		}
	}
	if len(passages) > 0 {
		return passages
	}
	// Nothing survived the split: fall back to the whole trimmed text.
	return append(passages, trimmed)
}
func (s *knowledgeService) triggerManualProcessing(ctx context.Context,
kb *types.KnowledgeBase, knowledge *types.Knowledge, content string, sync bool,
) {
passages := splitManualContent(content)
if len(passages) == 0 {
passages = []string{content}
clean := strings.TrimSpace(content)
if clean == "" {
return
}
// 使用 docreader 按照 MD 格式处理,并使用知识库配置的分隔符
contentBytes := []byte(clean)
fileName := ensureManualFileName(knowledge.Title)
fileType := "md"
// 检查是否需要启用多模态(对于手动内容通常不需要,但保持一致性)
enableMultimodel := kb.ChunkingConfig.EnableMultimodal && kb.StorageConfig.Provider != ""
logger.GetLogger(ctx).Infof("triggerManualProcessing split manual content size: %d, file name: %s, file type: %s, separators: %v, enable multimodal: %v",
len(contentBytes), fileName, fileType, kb.ChunkingConfig.Separators, enableMultimodel)
// 调用 docreader 解析 markdown 内容
resp, err := s.docReaderClient.ReadFromFile(ctx, &proto.ReadFromFileRequest{
FileContent: contentBytes,
FileName: fileName,
FileType: fileType,
ReadConfig: &proto.ReadConfig{
ChunkSize: int32(kb.ChunkingConfig.ChunkSize),
ChunkOverlap: int32(kb.ChunkingConfig.ChunkOverlap),
Separators: kb.ChunkingConfig.Separators,
EnableMultimodal: enableMultimodel,
StorageConfig: &proto.StorageConfig{
Provider: proto.StorageProvider(proto.StorageProvider_value[strings.ToUpper(kb.StorageConfig.Provider)]),
Region: kb.StorageConfig.Region,
BucketName: kb.StorageConfig.BucketName,
AccessKeyId: kb.StorageConfig.SecretID,
SecretAccessKey: kb.StorageConfig.SecretKey,
AppId: kb.StorageConfig.AppID,
PathPrefix: kb.StorageConfig.PathPrefix,
},
VlmConfig: &proto.VLMConfig{
ModelName: kb.VLMConfig.ModelName,
BaseUrl: kb.VLMConfig.BaseURL,
ApiKey: kb.VLMConfig.APIKey,
InterfaceType: kb.VLMConfig.InterfaceType,
},
},
RequestId: ctx.Value(types.RequestIDContextKey).(string),
})
if err != nil {
logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID).
WithField("error", err).Errorf("triggerManualProcessing read file failed")
knowledge.ParseStatus = "failed"
knowledge.ErrorMessage = err.Error()
knowledge.UpdatedAt = time.Now()
s.repo.UpdateKnowledge(ctx, knowledge)
return
}
if sync {
s.processDocumentFromPassage(ctx, kb, knowledge, passages)
s.processChunks(ctx, kb, knowledge, resp.Chunks)
return
}
newCtx := logger.CloneContext(ctx)
go s.processDocumentFromPassage(newCtx, kb, knowledge, passages)
go s.processChunks(newCtx, kb, knowledge, resp.Chunks)
}
func (s *knowledgeService) cleanupKnowledgeResources(ctx context.Context, knowledge *types.Knowledge) error {

View File

@@ -45,6 +45,16 @@ func NewKnowledgeBaseService(repo interfaces.KnowledgeBaseRepository,
}
}
// GetRepository returns the knowledge base repository held by this service.
// The method takes no arguments.
//
// Returns:
//   - interfaces.KnowledgeBaseRepository: the service's repo field
func (s *knowledgeBaseService) GetRepository() interfaces.KnowledgeBaseRepository {
return s.repo
}
// CreateKnowledgeBase creates a new knowledge base
func (s *knowledgeBaseService) CreateKnowledgeBase(ctx context.Context,
kb *types.KnowledgeBase,
@@ -303,18 +313,18 @@ func (s *knowledgeBaseService) HybridSearch(ctx context.Context,
var embeddingModel embedding.Embedder
var kb *types.KnowledgeBase
kb, err = s.repo.GetKnowledgeBaseByID(ctx, id)
if err != nil {
logger.ErrorWithFields(ctx, err, map[string]interface{}{
"knowledge_base_id": id,
})
return nil, err
}
// Add vector retrieval params if supported
if retrieveEngine.SupportRetriever(types.VectorRetrieverType) && !params.DisableVectorMatch {
logger.Info(ctx, "Vector retrieval supported, preparing vector retrieval parameters")
kb, err = s.repo.GetKnowledgeBaseByID(ctx, id)
if err != nil {
logger.ErrorWithFields(ctx, err, map[string]interface{}{
"knowledge_base_id": id,
})
return nil, err
}
logger.Infof(ctx, "Getting embedding model, model ID: %s", kb.EmbeddingModelID)
embeddingModel, err = s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID)
if err != nil {
@@ -343,8 +353,8 @@ func (s *knowledgeBaseService) HybridSearch(ctx context.Context,
logger.Info(ctx, "Vector retrieval parameters setup completed")
}
// Add keyword retrieval params if supported
if retrieveEngine.SupportRetriever(types.KeywordsRetrieverType) && !params.DisableKeywordsMatch {
// Add keyword retrieval params if supported and not FAQ
if retrieveEngine.SupportRetriever(types.KeywordsRetrieverType) && !params.DisableKeywordsMatch && kb.Type != types.KnowledgeBaseTypeFAQ {
logger.Info(ctx, "Keyword retrieval supported, preparing keyword retrieval parameters")
retrieveParams = append(retrieveParams, types.RetrieveParams{
Query: params.QueryText,
@@ -372,41 +382,6 @@ func (s *knowledgeBaseService) HybridSearch(ctx context.Context,
return nil, err
}
// Normalize keyword retriever scores into [0,1] per-engine batch
for i := range retrieveResults {
rr := retrieveResults[i]
if rr.Error != nil || rr.RetrieverType != types.KeywordsRetrieverType || len(rr.Results) == 0 {
continue
}
minS := rr.Results[0].Score
maxS := rr.Results[0].Score
for _, r := range rr.Results {
if r.Score < minS {
minS = r.Score
}
if r.Score > maxS {
maxS = r.Score
}
}
if maxS > minS {
for _, r := range rr.Results {
ns := (r.Score - minS) / (maxS - minS)
if ns < 0 {
ns = 0
} else if ns > 1 {
ns = 1
}
r.Score = ns
}
logger.Infof(ctx, "Normalized keyword scores for engine %s: min=%f, max=%f", rr.RetrieverEngineType, minS, maxS)
} else {
for _, r := range rr.Results {
r.Score = 1.0
}
logger.Infof(ctx, "Keyword scores have no variance for engine %s, set to 1.0", rr.RetrieverEngineType)
}
}
// Collect all results from different retrievers and deduplicate by chunk ID
logger.Infof(ctx, "Processing retrieval results")
matchResults := []*types.IndexWithScore{}

View File

@@ -7,7 +7,6 @@ import (
"fmt"
"strings"
"github.com/Tencent/WeKnora/internal/agent"
chatpipline "github.com/Tencent/WeKnora/internal/application/service/chat_pipline"
llmcontext "github.com/Tencent/WeKnora/internal/application/service/llmcontext"
"github.com/Tencent/WeKnora/internal/config"
@@ -485,8 +484,8 @@ func (s *sessionService) KnowledgeQA(ctx context.Context, session *types.Session
fallbackResponse = tenantConv.FallbackResponse
enableRewrite = tenantConv.EnableRewrite
if tenantConv.MaxTokens != 0 {
summaryConfig.MaxTokens = tenantConv.MaxTokens
if tenantConv.MaxCompletionTokens != 0 {
summaryConfig.MaxCompletionTokens = tenantConv.MaxCompletionTokens
}
if tenantConv.Prompt != "" {
summaryConfig.Prompt = tenantConv.Prompt
@@ -882,10 +881,10 @@ func (s *sessionService) AgentQA(ctx context.Context, session *types.Session, qu
WebSearchEnabled: session.AgentConfig.WebSearchEnabled, // Web search enabled from session config
}
if tenantInfo.AgentConfig.UseCustomSystemPrompt {
agentConfig.SystemPrompt = tenantInfo.AgentConfig.SystemPrompt
} else {
agentConfig.SystemPrompt = agent.DefaultSystemPromptTemplate
agentConfig.UseCustomSystemPrompt = tenantInfo.AgentConfig.UseCustomSystemPrompt
if agentConfig.UseCustomSystemPrompt {
agentConfig.SystemPromptWebEnabled = tenantInfo.AgentConfig.ResolveSystemPrompt(true)
agentConfig.SystemPromptWebDisabled = tenantInfo.AgentConfig.ResolveSystemPrompt(false)
}
// Set web search max results from tenant config (default: 5)

View File

@@ -1,12 +1,18 @@
package common
import (
"context"
"encoding/json"
"fmt"
"maps"
"regexp"
"slices"
"sort"
"strconv"
"strings"
"unicode/utf8"
"github.com/Tencent/WeKnora/internal/logger"
)
// ToInterfaceSlice converts a slice of strings to a slice of empty interfaces.
@@ -132,3 +138,83 @@ func CleanInvalidUTF8(s string) string {
return b.String()
}
const (
pipelineLogValueMaxRune = 300
defaultPipelineStage = "PIPELINE"
defaultPipelineAction = "info"
pipelineLogPrefix = "[PIPELINE]"
pipelineTruncateEll = "..."
)
// PipelineLog renders one structured pipeline log line of the form
//
//	<prefix> stage=<stage> action=<action> key1=value1 key2=value2 ...
//
// An empty stage or action is replaced by the package default. Fields are
// emitted in ascending key order so the output is deterministic, and each
// value is rendered by formatPipelineLogValue.
func PipelineLog(stage, action string, fields map[string]interface{}) string {
	if stage == "" {
		stage = defaultPipelineStage
	}
	if action == "" {
		action = defaultPipelineAction
	}

	var sb strings.Builder
	sb.Grow(128)
	sb.WriteString(pipelineLogPrefix + " stage=" + stage + " action=" + action)

	// Map iteration order is random, so sort the keys first.
	names := make([]string, 0, len(fields))
	for name := range fields {
		names = append(names, name)
	}
	sort.Strings(names)
	for _, name := range names {
		sb.WriteString(" " + name + "=" + formatPipelineLogValue(fields[name]))
	}
	return sb.String()
}
// PipelineInfo formats stage, action and fields with PipelineLog and writes the
// resulting line at info level through the logger obtained from ctx.
func PipelineInfo(ctx context.Context, stage, action string, fields map[string]interface{}) {
logger.GetLogger(ctx).Info(PipelineLog(stage, action, fields))
}
// PipelineWarn formats stage, action and fields with PipelineLog and writes the
// resulting line at warning level through the logger obtained from ctx.
func PipelineWarn(ctx context.Context, stage, action string, fields map[string]interface{}) {
logger.GetLogger(ctx).Warn(PipelineLog(stage, action, fields))
}
// PipelineError formats stage, action and fields with PipelineLog and writes
// the resulting line at error level through the logger obtained from ctx.
func PipelineError(ctx context.Context, stage, action string, fields map[string]interface{}) {
logger.GetLogger(ctx).Error(PipelineLog(stage, action, fields))
}
// formatPipelineLogValue converts a single field value to its log
// representation. Strings and fmt.Stringer values are truncated with
// truncatePipelineValue and then quoted; every other value is printed with the
// %v verb, unquoted.
func formatPipelineLogValue(value interface{}) string {
	if text, isString := value.(string); isString {
		return strconv.Quote(truncatePipelineValue(text))
	}
	if stringer, isStringer := value.(fmt.Stringer); isStringer {
		return strconv.Quote(truncatePipelineValue(stringer.String()))
	}
	return fmt.Sprintf("%v", value)
}
// truncatePipelineValue prepares text for a single-line log entry: newlines
// are replaced by the two characters `\n`, and text longer than
// pipelineLogValueMaxRune runes is cut to that many runes followed by
// pipelineTruncateEll. Lengths are counted in runes, not bytes.
func truncatePipelineValue(content string) string {
	escaped := strings.ReplaceAll(content, "\n", "\\n")
	if chars := []rune(escaped); len(chars) > pipelineLogValueMaxRune {
		return string(chars[:pipelineLogValueMaxRune]) + pipelineTruncateEll
	}
	return escaped
}
// TruncateForLog is the exported entry point to truncatePipelineValue: it
// returns content with newlines escaped and, when it exceeds the pipeline log
// rune limit, cut short with a trailing ellipsis.
func TruncateForLog(content string) string {
return truncatePipelineValue(content)
}

View File

@@ -23,7 +23,6 @@ type Config struct {
DocReader *DocReaderConfig `yaml:"docreader" json:"docreader"`
StreamManager *StreamManagerConfig `yaml:"stream_manager" json:"stream_manager"`
ExtractManager *ExtractManagerConfig `yaml:"extract" json:"extract"`
Agent *AgentGlobalConfig `yaml:"agent" json:"agent"`
WebSearch *WebSearchConfig `yaml:"web_search" json:"web_search"`
}
@@ -191,15 +190,6 @@ func LoadConfig() (*Config, error) {
return &cfg, nil
}
// AgentGlobalConfig represents the global agent configuration.
// Per its struct tags it is decoded from YAML/JSON; the Config struct binds it
// to the top-level `agent` key.
type AgentGlobalConfig struct {
// Enabled maps to the `enabled` key.
Enabled bool `yaml:"enabled" json:"enabled"`
// DefaultMaxIterations maps to `default_max_iterations`.
// NOTE(review): presumably the default iteration cap for agent runs — confirm against callers.
DefaultMaxIterations int `yaml:"default_max_iterations" json:"default_max_iterations"`
// DefaultTemperature maps to `default_temperature`.
DefaultTemperature float64 `yaml:"default_temperature" json:"default_temperature"`
// ReflectionEnabled maps to `reflection_enabled`.
ReflectionEnabled bool `yaml:"reflection_enabled" json:"reflection_enabled"`
// DefaultTools maps to `default_tools`, a list of tool names.
DefaultTools []string `yaml:"default_tools" json:"default_tools"`
}
// WebSearchConfig represents the web search configuration
type WebSearchConfig struct {
Providers []WebSearchProviderConfig `yaml:"providers" json:"providers"`

View File

@@ -204,8 +204,8 @@ func (h *Handler) createDefaultSummaryConfig(ctx context.Context) *types.Summary
if tenant.ConversationConfig.Temperature > 0 {
cfg.Temperature = tenant.ConversationConfig.Temperature
}
if tenant.ConversationConfig.MaxTokens > 0 {
cfg.MaxTokens = tenant.ConversationConfig.MaxTokens
if tenant.ConversationConfig.MaxCompletionTokens > 0 {
cfg.MaxCompletionTokens = tenant.ConversationConfig.MaxCompletionTokens
}
}
@@ -221,7 +221,7 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
// Determine default values: tenant config first, then config.yaml
var defaultPrompt, defaultContextTemplate, defaultNoMatchPrefix string
var defaultTemperature float64
var defaultMaxTokens int
var defaultMaxCompletionTokens int
if tenant != nil && tenant.ConversationConfig != nil {
useSystemPrompt := tenant.ConversationConfig.UseCustomSystemPrompt
@@ -234,7 +234,7 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
defaultContextTemplate = tenant.ConversationConfig.ContextTemplate
}
defaultTemperature = tenant.ConversationConfig.Temperature
defaultMaxTokens = tenant.ConversationConfig.MaxTokens
defaultMaxCompletionTokens = tenant.ConversationConfig.MaxCompletionTokens
}
// Fall back to config.yaml if tenant config is empty
@@ -247,8 +247,8 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
if defaultTemperature == 0 {
defaultTemperature = h.config.Conversation.Summary.Temperature
}
if defaultMaxTokens == 0 {
defaultMaxTokens = h.config.Conversation.Summary.MaxTokens
if defaultMaxCompletionTokens == 0 {
defaultMaxCompletionTokens = h.config.Conversation.Summary.MaxCompletionTokens
}
defaultNoMatchPrefix = h.config.Conversation.Summary.NoMatchPrefix
@@ -262,8 +262,8 @@ func (h *Handler) fillSummaryConfigDefaults(ctx context.Context, config *types.S
if config.Temperature == 0 {
config.Temperature = defaultTemperature
}
if config.MaxTokens == 0 {
config.MaxTokens = defaultMaxTokens
if config.MaxCompletionTokens == 0 {
config.MaxCompletionTokens = defaultMaxCompletionTokens
}
if config.NoMatchPrefix == "" {
config.NoMatchPrefix = defaultNoMatchPrefix

View File

@@ -463,7 +463,9 @@ func (h *Handler) handleKnowledgeQARequest(
go func() {
defer func() {
if r := recover(); r != nil {
logger.ErrorWithFields(asyncCtx, errors.NewInternalServerError("Knowledge QA service panicked"), nil)
buf := make([]byte, 10240)
runtime.Stack(buf, true)
logger.ErrorWithFields(asyncCtx, errors.NewInternalServerError(fmt.Sprintf("Knowledge QA service panicked: %v\n%s", r, string(buf))), nil)
}
}()
err := h.sessionService.KnowledgeQA(asyncCtx, session, query, knowledgeBaseIDs, assistantMessage.ID, summaryModelID, webSearchEnabled, eventBus)

View File

@@ -233,15 +233,16 @@ func (h *TenantHandler) ListTenants(c *gin.Context) {
// AgentConfigRequest represents the request body for updating agent configuration
type AgentConfigRequest struct {
Enabled bool `json:"enabled"`
MaxIterations int `json:"max_iterations"`
ReflectionEnabled bool `json:"reflection_enabled"`
AllowedTools []string `json:"allowed_tools"`
Temperature float64 `json:"temperature"`
ThinkingModelID string `json:"thinking_model_id"`
RerankModelID string `json:"rerank_model_id"`
SystemPrompt string `json:"system_prompt,omitempty"` // System prompt template with placeholders (optional)
UseCustomPrompt *bool `json:"use_custom_system_prompt"`
Enabled bool `json:"enabled"`
MaxIterations int `json:"max_iterations"`
ReflectionEnabled bool `json:"reflection_enabled"`
AllowedTools []string `json:"allowed_tools"`
Temperature float64 `json:"temperature"`
ThinkingModelID string `json:"thinking_model_id"`
RerankModelID string `json:"rerank_model_id"`
SystemPromptWebEnabled string `json:"system_prompt_web_enabled,omitempty"`
SystemPromptWebDisabled string `json:"system_prompt_web_disabled,omitempty"`
UseCustomPrompt *bool `json:"use_custom_system_prompt"`
}
// GetTenantAgentConfig retrieves the agent configuration for a tenant
@@ -282,46 +283,50 @@ func (h *TenantHandler) GetTenantAgentConfig(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{
"success": true,
"data": gin.H{
"max_iterations": agent.DefaultAgentMaxIterations,
"reflection_enabled": agent.DefaultAgentReflectionEnabled,
"allowed_tools": agenttools.DefaultAllowedTools(),
"temperature": agent.DefaultAgentTemperature,
"thinking_model_id": "",
"rerank_model_id": "",
"system_prompt": agent.DefaultSystemPromptTemplate,
"use_custom_system_prompt": false,
"available_tools": availableTools,
"available_placeholders": availablePlaceholders,
"max_iterations": agent.DefaultAgentMaxIterations,
"reflection_enabled": agent.DefaultAgentReflectionEnabled,
"allowed_tools": agenttools.DefaultAllowedTools(),
"temperature": agent.DefaultAgentTemperature,
"thinking_model_id": "",
"rerank_model_id": "",
"system_prompt_web_enabled": agent.ProgressiveRAGSystemPromptWithWeb,
"system_prompt_web_disabled": agent.ProgressiveRAGSystemPromptWithoutWeb,
"use_custom_system_prompt": false,
"available_tools": availableTools,
"available_placeholders": availablePlaceholders,
},
})
return
}
// Get system prompt, use default if empty
systemPrompt := tenant.AgentConfig.SystemPrompt
// Get system prompts for both web search states, use defaults if empty
systemPromptWithWeb := tenant.AgentConfig.ResolveSystemPrompt(true)
if systemPromptWithWeb == "" {
systemPromptWithWeb = agent.ProgressiveRAGSystemPromptWithWeb
}
systemPromptWithoutWeb := tenant.AgentConfig.ResolveSystemPrompt(false)
if systemPromptWithoutWeb == "" {
systemPromptWithoutWeb = agent.ProgressiveRAGSystemPromptWithoutWeb
}
useCustomPrompt := tenant.AgentConfig.UseCustomSystemPrompt
if !useCustomPrompt && systemPrompt != "" && systemPrompt != agent.DefaultSystemPromptTemplate {
useCustomPrompt = true
}
if systemPrompt == "" {
systemPrompt = agent.DefaultSystemPromptTemplate
}
logger.Infof(ctx, "Retrieved tenant agent config successfully, Tenant ID: %d", tenant.ID)
c.JSON(http.StatusOK, gin.H{
"success": true,
"data": gin.H{
"enabled": tenant.AgentConfig.Enabled,
"max_iterations": tenant.AgentConfig.MaxIterations,
"reflection_enabled": tenant.AgentConfig.ReflectionEnabled,
"allowed_tools": tenant.AgentConfig.AllowedTools,
"temperature": tenant.AgentConfig.Temperature,
"thinking_model_id": tenant.AgentConfig.ThinkingModelID,
"rerank_model_id": tenant.AgentConfig.RerankModelID,
"system_prompt": systemPrompt,
"use_custom_system_prompt": useCustomPrompt,
"available_tools": availableTools,
"available_placeholders": availablePlaceholders,
"enabled": tenant.AgentConfig.Enabled,
"max_iterations": tenant.AgentConfig.MaxIterations,
"reflection_enabled": tenant.AgentConfig.ReflectionEnabled,
"allowed_tools": tenant.AgentConfig.AllowedTools,
"temperature": tenant.AgentConfig.Temperature,
"thinking_model_id": tenant.AgentConfig.ThinkingModelID,
"rerank_model_id": tenant.AgentConfig.RerankModelID,
"system_prompt_web_enabled": systemPromptWithWeb,
"system_prompt_web_disabled": systemPromptWithoutWeb,
"use_custom_system_prompt": useCustomPrompt,
"available_tools": availableTools,
"available_placeholders": availablePlaceholders,
},
})
}
@@ -368,15 +373,16 @@ func (h *TenantHandler) updateTenantAgentConfigInternal(c *gin.Context) {
}
tenant.AgentConfig = &types.AgentConfig{
Enabled: req.Enabled,
MaxIterations: req.MaxIterations,
ReflectionEnabled: req.ReflectionEnabled,
AllowedTools: req.AllowedTools,
Temperature: req.Temperature,
ThinkingModelID: req.ThinkingModelID,
RerankModelID: req.RerankModelID,
SystemPrompt: req.SystemPrompt,
UseCustomSystemPrompt: useCustomPrompt,
Enabled: req.Enabled,
MaxIterations: req.MaxIterations,
ReflectionEnabled: req.ReflectionEnabled,
AllowedTools: req.AllowedTools,
Temperature: req.Temperature,
ThinkingModelID: req.ThinkingModelID,
RerankModelID: req.RerankModelID,
SystemPromptWebEnabled: req.SystemPromptWebEnabled,
SystemPromptWebDisabled: req.SystemPromptWebDisabled,
UseCustomSystemPrompt: useCustomPrompt,
}
updatedTenant, err := h.service.UpdateTenant(ctx, tenant)
@@ -517,7 +523,7 @@ func (h *TenantHandler) buildDefaultConversationConfig() *types.ConversationConf
UseCustomContextTemplate: true,
UseCustomSystemPrompt: true,
Temperature: h.config.Conversation.Summary.Temperature,
MaxTokens: h.config.Conversation.Summary.MaxTokens,
MaxCompletionTokens: h.config.Conversation.Summary.MaxCompletionTokens,
MaxRounds: h.config.Conversation.MaxRounds,
EmbeddingTopK: h.config.Conversation.EmbeddingTopK,
KeywordThreshold: h.config.Conversation.KeywordThreshold,
@@ -555,8 +561,8 @@ func validateConversationConfig(req *types.ConversationConfig) error {
if req.Temperature < 0 || req.Temperature > 2 {
return errors.NewBadRequestError("temperature must be between 0 and 2")
}
if req.MaxTokens <= 0 || req.MaxTokens > 100000 {
return errors.NewBadRequestError("max_tokens must be between 1 and 100000")
if req.MaxCompletionTokens <= 0 || req.MaxCompletionTokens > 100000 {
return errors.NewBadRequestError("max_completion_tokens must be between 1 and 100000")
}
if req.FallbackStrategy != "" &&
req.FallbackStrategy != string(types.FallbackStrategyFixed) &&
@@ -609,8 +615,8 @@ func (h *TenantHandler) GetTenantConversationConfig(c *gin.Context) {
if tc.Temperature > 0 {
defaultCfg.Temperature = tc.Temperature
}
if tc.MaxTokens > 0 {
defaultCfg.MaxTokens = tc.MaxTokens
if tc.MaxCompletionTokens > 0 {
defaultCfg.MaxCompletionTokens = tc.MaxCompletionTokens
}
// Retrieval parameters

View File

@@ -152,7 +152,9 @@ func (e *OpenAIEmbedder) BatchEmbed(ctx context.Context, texts []string) ([][]fl
logger.GetLogger(ctx).Errorf("OpenAIEmbedder EmbedBatch send request error: %v", err)
return nil, fmt.Errorf("send request: %w", err)
}
defer resp.Body.Close()
if resp.Body != nil {
defer resp.Body.Close()
}
// Read response
body, err := io.ReadAll(resp.Body)

View File

@@ -10,18 +10,19 @@ import (
// AgentConfig represents the full agent configuration (used at tenant level and runtime)
// This includes all configuration parameters for agent execution
//
// Custom system prompts are stored per web search state: SystemPromptWebEnabled
// holds the template for when web search is on, SystemPromptWebDisabled the
// template for when it is off. ResolveSystemPrompt picks between the two.
// Both prompt fields are omitted from the JSON encoding when empty.
type AgentConfig struct {
Enabled bool `json:"enabled"` // Whether agent mode is enabled
MaxIterations int `json:"max_iterations"` // Maximum number of ReAct iterations
ReflectionEnabled bool `json:"reflection_enabled"` // Whether to enable reflection
AllowedTools []string `json:"allowed_tools"` // List of allowed tool names
Temperature float64 `json:"temperature"` // LLM temperature for agent
ThinkingModelID string `json:"thinking_model_id"` // Model ID for reasoning
RerankModelID string `json:"rerank_model_id"` // Model ID for reranking search results
KnowledgeBases []string `json:"knowledge_bases"` // Accessible knowledge base IDs
SystemPrompt string `json:"system_prompt,omitempty"` // System prompt template with placeholders (optional)
UseCustomSystemPrompt bool `json:"use_custom_system_prompt"` // Whether to use custom system prompt instead of default
WebSearchEnabled bool `json:"web_search_enabled"` // Whether web search tool is enabled
WebSearchMaxResults int `json:"web_search_max_results"` // Maximum number of web search results (default: 5)
Enabled bool `json:"enabled"` // Whether agent mode is enabled
MaxIterations int `json:"max_iterations"` // Maximum number of ReAct iterations
ReflectionEnabled bool `json:"reflection_enabled"` // Whether to enable reflection
AllowedTools []string `json:"allowed_tools"` // List of allowed tool names
Temperature float64 `json:"temperature"` // LLM temperature for agent
ThinkingModelID string `json:"thinking_model_id"` // Model ID for reasoning
RerankModelID string `json:"rerank_model_id"` // Model ID for reranking search results
KnowledgeBases []string `json:"knowledge_bases"` // Accessible knowledge base IDs
SystemPromptWebEnabled string `json:"system_prompt_web_enabled,omitempty"` // Custom prompt when web search is enabled
SystemPromptWebDisabled string `json:"system_prompt_web_disabled,omitempty"` // Custom prompt when web search is disabled
UseCustomSystemPrompt bool `json:"use_custom_system_prompt"` // Whether to use custom system prompt instead of default
WebSearchEnabled bool `json:"web_search_enabled"` // Whether web search tool is enabled
WebSearchMaxResults int `json:"web_search_max_results"` // Maximum number of web search results (default: 5)
}
// SessionAgentConfig represents session-level agent configuration
@@ -66,6 +67,25 @@ func (c *SessionAgentConfig) Scan(value interface{}) error {
return json.Unmarshal(b, c)
}
// ResolveSystemPrompt selects the custom system prompt template that matches
// the given web search state.
//
// It returns SystemPromptWebEnabled when webSearchEnabled is true and
// SystemPromptWebDisabled otherwise. An empty string is returned when the
// receiver is nil or when no prompt has been set for the requested state.
// The method does not consult UseCustomSystemPrompt.
func (c *AgentConfig) ResolveSystemPrompt(webSearchEnabled bool) string {
	// A nil config carries no custom prompts at all.
	if c == nil {
		return ""
	}
	if webSearchEnabled {
		return c.SystemPromptWebEnabled
	}
	// An unset field is already the empty string, so no separate fallback is needed.
	return c.SystemPromptWebDisabled
}
// Tool defines the interface that all agent tools must implement
type Tool interface {
// Name returns the unique identifier for this tool

View File

@@ -26,6 +26,8 @@ const (
ChunkTypeRelationship ChunkType = "relationship"
// ChunkTypeFAQ 表示 FAQ 条目 Chunk
ChunkTypeFAQ ChunkType = "faq"
// ChunkTypeWebSearch 表示 Web 搜索结果的 Chunk
ChunkTypeWebSearch ChunkType = "web_search"
)
// ImageInfo 表示与 Chunk 关联的图片信息

View File

@@ -64,4 +64,6 @@ type ChunkService interface {
DeleteByKnowledgeList(ctx context.Context, ids []string) error
// ListChunkByParentID lists chunks by parent id
ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error)
// GetRepository gets the chunk repository
GetRepository() ChunkRepository
}

View File

@@ -69,6 +69,8 @@ type KnowledgeService interface {
UpdateKnowledgeTagBatch(ctx context.Context, updates map[string]*string) error
// UpdateFAQEntryTagBatch updates tag for FAQ entries in batch.
UpdateFAQEntryTagBatch(ctx context.Context, kbID string, updates map[string]*string) error
// GetRepository gets the knowledge repository
GetRepository() KnowledgeRepository
}
// KnowledgeRepository defines the interface for knowledge repositories.

View File

@@ -80,6 +80,14 @@ type KnowledgeBaseService interface {
// - Copied knowledge base object
// - Possible errors such as not existing, insufficient permissions, etc.
CopyKnowledgeBase(ctx context.Context, src string, dst string) (*types.KnowledgeBase, *types.KnowledgeBase, error)
// GetRepository gets the knowledge base repository
// Returns:
// - KnowledgeBaseRepository: Knowledge base repository
GetRepository() KnowledgeBaseRepository
}
// KnowledgeBaseRepository defines the knowledge base repository interface

View File

@@ -83,7 +83,7 @@ type ConversationConfig struct {
// Temperature controls the randomness of the model output
Temperature float64 `json:"temperature"`
// MaxCompletionTokens is the maximum number of tokens to generate
MaxTokens int `json:"max_tokens"`
MaxCompletionTokens int `json:"max_completion_tokens"`
// Retrieval & strategy parameters
MaxRounds int `json:"max_rounds"`