mirror of
https://github.com/Tencent/WeKnora.git
synced 2026-06-04 13:30:32 +08:00
fix(agent): pass reasoning_content back to providers that require it
MiMo and DeepSeek V3.2/V4 in thinking mode reject multi-turn requests with HTTP 400 when the prior assistant message lacks reasoning_content: "The reasoning_content in the thinking mode must be passed back to the API." The Agent's ReAct loop is the worst-case scenario — every round produces tool calls, which is exactly the case where DeepSeek's docs specify that reasoning_content MUST participate in subsequent context.

Plumb reasoning_content through the full assistant-message round-trip:

- chat.Message / types.ChatResponse / types.AgentStep gain a reasoning_content field (AgentStep persists via the existing Message.AgentSteps jsonb column, no migration needed).
- streamLLMToEventBus accumulates reasoning chunks into result.ReasoningContent and surfaces it on the round's ChatResponse.
- engine.runReActIteration writes it onto AgentStep so cross-turn replay preserves it.
- observe.appendToolResults attaches it to the same-turn assistant message; agent_history.buildAssistantHistoryMessages does the same on cross-turn replay.
- RemoteAPIChat.ConvertMessages forwards it on assistant turns to openai.ChatCompletionMessage.ReasoningContent (already supported by go-openai); providers that don't recognize the field ignore it.

Tests cover the three boundaries: ConvertMessages serializes it for upstream, appendToolResults preserves it within the same turn, and buildAssistantHistoryMessages replays it across turns.

Scope is intentionally limited to Agent mode — KnowledgeQA's chat pipeline and Anthropic's signed thinking_blocks are separate fixes that require schema changes (rendered_content / thinking_blocks columns).

Fixes #1302
This commit is contained in:
@@ -568,10 +568,11 @@ func (e *AgentEngine) runReActIteration(
|
||||
|
||||
// Create agent step
|
||||
step := types.AgentStep{
|
||||
Iteration: state.CurrentRound,
|
||||
Thought: response.Content,
|
||||
ToolCalls: make([]types.ToolCall, 0),
|
||||
Timestamp: time.Now(),
|
||||
Iteration: state.CurrentRound,
|
||||
Thought: response.Content,
|
||||
ReasoningContent: response.ReasoningContent,
|
||||
ToolCalls: make([]types.ToolCall, 0),
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
// If the request was cancelled while the LLM was streaming (e.g. the
|
||||
|
||||
@@ -377,10 +377,11 @@ func (e *AgentEngine) appendToolResults(
|
||||
step types.AgentStep,
|
||||
) []chat.Message {
|
||||
// Add assistant message with tool calls (if any)
|
||||
if step.Thought != "" || len(step.ToolCalls) > 0 {
|
||||
if step.Thought != "" || len(step.ToolCalls) > 0 || step.ReasoningContent != "" {
|
||||
assistantMsg := chat.Message{
|
||||
Role: "assistant",
|
||||
Content: step.Thought,
|
||||
Role: "assistant",
|
||||
Content: step.Thought,
|
||||
ReasoningContent: step.ReasoningContent,
|
||||
}
|
||||
|
||||
// Add tool calls to assistant message (following OpenAI format)
|
||||
|
||||
@@ -6,8 +6,10 @@ import (
|
||||
"time"
|
||||
|
||||
agenttools "github.com/Tencent/WeKnora/internal/agent/tools"
|
||||
"github.com/Tencent/WeKnora/internal/models/chat"
|
||||
"github.com/Tencent/WeKnora/internal/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// newFinalAnswerResponse builds a ChatResponse that carries a single
|
||||
@@ -124,3 +126,89 @@ func TestAnalyzeResponse_NonFinalAnswerTool_DoesNotTerminate(t *testing.T) {
|
||||
assert.False(t, verdict.isDone,
|
||||
"non-terminal tool calls must keep the loop running")
|
||||
}
|
||||
|
||||
// TestAppendToolResults_PreservesReasoningContent verifies that the assistant
|
||||
// message produced by appendToolResults carries the reasoning_content emitted
|
||||
// by the model in the same round. Without this, MiMo and DeepSeek V3.2+
|
||||
// thinking-mode reject the next ReAct round with HTTP 400
|
||||
// "The reasoning_content in the thinking mode must be passed back to the API."
|
||||
// (issue #1302).
|
||||
func TestAppendToolResults_PreservesReasoningContent(t *testing.T) {
|
||||
engine := &AgentEngine{}
|
||||
|
||||
t.Run("assistant message carries reasoning_content alongside thought and tool_calls", func(t *testing.T) {
|
||||
step := types.AgentStep{
|
||||
Iteration: 0,
|
||||
Thought: "I will call search.",
|
||||
ReasoningContent: "Detailed chain of thought from MiMo/DeepSeek.",
|
||||
ToolCalls: []types.ToolCall{{
|
||||
ID: "call_1",
|
||||
Name: "knowledge_search",
|
||||
Args: map[string]interface{}{"query": "hi"},
|
||||
Result: &types.ToolResult{
|
||||
Success: true,
|
||||
Output: "result text",
|
||||
},
|
||||
}},
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
out := engine.appendToolResults(nil, step)
|
||||
|
||||
require.Len(t, out, 2, "expect one assistant + one tool message")
|
||||
assert.Equal(t, "assistant", out[0].Role)
|
||||
assert.Equal(t, "I will call search.", out[0].Content)
|
||||
assert.Equal(t, "Detailed chain of thought from MiMo/DeepSeek.", out[0].ReasoningContent,
|
||||
"reasoning_content must be propagated to the assistant message so providers like MiMo "+
|
||||
"and DeepSeek thinking-mode see it on the next round (issue #1302)")
|
||||
require.Len(t, out[0].ToolCalls, 1)
|
||||
assert.Equal(t, "call_1", out[0].ToolCalls[0].ID)
|
||||
|
||||
assert.Equal(t, "tool", out[1].Role)
|
||||
assert.Equal(t, "result text", out[1].Content)
|
||||
})
|
||||
|
||||
t.Run("reasoning_content alone produces an assistant message", func(t *testing.T) {
|
||||
// A pure thinking emission with no visible content / tool calls is
|
||||
// unusual but legal — preserve it so the next round's request still
|
||||
// carries reasoning_content for strict providers.
|
||||
step := types.AgentStep{
|
||||
Iteration: 0,
|
||||
ReasoningContent: "reasoning only",
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
out := engine.appendToolResults(nil, step)
|
||||
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "assistant", out[0].Role)
|
||||
assert.Equal(t, "reasoning only", out[0].ReasoningContent)
|
||||
assert.Empty(t, out[0].Content)
|
||||
assert.Empty(t, out[0].ToolCalls)
|
||||
})
|
||||
|
||||
t.Run("step without thought/tool_calls/reasoning produces no assistant message", func(t *testing.T) {
|
||||
step := types.AgentStep{Iteration: 0, Timestamp: time.Now()}
|
||||
out := engine.appendToolResults(nil, step)
|
||||
assert.Empty(t, out, "empty steps must not inject empty assistant messages")
|
||||
})
|
||||
|
||||
t.Run("appends to existing message slice", func(t *testing.T) {
|
||||
prior := []chat.Message{
|
||||
{Role: "system", Content: "sys"},
|
||||
{Role: "user", Content: "hi"},
|
||||
}
|
||||
step := types.AgentStep{
|
||||
Iteration: 1,
|
||||
Thought: "answer",
|
||||
ReasoningContent: "thinking",
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
out := engine.appendToolResults(prior, step)
|
||||
require.Len(t, out, 3)
|
||||
assert.Equal(t, "system", out[0].Role)
|
||||
assert.Equal(t, "user", out[1].Role)
|
||||
assert.Equal(t, "assistant", out[2].Role)
|
||||
assert.Equal(t, "thinking", out[2].ReasoningContent)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -15,12 +15,12 @@ import (
|
||||
|
||||
// streamLLMResult holds accumulated output from a streaming LLM call.
|
||||
type streamLLMResult struct {
|
||||
Content string
|
||||
ThinkingContent string // accumulated thinking/reasoning content, kept separate from answer
|
||||
ToolCalls []types.LLMToolCall
|
||||
Usage *types.TokenUsage
|
||||
FinishReason string // actual finish_reason from LLM (captured from last stream chunk)
|
||||
StreamError string // error message from stream (e.g., timeout), kept separate from Content
|
||||
Content string
|
||||
ReasoningContent string // accumulated reasoning content, kept separate from answer
|
||||
ToolCalls []types.LLMToolCall
|
||||
Usage *types.TokenUsage
|
||||
FinishReason string // actual finish_reason from LLM (captured from last stream chunk)
|
||||
StreamError string // error message from stream (e.g., timeout), kept separate from Content
|
||||
}
|
||||
|
||||
// streamLLMToEventBus streams LLM response through EventBus (generic method)
|
||||
@@ -66,7 +66,7 @@ func (e *AgentEngine) streamLLMToEventBus(
|
||||
isExtracted := chunk.Data != nil && chunk.Data["source"] != nil
|
||||
if !isExtracted {
|
||||
if chunk.ResponseType == types.ResponseTypeThinking {
|
||||
result.ThinkingContent += chunk.Content
|
||||
result.ReasoningContent += chunk.Content
|
||||
} else {
|
||||
result.Content += chunk.Content
|
||||
}
|
||||
@@ -239,9 +239,10 @@ func (e *AgentEngine) streamThinkingToEventBus(
|
||||
}
|
||||
|
||||
resp := &types.ChatResponse{
|
||||
Content: fullContent,
|
||||
ToolCalls: llmResult.ToolCalls,
|
||||
FinishReason: finishReason,
|
||||
Content: fullContent,
|
||||
ReasoningContent: llmResult.ReasoningContent,
|
||||
ToolCalls: llmResult.ToolCalls,
|
||||
FinishReason: finishReason,
|
||||
}
|
||||
if llmResult.Usage != nil {
|
||||
resp.Usage = *llmResult.Usage
|
||||
|
||||
@@ -158,9 +158,10 @@ func buildAssistantHistoryMessages(m *types.Message) []chat.Message {
|
||||
continue
|
||||
}
|
||||
assistantMsg := chat.Message{
|
||||
Role: "assistant",
|
||||
Content: step.Thought,
|
||||
ToolCalls: make([]chat.ToolCall, 0, len(nonTerminalCalls)),
|
||||
Role: "assistant",
|
||||
Content: step.Thought,
|
||||
ReasoningContent: step.ReasoningContent,
|
||||
ToolCalls: make([]chat.ToolCall, 0, len(nonTerminalCalls)),
|
||||
}
|
||||
for _, tc := range nonTerminalCalls {
|
||||
argsJSON, _ := json.Marshal(tc.Args)
|
||||
|
||||
@@ -230,3 +230,40 @@ func TestFilterNonTerminalToolCalls(t *testing.T) {
|
||||
assert.Equal(t, agenttools.ToolWebSearch, out[1].Name)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildAssistantHistoryMessages_ReplaysReasoningContent guards the
|
||||
// cross-turn replay path: AgentStep.ReasoningContent persisted on a prior turn
|
||||
// must be re-attached to the rebuilt assistant message, otherwise MiMo and
|
||||
// DeepSeek thinking-mode reject the next turn with HTTP 400 (issue #1302).
|
||||
func TestBuildAssistantHistoryMessages_ReplaysReasoningContent(t *testing.T) {
|
||||
msg := &types.Message{
|
||||
Role: "assistant",
|
||||
Content: "Found 3 matches in the docs.",
|
||||
AgentSteps: types.AgentSteps{
|
||||
{
|
||||
Iteration: 0,
|
||||
Thought: "Let me search.",
|
||||
ReasoningContent: "model's chain of thought",
|
||||
ToolCalls: []types.ToolCall{{
|
||||
ID: "call_1",
|
||||
Name: agenttools.ToolKnowledgeSearch,
|
||||
Args: map[string]interface{}{"query": "foo"},
|
||||
Result: &types.ToolResult{
|
||||
Success: true,
|
||||
Output: "doc A",
|
||||
},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}
|
||||
got := buildAssistantHistoryMessages(msg)
|
||||
if !assert.Len(t, got, 3) {
|
||||
return
|
||||
}
|
||||
assert.Equal(t, "model's chain of thought", got[0].ReasoningContent,
|
||||
"reasoning_content from AgentStep must be replayed onto the rebuilt assistant message "+
|
||||
"so MiMo/DeepSeek thinking-mode does not 400 on multi-turn (issue #1302)")
|
||||
// Tool message and final answer message must NOT carry reasoning_content.
|
||||
assert.Empty(t, got[1].ReasoningContent)
|
||||
assert.Empty(t, got[2].ReasoningContent)
|
||||
}
|
||||
|
||||
@@ -63,6 +63,11 @@ type Message struct {
|
||||
ToolCallID string `json:"tool_call_id,omitempty"` // Tool call ID (for tool role)
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"` // Tool calls (for assistant role)
|
||||
Images []string `json:"images,omitempty"` // Image URLs for multimodal (only for current user message)
|
||||
// ReasoningContent 是 assistant 推理类模型(DeepSeek thinking、小米 MiMo、vLLM reasoning 等)
|
||||
// 上一轮输出的思考内容。部分供应商(MiMo、DeepSeek V3.2/V4 thinking 模式)要求多轮对话中
|
||||
// 把 assistant 的 reasoning_content 原样回传,否则会以 400 拒绝请求;其他不要求的供应商
|
||||
// 会忽略未知字段,无副作用。
|
||||
ReasoningContent string `json:"reasoning_content,omitempty"`
|
||||
}
|
||||
|
||||
// ToolCall represents a tool call in a message
|
||||
|
||||
@@ -249,6 +249,15 @@ func (c *RemoteAPIChat) ConvertMessages(messages []Message) []openai.ChatComplet
|
||||
openaiMsg.Name = msg.Name
|
||||
}
|
||||
|
||||
// Round-trip reasoning_content on assistant turns. MiMo and DeepSeek V3.2+
|
||||
// thinking mode reject multi-turn requests where the prior assistant
|
||||
// message lacks its reasoning_content with HTTP 400 ("The reasoning_content
|
||||
// in the thinking mode must be passed back to the API."). Providers that
|
||||
// don't recognize the field ignore it harmlessly.
|
||||
if msg.Role == "assistant" && msg.ReasoningContent != "" {
|
||||
openaiMsg.ReasoningContent = msg.ReasoningContent
|
||||
}
|
||||
|
||||
openaiMessages = append(openaiMessages, openaiMsg)
|
||||
}
|
||||
return openaiMessages
|
||||
|
||||
@@ -223,6 +223,52 @@ func TestBuildChatCompletionRequest_ToolChoice(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestConvertMessages_ReasoningContentRoundTrip verifies that assistant
|
||||
// reasoning_content is propagated through ConvertMessages so that providers
|
||||
// like MiMo / DeepSeek thinking-mode can read it back from prior turns.
|
||||
// See issue #1302: MiMo rejects multi-turn requests with HTTP 400
|
||||
// "The reasoning_content in the thinking mode must be passed back to the API."
|
||||
// when this field is dropped.
|
||||
func TestConvertMessages_ReasoningContentRoundTrip(t *testing.T) {
|
||||
c := newTestRemoteChat(t)
|
||||
|
||||
t.Run("assistant reasoning_content propagated", func(t *testing.T) {
|
||||
messages := []Message{
|
||||
{Role: "user", Content: "hi"},
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "the answer",
|
||||
ReasoningContent: "let me think about this carefully",
|
||||
},
|
||||
{Role: "user", Content: "follow-up"},
|
||||
}
|
||||
out := c.ConvertMessages(messages)
|
||||
require.Len(t, out, 3)
|
||||
assert.Equal(t, "let me think about this carefully", out[1].ReasoningContent,
|
||||
"assistant reasoning_content must be retained for multi-turn replay")
|
||||
assert.Empty(t, out[0].ReasoningContent, "user message must not carry reasoning_content")
|
||||
assert.Empty(t, out[2].ReasoningContent, "user message must not carry reasoning_content")
|
||||
})
|
||||
|
||||
t.Run("non-assistant role drops reasoning_content even if set", func(t *testing.T) {
|
||||
messages := []Message{
|
||||
{Role: "user", Content: "hi", ReasoningContent: "should be dropped"},
|
||||
}
|
||||
out := c.ConvertMessages(messages)
|
||||
require.Len(t, out, 1)
|
||||
assert.Empty(t, out[0].ReasoningContent, "non-assistant roles must never carry reasoning_content upstream")
|
||||
})
|
||||
|
||||
t.Run("empty assistant reasoning_content stays empty", func(t *testing.T) {
|
||||
messages := []Message{
|
||||
{Role: "assistant", Content: "no thinking"},
|
||||
}
|
||||
out := c.ConvertMessages(messages)
|
||||
require.Len(t, out, 1)
|
||||
assert.Empty(t, out[0].ReasoningContent)
|
||||
})
|
||||
}
|
||||
|
||||
// TestRemoteAPIChat 综合测试 Remote API Chat 的所有功能
|
||||
func TestRemoteAPIChat(t *testing.T) {
|
||||
// 获取环境变量
|
||||
|
||||
@@ -186,10 +186,15 @@ type ToolCall struct {
|
||||
|
||||
// AgentStep represents one iteration of the ReAct loop
|
||||
type AgentStep struct {
|
||||
Iteration int `json:"iteration"` // Iteration number (0-indexed)
|
||||
Thought string `json:"thought"` // LLM's reasoning/thinking (Think phase)
|
||||
ToolCalls []ToolCall `json:"tool_calls"` // Tools called in this step (Act phase)
|
||||
Timestamp time.Time `json:"timestamp"` // When this step occurred
|
||||
Iteration int `json:"iteration"` // Iteration number (0-indexed)
|
||||
Thought string `json:"thought"` // LLM's reasoning/thinking (Think phase)
|
||||
// ReasoningContent stores the OpenAI-protocol reasoning_content emitted by the
|
||||
// model in this round. Persisted on AgentStep so cross-turn replay can put it
|
||||
// back on the assistant message — required by MiMo / DeepSeek V3.2+ thinking
|
||||
// mode, ignored by providers that don't recognize the field.
|
||||
ReasoningContent string `json:"reasoning_content,omitempty"`
|
||||
ToolCalls []ToolCall `json:"tool_calls"` // Tools called in this step (Act phase)
|
||||
Timestamp time.Time `json:"timestamp"` // When this step occurred
|
||||
}
|
||||
|
||||
// GetObservations returns observations from all tool calls in this step
|
||||
|
||||
@@ -27,10 +27,13 @@ type FunctionCall struct {
|
||||
|
||||
// ChatResponse chat response
|
||||
type ChatResponse struct {
|
||||
Content string `json:"content"`
|
||||
ToolCalls []LLMToolCall `json:"tool_calls,omitempty"`
|
||||
FinishReason string `json:"finish_reason,omitempty"`
|
||||
Usage TokenUsage `json:"usage"`
|
||||
Content string `json:"content"`
|
||||
// ReasoningContent 是支持思考链的模型(DeepSeek thinking、小米 MiMo、vLLM reasoning 等)
|
||||
// 在本轮输出的推理内容。需要在后续多轮请求中原样回传给那些严格校验的供应商。
|
||||
ReasoningContent string `json:"reasoning_content,omitempty"`
|
||||
ToolCalls []LLMToolCall `json:"tool_calls,omitempty"`
|
||||
FinishReason string `json:"finish_reason,omitempty"`
|
||||
Usage TokenUsage `json:"usage"`
|
||||
}
|
||||
|
||||
// Response type
|
||||
|
||||
Reference in New Issue
Block a user