fix(agent): pass reasoning_content back to providers that require it

In thinking mode, MiMo and DeepSeek V3.2/V4 reject multi-turn requests with
HTTP 400 when the prior assistant message lacks reasoning_content:
"The reasoning_content in the thinking mode must be passed back to the API."

The Agent's ReAct loop is the worst-case scenario: every round produces tool
calls, which is exactly the case where DeepSeek's docs specify that
reasoning_content MUST be included in the subsequent context.

Plumb reasoning_content through the full assistant-message round-trip:
- chat.Message / types.ChatResponse / types.AgentStep gain a
  reasoning_content field (AgentStep persists via the existing
  Message.AgentSteps jsonb column, no migration needed).
- streamLLMToEventBus accumulates reasoning chunks into
  result.ReasoningContent, and streamThinkingToEventBus surfaces it on the
  round's ChatResponse.
- engine.runReActIteration writes it onto AgentStep so cross-turn replay
  preserves it.
- observe.appendToolResults attaches it to the same-turn assistant
  message; agent_history.buildAssistantHistoryMessages does the same on
  cross-turn replay.
- RemoteAPIChat.ConvertMessages forwards it on assistant turns to
  openai.ChatCompletionMessage.ReasoningContent (already supported by
  go-openai); providers that don't recognize the field ignore it.

Tests cover the three boundaries: ConvertMessages serializes it for
upstream, appendToolResults preserves it within the same turn, and
buildAssistantHistoryMessages replays it across turns.

Scope is intentionally limited to Agent mode — KnowledgeQA's chat
pipeline and Anthropic's signed thinking_blocks are separate fixes that
require schema changes (rendered_content / thinking_blocks columns).

Fixes #1302
This commit is contained in:
wizardchen
2026-05-13 12:26:11 +08:00
committed by lyingbug
parent 1ae06fb857
commit b00bc84f35
11 changed files with 225 additions and 28 deletions

View File

@@ -568,10 +568,11 @@ func (e *AgentEngine) runReActIteration(
// Create agent step
step := types.AgentStep{
Iteration: state.CurrentRound,
Thought: response.Content,
ToolCalls: make([]types.ToolCall, 0),
Timestamp: time.Now(),
Iteration: state.CurrentRound,
Thought: response.Content,
ReasoningContent: response.ReasoningContent,
ToolCalls: make([]types.ToolCall, 0),
Timestamp: time.Now(),
}
// If the request was cancelled while the LLM was streaming (e.g. the

View File

@@ -377,10 +377,11 @@ func (e *AgentEngine) appendToolResults(
step types.AgentStep,
) []chat.Message {
// Add assistant message with tool calls (if any)
if step.Thought != "" || len(step.ToolCalls) > 0 {
if step.Thought != "" || len(step.ToolCalls) > 0 || step.ReasoningContent != "" {
assistantMsg := chat.Message{
Role: "assistant",
Content: step.Thought,
Role: "assistant",
Content: step.Thought,
ReasoningContent: step.ReasoningContent,
}
// Add tool calls to assistant message (following OpenAI format)

View File

@@ -6,8 +6,10 @@ import (
"time"
agenttools "github.com/Tencent/WeKnora/internal/agent/tools"
"github.com/Tencent/WeKnora/internal/models/chat"
"github.com/Tencent/WeKnora/internal/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// newFinalAnswerResponse builds a ChatResponse that carries a single
@@ -124,3 +126,89 @@ func TestAnalyzeResponse_NonFinalAnswerTool_DoesNotTerminate(t *testing.T) {
assert.False(t, verdict.isDone,
"non-terminal tool calls must keep the loop running")
}
// TestAppendToolResults_PreservesReasoningContent checks that appendToolResults
// copies AgentStep.ReasoningContent onto the assistant message it emits for the
// same round. When the field is dropped, MiMo and DeepSeek V3.2+ thinking mode
// reject the following ReAct round with HTTP 400
// "The reasoning_content in the thinking mode must be passed back to the API."
// (issue #1302).
func TestAppendToolResults_PreservesReasoningContent(t *testing.T) {
	engine := &AgentEngine{}

	t.Run("assistant message carries reasoning_content alongside thought and tool_calls", func(t *testing.T) {
		const (
			thought    = "I will call search."
			reasoning  = "Detailed chain of thought from MiMo/DeepSeek."
			toolOutput = "result text"
		)
		searchCall := types.ToolCall{
			ID:   "call_1",
			Name: "knowledge_search",
			Args: map[string]interface{}{"query": "hi"},
			Result: &types.ToolResult{
				Success: true,
				Output:  toolOutput,
			},
		}
		step := types.AgentStep{
			Iteration:        0,
			Thought:          thought,
			ReasoningContent: reasoning,
			ToolCalls:        []types.ToolCall{searchCall},
			Timestamp:        time.Now(),
		}

		msgs := engine.appendToolResults(nil, step)
		require.Len(t, msgs, 2, "expect one assistant + one tool message")

		// The first message is the assistant turn, the second the tool result.
		assistantMsg, toolMsg := msgs[0], msgs[1]

		assert.Equal(t, "assistant", assistantMsg.Role)
		assert.Equal(t, thought, assistantMsg.Content)
		assert.Equal(t, reasoning, assistantMsg.ReasoningContent,
			"reasoning_content must be propagated to the assistant message so providers like MiMo "+
				"and DeepSeek thinking-mode see it on the next round (issue #1302)")
		require.Len(t, assistantMsg.ToolCalls, 1)
		assert.Equal(t, "call_1", assistantMsg.ToolCalls[0].ID)

		assert.Equal(t, "tool", toolMsg.Role)
		assert.Equal(t, toolOutput, toolMsg.Content)
	})

	t.Run("reasoning_content alone produces an assistant message", func(t *testing.T) {
		// A round that emits only reasoning (no visible content, no tool
		// calls) is rare but valid; the assistant message must still be
		// produced so strict providers receive reasoning_content next round.
		msgs := engine.appendToolResults(nil, types.AgentStep{
			Iteration:        0,
			ReasoningContent: "reasoning only",
			Timestamp:        time.Now(),
		})

		require.Len(t, msgs, 1)
		only := msgs[0]
		assert.Equal(t, "assistant", only.Role)
		assert.Equal(t, "reasoning only", only.ReasoningContent)
		assert.Empty(t, only.Content)
		assert.Empty(t, only.ToolCalls)
	})

	t.Run("step without thought/tool_calls/reasoning produces no assistant message", func(t *testing.T) {
		emptyStep := types.AgentStep{Iteration: 0, Timestamp: time.Now()}
		msgs := engine.appendToolResults(nil, emptyStep)
		assert.Empty(t, msgs, "empty steps must not inject empty assistant messages")
	})

	t.Run("appends to existing message slice", func(t *testing.T) {
		history := []chat.Message{
			{Role: "system", Content: "sys"},
			{Role: "user", Content: "hi"},
		}
		step := types.AgentStep{
			Iteration:        1,
			Thought:          "answer",
			ReasoningContent: "thinking",
			Timestamp:        time.Now(),
		}

		msgs := engine.appendToolResults(history, step)
		require.Len(t, msgs, 3)

		// Existing messages keep their positions; the assistant turn is appended.
		for i, wantRole := range []string{"system", "user", "assistant"} {
			assert.Equal(t, wantRole, msgs[i].Role)
		}
		assert.Equal(t, "thinking", msgs[2].ReasoningContent)
	})
}

View File

@@ -15,12 +15,12 @@ import (
// streamLLMResult holds accumulated output from a streaming LLM call.
type streamLLMResult struct {
Content string
ThinkingContent string // accumulated thinking/reasoning content, kept separate from answer
ToolCalls []types.LLMToolCall
Usage *types.TokenUsage
FinishReason string // actual finish_reason from LLM (captured from last stream chunk)
StreamError string // error message from stream (e.g., timeout), kept separate from Content
Content string
ReasoningContent string // accumulated reasoning content, kept separate from answer
ToolCalls []types.LLMToolCall
Usage *types.TokenUsage
FinishReason string // actual finish_reason from LLM (captured from last stream chunk)
StreamError string // error message from stream (e.g., timeout), kept separate from Content
}
// streamLLMToEventBus streams LLM response through EventBus (generic method)
@@ -66,7 +66,7 @@ func (e *AgentEngine) streamLLMToEventBus(
isExtracted := chunk.Data != nil && chunk.Data["source"] != nil
if !isExtracted {
if chunk.ResponseType == types.ResponseTypeThinking {
result.ThinkingContent += chunk.Content
result.ReasoningContent += chunk.Content
} else {
result.Content += chunk.Content
}
@@ -239,9 +239,10 @@ func (e *AgentEngine) streamThinkingToEventBus(
}
resp := &types.ChatResponse{
Content: fullContent,
ToolCalls: llmResult.ToolCalls,
FinishReason: finishReason,
Content: fullContent,
ReasoningContent: llmResult.ReasoningContent,
ToolCalls: llmResult.ToolCalls,
FinishReason: finishReason,
}
if llmResult.Usage != nil {
resp.Usage = *llmResult.Usage

View File

@@ -158,9 +158,10 @@ func buildAssistantHistoryMessages(m *types.Message) []chat.Message {
continue
}
assistantMsg := chat.Message{
Role: "assistant",
Content: step.Thought,
ToolCalls: make([]chat.ToolCall, 0, len(nonTerminalCalls)),
Role: "assistant",
Content: step.Thought,
ReasoningContent: step.ReasoningContent,
ToolCalls: make([]chat.ToolCall, 0, len(nonTerminalCalls)),
}
for _, tc := range nonTerminalCalls {
argsJSON, _ := json.Marshal(tc.Args)

View File

@@ -230,3 +230,40 @@ func TestFilterNonTerminalToolCalls(t *testing.T) {
assert.Equal(t, agenttools.ToolWebSearch, out[1].Name)
}
}
// TestBuildAssistantHistoryMessages_ReplaysReasoningContent covers cross-turn
// replay: the ReasoningContent stored on an AgentStep from an earlier turn has
// to reappear on the assistant message rebuilt from that step. If it does not,
// MiMo and DeepSeek thinking-mode answer the next turn with HTTP 400
// (issue #1302).
func TestBuildAssistantHistoryMessages_ReplaysReasoningContent(t *testing.T) {
	searchCall := types.ToolCall{
		ID:   "call_1",
		Name: agenttools.ToolKnowledgeSearch,
		Args: map[string]interface{}{"query": "foo"},
		Result: &types.ToolResult{
			Success: true,
			Output:  "doc A",
		},
	}
	stored := &types.Message{
		Role:    "assistant",
		Content: "Found 3 matches in the docs.",
		AgentSteps: types.AgentSteps{
			{
				Iteration:        0,
				Thought:          "Let me search.",
				ReasoningContent: "model's chain of thought",
				ToolCalls:        []types.ToolCall{searchCall},
			},
		},
	}

	rebuilt := buildAssistantHistoryMessages(stored)
	if ok := assert.Len(t, rebuilt, 3); !ok {
		// Indexing below would panic on a shorter slice.
		return
	}

	assert.Equal(t, "model's chain of thought", rebuilt[0].ReasoningContent,
		"reasoning_content from AgentStep must be replayed onto the rebuilt assistant message "+
			"so MiMo/DeepSeek thinking-mode does not 400 on multi-turn (issue #1302)")

	// Neither the tool message nor the final answer message may carry
	// reasoning_content.
	for _, m := range rebuilt[1:] {
		assert.Empty(t, m.ReasoningContent)
	}
}

View File

@@ -63,6 +63,11 @@ type Message struct {
ToolCallID string `json:"tool_call_id,omitempty"` // Tool call ID (for tool role)
ToolCalls []ToolCall `json:"tool_calls,omitempty"` // Tool calls (for assistant role)
Images []string `json:"images,omitempty"` // Image URLs for multimodal (only for current user message)
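// ReasoningContent is the thinking content produced in the previous round by an
// assistant reasoning model (DeepSeek thinking, Xiaomi MiMo, vLLM reasoning, etc.).
// Some providers (MiMo, DeepSeek V3.2/V4 thinking mode) require the assistant's
// reasoning_content to be passed back verbatim in multi-turn conversations and
// otherwise reject the request with HTTP 400; providers that do not require it
// ignore the unknown field, so there are no side effects.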
ReasoningContent string `json:"reasoning_content,omitempty"`
}
// ToolCall represents a tool call in a message

View File

@@ -249,6 +249,15 @@ func (c *RemoteAPIChat) ConvertMessages(messages []Message) []openai.ChatComplet
openaiMsg.Name = msg.Name
}
// Round-trip reasoning_content on assistant turns. MiMo and DeepSeek V3.2+
// thinking mode reject multi-turn requests where the prior assistant
// message lacks its reasoning_content with HTTP 400 ("The reasoning_content
// in the thinking mode must be passed back to the API."). Providers that
// don't recognize the field ignore it harmlessly.
if msg.Role == "assistant" && msg.ReasoningContent != "" {
openaiMsg.ReasoningContent = msg.ReasoningContent
}
openaiMessages = append(openaiMessages, openaiMsg)
}
return openaiMessages

View File

@@ -223,6 +223,52 @@ func TestBuildChatCompletionRequest_ToolChoice(t *testing.T) {
})
}
// TestConvertMessages_ReasoningContentRoundTrip checks that ConvertMessages
// carries reasoning_content through on assistant messages and only on those,
// so providers such as MiMo / DeepSeek thinking-mode can read it back from
// earlier turns. Per issue #1302, MiMo answers multi-turn requests with
// HTTP 400 "The reasoning_content in the thinking mode must be passed back to
// the API." when the field is missing.
func TestConvertMessages_ReasoningContentRoundTrip(t *testing.T) {
	c := newTestRemoteChat(t)

	t.Run("assistant reasoning_content propagated", func(t *testing.T) {
		const reasoning = "let me think about this carefully"

		converted := c.ConvertMessages([]Message{
			{Role: "user", Content: "hi"},
			{Role: "assistant", Content: "the answer", ReasoningContent: reasoning},
			{Role: "user", Content: "follow-up"},
		})
		require.Len(t, converted, 3)

		assert.Equal(t, reasoning, converted[1].ReasoningContent,
			"assistant reasoning_content must be retained for multi-turn replay")
		// Both surrounding user turns stay free of reasoning_content.
		for _, userIdx := range []int{0, 2} {
			assert.Empty(t, converted[userIdx].ReasoningContent, "user message must not carry reasoning_content")
		}
	})

	t.Run("non-assistant role drops reasoning_content even if set", func(t *testing.T) {
		userWithReasoning := Message{Role: "user", Content: "hi", ReasoningContent: "should be dropped"}

		converted := c.ConvertMessages([]Message{userWithReasoning})
		require.Len(t, converted, 1)
		assert.Empty(t, converted[0].ReasoningContent, "non-assistant roles must never carry reasoning_content upstream")
	})

	t.Run("empty assistant reasoning_content stays empty", func(t *testing.T) {
		plainAssistant := Message{Role: "assistant", Content: "no thinking"}

		converted := c.ConvertMessages([]Message{plainAssistant})
		require.Len(t, converted, 1)
		assert.Empty(t, converted[0].ReasoningContent)
	})
}
// TestRemoteAPIChat 综合测试 Remote API Chat 的所有功能
func TestRemoteAPIChat(t *testing.T) {
// 获取环境变量

View File

@@ -186,10 +186,15 @@ type ToolCall struct {
// AgentStep represents one iteration of the ReAct loop
type AgentStep struct {
Iteration int `json:"iteration"` // Iteration number (0-indexed)
Thought string `json:"thought"` // LLM's reasoning/thinking (Think phase)
ToolCalls []ToolCall `json:"tool_calls"` // Tools called in this step (Act phase)
Timestamp time.Time `json:"timestamp"` // When this step occurred
Iteration int `json:"iteration"` // Iteration number (0-indexed)
Thought string `json:"thought"` // LLM's reasoning/thinking (Think phase)
// ReasoningContent stores the OpenAI-protocol reasoning_content emitted by the
// model in this round. Persisted on AgentStep so cross-turn replay can put it
// back on the assistant message — required by MiMo / DeepSeek V3.2+ thinking
// mode, ignored by providers that don't recognize the field.
ReasoningContent string `json:"reasoning_content,omitempty"`
ToolCalls []ToolCall `json:"tool_calls"` // Tools called in this step (Act phase)
Timestamp time.Time `json:"timestamp"` // When this step occurred
}
// GetObservations returns observations from all tool calls in this step

View File

@@ -27,10 +27,13 @@ type FunctionCall struct {
// ChatResponse chat response
type ChatResponse struct {
Content string `json:"content"`
ToolCalls []LLMToolCall `json:"tool_calls,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Usage TokenUsage `json:"usage"`
Content string `json:"content"`
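// ReasoningContent is the reasoning content produced in this round by models
// that support chain-of-thought (DeepSeek thinking, Xiaomi MiMo, vLLM reasoning,
// etc.). It must be passed back verbatim in later multi-turn requests to
// providers that validate it strictly.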
ReasoningContent string `json:"reasoning_content,omitempty"`
ToolCalls []LLMToolCall `json:"tool_calls,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Usage TokenUsage `json:"usage"`
}
// Response type