fix(agent): pass reasoning_content back to providers that require it

MiMo and DeepSeek V3.2/V4 in thinking mode reject multi-turn requests with HTTP 400 when the prior assistant message lacks reasoning_content: "The reasoning_content in the thinking mode must be passed back to the API." The Agent's ReAct loop is the worst-case scenario — every round produces tool calls, which is exactly the case where DeepSeek's docs specify that reasoning_content MUST participate in subsequent context.

Plumb reasoning_content through the full assistant-message round-trip:

- chat.Message / types.ChatResponse / types.AgentStep gain a reasoning_content field (AgentStep persists via the existing Message.AgentSteps jsonb column, no migration needed).
- streamLLMToEventBus accumulates reasoning chunks into result.ReasoningContent, and streamThinkingToEventBus surfaces it on the round's ChatResponse.
- engine.runReActIteration writes it onto AgentStep so cross-turn replay preserves it.
- observe.appendToolResults attaches it to the same-turn assistant message; agent_history.buildAssistantHistoryMessages does the same on cross-turn replay.
- RemoteAPIChat.ConvertMessages forwards it on assistant turns to openai.ChatCompletionMessage.ReasoningContent (already supported by go-openai); providers that don't recognize the field ignore it.

Tests cover the three boundaries: ConvertMessages serializes it for upstream, appendToolResults preserves it within the same turn, and buildAssistantHistoryMessages replays it across turns.

Scope is intentionally limited to Agent mode — KnowledgeQA's chat pipeline and Anthropic's signed thinking_blocks are separate fixes that require schema changes (rendered_content / thinking_blocks columns).

Fixes #1302
2026-06-04 13:30:32 +08:00 · 2026-05-13 12:26:11 +08:00
parent 1ae06fb857
commit b00bc84f35
11 changed files with 225 additions and 28 deletions
--- a/internal/agent/engine.go
+++ b/internal/agent/engine.go
@@ -568,10 +568,11 @@ func (e *AgentEngine) runReActIteration(

 	// Create agent step
 	step := types.AgentStep{
-		Iteration: state.CurrentRound,
-		Thought:   response.Content,
-		ToolCalls: make([]types.ToolCall, 0),
-		Timestamp: time.Now(),
+		Iteration:        state.CurrentRound,
+		Thought:          response.Content,
+		ReasoningContent: response.ReasoningContent,
+		ToolCalls:        make([]types.ToolCall, 0),
+		Timestamp:        time.Now(),
 	}

 	// If the request was cancelled while the LLM was streaming (e.g. the
--- a/internal/agent/observe.go
+++ b/internal/agent/observe.go
@@ -377,10 +377,11 @@ func (e *AgentEngine) appendToolResults(
 	step types.AgentStep,
 ) []chat.Message {
 	// Add assistant message with tool calls (if any)
-	if step.Thought != "" || len(step.ToolCalls) > 0 {
+	if step.Thought != "" || len(step.ToolCalls) > 0 || step.ReasoningContent != "" {
 		assistantMsg := chat.Message{
-			Role:    "assistant",
-			Content: step.Thought,
+			Role:             "assistant",
+			Content:          step.Thought,
+			ReasoningContent: step.ReasoningContent,
 		}

 		// Add tool calls to assistant message (following OpenAI format)
--- a/internal/agent/observe_test.go
+++ b/internal/agent/observe_test.go
@@ -6,8 +6,10 @@ import (
 	"time"

 	agenttools "github.com/Tencent/WeKnora/internal/agent/tools"
+	"github.com/Tencent/WeKnora/internal/models/chat"
 	"github.com/Tencent/WeKnora/internal/types"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )

 // newFinalAnswerResponse builds a ChatResponse that carries a single
@@ -124,3 +126,89 @@ func TestAnalyzeResponse_NonFinalAnswerTool_DoesNotTerminate(t *testing.T) {
 	assert.False(t, verdict.isDone,
 		"non-terminal tool calls must keep the loop running")
 }
+
+// TestAppendToolResults_PreservesReasoningContent verifies that the assistant
+// message produced by appendToolResults carries the reasoning_content emitted
+// by the model in the same round. Without this, MiMo and DeepSeek V3.2+
+// thinking-mode reject the next ReAct round with HTTP 400
+// "The reasoning_content in the thinking mode must be passed back to the API."
+// (issue #1302).
+func TestAppendToolResults_PreservesReasoningContent(t *testing.T) {
+	engine := &AgentEngine{}
+
+	t.Run("assistant message carries reasoning_content alongside thought and tool_calls", func(t *testing.T) {
+		step := types.AgentStep{
+			Iteration:        0,
+			Thought:          "I will call search.",
+			ReasoningContent: "Detailed chain of thought from MiMo/DeepSeek.",
+			ToolCalls: []types.ToolCall{{
+				ID:   "call_1",
+				Name: "knowledge_search",
+				Args: map[string]interface{}{"query": "hi"},
+				Result: &types.ToolResult{
+					Success: true,
+					Output:  "result text",
+				},
+			}},
+			Timestamp: time.Now(),
+		}
+
+		out := engine.appendToolResults(nil, step)
+
+		require.Len(t, out, 2, "expect one assistant + one tool message")
+		assert.Equal(t, "assistant", out[0].Role)
+		assert.Equal(t, "I will call search.", out[0].Content)
+		assert.Equal(t, "Detailed chain of thought from MiMo/DeepSeek.", out[0].ReasoningContent,
+			"reasoning_content must be propagated to the assistant message so providers like MiMo "+
+				"and DeepSeek thinking-mode see it on the next round (issue #1302)")
+		require.Len(t, out[0].ToolCalls, 1)
+		assert.Equal(t, "call_1", out[0].ToolCalls[0].ID)
+
+		assert.Equal(t, "tool", out[1].Role)
+		assert.Equal(t, "result text", out[1].Content)
+	})
+
+	t.Run("reasoning_content alone produces an assistant message", func(t *testing.T) {
+		// A pure thinking emission with no visible content / tool calls is
+		// unusual but legal — preserve it so the next round's request still
+		// carries reasoning_content for strict providers.
+		step := types.AgentStep{
+			Iteration:        0,
+			ReasoningContent: "reasoning only",
+			Timestamp:        time.Now(),
+		}
+
+		out := engine.appendToolResults(nil, step)
+
+		require.Len(t, out, 1)
+		assert.Equal(t, "assistant", out[0].Role)
+		assert.Equal(t, "reasoning only", out[0].ReasoningContent)
+		assert.Empty(t, out[0].Content)
+		assert.Empty(t, out[0].ToolCalls)
+	})
+
+	t.Run("step without thought/tool_calls/reasoning produces no assistant message", func(t *testing.T) {
+		step := types.AgentStep{Iteration: 0, Timestamp: time.Now()}
+		out := engine.appendToolResults(nil, step)
+		assert.Empty(t, out, "empty steps must not inject empty assistant messages")
+	})
+
+	t.Run("appends to existing message slice", func(t *testing.T) {
+		prior := []chat.Message{
+			{Role: "system", Content: "sys"},
+			{Role: "user", Content: "hi"},
+		}
+		step := types.AgentStep{
+			Iteration:        1,
+			Thought:          "answer",
+			ReasoningContent: "thinking",
+			Timestamp:        time.Now(),
+		}
+		out := engine.appendToolResults(prior, step)
+		require.Len(t, out, 3)
+		assert.Equal(t, "system", out[0].Role)
+		assert.Equal(t, "user", out[1].Role)
+		assert.Equal(t, "assistant", out[2].Role)
+		assert.Equal(t, "thinking", out[2].ReasoningContent)
+	})
+}
--- a/internal/agent/think.go
+++ b/internal/agent/think.go
@@ -15,12 +15,12 @@ import (

 // streamLLMResult holds accumulated output from a streaming LLM call.
 type streamLLMResult struct {
-	Content         string
-	ThinkingContent string // accumulated thinking/reasoning content, kept separate from answer
-	ToolCalls       []types.LLMToolCall
-	Usage           *types.TokenUsage
-	FinishReason    string // actual finish_reason from LLM (captured from last stream chunk)
-	StreamError     string // error message from stream (e.g., timeout), kept separate from Content
+	Content          string
+	ReasoningContent string // accumulated reasoning content, kept separate from answer
+	ToolCalls        []types.LLMToolCall
+	Usage            *types.TokenUsage
+	FinishReason     string // actual finish_reason from LLM (captured from last stream chunk)
+	StreamError      string // error message from stream (e.g., timeout), kept separate from Content
 }

 // streamLLMToEventBus streams LLM response through EventBus (generic method)
@@ -66,7 +66,7 @@ func (e *AgentEngine) streamLLMToEventBus(
 			isExtracted := chunk.Data != nil && chunk.Data["source"] != nil
 			if !isExtracted {
 				if chunk.ResponseType == types.ResponseTypeThinking {
-					result.ThinkingContent += chunk.Content
+					result.ReasoningContent += chunk.Content
 				} else {
 					result.Content += chunk.Content
 				}
@@ -239,9 +239,10 @@ func (e *AgentEngine) streamThinkingToEventBus(
 	}

 	resp := &types.ChatResponse{
-		Content:      fullContent,
-		ToolCalls:    llmResult.ToolCalls,
-		FinishReason: finishReason,
+		Content:          fullContent,
+		ReasoningContent: llmResult.ReasoningContent,
+		ToolCalls:        llmResult.ToolCalls,
+		FinishReason:     finishReason,
 	}
 	if llmResult.Usage != nil {
 		resp.Usage = *llmResult.Usage
--- a/internal/application/service/agent_history.go
+++ b/internal/application/service/agent_history.go
@@ -158,9 +158,10 @@ func buildAssistantHistoryMessages(m *types.Message) []chat.Message {
 			continue
 		}
 		assistantMsg := chat.Message{
-			Role:      "assistant",
-			Content:   step.Thought,
-			ToolCalls: make([]chat.ToolCall, 0, len(nonTerminalCalls)),
+			Role:             "assistant",
+			Content:          step.Thought,
+			ReasoningContent: step.ReasoningContent,
+			ToolCalls:        make([]chat.ToolCall, 0, len(nonTerminalCalls)),
 		}
 		for _, tc := range nonTerminalCalls {
 			argsJSON, _ := json.Marshal(tc.Args)
--- a/internal/application/service/agent_history_test.go
+++ b/internal/application/service/agent_history_test.go
@@ -230,3 +230,40 @@ func TestFilterNonTerminalToolCalls(t *testing.T) {
 		assert.Equal(t, agenttools.ToolWebSearch, out[1].Name)
 	}
 }
+
+// TestBuildAssistantHistoryMessages_ReplaysReasoningContent guards the
+// cross-turn replay path: AgentStep.ReasoningContent persisted on a prior turn
+// must be re-attached to the rebuilt assistant message, otherwise MiMo and
+// DeepSeek thinking-mode reject the next turn with HTTP 400 (issue #1302).
+func TestBuildAssistantHistoryMessages_ReplaysReasoningContent(t *testing.T) {
+	msg := &types.Message{
+		Role:    "assistant",
+		Content: "Found 3 matches in the docs.",
+		AgentSteps: types.AgentSteps{
+			{
+				Iteration:        0,
+				Thought:          "Let me search.",
+				ReasoningContent: "model's chain of thought",
+				ToolCalls: []types.ToolCall{{
+					ID:   "call_1",
+					Name: agenttools.ToolKnowledgeSearch,
+					Args: map[string]interface{}{"query": "foo"},
+					Result: &types.ToolResult{
+						Success: true,
+						Output:  "doc A",
+					},
+				}},
+			},
+		},
+	}
+	got := buildAssistantHistoryMessages(msg)
+	if !assert.Len(t, got, 3) {
+		return
+	}
+	assert.Equal(t, "model's chain of thought", got[0].ReasoningContent,
+		"reasoning_content from AgentStep must be replayed onto the rebuilt assistant message "+
+			"so MiMo/DeepSeek thinking-mode does not 400 on multi-turn (issue #1302)")
+	// Tool message and final answer message must NOT carry reasoning_content.
+	assert.Empty(t, got[1].ReasoningContent)
+	assert.Empty(t, got[2].ReasoningContent)
+}
--- a/internal/models/chat/chat.go
+++ b/internal/models/chat/chat.go
@@ -63,6 +63,11 @@ type Message struct {
 	ToolCallID   string               `json:"tool_call_id,omitempty"`  // Tool call ID (for tool role)
 	ToolCalls    []ToolCall           `json:"tool_calls,omitempty"`    // Tool calls (for assistant role)
 	Images       []string             `json:"images,omitempty"`        // Image URLs for multimodal (only for current user message)
+	// ReasoningContent 是 assistant 推理类模型（DeepSeek thinking、小米 MiMo、vLLM reasoning 等）
+	// 上一轮输出的思考内容。部分供应商（MiMo、DeepSeek V3.2/V4 thinking 模式）要求多轮对话中
+	// 把 assistant 的 reasoning_content 原样回传，否则会以 400 拒绝请求；其他不要求的供应商
+	// 会忽略未知字段，无副作用。
+	ReasoningContent string `json:"reasoning_content,omitempty"`
 }

 // ToolCall represents a tool call in a message
--- a/internal/models/chat/remote_api.go
+++ b/internal/models/chat/remote_api.go
@@ -249,6 +249,15 @@ func (c *RemoteAPIChat) ConvertMessages(messages []Message) []openai.ChatComplet
 			openaiMsg.Name = msg.Name
 		}

+		// Round-trip reasoning_content on assistant turns. MiMo and DeepSeek V3.2+
+		// thinking mode reject multi-turn requests where the prior assistant
+		// message lacks its reasoning_content with HTTP 400 ("The reasoning_content
+		// in the thinking mode must be passed back to the API."). Providers that
+		// don't recognize the field ignore it harmlessly.
+		if msg.Role == "assistant" && msg.ReasoningContent != "" {
+			openaiMsg.ReasoningContent = msg.ReasoningContent
+		}
+
 		openaiMessages = append(openaiMessages, openaiMsg)
 	}
 	return openaiMessages
--- a/internal/models/chat/remote_api_test.go
+++ b/internal/models/chat/remote_api_test.go
@@ -223,6 +223,52 @@ func TestBuildChatCompletionRequest_ToolChoice(t *testing.T) {
 	})
 }

+// TestConvertMessages_ReasoningContentRoundTrip verifies that assistant
+// reasoning_content is propagated through ConvertMessages so that providers
+// like MiMo / DeepSeek thinking-mode can read it back from prior turns.
+// See issue #1302: MiMo rejects multi-turn requests with HTTP 400
+// "The reasoning_content in the thinking mode must be passed back to the API."
+// when this field is dropped.
+func TestConvertMessages_ReasoningContentRoundTrip(t *testing.T) {
+	c := newTestRemoteChat(t)
+
+	t.Run("assistant reasoning_content propagated", func(t *testing.T) {
+		messages := []Message{
+			{Role: "user", Content: "hi"},
+			{
+				Role:             "assistant",
+				Content:          "the answer",
+				ReasoningContent: "let me think about this carefully",
+			},
+			{Role: "user", Content: "follow-up"},
+		}
+		out := c.ConvertMessages(messages)
+		require.Len(t, out, 3)
+		assert.Equal(t, "let me think about this carefully", out[1].ReasoningContent,
+			"assistant reasoning_content must be retained for multi-turn replay")
+		assert.Empty(t, out[0].ReasoningContent, "user message must not carry reasoning_content")
+		assert.Empty(t, out[2].ReasoningContent, "user message must not carry reasoning_content")
+	})
+
+	t.Run("non-assistant role drops reasoning_content even if set", func(t *testing.T) {
+		messages := []Message{
+			{Role: "user", Content: "hi", ReasoningContent: "should be dropped"},
+		}
+		out := c.ConvertMessages(messages)
+		require.Len(t, out, 1)
+		assert.Empty(t, out[0].ReasoningContent, "non-assistant roles must never carry reasoning_content upstream")
+	})
+
+	t.Run("empty assistant reasoning_content stays empty", func(t *testing.T) {
+		messages := []Message{
+			{Role: "assistant", Content: "no thinking"},
+		}
+		out := c.ConvertMessages(messages)
+		require.Len(t, out, 1)
+		assert.Empty(t, out[0].ReasoningContent)
+	})
+}
+
 // TestRemoteAPIChat 综合测试 Remote API Chat 的所有功能
 func TestRemoteAPIChat(t *testing.T) {
 	// 获取环境变量
--- a/internal/types/agent.go
+++ b/internal/types/agent.go
@@ -186,10 +186,15 @@ type ToolCall struct {

 // AgentStep represents one iteration of the ReAct loop
 type AgentStep struct {
-	Iteration int        `json:"iteration"`  // Iteration number (0-indexed)
-	Thought   string     `json:"thought"`    // LLM's reasoning/thinking (Think phase)
-	ToolCalls []ToolCall `json:"tool_calls"` // Tools called in this step (Act phase)
-	Timestamp time.Time  `json:"timestamp"`  // When this step occurred
+	Iteration int    `json:"iteration"` // Iteration number (0-indexed)
+	Thought   string `json:"thought"`   // LLM's reasoning/thinking (Think phase)
+	// ReasoningContent stores the OpenAI-protocol reasoning_content emitted by the
+	// model in this round. Persisted on AgentStep so cross-turn replay can put it
+	// back on the assistant message — required by MiMo / DeepSeek V3.2+ thinking
+	// mode, ignored by providers that don't recognize the field.
+	ReasoningContent string     `json:"reasoning_content,omitempty"`
+	ToolCalls        []ToolCall `json:"tool_calls"` // Tools called in this step (Act phase)
+	Timestamp        time.Time  `json:"timestamp"`  // When this step occurred
 }

 // GetObservations returns observations from all tool calls in this step
--- a/internal/types/chat.go
+++ b/internal/types/chat.go
@@ -27,10 +27,13 @@ type FunctionCall struct {

 // ChatResponse chat response
 type ChatResponse struct {
-	Content      string        `json:"content"`
-	ToolCalls    []LLMToolCall `json:"tool_calls,omitempty"`
-	FinishReason string        `json:"finish_reason,omitempty"`
-	Usage        TokenUsage    `json:"usage"`
+	Content string `json:"content"`
+	// ReasoningContent 是支持思考链的模型（DeepSeek thinking、小米 MiMo、vLLM reasoning 等）
+	// 在本轮输出的推理内容。需要在后续多轮请求中原样回传给那些严格校验的供应商。
+	ReasoningContent string        `json:"reasoning_content,omitempty"`
+	ToolCalls        []LLMToolCall `json:"tool_calls,omitempty"`
+	FinishReason     string        `json:"finish_reason,omitempty"`
+	Usage            TokenUsage    `json:"usage"`
 }

 // Response type