WeKnora/internal/application/service/knowledge_process.go

package service

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"slices"
	"sort"
	"strings"
	"time"

	"github.com/Tencent/WeKnora/internal/application/service/retriever"
	werrors "github.com/Tencent/WeKnora/internal/errors"
	"github.com/Tencent/WeKnora/internal/infrastructure/chunker"
	"github.com/Tencent/WeKnora/internal/infrastructure/docparser"
	"github.com/Tencent/WeKnora/internal/logger"
	"github.com/Tencent/WeKnora/internal/models/chat"
	"github.com/Tencent/WeKnora/internal/models/embedding"
	"github.com/Tencent/WeKnora/internal/searchutil"
	"github.com/Tencent/WeKnora/internal/tracing"
	"github.com/Tencent/WeKnora/internal/tracing/langfuse"
	"github.com/Tencent/WeKnora/internal/types"
	"github.com/Tencent/WeKnora/internal/types/interfaces"
	secutils "github.com/Tencent/WeKnora/internal/utils"
	"github.com/google/uuid"
	"github.com/hibiken/asynq"
	"go.opentelemetry.io/otel/attribute"
)

func (s *knowledgeService) cloneKnowledge(
	ctx context.Context,
	src *types.Knowledge,
	targetKB *types.KnowledgeBase,
) (err error) {
	if src.ParseStatus != "completed" {
		logger.GetLogger(ctx).WithField("knowledge_id", src.ID).Errorf("MoveKnowledge parse status is not completed")
		return nil
	}
	tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant)
	dst := &types.Knowledge{
		ID:               uuid.New().String(),
		TenantID:         targetKB.TenantID,
		KnowledgeBaseID:  targetKB.ID,
		Type:             src.Type,
		Channel:          src.Channel,
		Title:            src.Title,
		Description:      src.Description,
		Source:           src.Source,
		ParseStatus:      "processing",
		EnableStatus:     "disabled",
		EmbeddingModelID: targetKB.EmbeddingModelID,
		FileName:         src.FileName,
		FileType:         src.FileType,
		FileSize:         src.FileSize,
		FileHash:         src.FileHash,
		FilePath:         src.FilePath,
		StorageSize:      src.StorageSize,
		Metadata:         src.Metadata,
	}

	// Deep-copy the source document file into an object owned by the destination
	// knowledge. Without this the clone only shares the source's storage path, so
	// deleting the source knowledge would destroy the clone's file too. The new
	// object is tracked for cleanup if the clone fails downstream.
	var copiedFilePaths []string
	if src.FilePath != "" {
		srcKB, kbErr := s.kbService.GetKnowledgeBaseByID(ctx, src.KnowledgeBaseID)
		if kbErr != nil {
			return fmt.Errorf("clone knowledge: failed to load source knowledge base: %w", kbErr)
		}
		srcSvc := s.resolveFileServiceForPath(ctx, srcKB, src.FilePath)
		dstSvc := s.resolveFileService(ctx, targetKB)
		newPath, copyErr := copyOwnedObject(ctx, srcSvc, dstSvc, src.FilePath, targetKB.TenantID, dst.ID)
		if copyErr != nil {
			return fmt.Errorf("clone knowledge file copy failed: %w", copyErr)
		}
		dst.FilePath = newPath
		copiedFilePaths = append(copiedFilePaths, newPath)
	}

	defer func() {
		if err != nil {
			if len(copiedFilePaths) > 0 {
				cleanupCopiedObjects(ctx, s.resolveFileService(ctx, targetKB), copiedFilePaths)
			}
			dst.ParseStatus = "failed"
			dst.ErrorMessage = err.Error()
			_ = s.repo.UpdateKnowledge(ctx, dst)
			logger.GetLogger(ctx).WithField("error", err).Errorf("MoveKnowledge failed to move knowledge")
		} else {
			dst.ParseStatus = "completed"
			dst.EnableStatus = "enabled"
			_ = s.repo.UpdateKnowledge(ctx, dst)
			logger.GetLogger(ctx).WithField("knowledge_id", dst.ID).Infof("MoveKnowledge move knowledge successfully")
		}
	}()

	if err = s.repo.CreateKnowledge(ctx, dst); err != nil {
		logger.GetLogger(ctx).WithField("error", err).Errorf("MoveKnowledge create knowledge failed")
		return
	}
	tenantInfo.StorageUsed += dst.StorageSize
	if err = s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, dst.StorageSize); err != nil {
		logger.GetLogger(ctx).WithField("error", err).Errorf("MoveKnowledge update tenant storage used failed")
		return
	}
	if err = s.CloneChunk(ctx, src, dst); err != nil {
		logger.GetLogger(ctx).WithField("knowledge_id", dst.ID).
			WithField("error", err).Errorf("MoveKnowledge move chunks failed")
		return
	}
	return
}

// processDocumentFromPassage handles asynchronous processing of text passages
func (s *knowledgeService) processDocumentFromPassage(ctx context.Context,
	kb *types.KnowledgeBase, knowledge *types.Knowledge, passage []string,
) {
	// Update status to processing
	knowledge.ParseStatus = "processing"
	knowledge.UpdatedAt = time.Now()
	if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
		return
	}

	// Convert passages to chunks
	chunks := make([]types.ParsedChunk, 0, len(passage))
	start, end := 0, 0
	for i, p := range passage {
		if p == "" {
			continue
		}
		end += len([]rune(p))
		chunks = append(chunks, types.ParsedChunk{
			Content: p,
			Seq:     i,
			Start:   start,
			End:     end,
		})
		start = end
	}
	// Process and store chunks
	var opts ProcessChunksOptions
	if kb.QuestionGenerationConfig != nil && kb.QuestionGenerationConfig.Enabled {
		opts.EnableQuestionGeneration = true
		opts.QuestionCount = kb.QuestionGenerationConfig.QuestionCount
		if opts.QuestionCount <= 0 {
			opts.QuestionCount = 3
		}
	}
	s.processChunks(ctx, kb, knowledge, chunks, opts)
}

// ProcessChunksOptions contains options for processing chunks
type ProcessChunksOptions struct {
	EnableQuestionGeneration bool
	QuestionCount            int
	EnableMultimodel         bool
	StoredImages             []docparser.StoredImage
	// ParentChunks holds parent chunk data when parent-child chunking is enabled.
	// When set, the chunks passed to processChunks are child chunks, and each
	// child's ParentIndex references an entry in this slice.
	ParentChunks []types.ParsedParentChunk
	Metadata     map[string]string
}

// finalizeIndexedKnowledgeState makes a document retrievable as soon as chunks
// and indexes are persisted (enable_status=enabled), but it deliberately does
// NOT mark the row completed when enrichment is still expected. Whenever the
// document still has work to fan out — pending multimodal image tasks, or text
// chunks that feed summary/question/graph generation — parse_status stays
// "processing" so KnowledgePostProcess remains the single authority that drives
// processing → finalizing → completed. Marking the row completed here would make
// post-process hit its "non-processing status" guard and skip the summary
// fan-out, stranding summary_status on "pending" forever.
func finalizeIndexedKnowledgeState(
	knowledge *types.Knowledge,
	totalStorageSize int64,
	textChunkCount int,
	hasPendingMultimodal bool,
	now time.Time,
) {
	if hasPendingMultimodal || textChunkCount > 0 {
		knowledge.ParseStatus = types.ParseStatusProcessing
		knowledge.SummaryStatus = types.SummaryStatusNone
	} else {
		// No text chunks and no pending multimodal work: there is nothing for
		// post-process to enrich, so complete immediately.
		knowledge.ParseStatus = types.ParseStatusCompleted
		knowledge.SummaryStatus = types.SummaryStatusNone
	}

	knowledge.EnableStatus = "enabled"
	knowledge.StorageSize = totalStorageSize
	knowledge.ProcessedAt = &now
	knowledge.UpdatedAt = now
}

// buildSplitterConfig creates a SplitterConfig with fallbacks from a KnowledgeBase.
// Defaults mirror chunker.DefaultChunkSize / DefaultChunkOverlap so behavior is
// identical whether callers come through this path or invoke the chunker
// directly with a zero-value config.
func buildSplitterConfig(kb *types.KnowledgeBase) chunker.SplitterConfig {
	chunkCfg := chunker.SplitterConfig{
		ChunkSize:    kb.ChunkingConfig.ChunkSize,
		ChunkOverlap: kb.ChunkingConfig.ChunkOverlap,
		Separators:   kb.ChunkingConfig.Separators,
		Strategy:     kb.ChunkingConfig.Strategy,
		TokenLimit:   kb.ChunkingConfig.TokenLimit,
		Languages:    kb.ChunkingConfig.Languages,
	}
	if chunkCfg.ChunkSize <= 0 {
		chunkCfg.ChunkSize = chunker.DefaultChunkSize
	}
	if chunkCfg.ChunkOverlap <= 0 {
		chunkCfg.ChunkOverlap = chunker.DefaultChunkOverlap
	}
	if len(chunkCfg.Separators) == 0 {
		chunkCfg.Separators = []string{"\n\n", "\n", "。"}
	}
	return chunkCfg
}

// buildParentChildConfigs derives parent and child SplitterConfig from ChunkingConfig.
// The base config (already validated with defaults) is used for separators.
func buildParentChildConfigs(cc types.ChunkingConfig, base chunker.SplitterConfig) (parent, child chunker.SplitterConfig) {
	parentSize := cc.ParentChunkSize
	if parentSize <= 0 {
		parentSize = 4096
	}
	childSize := cc.ChildChunkSize
	if childSize <= 0 {
		childSize = 384
	}
	parent = chunker.SplitterConfig{
		ChunkSize:    parentSize,
		ChunkOverlap: base.ChunkOverlap, // reuse configured overlap for parents
		Separators:   base.Separators,
	}
	child = chunker.SplitterConfig{
		ChunkSize:    childSize,
		ChunkOverlap: childSize / 5, // ~20% overlap for child chunks
		Separators:   base.Separators,
	}
	return
}

// processChunks processes chunks and creates embeddings for knowledge content
func (s *knowledgeService) processChunks(ctx context.Context,
	kb *types.KnowledgeBase, knowledge *types.Knowledge, chunks []types.ParsedChunk,
	opts ...ProcessChunksOptions,
) {
	// Get options
	var options ProcessChunksOptions
	if len(opts) > 0 {
		options = opts[0]
	}

	ctx, span := tracing.ContextWithSpan(ctx, "knowledgeService.processChunks")
	defer span.End()
	span.SetAttributes(
		attribute.Int("tenant_id", int(knowledge.TenantID)),
		attribute.String("knowledge_base_id", knowledge.KnowledgeBaseID),
		attribute.String("knowledge_id", knowledge.ID),
		attribute.String("embedding_model_id", kb.EmbeddingModelID),
		attribute.Int("chunk_count", len(chunks)),
	)

	// Check if knowledge is being deleted/cancelled before processing.
	// Both statuses short-circuit identically here — there's nothing to clean
	// up yet so the branch is purely "stop early".
	if aborted, status := s.isKnowledgeAborted(ctx, knowledge.TenantID, knowledge.ID); aborted {
		logger.Infof(ctx, "Knowledge aborted (%s), skipping chunk processing: %s", status, knowledge.ID)
		span.AddEvent("aborted: knowledge " + status)
		return
	}

	// Get embedding model for vectorization — only needed when vector/keyword indexing is enabled
	var embeddingModel embedding.Embedder
	if kb.NeedsEmbeddingModel() {
		var err error
		embeddingModel, err = s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID)
		if err != nil {
			logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks get embedding model failed")
			span.RecordError(err)
			return
		}
	} else {
		logger.Infof(ctx, "Vector/keyword indexing disabled for KB %s, skipping embedding model", kb.ID)
	}

	// 幂等性处理：清理旧的chunks和索引数据，避免重复数据
	logger.Infof(ctx, "Cleaning up existing chunks and index data for knowledge: %s", knowledge.ID)

	// 删除旧的chunks
	if err := s.chunkService.DeleteChunksByKnowledgeID(ctx, knowledge.ID); err != nil {
		logger.Warnf(ctx, "Failed to delete existing chunks (may not exist): %v", err)
		// 不返回错误，继续处理（可能没有旧数据）
	}

	// 删除旧的索引数据 — only when vector/keyword indexing is enabled
	tenantInfo := ctx.Value(types.TenantInfoContextKey).(*types.Tenant)
	retrieveEngine, err := retriever.CreateRetrieveEngineForKB(
		ctx, s.retrieveEngine, s.ownership, tenantInfo.ID, kb.VectorStoreID)
	if err == nil && embeddingModel != nil {
		if err := retrieveEngine.DeleteByKnowledgeIDList(ctx, []string{knowledge.ID}, embeddingModel.GetDimensions(), knowledge.Type); err != nil {
			logger.Warnf(ctx, "Failed to delete existing index data (may not exist): %v", err)
			// 不返回错误，继续处理（可能没有旧数据）
		} else {
			logger.Infof(ctx, "Successfully deleted existing index data for knowledge: %s", knowledge.ID)
		}
	}

	// 删除知识图谱数据（如果存在）
	namespace := types.NameSpace{KnowledgeBase: knowledge.KnowledgeBaseID, Knowledge: knowledge.ID}
	if err := s.graphEngine.DelGraph(ctx, []types.NameSpace{namespace}); err != nil {
		logger.Warnf(ctx, "Failed to delete existing graph data (may not exist): %v", err)
		// 不返回错误，继续处理
	}

	logger.Infof(ctx, "Cleanup completed, starting to process new chunks")

	// ========== DocReader 解析结果日志 ==========
	logger.Infof(ctx, "[DocReader] ========== 解析结果概览 ==========")
	logger.Infof(ctx, "[DocReader] 知识ID: %s, 知识库ID: %s", knowledge.ID, knowledge.KnowledgeBaseID)
	logger.Infof(ctx, "[DocReader] 总Chunk数量: %d", len(chunks))

	// 统计图片信息
	totalImages := 0
	chunksWithImages := 0
	for _, chunkData := range chunks {
		if len(chunkData.Images) > 0 {
			chunksWithImages++
			totalImages += len(chunkData.Images)
		}
	}
	logger.Infof(ctx, "[DocReader] 包含图片的Chunk数: %d, 总图片数: %d", chunksWithImages, totalImages)

	// 打印每个Chunk的详细信息
	for idx, chunkData := range chunks {
		contentPreview := chunkData.Content
		if len(contentPreview) > 200 {
			contentPreview = contentPreview[:200] + "..."
		}
		logger.Infof(ctx, "[DocReader] Chunk #%d (seq=%d): 内容长度=%d, 图片数=%d, 范围=[%d-%d]",
			idx, chunkData.Seq, len(chunkData.Content), len(chunkData.Images), chunkData.Start, chunkData.End)
		logger.Debugf(ctx, "[DocReader] Chunk #%d 内容预览: %s", idx, contentPreview)

		// 打印图片详细信息
		for imgIdx, img := range chunkData.Images {
			logger.Infof(ctx, "[DocReader]   图片 #%d: URL=%s", imgIdx, img.URL)
			logger.Infof(ctx, "[DocReader]   图片 #%d: OriginalURL=%s", imgIdx, img.OriginalURL)
			if img.Caption != "" {
				captionPreview := img.Caption
				if len(captionPreview) > 100 {
					captionPreview = captionPreview[:100] + "..."
				}
				logger.Infof(ctx, "[DocReader]   图片 #%d: Caption=%s", imgIdx, captionPreview)
			}
			if img.OCRText != "" {
				ocrPreview := img.OCRText
				if len(ocrPreview) > 100 {
					ocrPreview = ocrPreview[:100] + "..."
				}
				logger.Infof(ctx, "[DocReader]   图片 #%d: OCRText=%s", imgIdx, ocrPreview)
			}
			logger.Infof(ctx, "[DocReader]   图片 #%d: 位置=[%d-%d]", imgIdx, img.Start, img.End)
		}
	}
	logger.Infof(ctx, "[DocReader] ========== 解析结果概览结束 ==========")

	// Create chunk objects from proto chunks
	maxSeq := 0

	// 统计图片相关的子Chunk数量，用于扩展insertChunks的容量
	imageChunkCount := 0
	for _, chunkData := range chunks {
		if len(chunkData.Images) > 0 {
			// 为每个图片的OCR和Caption分别创建一个Chunk
			imageChunkCount += len(chunkData.Images) * 2
		}
		if int(chunkData.Seq) > maxSeq {
			maxSeq = int(chunkData.Seq)
		}
	}

	// === Parent-Child Chunking: create parent chunks first ===
	hasParentChild := len(options.ParentChunks) > 0
	var parentDBChunks []*types.Chunk // indexed by ParsedParentChunk position
	if hasParentChild {
		parentDBChunks = make([]*types.Chunk, len(options.ParentChunks))
		for i, pc := range options.ParentChunks {
			parentDBChunks[i] = &types.Chunk{
				ID:              uuid.New().String(),
				TenantID:        knowledge.TenantID,
				KnowledgeID:     knowledge.ID,
				KnowledgeBaseID: knowledge.KnowledgeBaseID,
				Content:         pc.Content,
				ChunkIndex:      pc.Seq,
				IsEnabled:       true,
				CreatedAt:       time.Now(),
				UpdatedAt:       time.Now(),
				StartAt:         pc.Start,
				EndAt:           pc.End,
				ChunkType:       types.ChunkTypeParentText,
			}
		}
		// Set prev/next links for parent chunks
		for i := range parentDBChunks {
			if i > 0 {
				parentDBChunks[i-1].NextChunkID = parentDBChunks[i].ID
				parentDBChunks[i].PreChunkID = parentDBChunks[i-1].ID
			}
		}
		logger.Infof(ctx, "Created %d parent chunks for parent-child strategy", len(parentDBChunks))
	}

	// 重新分配容量，考虑图片相关的Chunk + parent chunks
	parentCount := len(options.ParentChunks)
	insertChunks := make([]*types.Chunk, 0, len(chunks)+imageChunkCount+parentCount)
	// Add parent chunks first (they go into DB but NOT into the vector index)
	if hasParentChild {
		insertChunks = append(insertChunks, parentDBChunks...)
	}

	for idx, chunkData := range chunks {
		if strings.TrimSpace(chunkData.Content) == "" {
			continue
		}

		// 创建主文本Chunk
		textChunk := &types.Chunk{
			ID:              uuid.New().String(),
			TenantID:        knowledge.TenantID,
			KnowledgeID:     knowledge.ID,
			KnowledgeBaseID: knowledge.KnowledgeBaseID,
			Content:         chunkData.Content,
			ContextHeader:   chunkData.ContextHeader,
			ChunkIndex:      int(chunkData.Seq),
			IsEnabled:       true,
			CreatedAt:       time.Now(),
			UpdatedAt:       time.Now(),
			StartAt:         int(chunkData.Start),
			EndAt:           int(chunkData.End),
			ChunkType:       types.ChunkTypeText,
		}

		// Wire up ParentChunkID for child chunks
		if hasParentChild && chunkData.ParentIndex >= 0 && chunkData.ParentIndex < len(parentDBChunks) {
			textChunk.ParentChunkID = parentDBChunks[chunkData.ParentIndex].ID
		}

		chunks[idx].ChunkID = textChunk.ID
		insertChunks = append(insertChunks, textChunk)
	}

	// Sort chunks by index for proper ordering
	sort.Slice(insertChunks, func(i, j int) bool {
		return insertChunks[i].ChunkIndex < insertChunks[j].ChunkIndex
	})

	// 仅为文本类型的Chunk设置前后关系（child chunks only, parents already linked above）
	textChunks := make([]*types.Chunk, 0, len(chunks))
	for _, chunk := range insertChunks {
		if chunk.ChunkType == types.ChunkTypeText && chunk.ParentChunkID != "" {
			// This is a child chunk in parent-child mode
			textChunks = append(textChunks, chunk)
		} else if chunk.ChunkType == types.ChunkTypeText && !hasParentChild {
			// Normal flat chunk (no parent-child mode)
			textChunks = append(textChunks, chunk)
		}
	}

	// 设置文本Chunk之间的前后关系 (skip if parent-child, children don't need prev/next links)
	if !hasParentChild {
		for i, chunk := range textChunks {
			if i > 0 {
				textChunks[i-1].NextChunkID = chunk.ID
			}
			if i < len(textChunks)-1 {
				textChunks[i+1].PreChunkID = chunk.ID
			}
		}
	}

	// Check if knowledge is being deleted/cancelled before writing chunks.
	// Nothing has been persisted yet, so both branches just bail.
	if aborted, status := s.isKnowledgeAborted(ctx, knowledge.TenantID, knowledge.ID); aborted {
		logger.Infof(ctx, "Knowledge aborted (%s), skipping chunk write: %s", status, knowledge.ID)
		span.AddEvent("aborted: knowledge " + status + " before saving")
		return
	}

	// Save chunks to database — ALWAYS, regardless of indexing strategy.
	// Chunks are needed for wiki generation, graph extraction, and summary generation
	// even when vector/keyword indexing is disabled.
	span.AddEvent("create chunks")
	s.beginStage(ctx, knowledge.ID, types.StageChunking, types.JSONMap{
		"chunks_planned": len(insertChunks),
	})
	if err := s.chunkService.CreateChunks(ctx, insertChunks); err != nil {
		knowledge.ParseStatus = types.ParseStatusFailed
		knowledge.ErrorMessage = err.Error()
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		span.RecordError(err)
		s.failStage(ctx, knowledge.ID, types.StageChunking,
			werrors.ErrCodeChunkingFailed, "create chunks failed", err)
		return
	}
	totalChunkChars := 0
	for _, c := range insertChunks {
		totalChunkChars += len(c.Content)
	}
	s.endStage(ctx, knowledge.ID, types.StageChunking, types.JSONMap{
		"chunks_written":   len(insertChunks),
		"total_text_chars": totalChunkChars,
	})

	// Create index information and perform vector indexing — only when vector/keyword is enabled.
	// Chunks are ALWAYS saved to DB (above) because wiki and graph need them even without vector indexing.
	var totalStorageSize int64
	if kb.NeedsEmbeddingModel() && embeddingModel != nil {
		embedInput := types.JSONMap{
			"chunks_to_embed": len(textChunks),
			"model_id":        kb.EmbeddingModelID,
		}
		if dim := embeddingModel.GetDimensions(); dim > 0 {
			embedInput["dim"] = dim
		}
		s.beginStage(ctx, knowledge.ID, types.StageEmbedding, embedInput)
		// Create index information — only for child/flat chunks, NOT parent chunks.
		// Parent chunks are stored for context retrieval but do not need vector embeddings.
		// Prepend the document title to improve semantic alignment between
		// question-style queries and statement-style chunk content.
		indexInfoList := make([]*types.IndexInfo, 0, len(textChunks))
		titlePrefix := ""
		if t := strings.TrimSpace(knowledge.Title); t != "" {
			titlePrefix = t + "\n"
		}
		for _, chunk := range textChunks {
			// chunk.EmbeddingContent prepends ContextHeader (heading breadcrumb)
			// when the chunker populated it during Tier-1 splitting; falls back
			// to plain Content otherwise. Title prefix sits outermost.
			indexContent := titlePrefix + chunk.EmbeddingContent()
			indexInfoList = append(indexInfoList, &types.IndexInfo{
				Content:         indexContent,
				SourceID:        chunk.ID,
				SourceType:      types.ChunkSourceType,
				ChunkID:         chunk.ID,
				KnowledgeID:     knowledge.ID,
				KnowledgeBaseID: knowledge.KnowledgeBaseID,
				IsEnabled:       true,
			})
		}

		// Calculate storage size required for embeddings
		span.AddEvent("estimate storage size")
		totalStorageSize = retrieveEngine.EstimateStorageSize(ctx, embeddingModel, indexInfoList)
		if tenantInfo.StorageQuota > 0 {
			// Re-fetch tenant storage information
			tenantInfo, err = s.tenantRepo.GetTenantByID(ctx, tenantInfo.ID)
			if err != nil {
				knowledge.ParseStatus = types.ParseStatusFailed
				knowledge.ErrorMessage = err.Error()
				knowledge.UpdatedAt = time.Now()
				s.repo.UpdateKnowledge(ctx, knowledge)
				span.RecordError(err)
				return
			}
			// Check if there's enough storage quota available
			if tenantInfo.StorageUsed+totalStorageSize > tenantInfo.StorageQuota {
				knowledge.ParseStatus = types.ParseStatusFailed
				knowledge.ErrorMessage = "存储空间不足"
				knowledge.UpdatedAt = time.Now()
				s.repo.UpdateKnowledge(ctx, knowledge)
				span.RecordError(errors.New("storage quota exceeded"))
				return
			}
		}

		// Check again before batch indexing (heavy operation).
		// deleting → row is going away anyway, drop the chunks we just wrote.
		// cancelled → user wants to keep what was already persisted, just stop.
		if aborted, status := s.isKnowledgeAborted(ctx, knowledge.TenantID, knowledge.ID); aborted {
			logger.Infof(ctx, "Knowledge aborted (%s) before indexing: %s", status, knowledge.ID)
			if status == types.ParseStatusDeleting {
				if err := s.chunkService.DeleteChunksByKnowledgeID(ctx, knowledge.ID); err != nil {
					logger.Warnf(ctx, "Failed to cleanup chunks after deletion detected: %v", err)
				}
			}
			span.AddEvent("aborted: knowledge " + status + " before indexing")
			return
		}

		span.AddEvent("batch index")
		err = retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfoList)
		if err != nil {
			knowledge.ParseStatus = types.ParseStatusFailed
			knowledge.ErrorMessage = err.Error()
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)

			// delete failed chunks
			if err := s.chunkService.DeleteChunksByKnowledgeID(ctx, knowledge.ID); err != nil {
				logger.Errorf(ctx, "Delete chunks failed: %v", err)
			}

			// delete index
			if err := retrieveEngine.DeleteByKnowledgeIDList(
				ctx, []string{knowledge.ID}, embeddingModel.GetDimensions(), kb.Type,
			); err != nil {
				logger.Errorf(ctx, "Delete index failed: %v", err)
			}
			span.RecordError(err)
			// Map vector store / embedding rate-limit errors to a
			// stable code so the UI can offer "retry later" hints.
			code := werrors.ErrCodeVectorStoreWriteFailed
			if isLikelyRateLimitError(err) {
				code = werrors.ErrCodeEmbeddingRateLimit
			}
			s.failStage(ctx, knowledge.ID, types.StageEmbedding,
				code, "batch index failed", err)
			return
		}
		logger.GetLogger(ctx).Infof("processChunks batch index successfully, with %d index", len(indexInfoList))
		s.endStage(ctx, knowledge.ID, types.StageEmbedding, types.JSONMap{
			"vectors_written": len(indexInfoList),
			"storage_bytes":   totalStorageSize,
		})

		// Final check before marking as completed.
		// deleting → drop chunks+index we just wrote.
		// cancelled → keep persisted data; the row stays in cancelled status
		// and downstream stages skip via the entry guards.
		if aborted, status := s.isKnowledgeAborted(ctx, knowledge.TenantID, knowledge.ID); aborted {
			logger.Infof(ctx, "Knowledge aborted (%s) after indexing: %s", status, knowledge.ID)
			if status == types.ParseStatusDeleting {
				if err := s.chunkService.DeleteChunksByKnowledgeID(ctx, knowledge.ID); err != nil {
					logger.Warnf(ctx, "Failed to cleanup chunks after deletion detected: %v", err)
				}
				if err := retrieveEngine.DeleteByKnowledgeIDList(ctx, []string{knowledge.ID}, embeddingModel.GetDimensions(), kb.Type); err != nil {
					logger.Warnf(ctx, "Failed to cleanup index after deletion detected: %v", err)
				}
			}
			span.AddEvent("aborted: knowledge " + status + " during processing")
			return
		}
	} else {
		logger.Infof(ctx, "Vector/keyword indexing disabled for KB %s, skipping BatchIndex", kb.ID)
		s.skipStage(ctx, knowledge.ID, types.StageEmbedding, "skipped")
	}

	// Check if this document has extracted images that will be processed asynchronously
	isImage := IsImageType(knowledge.FileType)
	isVideo := IsVideoType(knowledge.FileType)
	pendingMultimodal := isImage && options.EnableMultimodel && len(options.StoredImages) > 0
	pendingPDFMultimodal := !isImage && !isVideo && options.EnableMultimodel && len(options.StoredImages) > 0

	now := time.Now()
	finalizeIndexedKnowledgeState(
		knowledge,
		totalStorageSize,
		len(textChunks),
		pendingMultimodal || pendingPDFMultimodal,
		now,
	)

	if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
		logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks update knowledge failed")
	}

	// Enqueue multimodal tasks for images (async, non-blocking)
	if options.EnableMultimodel && len(options.StoredImages) > 0 {
		s.beginStage(ctx, knowledge.ID, types.StageMultimodal, types.JSONMap{
			"image_count":    len(options.StoredImages),
			"enable_ocr":     true,
			"enable_caption": true,
		})
		s.enqueueImageMultimodalTasks(ctx, knowledge, kb, options.StoredImages, chunks, options.Metadata)
	} else {
		s.skipStage(ctx, knowledge.ID, types.StageMultimodal, "skipped")
		// If there are no multimodal tasks, enqueue the post process task immediately
		lang, _ := types.LanguageFromContext(ctx)
		postProcessPayload := types.KnowledgePostProcessPayload{
			TenantID:        knowledge.TenantID,
			KnowledgeID:     knowledge.ID,
			KnowledgeBaseID: knowledge.KnowledgeBaseID,
			Language:        lang,
			Attempt:         attemptFromCtx(ctx),
		}
		langfuse.InjectTracing(ctx, &postProcessPayload)
		payloadBytes, err := json.Marshal(postProcessPayload)
		if err == nil {
			task := asynq.NewTask(types.TypeKnowledgePostProcess, payloadBytes, asynq.Queue("default"), asynq.MaxRetry(3))
			if _, err := s.task.Enqueue(task); err != nil {
				logger.Errorf(ctx, "Failed to enqueue knowledge post process task: %v", err)
			} else {
				logger.Infof(ctx, "Enqueued knowledge post process task for %s", knowledge.ID)
			}
		} else {
			logger.Errorf(ctx, "Failed to marshal knowledge post process payload: %v", err)
		}
	}

	// Update tenant's storage usage
	tenantInfo.StorageUsed += totalStorageSize
	if err := s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, totalStorageSize); err != nil {
		logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks update tenant storage used failed")
	}
	logger.GetLogger(ctx).Infof("processChunks successfully")
}

// defaultMaxInputChars is the default maximum characters used as input for summary generation.
const defaultMaxInputChars = 1024 * 24

// imageDominatedTextThreshold is the rune count below which a document is
// considered "image-dominated" — i.e. the body text is so sparse that we
// should fall back to full image enrichment (caption + OCR) for the summary
// LLM call. Above this threshold the document has enough native text that
// caption-only enrichment is preferable (OCR text from incidental figures
// would otherwise add noise without contributing to the main topic).
const imageDominatedTextThreshold = 200

// errInsufficientSummaryContent signals that getSummary refused to call the
// LLM because the document had no usable text after image markup was stripped
// (typical for scanned PDFs where VLM OCR yielded nothing). Callers should
// mark the knowledge's summary as failed instead of falling back to the first
// chunk's raw content (which would just be a bare image reference).
var errInsufficientSummaryContent = errors.New("insufficient text content for summary generation")

// checkSufficientSummaryContent returns errInsufficientSummaryContent if the
// given content does not carry enough real text (after stripping image markup)
// for an LLM summary call, and logs a warning at the call site. Returns nil
// when the content passes the threshold.
//
// Extracted so the threshold gate can be unit-tested without standing up the
// full ProcessSummaryGeneration dependency graph.
func checkSufficientSummaryContent(ctx context.Context, knowledgeID, content string) error {
	realTextLen := realTextRuneCount(content)
	if realTextLen < minTextContentRunes {
		logger.GetLogger(ctx).Warnf(
			"summary content check: knowledge %s has insufficient text after stripping image markup (real_text_runes=%d, min=%d); skipping LLM call",
			knowledgeID, realTextLen, minTextContentRunes,
		)
		return errInsufficientSummaryContent
	}
	return nil
}

// getSummary generates a summary for knowledge content using an AI model
func (s *knowledgeService) getSummary(ctx context.Context,
	summaryModel chat.Chat, knowledge *types.Knowledge, chunks []*types.Chunk,
) (string, error) {
	// Get knowledge info from the first chunk
	if len(chunks) == 0 {
		return "", fmt.Errorf("no chunks provided for summary generation")
	}

	// Determine max input chars from config
	maxInputChars := defaultMaxInputChars
	if s.config.Conversation.Summary != nil && s.config.Conversation.Summary.MaxInputChars > 0 {
		maxInputChars = s.config.Conversation.Summary.MaxInputChars
	}

	// Sort chunks by StartAt for proper concatenation
	sortedChunks := make([]*types.Chunk, len(chunks))
	copy(sortedChunks, chunks)
	sort.Slice(sortedChunks, func(i, j int) bool {
		return sortedChunks[i].StartAt < sortedChunks[j].StartAt
	})

	// Concatenate original chunk contents by StartAt offset to reconstruct the
	// document, then enrich with image info in a second pass. Enrichment must
	// happen AFTER concatenation because StartAt is based on original document
	// offsets — enriched (longer) content would break the positioning.
	chunkContents := ""
	for _, chunk := range sortedChunks {
		runes := []rune(chunkContents)
		if chunk.StartAt <= len(runes) {
			chunkContents = string(runes[:chunk.StartAt]) + chunk.Content
		} else {
			chunkContents = chunkContents + chunk.Content
		}
	}

	// Collect image_info from image_ocr/image_caption children and enrich
	chunkIDs := make([]string, len(sortedChunks))
	for i, c := range sortedChunks {
		chunkIDs[i] = c.ID
	}
	imageInfoMap := searchutil.CollectImageInfoByChunkIDs(ctx, s.chunkRepo, knowledge.TenantID, chunkIDs)
	mergedImageInfo := searchutil.MergeImageInfoJSON(imageInfoMap)
	if mergedImageInfo != "" {
		// For image-dominated documents (e.g. a docx whose only payload is a
		// single embedded picture, or a screenshot-only file), captions alone
		// often carry too little signal — the real content lives in OCR text.
		// Detect that case by measuring the document's real (non-image-markup)
		// text BEFORE enrichment, and switch to full enrichment (caption + OCR)
		// when the body is essentially empty. Text-heavy documents stay on the
		// caption-only path to avoid OCR noise (page headers/footers/watermarks
		// from many figures diluting the main topic).
		if realTextRuneCount(chunkContents) < imageDominatedTextThreshold {
			// Caption + OCR (no URL/original wrappers — those are pure noise
			// for the summary LLM and have been observed to trigger the
			// "image reference with no extracted text" refusal heuristic).
			chunkContents = searchutil.EnrichContentCaptionAndOCR(chunkContents, mergedImageInfo)
		} else {
			chunkContents = searchutil.EnrichContentCaptionOnly(chunkContents, mergedImageInfo)
		}
	}

	// Apply length limit: sample long content to fit within maxInputChars
	chunkContents = sampleLongContent(chunkContents, maxInputChars)

	logger.GetLogger(ctx).Infof("getSummary: content length=%d chars (max=%d) for knowledge %s",
		len([]rune(chunkContents)), maxInputChars, knowledge.ID)

	// Bail out before the LLM call when there is not enough actual text to
	// summarise. We deliberately do not pass filename/file-type metadata to the
	// LLM: scanned PDFs frequently carry filenames like "MX5280.pdf" (the
	// scanner model), and feeding that to the model would invite it to
	// hallucinate a scanner manual instead of admitting the document had no
	// extractable text.
	if err := checkSufficientSummaryContent(ctx, knowledge.ID, chunkContents); err != nil {
		return "", err
	}

	// Pass the raw chunk text to the LLM with no filename / file-type framing.
	contentWithMetadata := chunkContents

	// Determine max output tokens from config
	maxTokens := 2048
	if s.config.Conversation.Summary != nil && s.config.Conversation.Summary.MaxCompletionTokens > 0 {
		maxTokens = s.config.Conversation.Summary.MaxCompletionTokens
	}

	// Generate summary using AI model
	summaryPrompt := types.RenderPromptPlaceholders(s.config.Conversation.GenerateSummaryPrompt, types.PlaceholderValues{
		"language": types.LanguageNameFromContext(ctx),
	})
	thinking := false
	summary, err := summaryModel.Chat(ctx, []chat.Message{
		{
			Role:    "system",
			Content: summaryPrompt,
		},
		{
			Role:    "user",
			Content: contentWithMetadata,
		},
	}, &chat.ChatOptions{
		Temperature: 0.3,
		MaxTokens:   maxTokens,
		Thinking:    &thinking,
	})
	if err != nil {
		logger.GetLogger(ctx).WithField("error", err).Errorf("GetSummary failed")
		return "", err
	}
	logger.GetLogger(ctx).WithField("summary", summary.Content).Infof("GetSummary success")
	return summary.Content, nil
}

// sampleLongContent returns content that fits within maxChars.
// For short content (≤ maxChars), it is returned as-is.
// For long content, it samples: head (60%), tail (20%), and evenly-spaced middle (20%),
// joined by "[...content omitted...]" markers so the LLM knows content was skipped.
func sampleLongContent(content string, maxChars int) string {
	runes := []rune(content)
	if len(runes) <= maxChars {
		return content
	}

	const omitMarker = "\n\n[...content omitted...]\n\n"
	omitRunes := len([]rune(omitMarker))

	// Reserve space for two omit markers (head→middle, middle→tail)
	usable := maxChars - 2*omitRunes
	if usable < 100 {
		// Fallback: just truncate
		return string(runes[:maxChars])
	}

	headLen := usable * 60 / 100
	tailLen := usable * 20 / 100
	midLen := usable - headLen - tailLen

	head := string(runes[:headLen])
	tail := string(runes[len(runes)-tailLen:])

	// Sample middle portion: take a contiguous block from the center of the document
	midStart := len(runes)/2 - midLen/2
	if midStart < headLen {
		midStart = headLen
	}
	midEnd := midStart + midLen
	if midEnd > len(runes)-tailLen {
		midEnd = len(runes) - tailLen
		midStart = midEnd - midLen
		if midStart < headLen {
			midStart = headLen
		}
	}
	middle := string(runes[midStart:midEnd])

	return head + omitMarker + middle + omitMarker + tail
}

// ProcessSummaryGeneration handles async summary generation task
func (s *knowledgeService) ProcessSummaryGeneration(ctx context.Context, t *asynq.Task) (retErr error) {
	var payload types.SummaryGenerationPayload
	if err := json.Unmarshal(t.Payload(), &payload); err != nil {
		logger.Errorf(ctx, "Failed to unmarshal summary generation payload: %v", err)
		return nil // Don't retry on unmarshal error
	}

	logger.Infof(ctx, "Processing summary generation for knowledge: %s", payload.KnowledgeID)

	// Set tenant and language context
	ctx = context.WithValue(ctx, types.TenantIDContextKey, payload.TenantID)
	if payload.Language != "" {
		ctx = context.WithValue(ctx, types.LanguageContextKey, payload.Language)
	}

	// A newer attempt (re-upload / edit / reparse) has superseded this one:
	// skip before opening the span or registering the FinalizeSubtask defer
	// so we neither read stale chunks nor decrement the new attempt's counter.
	if attemptSuperseded(ctx, s.tracker(), payload.KnowledgeID, payload.Attempt) {
		logger.Infof(ctx, "summary: attempt %d superseded for %s, skipping stale enrichment",
			payload.Attempt, payload.KnowledgeID)
		return nil
	}

	// Open a subspan under the parent attempt's postprocess stage so the
	// trace surface shows the real summary-generation duration (LLM call
	// + chunk write + index) instead of just the upstream's enqueue time.
	// Closes via the deferred handler below — every return path lands in
	// the defer, including the early returns ahead.
	span := s.beginPostprocessSubspan(ctx, payload.KnowledgeID, payload.Attempt, "postprocess.summary",
		types.JSONMap{
			"language": payload.Language,
		})
	var summaryErr error
	summaryOut := types.JSONMap{}
	defer func() {
		// Decrement the parent's enrichment counter on terminal exit.
		// "Terminal" is keyed on the value RETURNED to asynq, not on
		// summaryErr: several branches record a failure on the span
		// (summaryErr != nil) yet deliberately `return nil` so asynq does
		// NOT retry (e.g. insufficient text content, KB/knowledge fetch
		// failures). Those are terminal and must drain — keying on
		// summaryErr would skip them and leave the row stuck in
		// "finalizing". When we DO return an error asynq will retry, so
		// we only drain on the final attempt.
		finalizeSubtaskDetached(ctx, s.repo, payload.KnowledgeID, "summary",
			retErr, false, isFinalAsynqAttempt(ctx))
		if span == nil {
			return
		}
		if summaryErr != nil {
			s.failPostprocessSubspan(ctx, span, "SUMMARY_FAILED", summaryErr.Error(), summaryErr)
		} else {
			s.endPostprocessSubspan(ctx, span, summaryOut)
		}
	}()

	// Get knowledge base
	kb, err := s.kbService.GetKnowledgeBaseByID(ctx, payload.KnowledgeBaseID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get knowledge base: %v", err)
		summaryErr = err
		return nil
	}
	// Capture the resolved model id on the span output the moment we
	// know it — debugging "summary stage took 60s" benefits hugely from
	// seeing WHICH chat model was actually used (kb config drift, fall-
	// throughs to a slow upstream, etc.).
	summaryOut["model_id"] = kb.SummaryModelID

	if kb.SummaryModelID == "" {
		logger.Warn(ctx, "Knowledge base summary model ID is empty, skipping summary generation")
		summaryOut["skipped"] = "no_summary_model"
		return nil
	}

	// Get knowledge
	knowledge, err := s.repo.GetKnowledgeByID(ctx, payload.TenantID, payload.KnowledgeID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get knowledge: %v", err)
		summaryErr = err
		return nil
	}
	// Short-circuit when the user cancelled parsing or the row is being deleted.
	if knowledge != nil {
		switch knowledge.ParseStatus {
		case types.ParseStatusCancelled, types.ParseStatusDeleting:
			logger.Infof(ctx, "Summary generation: knowledge aborted (%s), skipping: %s",
				knowledge.ParseStatus, payload.KnowledgeID)
			summaryOut["skipped"] = "knowledge_" + knowledge.ParseStatus
			return nil
		}
	}

	// Update summary status to processing
	knowledge.SummaryStatus = types.SummaryStatusProcessing
	knowledge.UpdatedAt = time.Now()
	if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
		logger.Warnf(ctx, "Failed to update summary status to processing: %v", err)
	}

	// Helper function to mark summary as failed
	markSummaryFailed := func() {
		knowledge.SummaryStatus = types.SummaryStatusFailed
		knowledge.UpdatedAt = time.Now()
		if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
			logger.Warnf(ctx, "Failed to update summary status to failed: %v", err)
		}
	}

	// Get text chunks for this knowledge
	chunks, err := s.chunkService.ListChunksByKnowledgeID(ctx, payload.KnowledgeID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get chunks: %v", err)
		markSummaryFailed()
		summaryErr = err
		return nil
	}

	// Filter text chunks only
	textChunks := make([]*types.Chunk, 0)
	for _, chunk := range chunks {
		if chunk.ChunkType == types.ChunkTypeText {
			textChunks = append(textChunks, chunk)
		}
	}
	summaryOut["text_chunks"] = len(textChunks)

	if len(textChunks) == 0 {
		logger.Infof(ctx, "No text chunks found for knowledge: %s", payload.KnowledgeID)
		// Mark as completed since there's nothing to summarize
		knowledge.SummaryStatus = types.SummaryStatusCompleted
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		summaryOut["skipped"] = "no_text_chunks"
		return nil
	}

	// Sort chunks by ChunkIndex for proper ordering
	sort.Slice(textChunks, func(i, j int) bool {
		return textChunks[i].ChunkIndex < textChunks[j].ChunkIndex
	})

	// Initialize chat model for summary
	chatModel, err := s.modelService.GetChatModel(ctx, kb.SummaryModelID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get chat model: %v", err)
		markSummaryFailed()
		summaryErr = err
		return fmt.Errorf("failed to get chat model: %w", err)
	}

	// Generate summary
	summary, err := s.getSummary(ctx, chatModel, knowledge, textChunks)
	if err != nil {
		logger.Errorf(ctx, "Failed to generate summary for knowledge %s: %v", payload.KnowledgeID, err)
		// Surface the underlying LLM/IO error on the span so the trace UI
		// can explain "why did this stage take 60s and then fall back?"
		// without forcing the operator to grep worker logs. We also capture
		// the error type to disambiguate timeouts from upstream HTTP errors
		// (deadline exceeded vs unexpected EOF vs 5xx, etc.).
		summaryOut["error"] = previewText(err.Error(), 500)
		summaryOut["error_type"] = fmt.Sprintf("%T", err)
		// For the insufficient-content case (scanned PDF without OCR, etc.)
		// we deliberately do NOT fall back to the first chunk's raw content,
		// since that chunk is typically just a bare markdown image reference
		// and surfacing it in the description is misleading.
		if errors.Is(err, errInsufficientSummaryContent) {
			knowledge.Description = ""
			knowledge.SummaryStatus = types.SummaryStatusFailed
			knowledge.UpdatedAt = time.Now()
			if updateErr := s.repo.UpdateKnowledge(ctx, knowledge); updateErr != nil {
				logger.Errorf(ctx, "Failed to mark summary as failed: %v", updateErr)
				summaryErr = updateErr
				return fmt.Errorf("failed to update knowledge: %w", updateErr)
			}
			summaryOut["fallback"] = "insufficient_content"
			summaryErr = err
			return nil
		}
		// For other errors (LLM API issues etc.), fall back to the first chunk.
		if len(textChunks) > 0 {
			summary = textChunks[0].Content
			if len(summary) > 500 {
				runes := []rune(summary)
				if len(runes) > 500 {
					summary = string(runes[:500])
				}
			}
			summaryOut["fallback"] = "first_chunk"
		}
	}

	// Update knowledge description
	knowledge.Description = summary
	knowledge.SummaryStatus = types.SummaryStatusCompleted
	knowledge.UpdatedAt = time.Now()
	summaryOut["summary_chars"] = len([]rune(summary))
	// Preview the generated summary on the span output so the trace
	// viewer can show "this is what the LLM produced" at a glance,
	// without hopping to the knowledge-detail page. Capped to keep
	// span rows compact.
	summaryOut["summary_preview"] = previewText(summary, 240)
	if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
		logger.Errorf(ctx, "Failed to update knowledge description: %v", err)
		summaryErr = err
		return fmt.Errorf("failed to update knowledge: %w", err)
	}

	// Create summary chunk and index it — only when RAG indexing is enabled.
	// Wiki-only KBs don't need summary chunks in the vector index.
	if strings.TrimSpace(summary) != "" && kb.NeedsEmbeddingModel() {
		// Get max chunk index
		maxChunkIndex := 0
		for _, chunk := range chunks {
			if chunk.ChunkIndex > maxChunkIndex {
				maxChunkIndex = chunk.ChunkIndex
			}
		}

		// Embed only the LLM-generated summary in the indexed chunk.
		// We deliberately omit knowledge.FileName here: filenames are an
		// unreliable signal (e.g. "MX5280.pdf" for a scanned legal letter)
		// and surfacing them in retrieved RAG context can re-introduce the
		// hallucination vector this branch is meant to close.
		summaryChunk := &types.Chunk{
			ID:              uuid.New().String(),
			TenantID:        knowledge.TenantID,
			KnowledgeID:     knowledge.ID,
			KnowledgeBaseID: knowledge.KnowledgeBaseID,
			Content:         fmt.Sprintf("# Summary\n%s", summary),
			ChunkIndex:      maxChunkIndex + 1,
			IsEnabled:       true,
			CreatedAt:       time.Now(),
			UpdatedAt:       time.Now(),
			StartAt:         0,
			EndAt:           0,
			ChunkType:       types.ChunkTypeSummary,
			ParentChunkID:   textChunks[0].ID,
		}

		// Save summary chunk
		if err := s.chunkService.CreateChunks(ctx, []*types.Chunk{summaryChunk}); err != nil {
			logger.Errorf(ctx, "Failed to create summary chunk: %v", err)
			summaryErr = err
			return fmt.Errorf("failed to create summary chunk: %w", err)
		}

		// Index summary chunk
		tenantInfo, err := s.tenantRepo.GetTenantByID(ctx, payload.TenantID)
		if err != nil {
			logger.Errorf(ctx, "Failed to get tenant info: %v", err)
			summaryErr = err
			return fmt.Errorf("failed to get tenant info: %w", err)
		}
		ctx = context.WithValue(ctx, types.TenantInfoContextKey, tenantInfo)

		retrieveEngine, err := retriever.CreateRetrieveEngineForKB(
			ctx, s.retrieveEngine, s.ownership, tenantInfo.ID, kb.VectorStoreID)
		if err != nil {
			logger.Errorf(ctx, "Failed to init retrieve engine: %v", err)
			summaryErr = err
			return fmt.Errorf("failed to init retrieve engine: %w", err)
		}

		embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID)
		if err != nil {
			logger.Errorf(ctx, "Failed to get embedding model: %v", err)
			summaryErr = err
			return fmt.Errorf("failed to get embedding model: %w", err)
		}

		indexInfo := []*types.IndexInfo{{
			Content:         summaryChunk.Content,
			SourceID:        summaryChunk.ID,
			SourceType:      types.ChunkSourceType,
			ChunkID:         summaryChunk.ID,
			KnowledgeID:     knowledge.ID,
			KnowledgeBaseID: knowledge.KnowledgeBaseID,
			IsEnabled:       true,
		}}

		if err := retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfo); err != nil {
			logger.Errorf(ctx, "Failed to index summary chunk: %v", err)
			summaryErr = err
			return fmt.Errorf("failed to index summary chunk: %w", err)
		}

		logger.Infof(ctx, "Successfully created and indexed summary chunk for knowledge: %s", payload.KnowledgeID)
		summaryOut["summary_chunk_indexed"] = true
	}

	logger.Infof(ctx, "Successfully generated summary for knowledge: %s", payload.KnowledgeID)
	summaryOut["status"] = "completed"
	return nil
}

// ProcessQuestionGeneration handles async question generation task. It
// dispatches between the batched fan-out path (current: one task per window of
// text chunks, payload.ChunkIDs set) and the legacy whole-knowledge path (kept
// for tasks enqueued before fan-out shipped, no chunk ids). A lone ChunkID
// (from an interim per-chunk build) is treated as a one-element batch.
func (s *knowledgeService) ProcessQuestionGeneration(ctx context.Context, t *asynq.Task) (retErr error) {
	var payload types.QuestionGenerationPayload
	if err := json.Unmarshal(t.Payload(), &payload); err != nil {
		logger.Errorf(ctx, "Failed to unmarshal question generation payload: %v", err)
		return nil // Don't retry on unmarshal error
	}
	if len(payload.ChunkIDs) > 0 || payload.ChunkID != "" {
		return s.processQuestionGenerationForChunks(ctx, t, payload)
	}
	return s.processQuestionGenerationForKnowledge(ctx, t, payload)
}

// processQuestionGenerationForKnowledge is the legacy whole-knowledge handler:
// it iterates every text chunk of the knowledge in one task. Retained for
// in-flight tasks queued before per-chunk fan-out; new enqueues always set
// payload.ChunkID and take the per-chunk path instead.
func (s *knowledgeService) processQuestionGenerationForKnowledge(ctx context.Context, t *asynq.Task, payload types.QuestionGenerationPayload) (retErr error) {
	taskStartedAt := time.Now()
	retryCount, _ := asynq.GetRetryCount(ctx)
	maxRetry, _ := asynq.GetMaxRetry(ctx)

	exitStatus := "success"
	totalChunks := 0
	totalTextChunks := 0
	emptyContentChunks := 0
	llmCallAttempts := 0
	llmCallSuccess := 0
	llmCallFailed := 0
	llmCallEmpty := 0
	generatedQuestionsTotal := 0
	chunkMetadataSetFailed := 0
	chunkUpdateFailed := 0
	indexEntriesPrepared := 0
	indexBatchAttempted := false
	indexBatchSucceeded := false
	// Sample question + model id surfaced on the span output so the
	// trace viewer can answer "what did the LLM actually produce?" and
	// "which model did it run on?" without joining back to the chunk
	// store. Captured the first time we see a non-empty question batch.
	var sampleQuestion string
	var resolvedModelID string
	// Postprocess subspan for the trace viewer. Opened lazily after we
	// unmarshal the payload (so we have payload.Attempt) and closed in
	// the defer below alongside the stats log so the span output mirrors
	// what we already log to stdout.
	var qSpan *Span
	var qErr error
	// Set when a newer attempt supersedes this run; suppresses the
	// FinalizeSubtask decrement so a stale task can't drain the new
	// attempt's counter.
	superseded := false
	// Decrement enrichment counter on terminal exit. Keyed on the value
	// RETURNED to asynq (retErr), not qErr: some branches record a span
	// failure (qErr != nil) yet `return nil` so asynq won't retry (KB /
	// knowledge fetch failures); those are terminal and must drain.
	// Keying on qErr would skip them and strand the row in "finalizing".
	// When we return an error asynq retries, so we only drain on the
	// final attempt. Runs AFTER the stats-log defer below — defers
	// unwind LIFO, so this one declared first executes last.
	defer func() {
		finalizeSubtaskDetached(ctx, s.repo, payload.KnowledgeID, "question_legacy",
			retErr, superseded, isFinalAsynqAttempt(ctx))
	}()
	defer func() {
		logger.Infof(
			ctx,
			"Question generation stats: knowledge=%s kb=%s retry=%d/%d status=%s elapsed=%s chunks(total=%d,text=%d,empty_text=%d) llm(attempt=%d,success=%d,empty=%d,failed=%d) generated_questions=%d chunk_update_failed=%d metadata_set_failed=%d index(prepared=%d,attempted=%v,succeeded=%v)",
			payload.KnowledgeID,
			payload.KnowledgeBaseID,
			retryCount,
			maxRetry,
			exitStatus,
			time.Since(taskStartedAt).Round(time.Millisecond),
			totalChunks,
			totalTextChunks,
			emptyContentChunks,
			llmCallAttempts,
			llmCallSuccess,
			llmCallEmpty,
			llmCallFailed,
			generatedQuestionsTotal,
			chunkUpdateFailed,
			chunkMetadataSetFailed,
			indexEntriesPrepared,
			indexBatchAttempted,
			indexBatchSucceeded,
		)
		if qSpan != nil {
			out := types.JSONMap{
				"status":                 exitStatus,
				"total_chunks":           totalChunks,
				"text_chunks":            totalTextChunks,
				"empty_content_chunks":   emptyContentChunks,
				"llm_attempts":           llmCallAttempts,
				"llm_success":            llmCallSuccess,
				"llm_empty":              llmCallEmpty,
				"llm_failed":             llmCallFailed,
				"questions_generated":    generatedQuestionsTotal,
				"chunk_update_failed":    chunkUpdateFailed,
				"metadata_set_failed":    chunkMetadataSetFailed,
				"index_entries_prepared": indexEntriesPrepared,
				"index_batch_attempted":  indexBatchAttempted,
				"index_batch_succeeded":  indexBatchSucceeded,
				"retry":                  retryCount,
				"max_retry":              maxRetry,
			}
			// Surface the resolved model id and a sample question on the
			// span output. These help debugging "why is question generation
			// slow" — both questions ("which model was hit?") and ("what
			// did it produce?") are hard to answer from logs alone.
			if resolvedModelID != "" {
				out["model_id"] = resolvedModelID
			}
			if sampleQuestion != "" {
				out["sample_question"] = sampleQuestion
			}
			// Treat any non-success exitStatus as a failed run; the
			// existing stats-string already enumerates them. qErr stays
			// optional for callers that want to surface a Go error.
			if exitStatus != "success" || qErr != nil {
				msg := exitStatus
				var detailErr error = qErr
				if qErr != nil {
					msg = qErr.Error()
				}
				s.failPostprocessSubspan(ctx, qSpan, "QUESTION_FAILED", msg, detailErr)
			} else {
				s.endPostprocessSubspan(ctx, qSpan, out)
			}
		}
	}()

	logger.Infof(ctx, "Processing question generation for knowledge: %s", payload.KnowledgeID)

	// A newer attempt has superseded this one: skip before opening the span
	// so we don't read stale chunks. superseded suppresses the counter
	// decrement in the defer above; qSpan stays nil so the stats defer no-ops.
	if attemptSuperseded(ctx, s.tracker(), payload.KnowledgeID, payload.Attempt) {
		superseded = true
		exitStatus = "superseded"
		logger.Infof(ctx, "question: attempt %d superseded for %s, skipping stale enrichment",
			payload.Attempt, payload.KnowledgeID)
		return nil
	}

	// Open the postprocess.question subspan now that we have payload.Attempt.
	// Closes via the defer above.
	qSpan = s.beginPostprocessSubspan(ctx, payload.KnowledgeID, payload.Attempt, "postprocess.question",
		types.JSONMap{
			"question_count": payload.QuestionCount,
			"language":       payload.Language,
		})

	// Set tenant context
	ctx = context.WithValue(ctx, types.TenantIDContextKey, payload.TenantID)
	if payload.Language != "" {
		ctx = context.WithValue(ctx, types.LanguageContextKey, payload.Language)
	}

	if strings.TrimSpace(s.config.Conversation.GenerateQuestionsPrompt) == "" {
		exitStatus = "prompt_not_configured"
		logger.Errorf(ctx, "GenerateQuestionsPrompt is empty: configure conversation.generate_questions_prompt_id")
		qErr = fmt.Errorf("generate questions prompt not configured")
		return qErr
	}

	// Get knowledge base
	kb, err := s.kbService.GetKnowledgeBaseByID(ctx, payload.KnowledgeBaseID)
	if err != nil {
		exitStatus = "kb_not_found"
		logger.Errorf(ctx, "Failed to get knowledge base: %v", err)
		qErr = err
		return nil
	}

	// Get knowledge
	knowledge, err := s.repo.GetKnowledgeByID(ctx, payload.TenantID, payload.KnowledgeID)
	if err != nil {
		exitStatus = "knowledge_not_found"
		logger.Errorf(ctx, "Failed to get knowledge: %v", err)
		qErr = err
		return nil
	}
	// Short-circuit when the user cancelled parsing or the row is being deleted.
	if knowledge != nil {
		switch knowledge.ParseStatus {
		case types.ParseStatusCancelled, types.ParseStatusDeleting:
			exitStatus = "knowledge_" + knowledge.ParseStatus
			logger.Infof(ctx, "Question generation: knowledge aborted (%s), skipping: %s",
				knowledge.ParseStatus, payload.KnowledgeID)
			return nil
		}
	}

	// Get text chunks for this knowledge
	chunks, err := s.chunkService.ListChunksByKnowledgeID(ctx, payload.KnowledgeID)
	if err != nil {
		exitStatus = "list_chunks_failed"
		logger.Errorf(ctx, "Failed to get chunks: %v", err)
		return nil
	}
	totalChunks = len(chunks)

	// Filter text chunks only
	textChunks := make([]*types.Chunk, 0)
	for _, chunk := range chunks {
		if chunk.ChunkType == types.ChunkTypeText {
			textChunks = append(textChunks, chunk)
		}
	}
	totalTextChunks = len(textChunks)

	if len(textChunks) == 0 {
		exitStatus = "no_text_chunks"
		logger.Infof(ctx, "No text chunks found for knowledge: %s", payload.KnowledgeID)
		return nil
	}

	// Sort chunks by StartAt for context building
	sort.Slice(textChunks, func(i, j int) bool {
		return textChunks[i].StartAt < textChunks[j].StartAt
	})

	// Initialize chat model
	chatModel, err := s.modelService.GetChatModel(ctx, kb.SummaryModelID)
	if err != nil {
		exitStatus = "get_chat_model_failed"
		logger.Errorf(ctx, "Failed to get chat model: %v", err)
		return fmt.Errorf("failed to get chat model: %w", err)
	}
	resolvedModelID = kb.SummaryModelID

	// Initialize embedding model and retrieval engine
	embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID)
	if err != nil {
		exitStatus = "get_embedding_model_failed"
		logger.Errorf(ctx, "Failed to get embedding model: %v", err)
		return fmt.Errorf("failed to get embedding model: %w", err)
	}

	tenantInfo, err := s.tenantRepo.GetTenantByID(ctx, payload.TenantID)
	if err != nil {
		exitStatus = "get_tenant_failed"
		logger.Errorf(ctx, "Failed to get tenant info: %v", err)
		return fmt.Errorf("failed to get tenant info: %w", err)
	}
	ctx = context.WithValue(ctx, types.TenantInfoContextKey, tenantInfo)

	retrieveEngine, err := retriever.CreateRetrieveEngineForKB(
		ctx, s.retrieveEngine, s.ownership, tenantInfo.ID, kb.VectorStoreID)
	if err != nil {
		exitStatus = "init_retrieve_engine_failed"
		logger.Errorf(ctx, "Failed to init retrieve engine: %v", err)
		return fmt.Errorf("failed to init retrieve engine: %w", err)
	}

	questionCount := payload.QuestionCount
	if questionCount <= 0 {
		questionCount = 3
	}
	if questionCount > 10 {
		questionCount = 10
	}

	// Collect image info for all text chunks so question generation can
	// see caption / OCR text instead of bare image links.
	textChunkIDs := make([]string, len(textChunks))
	for i, c := range textChunks {
		textChunkIDs[i] = c.ID
	}
	imageInfoMap := searchutil.CollectImageInfoByChunkIDs(ctx, s.chunkRepo, payload.TenantID, textChunkIDs)

	enrichContent := func(chunk *types.Chunk) string {
		if info, ok := imageInfoMap[chunk.ID]; ok && info != "" {
			return searchutil.EnrichContentWithImageInfo(chunk.Content, info)
		}
		return chunk.Content
	}

	// Generate questions for each chunk with context
	var indexInfoList []*types.IndexInfo
	for i, chunk := range textChunks {
		if strings.TrimSpace(chunk.Content) == "" {
			emptyContentChunks++
			continue
		}

		// Build context from adjacent chunks
		var prevContent, nextContent string
		if i > 0 {
			prevContent = enrichContent(textChunks[i-1])
		}
		if i < len(textChunks)-1 {
			nextContent = enrichContent(textChunks[i+1])
		}

		llmCallAttempts++
		questions, err := s.generateQuestionsWithContext(ctx, chatModel, enrichContent(chunk), prevContent, nextContent, knowledge.Title, questionCount)
		if err != nil {
			llmCallFailed++
			logger.Warnf(ctx, "Failed to generate questions for chunk %s: %v", chunk.ID, err)
			continue
		}

		if len(questions) == 0 {
			llmCallEmpty++
			continue
		}
		llmCallSuccess++
		generatedQuestionsTotal += len(questions)
		if sampleQuestion == "" && len(questions) > 0 {
			sampleQuestion = previewText(questions[0], 200)
		}

		// Update chunk metadata with unique IDs for each question
		generatedQuestions := make([]types.GeneratedQuestion, len(questions))
		for j, question := range questions {
			questionID := fmt.Sprintf("q%d", time.Now().UnixNano()+int64(j))
			generatedQuestions[j] = types.GeneratedQuestion{
				ID:       questionID,
				Question: question,
			}
		}
		meta := &types.DocumentChunkMetadata{
			GeneratedQuestions: generatedQuestions,
		}
		if err := chunk.SetDocumentMetadata(meta); err != nil {
			chunkMetadataSetFailed++
			logger.Warnf(ctx, "Failed to set document metadata for chunk %s: %v", chunk.ID, err)
			continue
		}

		// Update chunk in database
		if err := s.chunkService.UpdateChunk(ctx, chunk); err != nil {
			chunkUpdateFailed++
			logger.Warnf(ctx, "Failed to update chunk %s: %v", chunk.ID, err)
			continue
		}

		// Create index entries for generated questions
		for _, gq := range generatedQuestions {
			sourceID := fmt.Sprintf("%s-%s", chunk.ID, gq.ID)
			indexInfoList = append(indexInfoList, &types.IndexInfo{
				Content:         gq.Question,
				SourceID:        sourceID,
				SourceType:      types.ChunkSourceType,
				ChunkID:         chunk.ID,
				KnowledgeID:     knowledge.ID,
				KnowledgeBaseID: knowledge.KnowledgeBaseID,
				IsEnabled:       true,
			})
		}
		logger.Debugf(ctx, "Generated %d questions for chunk %s", len(questions), chunk.ID)
	}
	indexEntriesPrepared = len(indexInfoList)

	// Index generated questions
	if len(indexInfoList) > 0 {
		indexBatchAttempted = true
		if err := retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfoList); err != nil {
			exitStatus = "index_questions_failed"
			logger.Errorf(ctx, "Failed to index generated questions: %v", err)
			return fmt.Errorf("failed to index questions: %w", err)
		}
		indexBatchSucceeded = true
		logger.Infof(ctx, "Successfully indexed %d generated questions for knowledge: %s", len(indexInfoList), payload.KnowledgeID)
	}

	return nil
}

// processQuestionGenerationForChunks generates questions for a batch (window)
// of text chunks. This is the batched fan-out path (one asynq task per
// questionGenChunkBatchSize chunks), aligned with the graph-extract
// TypeChunkExtract pattern: independent retry, per-batch cancellation, and a
// postprocess.question.batch[i] subspan. The payload carries only chunk ids
// (never content); content is read fresh here, and all questions for the batch
// are indexed in a single embedding BatchIndex call.
func (s *knowledgeService) processQuestionGenerationForChunks(ctx context.Context, t *asynq.Task, payload types.QuestionGenerationPayload) (retErr error) {
	taskStartedAt := time.Now()
	retryCount, _ := asynq.GetRetryCount(ctx)
	maxRetry, _ := asynq.GetMaxRetry(ctx)

	// Normalize the batch: prefer ChunkIDs, fall back to a lone ChunkID
	// (interim per-chunk build) so those in-flight tasks still run.
	batchIDs := payload.ChunkIDs
	if len(batchIDs) == 0 && payload.ChunkID != "" {
		batchIDs = []string{payload.ChunkID}
	}

	exitStatus := "success"
	chunksInBatch := len(batchIDs)
	chunksProcessed := 0
	emptyChunks := 0
	llmCallFailed := 0
	generatedQuestionsTotal := 0
	indexEntriesPrepared := 0
	indexBatchSucceeded := false
	var sampleQuestion string
	var resolvedModelID string
	var qSpan *Span
	var qErr error
	// Suppresses the FinalizeSubtask drain when a newer attempt superseded
	// this run, so a stale task can't decrement the new attempt's counter.
	superseded := false

	ctx = context.WithValue(ctx, types.TenantIDContextKey, payload.TenantID)
	if payload.Language != "" {
		ctx = context.WithValue(ctx, types.LanguageContextKey, payload.Language)
	}

	// Drain the parent's enrichment counter on terminal exit. Keyed on the
	// value RETURNED to asynq (retErr), not qErr: some branches record a
	// span failure yet `return nil` (terminal, must drain). Declared first
	// so it runs LAST (after the stats/span defer below).
	defer func() {
		finalizeSubtaskDetached(ctx, s.repo, payload.KnowledgeID,
			fmt.Sprintf("question_batch[%d]", payload.BatchIndex),
			retErr, superseded, isFinalAsynqAttempt(ctx))
	}()
	defer func() {
		logger.Infof(ctx,
			"Question generation (batch) stats: knowledge=%s batch=%d chunks(in_batch=%d,processed=%d,empty=%d) llm_failed=%d retry=%d/%d status=%s elapsed=%s generated_questions=%d index(entries=%d,succeeded=%v)",
			payload.KnowledgeID, payload.BatchIndex, chunksInBatch, chunksProcessed, emptyChunks, llmCallFailed,
			retryCount, maxRetry, exitStatus, time.Since(taskStartedAt).Round(time.Millisecond),
			generatedQuestionsTotal, indexEntriesPrepared, indexBatchSucceeded,
		)
		if qSpan != nil {
			out := types.JSONMap{
				"status":                 exitStatus,
				"batch_index":            payload.BatchIndex,
				"chunks_in_batch":        chunksInBatch,
				"chunks_processed":       chunksProcessed,
				"empty_chunks":           emptyChunks,
				"llm_failed":             llmCallFailed,
				"questions_generated":    generatedQuestionsTotal,
				"index_entries_prepared": indexEntriesPrepared,
				"index_batch_succeeded":  indexBatchSucceeded,
				"retry":                  retryCount,
				"max_retry":              maxRetry,
			}
			if resolvedModelID != "" {
				out["model_id"] = resolvedModelID
			}
			if sampleQuestion != "" {
				out["sample_question"] = sampleQuestion
			}
			if exitStatus != "success" || qErr != nil {
				msg := exitStatus
				if qErr != nil {
					msg = qErr.Error()
				}
				s.failPostprocessSubspan(ctx, qSpan, "QUESTION_FAILED", msg, qErr)
			} else {
				s.endPostprocessSubspan(ctx, qSpan, out)
			}
		}
	}()

	logger.Infof(ctx, "Processing question generation for knowledge=%s batch=%d chunks=%d",
		payload.KnowledgeID, payload.BatchIndex, chunksInBatch)

	if chunksInBatch == 0 {
		exitStatus = "empty_batch"
		return nil
	}

	// A newer attempt has superseded this one: skip before opening the span
	// so we don't read stale chunks and don't drain the new attempt.
	if attemptSuperseded(ctx, s.tracker(), payload.KnowledgeID, payload.Attempt) {
		superseded = true
		exitStatus = "superseded"
		logger.Infof(ctx, "question: attempt %d superseded for %s, skipping stale enrichment",
			payload.Attempt, payload.KnowledgeID)
		return nil
	}

	qSpan = s.beginQuestionBatchSubspan(ctx, payload.KnowledgeID, payload.Attempt,
		fmt.Sprintf("postprocess.question.batch[%d]", payload.BatchIndex),
		types.JSONMap{
			"batch_index":    payload.BatchIndex,
			"chunks":         chunksInBatch,
			"question_count": payload.QuestionCount,
			"language":       payload.Language,
		})

	if strings.TrimSpace(s.config.Conversation.GenerateQuestionsPrompt) == "" {
		exitStatus = "prompt_not_configured"
		logger.Errorf(ctx, "GenerateQuestionsPrompt is empty: configure conversation.generate_questions_prompt_id")
		qErr = fmt.Errorf("generate questions prompt not configured")
		return qErr
	}

	kb, err := s.kbService.GetKnowledgeBaseByID(ctx, payload.KnowledgeBaseID)
	if err != nil {
		exitStatus = "kb_not_found"
		logger.Errorf(ctx, "Failed to get knowledge base: %v", err)
		qErr = err
		return nil
	}

	knowledge, err := s.repo.GetKnowledgeByID(ctx, payload.TenantID, payload.KnowledgeID)
	if err != nil {
		exitStatus = "knowledge_not_found"
		logger.Errorf(ctx, "Failed to get knowledge: %v", err)
		qErr = err
		return nil
	}
	// Short-circuit when the user cancelled parsing or the row is being
	// deleted — batched fan-out means we get this check for free on every
	// batch, so a cancel stops burning LLM quota on the remaining batches.
	if knowledge != nil {
		switch knowledge.ParseStatus {
		case types.ParseStatusCancelled, types.ParseStatusDeleting:
			exitStatus = "knowledge_" + knowledge.ParseStatus
			logger.Infof(ctx, "Question generation: knowledge aborted (%s), skipping batch %d",
				knowledge.ParseStatus, payload.BatchIndex)
			return nil
		}
	}

	chatModel, err := s.modelService.GetChatModel(ctx, kb.SummaryModelID)
	if err != nil {
		exitStatus = "get_chat_model_failed"
		logger.Errorf(ctx, "Failed to get chat model: %v", err)
		return fmt.Errorf("failed to get chat model: %w", err)
	}
	resolvedModelID = kb.SummaryModelID

	embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, kb.EmbeddingModelID)
	if err != nil {
		exitStatus = "get_embedding_model_failed"
		logger.Errorf(ctx, "Failed to get embedding model: %v", err)
		return fmt.Errorf("failed to get embedding model: %w", err)
	}

	tenantInfo, err := s.tenantRepo.GetTenantByID(ctx, payload.TenantID)
	if err != nil {
		exitStatus = "get_tenant_failed"
		logger.Errorf(ctx, "Failed to get tenant info: %v", err)
		return fmt.Errorf("failed to get tenant info: %w", err)
	}
	ctx = context.WithValue(ctx, types.TenantInfoContextKey, tenantInfo)

	retrieveEngine, err := retriever.CreateRetrieveEngineForKB(
		ctx, s.retrieveEngine, s.ownership, tenantInfo.ID, kb.VectorStoreID)
	if err != nil {
		exitStatus = "init_retrieve_engine_failed"
		logger.Errorf(ctx, "Failed to init retrieve engine: %v", err)
		return fmt.Errorf("failed to init retrieve engine: %w", err)
	}

	questionCount := payload.QuestionCount
	if questionCount <= 0 {
		questionCount = 3
	}
	if questionCount > 10 {
		questionCount = 10
	}

	// Fetch the batch chunks (in payload order) plus the two boundary
	// neighbors so we can rebuild the same surrounding context the legacy
	// loop used, all enriched with image OCR / caption info. A vanished
	// chunk degrades gracefully (skipped / empty context).
	getChunk := func(id string) *types.Chunk {
		if id == "" {
			return nil
		}
		c, gerr := s.chunkRepo.GetChunkByID(ctx, payload.TenantID, id)
		if gerr != nil {
			return nil
		}
		return c
	}
	batchChunks := make([]*types.Chunk, len(batchIDs))
	for i, id := range batchIDs {
		batchChunks[i] = getChunk(id)
	}
	prevChunk := getChunk(payload.PrevChunkID)
	nextChunk := getChunk(payload.NextChunkID)

	infoIDs := make([]string, 0, len(batchIDs)+2)
	infoIDs = append(infoIDs, batchIDs...)
	if payload.PrevChunkID != "" {
		infoIDs = append(infoIDs, payload.PrevChunkID)
	}
	if payload.NextChunkID != "" {
		infoIDs = append(infoIDs, payload.NextChunkID)
	}
	imageInfoMap := searchutil.CollectImageInfoByChunkIDs(ctx, s.chunkRepo, payload.TenantID, infoIDs)
	enrich := func(c *types.Chunk) string {
		if c == nil {
			return ""
		}
		if info, ok := imageInfoMap[c.ID]; ok && info != "" {
			return searchutil.EnrichContentWithImageInfo(c.Content, info)
		}
		return c.Content
	}

	// neighborContent returns the context content for position i within the
	// batch: the in-batch neighbor when present, else the boundary chunk.
	prevContentAt := func(i int) string {
		if i > 0 {
			return enrich(batchChunks[i-1])
		}
		return enrich(prevChunk)
	}
	nextContentAt := func(i int) string {
		if i < len(batchChunks)-1 {
			return enrich(batchChunks[i+1])
		}
		return enrich(nextChunk)
	}

	var indexInfoList []*types.IndexInfo
	for i, chunk := range batchChunks {
		if chunk == nil || strings.TrimSpace(chunk.Content) == "" {
			emptyChunks++
			continue
		}

		questions, gerr := s.generateQuestionsWithContext(
			ctx, chatModel, enrich(chunk), prevContentAt(i), nextContentAt(i), knowledge.Title, questionCount)
		if gerr != nil {
			llmCallFailed++
			logger.Warnf(ctx, "Failed to generate questions for chunk %s: %v", chunk.ID, gerr)
			continue
		}
		if len(questions) == 0 {
			continue
		}
		chunksProcessed++
		generatedQuestionsTotal += len(questions)
		if sampleQuestion == "" {
			sampleQuestion = previewText(questions[0], 200)
		}

		generatedQuestions := make([]types.GeneratedQuestion, len(questions))
		for j, question := range questions {
			generatedQuestions[j] = types.GeneratedQuestion{
				ID:       fmt.Sprintf("q%d", time.Now().UnixNano()+int64(j)),
				Question: question,
			}
		}
		meta := &types.DocumentChunkMetadata{GeneratedQuestions: generatedQuestions}
		if err := chunk.SetDocumentMetadata(meta); err != nil {
			logger.Warnf(ctx, "Failed to set document metadata for chunk %s: %v", chunk.ID, err)
			continue
		}
		if err := s.chunkService.UpdateChunk(ctx, chunk); err != nil {
			logger.Warnf(ctx, "Failed to update chunk %s: %v", chunk.ID, err)
			continue
		}
		for _, gq := range generatedQuestions {
			indexInfoList = append(indexInfoList, &types.IndexInfo{
				Content:         gq.Question,
				SourceID:        fmt.Sprintf("%s-%s", chunk.ID, gq.ID),
				SourceType:      types.ChunkSourceType,
				ChunkID:         chunk.ID,
				KnowledgeID:     knowledge.ID,
				KnowledgeBaseID: knowledge.KnowledgeBaseID,
				IsEnabled:       true,
			})
		}
	}

	indexEntriesPrepared = len(indexInfoList)
	if len(indexInfoList) > 0 {
		if err := retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfoList); err != nil {
			exitStatus = "index_questions_failed"
			qErr = err
			logger.Errorf(ctx, "Failed to index generated questions for batch %d: %v", payload.BatchIndex, err)
			return fmt.Errorf("failed to index questions: %w", err)
		}
		indexBatchSucceeded = true
		logger.Infof(ctx, "Indexed %d generated questions for knowledge=%s batch=%d",
			len(indexInfoList), payload.KnowledgeID, payload.BatchIndex)
	}
	return nil
}

// generateQuestionsWithContext generates questions for a chunk with surrounding context
func (s *knowledgeService) generateQuestionsWithContext(ctx context.Context,
	chatModel chat.Chat, content, prevContent, nextContent, docName string, questionCount int,
) ([]string, error) {
	if content == "" || questionCount <= 0 {
		return nil, nil
	}

	prompt := strings.TrimSpace(s.config.Conversation.GenerateQuestionsPrompt)
	if prompt == "" {
		return nil, fmt.Errorf("generate questions prompt not configured")
	}

	// Build context section
	var contextSection string
	if prevContent != "" || nextContent != "" {
		contextSection = "<surrounding_context>\n"
		if prevContent != "" {
			contextSection += fmt.Sprintf("<preceding_content>\n%s\n\n</preceding_content>\n\n", prevContent)
		}
		if nextContent != "" {
			contextSection += fmt.Sprintf("<following_content>\n%s\n\n</following_content>\n\n", nextContent)
		}
		contextSection += "</surrounding_context>\n\n"
	}

	langName := types.LanguageNameFromContext(ctx)
	prompt = types.RenderPromptPlaceholders(prompt, types.PlaceholderValues{
		"question_count": fmt.Sprintf("%d", questionCount),
		"content":        content,
		"context":        contextSection,
		"doc_name":       docName,
		"language":       langName,
	})

	thinking := false
	response, err := chatModel.Chat(ctx, []chat.Message{
		{
			Role:    "user",
			Content: prompt,
		},
	}, &chat.ChatOptions{
		Temperature: 0.7,
		MaxTokens:   512,
		Thinking:    &thinking,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to generate questions: %w", err)
	}

	// Parse response
	lines := strings.Split(response.Content, "\n")
	questions := make([]string, 0, questionCount)
	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		line = strings.TrimLeft(line, "0123456789.-*) ")
		line = strings.TrimSpace(line)
		if line != "" && len(line) > 5 {
			questions = append(questions, line)
			if len(questions) >= questionCount {
				break
			}
		}
	}

	return questions, nil
}

// ReparseKnowledge deletes existing document content and re-parses the knowledge asynchronously.
// This method reuses the logic from UpdateManualKnowledge for resource cleanup and async parsing.
func (s *knowledgeService) ReparseKnowledge(ctx context.Context, knowledgeID string) (*types.Knowledge, error) {
	logger.Info(ctx, "Start re-parsing knowledge")

	tenantID := ctx.Value(types.TenantIDContextKey).(uint64)
	existing, err := s.repo.GetKnowledgeByID(ctx, tenantID, knowledgeID)
	if err != nil {
		logger.Errorf(ctx, "Failed to load knowledge: %v", err)
		return nil, err
	}

	// Allocate a fresh span tree attempt up front. Doing this BEFORE
	// the cleanup + enqueue means: (a) the UI immediately sees a new
	// attempt with all five stages back to "pending" instead of the
	// previous run's "failed" badge lingering; (b) the worker's
	// fallback path won't double-allocate when payload.Attempt is
	// already set on the queued task.
	reparseAttempt := 0
	if root, n, err := s.tracker().OpenAttempt(ctx, existing.ID, ""); err == nil && root != nil {
		reparseAttempt = n
	} else if err != nil {
		logger.Warnf(ctx, "[Reparse] OpenAttempt failed for %s: %v (will fall back in worker)", existing.ID, err)
	}

	// Get knowledge base configuration
	kb, err := s.kbService.GetKnowledgeBaseByID(ctx, existing.KnowledgeBaseID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get knowledge base for reparse: %v", err)
		return nil, err
	}

	// Keep wiki's pending queue consistent across both manual and non-manual
	// paths. The destructive work (swapping old wiki contributions for new)
	// happens asynchronously inside mapOneDocument — see its oldPageSlugs
	// handling — once post-process re-enqueues wiki ingest. All we need to
	// do here is stop any stale pending ingest op from firing against the
	// pre-reparse chunk set.
	if kb != nil && kb.IsWikiEnabled() {
		s.prepareWikiForReparse(ctx, existing)
	}

	// For manual knowledge, use async manual processing (cleanup + re-indexing in worker)
	if existing.IsManual() {
		meta, metaErr := existing.ManualMetadata()
		if metaErr != nil || meta == nil {
			logger.Errorf(ctx, "Failed to get manual metadata for reparse: %v", metaErr)
			return nil, werrors.NewBadRequestError("无法获取手工知识内容")
		}

		existing.ParseStatus = "pending"
		existing.EnableStatus = "disabled"
		existing.Description = ""
		existing.ProcessedAt = nil
		existing.EmbeddingModelID = kb.EmbeddingModelID
		// Reset the enrichment counter so a leftover value from a
		// previous attempt (e.g. cancelled before all subtasks decremented)
		// cannot block the new finalizing transition later. This must be
		// an explicit column write: UpdateKnowledge (full-row Save) omits
		// pending_subtasks_count, so the struct assignment alone would not
		// persist.
		existing.PendingSubtasksCount = 0

		if err := s.repo.UpdateKnowledge(ctx, existing); err != nil {
			logger.Errorf(ctx, "Failed to update knowledge status before reparse: %v", err)
			return nil, err
		}
		if err := s.repo.UpdateKnowledgeColumn(ctx, existing.ID, "pending_subtasks_count", 0); err != nil {
			logger.Errorf(ctx, "Failed to reset pending_subtasks_count before reparse: %v", err)
			return nil, err
		}

		if err := s.enqueueManualProcessing(ctx, existing, meta.Content, true); err != nil {
			logger.Errorf(ctx, "Failed to enqueue manual reparse task: %v", err)
			existing.ParseStatus = "failed"
			existing.ErrorMessage = "Failed to enqueue processing task"
			s.repo.UpdateKnowledge(ctx, existing)
		}
		return existing, nil
	}

	// For non-manual knowledge, cleanup synchronously then enqueue document processing
	logger.Infof(ctx, "Cleaning up existing resources for knowledge: %s", knowledgeID)
	if err := s.cleanupKnowledgeResources(ctx, existing); err != nil {
		logger.ErrorWithFields(ctx, err, map[string]interface{}{
			"knowledge_id": knowledgeID,
		})
		return nil, err
	}

	// Step 2: Update knowledge status and metadata
	existing.ParseStatus = "pending"
	existing.EnableStatus = "disabled"
	existing.Description = ""
	existing.ProcessedAt = nil
	existing.EmbeddingModelID = kb.EmbeddingModelID
	// Reset the enrichment counter so a leftover value from a previous
	// attempt cannot block the new finalizing transition later. This must
	// be an explicit column write: UpdateKnowledge (full-row Save) omits
	// pending_subtasks_count, so the struct assignment alone would not
	// persist.
	existing.PendingSubtasksCount = 0

	if err := s.repo.UpdateKnowledge(ctx, existing); err != nil {
		logger.Errorf(ctx, "Failed to update knowledge status before reparse: %v", err)
		return nil, err
	}
	if err := s.repo.UpdateKnowledgeColumn(ctx, existing.ID, "pending_subtasks_count", 0); err != nil {
		logger.Errorf(ctx, "Failed to reset pending_subtasks_count before reparse: %v", err)
		return nil, err
	}

	// Step 3: Trigger async re-parsing based on knowledge type
	logger.Infof(ctx, "Knowledge status updated, scheduling async reparse, ID: %s, Type: %s", existing.ID, existing.Type)

	// For file-based knowledge, enqueue document processing task
	if existing.FilePath != "" {
		tenantID := ctx.Value(types.TenantIDContextKey).(uint64)

		// Determine multimodal setting
		enableMultimodel := kb.IsMultimodalEnabled()

		// Check question generation config
		enableQuestionGeneration := false
		questionCount := 3 // default
		if kb.QuestionGenerationConfig != nil && kb.QuestionGenerationConfig.Enabled {
			enableQuestionGeneration = true
			if kb.QuestionGenerationConfig.QuestionCount > 0 {
				questionCount = kb.QuestionGenerationConfig.QuestionCount
			}
		}

		lang, _ := types.LanguageFromContext(ctx)
		taskPayload := types.DocumentProcessPayload{
			TenantID:                 tenantID,
			KnowledgeID:              existing.ID,
			KnowledgeBaseID:          existing.KnowledgeBaseID,
			FilePath:                 existing.FilePath,
			FileName:                 existing.FileName,
			FileType:                 getFileType(existing.FileName),
			EnableMultimodel:         enableMultimodel,
			EnableQuestionGeneration: enableQuestionGeneration,
			QuestionCount:            questionCount,
			Language:                 lang,
			Attempt:                  reparseAttempt,
		}

		langfuse.InjectTracing(ctx, &taskPayload)
		payloadBytes, err := json.Marshal(taskPayload)
		if err != nil {
			logger.Errorf(ctx, "Failed to marshal reparse task payload: %v", err)
			return existing, nil
		}

		task := asynq.NewTask(
			types.TypeDocumentProcess,
			payloadBytes,
			documentProcessTaskOptions(s.config, asynq.MaxRetry(3))...,
		)
		info, err := s.task.Enqueue(task)
		if err != nil {
			logger.Errorf(ctx, "Failed to enqueue reparse task: %v", err)
			return existing, nil
		}
		logger.Infof(ctx, "Enqueued reparse task: id=%s queue=%s knowledge_id=%s", info.ID, info.Queue, existing.ID)

		// For data tables (csv, xlsx, xls), also enqueue summary task
		if slices.Contains([]string{"csv", "xlsx", "xls"}, getFileType(existing.FileName)) {
			NewDataTableSummaryTask(ctx, s.task, tenantID, existing.ID, kb.SummaryModelID, kb.EmbeddingModelID)
		}

		return existing, nil
	}

	// For file-URL-based knowledge, enqueue document processing task with FileURL field
	if existing.Type == "file_url" && existing.Source != "" {
		tenantID := ctx.Value(types.TenantIDContextKey).(uint64)

		enableMultimodel := kb.IsMultimodalEnabled()

		// Check question generation config
		enableQuestionGeneration := false
		questionCount := 3
		if kb.QuestionGenerationConfig != nil && kb.QuestionGenerationConfig.Enabled {
			enableQuestionGeneration = true
			if kb.QuestionGenerationConfig.QuestionCount > 0 {
				questionCount = kb.QuestionGenerationConfig.QuestionCount
			}
		}

		lang, _ := types.LanguageFromContext(ctx)
		taskPayload := types.DocumentProcessPayload{
			TenantID:                 tenantID,
			KnowledgeID:              existing.ID,
			KnowledgeBaseID:          existing.KnowledgeBaseID,
			FileURL:                  existing.Source,
			FileName:                 existing.FileName,
			FileType:                 existing.FileType,
			EnableMultimodel:         enableMultimodel,
			EnableQuestionGeneration: enableQuestionGeneration,
			QuestionCount:            questionCount,
			Language:                 lang,
			Attempt:                  reparseAttempt,
		}

		langfuse.InjectTracing(ctx, &taskPayload)
		payloadBytes, err := json.Marshal(taskPayload)
		if err != nil {
			logger.Errorf(ctx, "Failed to marshal file URL reparse task payload: %v", err)
			return existing, nil
		}

		task := asynq.NewTask(
			types.TypeDocumentProcess,
			payloadBytes,
			documentProcessTaskOptions(s.config)...,
		)
		info, err := s.task.Enqueue(task)
		if err != nil {
			logger.Errorf(ctx, "Failed to enqueue file URL reparse task: %v", err)
			return existing, nil
		}
		logger.Infof(ctx, "Enqueued file URL reparse task: id=%s queue=%s knowledge_id=%s", info.ID, info.Queue, existing.ID)

		return existing, nil
	}

	// For URL-based knowledge, enqueue URL processing task
	if existing.Type == "url" && existing.Source != "" {
		tenantID := ctx.Value(types.TenantIDContextKey).(uint64)

		enableMultimodel := kb.IsMultimodalEnabled()

		// Check question generation config
		enableQuestionGeneration := false
		questionCount := 3
		if kb.QuestionGenerationConfig != nil && kb.QuestionGenerationConfig.Enabled {
			enableQuestionGeneration = true
			if kb.QuestionGenerationConfig.QuestionCount > 0 {
				questionCount = kb.QuestionGenerationConfig.QuestionCount
			}
		}

		lang, _ := types.LanguageFromContext(ctx)
		taskPayload := types.DocumentProcessPayload{
			TenantID:                 tenantID,
			KnowledgeID:              existing.ID,
			KnowledgeBaseID:          existing.KnowledgeBaseID,
			URL:                      existing.Source,
			EnableMultimodel:         enableMultimodel,
			EnableQuestionGeneration: enableQuestionGeneration,
			QuestionCount:            questionCount,
			Language:                 lang,
			Attempt:                  reparseAttempt,
		}

		langfuse.InjectTracing(ctx, &taskPayload)
		payloadBytes, err := json.Marshal(taskPayload)
		if err != nil {
			logger.Errorf(ctx, "Failed to marshal URL reparse task payload: %v", err)
			return existing, nil
		}

		task := asynq.NewTask(
			types.TypeDocumentProcess,
			payloadBytes,
			documentProcessTaskOptions(s.config, asynq.MaxRetry(3))...,
		)
		info, err := s.task.Enqueue(task)
		if err != nil {
			logger.Errorf(ctx, "Failed to enqueue URL reparse task: %v", err)
			return existing, nil
		}
		logger.Infof(ctx, "Enqueued URL reparse task: id=%s queue=%s knowledge_id=%s", info.ID, info.Queue, existing.ID)

		return existing, nil
	}

	logger.Warnf(ctx, "Knowledge %s has no parseable content (no file, URL, or manual content)", knowledgeID)
	return existing, nil
}

// CancelKnowledgeParse marks an in-progress parse as cancelled by the user.
//
// Semantics (kept aligned with the existing deleting path, but partial work
// is preserved instead of cleaned up):
//   - parse_status is set to "cancelled"; partial chunks/index already written
//     to the database remain on disk. The user can re-trigger parsing via the
//     existing ReparseKnowledge API, which overwrites status back to pending.
//   - Any in-flight worker reads the new status at its next checkpoint and
//     bails (see processChunks / ProcessDocument / downstream handlers).
//   - The asynq inspector (if available) dequeues pending / scheduled / retry
//     tasks for this knowledge_id across the default / critical / low queues
//     and signals active workers to stop. Lite mode (no Redis) skips the
//     dequeue step — the checkpoint-based abort is the only stop signal there.
//   - Idempotent: re-calling on an already-cancelled row is a no-op.
//
// Errors:
//   - ParseStatusCompleted / ParseStatusFailed: the parse has already finished.
//   - ParseStatusDeleting: a delete is in progress; cancel cannot supersede it.
func (s *knowledgeService) CancelKnowledgeParse(
	ctx context.Context, knowledgeID string,
) (*types.Knowledge, error) {
	tenantID := ctx.Value(types.TenantIDContextKey).(uint64)
	existing, err := s.repo.GetKnowledgeByID(ctx, tenantID, knowledgeID)
	if err != nil {
		logger.Errorf(ctx, "CancelKnowledgeParse: failed to load knowledge: %v", err)
		return nil, err
	}
	if existing == nil {
		return nil, werrors.NewNotFoundError("knowledge not found")
	}

	switch existing.ParseStatus {
	case types.ParseStatusCancelled:
		// Idempotent — still attempt the dequeue in case earlier calls
		// raced an enqueue, but skip the row update / span close path.
		s.dequeueKnowledgeTasks(ctx, knowledgeID)
		return existing, nil
	case types.ParseStatusCompleted, types.ParseStatusFailed:
		return nil, werrors.NewBadRequestError("解析已结束，无法取消")
	case types.ParseStatusDeleting:
		return nil, werrors.NewBadRequestError("知识正在删除中，无法取消解析")
	case types.ParseStatusPending, types.ParseStatusProcessing, types.ParseStatusFinalizing:
		// Cancellable. `finalizing` is the post-process fan-out window
		// where graph-extract / summary / question subtasks are still
		// running; cancel here stops the LLM cost they would burn.
	default:
		// Unknown status — let it through but log. Should never happen
		// outside test fixtures or hand-edited rows.
		logger.Warnf(ctx, "CancelKnowledgeParse: unexpected status %q for %s, proceeding",
			existing.ParseStatus, knowledgeID)
	}

	// Flip the row to cancelled and zero the enrichment counter in one
	// update so a late subtask FinalizeSubtask call can't race-promote
	// the row back to completed. Persisted partial data is left in
	// place — the user can reuse it on the next reparse attempt.
	now := time.Now()
	if err := s.repo.UpdateKnowledgeColumns(ctx, existing.ID, map[string]interface{}{
		"parse_status":           types.ParseStatusCancelled,
		"error_message":          "用户已取消解析",
		"pending_subtasks_count": 0,
		"updated_at":             now,
	}); err != nil {
		logger.Errorf(ctx, "CancelKnowledgeParse: failed to mark knowledge cancelled: %v", err)
		return nil, err
	}
	existing.ParseStatus = types.ParseStatusCancelled
	existing.ErrorMessage = "用户已取消解析"
	existing.PendingSubtasksCount = 0
	existing.UpdatedAt = now
	logger.Infof(ctx, "Knowledge %s marked as cancelled by user", knowledgeID)

	// Close the active attempt span tree so the UI stops showing "进行中"
	// for the cancelled run. AbortAttempt cascade-cancels every still-
	// running descendant (multimodal per-image, postprocess subtasks,
	// graph chunks) BEFORE closing the root, otherwise the trace
	// viewer would leave those striped/running bars hanging forever
	// because workers exit via their abort-guard without ever calling
	// FailSpan on their own subspan. Best-effort: nil tracker / missing
	// attempt no-ops.
	if attempt := s.tracker().LatestAttempt(ctx, knowledgeID); attempt > 0 {
		s.tracker().AbortAttempt(ctx, knowledgeID, attempt,
			"USER_CANCELLED", "用户已取消解析", "用户已取消解析")
	}

	// Best-effort dequeue. Failures here don't block the cancel — the
	// downstream tasks will still self-abort at their entry guards.
	s.dequeueKnowledgeTasks(ctx, knowledgeID)
	// Wiki ingest lives in its own per-KB pending queue (task_pending_ops)
	// rather than asynq, so dequeueKnowledgeTasks above can't see it.
	// Mirror the deletion path's scrub so a cancelled knowledge doesn't
	// get picked up by the next 30s batch and burn a wiki LLM call on a
	// doc the user already abandoned. The in-flight worker would skip it
	// at isWikiKnowledgeAborted anyway, but scrubbing avoids waking the
	// batch in the first place.
	s.scrubWikiPendingIngest(ctx, existing.KnowledgeBaseID, knowledgeID, "cancel")
	return existing, nil
}

// dequeueKnowledgeTasks asks the task inspector to remove any queued
// tasks for this knowledge and signal active workers to stop. Safe to
// call when the inspector is a no-op (Lite mode).
func (s *knowledgeService) dequeueKnowledgeTasks(ctx context.Context, knowledgeID string) {
	if s.taskInspector == nil {
		return
	}
	if _, _, err := s.taskInspector.CancelTasksForKnowledge(ctx, knowledgeID); err != nil {
		logger.Warnf(ctx, "CancelKnowledgeParse: dequeue best-effort failed for %s: %v", knowledgeID, err)
	}
}

func (s *knowledgeService) updateChunkVector(ctx context.Context, kbID string, chunks []*types.Chunk) error {
	// Get embedding model from knowledge base
	sourceKB, err := s.kbService.GetKnowledgeBaseByID(ctx, kbID)
	if err != nil {
		return err
	}
	embeddingModel, err := s.modelService.GetEmbeddingModel(ctx, sourceKB.EmbeddingModelID)
	if err != nil {
		return err
	}

	// Initialize composite retrieve engine from tenant configuration
	indexInfo := make([]*types.IndexInfo, 0, len(chunks))
	ids := make([]string, 0, len(chunks))
	for _, chunk := range chunks {
		if chunk.KnowledgeBaseID != kbID {
			logger.Warnf(ctx, "Knowledge base ID mismatch: %s != %s", chunk.KnowledgeBaseID, kbID)
			continue
		}
		indexInfo = append(indexInfo, &types.IndexInfo{
			Content:         chunk.Content,
			SourceID:        chunk.ID,
			SourceType:      types.ChunkSourceType,
			ChunkID:         chunk.ID,
			KnowledgeID:     chunk.KnowledgeID,
			KnowledgeBaseID: chunk.KnowledgeBaseID,
			IsEnabled:       true,
		})
		ids = append(ids, chunk.ID)
	}

	retrieveEngine, err := retriever.CreateRetrieveEngineForKB(
		ctx, s.retrieveEngine, s.ownership, types.MustTenantIDFromContext(ctx), sourceKB.VectorStoreID)
	if err != nil {
		return err
	}

	// Delete old vector representation of the chunk
	err = retrieveEngine.DeleteByChunkIDList(ctx, ids, embeddingModel.GetDimensions(), sourceKB.Type)
	if err != nil {
		return err
	}

	// Index updated chunk content with new vector representation
	err = retrieveEngine.BatchIndex(ctx, embeddingModel, indexInfo)
	if err != nil {
		return err
	}
	return nil
}

func (s *knowledgeService) UpdateImageInfo(
	ctx context.Context,
	knowledgeID string,
	chunkID string,
	imageInfo string,
) error {
	var images []*types.ImageInfo
	if err := json.Unmarshal([]byte(imageInfo), &images); err != nil {
		logger.Errorf(ctx, "Failed to unmarshal image info: %v", err)
		return err
	}
	if len(images) != 1 {
		logger.Warnf(ctx, "Expected exactly one image info, got %d", len(images))
		return nil
	}
	image := images[0]

	// Retrieve all chunks with the given parent chunk ID
	chunk, err := s.chunkService.GetChunkByID(ctx, chunkID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get chunk: %v", err)
		return err
	}
	chunk.ImageInfo = imageInfo
	tenantID := ctx.Value(types.TenantIDContextKey).(uint64)
	chunkChildren, err := s.chunkService.ListChunkByParentID(ctx, tenantID, chunkID)
	if err != nil {
		logger.ErrorWithFields(ctx, err, map[string]interface{}{
			"parent_chunk_id": chunkID,
			"tenant_id":       tenantID,
		})
		return err
	}
	logger.Infof(ctx, "Found %d chunks with parent chunk ID: %s", len(chunkChildren), chunkID)

	// Iterate through each chunk and update its content based on the image information
	updateChunk := []*types.Chunk{chunk}
	var addChunk []*types.Chunk

	// Track whether we've found OCR and caption child chunks for this image
	hasOCRChunk := false
	hasCaptionChunk := false

	for i, child := range chunkChildren {
		// Skip chunks that are not image types
		var cImageInfo []*types.ImageInfo
		err = json.Unmarshal([]byte(child.ImageInfo), &cImageInfo)
		if err != nil {
			logger.Warnf(ctx, "Failed to unmarshal image %s info: %v", child.ID, err)
			continue
		}
		if len(cImageInfo) == 0 {
			continue
		}
		if cImageInfo[0].OriginalURL != image.OriginalURL {
			logger.Warnf(ctx, "Skipping chunk ID: %s, image URL mismatch: %s != %s",
				child.ID, cImageInfo[0].OriginalURL, image.OriginalURL)
			continue
		}

		// Mark that we've found chunks for this image
		switch child.ChunkType {
		case types.ChunkTypeImageCaption:
			hasCaptionChunk = true
			// Update caption if it has changed
			if image.Caption != cImageInfo[0].Caption {
				child.Content = image.Caption
				child.ImageInfo = imageInfo
				updateChunk = append(updateChunk, chunkChildren[i])
			}
		case types.ChunkTypeImageOCR:
			hasOCRChunk = true
			// Update OCR if it has changed
			if image.OCRText != cImageInfo[0].OCRText {
				child.Content = image.OCRText
				child.ImageInfo = imageInfo
				updateChunk = append(updateChunk, chunkChildren[i])
			}
		}
	}

	// Create a new caption chunk if it doesn't exist and we have caption data
	if !hasCaptionChunk && image.Caption != "" {
		captionChunk := &types.Chunk{
			ID:              uuid.New().String(),
			TenantID:        tenantID,
			KnowledgeID:     chunk.KnowledgeID,
			KnowledgeBaseID: chunk.KnowledgeBaseID,
			Content:         image.Caption,
			ChunkType:       types.ChunkTypeImageCaption,
			ParentChunkID:   chunk.ID,
			ImageInfo:       imageInfo,
		}
		addChunk = append(addChunk, captionChunk)
		logger.Infof(ctx, "Created new caption chunk ID: %s for image URL: %s", captionChunk.ID, image.OriginalURL)
	}

	// Create a new OCR chunk if it doesn't exist and we have OCR data
	if !hasOCRChunk && image.OCRText != "" {
		ocrChunk := &types.Chunk{
			ID:              uuid.New().String(),
			TenantID:        tenantID,
			KnowledgeID:     chunk.KnowledgeID,
			KnowledgeBaseID: chunk.KnowledgeBaseID,
			Content:         image.OCRText,
			ChunkType:       types.ChunkTypeImageOCR,
			ParentChunkID:   chunk.ID,
			ImageInfo:       imageInfo,
		}
		addChunk = append(addChunk, ocrChunk)
		logger.Infof(ctx, "Created new OCR chunk ID: %s for image URL: %s", ocrChunk.ID, image.OriginalURL)
	}
	logger.Infof(ctx, "Updated %d chunks out of %d total chunks", len(updateChunk), len(chunkChildren)+1)

	if len(addChunk) > 0 {
		err := s.chunkService.CreateChunks(ctx, addChunk)
		if err != nil {
			logger.ErrorWithFields(ctx, err, map[string]interface{}{
				"add_chunk_size": len(addChunk),
			})
			return err
		}
	}

	// Update the chunks
	for _, c := range updateChunk {
		err := s.chunkService.UpdateChunk(ctx, c)
		if err != nil {
			logger.ErrorWithFields(ctx, err, map[string]interface{}{
				"chunk_id":     c.ID,
				"knowledge_id": c.KnowledgeID,
			})
			return err
		}
	}

	// Update the chunk vector
	err = s.updateChunkVector(ctx, chunk.KnowledgeBaseID, append(updateChunk, addChunk...))
	if err != nil {
		logger.ErrorWithFields(ctx, err, map[string]interface{}{
			"chunk_id":     chunk.ID,
			"knowledge_id": chunk.KnowledgeID,
		})
		return err
	}

	// Update the knowledge file hash
	knowledge, err := s.repo.GetKnowledgeByID(ctx, tenantID, knowledgeID)
	if err != nil {
		logger.Errorf(ctx, "Failed to get knowledge: %v", err)
		return err
	}
	fileHash := calculateStr(knowledgeID, knowledge.FileHash, imageInfo)
	knowledge.FileHash = fileHash
	err = s.repo.UpdateKnowledge(ctx, knowledge)
	if err != nil {
		logger.Warnf(ctx, "Failed to update knowledge file hash: %v", err)
	}

	logger.Infof(ctx, "Updated chunk successfully, chunk ID: %s, knowledge ID: %s", chunk.ID, chunk.KnowledgeID)
	return nil
}

// ProcessManualUpdate handles Asynq manual knowledge update tasks.
// It performs cleanup of old indexes/chunks (when NeedCleanup is true) and re-indexes the content.
func (s *knowledgeService) ProcessManualUpdate(ctx context.Context, t *asynq.Task) error {
	var payload types.ManualProcessPayload
	if err := json.Unmarshal(t.Payload(), &payload); err != nil {
		logger.Errorf(ctx, "failed to unmarshal manual process task payload: %v", err)
		return nil
	}

	ctx = logger.WithRequestID(ctx, payload.RequestId)
	ctx = logger.WithField(ctx, "manual_process", payload.KnowledgeID)
	ctx = context.WithValue(ctx, types.TenantIDContextKey, payload.TenantID)

	tenantInfo, err := s.tenantRepo.GetTenantByID(ctx, payload.TenantID)
	if err != nil {
		logger.Errorf(ctx, "ProcessManualUpdate: failed to get tenant: %v", err)
		return nil
	}
	ctx = context.WithValue(ctx, types.TenantInfoContextKey, tenantInfo)

	knowledge, err := s.repo.GetKnowledgeByID(ctx, payload.TenantID, payload.KnowledgeID)
	if err != nil {
		logger.Errorf(ctx, "ProcessManualUpdate: failed to get knowledge: %v", err)
		return nil
	}
	if knowledge == nil {
		logger.Warnf(ctx, "ProcessManualUpdate: knowledge not found: %s", payload.KnowledgeID)
		return nil
	}

	// Skip if already completed or being deleted
	if knowledge.ParseStatus == types.ParseStatusCompleted {
		logger.Infof(ctx, "ProcessManualUpdate: already completed, skipping: %s", payload.KnowledgeID)
		return nil
	}
	if knowledge.ParseStatus == types.ParseStatusDeleting {
		logger.Infof(ctx, "ProcessManualUpdate: being deleted, skipping: %s", payload.KnowledgeID)
		return nil
	}
	if knowledge.ParseStatus == types.ParseStatusCancelled {
		logger.Infof(ctx, "ProcessManualUpdate: cancelled by user, skipping: %s", payload.KnowledgeID)
		return nil
	}

	kb, err := s.kbService.GetKnowledgeBaseByID(ctx, payload.KnowledgeBaseID)
	if err != nil {
		logger.Errorf(ctx, "ProcessManualUpdate: failed to get knowledge base: %v", err)
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = fmt.Sprintf("failed to get knowledge base: %v", err)
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		return nil
	}

	// Re-check abort status right before marking processing — see the same
	// note in ProcessDocument for the cancel race this guards.
	if aborted, status := s.isKnowledgeAborted(ctx, knowledge.TenantID, knowledge.ID); aborted {
		logger.Infof(ctx, "ProcessManualUpdate: knowledge aborted (%s), skipping: %s", status, knowledge.ID)
		return nil
	}
	// Update status to processing
	knowledge.ParseStatus = "processing"
	knowledge.UpdatedAt = time.Now()
	if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
		logger.Errorf(ctx, "ProcessManualUpdate: failed to update status to processing: %v", err)
		return nil
	}

	// Allocate a fresh span-tracking attempt for this manual (re)index.
	// Without it attemptFromCtx stays 0, so processChunks drops all stage
	// spans and KnowledgePostProcess falls back to LatestAttempt — piling
	// this run's summary/wiki subspans onto the previous attempt's trace.
	attempt := 0
	if root, n, err := s.tracker().OpenAttempt(ctx, knowledge.ID, payload.LangfuseTraceID); err == nil && root != nil {
		attempt = n
	} else if err != nil {
		logger.Warnf(ctx, "ProcessManualUpdate: OpenAttempt failed for %s: %v", knowledge.ID, err)
	}
	ctx = withAttempt(ctx, attempt)

	// Cleanup old resources (indexes, chunks, graph) for update operations
	if payload.NeedCleanup {
		if err := s.cleanupKnowledgeResources(ctx, knowledge); err != nil {
			logger.ErrorWithFields(ctx, err, map[string]interface{}{
				"knowledge_id": payload.KnowledgeID,
			})
			knowledge.ParseStatus = "failed"
			knowledge.ErrorMessage = fmt.Sprintf("failed to cleanup old resources: %v", err)
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)
			return nil
		}
	}

	// Run manual processing (image resolution + chunking + embedding) synchronously within the worker
	s.triggerManualProcessing(ctx, kb, knowledge, payload.Content, true)
	return nil
}

// ProcessDocument handles Asynq document processing tasks
func (s *knowledgeService) ProcessDocument(ctx context.Context, t *asynq.Task) error {
	var payload types.DocumentProcessPayload
	if err := json.Unmarshal(t.Payload(), &payload); err != nil {
		logger.Errorf(ctx, "failed to unmarshal document process task payload: %v", err)
		return nil
	}

	ctx = logger.WithRequestID(ctx, payload.RequestId)
	ctx = logger.WithField(ctx, "document_process", payload.KnowledgeID)
	ctx = context.WithValue(ctx, types.TenantIDContextKey, payload.TenantID)
	if payload.Language != "" {
		ctx = context.WithValue(ctx, types.LanguageContextKey, payload.Language)
	}

	// 获取任务重试信息，用于判断是否是最后一次重试
	retryCount, _ := asynq.GetRetryCount(ctx)
	maxRetry, _ := asynq.GetMaxRetry(ctx)
	isLastRetry := retryCount >= maxRetry

	tenantInfo, err := s.tenantRepo.GetTenantByID(ctx, payload.TenantID)
	if err != nil {
		logger.Errorf(ctx, "failed to get tenant: %v", err)
		return nil
	}
	ctx = context.WithValue(ctx, types.TenantInfoContextKey, tenantInfo)

	logger.Infof(ctx, "Processing document task: knowledge_id=%s, file_path=%s, retry=%d/%d",
		payload.KnowledgeID, payload.FilePath, retryCount, maxRetry)

	// 幂等性检查：获取knowledge记录
	knowledge, err := s.repo.GetKnowledgeByID(ctx, payload.TenantID, payload.KnowledgeID)
	if err != nil {
		logger.Errorf(ctx, "failed to get knowledge: %v", err)
		return nil
	}

	if knowledge == nil {
		return nil
	}

	// 检查是否正在删除 / 已被用户取消 - 如果是则直接退出
	if knowledge.ParseStatus == types.ParseStatusDeleting {
		logger.Infof(ctx, "Knowledge is being deleted, aborting processing: %s", payload.KnowledgeID)
		return nil
	}
	if knowledge.ParseStatus == types.ParseStatusCancelled {
		logger.Infof(ctx, "Knowledge cancelled by user, aborting processing: %s", payload.KnowledgeID)
		return nil
	}

	// 检查任务状态 - 幂等性处理
	if knowledge.ParseStatus == types.ParseStatusCompleted {
		logger.Infof(ctx, "Document already completed, skipping: %s", payload.KnowledgeID)
		return nil // 幂等：已完成的任务直接返回
	}

	if knowledge.ParseStatus == types.ParseStatusFailed {
		// 检查是否可恢复（例如：超时、临时错误等）
		// 对于不可恢复的错误，直接返回
		logger.Warnf(
			ctx,
			"Document processing previously failed: %s, error: %s",
			payload.KnowledgeID,
			knowledge.ErrorMessage,
		)
		// 这里可以根据错误类型判断是否可恢复，暂时允许重试
	}

	// 检查是否有部分处理（有chunks但状态不是completed）
	if knowledge.ParseStatus != "completed" && knowledge.ParseStatus != "pending" &&
		knowledge.ParseStatus != "processing" {
		// 状态异常，记录日志但继续处理
		logger.Warnf(ctx, "Unexpected parse status: %s for knowledge: %s", knowledge.ParseStatus, payload.KnowledgeID)
	}

	// 获取知识库信息
	kb, err := s.kbService.GetKnowledgeBaseByID(ctx, payload.KnowledgeBaseID)
	if err != nil {
		logger.Errorf(ctx, "failed to get knowledge base: %v", err)
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = fmt.Sprintf("failed to get knowledge base: %v", err)
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		return nil
	}

	// Re-check abort status right before flipping to "processing" — closes
	// the race where the user cancels between the entry guard above and
	// this write (otherwise the worker would overwrite cancelled→processing
	// and downstream checkpoints would treat the run as live).
	if aborted, status := s.isKnowledgeAborted(ctx, knowledge.TenantID, knowledge.ID); aborted {
		logger.Infof(ctx, "Knowledge aborted (%s) before marking processing: %s", status, knowledge.ID)
		return nil
	}
	knowledge.ParseStatus = "processing"
	knowledge.UpdatedAt = time.Now()
	if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
		logger.Errorf(ctx, "failed to update knowledge status to processing: %v", err)
		return nil
	}

	// Resolve the attempt for span tracking. The enqueue site sets
	// payload.Attempt to a fresh number for the initial parse and to
	// max+1 for each user-initiated reparse. Asynq retries within a
	// single user action keep the same payload (so retries record
	// onto the same attempt). For payloads predating this code we
	// fall back to OpenAttempt.
	attempt := payload.Attempt
	if attempt <= 0 {
		if root, n, err := s.tracker().OpenAttempt(ctx, knowledge.ID, payload.LangfuseTraceID); err == nil && root != nil {
			attempt = n
		}
	}
	ctx = withAttempt(ctx, attempt)

	// 检查多模态配置（仅对文件导入）
	if payload.FilePath != "" && !payload.EnableMultimodel && IsImageType(payload.FileType) {
		logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID).
			WithField("error", ErrImageNotParse).Errorf("processDocument image without enable multimodel")
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = ErrImageNotParse.Error()
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		return nil
	}

	// 检查音频ASR配置（仅对文件导入）
	if payload.FilePath != "" && IsAudioType(payload.FileType) && !kb.ASRConfig.IsASREnabled() {
		logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID).
			Errorf("processDocument audio without ASR model configured")
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = "上传音频文件需要设置ASR语音识别模型"
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		return nil
	}

	// 视频文件不再支持入库解析
	if payload.FilePath != "" && IsVideoType(payload.FileType) {
		logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID).
			Errorf("processDocument video not supported")
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = "暂不支持视频文件"
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		return nil
	}

	// New pipeline: convert -> store images -> chunk -> vectorize -> multimodal tasks
	var convertResult *types.ReadResult
	var chunks []types.ParsedChunk

	if payload.FileURL != "" {
		// file_url import: SSRF re-check (防 DNS 重绑定), download, persist, then delegate to convert()
		if err := secutils.ValidateURLForSSRF(payload.FileURL); err != nil {
			logger.Errorf(ctx, "File URL rejected for SSRF protection in ProcessDocument: %s, err: %v", payload.FileURL, err)
			knowledge.ParseStatus = "failed"
			knowledge.ErrorMessage = "File URL is not allowed for security reasons"
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)
			return nil
		}

		resolvedFileName := payload.FileName
		resolvedFileType := payload.FileType
		contentBytes, err := downloadFileFromURL(ctx, payload.FileURL, &resolvedFileName, &resolvedFileType)
		if err != nil {
			logger.Errorf(ctx, "Failed to download file from URL: %s, error: %v", payload.FileURL, err)
			if isLastRetry {
				knowledge.ParseStatus = "failed"
				knowledge.ErrorMessage = err.Error()
				knowledge.UpdatedAt = time.Now()
				s.repo.UpdateKnowledge(ctx, knowledge)
			}
			return fmt.Errorf("failed to download file from URL: %w", err)
		}

		if resolvedFileType != "" && !allowedFileURLExtensions[strings.ToLower(resolvedFileType)] {
			logger.Errorf(ctx, "Unsupported file type resolved from file URL: %s", resolvedFileType)
			knowledge.ParseStatus = "failed"
			knowledge.ErrorMessage = fmt.Sprintf("unsupported file type: %s", resolvedFileType)
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)
			return nil
		}

		if resolvedFileName != "" && knowledge.FileName == "" {
			knowledge.FileName = resolvedFileName
		}
		if resolvedFileType != "" && knowledge.FileType == "" {
			knowledge.FileType = resolvedFileType
			s.repo.UpdateKnowledge(ctx, knowledge)
		}

		fileSvc := s.resolveFileService(ctx, kb)
		filePath, err := fileSvc.SaveBytes(ctx, contentBytes, payload.TenantID, resolvedFileName, true)
		if err != nil {
			if isLastRetry {
				knowledge.ParseStatus = "failed"
				knowledge.ErrorMessage = err.Error()
				knowledge.UpdatedAt = time.Now()
				s.repo.UpdateKnowledge(ctx, knowledge)
			}
			return fmt.Errorf("failed to save downloaded file: %w", err)
		}

		payload.FilePath = filePath
		payload.FileName = resolvedFileName
		payload.FileType = resolvedFileType
		convertResult, err = s.convert(ctx, payload, kb, knowledge, isLastRetry)
		if err != nil {
			return err
		}
		if convertResult == nil {
			return nil
		}
	} else if payload.URL != "" {
		// URL import
		convertResult, err = s.convert(ctx, payload, kb, knowledge, isLastRetry)
		if err != nil {
			return err
		}
		if convertResult == nil {
			return nil
		}
		// Update knowledge title from extracted page title if not already set
		if knowledge.Title == "" || knowledge.Title == payload.URL {
			if extractedTitle := convertResult.Metadata["title"]; extractedTitle != "" {
				knowledge.Title = extractedTitle
				knowledge.UpdatedAt = time.Now()
				if err := s.repo.UpdateKnowledge(ctx, knowledge); err != nil {
					logger.Warnf(ctx, "Failed to update knowledge title from extracted page title: %v", err)
				} else {
					logger.Infof(ctx, "Updated knowledge title to extracted page title: %s", extractedTitle)
				}
			}
		}
	} else if len(payload.Passages) > 0 {
		// Text passage import - direct chunking, no conversion needed
		passageChunks := make([]types.ParsedChunk, 0, len(payload.Passages))
		start, end := 0, 0
		for i, p := range payload.Passages {
			if p == "" {
				continue
			}
			end += len([]rune(p))
			passageChunks = append(passageChunks, types.ParsedChunk{
				Content: p,
				Seq:     i,
				Start:   start,
				End:     end,
			})
			start = end
		}
		passageOpts := ProcessChunksOptions{
			EnableQuestionGeneration: payload.EnableQuestionGeneration,
			QuestionCount:            payload.QuestionCount,
		}
		s.processChunks(ctx, kb, knowledge, passageChunks, passageOpts)
		return nil
	} else {
		// File import
		convertResult, err = s.convert(ctx, payload, kb, knowledge, isLastRetry)
		if err != nil {
			return err
		}
		if convertResult == nil {
			return nil
		}
	}

	// Step 1.5: ASR transcription for audio files
	if convertResult != nil && convertResult.IsAudio && len(convertResult.AudioData) > 0 {
		if !kb.ASRConfig.IsASREnabled() {
			logger.Error(ctx, "Audio file detected but ASR is not configured")
			knowledge.ParseStatus = "failed"
			knowledge.ErrorMessage = "ASR model is not configured for audio transcription"
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)
			return nil
		}

		logger.Infof(ctx, "[ASR] Starting audio transcription for knowledge %s, audio size=%d bytes",
			knowledge.ID, len(convertResult.AudioData))

		asrModel, err := s.modelService.GetASRModel(ctx, kb.ASRConfig.ModelID)
		if err != nil {
			logger.Errorf(ctx, "[ASR] Failed to get ASR model: %v", err)
			knowledge.ParseStatus = "failed"
			knowledge.ErrorMessage = fmt.Sprintf("failed to get ASR model: %v", err)
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)
			return nil
		}

		transcriptionResult, err := asrModel.Transcribe(ctx, convertResult.AudioData, knowledge.FileName)
		if err != nil {
			logger.Errorf(ctx, "[ASR] Transcription failed: %v", err)
			if isLastRetry {
				knowledge.ParseStatus = "failed"
				knowledge.ErrorMessage = fmt.Sprintf("audio transcription failed: %v", err)
				knowledge.UpdatedAt = time.Now()
				s.repo.UpdateKnowledge(ctx, knowledge)
			}
			return fmt.Errorf("audio transcription failed: %w", err)
		}

		var transcribedText string
		if transcriptionResult != nil {
			transcribedText = transcriptionResult.Text
		}

		if transcribedText == "" {
			logger.Warn(ctx, "[ASR] Transcription returned empty text")
			transcribedText = "[No speech detected in audio file]"
		}

		logger.Infof(ctx, "[ASR] Transcription completed, text length=%d", len(transcribedText))
		// Replace the audio placeholder with the transcribed text
		convertResult.MarkdownContent = transcribedText
		convertResult.IsAudio = false
		convertResult.AudioData = nil
	}

	// Step 2: Store images and update markdown references
	var storedImages []docparser.StoredImage

	if s.imageResolver != nil && convertResult != nil {
		fileSvc := s.resolveFileService(ctx, kb)
		tenantID, _ := ctx.Value(types.TenantIDContextKey).(uint64)
		updatedMarkdown, images, resolveErr := s.imageResolver.ResolveAndStore(ctx, convertResult, fileSvc, tenantID)
		if resolveErr != nil {
			logger.Warnf(ctx, "Image resolution partially failed: %v", resolveErr)
		}
		if updatedMarkdown != "" {
			convertResult.MarkdownContent = updatedMarkdown
		}
		storedImages = images

		// Resolve remote http(s) images (e.g. markdown external URLs) → download + upload to storage.
		// ResolveAndStore handles inline bytes and base64; ResolveRemoteImages handles http/https URLs.
		updatedContent, remoteImages, remoteErr := s.imageResolver.ResolveRemoteImages(ctx, convertResult.MarkdownContent, fileSvc, tenantID)
		if remoteErr != nil {
			logger.Warnf(ctx, "Remote image resolution partially failed: %v", remoteErr)
		}
		if len(remoteImages) > 0 {
			logger.Infof(ctx, "Resolved %d remote images for knowledge %s", len(remoteImages), knowledge.ID)
			convertResult.MarkdownContent = updatedContent
			storedImages = append(storedImages, remoteImages...)
		}

		logger.Infof(ctx, "Resolved %d total images for knowledge %s", len(storedImages), knowledge.ID)
	}

	// Step 3: Split into chunks using Go chunker
	chunkCfg := buildSplitterConfig(kb)

	processOpts := ProcessChunksOptions{
		EnableQuestionGeneration: payload.EnableQuestionGeneration,
		QuestionCount:            payload.QuestionCount,
		EnableMultimodel:         payload.EnableMultimodel,
		StoredImages:             storedImages,
	}

	if convertResult != nil {
		processOpts.Metadata = convertResult.Metadata
	}

	if kb.ChunkingConfig.EnableParentChild {
		parentCfg, childCfg := buildParentChildConfigs(kb.ChunkingConfig, chunkCfg)
		pcResult := chunker.SplitParentChild(convertResult.MarkdownContent, parentCfg, childCfg)
		chunks = make([]types.ParsedChunk, len(pcResult.Children))
		for i, c := range pcResult.Children {
			chunks[i] = types.ParsedChunk{
				Content:       c.Content,
				ContextHeader: c.ContextHeader,
				Seq:           c.Seq,
				Start:         c.Start,
				End:           c.End,
				ParentIndex:   c.ParentIndex,
			}
		}
		parentChunks := make([]types.ParsedParentChunk, len(pcResult.Parents))
		for i, p := range pcResult.Parents {
			parentChunks[i] = types.ParsedParentChunk{Content: p.Content, Seq: p.Seq, Start: p.Start, End: p.End}
		}
		processOpts.ParentChunks = parentChunks
		logger.Infof(ctx, "Split document into %d parent + %d child chunks for knowledge %s",
			len(pcResult.Parents), len(pcResult.Children), knowledge.ID)
	} else {
		splitChunks := chunker.Split(convertResult.MarkdownContent, chunkCfg)
		chunks = make([]types.ParsedChunk, len(splitChunks))
		for i, c := range splitChunks {
			chunks[i] = types.ParsedChunk{
				Content:       c.Content,
				ContextHeader: c.ContextHeader,
				Seq:           c.Seq,
				Start:         c.Start,
				End:           c.End,
			}
		}
		logger.Infof(ctx, "Split document into %d chunks for knowledge %s", len(chunks), knowledge.ID)
	}

	// Step 4: Process chunks (vectorize + index + enqueue async tasks)
	s.processChunks(ctx, kb, knowledge, chunks, processOpts)

	return nil
}

// convert handles both file and URL reading using a unified ReadRequest.
func (s *knowledgeService) convert(
	ctx context.Context,
	payload types.DocumentProcessPayload,
	kb *types.KnowledgeBase,
	knowledge *types.Knowledge,
	isLastRetry bool,
) (*types.ReadResult, error) {
	// Stage tracking: docreader. Mark the stage as running here so the
	// timeline reflects "DocReader" the moment a worker picks the task
	// up — before that, the stage stays "pending" from the initial
	// upload. Failure/skip transitions are emitted at the specific
	// failure points below; success is emitted at the bottom.
	docInput := types.JSONMap{
		"file_name": payload.FileName,
		"file_type": payload.FileType,
		"is_url":    payload.URL != "",
	}
	if payload.URL != "" {
		docInput["url"] = payload.URL
	}
	s.beginStage(ctx, knowledge.ID, types.StageDocReader, docInput)
	isURL := payload.URL != ""
	fileType := payload.FileType
	overrides := s.getParserEngineOverridesFromContext(ctx)

	if isURL {
		if err := secutils.ValidateURLForSSRF(payload.URL); err != nil {
			logger.Errorf(ctx, "URL rejected for SSRF protection: %s, err: %v", payload.URL, err)
			knowledge.ParseStatus = "failed"
			knowledge.ErrorMessage = "URL is not allowed for security reasons"
			knowledge.UpdatedAt = time.Now()
			s.repo.UpdateKnowledge(ctx, knowledge)
			s.failStage(ctx, knowledge.ID, types.StageDocReader,
				werrors.ErrCodeDocReaderParseFailed, "URL rejected for security reasons", err)
			return nil, nil
		}
	}

	parserEngine := kb.ChunkingConfig.ResolveParserEngine(fileType)
	if isURL {
		parserEngine = kb.ChunkingConfig.ResolveParserEngine("url")
	}

	logger.Infof(ctx, "[convert] kb=%s fileType=%s isURL=%v engine=%q rules=%+v",
		kb.ID, fileType, isURL, parserEngine, kb.ChunkingConfig.ParserEngineRules)

	var reader interfaces.DocReader = s.resolveDocReader(ctx, parserEngine, fileType, isURL, overrides)
	if reader == nil {
		logger.Errorf(ctx, "[convert] no doc reader for kb=%s knowledge=%s fileType=%s engine=%q isURL=%v",
			kb.ID, knowledge.ID, fileType, parserEngine, isURL)
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = "Document parsing service is not configured. Please use text/paragraph import or set DOCREADER_ADDR."
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		s.failStage(ctx, knowledge.ID, types.StageDocReader,
			werrors.ErrCodeDocReaderUnavailable, knowledge.ErrorMessage, nil)
		return nil, nil
	}

	req := &types.ReadRequest{
		URL:                   payload.URL,
		Title:                 knowledge.Title,
		ParserEngine:          parserEngine,
		RequestID:             payload.RequestId,
		ParserEngineOverrides: overrides,
	}

	if !isURL {
		fileReader, err := s.resolveFileServiceForPath(ctx, kb, payload.FilePath).GetFile(ctx, payload.FilePath)
		if err != nil {
			s.failStage(ctx, knowledge.ID, types.StageDocReader,
				werrors.ErrCodeDocReaderParseFailed, "failed to get file", err)
			return s.failKnowledge(ctx, knowledge, isLastRetry, "failed to get file: %v", err)
		}
		defer fileReader.Close()
		contentBytes, err := io.ReadAll(fileReader)
		if err != nil {
			s.failStage(ctx, knowledge.ID, types.StageDocReader,
				werrors.ErrCodeDocReaderParseFailed, "failed to read file", err)
			return s.failKnowledge(ctx, knowledge, isLastRetry, "failed to read file: %v", err)
		}
		req.FileContent = contentBytes
		req.FileName = payload.FileName
		req.FileType = fileType
	}

	result, err := s.callDocReaderWithTimeout(ctx, reader, req)
	if err != nil {
		// Distinguish DocReader timeout (a knowable user-facing
		// failure) from generic read errors so the UI can suggest
		// "split this large file" specifically when relevant.
		code := werrors.ErrCodeDocReaderParseFailed
		if errors.Is(err, context.DeadlineExceeded) || strings.Contains(err.Error(), "docreader call timeout") {
			code = werrors.ErrCodeDocReaderTimeout
		}
		s.failStage(ctx, knowledge.ID, types.StageDocReader,
			code, "document read failed", err)
		return s.failKnowledge(ctx, knowledge, isLastRetry, "document read failed: %v", err)
	}
	if result.Error != "" {
		logger.Errorf(ctx, "[convert] parser returned error kb=%s knowledge=%s file=%q type=%s engine=%q: %s",
			kb.ID, knowledge.ID, req.FileName, fileType, parserEngine, result.Error)
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = result.Error
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
		s.failStage(ctx, knowledge.ID, types.StageDocReader,
			werrors.ErrCodeDocReaderParseFailed, result.Error, nil)
		return nil, nil
	}
	docOutput := types.JSONMap{
		"text_length":  len(result.MarkdownContent),
		"images_found": len(result.ImageRefs),
		"is_audio":     result.IsAudio,
	}
	if pages := result.Metadata["pages"]; pages != "" {
		docOutput["pages"] = pages
	}
	s.endStage(ctx, knowledge.ID, types.StageDocReader, docOutput)
	return result, nil
}

// callDocReaderWithTimeout wraps the DocReader RPC in a child context whose
// deadline is min(parent_deadline, DocReaderCallTimeout). Without this cap,
// a hung docreader (network partition, GC pause, OCR runaway) silently
// burns the whole DocumentProcessTimeout budget and pins a worker for hours
// — the #1 cause of "knowledge stuck in processing" reports.
//
// On timeout we annotate the error so retries / dead-letter consumers can
// distinguish "docreader was slow" from "docreader returned an error".
func (s *knowledgeService) callDocReaderWithTimeout(
	ctx context.Context, reader interfaces.DocReader, req *types.ReadRequest,
) (*types.ReadResult, error) {
	timeout := 30 * time.Minute
	if s.config != nil && s.config.KnowledgeBase != nil && s.config.KnowledgeBase.DocReaderCallTimeout > 0 {
		timeout = s.config.KnowledgeBase.DocReaderCallTimeout
	}
	callCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	start := time.Now()
	result, err := reader.Read(callCtx, req)
	elapsed := time.Since(start)
	if err != nil {
		// Promote DeadlineExceeded into a clearer message; retain underlying
		// error via %w so errors.Is(callCtx.Err(), context.DeadlineExceeded)
		// still works for upstream classification.
		if errors.Is(callCtx.Err(), context.DeadlineExceeded) && !errors.Is(ctx.Err(), context.DeadlineExceeded) {
			logger.Errorf(ctx, "[convert] docreader call timed out after %s (limit %s) for %q",
				elapsed, timeout, req.FileName)
			return nil, fmt.Errorf("docreader call timeout after %s: %w", timeout, err)
		}
		return nil, err
	}
	logger.Infof(ctx, "[convert] docreader call ok in %s for %q", elapsed, req.FileName)
	return result, nil
}

// isLikelyRateLimitError performs a fuzzy classification of an error as a
// rate-limit / quota / backpressure failure. We only need a hint — the
// caller maps to one of two error_codes so the UI can offer "retry later"
// vs. "fix configuration" advice. False positives are harmless (the
// detail is preserved in error_detail anyway).
func isLikelyRateLimitError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, needle := range []string{"rate limit", "ratelimit", "429", "too many requests", "quota"} {
		if strings.Contains(msg, needle) {
			return true
		}
	}
	return false
}

// Returns nil when the required service is unavailable.
func (s *knowledgeService) resolveDocReader(ctx context.Context, engine, fileType string, isURL bool, overrides map[string]string) interfaces.DocReader {
	switch engine {
	case docparser.SimpleEngineName:
		return &docparser.SimpleFormatReader{}
	case docparser.WeKnoraCloudEngineName:
		creds := s.tenantService.GetWeKnoraCloudCredentials(ctx)
		if creds == nil {
			logger.Warnf(ctx, "[resolveDocReader] WeKnoraCloud: no tenant credentials (fileType=%s)", fileType)
			return nil
		}
		reader, err := docparser.NewWeKnoraCloudSignedDocumentReader(creds.AppID, creds.AppSecret)
		if err != nil {
			logger.Errorf(ctx, "[resolveDocReader] WeKnoraCloud reader init failed: %v", err)
			return nil
		}
		return reader
	case "mineru":
		return docparser.NewMinerUReader(overrides)
	case "mineru_cloud":
		return docparser.NewMinerUCloudReader(overrides)
	case "paddleocr_vl":
		return docparser.NewPaddleOCRVLReader(overrides)
	case "paddleocr_vl_cloud":
		return docparser.NewPaddleOCRVLCloudReader(overrides)
	case "builtin":
		// 明确指定使用 builtin 引擎（docreader），不使用 simple format 兜底
		return s.documentReader
	default:
		// 未指定引擎时的兜底逻辑：simple format 使用 Go 原生处理，其他使用 docreader
		if !isURL && docparser.IsSimpleFormat(fileType) {
			return &docparser.SimpleFormatReader{}
		}
		return s.documentReader
	}
}

// failKnowledge marks knowledge as failed (only on last retry) and returns an error.
func (s *knowledgeService) failKnowledge(
	ctx context.Context,
	knowledge *types.Knowledge,
	isLastRetry bool,
	format string,
	args ...interface{},
) (*types.ReadResult, error) {
	errMsg := fmt.Sprintf(format, args...)
	if isLastRetry {
		knowledge.ParseStatus = "failed"
		knowledge.ErrorMessage = errMsg
		knowledge.UpdatedAt = time.Now()
		s.repo.UpdateKnowledge(ctx, knowledge)
	}
	return nil, fmt.Errorf(format, args...)
}

// enqueueImageMultimodalTasks enqueues asynq tasks for multimodal image processing.
func (s *knowledgeService) enqueueImageMultimodalTasks(
	ctx context.Context,
	knowledge *types.Knowledge,
	kb *types.KnowledgeBase,
	images []docparser.StoredImage,
	chunks []types.ParsedChunk,
	metadata map[string]string,
) {
	if s.task == nil || len(images) == 0 {
		return
	}

	attempt := attemptFromCtx(ctx)
	redisKey := fmt.Sprintf("multimodal:pending:%s", knowledge.ID)
	if s.redisClient != nil {
		if err := s.redisClient.Set(ctx, redisKey, len(images), 24*time.Hour).Err(); err != nil {
			logger.Warnf(ctx, "Failed to set multimodal pending count for %s: %v", knowledge.ID, err)
		}
	}

	for idx, img := range images {
		// Match image to the ParsedChunk whose content contains the image URL.
		// ChunkID was populated by processChunks with the real DB UUID.
		chunkID := ""
		for _, c := range chunks {
			if strings.Contains(c.Content, img.ServingURL) {
				chunkID = c.ChunkID
				break
			}
		}
		if chunkID == "" && len(chunks) > 0 {
			chunkID = chunks[0].ChunkID
		}

		lang, _ := types.LanguageFromContext(ctx)
		payload := types.ImageMultimodalPayload{
			TenantID:        knowledge.TenantID,
			KnowledgeID:     knowledge.ID,
			KnowledgeBaseID: kb.ID,
			ChunkID:         chunkID,
			ImageURL:        img.ServingURL,
			EnableOCR:       true,
			EnableCaption:   true,
			Language:        lang,
			ImageSourceType: metadata["image_source_type"],
			Attempt:         attempt,
			ImageIndex:      idx,
		}

		langfuse.InjectTracing(ctx, &payload)
		payloadBytes, err := json.Marshal(payload)
		if err != nil {
			logger.Warnf(ctx, "Failed to marshal image multimodal payload: %v", err)
			continue
		}

		task := asynq.NewTask(types.TypeImageMultimodal, payloadBytes, asynq.Queue(types.QueueMultimodal))
		if _, err := s.task.Enqueue(task); err != nil {
			logger.Warnf(ctx, "Failed to enqueue image multimodal task for %s: %v", img.ServingURL, err)
		} else {
			logger.Infof(ctx, "Enqueued image:multimodal task for %s", img.ServingURL)
		}
	}
}