From d55b52652cfaeae915f696e82e66621f61cf7f52 Mon Sep 17 00:00:00 2001 From: wizardchen Date: Wed, 29 Apr 2026 11:11:54 +0800 Subject: [PATCH] fix(docparser): preserve standalone image uploads from icon filter When the uploaded file is itself an image, the image reference now carries an IsOriginal flag so ResolveAndStore skips the small-icon size filter. Otherwise small standalone images (e.g. avatars below 64x64) were silently dropped before reaching multimodal OCR/caption processing. --- internal/infrastructure/docparser/builtin_converter.go | 2 ++ internal/infrastructure/docparser/image_resolver.go | 7 +++++-- internal/types/docparser.go | 5 +++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/internal/infrastructure/docparser/builtin_converter.go b/internal/infrastructure/docparser/builtin_converter.go index 3779cfa7..0cf3860a 100644 --- a/internal/infrastructure/docparser/builtin_converter.go +++ b/internal/infrastructure/docparser/builtin_converter.go @@ -98,6 +98,7 @@ func imageToResult(fileName string, data []byte) *types.ReadResult { OriginalRef: safeRef, MimeType: mime, ImageData: data, + IsOriginal: true, }, }, } @@ -170,6 +171,7 @@ func ensureOriginalImageRef(req *types.ReadRequest, mdContent string, imageRefs OriginalRef: refPath, MimeType: mime, ImageData: req.FileContent, + IsOriginal: true, }) return mdContent, imageRefs diff --git a/internal/infrastructure/docparser/image_resolver.go b/internal/infrastructure/docparser/image_resolver.go index eebe30b4..9c2b35bf 100644 --- a/internal/infrastructure/docparser/image_resolver.go +++ b/internal/infrastructure/docparser/image_resolver.go @@ -126,8 +126,11 @@ func (r *ImageResolver) ResolveAndStore( continue } - // Filter out small icons and decorative images - if isIconImage(ref.ImageData) { + // Filter out small icons and decorative images. Skip the filter + // when the reference is the originally uploaded file itself, so + // that a standalone image upload is never silently dropped even + // if its dimensions are below the icon threshold. + if !ref.IsOriginal && isIconImage(ref.ImageData) { // Remove the image reference from markdown entirely markdown = markdown[:m[0]] + markdown[m[1]:] continue diff --git a/internal/types/docparser.go b/internal/types/docparser.go index 861a8af4..b2e1f164 100644 --- a/internal/types/docparser.go +++ b/internal/types/docparser.go @@ -31,6 +31,11 @@ type ImageRef struct { MimeType string StorageKey string ImageData []byte // inline image bytes (universal fallback for cross-machine deployments) + // IsOriginal marks references that point to the originally uploaded file + // itself (e.g. when the user uploads a standalone image). Such references + // must not be dropped by the icon/size filter — otherwise a small image + // upload would be silently discarded before multimodal processing. + IsOriginal bool } // ParserEngineInfo describes a registered parser engine.