Add image extractor options

2025-05-02 22:17:06 +08:00 · 2019-03-08 18:59:23 +02:00 · 2019-03-08 18:59:23 +02:00 · 7012c8b097
commit 7012c8b097
parent 0dc0219e8f
1 changed files with 25 additions and 2 deletions
--- a/pdf/extractor/image.go
+++ b/pdf/extractor/image.go
@@ -12,10 +12,21 @@ import (
 	"github.com/unidoc/unidoc/pdf/model"
 )

+// ExtractPageImagesOpts contains options for controlling image extraction from
+// PDF pages. With the zero value, inline images flagged as image masks are skipped.
+type ExtractPageImagesOpts struct {
+	IncludeInlineStencilMasks bool
+}
+
 // ExtractPageImages returns the image contents of the page extractor, including data
 // and position, size information for each image.
-func (e *Extractor) ExtractPageImages() (*PageImages, error) {
-	ctx := &imageExtractContext{}
+// A set of options to control page image extraction can be passed in. The opts
+// parameter can be nil for the default options. By default, inline stencil masks
+// are not extracted.
+func (e *Extractor) ExtractPageImages(opts *ExtractPageImagesOpts) (*PageImages, error) {
+	ctx := &imageExtractContext{
+		opts: opts,
+	}

 	err := ctx.extractContentStreamImages(e.contents, e.resources)
 	if err != nil {
@@ -59,6 +70,9 @@ type imageExtractContext struct {

 	// Cache to avoid processing same image many times.
 	cacheXObjectImages map[*core.PdfObjectStream]*cachedImage
+
+	// Extract options.
+	opts *ExtractPageImagesOpts
 }

 type cachedImage struct {
@@ -76,6 +90,9 @@ func (ctx *imageExtractContext) extractContentStreamImages(contents string, reso
 	if ctx.cacheXObjectImages == nil {
 		ctx.cacheXObjectImages = map[*core.PdfObjectStream]*cachedImage{}
 	}
+	if ctx.opts == nil {
+		ctx.opts = &ExtractPageImagesOpts{}
+	}

 	processor := contentstream.NewContentStreamProcessor(*operations)
 	processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "",
@@ -95,6 +112,12 @@ func (ctx *imageExtractContext) processOperand(op *contentstream.ContentStreamOp
 			return nil
 		}

+		if isImageMask, ok := core.GetBoolVal(iimg.ImageMask); ok {
+			if isImageMask && !ctx.opts.IncludeInlineStencilMasks {
+				return nil
+			}
+		}
+
 		return ctx.extractInlineImage(iimg, gs, resources)
 	} else if op.Operand == "Do" && len(op.Params) == 1 {
 		// Do: XObject.