From 7012c8b0972c5e332b41f78cc8d179731fec4d7f Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Fri, 8 Mar 2019 18:59:23 +0200 Subject: [PATCH] Add image extractor options --- pdf/extractor/image.go | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/pdf/extractor/image.go b/pdf/extractor/image.go index d9550b3f..4adaf376 100644 --- a/pdf/extractor/image.go +++ b/pdf/extractor/image.go @@ -12,10 +12,21 @@ import ( "github.com/unidoc/unidoc/pdf/model" ) +// ExtractPageImagesOpts contains options for controlling image extraction from +// PDF pages. +type ExtractPageImagesOpts struct { + IncludeInlineStencilMasks bool +} + // ExtractPageImages returns the image contents of the page extractor, including data // and position, size information for each image. -func (e *Extractor) ExtractPageImages() (*PageImages, error) { - ctx := &imageExtractContext{} +// A set of options to control page image extraction can be passed in. The opts +// parameter can be nil for the default options. By default, inline stencil masks +// are not extracted. +func (e *Extractor) ExtractPageImages(opts *ExtractPageImagesOpts) (*PageImages, error) { + ctx := &imageExtractContext{ + opts: opts, + } err := ctx.extractContentStreamImages(e.contents, e.resources) if err != nil { @@ -59,6 +70,9 @@ type imageExtractContext struct { // Cache to avoid processing same image many times. cacheXObjectImages map[*core.PdfObjectStream]*cachedImage + + // Extract options. 
+ opts *ExtractPageImagesOpts } type cachedImage struct { @@ -76,6 +90,9 @@ func (ctx *imageExtractContext) extractContentStreamImages(contents string, reso if ctx.cacheXObjectImages == nil { ctx.cacheXObjectImages = map[*core.PdfObjectStream]*cachedImage{} } + if ctx.opts == nil { + ctx.opts = &ExtractPageImagesOpts{} + } processor := contentstream.NewContentStreamProcessor(*operations) processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "", @@ -95,6 +112,12 @@ func (ctx *imageExtractContext) processOperand(op *contentstream.ContentStreamOp return nil } + if isImageMask, ok := core.GetBoolVal(iimg.ImageMask); ok { + if isImageMask && !ctx.opts.IncludeInlineStencilMasks { + return nil + } + } + return ctx.extractInlineImage(iimg, gs, resources) } else if op.Operand == "Do" && len(op.Params) == 1 { // Do: XObject.