Add image extractor options

This commit is contained in:
Adrian-George Bostan 2019-03-08 18:59:23 +02:00
parent 0dc0219e8f
commit 7012c8b097

View File

@ -12,10 +12,21 @@ import (
"github.com/unidoc/unidoc/pdf/model" "github.com/unidoc/unidoc/pdf/model"
) )
// ExtractPageImagesOpts contains options for controlling image extraction from
// PDF pages.
type ExtractPageImagesOpts struct {
// IncludeInlineStencilMasks controls whether inline images whose ImageMask
// flag is set are extracted. When false (the zero value), such inline
// images are skipped.
IncludeInlineStencilMasks bool
}
// ExtractPageImages returns the image contents of the page extractor, including data // ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image. // and position, size information for each image.
func (e *Extractor) ExtractPageImages() (*PageImages, error) { // A set of options to control page image extraction can be passed in. The opts
ctx := &imageExtractContext{} // parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (e *Extractor) ExtractPageImages(opts *ExtractPageImagesOpts) (*PageImages, error) {
ctx := &imageExtractContext{
opts: opts,
}
err := ctx.extractContentStreamImages(e.contents, e.resources) err := ctx.extractContentStreamImages(e.contents, e.resources)
if err != nil { if err != nil {
@ -59,6 +70,9 @@ type imageExtractContext struct {
// Cache to avoid processing same image many times. // Cache to avoid processing same image many times.
cacheXObjectImages map[*core.PdfObjectStream]*cachedImage cacheXObjectImages map[*core.PdfObjectStream]*cachedImage
// Extract options.
opts *ExtractPageImagesOpts
} }
type cachedImage struct { type cachedImage struct {
@ -76,6 +90,9 @@ func (ctx *imageExtractContext) extractContentStreamImages(contents string, reso
if ctx.cacheXObjectImages == nil { if ctx.cacheXObjectImages == nil {
ctx.cacheXObjectImages = map[*core.PdfObjectStream]*cachedImage{} ctx.cacheXObjectImages = map[*core.PdfObjectStream]*cachedImage{}
} }
if ctx.opts == nil {
ctx.opts = &ExtractPageImagesOpts{}
}
processor := contentstream.NewContentStreamProcessor(*operations) processor := contentstream.NewContentStreamProcessor(*operations)
processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "", processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "",
@ -95,6 +112,12 @@ func (ctx *imageExtractContext) processOperand(op *contentstream.ContentStreamOp
return nil return nil
} }
if isImageMask, ok := core.GetBoolVal(iimg.ImageMask); ok {
if isImageMask && !ctx.opts.IncludeInlineStencilMasks {
return nil
}
}
return ctx.extractInlineImage(iimg, gs, resources) return ctx.extractInlineImage(iimg, gs, resources)
} else if op.Operand == "Do" && len(op.Params) == 1 { } else if op.Operand == "Do" && len(op.Params) == 1 {
// Do: XObject. // Do: XObject.