From 362ba7349d42067d56d3ac3355a9570ea7df6e49 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 29 Oct 2019 07:49:07 +1100 Subject: [PATCH] Changes to make the lazy reader work on the PaperCut corpus (#194) --- contentstream/encoding.go | 4 +-- contentstream/inline-image.go | 2 +- contentstream/processor.go | 3 ++- core/encoding.go | 8 +++--- core/symbols.go | 46 ++++++++--------------------------- model/page.go | 2 +- model/pattern.go | 6 ++--- model/resources.go | 14 ++++------- model/shading.go | 6 ++--- 9 files changed, 31 insertions(+), 60 deletions(-) diff --git a/contentstream/encoding.go b/contentstream/encoding.go index 05f1a7dd..bd9057a8 100644 --- a/contentstream/encoding.go +++ b/contentstream/encoding.go @@ -88,7 +88,7 @@ func newFlateEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decod if decodeParams == nil { obj := inlineImage.DecodeParms if obj != nil { - dp, isDict := obj.(*core.PdfObjectDictionary) + dp, isDict := core.GetDict(obj) if !isDict { common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj) return nil, fmt.Errorf("invalid DecodeParms") @@ -163,7 +163,7 @@ func newLZWEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeP // If decodeParams not provided, see if we can get from the inline image directly. if decodeParams == nil { if inlineImage.DecodeParms != nil { - dp, isDict := inlineImage.DecodeParms.(*core.PdfObjectDictionary) + dp, isDict := core.GetDict(inlineImage.DecodeParms) if !isDict { common.Log.Debug("Error: DecodeParms not a dictionary (%T)", inlineImage.DecodeParms) return nil, fmt.Errorf("invalid DecodeParms") diff --git a/contentstream/inline-image.go b/contentstream/inline-image.go index fd6df07c..96eadbea 100644 --- a/contentstream/inline-image.go +++ b/contentstream/inline-image.go @@ -316,7 +316,7 @@ func (csp *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, e if !isOperand { // Not an operand.. Read key value properties.. - param, ok := obj.(*core.PdfObjectName) + param, ok := core.GetName(obj) if !ok { common.Log.Debug("Invalid inline image property (expecting name) - %T", obj) return nil, fmt.Errorf("invalid inline image property (expecting name) - %T", obj) diff --git a/contentstream/processor.go b/contentstream/processor.go index 1a3ce08b..ccfb13e4 100644 --- a/contentstream/processor.go +++ b/contentstream/processor.go @@ -7,6 +7,7 @@ package contentstream import ( "errors" + "fmt" "github.com/unidoc/unipdf/v3/common" "github.com/unidoc/unipdf/v3/core" @@ -141,7 +142,7 @@ func (proc *ContentStreamProcessor) getColorspace(name string, resources *model. // Otherwise unsupported. common.Log.Debug("Unknown colorspace requested: %s", name) - return nil, errors.New("unsupported colorspace") + return nil, fmt.Errorf("unsupported colorspace: %s", name) } // Get initial color for a given colorspace. diff --git a/core/encoding.go b/core/encoding.go index 31517ac1..b268e970 100644 --- a/core/encoding.go +++ b/core/encoding.go @@ -443,7 +443,7 @@ func (enc *FlateEncoder) EncodeBytes(data []byte) ([]byte, error) { rowLength := int(enc.Columns) rows := len(data) / rowLength if len(data)%rowLength != 0 { - common.Log.Error("Invalid column length") + common.Log.Error("Invalid row length") return nil, errors.New("invalid row length") } @@ -590,7 +590,7 @@ func newLZWEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObject // If decodeParams not provided, see if we can get from the stream. if decodeParams == nil { - obj := encDict.Get("DecodeParms") + obj := TraceToDirectObject(encDict.Get("DecodeParms")) if obj != nil { if dp, isDict := obj.(*PdfObjectDictionary); isDict { decodeParams = dp @@ -1751,7 +1751,7 @@ func newCCITTFaxEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfO // If decodeParams not provided, see if we can get from the stream. if decodeParams == nil { - obj := encDict.Get("DecodeParms") + obj := TraceToDirectObject(encDict.Get("DecodeParms")) if obj != nil { switch t := obj.(type) { case *PdfObjectDictionary: @@ -2061,7 +2061,7 @@ func newJBIG2EncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObje if decodeParams != nil { if globals := decodeParams.Get("JBIG2Globals"); globals != nil { - globalsStream, ok := globals.(*PdfObjectStream) + globalsStream, ok := GetStream(globals) if !ok { err := errors.New("the Globals stream should be an Object Stream") common.Log.Debug("ERROR: %s", err.Error()) diff --git a/core/symbols.go b/core/symbols.go index 4fd5aef5..dbd22f79 100644 --- a/core/symbols.go +++ b/core/symbols.go @@ -22,52 +22,26 @@ func IsFloatDigit(c byte) bool { // IsDecimalDigit checks if the character is a part of a decimal number string. func IsDecimalDigit(c byte) bool { - if c >= '0' && c <= '9' { - return true - } - - return false + return '0' <= c && c <= '9' } // IsOctalDigit checks if a character can be part of an octal digit string. func IsOctalDigit(c byte) bool { - if c >= '0' && c <= '7' { - return true - } - - return false + return '0' <= c && c <= '7' } // IsPrintable checks if a character is printable. // Regular characters that are outside the range EXCLAMATION MARK(21h) // (!) to TILDE (7Eh) (~) should be written using the hexadecimal notation. -func IsPrintable(char byte) bool { - if char < 0x21 || char > 0x7E { - return false - } - return true +func IsPrintable(c byte) bool { + return 0x21 <= c && c <= 0x7E } // IsDelimiter checks if a character represents a delimiter. -func IsDelimiter(char byte) bool { - if char == '(' || char == ')' { - return true - } - if char == '<' || char == '>' { - return true - } - if char == '[' || char == ']' { - return true - } - if char == '{' || char == '}' { - return true - } - if char == '/' { - return true - } - if char == '%' { - return true - } - - return false +func IsDelimiter(c byte) bool { + return c == '(' || c == ')' || + c == '<' || c == '>' || + c == '[' || c == ']' || + c == '{' || c == '}' || + c == '/' || c == '%' } diff --git a/model/page.go b/model/page.go index 15af4eea..f69dd778 100644 --- a/model/page.go +++ b/model/page.go @@ -905,7 +905,7 @@ func newPdfPageResourcesColorspacesFromPdfObject(obj core.PdfObject) (*PdfPageRe obj = indObj.PdfObject } - dict, ok := obj.(*core.PdfObjectDictionary) + dict, ok := core.GetDict(obj) if !ok { return nil, errors.New("CS attribute type error") } diff --git a/model/pattern.go b/model/pattern.go index bd5c2c5e..2e2ccb40 100644 --- a/model/pattern.go +++ b/model/pattern.go @@ -162,7 +162,7 @@ func newPdfPatternFromPdfObject(container core.PdfObject) (*PdfPattern, error) { pattern := &PdfPattern{} var dict *core.PdfObjectDictionary - if indObj, is := container.(*core.PdfIndirectObject); is { + if indObj, is := core.GetIndirect(container); is { pattern.container = indObj d, ok := indObj.PdfObject.(*core.PdfObjectDictionary) if !ok { @@ -170,11 +170,11 @@ func newPdfPatternFromPdfObject(container core.PdfObject) (*PdfPattern, error) { return nil, core.ErrTypeError } dict = d - } else if streamObj, is := container.(*core.PdfObjectStream); is { + } else if streamObj, is := core.GetStream(container); is { pattern.container = streamObj dict = streamObj.PdfObjectDictionary } else { - common.Log.Debug("Pattern not an indirect object or stream") + common.Log.Debug("Pattern not an indirect object or stream. %T", container) return nil, core.ErrTypeError } diff --git a/model/resources.go b/model/resources.go index aee90eb9..a76496af 100644 --- a/model/resources.go +++ b/model/resources.go @@ -335,11 +335,7 @@ func (r *PdfPageResources) SetColorspaceByName(keyName core.PdfObjectName, cs Pd // HasXObjectByName checks if an XObject with a specified keyName is defined. func (r *PdfPageResources) HasXObjectByName(keyName core.PdfObjectName) bool { obj, _ := r.GetXObjectByName(keyName) - if obj != nil { - return true - } - - return false + return obj != nil } // GenerateXObjectName generates an unused XObject name that can be used for @@ -362,10 +358,10 @@ type XObjectType int // XObject types. const ( XObjectTypeUndefined XObjectType = iota - XObjectTypeImage XObjectType = iota - XObjectTypeForm XObjectType = iota - XObjectTypePS XObjectType = iota - XObjectTypeUnknown XObjectType = iota + XObjectTypeImage + XObjectTypeForm + XObjectTypePS + XObjectTypeUnknown ) // GetXObjectByName returns the XObject with the specified keyName and the object type. diff --git a/model/shading.go b/model/shading.go index ee175b3c..a611431b 100644 --- a/model/shading.go +++ b/model/shading.go @@ -141,7 +141,7 @@ func newPdfShadingFromPdfObject(obj core.PdfObject) (*PdfShading, error) { shading := &PdfShading{} var dict *core.PdfObjectDictionary - if indObj, isInd := obj.(*core.PdfIndirectObject); isInd { + if indObj, isInd := core.GetIndirect(obj); isInd { shading.container = indObj d, ok := indObj.PdfObject.(*core.PdfObjectDictionary) @@ -151,10 +151,10 @@ func newPdfShadingFromPdfObject(obj core.PdfObject) (*PdfShading, error) { } dict = d - } else if streamObj, isStream := obj.(*core.PdfObjectStream); isStream { + } else if streamObj, isStream := core.GetStream(obj); isStream { shading.container = streamObj dict = streamObj.PdfObjectDictionary - } else if d, isDict := obj.(*core.PdfObjectDictionary); isDict { + } else if d, isDict := core.GetDict(obj); isDict { shading.container = d dict = d } else {