Changes to make the lazy reader work on the PaperCut corpus (#194)

This commit is contained in:
Peter Williams 2019-10-29 07:49:07 +11:00 committed by Gunnsteinn Hall
parent ddbb5671e3
commit 362ba7349d
9 changed files with 31 additions and 60 deletions

View File

@@ -88,7 +88,7 @@ func newFlateEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decod
if decodeParams == nil {
obj := inlineImage.DecodeParms
if obj != nil {
dp, isDict := obj.(*core.PdfObjectDictionary)
dp, isDict := core.GetDict(obj)
if !isDict {
common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj)
return nil, fmt.Errorf("invalid DecodeParms")
@@ -163,7 +163,7 @@ func newLZWEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeP
// If decodeParams not provided, see if we can get from the inline image directly.
if decodeParams == nil {
if inlineImage.DecodeParms != nil {
dp, isDict := inlineImage.DecodeParms.(*core.PdfObjectDictionary)
dp, isDict := core.GetDict(inlineImage.DecodeParms)
if !isDict {
common.Log.Debug("Error: DecodeParms not a dictionary (%T)", inlineImage.DecodeParms)
return nil, fmt.Errorf("invalid DecodeParms")

View File

@@ -316,7 +316,7 @@ func (csp *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, e
if !isOperand {
// Not an operand.. Read key value properties..
param, ok := obj.(*core.PdfObjectName)
param, ok := core.GetName(obj)
if !ok {
common.Log.Debug("Invalid inline image property (expecting name) - %T", obj)
return nil, fmt.Errorf("invalid inline image property (expecting name) - %T", obj)

View File

@@ -7,6 +7,7 @@ package contentstream
import (
"errors"
"fmt"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
@@ -141,7 +142,7 @@ func (proc *ContentStreamProcessor) getColorspace(name string, resources *model.
// Otherwise unsupported.
common.Log.Debug("Unknown colorspace requested: %s", name)
return nil, errors.New("unsupported colorspace")
return nil, fmt.Errorf("unsupported colorspace: %s", name)
}
// Get initial color for a given colorspace.

View File

@@ -443,7 +443,7 @@ func (enc *FlateEncoder) EncodeBytes(data []byte) ([]byte, error) {
rowLength := int(enc.Columns)
rows := len(data) / rowLength
if len(data)%rowLength != 0 {
common.Log.Error("Invalid column length")
common.Log.Error("Invalid row length")
return nil, errors.New("invalid row length")
}
@@ -590,7 +590,7 @@ func newLZWEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObject
// If decodeParams not provided, see if we can get from the stream.
if decodeParams == nil {
obj := encDict.Get("DecodeParms")
obj := TraceToDirectObject(encDict.Get("DecodeParms"))
if obj != nil {
if dp, isDict := obj.(*PdfObjectDictionary); isDict {
decodeParams = dp
@@ -1751,7 +1751,7 @@ func newCCITTFaxEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfO
// If decodeParams not provided, see if we can get from the stream.
if decodeParams == nil {
obj := encDict.Get("DecodeParms")
obj := TraceToDirectObject(encDict.Get("DecodeParms"))
if obj != nil {
switch t := obj.(type) {
case *PdfObjectDictionary:
@@ -2061,7 +2061,7 @@ func newJBIG2EncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObje
if decodeParams != nil {
if globals := decodeParams.Get("JBIG2Globals"); globals != nil {
globalsStream, ok := globals.(*PdfObjectStream)
globalsStream, ok := GetStream(globals)
if !ok {
err := errors.New("the Globals stream should be an Object Stream")
common.Log.Debug("ERROR: %s", err.Error())

View File

@@ -22,52 +22,26 @@ func IsFloatDigit(c byte) bool {
// IsDecimalDigit checks if the character is a part of a decimal number string.
func IsDecimalDigit(c byte) bool {
if c >= '0' && c <= '9' {
return true
}
return false
return '0' <= c && c <= '9'
}
// IsOctalDigit checks if a character can be part of an octal digit string.
func IsOctalDigit(c byte) bool {
if c >= '0' && c <= '7' {
return true
}
return false
return '0' <= c && c <= '7'
}
// IsPrintable checks if a character is printable.
// Regular characters that are outside the range EXCLAMATION MARK(21h)
// (!) to TILDE (7Eh) (~) should be written using the hexadecimal notation.
func IsPrintable(char byte) bool {
if char < 0x21 || char > 0x7E {
return false
}
return true
func IsPrintable(c byte) bool {
return 0x21 <= c && c <= 0x7E
}
// IsDelimiter checks if a character represents a delimiter.
func IsDelimiter(char byte) bool {
if char == '(' || char == ')' {
return true
}
if char == '<' || char == '>' {
return true
}
if char == '[' || char == ']' {
return true
}
if char == '{' || char == '}' {
return true
}
if char == '/' {
return true
}
if char == '%' {
return true
}
return false
func IsDelimiter(c byte) bool {
return c == '(' || c == ')' ||
c == '<' || c == '>' ||
c == '[' || c == ']' ||
c == '{' || c == '}' ||
c == '/' || c == '%'
}

View File

@@ -905,7 +905,7 @@ func newPdfPageResourcesColorspacesFromPdfObject(obj core.PdfObject) (*PdfPageRe
obj = indObj.PdfObject
}
dict, ok := obj.(*core.PdfObjectDictionary)
dict, ok := core.GetDict(obj)
if !ok {
return nil, errors.New("CS attribute type error")
}

View File

@@ -162,7 +162,7 @@ func newPdfPatternFromPdfObject(container core.PdfObject) (*PdfPattern, error) {
pattern := &PdfPattern{}
var dict *core.PdfObjectDictionary
if indObj, is := container.(*core.PdfIndirectObject); is {
if indObj, is := core.GetIndirect(container); is {
pattern.container = indObj
d, ok := indObj.PdfObject.(*core.PdfObjectDictionary)
if !ok {
@@ -170,11 +170,11 @@ func newPdfPatternFromPdfObject(container core.PdfObject) (*PdfPattern, error) {
return nil, core.ErrTypeError
}
dict = d
} else if streamObj, is := container.(*core.PdfObjectStream); is {
} else if streamObj, is := core.GetStream(container); is {
pattern.container = streamObj
dict = streamObj.PdfObjectDictionary
} else {
common.Log.Debug("Pattern not an indirect object or stream")
common.Log.Debug("Pattern not an indirect object or stream. %T", container)
return nil, core.ErrTypeError
}

View File

@@ -335,11 +335,7 @@ func (r *PdfPageResources) SetColorspaceByName(keyName core.PdfObjectName, cs Pd
// HasXObjectByName checks if an XObject with a specified keyName is defined.
func (r *PdfPageResources) HasXObjectByName(keyName core.PdfObjectName) bool {
obj, _ := r.GetXObjectByName(keyName)
if obj != nil {
return true
}
return false
return obj != nil
}
// GenerateXObjectName generates an unused XObject name that can be used for
@@ -362,10 +358,10 @@ type XObjectType int
// XObject types.
const (
XObjectTypeUndefined XObjectType = iota
XObjectTypeImage XObjectType = iota
XObjectTypeForm XObjectType = iota
XObjectTypePS XObjectType = iota
XObjectTypeUnknown XObjectType = iota
XObjectTypeImage
XObjectTypeForm
XObjectTypePS
XObjectTypeUnknown
)
// GetXObjectByName returns the XObject with the specified keyName and the object type.

View File

@@ -141,7 +141,7 @@ func newPdfShadingFromPdfObject(obj core.PdfObject) (*PdfShading, error) {
shading := &PdfShading{}
var dict *core.PdfObjectDictionary
if indObj, isInd := obj.(*core.PdfIndirectObject); isInd {
if indObj, isInd := core.GetIndirect(obj); isInd {
shading.container = indObj
d, ok := indObj.PdfObject.(*core.PdfObjectDictionary)
@@ -151,10 +151,10 @@ func newPdfShadingFromPdfObject(obj core.PdfObject) (*PdfShading, error) {
}
dict = d
} else if streamObj, isStream := obj.(*core.PdfObjectStream); isStream {
} else if streamObj, isStream := core.GetStream(obj); isStream {
shading.container = streamObj
dict = streamObj.PdfObjectDictionary
} else if d, isDict := obj.(*core.PdfObjectDictionary); isDict {
} else if d, isDict := core.GetDict(obj); isDict {
shading.container = d
dict = d
} else {