Merge pull request #105 from peterwilliams97/xmaster

Fixed some bugs found while getting pdf_describe.go to work
This commit is contained in:
Gunnsteinn Hall 2017-08-05 12:56:16 +00:00 committed by GitHub
commit 008469dfd2
8 changed files with 204 additions and 26 deletions

View File

@ -162,11 +162,11 @@ func (this *ContentStreamInlineImage) GetColorSpace(resources *PdfPageResources)
return nil, errors.New("Invalid type")
}
if *name == "G" {
if *name == "G" || *name == "DeviceGray" {
return NewPdfColorspaceDeviceGray(), nil
} else if *name == "RGB" {
} else if *name == "RGB" || *name == "DeviceRGB" {
return NewPdfColorspaceDeviceRGB(), nil
} else if *name == "CMYK" {
} else if *name == "CMYK" || *name == "DeviceCMYK" {
return NewPdfColorspaceDeviceCMYK(), nil
} else if *name == "I" {
return nil, errors.New("Unsupported Index colorspace")

View File

@ -135,6 +135,10 @@ func (csp *ContentStreamProcessor) getInitialColor(cs PdfColorspace) (PdfColor,
return NewPdfColorDeviceRGB(0.0, 0.0, 0.0), nil
case *PdfColorspaceDeviceCMYK:
return NewPdfColorDeviceCMYK(0.0, 0.0, 0.0, 1.0), nil
case *PdfColorspaceCalGray:
return NewPdfColorCalGray(0.0), nil
case *PdfColorspaceCalRGB:
return NewPdfColorCalRGB(0.0, 0.0, 0.0), nil
case *PdfColorspaceLab:
l := 0.0
a := 0.0

View File

@ -10,6 +10,7 @@ package core
// - FlateDecode
// - LZW
// - DCT Decode (JPEG)
// - RunLength
// - ASCII Hex
// - ASCII85
@ -33,12 +34,13 @@ import (
)
const (
StreamEncodingFilterNameFlate = "FlateDecode"
StreamEncodingFilterNameLZW = "LZWDecode"
StreamEncodingFilterNameDCT = "DCTDecode"
StreamEncodingFilterNameASCIIHex = "ASCIIHexDecode"
StreamEncodingFilterNameASCII85 = "ASCII85Decode"
StreamEncodingFilterNameRaw = "Raw"
StreamEncodingFilterNameFlate = "FlateDecode"
StreamEncodingFilterNameLZW = "LZWDecode"
StreamEncodingFilterNameDCT = "DCTDecode"
StreamEncodingFilterNameRunLength = "RunLengthDecode"
StreamEncodingFilterNameASCIIHex = "ASCIIHexDecode"
StreamEncodingFilterNameASCII85 = "ASCII85Decode"
StreamEncodingFilterNameRaw = "Raw"
)
const (
@ -141,7 +143,7 @@ func newFlateEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObje
// If decodeParams not provided, see if we can get from the stream.
if decodeParams == nil {
obj := encDict.Get("DecodeParms")
obj := TraceToDirectObject(encDict.Get("DecodeParms"))
if obj != nil {
dp, isDict := obj.(*PdfObjectDictionary)
if !isDict {
@ -507,12 +509,19 @@ func newLZWEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObject
if decodeParams == nil {
obj := encDict.Get("DecodeParms")
if obj != nil {
dp, isDict := obj.(*PdfObjectDictionary)
if !isDict {
common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj)
if dp, isDict := obj.(*PdfObjectDictionary); isDict {
decodeParams = dp
} else if a, isArr := obj.(*PdfObjectArray); isArr {
if len(*a) == 1 {
if dp, isDict := (*a)[0].(*PdfObjectDictionary); isDict {
decodeParams = dp
}
}
}
if decodeParams == nil {
common.Log.Error("DecodeParms not a dictionary %#v", obj)
return nil, fmt.Errorf("Invalid DecodeParms")
}
decodeParams = dp
}
}
@ -1057,6 +1066,148 @@ func (this *DCTEncoder) EncodeBytes(data []byte) ([]byte, error) {
return buf.Bytes(), nil
}
// RunLengthEncoder implements the PDF RunLengthDecode stream filter.
// It has no fields, so the zero value is ready to use.
type RunLengthEncoder struct{}
// NewRunLengthEncoder returns a new run length encoder. The encoder has no
// configurable parameters.
func NewRunLengthEncoder() *RunLengthEncoder {
	return new(RunLengthEncoder)
}
// GetFilterName returns the name of the stream filter handled by this encoder,
// i.e. the value of StreamEncodingFilterNameRunLength.
func (this *RunLengthEncoder) GetFilterName() string {
return StreamEncodingFilterNameRunLength
}
// newRunLengthEncoderFromStream creates a run length encoder for a stream object.
// Neither streamObj nor decodeParams is consulted: the encoder is always a
// fresh default instance and the returned error is always nil.
func newRunLengthEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*RunLengthEncoder, error) {
	encoder := NewRunLengthEncoder()
	return encoder, nil
}
// DecodeBytes decodes run-length encoded data and returns the decoded bytes.
//
// PDF specification, 7.4.5 RunLengthDecode Filter:
// The RunLengthDecode filter decodes data that has been encoded in a simple
// byte-oriented format based on run length. The encoded data shall be a
// sequence of runs, where each run shall consist of a length byte followed by
// 1 to 128 bytes of data. If the length byte is in the range 0 to 127, the
// following length + 1 (1 to 128) bytes shall be copied literally during
// decompression. If length is in the range 129 to 255, the following single
// byte shall be copied 257 - length (2 to 128) times during decompression.
// A length value of 128 shall denote EOD.
//
// Decoding stops at the first EOD marker; anything after it is ignored. If the
// input ends before an EOD marker is seen (including in the middle of a run),
// the reader's error (io.EOF) is returned together with a nil slice.
func (this *RunLengthEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
	const eod = 128

	src := bytes.NewReader(encoded)
	decoded := []byte{}
	for {
		length, err := src.ReadByte()
		if err != nil {
			return nil, err
		}
		if length == eod {
			return decoded, nil
		}

		if length > eod {
			// Repeat run: one byte copied 257-length times.
			value, err := src.ReadByte()
			if err != nil {
				return nil, err
			}
			decoded = append(decoded, bytes.Repeat([]byte{value}, 257-int(length))...)
			continue
		}

		// Literal run: length+1 bytes copied as they are.
		for remaining := int(length) + 1; remaining > 0; remaining-- {
			value, err := src.ReadByte()
			if err != nil {
				return nil, err
			}
			decoded = append(decoded, value)
		}
	}
}
// DecodeStream decodes the run-length encoded contents of a stream object
// (streamObj.Stream) and returns the decoded bytes.
func (this *RunLengthEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	encoded := streamObj.Stream
	return this.DecodeBytes(encoded)
}
// EncodeBytes run-length encodes data in the format expected by the PDF
// RunLengthDecode filter (PDF 32000-1:2008, 7.4.5) and returns the encoded
// bytes terminated by the EOD marker (128).
//
// The output is a sequence of runs, each being either
//   - a length byte L in 0..127 followed by L+1 literal bytes, or
//   - a length byte L in 129..255 followed by one byte that is to be
//     repeated 257-L times.
//
// Both kinds of run are capped at 127 bytes here (the format allows 128).
// Empty input is encoded as the EOD marker alone, so the result is always a
// well-formed stream that DecodeBytes accepts. The returned error is always nil.
func (this *RunLengthEncoder) EncodeBytes(data []byte) ([]byte, error) {
	const (
		eod      = 128 // Length byte that marks end of data.
		maxChunk = 127 // Longest literal or repeat run emitted by this encoder.
	)

	if len(data) == 0 {
		return []byte{eod}, nil
	}

	encoded := make([]byte, 0, len(data)+2)
	literal := make([]byte, 0, maxChunk) // Pending bytes not yet written as a literal run.

	prev := data[0]
	runLen := 1 // Number of pending copies of prev; 0 when prev is already accounted for.
	for _, cur := range data[1:] {
		if cur == prev {
			if len(literal) > 0 {
				// prev is the tail of the pending literal; it becomes the
				// first byte of the repeat run instead.
				literal = literal[:len(literal)-1]
				if len(literal) > 0 {
					encoded = append(encoded, byte(len(literal)-1))
					encoded = append(encoded, literal...)
				}
				literal = literal[:0]
				runLen = 1
			}
			runLen++
			if runLen >= maxChunk {
				encoded = append(encoded, byte(257-runLen), prev)
				runLen = 0
			}
		} else {
			switch {
			case runLen == 1:
				// A lone byte is not worth a repeat run; start a literal with it.
				literal = append(literal[:0], prev)
			case runLen > 1:
				encoded = append(encoded, byte(257-runLen), prev)
			}
			runLen = 0

			literal = append(literal, cur)
			if len(literal) >= maxChunk {
				encoded = append(encoded, byte(len(literal)-1))
				encoded = append(encoded, literal...)
				literal = literal[:0]
			}
		}
		prev = cur
	}

	// Flush whatever is still pending. At most one of literal/runLen is non-empty.
	switch {
	case len(literal) > 0:
		encoded = append(encoded, byte(len(literal)-1))
		encoded = append(encoded, literal...)
	case runLen == 1:
		// A single pending byte is written as a one-byte literal run.
		encoded = append(encoded, 0, prev)
	case runLen > 1:
		encoded = append(encoded, byte(257-runLen), prev)
	}

	return append(encoded, eod), nil
}
// MakeDecodeParams returns the decode parameters object for this encoder.
// It always returns nil: the run length encoder produces no DecodeParms entry.
func (this *RunLengthEncoder) MakeDecodeParams() PdfObject {
return nil
}
// MakeStreamDict returns a new encoding dictionary for a stream object.
// The dictionary contains a single "Filter" entry holding this encoder's
// filter name.
func (this *RunLengthEncoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := MakeDict()
	filterName := MakeName(this.GetFilterName())
	streamDict.Set("Filter", filterName)
	return streamDict
}
/////
// ASCII hex encoder/decoder.
type ASCIIHexEncoder struct {

View File

@ -70,6 +70,27 @@ func TestLZWEncoding(t *testing.T) {
return
}
}
// TestRunLengthEncoding checks that data encoded with the run length encoder
// decodes back to the original bytes. Besides a short mixed text/binary sample
// it covers a single byte and inputs longer than one encoder chunk, both as a
// repeated run and as a literal sequence without adjacent duplicates.
func TestRunLengthEncoding(t *testing.T) {
	longRun := make([]byte, 300)
	for i := range longRun {
		longRun[i] = 'a'
	}
	longLiteral := make([]byte, 300)
	for i := range longLiteral {
		// Neighbouring values always differ, so no repeat runs are formed.
		longLiteral[i] = byte(i % 7)
	}

	testcases := []struct {
		name string
		raw  []byte
	}{
		{"text with binary", []byte("this is a dummy text with some \x01\x02\x03 binary data")},
		{"single byte", []byte("a")},
		{"long run", longRun},
		{"long literal", longLiteral},
	}

	encoder := NewRunLengthEncoder()
	for _, tc := range testcases {
		encoded, err := encoder.EncodeBytes(tc.raw)
		if err != nil {
			t.Errorf("%s: Failed to RunLength encode data: %v", tc.name, err)
			continue
		}
		decoded, err := encoder.DecodeBytes(encoded)
		if err != nil {
			t.Errorf("%s: Failed to RunLength decode data: %v", tc.name, err)
			continue
		}
		if !compareSlices(decoded, tc.raw) {
			t.Errorf("%s: Slices not matching. RunLength", tc.name)
			t.Errorf("Encoded (%d): % x", len(encoded), encoded)
			t.Errorf("Decoded (%d): % x", len(decoded), decoded)
			t.Errorf("Raw (%d): % x", len(tc.raw), tc.raw)
		}
	}
}
// Test ASCII hex encoding.
func TestASCIIHexEncoding(t *testing.T) {

View File

@ -13,7 +13,7 @@ import (
// NewEncoderFromStream creates a StreamEncoder based on the stream's dictionary.
func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
filterObj := streamObj.PdfObjectDictionary.Get("Filter")
filterObj := TraceToDirectObject(streamObj.PdfObjectDictionary.Get("Filter"))
if filterObj == nil {
// No filter, return raw data back.
return NewRawEncoder(), nil
@ -61,6 +61,8 @@ func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
return newLZWEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameDCT {
return newDCTEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameRunLength {
return newRunLengthEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameASCIIHex {
return NewASCIIHexEncoder(), nil
} else if *method == StreamEncodingFilterNameASCII85 {

View File

@ -590,7 +590,7 @@ func (this *PdfColorspaceDeviceCMYK) ImageToRGB(img Image) (Image, error) {
decode = []float64{0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0}
}
if len(decode) != 8 {
common.Log.Debug("Invalid decode array (%d): % d", len(decode), decode)
common.Log.Debug("Invalid decode array (%d): % .3f", len(decode), decode)
return img, errors.New("Invalid decode array")
}
common.Log.Trace("Decode array: % f", decode)
@ -809,7 +809,7 @@ func (this *PdfColorspaceCalGray) ColorFromFloats(vals []float64) (PdfColor, err
}
func (this *PdfColorspaceCalGray) ColorFromPdfObjects(objects []PdfObject) (PdfColor, error) {
if len(objects) != 4 {
if len(objects) != 1 {
return nil, errors.New("Range check")
}
@ -953,7 +953,7 @@ func (this *PdfColorspaceCalRGB) String() string {
}
func (this *PdfColorspaceCalRGB) GetNumComponents() int {
return 1
return 3
}
func newPdfColorspaceCalRGBFromPdfObject(obj PdfObject) (*PdfColorspaceCalRGB, error) {
@ -1183,7 +1183,7 @@ func (this *PdfColorspaceCalRGB) ImageToRGB(img Image) (Image, error) {
maxVal := math.Pow(2, float64(img.BitsPerComponent)) - 1
rgbSamples := []uint32{}
for i := 0; i < len(samples); i++ {
for i := 0; i < len(samples)-2; i++ {
// A, B, C in range 0.0 to 1.0
aVal := float64(samples[i]) / maxVal
bVal := float64(samples[i+1]) / maxVal

View File

@ -138,7 +138,7 @@ func (r *PdfPageResources) GetShadingByName(keyName PdfObjectName) (*PdfShading,
return nil, false
}
shadingDict, ok := r.Shading.(*PdfObjectDictionary)
shadingDict, ok := TraceToDirectObject(r.Shading).(*PdfObjectDictionary)
if !ok {
common.Log.Debug("ERROR: Invalid Shading entry - not a dict (got %T)", r.Shading)
return nil, false
@ -178,7 +178,7 @@ func (r *PdfPageResources) GetPatternByName(keyName PdfObjectName) (*PdfPattern,
return nil, false
}
patternDict, ok := r.Pattern.(*PdfObjectDictionary)
patternDict, ok := TraceToDirectObject(r.Pattern).(*PdfObjectDictionary)
if !ok {
common.Log.Debug("ERROR: Invalid Pattern entry - not a dict (got %T)", r.Pattern)
return nil, false
@ -326,7 +326,7 @@ func (r *PdfPageResources) GetXObjectByName(keyName PdfObjectName) (*PdfObjectSt
}
dict := stream.PdfObjectDictionary
name, ok := dict.Get("Subtype").(*PdfObjectName)
name, ok := TraceToDirectObject(dict.Get("Subtype")).(*PdfObjectName)
if !ok {
common.Log.Debug("XObject Subtype not a Name, dict: %s", dict.String())
return nil, XObjectTypeUndefined

View File

@ -300,7 +300,7 @@ func NewXObjectImageFromStream(stream *PdfObjectStream) (*XObjectImage, error) {
}
img.Filter = encoder
if obj := dict.Get("Width"); obj != nil {
if obj := TraceToDirectObject(dict.Get("Width")); obj != nil {
iObj, ok := obj.(*PdfObjectInteger)
if !ok {
return nil, errors.New("Invalid image width object")
@ -311,7 +311,7 @@ func NewXObjectImageFromStream(stream *PdfObjectStream) (*XObjectImage, error) {
return nil, errors.New("Width missing")
}
if obj := dict.Get("Height"); obj != nil {
if obj := TraceToDirectObject(dict.Get("Height")); obj != nil {
iObj, ok := obj.(*PdfObjectInteger)
if !ok {
return nil, errors.New("Invalid image height object")
@ -322,7 +322,7 @@ func NewXObjectImageFromStream(stream *PdfObjectStream) (*XObjectImage, error) {
return nil, errors.New("Height missing")
}
if obj := dict.Get("ColorSpace"); obj != nil {
if obj := TraceToDirectObject(dict.Get("ColorSpace")); obj != nil {
cs, err := newPdfColorspaceFromPdfObject(obj)
if err != nil {
return nil, err
@ -334,7 +334,7 @@ func NewXObjectImageFromStream(stream *PdfObjectStream) (*XObjectImage, error) {
img.ColorSpace = NewPdfColorspaceDeviceGray()
}
if obj := dict.Get("BitsPerComponent"); obj != nil {
if obj := TraceToDirectObject(dict.Get("BitsPerComponent")); obj != nil {
iObj, ok := obj.(*PdfObjectInteger)
if !ok {
return nil, errors.New("Invalid image height object")