Implemented ASCII85Decode filter

This commit is contained in:
Gunnsteinn Hall 2017-02-23 15:25:23 +00:00
parent b8a3ec7180
commit b4c259460c
3 changed files with 278 additions and 7 deletions

View File

@ -15,6 +15,7 @@ import (
"bytes"
"compress/zlib"
"encoding/hex"
"errors"
"fmt"
"io"
@ -29,6 +30,7 @@ const (
StreamEncodingFilterNameFlate = "FlateDecode"
StreamEncodingFilterNameLZW = "LZWDecode"
StreamEncodingFilterNameASCIIHex = "ASCIIHexDecode"
StreamEncodingFilterNameASCII85 = "ASCII85Decode"
)
type StreamEncoder interface {
@ -67,7 +69,7 @@ func NewFlateEncoder() *FlateEncoder {
}
// GetFilterName returns the name of the stream filter implemented by this
// encoder, StreamEncodingFilterNameFlate ("FlateDecode").
func (this *FlateEncoder) GetFilterName() string {
	// A single return through the shared constant; the duplicated literal
	// return that preceded it made this statement unreachable.
	return StreamEncodingFilterNameFlate
}
func (this *FlateEncoder) MakeDecodeParams() PdfObject {
@ -351,7 +353,7 @@ func NewLZWEncoder() *LZWEncoder {
}
// GetFilterName returns the name of the stream filter implemented by this
// encoder, StreamEncodingFilterNameLZW ("LZWDecode").
func (this *LZWEncoder) GetFilterName() string {
	// A single return through the shared constant; the duplicated literal
	// return that preceded it made this statement unreachable.
	return StreamEncodingFilterNameLZW
}
func (this *LZWEncoder) MakeDecodeParams() PdfObject {
@ -646,14 +648,14 @@ func (this *LZWEncoder) EncodeBytes(data []byte) ([]byte, error) {
// ASCIIHexEncoder is the encoder/decoder type for the ASCIIHexDecode stream
// filter. It carries no state.
type ASCIIHexEncoder struct{}
// NewASCIIHexEncoder makes a new ASCII hex encoder.
func NewASCIIHexEncoder() *ASCIIHexEncoder {
	return &ASCIIHexEncoder{}
}
// GetFilterName returns the name of the stream filter implemented by this
// encoder, StreamEncodingFilterNameASCIIHex ("ASCIIHexDecode").
func (this *ASCIIHexEncoder) GetFilterName() string {
	// A single return through the shared constant; the duplicated literal
	// return that preceded it made this statement unreachable.
	return StreamEncodingFilterNameASCIIHex
}
func (this *ASCIIHexEncoder) MakeDecodeParams() PdfObject {
@ -717,6 +719,174 @@ func (this *ASCIIHexEncoder) EncodeBytes(data []byte) ([]byte, error) {
return encoded.Bytes(), nil
}
//
// ASCII85 encoder/decoder.
//
// ASCII85Encoder is the encoder/decoder type for the ASCII85Decode stream
// filter. It carries no state.
type ASCII85Encoder struct{}
// NewASCII85Encoder makes a new ASCII85 encoder.
func NewASCII85Encoder() *ASCII85Encoder {
	return &ASCII85Encoder{}
}
// GetFilterName returns the name of the stream filter implemented by this
// encoder, StreamEncodingFilterNameASCII85 ("ASCII85Decode").
func (this *ASCII85Encoder) GetFilterName() string {
	return StreamEncodingFilterNameASCII85
}
// MakeDecodeParams always returns nil for the ASCII85 encoder.
func (this *ASCII85Encoder) MakeDecodeParams() PdfObject {
	return nil
}
// MakeStreamDict makes a new instance of an encoding dictionary for a stream
// object. The dictionary holds a single "Filter" entry naming this encoder's
// filter (see GetFilterName).
func (this *ASCII85Encoder) MakeStreamDict() *PdfObjectDictionary {
	dict := PdfObjectDictionary{
		"Filter": MakeName(this.GetFilterName()),
	}
	return &dict
}
// DecodeBytes decodes ASCII85 (base-85) encoded data into raw bytes.
//
// Every group of 5 ASCII characters in the range '!'..'u' represents 4 raw
// bytes. The single character 'z' is shorthand for 4 zero bytes and is only
// accepted at the start of a group. White space (as reported by IsWhiteSpace)
// is skipped wherever it appears. Decoding stops at the "~>" end-of-data (EOD)
// marker or at the end of the input, whichever comes first; anything after the
// marker is ignored.
//
// A final partial group of n characters (2 <= n <= 4) yields n-1 bytes; a
// single dangling character cannot represent a whole byte and is dropped. The
// partial group is treated the same way whether the input is terminated by the
// EOD marker or simply ends, and trailing white space never produces output.
//
// An error is returned (and logged) when a character is not valid at its
// position. Group values are accumulated in a uint32, so a group whose value
// exceeds 32 bits wraps around rather than being rejected.
func (this *ASCII85Encoder) DecodeBytes(encoded []byte) ([]byte, error) {
	// Kept non-nil so that empty input still yields an empty, non-nil slice.
	decoded := []byte{}

	var group [5]byte // base-85 digits of the group being assembled
	count := 0        // number of digits collected in group so far

	// emit converts the 5 digits in group to a 32-bit value and appends the
	// first n bytes of its big-endian representation to the output.
	emit := func(n int) {
		var value uint32
		for _, digit := range group {
			value = value*85 + uint32(digit)
		}
		quad := [4]byte{byte(value >> 24), byte(value >> 16), byte(value >> 8), byte(value)}
		decoded = append(decoded, quad[:n]...)
	}

scan:
	for pos := 0; pos < len(encoded); pos++ {
		code := encoded[pos]
		switch {
		case IsWhiteSpace(code):
			// Skip whitespace.
		case code == '~' && pos+1 < len(encoded) && encoded[pos+1] == '>':
			// EOD marker. Marks end of data.
			break scan
		case code >= '!' && code <= 'u':
			// Valid code.
			group[count] = code - '!'
			count++
			if count == len(group) {
				emit(4)
				count = 0
			}
		case code == 'z' && count == 0:
			// 'z' at the beginning of a group stands for 4 zero bytes.
			decoded = append(decoded, 0, 0, 0, 0)
		default:
			common.Log.Error("Failed decoding, invalid code")
			return nil, errors.New("Invalid code encountered")
		}
	}

	// Flush a trailing partial group. The missing digits are padded with
	// 'u' (84), the largest digit, so that truncating the resulting value to
	// count-1 bytes gives back the original bytes.
	if count > 1 {
		for k := count; k < len(group); k++ {
			group[k] = 84
		}
		emit(count - 1)
	}

	return decoded, nil
}
// DecodeStream performs ASCII85 decoding of a stream object: the bytes held in
// streamObj.Stream are passed to DecodeBytes and its result is returned as is.
func (this *ASCII85Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	raw := streamObj.Stream
	return this.DecodeBytes(raw)
}
// base256Tobase85 converts a base 256 number, already packed into a uint32,
// into its 5 base 85 digits, most significant digit first.
//
// 85^5 = 4437053125 > 256^4 = 4294967296, so 5 base-85 digits are always
// enough to represent 4 base-256 digits.
func (this *ASCII85Encoder) base256Tobase85(base256val uint32) [5]byte {
	var digits [5]byte

	// Peel the digits off from the least significant end.
	rest := base256val
	for pos := len(digits) - 1; pos >= 0; pos-- {
		digits[pos] = byte(rest % 85)
		rest /= 85
	}

	return digits
}
// EncodeBytes encodes data into ASCII85 encoded format.
//
// The input is consumed in groups of 4 bytes, each written as 5 characters in
// the range '!'..'u'. A complete group of 4 zero bytes is written as the single
// character 'z'. If the input length is not a multiple of 4, the final group
// of n bytes (1 <= n <= 3) is padded with zero bytes for the conversion and
// only the first n+1 characters are written; the 'z' shorthand is never used
// for such a partial group, since it would decode to 4 bytes instead of n.
// The output is always terminated by the "~>" end-of-data marker.
//
// The returned error is always nil.
func (this *ASCII85Encoder) EncodeBytes(data []byte) ([]byte, error) {
	var encoded bytes.Buffer

	for i := 0; i < len(data); i += 4 {
		// Number of input bytes available for this group (1..4).
		n := len(data) - i
		if n > 4 {
			n = 4
		}

		// Pack the group into a big-endian uint32; missing trailing bytes
		// count as zero.
		var base256 uint32
		for k := 0; k < 4; k++ {
			base256 <<= 8
			if k < n {
				base256 |= uint32(data[i+k])
			}
		}

		if base256 == 0 && n == 4 {
			encoded.WriteByte('z')
			continue
		}

		base85vals := this.base256Tobase85(base256)
		for _, val := range base85vals[:n+1] {
			encoded.WriteByte(val + '!')
		}
	}

	// EOD.
	encoded.WriteString("~>")

	return encoded.Bytes(), nil
}
//
// Raw encoder/decoder (no encoding, pass through)
//
@ -853,6 +1023,9 @@ func newMultiEncoderFromStream(streamObj *PdfObjectStream) (*MultiEncoder, error
} else if *name == StreamEncodingFilterNameASCIIHex {
encoder := NewASCIIHexEncoder()
mencoder.AddEncoder(encoder)
} else if *name == StreamEncodingFilterNameASCII85 {
encoder := NewASCII85Encoder()
mencoder.AddEncoder(encoder)
} else {
common.Log.Error("Unsupported filter %s", *name)
return nil, fmt.Errorf("Invalid filter in multi filter array")

View File

@ -6,6 +6,7 @@
package core
import (
"encoding/base64"
"testing"
"github.com/unidoc/unidoc/common"
@ -90,6 +91,101 @@ func TestASCIIHexEncoding(t *testing.T) {
}
}
// ASCII85.

// TestASCII85EncodingWikipediaExample encodes the example text from the
// Wikipedia Ascii85 article and compares the result with the reference
// encoding, then decodes the reference encoding and compares it with the
// original text. The reference encoding is stored base64 encoded in the test.
func TestASCII85EncodingWikipediaExample(t *testing.T) {
	expected := `Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.`
	// Base 64 encoded, Ascii85 encoded version (wikipedia).
	encodedInBase64 := `OWpxb15CbGJELUJsZUIxREorKitGKGYscS8wSmhLRjxHTD5DakAuNEdwJGQ3RiEsTDdAPDZAKS8wSkRFRjxHJTwrRVY6MkYhLE88REorKi5APCpLMEA8NkwoRGYtXDBFYzVlO0RmZlooRVplZS5CbC45cEYiQUdYQlBDc2krREdtPkAzQkIvRiomT0NBZnUyL0FLWWkoREliOkBGRCwqKStDXVU9QDNCTiNFY1lmOEFURDNzQHE/ZCRBZnRWcUNoW05xRjxHOjgrRVY6LitDZj4tRkQ1VzhBUmxvbERJYWwoRElkPGpAPD8zckA6RiVhK0Q1OCdBVEQ0JEJsQGwzRGU6LC1ESnNgOEFSb0ZiLzBKTUtAcUI0XkYhLFI8QUtaJi1EZlRxQkclRz51RC5SVHBBS1lvJytDVC81K0NlaSNESUk/KEUsOSlvRioyTTcvY34+`

	// A corrupted fixture must fail here, not surface later as a confusing
	// encoding mismatch.
	encoded, err := base64.StdEncoding.DecodeString(encodedInBase64)
	if err != nil {
		t.Errorf("Fail, invalid base64 fixture: %v", err)
		return
	}

	encoder := NewASCII85Encoder()
	enc1, err := encoder.EncodeBytes([]byte(expected))
	if err != nil {
		t.Errorf("Fail")
		return
	}
	if string(enc1) != string(encoded) {
		t.Errorf("ASCII85 encoding wiki example fail")
		return
	}

	decoded, err := encoder.DecodeBytes(encoded)
	if err != nil {
		t.Errorf("Fail, error: %v", err)
		return
	}
	if expected != string(decoded) {
		t.Errorf("Mismatch! '%s' vs '%s'", decoded, expected)
		return
	}
}
// TestASCII85Encoding checks a short plain-text sample in both directions:
// the text must encode to the reference ASCII85 string, and the reference
// string must decode back to the text.
func TestASCII85Encoding(t *testing.T) {
	const (
		plain     = "this type of encoding is used in PS and PDF files"
		reference = `FD,B0+EVmJAKYo'+D#G#De*R"B-:o0+E_a:A0>T(+AbuZ@;]Tu:ddbqAnc'mEr~>`
	)

	codec := NewASCII85Encoder()

	// Encoding direction.
	gotEncoded, err := codec.EncodeBytes([]byte(plain))
	if err != nil {
		t.Errorf("Fail")
		return
	}
	if string(gotEncoded) != reference {
		t.Errorf("Encoding error")
		return
	}

	// Decoding direction.
	gotDecoded, err := codec.DecodeBytes([]byte(reference))
	if err != nil {
		t.Errorf("Fail, error: %v", err)
		return
	}
	if string(gotDecoded) != plain {
		t.Errorf("Mismatch! '%s' vs '%s'", gotDecoded, plain)
		return
	}
}
// TestASCII85DecodingTestCase is one entry of the ASCII85 decoding test table.
type TestASCII85DecodingTestCase struct {
	Encoded  string // ASCII85 input handed to the decoder.
	Expected string // Raw bytes the decoder is expected to produce.
}
// TestASCII85Decoding runs the decoder over a table of small inputs covering
// the 'z' shorthand, embedded white space and partial final groups. The test
// stops at the first case that fails.
func TestASCII85Decoding(t *testing.T) {
	// Encoded input -> expected decoded output.
	testcases := []TestASCII85DecodingTestCase{
		{Encoded: "z~>", Expected: "\x00\x00\x00\x00"},
		{Encoded: "z ~>", Expected: "\x00\x00\x00\x00"},
		{Encoded: "zz~>", Expected: "\x00\x00\x00\x00\x00\x00\x00\x00"},
		{Encoded: " zz~>", Expected: "\x00\x00\x00\x00\x00\x00\x00\x00"},
		{Encoded: " z z~>", Expected: "\x00\x00\x00\x00\x00\x00\x00\x00"},
		{Encoded: " z z ~>", Expected: "\x00\x00\x00\x00\x00\x00\x00\x00"},
		{Encoded: "+T~>", Expected: `!`},
		{Encoded: "+`d~>", Expected: `!s`},
		{Encoded: "+`hr~>", Expected: `!sz`},
		{Encoded: "+`hsS~>", Expected: `!szx`},
		{Encoded: "+`hsS+T~>", Expected: `!szx!`},
		{Encoded: "+ `hs S +T ~>", Expected: `!szx!`},
	}

	codec := NewASCII85Encoder()
	for idx := range testcases {
		tc := &testcases[idx]

		got, err := codec.DecodeBytes([]byte(tc.Encoded))
		if err != nil {
			t.Errorf("Fail, error: %v", err)
			return
		}
		if string(got) != tc.Expected {
			t.Errorf("Mismatch! '%s' vs '%s'", got, tc.Expected)
			return
		}
	}
}
// Test multi encoder with FlateDecode and ASCIIHexDecode.
func TestMultiEncoder(t *testing.T) {
rawStream := []byte("this is a dummy text with some \x01\x02\x03 binary data")

View File

@ -57,10 +57,12 @@ func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
return NewASCIIHexEncoder(), nil
} else if *method == StreamEncodingFilterNameLZW {
return newLZWEncoderFromStream(streamObj, nil)
} else if *method == StreamEncodingFilterNameASCII85 {
return NewASCII85Encoder(), nil
} else {
common.Log.Debug("ERROR: Unsupported encoding method!")
return nil, fmt.Errorf("Unsupported encoding method (%s)", *method)
}
common.Log.Debug("ERROR: Unsupported encoding method!")
return nil, fmt.Errorf("Unsupported encoding method (%s)", *method)
}
// Decodes the stream.