mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00
Implemented ASCII85Decode filter
This commit is contained in:
parent
b8a3ec7180
commit
b4c259460c
@ -15,6 +15,7 @@ import (
|
||||
"bytes"
|
||||
"compress/zlib"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
@ -29,6 +30,7 @@ const (
|
||||
StreamEncodingFilterNameFlate = "FlateDecode"
|
||||
StreamEncodingFilterNameLZW = "LZWDecode"
|
||||
StreamEncodingFilterNameASCIIHex = "ASCIIHexDecode"
|
||||
StreamEncodingFilterNameASCII85 = "ASCII85Decode"
|
||||
)
|
||||
|
||||
type StreamEncoder interface {
|
||||
@ -67,7 +69,7 @@ func NewFlateEncoder() *FlateEncoder {
|
||||
}
|
||||
|
||||
func (this *FlateEncoder) GetFilterName() string {
|
||||
return "FlateDecode"
|
||||
return StreamEncodingFilterNameFlate
|
||||
}
|
||||
|
||||
func (this *FlateEncoder) MakeDecodeParams() PdfObject {
|
||||
@ -351,7 +353,7 @@ func NewLZWEncoder() *LZWEncoder {
|
||||
}
|
||||
|
||||
func (this *LZWEncoder) GetFilterName() string {
|
||||
return "LZWDecode"
|
||||
return StreamEncodingFilterNameLZW
|
||||
}
|
||||
|
||||
func (this *LZWEncoder) MakeDecodeParams() PdfObject {
|
||||
@ -646,14 +648,14 @@ func (this *LZWEncoder) EncodeBytes(data []byte) ([]byte, error) {
|
||||
type ASCIIHexEncoder struct {
|
||||
}
|
||||
|
||||
// Make a new LZW encoder with default parameters.
|
||||
// Make a new ASCII hex encoder.
|
||||
func NewASCIIHexEncoder() *ASCIIHexEncoder {
|
||||
encoder := &ASCIIHexEncoder{}
|
||||
return encoder
|
||||
}
|
||||
|
||||
func (this *ASCIIHexEncoder) GetFilterName() string {
|
||||
return "ASCIIHexDecode"
|
||||
return StreamEncodingFilterNameASCIIHex
|
||||
}
|
||||
|
||||
func (this *ASCIIHexEncoder) MakeDecodeParams() PdfObject {
|
||||
@ -717,6 +719,174 @@ func (this *ASCIIHexEncoder) EncodeBytes(data []byte) ([]byte, error) {
|
||||
return encoded.Bytes(), nil
|
||||
}
|
||||
|
||||
//
|
||||
// ASCII85 encoder/decoder.
|
||||
//
|
||||
// ASCII85Encoder implements the ASCII85Decode stream filter (base-85 text encoding).
// It carries no state, so the zero value is ready to use.
type ASCII85Encoder struct{}
|
||||
|
||||
// Make a new ASCII85 encoder.
|
||||
func NewASCII85Encoder() *ASCII85Encoder {
|
||||
encoder := &ASCII85Encoder{}
|
||||
return encoder
|
||||
}
|
||||
|
||||
func (this *ASCII85Encoder) GetFilterName() string {
|
||||
return StreamEncodingFilterNameASCII85
|
||||
}
|
||||
|
||||
func (this *ASCII85Encoder) MakeDecodeParams() PdfObject {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Make a new instance of an encoding dictionary for a stream object.
|
||||
func (this *ASCII85Encoder) MakeStreamDict() *PdfObjectDictionary {
|
||||
dict := PdfObjectDictionary{}
|
||||
|
||||
dict["Filter"] = MakeName(this.GetFilterName())
|
||||
return &dict
|
||||
}
|
||||
|
||||
// 5 ASCII characters -> 4 raw binary bytes
|
||||
func (this *ASCII85Encoder) DecodeBytes(encoded []byte) ([]byte, error) {
|
||||
decoded := []byte{}
|
||||
|
||||
i := 0
|
||||
eod := false
|
||||
|
||||
for i < len(encoded) && !eod {
|
||||
codes := [5]byte{0, 0, 0, 0, 0}
|
||||
spaces := 0 // offset due to whitespace.
|
||||
j := 0
|
||||
toWrite := 4
|
||||
for j < 5+spaces {
|
||||
if i+j == len(encoded) {
|
||||
break
|
||||
}
|
||||
code := encoded[i+j]
|
||||
if IsWhiteSpace(code) {
|
||||
// Skip whitespace.
|
||||
spaces++
|
||||
j++
|
||||
continue
|
||||
} else if code == '~' && i+j+1 < len(encoded) && encoded[i+j+1] == '>' {
|
||||
toWrite = (j - spaces) - 1
|
||||
if toWrite < 0 {
|
||||
toWrite = 0
|
||||
}
|
||||
// EOD marker. Marks end of data.
|
||||
eod = true
|
||||
break
|
||||
} else if code >= '!' && code <= 'u' {
|
||||
// Valid code.
|
||||
code -= '!'
|
||||
} else if code == 'z' && j-spaces == 0 {
|
||||
// 'z' in beginning of the byte sequence means that all 5 codes are 0.
|
||||
// Already all 0 initialized, so can break here.
|
||||
toWrite = 4
|
||||
j++
|
||||
break
|
||||
} else {
|
||||
common.Log.Error("Failed decoding, invalid code")
|
||||
return nil, errors.New("Invalid code encountered")
|
||||
}
|
||||
|
||||
codes[j-spaces] = code
|
||||
j++
|
||||
}
|
||||
i += j
|
||||
|
||||
// Pad with 'u' 84 (unused ones)
|
||||
// Takes care of issues at ends for input data that is not a multiple of 4-bytes.
|
||||
for m := toWrite + 1; m < 5; m++ {
|
||||
codes[m] = 84
|
||||
}
|
||||
|
||||
// Convert to a uint32 value.
|
||||
value := uint32(codes[0])*85*85*85*85 + uint32(codes[1])*85*85*85 + uint32(codes[2])*85*85 + uint32(codes[3])*85 + uint32(codes[4])
|
||||
|
||||
// Convert to 4 bytes.
|
||||
decodedBytes := []byte{
|
||||
byte((value >> 24) & 0xff),
|
||||
byte((value >> 16) & 0xff),
|
||||
byte((value >> 8) & 0xff),
|
||||
byte(value & 0xff)}
|
||||
|
||||
// This accounts for the end of data, where the original data length is not a multiple of 4.
|
||||
// In that case, 0 bytes are assumed but only
|
||||
decoded = append(decoded, decodedBytes[:toWrite]...)
|
||||
}
|
||||
|
||||
return decoded, nil
|
||||
}
|
||||
|
||||
// ASCII85 stream decoding.
|
||||
func (this *ASCII85Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
|
||||
return this.DecodeBytes(streamObj.Stream)
|
||||
}
|
||||
|
||||
// Convert a base 256 number to a series of base 85 values (5 codes).
|
||||
// 85^5 = 4437053125 > 256^4 = 4294967296
|
||||
// So 5 base-85 numbers will always be enough to cover 4 base-256 numbers.
|
||||
// The base 256 value is already converted to an uint32 value.
|
||||
func (this *ASCII85Encoder) base256Tobase85(base256val uint32) [5]byte {
|
||||
base85 := [5]byte{0, 0, 0, 0, 0}
|
||||
remainder := base256val
|
||||
for i := 0; i < 5; i++ {
|
||||
divider := uint32(1)
|
||||
for j := 0; j < 4-i; j++ {
|
||||
divider *= 85
|
||||
}
|
||||
val := remainder / divider
|
||||
remainder = remainder % divider
|
||||
base85[i] = byte(val)
|
||||
}
|
||||
return base85
|
||||
}
|
||||
|
||||
// Encode data into ASCII85 encoded format.
|
||||
func (this *ASCII85Encoder) EncodeBytes(data []byte) ([]byte, error) {
|
||||
var encoded bytes.Buffer
|
||||
|
||||
for i := 0; i < len(data); i += 4 {
|
||||
b1 := data[i]
|
||||
n := 1
|
||||
|
||||
b2 := byte(0)
|
||||
if i+1 < len(data) {
|
||||
b2 = data[i+1]
|
||||
n++
|
||||
}
|
||||
|
||||
b3 := byte(0)
|
||||
if i+2 < len(data) {
|
||||
b3 = data[i+2]
|
||||
n++
|
||||
}
|
||||
|
||||
b4 := byte(0)
|
||||
if i+3 < len(data) {
|
||||
b4 = data[i+3]
|
||||
n++
|
||||
}
|
||||
|
||||
// Convert to a uint32 number.
|
||||
base256 := (uint32(b1) << 24) | (uint32(b2) << 16) | (uint32(b3) << 8) | uint32(b4)
|
||||
if base256 == 0 {
|
||||
encoded.WriteByte('z')
|
||||
} else {
|
||||
base85vals := this.base256Tobase85(base256)
|
||||
for _, val := range base85vals[:n+1] {
|
||||
encoded.WriteByte(val + '!')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// EOD.
|
||||
encoded.WriteString("~>")
|
||||
return encoded.Bytes(), nil
|
||||
}
|
||||
|
||||
//
|
||||
// Raw encoder/decoder (no encoding, pass through)
|
||||
//
|
||||
@ -853,6 +1023,9 @@ func newMultiEncoderFromStream(streamObj *PdfObjectStream) (*MultiEncoder, error
|
||||
} else if *name == StreamEncodingFilterNameASCIIHex {
|
||||
encoder := NewASCIIHexEncoder()
|
||||
mencoder.AddEncoder(encoder)
|
||||
} else if *name == StreamEncodingFilterNameASCII85 {
|
||||
encoder := NewASCII85Encoder()
|
||||
mencoder.AddEncoder(encoder)
|
||||
} else {
|
||||
common.Log.Error("Unsupported filter %s", *name)
|
||||
return nil, fmt.Errorf("Invalid filter in multi filter array")
|
||||
|
@ -6,6 +6,7 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"testing"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
@ -90,6 +91,101 @@ func TestASCIIHexEncoding(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ASCII85.
|
||||
func TestASCII85EncodingWikipediaExample(t *testing.T) {
|
||||
expected := `Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.`
|
||||
// Base 64 encoded, Ascii85 encoded version (wikipedia).
|
||||
encodedInBase64 := `OWpxb15CbGJELUJsZUIxREorKitGKGYscS8wSmhLRjxHTD5DakAuNEdwJGQ3RiEsTDdAPDZAKS8wSkRFRjxHJTwrRVY6MkYhLE88REorKi5APCpLMEA8NkwoRGYtXDBFYzVlO0RmZlooRVplZS5CbC45cEYiQUdYQlBDc2krREdtPkAzQkIvRiomT0NBZnUyL0FLWWkoREliOkBGRCwqKStDXVU9QDNCTiNFY1lmOEFURDNzQHE/ZCRBZnRWcUNoW05xRjxHOjgrRVY6LitDZj4tRkQ1VzhBUmxvbERJYWwoRElkPGpAPD8zckA6RiVhK0Q1OCdBVEQ0JEJsQGwzRGU6LC1ESnNgOEFSb0ZiLzBKTUtAcUI0XkYhLFI8QUtaJi1EZlRxQkclRz51RC5SVHBBS1lvJytDVC81K0NlaSNESUk/KEUsOSlvRioyTTcvY34+`
|
||||
encoded, _ := base64.StdEncoding.DecodeString(encodedInBase64)
|
||||
|
||||
encoder := NewASCII85Encoder()
|
||||
enc1, err := encoder.EncodeBytes([]byte(expected))
|
||||
if err != nil {
|
||||
t.Errorf("Fail")
|
||||
return
|
||||
}
|
||||
if string(enc1) != string(encoded) {
|
||||
t.Errorf("ASCII85 encoding wiki example fail")
|
||||
return
|
||||
}
|
||||
|
||||
decoded, err := encoder.DecodeBytes([]byte(encoded))
|
||||
if err != nil {
|
||||
t.Errorf("Fail, error: %v", err)
|
||||
return
|
||||
}
|
||||
if expected != string(decoded) {
|
||||
t.Errorf("Mismatch! '%s' vs '%s'", decoded, expected)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func TestASCII85Encoding(t *testing.T) {
|
||||
encoded := `FD,B0+EVmJAKYo'+D#G#De*R"B-:o0+E_a:A0>T(+AbuZ@;]Tu:ddbqAnc'mEr~>`
|
||||
expected := "this type of encoding is used in PS and PDF files"
|
||||
|
||||
encoder := NewASCII85Encoder()
|
||||
|
||||
enc1, err := encoder.EncodeBytes([]byte(expected))
|
||||
if err != nil {
|
||||
t.Errorf("Fail")
|
||||
return
|
||||
}
|
||||
if encoded != string(enc1) {
|
||||
t.Errorf("Encoding error")
|
||||
return
|
||||
}
|
||||
|
||||
decoded, err := encoder.DecodeBytes([]byte(encoded))
|
||||
if err != nil {
|
||||
t.Errorf("Fail, error: %v", err)
|
||||
return
|
||||
}
|
||||
if expected != string(decoded) {
|
||||
t.Errorf("Mismatch! '%s' vs '%s'", decoded, expected)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// TestASCII85DecodingTestCase pairs an ASCII85 encoded input with the raw
// data it is expected to decode to.
type TestASCII85DecodingTestCase struct {
	Encoded, Expected string
}
|
||||
|
||||
func TestASCII85Decoding(t *testing.T) {
|
||||
// Map encoded -> Decoded
|
||||
testcases := []TestASCII85DecodingTestCase{
|
||||
{"z~>", "\x00\x00\x00\x00"},
|
||||
{"z ~>", "\x00\x00\x00\x00"},
|
||||
{"zz~>", "\x00\x00\x00\x00\x00\x00\x00\x00"},
|
||||
{" zz~>", "\x00\x00\x00\x00\x00\x00\x00\x00"},
|
||||
{" z z~>", "\x00\x00\x00\x00\x00\x00\x00\x00"},
|
||||
{" z z ~>", "\x00\x00\x00\x00\x00\x00\x00\x00"},
|
||||
{"+T~>", `!`},
|
||||
{"+`d~>", `!s`},
|
||||
{"+`hr~>", `!sz`},
|
||||
{"+`hsS~>", `!szx`},
|
||||
{"+`hsS+T~>", `!szx!`},
|
||||
{"+ `hs S +T ~>", `!szx!`},
|
||||
}
|
||||
|
||||
encoder := NewASCII85Encoder()
|
||||
|
||||
for _, testcase := range testcases {
|
||||
encoded := testcase.Encoded
|
||||
expected := testcase.Expected
|
||||
decoded, err := encoder.DecodeBytes([]byte(encoded))
|
||||
if err != nil {
|
||||
t.Errorf("Fail, error: %v", err)
|
||||
return
|
||||
}
|
||||
if expected != string(decoded) {
|
||||
t.Errorf("Mismatch! '%s' vs '%s'", decoded, expected)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test multi encoder with FlateDecode and ASCIIHexDecode.
|
||||
func TestMultiEncoder(t *testing.T) {
|
||||
rawStream := []byte("this is a dummy text with some \x01\x02\x03 binary data")
|
||||
|
@ -57,10 +57,12 @@ func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
|
||||
return NewASCIIHexEncoder(), nil
|
||||
} else if *method == StreamEncodingFilterNameLZW {
|
||||
return newLZWEncoderFromStream(streamObj, nil)
|
||||
}
|
||||
|
||||
} else if *method == StreamEncodingFilterNameASCII85 {
|
||||
return NewASCII85Encoder(), nil
|
||||
} else {
|
||||
common.Log.Debug("ERROR: Unsupported encoding method!")
|
||||
return nil, fmt.Errorf("Unsupported encoding method (%s)", *method)
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes the stream.
|
||||
|
Loading…
x
Reference in New Issue
Block a user