// unipdf/pdf/core/encoding.go (1512 lines, 40 KiB, Go)

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package core
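// Implements the stream encoders (filters) for PDF. Currently supported:
// - Raw (Identity)
// - FlateDecode
// - LZWDecode
// - DCTDecode (JPEG)
// - ASCIIHexDecode
// - ASCII85Decode
// - Multi encoder (several filters applied in series)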
import (
	"bytes"
	"compress/zlib"
	"encoding/hex"
	"errors"
	"fmt"
	goimage "image"
	gocolor "image/color"
	"image/jpeg"
	"io"

	// Need two slightly different implementations of LZW (EarlyChange parameter).
	lzw0 "compress/lzw"

	lzw1 "golang.org/x/image/tiff/lzw"

	"github.com/unidoc/unidoc/common"
)
// Names of the supported stream filters, as returned by the GetFilterName
// method of the corresponding encoder.
const (
	StreamEncodingFilterNameFlate    = "FlateDecode"
	StreamEncodingFilterNameLZW      = "LZWDecode"
	StreamEncodingFilterNameDCT      = "DCTDecode"
	StreamEncodingFilterNameASCIIHex = "ASCIIHexDecode"
	StreamEncodingFilterNameASCII85  = "ASCII85Decode"
)
// StreamEncoder is the interface implemented by every stream filter in this file.
type StreamEncoder interface {
	// GetFilterName returns the name identifying the filter.
	GetFilterName() string
	// MakeDecodeParams returns the decode parameters object for the filter,
	// or nil when there are none.
	MakeDecodeParams() PdfObject
	// MakeStreamDict returns a new dictionary describing the encoding of a stream.
	MakeStreamDict() *PdfObjectDictionary

	// EncodeBytes encodes raw data with the filter.
	EncodeBytes(data []byte) ([]byte, error)
	// DecodeBytes decodes data that was encoded with the filter.
	DecodeBytes(encoded []byte) ([]byte, error)
	// DecodeStream decodes the contents of a stream object.
	DecodeStream(streamObj *PdfObjectStream) ([]byte, error)
}
// FlateEncoder implements Flate (zlib) encoding and decoding, optionally
// combined with a predictor.
type FlateEncoder struct {
	// Predictor selects the prediction function: 1 means no prediction,
	// 2 is the TIFF predictor and 10-15 are the PNG predictors.
	Predictor int
	// BitsPerComponent is the number of bits per color component.
	BitsPerComponent int
	// Predictor parameters: Columns is the number of samples per row and
	// Colors the number of interleaved color components per sample.
	Columns, Colors int
}
// NewFlateEncoder returns a flate encoder with the default parameters:
// no prediction (predictor 1), 8 bits per component, 1 color and 1 column.
func NewFlateEncoder() *FlateEncoder {
	return &FlateEncoder{
		Predictor:        1, // Default (no prediction).
		BitsPerComponent: 8, // Currently only supporting 8.
		Columns:          1,
		Colors:           1,
	}
}
// SetPredictor enables prediction for encoding. columns is the number of
// samples per row, used for grouping the data together for compression.
func (this *FlateEncoder) SetPredictor(columns int) {
	// Predictor 11 (PNG Sub) is the only predictor supported for encoding.
	this.Predictor, this.Columns = 11, columns
}
func (this *FlateEncoder) GetFilterName() string {
2017-02-23 15:25:23 +00:00
return StreamEncodingFilterNameFlate
}
// MakeDecodeParams returns the decode parameters dictionary for the encoder.
// Without prediction (Predictor <= 1) there is nothing to describe and nil is
// returned. Otherwise the dictionary holds Predictor plus every parameter
// that differs from its default (BitsPerComponent 8, Columns 1, Colors 1).
func (this *FlateEncoder) MakeDecodeParams() PdfObject {
	if this.Predictor <= 1 {
		return nil
	}

	params := PdfObjectDictionary{}
	params["Predictor"] = MakeInteger(int64(this.Predictor))

	// Only non-default values are written out.
	if bpc := this.BitsPerComponent; bpc != 8 {
		params["BitsPerComponent"] = MakeInteger(int64(bpc))
	}
	if columns := this.Columns; columns != 1 {
		params["Columns"] = MakeInteger(int64(columns))
	}
	if colors := this.Colors; colors != 1 {
		params["Colors"] = MakeInteger(int64(colors))
	}
	return &params
}
// MakeStreamDict returns a new encoding dictionary for a stream object, with
// Filter set and, when the encoder has decode parameters, DecodeParms.
func (this *FlateEncoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := PdfObjectDictionary{}
	streamDict["Filter"] = MakeName(this.GetFilterName())

	if params := this.MakeDecodeParams(); params != nil {
		streamDict["DecodeParms"] = params
	}
	return &streamDict
}
// newFlateEncoderFromStream creates a flate encoder for a stream object.
//
// The parameters are read from decodeParams, or from the DecodeParms entry of
// the stream dictionary when decodeParams is nil. Entries that are missing
// keep the defaults set by NewFlateEncoder; entries of the wrong type result
// in an error. Columns and Colors are only read when a predictor is in use.
func newFlateEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*FlateEncoder, error) {
	encoder := NewFlateEncoder()

	streamDict := streamObj.PdfObjectDictionary
	if streamDict == nil {
		// No encoding dictionary.
		return encoder, nil
	}

	// If decodeParams was not provided, see if the stream has any.
	if decodeParams == nil {
		if entry := (*streamDict)["DecodeParms"]; entry != nil {
			params, isDict := entry.(*PdfObjectDictionary)
			if !isDict {
				common.Log.Debug("Error: DecodeParms not a dictionary (%T)", entry)
				return nil, fmt.Errorf("Invalid DecodeParms")
			}
			decodeParams = params
		}
	}
	if decodeParams == nil {
		// Everything below depends on the decode params.
		return encoder, nil
	}
	common.Log.Trace("decode params: %s", decodeParams.String())

	if entry, has := (*decodeParams)["Predictor"]; has {
		predictor, ok := entry.(*PdfObjectInteger)
		if !ok {
			common.Log.Debug("Error: Predictor specified but not numeric (%T)", entry)
			return nil, fmt.Errorf("Invalid Predictor")
		}
		encoder.Predictor = int(*predictor)
	} else {
		common.Log.Debug("Error: Predictor missing from DecodeParms - Continue with default (1)")
	}

	// Bits per component. Use default if not specified (8).
	if entry, has := (*decodeParams)["BitsPerComponent"]; has {
		bpc, ok := entry.(*PdfObjectInteger)
		if !ok {
			common.Log.Debug("ERROR: Invalid BitsPerComponent")
			return nil, fmt.Errorf("Invalid BitsPerComponent")
		}
		encoder.BitsPerComponent = int(*bpc)
	}

	if encoder.Predictor <= 1 {
		// No prediction: the remaining entries are not used.
		return encoder, nil
	}

	// Columns (default 1 if not specified).
	encoder.Columns = 1
	if entry, has := (*decodeParams)["Columns"]; has {
		columns, ok := entry.(*PdfObjectInteger)
		if !ok {
			return nil, fmt.Errorf("Predictor column invalid")
		}
		encoder.Columns = int(*columns)
	}

	// Colors: number of interleaved color components per sample (default 1 if not specified).
	encoder.Colors = 1
	if entry, has := (*decodeParams)["Colors"]; has {
		colors, ok := entry.(*PdfObjectInteger)
		if !ok {
			return nil, fmt.Errorf("Predictor colors not an integer")
		}
		encoder.Colors = int(*colors)
	}

	return encoder, nil
}
func (this *FlateEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
2017-03-02 12:49:25 +00:00
common.Log.Trace("FlateDecode bytes")
bufReader := bytes.NewReader(encoded)
r, err := zlib.NewReader(bufReader)
if err != nil {
common.Log.Debug("Decoding error %v\n", err)
common.Log.Debug("Stream (%d) % x", len(encoded), encoded)
return nil, err
}
defer r.Close()
var outBuf bytes.Buffer
outBuf.ReadFrom(r)
2017-03-05 22:42:30 +00:00
common.Log.Trace("En: % x\n", encoded)
common.Log.Trace("De: % x\n", outBuf.Bytes())
return outBuf.Bytes(), nil
}
// Decode a FlateEncoded stream object and give back decoded bytes.
func (this *FlateEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
// TODO: Revamp this support to handle TIFF predictor (2).
// Also handle more filter bytes and support more values of BitsPerComponent.
2017-03-02 12:49:25 +00:00
common.Log.Trace("FlateDecode stream")
common.Log.Trace("Predictor: %d", this.Predictor)
if this.BitsPerComponent != 8 {
return nil, fmt.Errorf("Invalid BitsPerComponent (only 8 supported)")
}
outData, err := this.DecodeBytes(streamObj.Stream)
if err != nil {
return nil, err
}
common.Log.Trace("En: % x\n", streamObj.Stream)
common.Log.Trace("De: % x\n", outData)
if this.Predictor > 1 {
if this.Predictor == 2 { // TIFF encoding: Needs some tests.
common.Log.Trace("Tiff encoding")
common.Log.Trace("Colors: %d", this.Colors)
rowLength := int(this.Columns) * this.Colors
rows := len(outData) / rowLength
if len(outData)%rowLength != 0 {
common.Log.Debug("ERROR: TIFF encoding: Invalid row length...")
return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength)
}
if rowLength%this.Colors != 0 {
return nil, fmt.Errorf("Invalid row length (%d) for colors %d", rowLength, this.Colors)
}
common.Log.Trace("inp outData (%d): % x", len(outData), outData)
pOutBuffer := bytes.NewBuffer(nil)
// 0-255 -255 255 ; 0-255=-255;
for i := 0; i < rows; i++ {
rowData := outData[rowLength*i : rowLength*(i+1)]
// Predicts the same as the sample to the left.
// Interleaved by colors.
for j := this.Colors; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+rowData[j-this.Colors]) % 256)
}
pOutBuffer.Write(rowData)
}
pOutData := pOutBuffer.Bytes()
common.Log.Trace("POutData (%d): % x", len(pOutData), pOutData)
return pOutData, nil
} else if this.Predictor >= 10 && this.Predictor <= 15 {
common.Log.Trace("PNG Encoding")
2017-03-08 16:48:40 +00:00
// Columns represents the number of samples per row; Each sample can contain multiple color
// components.
rowLength := int(this.Columns*this.Colors + 1) // 1 byte to specify predictor algorithms per row.
rows := len(outData) / rowLength
if len(outData)%rowLength != 0 {
return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength)
}
pOutBuffer := bytes.NewBuffer(nil)
common.Log.Trace("Predictor columns: %d", this.Columns)
common.Log.Trace("Length: %d / %d = %d rows", len(outData), rowLength, rows)
prevRowData := make([]byte, rowLength)
for i := 0; i < rowLength; i++ {
prevRowData[i] = 0
}
for i := 0; i < rows; i++ {
rowData := outData[rowLength*i : rowLength*(i+1)]
fb := rowData[0]
switch fb {
case 0:
// No prediction. (No operation).
case 1:
// Sub: Predicts the same as the sample to the left.
for j := 2; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+rowData[j-1]) % 256)
}
case 2:
// Up: Predicts the same as the sample above
for j := 1; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256)
}
default:
2017-03-08 16:48:40 +00:00
common.Log.Debug("ERROR: Invalid filter byte (%d) @row %d", fb, i)
return nil, fmt.Errorf("Invalid filter byte (%d)", fb)
}
for i := 0; i < rowLength; i++ {
prevRowData[i] = rowData[i]
}
pOutBuffer.Write(rowData[1:])
}
pOutData := pOutBuffer.Bytes()
return pOutData, nil
} else {
common.Log.Debug("ERROR: Unsupported predictor (%d)", this.Predictor)
return nil, fmt.Errorf("Unsupported predictor (%d)", this.Predictor)
}
}
return outData, nil
}
// EncodeBytes encodes data based on the encoder parameters and returns the
// zlib compressed result.
//
// Only Predictor 1 (none) and 11 (PNG Sub) are supported. With predictor 11
// the data is split into rows of Columns bytes, each row is Sub-filtered and
// prefixed with the filter byte (1) before compression. An error is returned
// for other predictors, for Columns < 1, when len(data) is not a multiple of
// Columns, or when compression fails.
//
// NOTE(review): Colors is not taken into account here, whereas DecodeStream
// uses rows of Columns*Colors bytes — verify behavior for Colors != 1.
func (this *FlateEncoder) EncodeBytes(data []byte) ([]byte, error) {
	if this.Predictor != 1 && this.Predictor != 11 {
		return nil, fmt.Errorf("FlateEncoder Predictor = 1, 11 only supported")
	}

	if this.Predictor == 11 {
		// The length of each input row. Each output row has one extra leading
		// byte as compared to the input, indicating the filter type.
		rowLength := this.Columns
		if rowLength < 1 {
			// Guards the division below against a zero row length.
			common.Log.Error("Invalid column length")
			return nil, errors.New("Invalid row length")
		}
		if len(data)%rowLength != 0 {
			common.Log.Error("Invalid column length")
			return nil, errors.New("Invalid row length")
		}

		rows := len(data) / rowLength
		filtered := make([]byte, 0, len(data)+rows)
		for start := 0; start < len(data); start += rowLength {
			rowData := data[start : start+rowLength]

			// PNG Sub method: each byte is stored as the difference to the
			// byte to its left (wrapping modulo 256); the first byte is kept.
			filtered = append(filtered, 1, rowData[0])
			for j := 1; j < rowLength; j++ {
				filtered = append(filtered, rowData[j]-rowData[j-1])
			}
		}
		data = filtered
	}

	var b bytes.Buffer
	w := zlib.NewWriter(&b)
	if _, err := w.Write(data); err != nil {
		return nil, err
	}
	// Close flushes the remaining compressed data; its error must not be lost.
	if err := w.Close(); err != nil {
		return nil, err
	}
	return b.Bytes(), nil
}
// LZWEncoder implements LZW encoding and decoding, optionally combined with
// a predictor when decoding.
type LZWEncoder struct {
	// Predictor selects the prediction function: 1 means no prediction,
	// 2 is the TIFF predictor and 10-15 are the PNG predictors.
	Predictor int
	// BitsPerComponent is the number of bits per color component.
	BitsPerComponent int
	// Predictor parameters: Columns is the number of samples per row and
	// Colors the number of interleaved color components per sample.
	Columns, Colors int
	// EarlyChange selects the LZW variant: 1 increases the code length one
	// code early, 0 postpones the increase.
	EarlyChange int
}
// NewLZWEncoder returns an LZW encoder with the default parameters: no
// prediction (predictor 1), 8 bits per component, 1 color, 1 column and
// EarlyChange 1.
func NewLZWEncoder() *LZWEncoder {
	return &LZWEncoder{
		Predictor:        1, // Default (no prediction).
		BitsPerComponent: 8, // Currently only supporting 8.
		Columns:          1,
		Colors:           1,
		EarlyChange:      1,
	}
}
func (this *LZWEncoder) GetFilterName() string {
2017-02-23 15:25:23 +00:00
return StreamEncodingFilterNameLZW
}
// MakeDecodeParams returns the decode parameters dictionary for the encoder.
// Without prediction (Predictor <= 1) nil is returned. Otherwise the
// dictionary holds Predictor plus every parameter that differs from its
// default (BitsPerComponent 8, Columns 1, Colors 1).
func (this *LZWEncoder) MakeDecodeParams() PdfObject {
	if this.Predictor <= 1 {
		return nil
	}

	params := PdfObjectDictionary{}
	params["Predictor"] = MakeInteger(int64(this.Predictor))

	// Only non-default values are written out.
	if bpc := this.BitsPerComponent; bpc != 8 {
		params["BitsPerComponent"] = MakeInteger(int64(bpc))
	}
	if columns := this.Columns; columns != 1 {
		params["Columns"] = MakeInteger(int64(columns))
	}
	if colors := this.Colors; colors != 1 {
		params["Colors"] = MakeInteger(int64(colors))
	}
	return &params
}
// MakeStreamDict returns a new encoding dictionary for a stream object, with
// Filter and EarlyChange set and, when the encoder has decode parameters,
// DecodeParms.
//
// NOTE(review): EarlyChange is written into the stream dictionary itself;
// the PDF specification appears to define it as a DecodeParms entry — confirm.
func (this *LZWEncoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := PdfObjectDictionary{}
	streamDict["Filter"] = MakeName(this.GetFilterName())

	if params := this.MakeDecodeParams(); params != nil {
		streamDict["DecodeParms"] = params
	}

	streamDict["EarlyChange"] = MakeInteger(int64(this.EarlyChange))
	return &streamDict
}
// newLZWEncoderFromStream creates an LZW encoder/decoder for a stream object.
//
// The parameters are read from decodeParams, or from the DecodeParms entry of
// the stream dictionary when decodeParams is nil. Entries that are missing
// keep the defaults set by NewLZWEncoder; entries of the wrong type result in
// an error. Columns and Colors are only read when a predictor is in use.
//
// EarlyChange is taken from the decode parameters when present there and
// otherwise from the stream dictionary itself, which is where MakeStreamDict
// writes it. Only the values 0 and 1 are accepted.
func newLZWEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*LZWEncoder, error) {
	// Start with default settings.
	encoder := NewLZWEncoder()

	encDict := streamObj.PdfObjectDictionary
	if encDict == nil {
		// No encoding dictionary.
		return encoder, nil
	}

	// If decodeParams not provided, see if we can get from the stream.
	if decodeParams == nil {
		if obj := (*encDict)["DecodeParms"]; obj != nil {
			dp, isDict := obj.(*PdfObjectDictionary)
			if !isDict {
				common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj)
				return nil, fmt.Errorf("Invalid DecodeParms")
			}
			decodeParams = dp
		}
	}

	// The EarlyChange indicates when to increase code length, as different
	// implementations use different mechanisms. Essentially this chooses
	// which LZW implementation to use. The default is 1 (one code early).
	obj, has := (*encDict)["EarlyChange"]
	if decodeParams != nil {
		// An entry in the decode parameters takes precedence over the stream dictionary.
		if dpObj, inParams := (*decodeParams)["EarlyChange"]; inParams {
			obj, has = dpObj, true
		}
	}
	if has {
		earlyChange, ok := obj.(*PdfObjectInteger)
		if !ok {
			common.Log.Debug("Error: EarlyChange specified but not numeric (%T)", obj)
			return nil, fmt.Errorf("Invalid EarlyChange")
		}
		if *earlyChange != 0 && *earlyChange != 1 {
			return nil, fmt.Errorf("Invalid EarlyChange value (not 0 or 1)")
		}
		encoder.EarlyChange = int(*earlyChange)
	} else {
		encoder.EarlyChange = 1 // default
	}

	if decodeParams == nil {
		// No decode parameters. Can safely return here as the following
		// options are all related to the decode params.
		return encoder, nil
	}

	obj, has = (*decodeParams)["Predictor"]
	if has {
		predictor, ok := obj.(*PdfObjectInteger)
		if !ok {
			common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj)
			return nil, fmt.Errorf("Invalid Predictor")
		}
		encoder.Predictor = int(*predictor)
	}

	// Bits per component. Use default if not specified (8).
	obj, has = (*decodeParams)["BitsPerComponent"]
	if has {
		bpc, ok := obj.(*PdfObjectInteger)
		if !ok {
			common.Log.Debug("ERROR: Invalid BitsPerComponent")
			return nil, fmt.Errorf("Invalid BitsPerComponent")
		}
		encoder.BitsPerComponent = int(*bpc)
	}

	if encoder.Predictor > 1 {
		// Columns (default 1 if not specified).
		encoder.Columns = 1
		obj, has = (*decodeParams)["Columns"]
		if has {
			columns, ok := obj.(*PdfObjectInteger)
			if !ok {
				return nil, fmt.Errorf("Predictor column invalid")
			}
			encoder.Columns = int(*columns)
		}

		// Colors: number of interleaved color components per sample (default 1 if not specified).
		encoder.Colors = 1
		obj, has = (*decodeParams)["Colors"]
		if has {
			colors, ok := obj.(*PdfObjectInteger)
			if !ok {
				return nil, fmt.Errorf("Predictor colors not an integer")
			}
			encoder.Colors = int(*colors)
		}
	}

	common.Log.Trace("decode params: %s", decodeParams.String())
	return encoder, nil
}
// DecodeBytes decompresses LZW encoded data (MSB bit order, 8 bit literals).
// EarlyChange selects which of the two LZW implementations is used. No
// predictor is applied here.
func (this *LZWEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
	src := bytes.NewReader(encoded)

	var decoder io.ReadCloser
	switch this.EarlyChange {
	case 1:
		// LZW implementation with code length increases one code early (1).
		decoder = lzw1.NewReader(src, lzw1.MSB, 8)
	default:
		// 0: LZW implementation with postponed code length increases (0).
		decoder = lzw0.NewReader(src, lzw0.MSB, 8)
	}
	defer decoder.Close()

	var decoded bytes.Buffer
	if _, err := decoded.ReadFrom(decoder); err != nil {
		return nil, err
	}
	return decoded.Bytes(), nil
}
func (this *LZWEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
// Revamp this support to handle TIFF predictor (2).
// Also handle more filter bytes and check
// BitsPerComponent. Default value is 8, currently we are only
// supporting that one.
common.Log.Trace("LZW Decoding")
common.Log.Trace("Predictor: %d", this.Predictor)
outData, err := this.DecodeBytes(streamObj.Stream)
if err != nil {
return nil, err
}
common.Log.Trace(" IN: (%d) % x", len(streamObj.Stream), streamObj.Stream)
common.Log.Trace("OUT: (%d) % x", len(outData), outData)
if this.Predictor > 1 {
if this.Predictor == 2 { // TIFF encoding: Needs some tests.
common.Log.Trace("Tiff encoding")
rowLength := int(this.Columns) * this.Colors
rows := len(outData) / rowLength
if len(outData)%rowLength != 0 {
common.Log.Debug("ERROR: TIFF encoding: Invalid row length...")
return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength)
}
if rowLength%this.Colors != 0 {
return nil, fmt.Errorf("Invalid row length (%d) for colors %d", rowLength, this.Colors)
}
common.Log.Trace("inp outData (%d): % x", len(outData), outData)
pOutBuffer := bytes.NewBuffer(nil)
// 0-255 -255 255 ; 0-255=-255;
for i := 0; i < rows; i++ {
rowData := outData[rowLength*i : rowLength*(i+1)]
// Predicts the same as the sample to the left.
// Interleaved by colors.
for j := this.Colors; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+rowData[j-this.Colors]) % 256)
}
// GH: Appears that this is not working as expected...
pOutBuffer.Write(rowData)
}
pOutData := pOutBuffer.Bytes()
common.Log.Trace("POutData (%d): % x", len(pOutData), pOutData)
return pOutData, nil
} else if this.Predictor >= 10 && this.Predictor <= 15 {
common.Log.Trace("PNG Encoding")
2017-03-08 16:56:29 +00:00
// Columns represents the number of samples per row; Each sample can contain multiple color
// components.
rowLength := int(this.Columns*this.Colors + 1) // 1 byte to specify predictor algorithms per row.
rows := len(outData) / rowLength
if len(outData)%rowLength != 0 {
return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength)
}
pOutBuffer := bytes.NewBuffer(nil)
common.Log.Trace("Predictor columns: %d", this.Columns)
common.Log.Trace("Length: %d / %d = %d rows", len(outData), rowLength, rows)
prevRowData := make([]byte, rowLength)
for i := 0; i < rowLength; i++ {
prevRowData[i] = 0
}
for i := 0; i < rows; i++ {
rowData := outData[rowLength*i : rowLength*(i+1)]
fb := rowData[0]
switch fb {
case 0:
// No prediction. (No operation).
case 1:
// Sub: Predicts the same as the sample to the left.
for j := 2; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+rowData[j-1]) % 256)
}
case 2:
// Up: Predicts the same as the sample above
for j := 1; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256)
}
default:
common.Log.Debug("ERROR: Invalid filter byte (%d)", fb)
return nil, fmt.Errorf("Invalid filter byte (%d)", fb)
}
for i := 0; i < rowLength; i++ {
prevRowData[i] = rowData[i]
}
pOutBuffer.Write(rowData[1:])
}
pOutData := pOutBuffer.Bytes()
return pOutData, nil
} else {
common.Log.Debug("ERROR: Unsupported predictor (%d)", this.Predictor)
return nil, fmt.Errorf("Unsupported predictor (%d)", this.Predictor)
}
}
return outData, nil
}
// EncodeBytes LZW compresses data (MSB bit order, 8 bit literals).
//
// Predictors are not supported (raw compressed data only). Only the
// EarlyChange = 0 algorithm (compress/lzw) is supported, as the other
// implementation does not have a write method. Note that NewLZWEncoder
// defaults to EarlyChange = 1, so it has to be set to 0 before encoding.
// TODO: Consider refactoring compress/lzw to allow both.
func (this *LZWEncoder) EncodeBytes(data []byte) ([]byte, error) {
	if this.Predictor != 1 {
		return nil, fmt.Errorf("LZW Predictor = 1 only supported yet")
	}
	if this.EarlyChange == 1 {
		return nil, fmt.Errorf("LZW Early Change = 0 only supported yet")
	}

	var b bytes.Buffer
	w := lzw0.NewWriter(&b, lzw0.MSB, 8)
	if _, err := w.Write(data); err != nil {
		return nil, err
	}
	// Close flushes any pending output; its error must not be lost.
	if err := w.Close(); err != nil {
		return nil, err
	}
	return b.Bytes(), nil
}
// DCTEncoder provides DCT (JPG) encoding/decoding functionality for images.
type DCTEncoder struct {
	ColorComponents  int // 1 (gray), 3 (rgb), 4 (cmyk)
	BitsPerComponent int // 8 or 16 bit
	// Image dimensions.
	Width, Height int
	// Quality used when encoding, from 0-100 (100 being the best quality).
	Quality int
}
// NewDCTEncoder returns a DCT encoder with the default parameters:
// 3 color components, 8 bits per component and quality 75. Width and Height
// are left at zero.
func NewDCTEncoder() *DCTEncoder {
	return &DCTEncoder{
		ColorComponents:  3,
		BitsPerComponent: 8,
		Quality:          75,
	}
}
// GetFilterName returns the name of the DCT filter (StreamEncodingFilterNameDCT).
func (this *DCTEncoder) GetFilterName() string {
	return StreamEncodingFilterNameDCT
}
// MakeDecodeParams always returns nil: the DCT encoder has no decode params.
func (this *DCTEncoder) MakeDecodeParams() PdfObject {
	return nil
}
// MakeStreamDict returns a new encoding dictionary for a stream object with
// only the Filter entry set. Some other parameters are generated elsewhere.
func (this *DCTEncoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := PdfObjectDictionary{}
	filterName := this.GetFilterName()
	streamDict["Filter"] = MakeName(filterName)
	return &streamDict
}
// Create a new DCT encoder/decoder from a stream object, getting all the encoding parameters
// from the stream object dictionary entry and the image data itself.
2017-03-05 22:42:30 +00:00
// TODO: Support if used with other filters [ASCII85Decode FlateDecode DCTDecode]...
// need to apply the other filters prior to this one...
func newDCTEncoderFromStream(streamObj *PdfObjectStream, multiEnc *MultiEncoder) (*DCTEncoder, error) {
// Start with default settings.
encoder := NewDCTEncoder()
encDict := streamObj.PdfObjectDictionary
if encDict == nil {
// No encoding dictionary.
return encoder, nil
}
2017-03-05 22:42:30 +00:00
// If using DCTDecode in combination with other filters, make sure to decode that first...
encoded := streamObj.Stream
if multiEnc != nil {
e, err := multiEnc.DecodeBytes(encoded)
if err != nil {
return nil, err
}
encoded = e
}
bufReader := bytes.NewReader(encoded)
cfg, err := jpeg.DecodeConfig(bufReader)
//img, _, err := goimage.Decode(bufReader)
if err != nil {
common.Log.Debug("Error decoding file: %s", err)
return nil, err
}
switch cfg.ColorModel {
case gocolor.RGBAModel:
encoder.BitsPerComponent = 8
encoder.ColorComponents = 3 // alpha is not included in pdf.
case gocolor.RGBA64Model:
encoder.BitsPerComponent = 16
encoder.ColorComponents = 3
case gocolor.GrayModel:
encoder.BitsPerComponent = 8
encoder.ColorComponents = 1
case gocolor.Gray16Model:
encoder.BitsPerComponent = 16
encoder.ColorComponents = 1
case gocolor.CMYKModel:
encoder.BitsPerComponent = 8
encoder.ColorComponents = 4
case gocolor.YCbCrModel:
// YCbCr is not supported by PDF, but it could be a different colorspace
// with 3 components. Would be specified by the ColorSpace entry.
encoder.BitsPerComponent = 8
encoder.ColorComponents = 3
default:
return nil, errors.New("Unsupported color model")
}
encoder.Width = cfg.Width
encoder.Height = cfg.Height
common.Log.Trace("DCT Encoder: %+v", encoder)
encoder.Quality = 75
return encoder, nil
}
func (this *DCTEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
bufReader := bytes.NewReader(encoded)
2017-03-05 22:42:30 +00:00
//img, _, err := goimage.Decode(bufReader)
img, err := jpeg.Decode(bufReader)
if err != nil {
common.Log.Debug("Error decoding image: %s", err)
return nil, err
}
bounds := img.Bounds()
var decoded = make([]byte, bounds.Dx()*bounds.Dy()*this.ColorComponents*this.BitsPerComponent/8)
index := 0
for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
for i := bounds.Min.X; i < bounds.Max.X; i++ {
color := img.At(i, j)
// Gray scale.
if this.ColorComponents == 1 {
if this.BitsPerComponent == 16 {
// Gray - 16 bit.
val, ok := color.(gocolor.Gray16)
if !ok {
return nil, errors.New("Color type error")
}
decoded[index] = byte((val.Y >> 8) & 0xff)
index++
decoded[index] = byte(val.Y & 0xff)
index++
} else {
// Gray - 8 bit.
val, ok := color.(gocolor.Gray)
if !ok {
return nil, errors.New("Color type error")
}
decoded[index] = byte(val.Y & 0xff)
index++
}
} else if this.ColorComponents == 3 {
if this.BitsPerComponent == 16 {
val, ok := color.(gocolor.RGBA64)
if !ok {
return nil, errors.New("Color type error")
}
decoded[index] = byte((val.R >> 8) & 0xff)
index++
decoded[index] = byte(val.R & 0xff)
index++
decoded[index] = byte((val.G >> 8) & 0xff)
index++
decoded[index] = byte(val.G & 0xff)
index++
decoded[index] = byte((val.B >> 8) & 0xff)
index++
decoded[index] = byte(val.B & 0xff)
index++
} else {
// RGB - 8 bit.
val, isRGB := color.(gocolor.RGBA)
if isRGB {
decoded[index] = val.R & 0xff
index++
decoded[index] = val.G & 0xff
index++
decoded[index] = val.B & 0xff
index++
} else {
// Hack around YCbCr from go jpeg package.
val, ok := color.(gocolor.YCbCr)
if !ok {
return nil, errors.New("Color type error")
}
r, g, b, _ := val.RGBA()
// The fact that we cannot use the Y, Cb, Cr values directly,
// indicates that either the jpeg package is converting the raw
// data into YCbCr with some kind of mapping, or that the original
// data is not in R,G,B...
// XXX: This is not good as it means we end up with R, G, B... even
// if the original colormap was different. Unless calling the RGBA()
// call exactly reverses the previous conversion to YCbCr (even if
// real data is not rgb)... ?
// TODO: Test more. Consider whether we need to implement our own jpeg filter.
decoded[index] = byte(r >> 8) //byte(val.Y & 0xff)
index++
decoded[index] = byte(g >> 8) //val.Cb & 0xff)
index++
decoded[index] = byte(b >> 8) //val.Cr & 0xff)
index++
}
}
} else if this.ColorComponents == 4 {
// CMYK - 8 bit.
val, ok := color.(gocolor.CMYK)
if !ok {
return nil, errors.New("Color type error")
}
2017-03-05 22:42:30 +00:00
// TODO: Is the inversion not handled right in the JPEG package for APP14?
// Should not need to invert here...
decoded[index] = 255 - val.C&0xff
index++
2017-03-05 22:42:30 +00:00
decoded[index] = 255 - val.M&0xff
index++
2017-03-05 22:42:30 +00:00
decoded[index] = 255 - val.Y&0xff
index++
2017-03-05 22:42:30 +00:00
decoded[index] = 255 - val.K&0xff
index++
}
}
}
return decoded, nil
}
// DecodeStream decodes the JPEG data of a stream object. See DecodeBytes.
func (this *DCTEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	encoded := streamObj.Stream
	return this.DecodeBytes(encoded)
}
// DrawableImage is an image whose pixels can be both read and set.
type DrawableImage interface {
	// ColorModel returns the color model of the image.
	ColorModel() gocolor.Model
	// Bounds returns the rectangle covered by the image.
	Bounds() goimage.Rectangle
	// At returns the color of the pixel at (x, y).
	At(x, y int) gocolor.Color
	// Set sets the color of the pixel at (x, y).
	Set(x, y int, c gocolor.Color)
}
// EncodeBytes encodes raw image samples as JPEG. data holds the pixels row by
// row, each with ColorComponents components of BitsPerComponent bits (16 bit
// values with the high byte first). The image is Width x Height pixels and
// is encoded with the given Quality.
//
// An error is returned when ColorComponents is not 1, 3 or 4, when
// BitsPerComponent is neither 8 nor 16, or when the JPEG encoding fails.
func (this *DCTEncoder) EncodeBytes(data []byte) ([]byte, error) {
	bounds := goimage.Rect(0, 0, this.Width, this.Height)

	var img DrawableImage
	switch this.ColorComponents {
	case 1:
		if this.BitsPerComponent == 16 {
			img = goimage.NewGray16(bounds)
		} else {
			img = goimage.NewGray(bounds)
		}
	case 3:
		if this.BitsPerComponent == 16 {
			img = goimage.NewRGBA64(bounds)
		} else {
			img = goimage.NewRGBA(bounds)
		}
	case 4:
		img = goimage.NewCMYK(bounds)
	default:
		return nil, errors.New("Unsupported")
	}

	// Other values would lead to a pixel size of zero bytes (a loop that
	// never advances) or to reads past the end of data.
	if this.BitsPerComponent != 8 && this.BitsPerComponent != 16 {
		return nil, errors.New("Unsupported bits per component (only 8 and 16 supported)")
	}

	// Draw the data on the image..
	x := 0
	y := 0
	bytesPerColor := this.ColorComponents * this.BitsPerComponent / 8
	for i := 0; i+bytesPerColor-1 < len(data); i += bytesPerColor {
		var c gocolor.Color
		switch this.ColorComponents {
		case 1:
			if this.BitsPerComponent == 16 {
				c = gocolor.Gray16{Y: uint16(data[i])<<8 | uint16(data[i+1])}
			} else {
				c = gocolor.Gray{Y: data[i]}
			}
		case 3:
			if this.BitsPerComponent == 16 {
				r := uint16(data[i])<<8 | uint16(data[i+1])
				g := uint16(data[i+2])<<8 | uint16(data[i+3])
				b := uint16(data[i+4])<<8 | uint16(data[i+5])
				c = gocolor.RGBA64{R: r, G: g, B: b, A: 0}
			} else {
				c = gocolor.RGBA{R: data[i], G: data[i+1], B: data[i+2], A: 0}
			}
		case 4:
			c = gocolor.CMYK{C: data[i], M: data[i+1], Y: data[i+2], K: data[i+3]}
		}

		img.Set(x, y, c)
		x++
		if x == this.Width {
			x = 0
			y++
		}
	}

	// The quality is specified from 0-100 (with 100 being the best quality) in the DCT structure.
	// N.B. even 100 is lossy, as still is transformed, but as good as it gets for DCT.
	// This is not related to the DPI, but rather inherent transformation losses.
	opt := jpeg.Options{Quality: this.Quality}

	var buf bytes.Buffer
	if err := jpeg.Encode(&buf, img, &opt); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// ASCIIHexEncoder implements the ASCII hex encoder/decoder. It has no parameters.
type ASCIIHexEncoder struct{}
2017-02-23 15:25:23 +00:00
// Make a new ASCII hex encoder.
func NewASCIIHexEncoder() *ASCIIHexEncoder {
encoder := &ASCIIHexEncoder{}
return encoder
}
func (this *ASCIIHexEncoder) GetFilterName() string {
2017-02-23 15:25:23 +00:00
return StreamEncodingFilterNameASCIIHex
}
// MakeDecodeParams always returns nil: the ASCII hex encoder has no decode params.
func (this *ASCIIHexEncoder) MakeDecodeParams() PdfObject {
	return nil
}
// MakeStreamDict returns a new encoding dictionary for a stream object with
// only the Filter entry set.
func (this *ASCIIHexEncoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := PdfObjectDictionary{}
	filterName := this.GetFilterName()
	streamDict["Filter"] = MakeName(filterName)
	return &streamDict
}
// DecodeBytes decodes ASCII hex encoded data. Whitespace is skipped and the
// data ends at the '>' marker. An odd number of hex digits is completed with
// a trailing '0'.
//
// An error is returned when a character that is neither a hex digit nor
// whitespace is found, or when the input ends before the '>' marker.
func (this *ASCIIHexEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
	reader := bytes.NewReader(encoded)
	digits := []byte{}

	for {
		c, err := reader.ReadByte()
		if err != nil {
			// End of input without the end of data marker.
			return nil, err
		}
		if c == '>' {
			break
		}

		switch {
		case IsWhiteSpace(c):
			// Skip.
		case (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9'):
			digits = append(digits, c)
		default:
			common.Log.Debug("ERROR: Invalid ascii hex character (%c)", c)
			return nil, fmt.Errorf("Invalid ascii hex character (%c)", c)
		}
	}

	// A missing final digit is taken to be 0.
	if len(digits)%2 == 1 {
		digits = append(digits, '0')
	}
	common.Log.Trace("Inbound %s", digits)

	decoded := make([]byte, hex.DecodedLen(len(digits)))
	if _, err := hex.Decode(decoded, digits); err != nil {
		return nil, err
	}
	return decoded, nil
}
// DecodeStream decodes the ASCII hex data of a stream object. See DecodeBytes.
func (this *ASCIIHexEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	encoded := streamObj.Stream
	return this.DecodeBytes(encoded)
}
// EncodeBytes encodes data as ASCII hex: every byte is written as two upper
// case hex digits followed by a space, and the output is terminated by '>'.
// The returned error is always nil.
func (this *ASCIIHexEncoder) EncodeBytes(data []byte) ([]byte, error) {
	var out bytes.Buffer
	for i := range data {
		fmt.Fprintf(&out, "%.2X ", data[i])
	}
	// End of data marker.
	out.WriteByte('>')
	return out.Bytes(), nil
}
// ASCII85Encoder implements the ASCII85 encoder/decoder. It has no parameters.
type ASCII85Encoder struct {
}
// NewASCII85Encoder returns a new ASCII85 encoder.
func NewASCII85Encoder() *ASCII85Encoder {
	return &ASCII85Encoder{}
}
// GetFilterName returns the name of the ASCII85 filter (StreamEncodingFilterNameASCII85).
func (this *ASCII85Encoder) GetFilterName() string {
	return StreamEncodingFilterNameASCII85
}
// MakeDecodeParams always returns nil: the ASCII85 encoder has no decode params.
func (this *ASCII85Encoder) MakeDecodeParams() PdfObject {
	return nil
}
// MakeStreamDict returns a new encoding dictionary for a stream object with
// only the Filter entry set.
func (this *ASCII85Encoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := PdfObjectDictionary{}
	filterName := this.GetFilterName()
	streamDict["Filter"] = MakeName(filterName)
	return &streamDict
}
// 5 ASCII characters -> 4 raw binary bytes
func (this *ASCII85Encoder) DecodeBytes(encoded []byte) ([]byte, error) {
decoded := []byte{}
2017-03-02 12:49:25 +00:00
common.Log.Trace("ASCII85 Decode")
2017-02-23 15:25:23 +00:00
i := 0
eod := false
for i < len(encoded) && !eod {
codes := [5]byte{0, 0, 0, 0, 0}
spaces := 0 // offset due to whitespace.
j := 0
toWrite := 4
for j < 5+spaces {
if i+j == len(encoded) {
break
}
code := encoded[i+j]
if IsWhiteSpace(code) {
// Skip whitespace.
spaces++
j++
continue
} else if code == '~' && i+j+1 < len(encoded) && encoded[i+j+1] == '>' {
toWrite = (j - spaces) - 1
if toWrite < 0 {
toWrite = 0
}
// EOD marker. Marks end of data.
eod = true
break
} else if code >= '!' && code <= 'u' {
// Valid code.
code -= '!'
} else if code == 'z' && j-spaces == 0 {
// 'z' in beginning of the byte sequence means that all 5 codes are 0.
// Already all 0 initialized, so can break here.
toWrite = 4
j++
break
} else {
common.Log.Error("Failed decoding, invalid code")
return nil, errors.New("Invalid code encountered")
}
codes[j-spaces] = code
j++
}
i += j
// Pad with 'u' 84 (unused ones)
// Takes care of issues at ends for input data that is not a multiple of 4-bytes.
for m := toWrite + 1; m < 5; m++ {
codes[m] = 84
}
// Convert to a uint32 value.
value := uint32(codes[0])*85*85*85*85 + uint32(codes[1])*85*85*85 + uint32(codes[2])*85*85 + uint32(codes[3])*85 + uint32(codes[4])
// Convert to 4 bytes.
decodedBytes := []byte{
byte((value >> 24) & 0xff),
byte((value >> 16) & 0xff),
byte((value >> 8) & 0xff),
byte(value & 0xff)}
// This accounts for the end of data, where the original data length is not a multiple of 4.
// In that case, 0 bytes are assumed but only
decoded = append(decoded, decodedBytes[:toWrite]...)
}
2017-03-02 12:49:25 +00:00
common.Log.Trace("ASCII85, encoded: % X", encoded)
common.Log.Trace("ASCII85, decoded: % X", decoded)
2017-02-23 15:25:23 +00:00
return decoded, nil
}
// DecodeStream decodes the ASCII85 data of a stream object. See DecodeBytes.
func (this *ASCII85Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	encoded := streamObj.Stream
	return this.DecodeBytes(encoded)
}
// base256Tobase85 converts a base 256 number, given as an uint32 value, to a
// series of 5 base 85 digits with the most significant digit first.
// 85^5 = 4437053125 > 256^4 = 4294967296, so 5 base-85 digits are always
// enough to cover 4 base-256 digits.
func (this *ASCII85Encoder) base256Tobase85(base256val uint32) [5]byte {
	var digits [5]byte

	// Peel off the digits starting with the least significant one.
	rest := base256val
	for pos := len(digits) - 1; pos >= 0; pos-- {
		digits[pos] = byte(rest % 85)
		rest /= 85
	}
	return digits
}
// EncodeBytes encodes data into the ASCII85 (base-85) format and terminates the
// output with the "~>" end-of-data marker.
//
// The input is consumed in groups of 4 bytes. Each full group is converted to 5
// base-85 digits, each offset by '!'; a full group of zeros is written as the
// single character 'z'. A trailing partial group of n bytes (n = 1, 2 or 3) is
// zero padded to 4 bytes and only its first n+1 digits are written. The 'z'
// shorthand is never used for a partial group: 'z' stands for 4 zero bytes, so
// using it there would change the length of the decoded data.
//
// The returned error is always nil.
func (this *ASCII85Encoder) EncodeBytes(data []byte) ([]byte, error) {
	var encoded bytes.Buffer

	for i := 0; i < len(data); i += 4 {
		// Number of real input bytes in this group (1 to 4).
		n := len(data) - i
		if n > 4 {
			n = 4
		}

		// Copy into a zeroed array so that a short final group is zero padded.
		var group [4]byte
		copy(group[:], data[i:i+n])

		// Convert to a uint32 number.
		base256 := uint32(group[0])<<24 | uint32(group[1])<<16 | uint32(group[2])<<8 | uint32(group[3])

		// 'z' may only replace a complete group of 4 zero bytes.
		if base256 == 0 && n == 4 {
			encoded.WriteByte('z')
			continue
		}

		base85vals := this.base256Tobase85(base256)
		for _, val := range base85vals[:n+1] {
			encoded.WriteByte(val + '!')
		}
	}

	// EOD.
	encoded.WriteString("~>")
	return encoded.Bytes(), nil
}
//
// Raw encoder/decoder (no encoding, pass through)
//
// RawEncoder carries no state. Its EncodeBytes and DecodeBytes methods return their
// input unchanged.
type RawEncoder struct{}
// NewRawEncoder returns a pointer to a new RawEncoder.
func NewRawEncoder() *RawEncoder {
	return new(RawEncoder)
}
// GetFilterName returns a descriptive label for the raw encoder. The label is not
// one of the StreamEncodingFilterName* constants.
func (this *RawEncoder) GetFilterName() string {
return "Raw (no encoding)"
}
// MakeDecodeParams always returns nil: the raw encoder has no decode parameters.
func (this *RawEncoder) MakeDecodeParams() PdfObject {
return nil
}
// MakeStreamDict returns a new, empty dictionary for a stream object; the raw
// encoder adds no entries to it.
func (this *RawEncoder) MakeStreamDict() *PdfObjectDictionary {
	dict := PdfObjectDictionary{}
	return &dict
}
// DecodeBytes returns encoded unchanged (the same slice, not a copy) and a nil error.
func (this *RawEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
return encoded, nil
}
// DecodeStream returns the raw contents of the stream object by handing
// streamObj.Stream to DecodeBytes, which passes it through unchanged.
func (this *RawEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	return this.DecodeBytes(streamObj.Stream)
}
// EncodeBytes returns data unchanged (the same slice, not a copy) and a nil error.
func (this *RawEncoder) EncodeBytes(data []byte) ([]byte, error) {
return data, nil
}
//
// Multi encoder: support serial encoding.
//
// MultiEncoder chains several StreamEncoders. DecodeBytes runs them in slice order
// and EncodeBytes runs them in reverse slice order.
type MultiEncoder struct {
// Encoders in the order that they are applied when decoding.
encoders []StreamEncoder
}
// NewMultiEncoder returns a MultiEncoder holding an empty, non-nil list of encoders.
func NewMultiEncoder() *MultiEncoder {
	return &MultiEncoder{encoders: []StreamEncoder{}}
}
func newMultiEncoderFromStream(streamObj *PdfObjectStream) (*MultiEncoder, error) {
mencoder := NewMultiEncoder()
encDict := streamObj.PdfObjectDictionary
if encDict == nil {
// No encoding dictionary.
return mencoder, nil
}
// Prepare the decode params array (one for each filter type)
// Optional, not always present.
var decodeParamsDict *PdfObjectDictionary
decodeParamsArray := []PdfObject{}
obj := (*encDict)["DecodeParms"]
if obj != nil {
// If it is a dictionary, assume it applies to all
dict, isDict := obj.(*PdfObjectDictionary)
if isDict {
decodeParamsDict = dict
}
// If it is an array, assume there is one for each
arr, isArray := obj.(*PdfObjectArray)
if isArray {
for _, dictObj := range *arr {
if dict, is := dictObj.(*PdfObjectDictionary); is {
decodeParamsArray = append(decodeParamsArray, dict)
} else {
decodeParamsArray = append(decodeParamsArray, nil)
}
}
}
}
obj = (*encDict)["Filter"]
if obj == nil {
return nil, fmt.Errorf("Filter missing")
}
array, ok := obj.(*PdfObjectArray)
if !ok {
return nil, fmt.Errorf("Multi filter can only be made from array")
}
for idx, obj := range *array {
name, ok := obj.(*PdfObjectName)
if !ok {
return nil, fmt.Errorf("Multi filter array element not a name")
}
var dp PdfObject
// If decode params dict is set, use it. Otherwise take from array..
if decodeParamsDict != nil {
dp = decodeParamsDict
} else {
// Only get the dp if provided. Oftentimes there is no decode params dict
// provided.
if len(decodeParamsArray) > 0 {
if idx >= len(decodeParamsArray) {
return nil, fmt.Errorf("Missing elements in decode params array")
}
dp = decodeParamsArray[idx]
}
}
var dParams *PdfObjectDictionary
if dict, is := dp.(*PdfObjectDictionary); is {
dParams = dict
}
2017-03-05 22:42:30 +00:00
common.Log.Trace("Next name: %s", *name)
if *name == StreamEncodingFilterNameFlate {
// XXX: need to separate out the DecodeParms..
encoder, err := newFlateEncoderFromStream(streamObj, dParams)
if err != nil {
return nil, err
}
mencoder.AddEncoder(encoder)
} else if *name == StreamEncodingFilterNameLZW {
encoder, err := newLZWEncoderFromStream(streamObj, dParams)
if err != nil {
return nil, err
}
mencoder.AddEncoder(encoder)
} else if *name == StreamEncodingFilterNameASCIIHex {
encoder := NewASCIIHexEncoder()
mencoder.AddEncoder(encoder)
2017-02-23 15:25:23 +00:00
} else if *name == StreamEncodingFilterNameASCII85 {
encoder := NewASCII85Encoder()
mencoder.AddEncoder(encoder)
2017-03-05 22:42:30 +00:00
} else if *name == StreamEncodingFilterNameDCT {
encoder, err := newDCTEncoderFromStream(streamObj, mencoder)
if err != nil {
return nil, err
}
mencoder.AddEncoder(encoder)
common.Log.Trace("Added DCT encoder...")
common.Log.Trace("Multi encoder: %#v", mencoder)
} else {
common.Log.Error("Unsupported filter %s", *name)
return nil, fmt.Errorf("Invalid filter in multi filter array")
}
}
return mencoder, nil
}
// GetFilterName returns the filter names of all encoders, in slice order, separated
// by single spaces. It returns an empty string when there are no encoders.
func (this *MultiEncoder) GetFilterName() string {
	var joined []byte
	for idx, encoder := range this.encoders {
		// Separator goes between names only, never at either end.
		if idx > 0 {
			joined = append(joined, ' ')
		}
		joined = append(joined, encoder.GetFilterName()...)
	}
	return string(joined)
}
// MakeDecodeParams builds the decode parameters object for the chain of encoders.
//
// With no encoders it returns nil. With exactly one encoder it returns that encoder's
// own decode params. With several encoders it returns an array with one entry per
// encoder, in slice order, using a null object for encoders without decode params.
func (this *MultiEncoder) MakeDecodeParams() PdfObject {
	switch len(this.encoders) {
	case 0:
		return nil
	case 1:
		return this.encoders[0].MakeDecodeParams()
	}

	params := PdfObjectArray{}
	for _, encoder := range this.encoders {
		if dp := encoder.MakeDecodeParams(); dp != nil {
			params = append(params, dp)
			continue
		}
		// Keep positions aligned with the encoders.
		params = append(params, MakeNull())
	}
	return &params
}
// AddEncoder appends encoder to the end of the list of encoders.
func (this *MultiEncoder) AddEncoder(encoder StreamEncoder) {
this.encoders = append(this.encoders, encoder)
}
// MakeStreamDict makes a new stream dictionary describing the chain of encoders.
//
// Filter is set to an array holding one name object per encoder, in slice order.
// It is always an array, even for zero or one encoder, which is the form that
// newMultiEncoderFromStream requires when reading the dictionary back. All other
// entries of the individual encoders' stream dictionaries are copied over, except
// their Filter and DecodeParms; when several encoders set the same key, the later
// encoder wins. DecodeParms is set from MakeDecodeParams when that is non-nil.
func (this *MultiEncoder) MakeStreamDict() *PdfObjectDictionary {
	dict := PdfObjectDictionary{}

	// A single name made of space separated filter names is not a usable filter,
	// so each filter name becomes its own array element.
	filters := PdfObjectArray{}
	for _, encoder := range this.encoders {
		filters = append(filters, MakeName(encoder.GetFilterName()))
	}
	dict["Filter"] = &filters

	// Pass all values from children, except Filter and DecodeParms.
	for _, encoder := range this.encoders {
		encDict := encoder.MakeStreamDict()
		if encDict == nil {
			// Nothing to copy from this encoder.
			continue
		}
		for key, val := range *encDict {
			if key != "Filter" && key != "DecodeParms" {
				dict[key] = val
			}
		}
	}

	// Make the decode params array or dict.
	decodeParams := this.MakeDecodeParams()
	if decodeParams != nil {
		dict["DecodeParms"] = decodeParams
	}

	return &dict
}
func (this *MultiEncoder) DecodeBytes(encoded []byte) ([]byte, error) {
decoded := encoded
var err error
2017-03-02 12:49:25 +00:00
// Apply in forward order.
for _, encoder := range this.encoders {
common.Log.Trace("Multi Encoder Decode: Applying Filter: %v %T", encoder, encoder)
decoded, err = encoder.DecodeBytes(decoded)
if err != nil {
return nil, err
}
}
return decoded, nil
}
// DecodeStream passes streamObj.Stream to DecodeBytes and returns its result unchanged.
func (this *MultiEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
return this.DecodeBytes(streamObj.Stream)
}
func (this *MultiEncoder) EncodeBytes(data []byte) ([]byte, error) {
encoded := data
var err error
// Apply in inverse order.
2017-03-02 12:49:25 +00:00
for i := len(this.encoders) - 1; i >= 0; i-- {
encoder := this.encoders[i]
encoded, err = encoder.EncodeBytes(encoded)
if err != nil {
return nil, err
}
}
return encoded, nil
}