Merge branch 'up_v2_dev' of https://github.com/peterwilliams97/unidoc into up_v2_dev

This commit is contained in:
Peter Williams 2017-07-03 15:11:24 +10:00
commit cc549c6662
19 changed files with 852 additions and 604 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*.gox
.idea
*.mdb
*.userprefs

View File

@ -33,11 +33,8 @@ type ContentStreamInlineImage struct {
// Make a new content stream inline image object from an image.
func NewInlineImageFromImage(img Image, encoder StreamEncoder) (*ContentStreamInlineImage, error) {
filterName := ""
if encoder == nil {
encoder = NewRawEncoder()
} else {
filterName = encoder.GetFilterName()
}
common.Log.Debug("NewInlineImageFromImage: encoder=%T", encoder)
@ -62,7 +59,9 @@ func NewInlineImageFromImage(img Image, encoder StreamEncoder) (*ContentStreamIn
}
inlineImage.stream = encoded
if len(filterName) > 0 {
filterName := encoder.GetFilterName()
if filterName != StreamEncodingFilterNameRaw {
inlineImage.Filter = MakeName(filterName)
}
// XXX/FIXME: Add decode params?

View File

@ -127,7 +127,7 @@ func (csp *ContentStreamProcessor) getColorspace(name string, resources *PdfPage
}
// Otherwise unsupported.
common.Log.Debug("Unknown colorspace requested: %s", name)
common.Log.Error("Unknown colorspace requested: %s", name)
return nil, errors.New("Unsupported colorspace")
}
@ -140,6 +140,10 @@ func (csp *ContentStreamProcessor) getInitialColor(cs PdfColorspace) (PdfColor,
return NewPdfColorDeviceRGB(0.0, 0.0, 0.0), nil
case *PdfColorspaceDeviceCMYK:
return NewPdfColorDeviceCMYK(0.0, 0.0, 0.0, 1.0), nil
case *PdfColorspaceCalGray:
return NewPdfColorCalGray(0.0), nil
case *PdfColorspaceCalRGB:
return NewPdfColorCalRGB(0.0, 0.0, 0.0), nil
case *PdfColorspaceLab:
l := 0.0
a := 0.0
@ -190,7 +194,7 @@ func (csp *ContentStreamProcessor) getInitialColor(cs PdfColorspace) (PdfColor,
return nil, nil
}
common.Log.Debug("Unable to determine initial color for unknown colorspace: %T", cs)
common.Log.Error("Unable to determine initial color for unknown colorspace: %T", cs)
return nil, errors.New("Unsupported colorspace")
}
@ -364,8 +368,8 @@ func (this *ContentStreamProcessor) handleCommand_SC(op *ContentStreamOperation,
cs := this.graphicsState.ColorspaceStroking
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
@ -389,8 +393,8 @@ func (this *ContentStreamProcessor) handleCommand_SCN(op *ContentStreamOperation
if !isPatternCS(cs) {
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
}
@ -411,8 +415,8 @@ func (this *ContentStreamProcessor) handleCommand_sc(op *ContentStreamOperation,
if !isPatternCS(cs) {
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
}
@ -433,8 +437,8 @@ func (this *ContentStreamProcessor) handleCommand_scn(op *ContentStreamOperation
if !isPatternCS(cs) {
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
}
@ -454,8 +458,8 @@ func (this *ContentStreamProcessor) handleCommand_scn(op *ContentStreamOperation
func (this *ContentStreamProcessor) handleCommand_G(op *ContentStreamOperation, resources *PdfPageResources) error {
cs := NewPdfColorspaceDeviceGray()
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
@ -475,8 +479,8 @@ func (this *ContentStreamProcessor) handleCommand_G(op *ContentStreamOperation,
func (this *ContentStreamProcessor) handleCommand_g(op *ContentStreamOperation, resources *PdfPageResources) error {
cs := NewPdfColorspaceDeviceGray()
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
@ -496,8 +500,8 @@ func (this *ContentStreamProcessor) handleCommand_g(op *ContentStreamOperation,
func (this *ContentStreamProcessor) handleCommand_RG(op *ContentStreamOperation, resources *PdfPageResources) error {
cs := NewPdfColorspaceDeviceRGB()
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
@ -516,8 +520,8 @@ func (this *ContentStreamProcessor) handleCommand_RG(op *ContentStreamOperation,
func (this *ContentStreamProcessor) handleCommand_rg(op *ContentStreamOperation, resources *PdfPageResources) error {
cs := NewPdfColorspaceDeviceRGB()
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
@ -537,8 +541,8 @@ func (this *ContentStreamProcessor) handleCommand_rg(op *ContentStreamOperation,
func (this *ContentStreamProcessor) handleCommand_K(op *ContentStreamOperation, resources *PdfPageResources) error {
cs := NewPdfColorspaceDeviceCMYK()
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}
@ -557,8 +561,8 @@ func (this *ContentStreamProcessor) handleCommand_K(op *ContentStreamOperation,
func (this *ContentStreamProcessor) handleCommand_k(op *ContentStreamOperation, resources *PdfPageResources) error {
cs := NewPdfColorspaceDeviceCMYK()
if len(op.Params) != cs.GetNumComponents() {
common.Log.Debug("Invalid number of parameters for SC")
common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
common.Log.Error("Invalid number of parameters for SC")
common.Log.Error("Number %d not matching colorspace %T", len(op.Params), cs)
return errors.New("Invalid number of parameters")
}

12
pdf/core/const.go Normal file
View File

@ -0,0 +1,12 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package core
import "errors"
// ErrUnsupportedEncodingParameters is the sentinel error for encoding
// parameters that an encoder does not support.
var ErrUnsupportedEncodingParameters = errors.New("Unsupported encoding parameters")

View File

@ -519,6 +519,7 @@ func (this *PdfCrypt) decryptBytes(buf []byte, filter string, okey []byte) ([]by
common.Log.Debug("ERROR AES invalid buf %s", buf)
return buf, fmt.Errorf("AES: Buf len < 16 (%d)", len(buf))
}
iv := buf[:16]
buf = buf[16:]
@ -534,8 +535,20 @@ func (this *PdfCrypt) decryptBytes(buf []byte, filter string, okey []byte) ([]by
common.Log.Trace("chop AES Decrypt (%d): % x", len(buf), buf)
mode.CryptBlocks(buf, buf)
common.Log.Trace("to (%d): % x", len(buf), buf)
//copy(buf[0:], buf[16:])
//common.Log.Debug("chop to (%d): % x", len(buf), buf)
if len(buf) == 0 {
common.Log.Trace("Empty buf, returning empty string")
return buf, nil
}
// The padded length is indicated by the last values. Remove those.
padLen := int(buf[len(buf)-1])
if padLen >= len(buf) {
common.Log.Debug("Illegal pad length")
return buf, fmt.Errorf("Invalid pad length")
}
buf = buf[:len(buf)-padLen]
return buf, nil
}
return nil, fmt.Errorf("Unsupported crypt filter method (%s)", cfMethod)

View File

@ -36,6 +36,11 @@ const (
StreamEncodingFilterNameDCT = "DCTDecode"
StreamEncodingFilterNameASCIIHex = "ASCIIHexDecode"
StreamEncodingFilterNameASCII85 = "ASCII85Decode"
StreamEncodingFilterNameRaw = "Raw"
)
const (
DefaultJPEGQuality = 75
)
type StreamEncoder interface {
@ -314,6 +319,41 @@ func (this *FlateEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, erro
for j := 1; j < rowLength; j++ {
rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256)
}
case 3:
// Avg: Predicts the same as the average of the sample to the left and above.
for j := 1; j < rowLength; j++ {
if j == 1 {
rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256)
} else {
avg := (rowData[j-1] + prevRowData[j]) / 2
rowData[j] = byte(int(rowData[j]+avg) % 256)
}
}
case 4:
// Paeth: a nonlinear function of the sample above, the sample to the left and the sample
// to the upper left.
for j := 2; j < rowLength; j++ {
a := rowData[j-1] // left
b := prevRowData[j] // above
c := prevRowData[j-1] // upper left
p := int(a + b - c)
pa := absInt(p - int(a))
pb := absInt(p - int(b))
pc := absInt(p - int(c))
if pa <= pb && pa <= pc {
// Use a (left).
rowData[j] = byte(int(rowData[j]+a) % 256)
} else if pb <= pc {
// Use b (upper).
rowData[j] = byte(int(rowData[j]+b) % 256)
} else {
// Use c (upper left).
rowData[j] = byte(int(rowData[j]+c) % 256)
}
}
default:
common.Log.Debug("ERROR: Invalid filter byte (%d) @row %d", fb, i)
return nil, fmt.Errorf("Invalid filter byte (%d)", fb)
@ -337,9 +377,10 @@ func (this *FlateEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, erro
// Encode a bytes array and return the encoded value based on the encoder parameters.
func (this *FlateEncoder) EncodeBytes(data []byte) ([]byte, error) {
if this.Predictor != 1 && !(11 <= this.Predictor && this.Predictor <= 15) {
common.Log.Error("FlateEncoder: Predictor=%d. Only 1, 11-15 supported", this.Predictor)
return nil, fmt.Errorf("FlateEncoder Predictor = 1, 11-15 only supported")
if this.Predictor != 1 && !(11 <= this.Predictor && this.Predictor <= 11) {
common.Log.Debug("Encoding error: FlateEncoder Predictor=%d. Only 1, 11 supported",
this.Predictor)
return nil, ErrUnsupportedEncodingParameters
}
if 11 <= this.Predictor && this.Predictor <= 15 {
@ -721,7 +762,7 @@ func NewDCTEncoder() *DCTEncoder {
encoder.ColorComponents = 3
encoder.BitsPerComponent = 8
encoder.Quality = 75
encoder.Quality = DefaultJPEGQuality
return encoder
}
@ -806,7 +847,7 @@ func newDCTEncoderFromStream(streamObj *PdfObjectStream, multiEnc *MultiEncoder)
encoder.Width = cfg.Width
encoder.Height = cfg.Height
common.Log.Trace("DCT Encoder: %+v", encoder)
encoder.Quality = 75
encoder.Quality = DefaultJPEGQuality
return encoder, nil
}
@ -1273,7 +1314,7 @@ func NewRawEncoder() *RawEncoder {
}
func (this *RawEncoder) GetFilterName() string {
return "Raw (no encoding)"
return StreamEncodingFilterNameRaw
}
func (this *RawEncoder) MakeDecodeParams() PdfObject {

View File

@ -647,7 +647,13 @@ func (this *PdfParser) parsePdfVersion() (int, int, error) {
result1 := rePdfVersion.FindStringSubmatch(string(b))
if len(result1) < 3 {
common.Log.Debug("Error: PDF Version not found!")
major, minor, err := this.seekPdfVersionTopDown()
if err == nil {
common.Log.Debug("Failed recovery - unable to find version")
return 0, 0, err
}
return major, minor, nil
return 0, 0, errors.New("PDF version not found")
}
@ -742,6 +748,7 @@ func (this *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
continue
}
if (len(txt) > 6) && (txt[:7] == "trailer") {
common.Log.Trace("Found trailer - %s", txt)
// Sometimes get "trailer << ...."
// Need to rewind to end of trailer text.
if len(txt) > 9 {
@ -1001,13 +1008,64 @@ func (this *PdfParser) parseXref() (*PdfObjectDictionary, error) {
return nil, err
}
} else {
common.Log.Debug("ERROR: Invalid xref.... starting with \"%s\"", string(bb))
return nil, errors.New("Invalid xref format")
common.Log.Debug("Warning: Unable to find xref table or stream. Repair attempted: Looking for earliest xref from bottom.")
err := this.repairSeekXrefMarker()
if err != nil {
common.Log.Debug("Repair failed - %v", err)
return nil, err
}
trailerDict, err = this.parseXrefTable()
if err != nil {
return nil, err
}
}
return trailerDict, err
}
// seekToEOFMarker scans backwards from the end of the file, in chunks of up
// to 1000 bytes, for a match of reEOF. On success the underlying read-seeker
// (this.rs) is positioned at the start of the last match found in the first
// chunk (counting from the end of the file) that contains one.
//
// fSize is the total size of the underlying stream in bytes.
//
// An error is returned if seeking or reading fails, or if no marker is found
// anywhere in the file.
//
// NOTE(review): chunks do not overlap, so a marker that straddles a chunk
// boundary would not be detected - confirm whether this matters in practice.
func (this *PdfParser) seekToEOFMarker(fSize int64) error {
	// Distance from the end of the file that has already been searched.
	var offset int64 = 0

	// Number of bytes to read from the file per chunk.
	var buflen int64 = 1000

	for offset < fSize {
		if fSize <= (buflen + offset) {
			// Clamp the last chunk so we never seek before the start of the file.
			buflen = fSize - offset
		}

		// Move back enough (as we need to read forward).
		if _, err := this.rs.Seek(-offset-buflen, os.SEEK_END); err != nil {
			return err
		}

		// Read the data. A failed read must not be treated as "no marker here".
		b1 := make([]byte, buflen)
		if _, err := this.rs.Read(b1); err != nil {
			return err
		}
		common.Log.Trace("Looking for EOF marker: \"%s\"", string(b1))

		ind := reEOF.FindAllStringIndex(string(b1), -1)
		if ind == nil {
			common.Log.Debug("Warning: EOF marker not found! - continue seeking")
			offset += buflen
			continue
		}

		// Found it: position the stream at the beginning of the last match.
		lastInd := ind[len(ind)-1]
		common.Log.Trace("Ind: % d", ind)
		if _, err := this.rs.Seek(-offset-buflen+int64(lastInd[0]), os.SEEK_END); err != nil {
			return err
		}
		return nil
	}

	common.Log.Debug("Error: EOF marker was not found.")
	return errors.New("EOF not found")
}
//
// Load the xrefs from the bottom of file prior to parsing the file.
// 1. Look for %%EOF marker, then
@ -1031,39 +1089,29 @@ func (this *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
this.xrefs = make(XrefTable)
this.objstms = make(ObjectStreams)
// Look for EOF marker and seek to its beginning.
// Define an offset position from the end of the file.
var offset int64 = 1000
// Get the file size.
fSize, err := this.rs.Seek(0, os.SEEK_END)
if err != nil {
return nil, err
}
common.Log.Trace("fsize: %d", fSize)
if fSize <= offset {
offset = fSize
}
_, err = this.rs.Seek(-offset, os.SEEK_END)
// Seek the EOF marker.
err = this.seekToEOFMarker(fSize)
if err != nil {
common.Log.Debug("Failed seek to eof marker: %v", err)
return nil, err
}
b1 := make([]byte, offset)
this.rs.Read(b1)
common.Log.Trace("Looking for EOF marker: \"%s\"", string(b1))
ind := reEOF.FindAllStringIndex(string(b1), -1)
if ind == nil {
common.Log.Debug("Error: EOF marker not found!")
return nil, errors.New("EOF marker not found")
}
lastInd := ind[len(ind)-1]
common.Log.Trace("Ind: % d", ind)
this.rs.Seek(-offset+int64(lastInd[0]), os.SEEK_END)
// Look for startxref and get the xref offset.
offset = 64
var offset int64 = 64
this.rs.Seek(-offset, os.SEEK_CUR)
b2 := make([]byte, offset)
this.rs.Read(b2)
_, err = this.rs.Read(b2)
if err != nil {
common.Log.Debug("Failed reading while looking for startxref: %v", err)
return nil, err
}
result := reStartXref.FindStringSubmatch(string(b2))
if len(result) < 2 {
@ -1071,7 +1119,6 @@ func (this *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
return nil, errors.New("Startxref not found")
}
if len(result) > 2 {
// GH: Take the last one? Make a test case.
common.Log.Debug("ERROR: Multiple startxref (%s)!", b2)
return nil, errors.New("Multiple startxref entries?")
}
@ -1133,8 +1180,9 @@ func (this *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
ptrailerDict, err := this.parseXref()
if err != nil {
common.Log.Debug("ERROR: Failed loading another (Prev) trailer")
return nil, err
common.Log.Debug("Warning: Error - Failed loading another (Prev) trailer")
common.Log.Debug("Attempting to continue by ignoring it")
break
}
xx, present = (*ptrailerDict)["Prev"]

View File

@ -13,6 +13,10 @@ import (
"os"
"regexp"
"bufio"
"io"
"strconv"
"github.com/unidoc/unidoc/common"
)
@ -77,9 +81,22 @@ func (this *PdfParser) rebuildXrefTable() error {
return nil
}
// parseObjectNumberFromString extracts the object number and generation
// number from an indirect object signature, e.g. "12 0 obj" -> (12, 0, nil).
//
// An error is returned when reIndirectObject does not match str with at least
// two capture groups.
//
// NOTE(review): conversion errors from strconv.Atoi are deliberately discarded
// here. Presumably reIndirectObject only captures digit runs, so Atoi could
// only fail on overflow - confirm against the regexp definition.
func parseObjectNumberFromString(str string) (int, int, error) {
	match := reIndirectObject.FindStringSubmatch(str)
	if len(match) >= 3 {
		objNum, _ := strconv.Atoi(match[1])
		genNum, _ := strconv.Atoi(match[2])
		return objNum, genNum, nil
	}

	return 0, 0, errors.New("Unable to detect indirect object signature")
}
// Parse the entire file from top down.
// Currently not supporting object streams...
// Also need to detect object streams and load the object numbers.
// Goes through the file byte-by-byte looking for "<num> <generation> obj" patterns.
// N.B. This collects the XREF_TABLE_ENTRY data only.
func (this *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
if this.repairsAttempted {
// Avoid multiple repairs (only try once).
@ -87,60 +104,183 @@ func (this *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
}
this.repairsAttempted = true
reRepairIndirectObject := regexp.MustCompile(`^(\d+)\s+(\d+)\s+obj`)
// Go to beginning, reset reader.
this.rs.Seek(0, os.SEEK_SET)
this.reader = bufio.NewReader(this.rs)
this.SetFileOffset(0)
// Keep a running buffer of last bytes.
bufLen := 20
last := make([]byte, bufLen)
xrefTable := XrefTable{}
for {
this.skipComments()
curOffset := this.GetFileOffset()
peakBuf, err := this.reader.Peek(10)
b, err := this.reader.ReadByte()
if err != nil {
// EOF
break
if err == io.EOF {
break
} else {
return nil, err
}
}
// Indirect object?
results := reRepairIndirectObject.FindIndex(peakBuf)
if len(results) > 0 {
obj, err := this.ParseIndirectObject()
// Format:
// object number - whitespace - generation number - obj
// e.g. "12 0 obj"
if b == 'j' && last[bufLen-1] == 'b' && last[bufLen-2] == 'o' && IsWhiteSpace(last[bufLen-3]) {
i := bufLen - 4
// Go past whitespace
for IsWhiteSpace(last[i]) && i > 0 {
i--
}
if i == 0 || !IsDecimalDigit(last[i]) {
continue
}
// Go past generation number
for IsDecimalDigit(last[i]) && i > 0 {
i--
}
if i == 0 || !IsWhiteSpace(last[i]) {
continue
}
// Go past whitespace
for IsWhiteSpace(last[i]) && i > 0 {
i--
}
if i == 0 || !IsDecimalDigit(last[i]) {
continue
}
// Go past object number.
for IsDecimalDigit(last[i]) && i > 0 {
i--
}
if i == 0 {
continue // Probably too long to be a valid object...
}
objOffset := this.GetFileOffset() - int64(bufLen-i)
objstr := append(last[i+1:], b)
objNum, genNum, err := parseObjectNumberFromString(string(objstr))
if err != nil {
common.Log.Debug("ERROR: Unable to parse indirect object (%s)", err)
common.Log.Debug("Unable to parse object number: %v", err)
return nil, err
}
if indObj, ok := obj.(*PdfIndirectObject); ok {
// Create and insert the XREF entry if not existing, or the generation number is higher.
if curXref, has := xrefTable[objNum]; !has || curXref.generation < genNum {
// Make the entry for the cross ref table.
xrefEntry := XrefObject{}
xrefEntry.xtype = XREF_TABLE_ENTRY
xrefEntry.objectNumber = int(indObj.ObjectNumber)
xrefEntry.generation = int(indObj.GenerationNumber)
xrefEntry.offset = curOffset
xrefTable[int(indObj.ObjectNumber)] = xrefEntry
} else if streamObj, ok := obj.(*PdfObjectStream); ok {
// Make the entry for the cross ref table.
xrefEntry := XrefObject{}
xrefEntry.xtype = XREF_TABLE_ENTRY
xrefEntry.objectNumber = int(streamObj.ObjectNumber)
xrefEntry.generation = int(streamObj.GenerationNumber)
xrefEntry.offset = curOffset
xrefTable[int(streamObj.ObjectNumber)] = xrefEntry
} else {
return nil, fmt.Errorf("Not an indirect object or stream (%T)", obj) // Should never happen.
xrefEntry.objectNumber = int(objNum)
xrefEntry.generation = int(genNum)
xrefEntry.offset = objOffset
xrefTable[objNum] = xrefEntry
}
} else if string(peakBuf[0:6]) == "endobj" {
this.reader.Discard(6)
} else {
// Stop once we reach xrefs/trailer section etc. Technically this could fail for complex
// cases, but lets keep it simple for now. Add more complexity when needed (problematic user committed files).
// In general more likely that more complex files would have better understanding of the PDF standard.
common.Log.Debug("Not an object - stop repair rebuilding xref here (%s)", peakBuf)
break
}
last = append(last[1:bufLen], b)
}
return &xrefTable, nil
}
// repairSeekXrefMarker searches backwards from the end of the file, in chunks
// of up to 1000 bytes, for the pattern `\sxref\s*`. On success the underlying
// stream is positioned at the last match within the first chunk (counting
// from the end) that contains one, this.reader is re-created on top of it, and
// leading whitespace is consumed so the next byte to be read is
// non-whitespace (the 'x' of "xref").
//
// An error is returned if seeking, reading or peeking fails, or if no xref
// marker is found anywhere in the file.
//
// NOTE(review): chunks do not overlap, so a marker that straddles a chunk
// boundary would not be detected - confirm whether this matters in practice.
func (this *PdfParser) repairSeekXrefMarker() error {
	// Get the file size.
	fSize, err := this.rs.Seek(0, os.SEEK_END)
	if err != nil {
		return err
	}

	reXrefTableStart := regexp.MustCompile(`\sxref\s*`)

	// Distance from the end of the file that has already been searched.
	var offset int64 = 0

	// Number of bytes to read from the file per chunk.
	var buflen int64 = 1000

	for offset < fSize {
		if fSize <= (buflen + offset) {
			// Clamp the last chunk so we never seek before the start of the file.
			buflen = fSize - offset
		}

		// Move back enough (as we need to read forward).
		if _, err := this.rs.Seek(-offset-buflen, os.SEEK_END); err != nil {
			return err
		}

		// Read the data. A failed read must not be treated as "no marker here".
		b1 := make([]byte, buflen)
		if _, err := this.rs.Read(b1); err != nil {
			return err
		}

		common.Log.Trace("Looking for xref : \"%s\"", string(b1))
		ind := reXrefTableStart.FindAllStringIndex(string(b1), -1)
		if ind == nil {
			common.Log.Debug("Warning: xref marker not found! - continue seeking")
			offset += buflen
			continue
		}

		// Found it: position the stream at the beginning of the last match.
		lastInd := ind[len(ind)-1]
		common.Log.Trace("Ind: % d", ind)
		if _, err := this.rs.Seek(-offset-buflen+int64(lastInd[0]), os.SEEK_END); err != nil {
			return err
		}
		this.reader = bufio.NewReader(this.rs)

		// The match begins with a whitespace byte; go past whitespace and
		// finish at 'x'.
		for {
			bb, err := this.reader.Peek(1)
			if err != nil {
				return err
			}
			common.Log.Trace("B: %d %c", bb[0], bb[0])
			if !IsWhiteSpace(bb[0]) {
				break
			}
			this.reader.Discard(1)
		}

		return nil
	}

	common.Log.Debug("Error: Xref table marker was not found.")
	return errors.New("xref not found ")
}
// seekPdfVersionTopDown is called when the PDF version is not found normally.
// It rewinds to the start of the file, resets this.reader, and scans forward
// byte-by-byte for the first occurrence of "PDF-<digit>.<digit>", e.g. the
// tail of a "%PDF-1.7" header. Only single-digit major and minor versions are
// recognised, and the leading '%' is not checked.
//
// Returns the major and minor version numbers, or an error if the stream
// cannot be rewound or read, or if no such pattern occurs before EOF.
func (this *PdfParser) seekPdfVersionTopDown() (int, int, error) {
	// Go to beginning, reset reader.
	if _, err := this.rs.Seek(0, os.SEEK_SET); err != nil {
		return 0, 0, err
	}
	this.reader = bufio.NewReader(this.rs)

	// Sliding window over the most recently read bytes (oldest first).
	bufLen := 20
	last := make([]byte, bufLen)

	for {
		b, err := this.reader.ReadByte()
		if err != nil {
			if err == io.EOF {
				break
			}
			return 0, 0, err
		}

		// Format:
		// "PDF-" - major digit - "." - minor digit
		// e.g. "PDF-1.7"
		// b is the candidate minor digit; the window holds the bytes before it.
		if IsDecimalDigit(b) && last[bufLen-1] == '.' && IsDecimalDigit(last[bufLen-2]) && last[bufLen-3] == '-' &&
			last[bufLen-4] == 'F' && last[bufLen-5] == 'D' && last[bufLen-6] == 'P' {
			major := int(last[bufLen-2] - '0')
			minor := int(b - '0')
			return major, minor, nil
		}

		// Shift the window left by one in place and store the new byte at the
		// end, rather than re-growing the slice with append.
		copy(last, last[1:])
		last[bufLen-1] = b
	}

	return 0, 0, errors.New("Version not found")
}

View File

@ -143,3 +143,11 @@ func (this *PdfParser) inspect() (map[string]int, error) {
return objTypes, nil
}
// absInt returns the absolute value of x.
func absInt(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}

View File

@ -680,10 +680,13 @@ func (r *PdfReader) newPdfAnnotationFromIndirectObject(container *PdfIndirectObj
subtypeObj, has := (*d)["Subtype"]
if !has {
return nil, fmt.Errorf("Missing Subtype")
common.Log.Debug("WARNING: Compatibility issue - annotation Subtype missing - assuming no subtype")
annot.context = nil
return annot, nil
}
subtype, ok := subtypeObj.(*PdfObjectName)
if !ok {
common.Log.Debug("ERROR: Invalid Subtype object type != name (%T)", subtypeObj)
return nil, fmt.Errorf("Invalid Subtype object type != name (%T)", subtypeObj)
}
switch *subtype {
@ -1597,6 +1600,8 @@ func (this *PdfAnnotation) GetContainingPdfObject() PdfObject {
return this.primitive
}
// Note: Call the sub-annotation's ToPdfObject to set both the generic and non-generic information.
// TODO/FIXME: Consider doing it here instead.
func (this *PdfAnnotation) ToPdfObject() PdfObject {
container := this.primitive
d := container.PdfObject.(*PdfObjectDictionary)

View File

@ -354,6 +354,7 @@ func (this *PdfColorspaceDeviceRGB) ImageToRGB(img Image) (Image, error) {
}
func (this *PdfColorspaceDeviceRGB) ImageToGray(img Image) (Image, error) {
grayImage := img
samples := img.GetSamples()
@ -374,7 +375,9 @@ func (this *PdfColorspaceDeviceRGB) ImageToGray(img Image) (Image, error) {
// Convert to uint32
val := uint32(grayValue * maxVal)
graySamples = append(graySamples, val)
}
grayImage.SetSamples(graySamples)
grayImage.ColorComponents = 1
@ -893,7 +896,7 @@ func (this *PdfColorspaceCalRGB) String() string {
}
func (this *PdfColorspaceCalRGB) GetNumComponents() int {
return 1
return 3
}
func newPdfColorspaceCalRGBFromPdfObject(obj PdfObject) (*PdfColorspaceCalRGB, error) {
@ -1119,7 +1122,7 @@ func (this *PdfColorspaceCalRGB) ImageToRGB(img Image) (Image, error) {
maxVal := math.Pow(2, float64(img.BitsPerComponent)) - 1
rgbSamples := []uint32{}
for i := 0; i < len(samples); i++ {
for i := 0; i < len(samples)-2; i++ {
// A, B, C in range 0.0 to 1.0
aVal := float64(samples[i]) / maxVal
bVal := float64(samples[i+1]) / maxVal
@ -1916,6 +1919,7 @@ func (this *PdfColorspaceSpecialPattern) ColorFromFloats(vals []float64) (PdfCol
// the name of the pattern.
func (this *PdfColorspaceSpecialPattern) ColorFromPdfObjects(objects []PdfObject) (PdfColor, error) {
if len(objects) < 1 {
common.Log.Error("ColorFromPdfObjects: len(objects)=%d", len(objects))
return nil, errors.New("Invalid number of parameters")
}
patternColor := &PdfColorPattern{}

View File

@ -17,12 +17,12 @@ import (
//
type PdfAcroForm struct {
Fields *[]*PdfField
NeedAppearances PdfObject
SigFlags PdfObject
CO PdfObject
DR PdfObject
DA PdfObject
Q PdfObject
NeedAppearances *PdfObjectBool
SigFlags *PdfObjectInteger
CO *PdfObjectArray
DR *PdfPageResources
DA *PdfObjectString
Q *PdfObjectInteger
XFA PdfObject
primitive *PdfIndirectObject
@ -78,23 +78,66 @@ func (r *PdfReader) newPdfAcroFormFromDict(d *PdfObjectDictionary) (*PdfAcroForm
}
if obj, has := (*d)["NeedAppearances"]; has {
acroForm.NeedAppearances = obj
val, ok := obj.(*PdfObjectBool)
if ok {
acroForm.NeedAppearances = val
} else {
common.Log.Debug("ERROR: NeedAppearances invalid (got %T)", obj)
}
}
if obj, has := (*d)["SigFlags"]; has {
acroForm.SigFlags = obj
val, ok := obj.(*PdfObjectInteger)
if ok {
acroForm.SigFlags = val
} else {
common.Log.Debug("ERROR: SigFlags invalid (got %T)", obj)
}
}
if obj, has := (*d)["CO"]; has {
acroForm.CO = obj
obj = TraceToDirectObject(obj)
arr, ok := obj.(*PdfObjectArray)
if ok {
acroForm.CO = arr
} else {
common.Log.Debug("ERROR: CO invalid (got %T)", obj)
}
}
if obj, has := (*d)["DR"]; has {
acroForm.DR = obj
obj = TraceToDirectObject(obj)
if d, ok := obj.(*PdfObjectDictionary); ok {
resources, err := NewPdfPageResourcesFromDict(d)
if err != nil {
common.Log.Error("Invalid DR: %v", err)
return nil, err
}
acroForm.DR = resources
} else {
common.Log.Debug("ERROR: DR invalid (got %T)", obj)
}
}
if obj, has := (*d)["DA"]; has {
acroForm.DA = obj
str, ok := obj.(*PdfObjectString)
if ok {
acroForm.DA = str
} else {
common.Log.Debug("ERROR: DA invalid (got %T)", obj)
}
}
if obj, has := (*d)["Q"]; has {
acroForm.Q = obj
val, ok := obj.(*PdfObjectInteger)
if ok {
acroForm.Q = val
} else {
common.Log.Debug("ERROR: Q invalid (got %T)", obj)
}
}
if obj, has := (*d)["XFA"]; has {
acroForm.XFA = obj
}
@ -128,7 +171,7 @@ func (this *PdfAcroForm) ToPdfObject() PdfObject {
(*dict)["CO"] = this.CO
}
if this.DR != nil {
(*dict)["DR"] = this.DR
(*dict)["DR"] = this.DR.ToPdfObject()
}
if this.DA != nil {
(*dict)["DA"] = this.DA

View File

@ -5,7 +5,7 @@
//
// Allow higher level manipulation of PDF files and pages.
// This can be continously expanded to support more and more features.
// This can be continuously expanded to support more and more features.
// Generic handling can be done by defining elements as PdfObject which
// can later be replaced and fully defined.
//
@ -17,29 +17,27 @@ import (
"fmt"
"strings"
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
)
// PDF page object (7.7.3.3 - Table 30).
type PdfPage struct {
Parent PdfObject
LastModified *PdfDate
Resources *PdfPageResources
CropBox *PdfRectangle
MediaBox *PdfRectangle
BleedBox *PdfRectangle
TrimBox *PdfRectangle
ArtBox *PdfRectangle
BoxColorInfo PdfObject
Contents PdfObject
Rotate *int64
Group PdfObject
Thumb PdfObject
B PdfObject
Dur PdfObject
Trans PdfObject
//Annots PdfObject
Parent PdfObject
LastModified *PdfDate
Resources *PdfPageResources
CropBox *PdfRectangle
MediaBox *PdfRectangle
BleedBox *PdfRectangle
TrimBox *PdfRectangle
ArtBox *PdfRectangle
BoxColorInfo PdfObject
Contents PdfObject
Rotate *int64
Group PdfObject
Thumb PdfObject
B PdfObject
Dur PdfObject
Trans PdfObject
AA PdfObject
Metadata PdfObject
PieceInfo PdfObject
@ -52,7 +50,7 @@ type PdfPage struct {
PresSteps PdfObject
UserUnit PdfObject
VP PdfObject
//Annotations
Annotations []*PdfAnnotation
// Primitive container.
@ -477,7 +475,12 @@ func (this *PdfPage) GetPageDict() *PdfObjectDictionary {
if this.Annotations != nil {
arr := PdfObjectArray{}
for _, annot := range this.Annotations {
arr = append(arr, annot.GetContext().ToPdfObject())
if subannot := annot.GetContext(); subannot != nil {
arr = append(arr, subannot.ToPdfObject())
} else {
// Generic annotation dict (without subtype).
arr = append(arr, annot.ToPdfObject())
}
}
p.Set("Annots", &arr)
}
@ -760,6 +763,7 @@ func (this *PdfPage) GetAllContentStreams() (string, error) {
return strings.Join(cstreams, " "), nil
}
// Needs to have matching name and colorspace map entry. The Names define the order.
type PdfPageResourcesColorspaces struct {
Names []string
Colorspaces map[string]PdfColorspace
@ -767,19 +771,7 @@ type PdfPageResourcesColorspaces struct {
container *PdfIndirectObject
}
// NewPdfPageResourcesColorspaces returns a colorspace collection with an empty
// (non-nil) name list and an empty (non-nil) name-to-colorspace map.
func NewPdfPageResourcesColorspaces() *PdfPageResourcesColorspaces {
	colorspaces := &PdfPageResourcesColorspaces{}
	colorspaces.Names = []string{}
	colorspaces.Colorspaces = map[string]PdfColorspace{}
	return colorspaces
}
// Add registers colorspace under name.
//
// If name is already registered, an error is logged and the stored colorspace
// is replaced. The name is not appended to Names a second time, so Names keeps
// exactly one entry per map key and the original position is retained.
// A nil Colorspaces map (e.g. on a zero-value struct) is initialized on demand.
func (this *PdfPageResourcesColorspaces) Add(name string, colorspace PdfColorspace) {
	if this.Colorspaces == nil {
		// Writing to a nil map would panic.
		this.Colorspaces = map[string]PdfColorspace{}
	}

	if _, ok := this.Colorspaces[name]; ok {
		common.Log.Error("Colorspace name=%#q already exists", name)
	} else {
		this.Names = append(this.Names, name)
	}

	this.Colorspaces[name] = colorspace
}
func newPdfPageResourcesColorspacesFromPdfObject(obj PdfObject) (*PdfPageResourcesColorspaces, error) {
colorspaces := &PdfPageResourcesColorspaces{}
@ -821,305 +813,3 @@ func (this *PdfPageResourcesColorspaces) ToPdfObject() PdfObject {
return dict
}
// Page resources model.
// Implements PdfModel.
//
// Each exported field corresponds to the resource dictionary entry of the
// same name. Except for ColorSpace, the entries are kept as generic PdfObject
// values.
type PdfPageResources struct {
// External graphics state entries; AddExtGState expects this to trace to a dictionary.
ExtGState PdfObject
//ColorSpace PdfObject
// Parsed colorspaces (the only entry with a dedicated model type).
ColorSpace *PdfPageResourcesColorspaces
Pattern PdfObject
// Shading entries; the by-name accessors expect a *PdfObjectDictionary here.
Shading PdfObject
XObject PdfObject
Font PdfObject
ProcSet PdfObject
Properties PdfObject
// Primitive: the underlying dictionary, returned by GetContainingPdfObject
// and filled in by ToPdfObject.
primitive *PdfObjectDictionary
}
// NewPdfPageResources returns an empty page resources model backed by a new,
// empty primitive dictionary.
func NewPdfPageResources() *PdfPageResources {
	return &PdfPageResources{primitive: &PdfObjectDictionary{}}
}
// NewPdfPageResourcesFromDict builds a page resources model from the entries
// of dict.
//
// The "ColorSpace" entry, when present, is parsed into a
// PdfPageResourcesColorspaces; a parse failure is returned as the error with a
// nil model. All other recognised entries ("ExtGState", "Pattern", "Shading",
// "XObject", "Font", "ProcSet", "Properties") are stored as-is.
func NewPdfPageResourcesFromDict(dict *PdfObjectDictionary) (*PdfPageResources, error) {
	res := NewPdfPageResources()
	entries := *dict

	// ColorSpace is the only entry that needs parsing and can fail.
	if csObj, ok := entries["ColorSpace"]; ok {
		parsed, err := newPdfPageResourcesColorspacesFromPdfObject(csObj)
		if err != nil {
			return nil, err
		}
		res.ColorSpace = parsed
	}

	// The remaining entries are copied verbatim when defined.
	generic := []struct {
		key PdfObjectName
		dst *PdfObject
	}{
		{"ExtGState", &res.ExtGState},
		{"Pattern", &res.Pattern},
		{"Shading", &res.Shading},
		{"XObject", &res.XObject},
		{"Font", &res.Font},
		{"ProcSet", &res.ProcSet},
		{"Properties", &res.Properties},
	}
	for _, g := range generic {
		if val, ok := entries[g.key]; ok {
			*g.dst = val
		}
	}

	return res, nil
}
// GetContainingPdfObject returns the primitive dictionary that backs this
// resources model.
func (r *PdfPageResources) GetContainingPdfObject() PdfObject {
	return r.primitive
}
// ToPdfObject writes the model's fields into the backing primitive dictionary
// and returns that dictionary. Every value goes through SetIfNotNil; the
// ColorSpace entry is serialized only when a colorspaces model is attached.
func (r *PdfPageResources) ToPdfObject() PdfObject {
	resDict := r.primitive

	resDict.SetIfNotNil("ExtGState", r.ExtGState)
	if colorspaces := r.ColorSpace; colorspaces != nil {
		resDict.SetIfNotNil("ColorSpace", colorspaces.ToPdfObject())
	}
	resDict.SetIfNotNil("Pattern", r.Pattern)
	resDict.SetIfNotNil("Shading", r.Shading)
	resDict.SetIfNotNil("XObject", r.XObject)
	resDict.SetIfNotNil("Font", r.Font)
	resDict.SetIfNotNil("ProcSet", r.ProcSet)
	resDict.SetIfNotNil("Properties", r.Properties)

	return resDict
}
// AddExtGState registers an external graphics state (ExtGState) under gsName.
// gsDict can be given either directly as a dictionary or as an indirect object
// containing a dictionary. The ExtGState container is created on first use;
// ErrTypeError is returned when the existing container does not resolve to a
// dictionary.
func (r *PdfPageResources) AddExtGState(gsName PdfObjectName, gsDict PdfObject) error {
	if r.ExtGState == nil {
		r.ExtGState = &PdfObjectDictionary{}
	}

	direct := TraceToDirectObject(r.ExtGState)
	if states, isDict := direct.(*PdfObjectDictionary); isDict {
		(*states)[gsName] = gsDict
		return nil
	}

	common.Log.Debug("ExtGState type error (got %T/%T)", r.ExtGState, direct)
	return ErrTypeError
}
// GetShadingByName returns the shading registered under keyName. The bool
// result indicates whether the shading was found and loaded; it is false when
// there is no Shading entry, the entry is not a dictionary, keyName is absent,
// or the shading object fails to load.
func (r *PdfPageResources) GetShadingByName(keyName string) (*PdfShading, bool) {
	if r.Shading == nil {
		return nil, false
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Shading dictionary is accepted.
	shadingDict, ok := TraceToDirectObject(r.Shading).(*PdfObjectDictionary)
	if !ok {
		common.Log.Debug("ERROR: Invalid Shading entry - not a dict (got %T)", r.Shading)
		return nil, false
	}

	obj, has := (*shadingDict)[PdfObjectName(keyName)]
	if !has {
		return nil, false
	}

	shading, err := newPdfShadingFromPdfObject(obj)
	if err != nil {
		common.Log.Debug("ERROR: failed to load pdf shading: %v", err)
		return nil, false
	}
	return shading, true
}
// SetShadingByName stores shadingObj under keyName in the Shading dictionary,
// creating the dictionary when there is none yet. ErrTypeError is returned when
// the existing Shading entry does not resolve to a dictionary.
func (r *PdfPageResources) SetShadingByName(keyName string, shadingObj PdfObject) error {
	if r.Shading == nil {
		r.Shading = &PdfObjectDictionary{}
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Shading dictionary is accepted.
	shadingDict, ok := TraceToDirectObject(r.Shading).(*PdfObjectDictionary)
	if !ok {
		return ErrTypeError
	}

	(*shadingDict)[PdfObjectName(keyName)] = shadingObj
	return nil
}
// GetPatternByName returns the pattern registered under keyName. The bool
// result indicates whether the pattern was found and loaded; it is false when
// there is no Pattern entry, the entry is not a dictionary, keyName is absent,
// or the pattern object fails to load.
func (r *PdfPageResources) GetPatternByName(keyName string) (*PdfPattern, bool) {
	if r.Pattern == nil {
		return nil, false
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Pattern dictionary is accepted.
	patternDict, ok := TraceToDirectObject(r.Pattern).(*PdfObjectDictionary)
	if !ok {
		common.Log.Debug("ERROR: Invalid Pattern entry - not a dict (got %T)", r.Pattern)
		return nil, false
	}

	obj, has := (*patternDict)[PdfObjectName(keyName)]
	if !has {
		return nil, false
	}

	pattern, err := newPdfPatternFromPdfObject(obj)
	if err != nil {
		common.Log.Debug("ERROR: failed to load pdf pattern: %v", err)
		return nil, false
	}
	return pattern, true
}
// SetPatternByName stores pattern under keyName in the Pattern dictionary,
// creating the dictionary when there is none yet. ErrTypeError is returned when
// the existing Pattern entry does not resolve to a dictionary.
func (r *PdfPageResources) SetPatternByName(keyName string, pattern PdfObject) error {
	if r.Pattern == nil {
		r.Pattern = &PdfObjectDictionary{}
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Pattern dictionary is accepted.
	patternDict, ok := TraceToDirectObject(r.Pattern).(*PdfObjectDictionary)
	if !ok {
		return ErrTypeError
	}

	(*patternDict)[PdfObjectName(keyName)] = pattern
	return nil
}
// HasXObjectByName reports whether GetXObjectByName yields a stream for keyName.
func (r *PdfPageResources) HasXObjectByName(keyName string) bool {
	stream, _ := r.GetXObjectByName(keyName)
	return stream != nil
}
// XObjectType enumerates the kinds of XObject reported by GetXObjectByName.
type XObjectType int

const (
	// XObjectTypeUndefined is reported whenever no usable XObject is found.
	XObjectTypeUndefined XObjectType = iota
	// XObjectTypeImage corresponds to the Subtype name "Image".
	XObjectTypeImage
	// XObjectTypeForm corresponds to the Subtype name "Form".
	XObjectTypeForm
	// XObjectTypePS corresponds to the Subtype name "PS".
	XObjectTypePS
	XObjectTypeUnknown
)
// GetXObjectByName returns the XObject stream registered under keyName together
// with its type, taken from the stream dictionary's Subtype entry. In every
// failure case (no XObject entry, entry not a dictionary, keyName absent, value
// not a stream, Subtype missing or not recognized) it returns nil and
// XObjectTypeUndefined.
func (r *PdfPageResources) GetXObjectByName(keyName string) (*PdfObjectStream, XObjectType) {
	if r.XObject == nil {
		return nil, XObjectTypeUndefined
	}

	direct := TraceToDirectObject(r.XObject)
	xresDict, isDict := direct.(*PdfObjectDictionary)
	if !isDict {
		common.Log.Debug("ERROR: XObject not a dictionary! (got %T)", direct)
		return nil, XObjectTypeUndefined
	}

	obj, found := (*xresDict)[PdfObjectName(keyName)]
	if !found {
		return nil, XObjectTypeUndefined
	}

	stream, isStream := obj.(*PdfObjectStream)
	if !isStream {
		common.Log.Debug("XObject not pointing to a stream %T", obj)
		return nil, XObjectTypeUndefined
	}

	streamDict := stream.PdfObjectDictionary
	subtype, isName := (*streamDict)["Subtype"].(*PdfObjectName)
	if !isName {
		common.Log.Debug("XObject Subtype not a Name, dict: %s", streamDict.String())
		return nil, XObjectTypeUndefined
	}

	switch *subtype {
	case "Image":
		return stream, XObjectTypeImage
	case "Form":
		return stream, XObjectTypeForm
	case "PS":
		return stream, XObjectTypePS
	}

	common.Log.Debug("XObject Subtype not known (%s)", *subtype)
	return nil, XObjectTypeUndefined
}
// setXObjectByName stores stream under keyName in the XObject dictionary,
// creating the dictionary when there is none yet. An error is returned when the
// existing XObject entry does not resolve to a dictionary.
func (r *PdfPageResources) setXObjectByName(keyName string, stream *PdfObjectStream) error {
	if r.XObject == nil {
		r.XObject = &PdfObjectDictionary{}
	}

	direct := TraceToDirectObject(r.XObject)
	if xresDict, isDict := direct.(*PdfObjectDictionary); isDict {
		(*xresDict)[PdfObjectName(keyName)] = stream
		return nil
	}

	common.Log.Debug("Invalid XObject, got %T/%T", r.XObject, direct)
	return errors.New("Type check error")
}
// GetXObjectImageByName loads the image XObject registered under keyName.
// It returns nil, nil when no XObject stream is found for keyName, and an error
// when the XObject is not an image or cannot be loaded from its stream.
func (r *PdfPageResources) GetXObjectImageByName(keyName string) (*XObjectImage, error) {
	stream, xtype := r.GetXObjectByName(keyName)
	switch {
	case stream == nil:
		return nil, nil
	case xtype != XObjectTypeImage:
		return nil, errors.New("Not an image")
	}

	image, err := NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	return image, nil
}
// SetXObjectImageByName converts ximg to its stream object and registers it
// under keyName in the XObject dictionary.
func (r *PdfPageResources) SetXObjectImageByName(keyName string, ximg *XObjectImage) error {
	return r.setXObjectByName(keyName, ximg.ToPdfObject().(*PdfObjectStream))
}
// GetXObjectFormByName loads the form XObject registered under keyName.
// It returns nil, nil when no XObject stream is found for keyName, and an error
// when the XObject is not a form or cannot be loaded from its stream.
func (r *PdfPageResources) GetXObjectFormByName(keyName string) (*XObjectForm, error) {
	stream, xtype := r.GetXObjectByName(keyName)
	switch {
	case stream == nil:
		return nil, nil
	case xtype != XObjectTypeForm:
		return nil, errors.New("Not a form")
	}

	form, err := NewXObjectFormFromStream(stream)
	if err != nil {
		return nil, err
	}
	return form, nil
}
// SetXObjectFormByName converts xform to its stream object and registers it
// under keyName in the XObject dictionary.
func (r *PdfPageResources) SetXObjectFormByName(keyName string, xform *XObjectForm) error {
	return r.setXObjectByName(keyName, xform.ToPdfObject().(*PdfObjectStream))
}

View File

@ -24,7 +24,6 @@ type PdfReader struct {
pageCount int
catalog *PdfObjectDictionary
outlineTree *PdfOutlineTreeNode
forms *PdfObjectDictionary
AcroForm *PdfAcroForm
modelManager *ModelManager
@ -199,13 +198,7 @@ func (this *PdfReader) loadStructure() error {
return err
}
// Get forms.
this.forms, err = this.GetForms()
if err != nil {
return err
}
// Get fields
// Load interactive forms and fields.
this.AcroForm, err = this.loadForms()
if err != nil {
return err
@ -437,48 +430,6 @@ func (this *PdfReader) GetOutlinesFlattened() ([]*PdfOutlineTreeNode, []string,
return outlineNodeList, flattenedTitleList, nil
}
// GetForms returns the document's AcroForm dictionary as found in the catalog,
// either stored directly or behind a reference. It returns nil, nil when the
// catalog yields no forms dictionary, and an error when the file is encrypted
// but not authenticated, the reference lookup fails, or traversing the forms
// data fails.
func (this *PdfReader) GetForms() (*PdfObjectDictionary, error) {
	if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
		return nil, fmt.Errorf("File need to be decrypted first")
	}

	// Has forms?
	var formsDict *PdfObjectDictionary
	switch entry := (*this.catalog)["AcroForm"].(type) {
	case *PdfObjectDictionary:
		common.Log.Trace("Has Acro forms - dictionary under Catalog")
		formsDict = entry
	case *PdfObjectReference:
		common.Log.Trace("Has Acro forms - Indirect object")
		formsObj, err := this.parser.LookupByReference(*entry)
		if err != nil {
			common.Log.Debug("ERROR: Failed to read forms")
			return nil, err
		}
		if iobj, isIndirect := formsObj.(*PdfIndirectObject); isIndirect {
			if dict, isDict := iobj.PdfObject.(*PdfObjectDictionary); isDict {
				formsDict = dict
			}
		}
	}

	if formsDict == nil {
		common.Log.Trace("Does not have forms")
		return nil, nil
	}
	common.Log.Trace("Has Acro forms")

	common.Log.Trace("Traverse the Acroforms structure")
	if err := this.traverseObjectData(formsDict); err != nil {
		common.Log.Debug("ERROR: Unable to traverse AcroForms (%s)", err)
		return nil, err
	}

	return formsDict, nil
}
func (this *PdfReader) loadForms() (*PdfAcroForm, error) {
if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
return nil, fmt.Errorf("File need to be decrypted first")
@ -511,6 +462,15 @@ func (this *PdfReader) loadForms() (*PdfAcroForm, error) {
common.Log.Trace("Has Acro forms")
// Load it.
// Ensure we have access to everything.
common.Log.Trace("Traverse the Acroforms structure")
err = this.traverseObjectData(formsDict)
if err != nil {
common.Log.Debug("ERROR: Unable to traverse AcroForms (%s)", err)
return nil, err
}
// Create the acro forms object.
acroForm, err := this.newPdfAcroFormFromDict(formsDict)
if err != nil {
return nil, err

312
pdf/model/resources.go Normal file
View File

@ -0,0 +1,312 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"errors"
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
)
// PdfPageResources is the page resources model.
// Implements PdfModel.
type PdfPageResources struct {
	// Each PdfObject field holds the raw object found under the resource
	// dictionary key of the same name.
	ExtGState PdfObject
	// ColorSpace is the only entry kept as a parsed model instead of a raw object.
	ColorSpace *PdfPageResourcesColorspaces
	Pattern    PdfObject
	Shading    PdfObject
	XObject    PdfObject
	Font       PdfObject
	ProcSet    PdfObject
	Properties PdfObject
	// Primitive resource container that the model is written into by ToPdfObject.
	primitive *PdfObjectDictionary
}
// NewPdfPageResources creates an empty page resources model backed by a fresh
// primitive dictionary.
func NewPdfPageResources() *PdfPageResources {
	return &PdfPageResources{primitive: &PdfObjectDictionary{}}
}
// NewPdfPageResourcesFromDict builds a page resources model from the entries of
// dict. The ColorSpace entry is parsed into a colorspaces model unless it is a
// null object; every other recognized entry is stored as the raw object. An
// error is returned only when the ColorSpace entry cannot be loaded.
func NewPdfPageResourcesFromDict(dict *PdfObjectDictionary) (*PdfPageResources, error) {
	res := NewPdfPageResources()
	entries := *dict

	// Entries that are copied over verbatim, without interpretation.
	rawFields := []struct {
		key    PdfObjectName
		target *PdfObject
	}{
		{"ExtGState", &res.ExtGState},
		{"Pattern", &res.Pattern},
		{"Shading", &res.Shading},
		{"XObject", &res.XObject},
		{"Font", &res.Font},
		{"ProcSet", &res.ProcSet},
		{"Properties", &res.Properties},
	}
	for _, field := range rawFields {
		if obj, isDefined := entries[field.key]; isDefined {
			*field.target = obj
		}
	}

	if obj, isDefined := entries["ColorSpace"]; isDefined && !isNullObject(obj) {
		colorspaces, err := newPdfPageResourcesColorspacesFromPdfObject(obj)
		if err != nil {
			return nil, err
		}
		res.ColorSpace = colorspaces
	}

	return res, nil
}
// GetContainingPdfObject returns the primitive dictionary that backs this
// resources model.
func (r *PdfPageResources) GetContainingPdfObject() PdfObject {
	return r.primitive
}
// ToPdfObject writes the model's fields into the backing primitive dictionary
// and returns that dictionary. Every value goes through SetIfNotNil; the
// ColorSpace entry is serialized only when a colorspaces model is attached.
func (r *PdfPageResources) ToPdfObject() PdfObject {
	resDict := r.primitive

	resDict.SetIfNotNil("ExtGState", r.ExtGState)
	if colorspaces := r.ColorSpace; colorspaces != nil {
		resDict.SetIfNotNil("ColorSpace", colorspaces.ToPdfObject())
	}
	resDict.SetIfNotNil("Pattern", r.Pattern)
	resDict.SetIfNotNil("Shading", r.Shading)
	resDict.SetIfNotNil("XObject", r.XObject)
	resDict.SetIfNotNil("Font", r.Font)
	resDict.SetIfNotNil("ProcSet", r.ProcSet)
	resDict.SetIfNotNil("Properties", r.Properties)

	return resDict
}
// AddExtGState registers an external graphics state (ExtGState) under gsName.
// gsDict can be given either directly as a dictionary or as an indirect object
// containing a dictionary. The ExtGState container is created on first use;
// ErrTypeError is returned when the existing container does not resolve to a
// dictionary.
func (r *PdfPageResources) AddExtGState(gsName PdfObjectName, gsDict PdfObject) error {
	if r.ExtGState == nil {
		r.ExtGState = &PdfObjectDictionary{}
	}

	direct := TraceToDirectObject(r.ExtGState)
	if states, isDict := direct.(*PdfObjectDictionary); isDict {
		(*states)[gsName] = gsDict
		return nil
	}

	common.Log.Debug("ExtGState type error (got %T/%T)", r.ExtGState, direct)
	return ErrTypeError
}
// GetShadingByName returns the shading registered under keyName. The bool
// result indicates whether the shading was found and loaded; it is false when
// there is no Shading entry, the entry is not a dictionary, keyName is absent,
// or the shading object fails to load.
func (r *PdfPageResources) GetShadingByName(keyName string) (*PdfShading, bool) {
	if r.Shading == nil {
		return nil, false
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Shading dictionary is accepted.
	shadingDict, ok := TraceToDirectObject(r.Shading).(*PdfObjectDictionary)
	if !ok {
		common.Log.Debug("ERROR: Invalid Shading entry - not a dict (got %T)", r.Shading)
		return nil, false
	}

	obj, has := (*shadingDict)[PdfObjectName(keyName)]
	if !has {
		return nil, false
	}

	shading, err := newPdfShadingFromPdfObject(obj)
	if err != nil {
		common.Log.Debug("ERROR: failed to load pdf shading: %v", err)
		return nil, false
	}
	return shading, true
}
// SetShadingByName stores shadingObj under keyName in the Shading dictionary,
// creating the dictionary when there is none yet. ErrTypeError is returned when
// the existing Shading entry does not resolve to a dictionary.
func (r *PdfPageResources) SetShadingByName(keyName string, shadingObj PdfObject) error {
	if r.Shading == nil {
		r.Shading = &PdfObjectDictionary{}
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Shading dictionary is accepted.
	shadingDict, ok := TraceToDirectObject(r.Shading).(*PdfObjectDictionary)
	if !ok {
		return ErrTypeError
	}

	(*shadingDict)[PdfObjectName(keyName)] = shadingObj
	return nil
}
// GetPatternByName returns the pattern registered under keyName. The bool
// result indicates whether the pattern was found and loaded; it is false when
// there is no Pattern entry, the entry is not a dictionary, keyName is absent,
// or the pattern object fails to load.
func (r *PdfPageResources) GetPatternByName(keyName string) (*PdfPattern, bool) {
	if r.Pattern == nil {
		return nil, false
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Pattern dictionary is accepted.
	patternDict, ok := TraceToDirectObject(r.Pattern).(*PdfObjectDictionary)
	if !ok {
		common.Log.Debug("ERROR: Invalid Pattern entry - not a dict (got %T)", r.Pattern)
		return nil, false
	}

	obj, has := (*patternDict)[PdfObjectName(keyName)]
	if !has {
		return nil, false
	}

	pattern, err := newPdfPatternFromPdfObject(obj)
	if err != nil {
		common.Log.Debug("ERROR: failed to load pdf pattern: %v", err)
		return nil, false
	}
	return pattern, true
}
// SetPatternByName stores pattern under keyName in the Pattern dictionary,
// creating the dictionary when there is none yet. ErrTypeError is returned when
// the existing Pattern entry does not resolve to a dictionary.
func (r *PdfPageResources) SetPatternByName(keyName string, pattern PdfObject) error {
	if r.Pattern == nil {
		r.Pattern = &PdfObjectDictionary{}
	}

	// Resolve through indirect objects first, the same way the ExtGState and
	// XObject entries are handled, so an indirect Pattern dictionary is accepted.
	patternDict, ok := TraceToDirectObject(r.Pattern).(*PdfObjectDictionary)
	if !ok {
		return ErrTypeError
	}

	(*patternDict)[PdfObjectName(keyName)] = pattern
	return nil
}
// HasXObjectByName reports whether GetXObjectByName yields a stream for keyName.
func (r *PdfPageResources) HasXObjectByName(keyName string) bool {
	stream, _ := r.GetXObjectByName(keyName)
	return stream != nil
}
// XObjectType enumerates the kinds of XObject reported by GetXObjectByName.
type XObjectType int

const (
	// XObjectTypeUndefined is reported whenever no usable XObject is found.
	XObjectTypeUndefined XObjectType = iota
	// XObjectTypeImage corresponds to the Subtype name "Image".
	XObjectTypeImage
	// XObjectTypeForm corresponds to the Subtype name "Form".
	XObjectTypeForm
	// XObjectTypePS corresponds to the Subtype name "PS".
	XObjectTypePS
	XObjectTypeUnknown
)
// GetXObjectByName returns the XObject stream registered under keyName together
// with its type, taken from the stream dictionary's Subtype entry. In every
// failure case (no XObject entry, entry not a dictionary, keyName absent, value
// not a stream, Subtype missing or not recognized) it returns nil and
// XObjectTypeUndefined.
func (r *PdfPageResources) GetXObjectByName(keyName string) (*PdfObjectStream, XObjectType) {
	if r.XObject == nil {
		return nil, XObjectTypeUndefined
	}

	direct := TraceToDirectObject(r.XObject)
	xresDict, isDict := direct.(*PdfObjectDictionary)
	if !isDict {
		common.Log.Debug("ERROR: XObject not a dictionary! (got %T)", direct)
		return nil, XObjectTypeUndefined
	}

	obj, found := (*xresDict)[PdfObjectName(keyName)]
	if !found {
		return nil, XObjectTypeUndefined
	}

	stream, isStream := obj.(*PdfObjectStream)
	if !isStream {
		common.Log.Debug("XObject not pointing to a stream %T", obj)
		return nil, XObjectTypeUndefined
	}

	streamDict := stream.PdfObjectDictionary
	subtype, isName := (*streamDict)["Subtype"].(*PdfObjectName)
	if !isName {
		common.Log.Debug("XObject Subtype not a Name, dict: %s", streamDict.String())
		return nil, XObjectTypeUndefined
	}

	switch *subtype {
	case "Image":
		return stream, XObjectTypeImage
	case "Form":
		return stream, XObjectTypeForm
	case "PS":
		return stream, XObjectTypePS
	}

	common.Log.Debug("XObject Subtype not known (%s)", *subtype)
	return nil, XObjectTypeUndefined
}
// setXObjectByName stores stream under keyName in the XObject dictionary,
// creating the dictionary when there is none yet. An error is returned when the
// existing XObject entry does not resolve to a dictionary.
func (r *PdfPageResources) setXObjectByName(keyName string, stream *PdfObjectStream) error {
	if r.XObject == nil {
		r.XObject = &PdfObjectDictionary{}
	}

	direct := TraceToDirectObject(r.XObject)
	if xresDict, isDict := direct.(*PdfObjectDictionary); isDict {
		(*xresDict)[PdfObjectName(keyName)] = stream
		return nil
	}

	common.Log.Debug("Invalid XObject, got %T/%T", r.XObject, direct)
	return errors.New("Type check error")
}
// GetXObjectImageByName loads the image XObject registered under keyName.
// It returns nil, nil when no XObject stream is found for keyName, and an error
// when the XObject is not an image or cannot be loaded from its stream.
func (r *PdfPageResources) GetXObjectImageByName(keyName string) (*XObjectImage, error) {
	stream, xtype := r.GetXObjectByName(keyName)
	switch {
	case stream == nil:
		return nil, nil
	case xtype != XObjectTypeImage:
		return nil, errors.New("Not an image")
	}

	image, err := NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	return image, nil
}
// SetXObjectImageByName converts ximg to its stream object and registers it
// under keyName in the XObject dictionary.
func (r *PdfPageResources) SetXObjectImageByName(keyName string, ximg *XObjectImage) error {
	return r.setXObjectByName(keyName, ximg.ToPdfObject().(*PdfObjectStream))
}
// GetXObjectFormByName loads the form XObject registered under keyName.
// It returns nil, nil when no XObject stream is found for keyName, and an error
// when the XObject is not a form or cannot be loaded from its stream.
func (r *PdfPageResources) GetXObjectFormByName(keyName string) (*XObjectForm, error) {
	stream, xtype := r.GetXObjectByName(keyName)
	switch {
	case stream == nil:
		return nil, nil
	case xtype != XObjectTypeForm:
		return nil, errors.New("Not a form")
	}

	form, err := NewXObjectFormFromStream(stream)
	if err != nil {
		return nil, err
	}
	return form, nil
}
// SetXObjectFormByName converts xform to its stream object and registers it
// under keyName in the XObject dictionary.
func (r *PdfPageResources) SetXObjectFormByName(keyName string, xform *XObjectForm) error {
	return r.setXObjectByName(keyName, xform.ToPdfObject().(*PdfObjectStream))
}

View File

@ -420,8 +420,8 @@ func newPdfShadingType3FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
common.Log.Debug("Coords not an array (got %T)", obj)
return nil, ErrTypeError
}
if len(*arr) != 4 {
common.Log.Debug("Coords length not 4 (got %d)", len(*arr))
if len(*arr) != 6 {
common.Log.Debug("Coords length not 6 (got %d)", len(*arr))
return nil, ErrInvalidAttribute
}
shading.Coords = arr

View File

@ -28,6 +28,14 @@ func getNumberAsFloat(obj PdfObject) (float64, error) {
return 0, errors.New("Not a number")
}
// isNullObject reports whether obj is a *PdfObjectNull.
func isNullObject(obj PdfObject) bool {
	_, isNull := obj.(*PdfObjectNull)
	return isNull
}
// Convert a list of pdf objects representing floats or integers to a slice of float64 values.
func getNumbersAsFloat(objects []PdfObject) ([]float64, error) {
floats := []float64{}
@ -73,3 +81,20 @@ func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
return nil, errors.New("Not a number")
}
// debugObject logs obj at debug level; a handy helper during development.
// For a stream it additionally logs the decoded contents (or the decode error);
// for an indirect object it additionally logs the contained object.
func debugObject(obj PdfObject) {
	common.Log.Debug("obj: %T %s", obj, obj.String())

	switch typed := obj.(type) {
	case *PdfObjectStream:
		decoded, err := DecodeStream(typed)
		if err != nil {
			common.Log.Debug("Error: %v", err)
			return
		}
		common.Log.Debug("Decoded: %s", decoded)
	case *PdfIndirectObject:
		common.Log.Debug("%T %v", typed.PdfObject, typed.PdfObject)
		common.Log.Debug("%s", typed.PdfObject.String())
	}
}

View File

@ -224,7 +224,7 @@ func (this *PdfWriter) addObjects(obj PdfObject) error {
}
if hasObj := this.hasObject(v); !hasObj {
common.Log.Debug("Parent obj is missing!! %T %p %v\n", v, v, v)
common.Log.Debug("Parent obj is missing!! %T %p %v", v, v, v)
this.pendingObjects[v] = dict
// Although it is missing at this point, it could be added later...
}
@ -400,84 +400,8 @@ func (this *PdfWriter) seekByName(obj PdfObject, followKeys []string, key string
return list, nil
}
// AddForms adds the fields of an AcroForm dictionary to the writer.
//
// Only the "Fields" entry of forms is examined; it may be an array or an
// indirect object containing an array. For each field, the "P" entries reachable
// through "Fields"/"Kids" are collected with seekByName. A field is added when
// at least one of those entries is an indirect object already known to the
// writer.
//
// Returns an error when forms is nil, a field is not an indirect object, the
// seek fails, or a "P" entry is an indirect object the writer does not have.
// Returns nil without adding anything when there is no fields array.
func (this *PdfWriter) AddForms(forms *PdfObjectDictionary) error {
	// Traverse the forms object...
	// Keep a list of stuff?
	// Forms dictionary should have:
	// Fields array.
	if forms == nil {
		return errors.New("forms == nil")
	}
	// For now, support only regular forms with fields.
	var fieldsArray *PdfObjectArray
	if fields, hasFields := (*forms)["Fields"]; hasFields {
		if arr, isArray := fields.(*PdfObjectArray); isArray {
			fieldsArray = arr
		} else if ind, isInd := fields.(*PdfIndirectObject); isInd {
			if arr, isArray := ind.PdfObject.(*PdfObjectArray); isArray {
				fieldsArray = arr
			}
		}
	}
	if fieldsArray == nil {
		common.Log.Debug("Writer - no fields to be added to forms")
		return nil
	}
	// Add the fields.
	for _, field := range *fieldsArray {
		fieldObj, ok := field.(*PdfIndirectObject)
		if !ok {
			return errors.New("Field not pointing indirect object")
		}
		// Collect every "P" entry found while following Fields/Kids from this field.
		followKeys := []string{"Fields", "Kids"}
		list, err := this.seekByName(fieldObj, followKeys, "P")
		common.Log.Trace("Done seeking!")
		if err != nil {
			return err
		}
		common.Log.Trace("List of P objects %d", len(list))
		// A field without any P entry is skipped.
		if len(list) < 1 {
			continue
		}
		includeField := false
		for _, p := range list {
			if po, ok := p.(*PdfIndirectObject); ok {
				common.Log.Trace("P entry is an indirect object (page)")
				if this.hasObject(po) {
					includeField = true
				} else {
					// A P entry unknown to the writer aborts the whole call.
					return errors.New("P pointing outside of write pages")
				}
			} else {
				// Non-indirect P entries are logged and otherwise ignored.
				common.Log.Debug("ERROR: P entry not an indirect object (%T)", p)
			}
		}
		// Known limitation: this won't work in general. There can be many sub objects.
		// Need to specifically go and check the page object!
		// P or the appearance dictionary.
		if includeField {
			common.Log.Trace("Add the field! (%T)", field)
			// Add if nothing referenced outside of the writer.
			// Probably need to add some objects first...
			this.addObject(field)
			this.fields = append(this.fields, field)
		} else {
			common.Log.Trace("Field not relevant!")
		}
	}
	return nil
}
// Add Acroforms to a PDF file.
func (this *PdfWriter) AddForms2(form *PdfAcroForm) error {
//form.ToPdfObject(true)
// Add Acroforms to a PDF file. Sets the specified form for writing.
func (this *PdfWriter) SetForms(form *PdfAcroForm) error {
this.acroForm = form
return nil
}
@ -613,24 +537,8 @@ func (this *PdfWriter) Write(ws io.WriteSeeker) error {
return err
}
}
// Form fields.
/*
if len(this.fields) > 0 {
forms := PdfIndirectObject{}
formsDict := PdfObjectDictionary{}
forms.PdfObject = &formsDict
fieldsArray := PdfObjectArray{}
for _, field := range this.fields {
fieldsArray = append(fieldsArray, field)
}
formsDict[PdfObjectName("Fields")] = &fieldsArray
(*this.catalog)[PdfObjectName("AcroForm")] = &forms
err := this.addObjects(&forms)
if err != nil {
return err
}
}*/
// Acroform.
if this.acroForm != nil {
common.Log.Trace("Writing acro forms")
indObj := this.acroForm.ToPdfObject()

View File

@ -242,13 +242,30 @@ func NewXObjectImage() *XObjectImage {
return xobj
}
// Creates a new XObject Image from an image object with default
// options.
func NewXObjectImageFromImage(name PdfObjectName, img *Image, cs PdfColorspace) (*XObjectImage, error) {
xobj := NewXObjectImage()
// Creates a new XObject Image from an image object with default options.
// If encoder is nil, uses raw encoding (none).
func NewXObjectImageFromImage(name PdfObjectName, img *Image, cs PdfColorspace, encoder StreamEncoder) (*XObjectImage, error) {
baseXObj := NewXObjectImage()
return UpdateXObjectImageFromImage(baseXObj, name, img, cs, encoder)
}
func UpdateXObjectImageFromImage(baseXObj *XObjectImage, name PdfObjectName, img *Image, cs PdfColorspace, encoder StreamEncoder) (*XObjectImage, error) {
dupObj := *baseXObj
xobj := &dupObj
if encoder == nil {
encoder = NewRawEncoder()
}
encoded, err := encoder.EncodeBytes(img.Data)
if err != nil {
common.Log.Debug("Error with encoding: %v", err)
return nil, err
}
xobj.Name = &name
xobj.Stream = img.Data
xobj.Filter = encoder
xobj.Stream = encoded
// Width and height.
imWidth := img.Width
@ -416,6 +433,24 @@ func (ximg *XObjectImage) SetImage(img *Image, cs PdfColorspace) error {
return nil
}
// SetFilter changes the compression filter of the image. The current stream is
// decoded with the current filter and the result is encoded with encoder.
//
// A nil encoder selects the raw encoder. Filter and Stream are replaced together
// and only after both decoding and encoding succeeded, so on error the image is
// left exactly as it was.
func (ximg *XObjectImage) SetFilter(encoder StreamEncoder) error {
	if encoder == nil {
		encoder = NewRawEncoder()
	}

	decoded, err := ximg.Filter.DecodeBytes(ximg.Stream)
	if err != nil {
		return err
	}

	encoded, err := encoder.EncodeBytes(decoded)
	if err != nil {
		return err
	}

	// Commit last: assigning the filter before a failed encode would leave a
	// filter that does not match the stream data.
	ximg.Filter = encoder
	ximg.Stream = encoded
	return nil
}
// Compress with default settings, updating the underlying stream also.
// XXX/TODO: Add flate encoding as an option (although lossy). Need to be able
// to set default settings and override.