mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-01 22:17:29 +08:00

* 'master' of https://github.com/peterwilliams97/unidoc: (50 commits) Fixing lab colorspace component input ranges. Fix Indexed cs Image to rgb conversion. Make float parsing more like gs Fixed Lab bounds Added dummy encodings Added dummy encodings Fix PS processing of dup operand. Fixes #98. Check sizes for memory allocation based on pdf user inputs. Fixes #107. Check to avoid division by zero. Fixes #106. Add GetObjectNums Address go vet issues Fix comment typo Fixed some bugs found while getting pdf_descibe.go to work Address golint recommendations in core Address core golint recommendations in crypt, io Add check for base colorspace type when loading Indexed colorspace. Fixes #95. Address more golint recommendations #89 Checks on stated byte lengths in xref stream objects. Closes #94. Address golint recommendations. Add TODO comments for recommended future refactoring work in next major release. Only attempt to load annotation from a valid indirect object for annotation Popup entries. Fixes #91. Address godoc code block line wrapping ...
378 lines
11 KiB
Go
378 lines
11 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package core
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/unidoc/unidoc/common"
|
|
)
|
|
|
|
// TODO (v3): Create a new type xrefType which can be an integer and can be used for improved type checking.
// TODO (v3): Unexport these constants and rename with camelCase.

// Kinds of cross reference entries, as stored in XrefObject.xtype.
const (
	// XREF_TABLE_ENTRY marks a normal xref table entry.
	XREF_TABLE_ENTRY = iota

	// XREF_OBJECT_STREAM marks an xref entry that lives in an xref object stream.
	XREF_OBJECT_STREAM
)
|
|
|
|
// XrefObject is a single cross reference entry. It ties an object number (with its generation number) to the
// place where the actual object is stored: either a file offset (xref table entry), or a location within an
// xref stream object (xref object stream).
// TODO (v3): Unexport.
type XrefObject struct {
	// Kind of entry; compared against XREF_TABLE_ENTRY and XREF_OBJECT_STREAM.
	xtype int

	objectNumber, generation int

	// Used by normal xref table entries: file offset of the object.
	offset int64

	// Used by entries that point into an object stream.
	osObjNumber, osObjIndex int
}
|
|
|
|
// XrefTable is a map between object number and corresponding XrefObject.
|
|
// TODO (v3): Unexport.
|
|
// TODO: Consider changing to a slice, so can maintain the object order without sorting when analyzing.
|
|
type XrefTable map[int]XrefObject
|
|
|
|
// ObjectStream holds the information of one object stream, which can contain multiple indirect objects:
// how many objects it declares, its decoded data, and where each contained object starts.
// TODO (v3): Unexport.
type ObjectStream struct {
	// N is the number of objects declared by the stream dictionary.
	N int // TODO (v3): Unexport.

	// ds is the decoded stream data.
	ds []byte

	// offsets maps an object number to the position in ds at which that object starts.
	offsets map[int]int64
}
|
|
|
|
// ObjectStreams defines a map between object numbers (object streams only) and underlying ObjectStream information.
|
|
type ObjectStreams map[int]ObjectStream
|
|
|
|
// ObjectCache defines a map between object numbers and corresponding PdfObject. Serves as a cache for PdfObjects that
|
|
// have already been parsed.
|
|
// TODO (v3): Unexport.
|
|
type ObjectCache map[int]PdfObject
|
|
|
|
// Get an object from an object stream.
|
|
func (parser *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObject, error) {
|
|
var bufReader *bytes.Reader
|
|
var objstm ObjectStream
|
|
var cached bool
|
|
|
|
objstm, cached = parser.objstms[sobjNumber]
|
|
if !cached {
|
|
soi, err := parser.LookupByNumber(sobjNumber)
|
|
if err != nil {
|
|
common.Log.Debug("Missing object stream with number %d", sobjNumber)
|
|
return nil, err
|
|
}
|
|
|
|
so, ok := soi.(*PdfObjectStream)
|
|
if !ok {
|
|
return nil, errors.New("Invalid object stream")
|
|
}
|
|
|
|
if parser.crypter != nil && !parser.crypter.isDecrypted(so) {
|
|
return nil, errors.New("Need to decrypt the stream")
|
|
}
|
|
|
|
sod := so.PdfObjectDictionary
|
|
common.Log.Trace("so d: %s\n", *sod)
|
|
name, ok := sod.Get("Type").(*PdfObjectName)
|
|
if !ok {
|
|
common.Log.Debug("ERROR: Object stream should always have a Type")
|
|
return nil, errors.New("Object stream missing Type")
|
|
}
|
|
if strings.ToLower(string(*name)) != "objstm" {
|
|
common.Log.Debug("ERROR: Object stream type shall always be ObjStm !")
|
|
return nil, errors.New("Object stream type != ObjStm")
|
|
}
|
|
|
|
N, ok := sod.Get("N").(*PdfObjectInteger)
|
|
if !ok {
|
|
return nil, errors.New("Invalid N in stream dictionary")
|
|
}
|
|
firstOffset, ok := sod.Get("First").(*PdfObjectInteger)
|
|
if !ok {
|
|
return nil, errors.New("Invalid First in stream dictionary")
|
|
}
|
|
|
|
common.Log.Trace("type: %s number of objects: %d", name, *N)
|
|
ds, err := DecodeStream(so)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
common.Log.Trace("Decoded: %s", ds)
|
|
|
|
// Temporarily change the reader object to this decoded buffer.
|
|
// Change back afterwards.
|
|
bakOffset := parser.GetFileOffset()
|
|
defer func() { parser.SetFileOffset(bakOffset) }()
|
|
|
|
bufReader = bytes.NewReader(ds)
|
|
parser.reader = bufio.NewReader(bufReader)
|
|
|
|
common.Log.Trace("Parsing offset map")
|
|
// Load the offset map (relative to the beginning of the stream...)
|
|
offsets := map[int]int64{}
|
|
// Object list and offsets.
|
|
for i := 0; i < int(*N); i++ {
|
|
parser.skipSpaces()
|
|
// Object number.
|
|
obj, err := parser.parseNumber()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
onum, ok := obj.(*PdfObjectInteger)
|
|
if !ok {
|
|
return nil, errors.New("Invalid object stream offset table")
|
|
}
|
|
|
|
parser.skipSpaces()
|
|
// Offset.
|
|
obj, err = parser.parseNumber()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
offset, ok := obj.(*PdfObjectInteger)
|
|
if !ok {
|
|
return nil, errors.New("Invalid object stream offset table")
|
|
}
|
|
|
|
common.Log.Trace("obj %d offset %d", *onum, *offset)
|
|
offsets[int(*onum)] = int64(*firstOffset + *offset)
|
|
}
|
|
|
|
objstm = ObjectStream{N: int(*N), ds: ds, offsets: offsets}
|
|
parser.objstms[sobjNumber] = objstm
|
|
} else {
|
|
// Temporarily change the reader object to this decoded buffer.
|
|
// Point back afterwards.
|
|
bakOffset := parser.GetFileOffset()
|
|
defer func() { parser.SetFileOffset(bakOffset) }()
|
|
|
|
bufReader = bytes.NewReader(objstm.ds)
|
|
// Temporarily change the reader object to this decoded buffer.
|
|
parser.reader = bufio.NewReader(bufReader)
|
|
}
|
|
|
|
offset := objstm.offsets[objNum]
|
|
common.Log.Trace("ACTUAL offset[%d] = %d", objNum, offset)
|
|
|
|
bufReader.Seek(offset, os.SEEK_SET)
|
|
parser.reader = bufio.NewReader(bufReader)
|
|
|
|
bb, _ := parser.reader.Peek(100)
|
|
common.Log.Trace("OBJ peek \"%s\"", string(bb))
|
|
|
|
val, err := parser.parseObject()
|
|
if err != nil {
|
|
common.Log.Debug("ERROR Fail to read object (%s)", err)
|
|
return nil, err
|
|
}
|
|
if val == nil {
|
|
return nil, errors.New("Object cannot be null")
|
|
}
|
|
|
|
// Make an indirect object around it.
|
|
io := PdfIndirectObject{}
|
|
io.ObjectNumber = int64(objNum)
|
|
io.PdfObject = val
|
|
|
|
return &io, nil
|
|
}
|
|
|
|
// LookupByNumber looks up a PdfObject by object number. Returns an error on failure.
|
|
// TODO (v3): Unexport.
|
|
func (parser *PdfParser) LookupByNumber(objNumber int) (PdfObject, error) {
|
|
// Outside interface for lookupByNumberWrapper. Default attempts repairs of bad xref tables.
|
|
obj, _, err := parser.lookupByNumberWrapper(objNumber, true)
|
|
return obj, err
|
|
}
|
|
|
|
// Wrapper for lookupByNumber, checks if object encrypted etc.
|
|
|
|
func (parser *PdfParser) lookupByNumberWrapper(objNumber int, attemptRepairs bool) (PdfObject, bool, error) {
|
|
obj, inObjStream, err := parser.lookupByNumber(objNumber, attemptRepairs)
|
|
if err != nil {
|
|
return nil, inObjStream, err
|
|
}
|
|
|
|
// If encrypted, decrypt it prior to returning.
|
|
// Do not attempt to decrypt objects within object streams.
|
|
if !inObjStream && parser.crypter != nil && !parser.crypter.isDecrypted(obj) {
|
|
err := parser.crypter.Decrypt(obj, 0, 0)
|
|
if err != nil {
|
|
return nil, inObjStream, err
|
|
}
|
|
}
|
|
|
|
return obj, inObjStream, nil
|
|
}
|
|
|
|
func getObjectNumber(obj PdfObject) (int64, int64, error) {
|
|
if io, isIndirect := obj.(*PdfIndirectObject); isIndirect {
|
|
return io.ObjectNumber, io.GenerationNumber, nil
|
|
}
|
|
if so, isStream := obj.(*PdfObjectStream); isStream {
|
|
return so.ObjectNumber, so.GenerationNumber, nil
|
|
}
|
|
return 0, 0, errors.New("Not an indirect/stream object")
|
|
}
|
|
|
|
// LookupByNumber
|
|
// Repair signals whether to repair if broken.
|
|
func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (PdfObject, bool, error) {
|
|
obj, ok := parser.ObjCache[objNumber]
|
|
if ok {
|
|
common.Log.Trace("Returning cached object %d", objNumber)
|
|
return obj, false, nil
|
|
}
|
|
|
|
xref, ok := parser.xrefs[objNumber]
|
|
if !ok {
|
|
// An indirect reference to an undefined object shall not be
|
|
// considered an error by a conforming reader; it shall be
|
|
// treated as a reference to the null object.
|
|
common.Log.Trace("Unable to locate object in xrefs! - Returning null object")
|
|
var nullObj PdfObjectNull
|
|
return &nullObj, false, nil
|
|
}
|
|
|
|
common.Log.Trace("Lookup obj number %d", objNumber)
|
|
if xref.xtype == XREF_TABLE_ENTRY {
|
|
common.Log.Trace("xrefobj obj num %d", xref.objectNumber)
|
|
common.Log.Trace("xrefobj gen %d", xref.generation)
|
|
common.Log.Trace("xrefobj offset %d", xref.offset)
|
|
|
|
parser.rs.Seek(xref.offset, os.SEEK_SET)
|
|
parser.reader = bufio.NewReader(parser.rs)
|
|
|
|
obj, err := parser.ParseIndirectObject()
|
|
if err != nil {
|
|
common.Log.Debug("ERROR Failed reading xref (%s)", err)
|
|
// Offset pointing to a non-object. Try to repair the file.
|
|
if attemptRepairs {
|
|
common.Log.Debug("Attempting to repair xrefs (top down)")
|
|
xrefTable, err := parser.repairRebuildXrefsTopDown()
|
|
if err != nil {
|
|
common.Log.Debug("ERROR Failed repair (%s)", err)
|
|
return nil, false, err
|
|
}
|
|
parser.xrefs = *xrefTable
|
|
return parser.lookupByNumber(objNumber, false)
|
|
}
|
|
return nil, false, err
|
|
}
|
|
|
|
if attemptRepairs {
|
|
// Check the object number..
|
|
// If it does not match, then try to rebuild, i.e. loop through
|
|
// all the items in the xref and look each one up and correct.
|
|
realObjNum, _, _ := getObjectNumber(obj)
|
|
if int(realObjNum) != objNumber {
|
|
common.Log.Debug("Invalid xrefs: Rebuilding")
|
|
err := parser.rebuildXrefTable()
|
|
if err != nil {
|
|
return nil, false, err
|
|
}
|
|
// Empty the cache.
|
|
parser.ObjCache = ObjectCache{}
|
|
// Try looking up again and return.
|
|
return parser.lookupByNumberWrapper(objNumber, false)
|
|
}
|
|
}
|
|
|
|
common.Log.Trace("Returning obj")
|
|
parser.ObjCache[objNumber] = obj
|
|
return obj, false, nil
|
|
} else if xref.xtype == XREF_OBJECT_STREAM {
|
|
common.Log.Trace("xref from object stream!")
|
|
common.Log.Trace(">Load via OS!")
|
|
common.Log.Trace("Object stream available in object %d/%d", xref.osObjNumber, xref.osObjIndex)
|
|
|
|
if xref.osObjNumber == objNumber {
|
|
common.Log.Debug("ERROR Circular reference!?!")
|
|
return nil, true, errors.New("Xref circular reference")
|
|
}
|
|
_, exists := parser.xrefs[xref.osObjNumber]
|
|
if exists {
|
|
optr, err := parser.lookupObjectViaOS(xref.osObjNumber, objNumber) //xref.osObjIndex)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR Returning ERR (%s)", err)
|
|
return nil, true, err
|
|
}
|
|
common.Log.Trace("<Loaded via OS")
|
|
parser.ObjCache[objNumber] = optr
|
|
if parser.crypter != nil {
|
|
// Mark as decrypted (inside object stream) for caching.
|
|
// and avoid decrypting decrypted object.
|
|
parser.crypter.DecryptedObjects[optr] = true
|
|
}
|
|
return optr, true, nil
|
|
} else {
|
|
common.Log.Debug("?? Belongs to a non-cross referenced object ...!")
|
|
return nil, true, errors.New("OS belongs to a non cross referenced object")
|
|
}
|
|
}
|
|
return nil, false, errors.New("Unknown xref type")
|
|
}
|
|
|
|
// LookupByReference looks up a PdfObject by a reference.
|
|
func (parser *PdfParser) LookupByReference(ref PdfObjectReference) (PdfObject, error) {
|
|
common.Log.Trace("Looking up reference %s", ref.String())
|
|
return parser.LookupByNumber(int(ref.ObjectNumber))
|
|
}
|
|
|
|
// Trace traces a PdfObject to direct object, looking up and resolving references as needed (unlike TraceToDirect).
|
|
// TODO (v3): Unexport.
|
|
func (parser *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
|
|
ref, isRef := obj.(*PdfObjectReference)
|
|
if !isRef {
|
|
// Direct object already.
|
|
return obj, nil
|
|
}
|
|
|
|
bakOffset := parser.GetFileOffset()
|
|
defer func() { parser.SetFileOffset(bakOffset) }()
|
|
|
|
o, err := parser.LookupByReference(*ref)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
io, isInd := o.(*PdfIndirectObject)
|
|
if !isInd {
|
|
// Not indirect (Stream or null object).
|
|
return o, nil
|
|
}
|
|
o = io.PdfObject
|
|
_, isRef = o.(*PdfObjectReference)
|
|
if isRef {
|
|
return io, errors.New("Multi depth trace pointer to pointer")
|
|
}
|
|
|
|
return o, nil
|
|
}
|
|
|
|
func printXrefTable(xrefTable XrefTable) {
|
|
common.Log.Debug("=X=X=X=")
|
|
common.Log.Debug("Xref table:")
|
|
i := 0
|
|
for _, xref := range xrefTable {
|
|
common.Log.Debug("i+1: %d (obj num: %d gen: %d) -> %d", i+1, xref.objectNumber, xref.generation, xref.offset)
|
|
i++
|
|
}
|
|
}
|