Address golint recommendations. Add TODO comments for recommended future refactoring work in next major release.

This commit is contained in:
Gunnsteinn Hall 2017-08-02 12:56:32 +00:00
parent 30ffbe3cbe
commit 28aea0d8d7
6 changed files with 119 additions and 52 deletions

View File

@ -8,5 +8,8 @@ package core
import "errors"
var (
// ErrUnsupportedEncodingParameters indicates that encoding or decoding was attempted with
// encoding parameters that are not supported, for example when trying to encode with an
// unsupported Predictor (flate).
ErrUnsupportedEncodingParameters = errors.New("Unsupported encoding parameters")
)

View File

@ -15,8 +15,13 @@ import (
"github.com/unidoc/unidoc/common"
)
// TODO (v3): Create a new type xrefType which can be an integer and can be used for improved type checking.
// TODO (v3): Unexport these constants and rename with camelCase.
const (
XREF_TABLE_ENTRY = iota
// XREF_TABLE_ENTRY indicates a normal xref table entry.
XREF_TABLE_ENTRY = iota
// XREF_OBJECT_STREAM indicates an xref entry in an xref object stream.
XREF_OBJECT_STREAM = iota
)
@ -67,7 +72,7 @@ func (this *PdfParser) lookupObjectViaOS(sobjNumber int, objNum int) (PdfObject,
}
if this.crypter != nil && !this.crypter.isDecrypted(so) {
return nil, errors.New("Need to decrypt the stream !")
return nil, errors.New("Need to decrypt the stream")
}
sod := so.PdfObjectDictionary

9
pdf/core/doc.go Normal file
View File

@ -0,0 +1,9 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
// Package core defines and implements the primitive PDF object types in golang, and provides functionality
// for parsing those from a PDF file stream. This includes I/O handling, cross references, repairs, encryption,
// encoding and other core capabilities.
package core

View File

@ -13,6 +13,8 @@ import (
"github.com/unidoc/unidoc/common"
)
// ReadAtLeast reads at least n bytes into slice p.
// Returns the number of bytes read (should always be == n), and an error on failure.
func (this *PdfParser) ReadAtLeast(p []byte, n int) (int, error) {
remaining := n
start := 0

View File

@ -3,9 +3,6 @@
* file 'LICENSE.md', which is part of this source code package.
*/
// The core package provides fundamental functionality for handling PDFs, including definitions of the core PDF objects
// (primitives), parsing a PDF file as a series of primitives, io, cross references, repairs, encryption, encoding and
// other core capabilities.
package core
import (
@ -35,6 +32,7 @@ var reIndirectObject = regexp.MustCompile(`(\d+)\s+(\d+)\s+obj`)
var reXrefSubsection = regexp.MustCompile(`(\d+)\s+(\d+)\s*$`)
var reXrefEntry = regexp.MustCompile(`(\d+)\s+(\d+)\s+([nf])\s*$`)
// PdfParser parses a PDF file and provides access to the object structure of the PDF.
type PdfParser struct {
majorVersion int
minorVersion int
@ -45,7 +43,7 @@ type PdfParser struct {
xrefs XrefTable
objstms ObjectStreams
trailer *PdfObjectDictionary
ObjCache ObjectCache
ObjCache ObjectCache // TODO: Unexport (v3).
crypter *PdfCrypt
repairsAttempted bool // Avoid multiple attempts for repair.
@ -56,14 +54,18 @@ type PdfParser struct {
streamLengthReferenceLookupInProgress map[int64]bool
}
// GetCrypter returns the PdfCrypt instance which has information about the PDF's encryption.
// The parser's crypter field is returned as is, so the result is nil when no crypter has been set.
func (this *PdfParser) GetCrypter() *PdfCrypt {
return this.crypter
}
// IsAuthenticated returns true if the PDF has already been authenticated for accessing.
// The result is the Authenticated flag of the parser's crypter.
// NOTE(review): the crypter is dereferenced without a nil check, so this panics when no crypter
// is set (IsEncrypted treats a nil crypter as a valid state). Callers presumably need to check
// GetCrypter() != nil first - confirm, or decide what an unencrypted document should report.
func (this *PdfParser) IsAuthenticated() bool {
return this.crypter.Authenticated
}
// GetTrailer returns the PDF's trailer dictionary. The trailer dictionary is typically the starting point for a PDF,
// referencing other key objects that are important in the document structure.
// The parser's own dictionary pointer is returned, not a copy.
func (this *PdfParser) GetTrailer() *PdfObjectDictionary {
return this.trailer
}
@ -573,6 +575,7 @@ func (this *PdfParser) parseObject() (PdfObject, error) {
}
// ParseDict reads and parses a PDF dictionary object enclosed with '<<' and '>>'.
// TODO: Unexport (v3).
func (this *PdfParser) ParseDict() (*PdfObjectDictionary, error) {
common.Log.Trace("Reading PDF Dict!")
@ -777,7 +780,7 @@ func (this *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
if txt == "%%EOF" {
common.Log.Debug("ERROR: end of file - trailer not found - error!")
return nil, errors.New("End of file - trailer not found!")
return nil, errors.New("End of file - trailer not found")
}
common.Log.Trace("xref more : %s", txt)
@ -806,7 +809,7 @@ func (this *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDictio
xs, ok := xrefObj.(*PdfObjectStream)
if !ok {
common.Log.Debug("ERROR: XRefStm pointing to non-stream object!")
return nil, errors.New("XRefStm pointing to a non-stream object!")
return nil, errors.New("XRefStm pointing to a non-stream object")
}
trailerDict := xs.PdfObjectDictionary
@ -932,7 +935,6 @@ func (this *PdfParser) parseXrefStream(xstm *PdfObjectInteger) (*PdfObjectDictio
var tmp int64 = 0
for i := 0; i < len(v); i++ {
tmp += int64(v[i]) * (1 << uint(8*(len(v)-i-1)))
}
return tmp
}
@ -1118,7 +1120,7 @@ func (this *PdfParser) seekToEOFMarker(fSize int64) error {
// 3. Check the Prev xref
// 4. Continue looking for Prev until not found.
//
// The earlier xrefs have higher precedance. If objects already
// The earlier xrefs have higher precedence. If objects already
// loaded will ignore older versions.
//
func (this *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
@ -1298,8 +1300,9 @@ func (this *PdfParser) traceStreamLength(lengthObj PdfObject) (PdfObject, error)
return slo, nil
}
// Parse an indirect object from the input stream.
// Can also be an object stream.
// Parse an indirect object from the input stream. Can also be an object stream.
// Returns the indirect object (*PdfIndirectObject) or the stream object (*PdfObjectStream).
// TODO: Unexport (v3).
func (this *PdfParser) ParseIndirectObject() (PdfObject, error) {
indirect := PdfIndirectObject{}
@ -1475,6 +1478,7 @@ func (this *PdfParser) ParseIndirectObject() (PdfObject, error) {
}
// NewParserFromString creates a parser from a string. For testing purposes.
// TODO: Unexport (v3) or move to test files, if needed by external test cases.
func NewParserFromString(txt string) *PdfParser {
parser := PdfParser{}
buf := []byte(txt)
@ -1490,8 +1494,8 @@ func NewParserFromString(txt string) *PdfParser {
return &parser
}
// Creates a new parser for a PDF file via ReadSeeker. Loads the
// cross reference stream and trailer.
// NewParser creates a new parser for a PDF file via ReadSeeker. Loads the cross reference stream and trailer.
// An error is returned on failure.
func NewParser(rs io.ReadSeeker) (*PdfParser, error) {
parser := &PdfParser{}
@ -1499,22 +1503,19 @@ func NewParser(rs io.ReadSeeker) (*PdfParser, error) {
parser.ObjCache = make(ObjectCache)
parser.streamLengthReferenceLookupInProgress = map[int64]bool{}
// Start by reading xrefs from bottom
// Start by reading the xrefs (from bottom).
trailer, err := parser.loadXrefs()
if err != nil {
common.Log.Debug("ERROR: Failed to load xref table! %s", err)
// Try to rebuild entire xref table?
return nil, err
}
common.Log.Trace("Trailer: %s", trailer)
if len(parser.xrefs) == 0 {
return nil, fmt.Errorf("Empty XREF table. Invalid.")
return nil, fmt.Errorf("Empty XREF table - Invalid")
}
// printXrefTable(parser.xrefs)
majorVersion, minorVersion, err := parser.parsePdfVersion()
if err != nil {
common.Log.Error("Unable to parse version: %v", err)
@ -1528,11 +1529,10 @@ func NewParser(rs io.ReadSeeker) (*PdfParser, error) {
return parser, nil
}
// Check if the document is encrypted. First time when called, will
// check if the Encrypt dictionary is accessible through the trailer
// dictionary.
// If encrypted, prepares a crypt datastructure which can be used to
// authenticate and decrypt the document.
// IsEncrypted checks if the document is encrypted. A bool flag is returned indicating the result.
// The first time it is called, it checks whether the Encrypt dictionary is accessible through the trailer dictionary.
// If encrypted, prepares a crypt data structure which can be used to authenticate and decrypt the document.
// On failure, an error is returned.
func (this *PdfParser) IsEncrypted() (bool, error) {
if this.crypter != nil {
return true, nil
@ -1574,9 +1574,9 @@ func (this *PdfParser) IsEncrypted() (bool, error) {
return false, nil
}
// Decrypt the PDF file with a specified password. Also tries to
// decrypt with an empty password. Returns true if successful,
// false otherwise.
// Decrypt attempts to decrypt the PDF file with a specified password. Also tries to
// decrypt with an empty password. Returns true if successful, false otherwise.
// An error is returned when there is a problem with decrypting.
func (this *PdfParser) Decrypt(password []byte) (bool, error) {
// Also build the encryption/decryption key.
if this.crypter == nil {
@ -1595,8 +1595,8 @@ func (this *PdfParser) Decrypt(password []byte) (bool, error) {
return authenticated, err
}
// Check access rights and permissions for a specified password. If either user/owner password is specified,
// full rights are granted, otherwise the access rights are specified by the Permissions flag.
// CheckAccessRights checks access rights and permissions for a specified password. If either user/owner password is
// specified, full rights are granted, otherwise the access rights are specified by the Permissions flag.
//
// The bool flag indicates that the user can access and view the file.
// The AccessPermissions shows what access the user has for editing etc.

View File

@ -3,11 +3,6 @@
* file 'LICENSE.md', which is part of this source code package.
*/
// Defines PDF primitive objects as per the standard. Also defines a PdfObject
// interface allowing to universally work with these objects. It allows
// recursive writing of the objects to file as well and stringifying for
// debug purposes.
package core
import (
@ -17,41 +12,63 @@ import (
"github.com/unidoc/unidoc/common"
)
// PDF Primitives implement the PdfObject interface.
// PdfObject is an interface which all primitive PDF objects must implement.
type PdfObject interface {
String() string // Output a string representation of the primitive (for debugging).
DefaultWriteString() string // Output the PDF primitive as expected by the standard.
// Output a string representation of the primitive (for debugging).
String() string
// Output the PDF primitive as written to file as expected by the standard.
DefaultWriteString() string
}
// PdfObjectBool represents the primitive PDF boolean object. Its underlying type is bool.
type PdfObjectBool bool
// PdfObjectInteger represents the primitive PDF integer numerical object, held as an int64.
type PdfObjectInteger int64
// PdfObjectFloat represents the primitive PDF floating point numerical object, held as a float64.
type PdfObjectFloat float64
// PdfObjectString represents the primitive PDF string object.
// TODO (v3): Change to a struct and add a flag for hex/plaintext.
type PdfObjectString string
// PdfObjectName represents the primitive PDF name object.
type PdfObjectName string
// PdfObjectArray represents the primitive PDF array object, held as a slice of PdfObjects.
type PdfObjectArray []PdfObject
// PdfObjectDictionary represents the primitive PDF dictionary/map object.
// The entries are held in a map, and the key names are additionally kept in a slice.
type PdfObjectDictionary struct {
// dict maps each key name to its value.
dict map[PdfObjectName]PdfObject
// keys holds the key names; it is what Keys returns and what DefaultWriteString iterates over.
keys []PdfObjectName
}
// PdfObjectNull represents the primitive PDF null object. It carries no data.
type PdfObjectNull struct{}
// PdfObjectReference represents the primitive PDF reference object.
// It is written to file as "<ObjectNumber> <GenerationNumber> R".
type PdfObjectReference struct {
// ObjectNumber is the object number of the referenced object.
ObjectNumber int64
// GenerationNumber is the generation number of the referenced object.
GenerationNumber int64
}
// PdfIndirectObject represents the primitive PDF indirect object.
// It embeds the PdfObjectReference (object and generation number) together with the PdfObject
// that the indirect object contains.
type PdfIndirectObject struct {
PdfObjectReference
PdfObject
}
// PdfObjectStream represents the primitive PDF Object stream.
// It embeds the PdfObjectReference (object and generation number) and a pointer to the stream's
// dictionary, and holds the stream's bytes in Stream.
type PdfObjectStream struct {
PdfObjectReference
*PdfObjectDictionary
Stream []byte
}
// Quick functions to make pdf objects from primitive objects.
// MakeDict creates and returns an empty PdfObjectDictionary.
func MakeDict() *PdfObjectDictionary {
d := &PdfObjectDictionary{}
d.dict = map[PdfObjectName]PdfObject{}
@ -59,16 +76,19 @@ func MakeDict() *PdfObjectDictionary {
return d
}
// MakeName creates a PdfObjectName from a string and returns a pointer to the newly allocated
// name object.
func MakeName(s string) *PdfObjectName {
	result := new(PdfObjectName)
	*result = PdfObjectName(s)
	return result
}
// MakeInteger creates a PdfObjectInteger from an int64 and returns a pointer to the newly
// allocated integer object.
func MakeInteger(val int64) *PdfObjectInteger {
	result := new(PdfObjectInteger)
	*result = PdfObjectInteger(val)
	return result
}
// MakeArray creates a PdfObjectArray from a list of PdfObjects.
func MakeArray(objects ...PdfObject) *PdfObjectArray {
array := PdfObjectArray{}
for _, obj := range objects {
@ -77,6 +97,8 @@ func MakeArray(objects ...PdfObject) *PdfObjectArray {
return &array
}
// MakeArrayFromIntegers creates a PdfObjectArray from a slice of ints, where each array element is
// a PdfObjectInteger.
func MakeArrayFromIntegers(vals []int) *PdfObjectArray {
array := PdfObjectArray{}
for _, val := range vals {
@ -85,6 +107,8 @@ func MakeArrayFromIntegers(vals []int) *PdfObjectArray {
return &array
}
// MakeArrayFromIntegers64 creates a PdfObjectArray from a slice of int64s, where each array element
// is a PdfObjectInteger.
func MakeArrayFromIntegers64(vals []int64) *PdfObjectArray {
array := PdfObjectArray{}
for _, val := range vals {
@ -93,6 +117,8 @@ func MakeArrayFromIntegers64(vals []int64) *PdfObjectArray {
return &array
}
// MakeArrayFromFloats creates a PdfObjectArray from a slice of float64s, where each array element is a
// PdfObjectFloat.
func MakeArrayFromFloats(vals []float64) *PdfObjectArray {
array := PdfObjectArray{}
for _, val := range vals {
@ -101,27 +127,33 @@ func MakeArrayFromFloats(vals []float64) *PdfObjectArray {
return &array
}
// MakeFloat creates a PdfObjectFloat from a float64 and returns a pointer to the newly allocated
// float object.
func MakeFloat(val float64) *PdfObjectFloat {
	result := new(PdfObjectFloat)
	*result = PdfObjectFloat(val)
	return result
}
// MakeString creates a PdfObjectString from a string and returns a pointer to the newly allocated
// string object.
func MakeString(s string) *PdfObjectString {
	result := new(PdfObjectString)
	*result = PdfObjectString(s)
	return result
}
// MakeNull creates a PdfObjectNull and returns a pointer to it.
func MakeNull() *PdfObjectNull {
	return &PdfObjectNull{}
}
// MakeIndirectObject creates a PdfIndirectObject that contains the specified direct object obj.
// The embedded reference (object and generation number) is left at its zero value.
func MakeIndirectObject(obj PdfObject) *PdfIndirectObject {
	return &PdfIndirectObject{PdfObject: obj}
}
// MakeStream creates a PdfObjectStream with the specified contents and encoder. If encoder is nil, then raw encoding
// will be used (i.e. no encoding applied).
func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error) {
stream := &PdfObjectStream{}
@ -149,6 +181,7 @@ func (this *PdfObjectBool) String() string {
}
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfObjectBool) DefaultWriteString() string {
if *this {
return "true"
@ -161,6 +194,7 @@ func (this *PdfObjectInteger) String() string {
return fmt.Sprintf("%d", *this)
}
// DefaultWriteString outputs the integer as it is to be written to file: its value formatted
// as a decimal number.
func (this *PdfObjectInteger) DefaultWriteString() string {
	value := int64(*this)
	return fmt.Sprintf("%d", value)
}
@ -169,6 +203,7 @@ func (this *PdfObjectFloat) String() string {
return fmt.Sprintf("%f", *this)
}
// DefaultWriteString outputs the float as it is to be written to file: its value formatted with
// the %f verb (fixed-point notation, six digits after the decimal point).
func (this *PdfObjectFloat) DefaultWriteString() string {
	value := float64(*this)
	return fmt.Sprintf("%f", value)
}
@ -177,6 +212,7 @@ func (this *PdfObjectString) String() string {
return fmt.Sprintf("%s", string(*this))
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfObjectString) DefaultWriteString() string {
var output bytes.Buffer
@ -209,6 +245,7 @@ func (this *PdfObjectName) String() string {
return fmt.Sprintf("%s", string(*this))
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfObjectName) DefaultWriteString() string {
var output bytes.Buffer
@ -229,6 +266,8 @@ func (this *PdfObjectName) DefaultWriteString() string {
return output.String()
}
// ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is returned if the array
// contains non-numeric objects (each element can be either PdfObjectInteger or PdfObjectFloat).
func (this *PdfObjectArray) ToFloat64Array() ([]float64, error) {
vals := []float64{}
@ -245,6 +284,8 @@ func (this *PdfObjectArray) ToFloat64Array() ([]float64, error) {
return vals, nil
}
// ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the array contains
// non-integer objects. Each element can only be PdfObjectInteger.
func (this *PdfObjectArray) ToIntegerArray() ([]int, error) {
vals := []int{}
@ -271,6 +312,7 @@ func (this *PdfObjectArray) String() string {
return outStr
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfObjectArray) DefaultWriteString() string {
outStr := "["
for ind, o := range *this {
@ -283,6 +325,7 @@ func (this *PdfObjectArray) DefaultWriteString() string {
return outStr
}
// Append adds a PdfObject to the end of the array. The array is modified in place through the
// pointer receiver.
func (this *PdfObjectArray) Append(obj PdfObject) {
	extended := append(*this, obj)
	*this = extended
}
@ -299,8 +342,8 @@ func getNumberAsFloat(obj PdfObject) (float64, error) {
return 0, fmt.Errorf("Not a number")
}
// For numeric array: Get the array in []float64 slice representation.
// Will return error if not entirely numeric.
// GetAsFloat64Slice returns the array as []float64 slice.
// Returns an error if not entirely numeric (only PdfObjectIntegers, PdfObjectFloats).
func (this *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) {
slice := []float64{}
@ -316,7 +359,7 @@ func (this *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) {
return slice, nil
}
// Merge in key/values from another dictionary. Overwriting if has same keys.
// Merge merges in the key/value pairs from another dictionary, overwriting the values of keys that are already set.
func (this *PdfObjectDictionary) Merge(another *PdfObjectDictionary) {
if another != nil {
for _, key := range another.Keys() {
@ -336,6 +379,7 @@ func (this *PdfObjectDictionary) String() string {
return outStr
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfObjectDictionary) DefaultWriteString() string {
outStr := "<<"
for _, k := range this.keys {
@ -349,6 +393,7 @@ func (this *PdfObjectDictionary) DefaultWriteString() string {
return outStr
}
// Set sets the dictionary's key -> val mapping entry. Overwrites if key already set.
func (d *PdfObjectDictionary) Set(key PdfObjectName, val PdfObject) {
found := false
for _, k := range d.keys {
@ -365,7 +410,7 @@ func (d *PdfObjectDictionary) Set(key PdfObjectName, val PdfObject) {
d.dict[key] = val
}
// Get PdfObject corresponding to the specified key.
// Get returns the PdfObject corresponding to the specified key.
// Returns a nil value if the key is not set.
//
// The design is such that we only return 1 value.
@ -380,12 +425,12 @@ func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject {
return val
}
// Get the list of keys.
// Keys returns the list of keys in the dictionary.
// The dictionary's internal key slice is returned directly rather than a copy.
func (d *PdfObjectDictionary) Keys() []PdfObjectName {
return d.keys
}
// Remove an element specified by key.
// Remove removes an element specified by key.
func (d *PdfObjectDictionary) Remove(key PdfObjectName) {
idx := -1
for i, k := range d.keys {
@ -402,9 +447,7 @@ func (d *PdfObjectDictionary) Remove(key PdfObjectName) {
}
}
// Check if the value's PdfObject interface, or its containing value is nil. Only set the
// key/value pair if not nil.
//
// SetIfNotNil sets the dictionary's key -> val mapping entry -IF- val is not nil.
// Note that we take care to perform a type switch. Otherwise if we would supply a nil value
// of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus
// would get set.
@ -466,6 +509,7 @@ func (this *PdfObjectReference) String() string {
return fmt.Sprintf("Ref(%d %d)", this.ObjectNumber, this.GenerationNumber)
}
// DefaultWriteString outputs the reference as it is to be written to file, in the form
// "<object number> <generation number> R".
func (this *PdfObjectReference) DefaultWriteString() string {
	objNum, genNum := this.ObjectNumber, this.GenerationNumber
	return fmt.Sprintf("%d %d R", objNum, genNum)
}
@ -476,6 +520,7 @@ func (this *PdfIndirectObject) String() string {
return fmt.Sprintf("IObject:%d", (*this).ObjectNumber)
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfIndirectObject) DefaultWriteString() string {
outStr := fmt.Sprintf("%d 0 R", (*this).ObjectNumber)
return outStr
@ -485,6 +530,7 @@ func (this *PdfObjectStream) String() string {
return fmt.Sprintf("Object stream %d: %s", this.ObjectNumber, this.PdfObjectDictionary)
}
// DefaultWriteString outputs the object as it is to be written to file.
func (this *PdfObjectStream) DefaultWriteString() string {
outStr := fmt.Sprintf("%d 0 R", (*this).ObjectNumber)
return outStr
@ -494,18 +540,20 @@ func (this *PdfObjectNull) String() string {
return "null"
}
// DefaultWriteString outputs the object as it is to be written to file, which for the null
// object is always the literal "null".
func (this *PdfObjectNull) DefaultWriteString() string {
return "null"
}
// Handy functions to work with primitive objects.
// Traces a pdf object to a direct object. For example contained
// in indirect objects (can be double referenced even).
//
// Note: This function does not trace/resolve references.
// That needs to be done beforehand.
// TraceMaxDepth specifies the maximum recursion depth allowed.
const TraceMaxDepth = 20
// TraceToDirectObject traces a PdfObject to a direct object. For example direct objects contained
// in indirect objects (can be double referenced even).
//
// Note: This function does not trace/resolve references. That needs to be done beforehand.
func TraceToDirectObject(obj PdfObject) PdfObject {
iobj, isIndirectObj := obj.(*PdfIndirectObject)
depth := 0