/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */

package core

import (
	"bytes"
	"encoding/hex"
	"errors"
	"fmt"

	"github.com/unidoc/unidoc/common"
)

// PdfObject is an interface which all primitive PDF objects must implement.
type PdfObject interface {
	// String returns a string representation of the primitive (for debugging).
	String() string

	// DefaultWriteString returns the PDF primitive as written to file as expected by the standard.
	DefaultWriteString() string
}

// PdfObjectBool represents the primitive PDF boolean object.
type PdfObjectBool bool

// PdfObjectInteger represents the primitive PDF integer numerical object.
type PdfObjectInteger int64

// PdfObjectFloat represents the primitive PDF floating point numerical object.
type PdfObjectFloat float64

// PdfObjectString represents the primitive PDF string object.
type PdfObjectString struct {
	// val holds the string contents. It is treated as a raw byte sequence and is
	// not required to be valid utf-8.
	val string
	// isHex selects hexadecimal output (<...>) instead of literal output ((...))
	// when the string is written.
	isHex bool
}

// PdfObjectName represents the primitive PDF name object.
type PdfObjectName string

// PdfObjectArray represents the primitive PDF array object.
type PdfObjectArray struct {
	// vec holds the array elements in order.
	vec []PdfObject
}

// PdfObjectDictionary represents the primitive PDF dictionary/map object.
type PdfObjectDictionary struct {
	// dict maps each key to its value.
	dict map[PdfObjectName]PdfObject
	// keys lists the keys in insertion order; it determines the order in which
	// entries are written out.
	keys []PdfObjectName
}

// PdfObjectNull represents the primitive PDF null object.
type PdfObjectNull struct{}

// PdfObjectReference represents the primitive PDF reference object.
type PdfObjectReference struct {
	ObjectNumber     int64
	GenerationNumber int64
}

// PdfIndirectObject represents the primitive PDF indirect object.
// It pairs a reference (object/generation number) with the object it wraps.
type PdfIndirectObject struct {
	PdfObjectReference
	PdfObject
}

// PdfObjectStream represents the primitive PDF Object stream.
// It pairs a reference with the stream dictionary and the stream data.
type PdfObjectStream struct {
	PdfObjectReference
	*PdfObjectDictionary
	// Stream holds the stream bytes as stored (MakeStream stores the encoded data here).
	Stream []byte
}

// MakeDict creates and returns an empty PdfObjectDictionary.
func MakeDict() *PdfObjectDictionary { d := &PdfObjectDictionary{} d.dict = map[PdfObjectName]PdfObject{} d.keys = []PdfObjectName{} return d } // MakeName creates a PdfObjectName from a string. func MakeName(s string) *PdfObjectName { name := PdfObjectName(s) return &name } // MakeInteger creates a PdfObjectInteger from an int64. func MakeInteger(val int64) *PdfObjectInteger { num := PdfObjectInteger(val) return &num } // MakeArray creates an PdfObjectArray from a list of PdfObjects. func MakeArray(objects ...PdfObject) *PdfObjectArray { array := &PdfObjectArray{} array.vec = []PdfObject{} for _, obj := range objects { array.vec = append(array.vec, obj) } return array } // MakeArrayFromIntegers creates an PdfObjectArray from a slice of ints, where each array element is // an PdfObjectInteger. func MakeArrayFromIntegers(vals []int) *PdfObjectArray { array := &PdfObjectArray{} array.vec = []PdfObject{} for _, val := range vals { array.vec = append(array.vec, MakeInteger(int64(val))) } return array } // MakeArrayFromIntegers64 creates an PdfObjectArray from a slice of int64s, where each array element // is an PdfObjectInteger. func MakeArrayFromIntegers64(vals []int64) *PdfObjectArray { array := &PdfObjectArray{} array.vec = []PdfObject{} for _, val := range vals { array.vec = append(array.vec, MakeInteger(val)) } return array } // MakeArrayFromFloats creates an PdfObjectArray from a slice of float64s, where each array element is an // PdfObjectFloat. func MakeArrayFromFloats(vals []float64) *PdfObjectArray { array := &PdfObjectArray{} array.vec = []PdfObject{} for _, val := range vals { array.vec = append(array.vec, MakeFloat(val)) } return array } // MakeFloat creates an PdfObjectFloat from a float64. func MakeFloat(val float64) *PdfObjectFloat { num := PdfObjectFloat(val) return &num } // MakeString creates an PdfObjectString from a string. // NOTE: PDF does not use utf-8 string encoding like Go so `s` will often not be a utf-8 encoded // string. 
func MakeString(s string) *PdfObjectString { str := PdfObjectString{val: s} return &str } // MakeStringFromBytes creates an PdfObjectString from a byte array. // This is more natural than MakeString as `data` is usually not utf-8 encoded. func MakeStringFromBytes(data []byte) *PdfObjectString { return MakeString(string(data)) } // MakeHexString creates an PdfObjectString from a string intended for output as a hexadecimal string. func MakeHexString(s string) *PdfObjectString { str := PdfObjectString{val: s, isHex: true} return &str } // MakeNull creates an PdfObjectNull. func MakeNull() *PdfObjectNull { null := PdfObjectNull{} return &null } // MakeIndirectObject creates an PdfIndirectObject with a specified direct object PdfObject. func MakeIndirectObject(obj PdfObject) *PdfIndirectObject { ind := &PdfIndirectObject{} ind.PdfObject = obj return ind } // MakeStream creates an PdfObjectStream with specified contents and encoding. If encoding is nil, then raw encoding // will be used (i.e. no encoding applied). func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error) { stream := &PdfObjectStream{} if encoder == nil { encoder = NewRawEncoder() } stream.PdfObjectDictionary = encoder.MakeStreamDict() encoded, err := encoder.EncodeBytes(contents) if err != nil { return nil, err } stream.PdfObjectDictionary.Set("Length", MakeInteger(int64(len(encoded)))) stream.Stream = encoded return stream, nil } func (bool *PdfObjectBool) String() string { if *bool { return "true" } else { return "false" } } // DefaultWriteString outputs the object as it is to be written to file. func (bool *PdfObjectBool) DefaultWriteString() string { if *bool { return "true" } else { return "false" } } func (int *PdfObjectInteger) String() string { return fmt.Sprintf("%d", *int) } // DefaultWriteString outputs the object as it is to be written to file. 
func (int *PdfObjectInteger) DefaultWriteString() string { return fmt.Sprintf("%d", *int) } func (float *PdfObjectFloat) String() string { return fmt.Sprintf("%f", *float) } // DefaultWriteString outputs the object as it is to be written to file. func (float *PdfObjectFloat) DefaultWriteString() string { return fmt.Sprintf("%f", *float) } // String returns a string representation of the *PdfObjectString. func (str *PdfObjectString) String() string { return str.val } // Str returns the string value of the PdfObjectString. Defined in addition to String() function to clarify that // this function returns the underlying string directly, whereas the String function technically could include // debug info. func (str *PdfObjectString) Str() string { return str.val } // Bytes returns the PdfObjectString content as a []byte array. func (str *PdfObjectString) Bytes() []byte { return []byte(str.val) } // DefaultWriteString outputs the object as it is to be written to file. func (str *PdfObjectString) DefaultWriteString() string { var output bytes.Buffer // Handle hex representation. if str.isHex { shex := hex.EncodeToString(str.Bytes()) output.WriteString("<") output.WriteString(shex) output.WriteString(">") return output.String() } // Otherwise regular string. escapeSequences := map[byte]string{ '\n': "\\n", '\r': "\\r", '\t': "\\t", '\b': "\\b", '\f': "\\f", '(': "\\(", ')': "\\)", '\\': "\\\\", } output.WriteString("(") for i := 0; i < len(str.val); i++ { char := str.val[i] if escStr, useEsc := escapeSequences[char]; useEsc { output.WriteString(escStr) } else { output.WriteByte(char) } } output.WriteString(")") return output.String() } func (name *PdfObjectName) String() string { return string(*name) } // DefaultWriteString outputs the object as it is to be written to file. 
func (name *PdfObjectName) DefaultWriteString() string { var output bytes.Buffer if len(*name) > 127 { common.Log.Debug("ERROR: Name too long (%s)", *name) } output.WriteString("/") for i := 0; i < len(*name); i++ { char := (*name)[i] if !IsPrintable(char) || char == '#' || IsDelimiter(char) { output.WriteString(fmt.Sprintf("#%.2x", char)) } else { output.WriteByte(char) } } return output.String() } // Elements returns a slice of the PdfObject elements in the array. // Preferred over accessing the array directly as type may be changed in future major versions (v3). func (array *PdfObjectArray) Elements() []PdfObject { if array == nil { return nil } return array.vec } // Len returns the number of elements in the array. func (array *PdfObjectArray) Len() int { if array == nil { return 0 } return len(array.vec) } // Get returns the i-th element of the array or nil if out of bounds (by index). func (array *PdfObjectArray) Get(i int) PdfObject { if array == nil || i >= len(array.vec) || i < 0 { return nil } return array.vec[i] } // Set sets the PdfObject at index i of the array. An error is returned if the index is outside bounds. func (array *PdfObjectArray) Set(i int, obj PdfObject) error { if i < 0 || i >= len(array.vec) { return errors.New("Outside bounds") } array.vec[i] = obj return nil } // Append appends PdfObject(s) to the array. func (array *PdfObjectArray) Append(objects ...PdfObject) { if array == nil { common.Log.Debug("Warn - Attempt to append to a nil array") return } if array.vec == nil { array.vec = []PdfObject{} } for _, obj := range objects { array.vec = append(array.vec, obj) } } // ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is // returned if the array contains non-numeric objects (each element can be either PdfObjectInteger // or PdfObjectFloat). 
func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) { vals := []float64{} for _, obj := range array.Elements() { switch t := obj.(type) { case *PdfObjectInteger: vals = append(vals, float64(*t)) case *PdfObjectFloat: vals = append(vals, float64(*t)) default: return nil, ErrTypeError } } return vals, nil } // ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the // array non-integer objects. Each element can only be PdfObjectInteger. func (array *PdfObjectArray) ToIntegerArray() ([]int, error) { vals := []int{} for _, obj := range array.Elements() { if number, is := obj.(*PdfObjectInteger); is { vals = append(vals, int(*number)) } else { return nil, fmt.Errorf("Type error") } } return vals, nil } // String returns a string describing `array`. func (array *PdfObjectArray) String() string { outStr := "[" for ind, o := range array.Elements() { outStr += o.String() if ind < (array.Len() - 1) { outStr += ", " } } outStr += "]" return outStr } // DefaultWriteString outputs the object as it is to be written to file. func (array *PdfObjectArray) DefaultWriteString() string { outStr := "[" for ind, o := range array.Elements() { outStr += o.DefaultWriteString() if ind < (array.Len() - 1) { outStr += " " } } outStr += "]" return outStr } // GetNumberAsFloat returns the contents of `obj` as a float if it is an integer or float, or an // error if it isn't. func GetNumberAsFloat(obj PdfObject) (float64, error) { switch t := obj.(type) { case *PdfObjectFloat: return float64(*t), nil case *PdfObjectInteger: return float64(*t), nil } return 0, ErrNotANumber } // IsNullObject returns true if `obj` is a PdfObjectNull. func IsNullObject(obj PdfObject) bool { _, isNull := obj.(*PdfObjectNull) return isNull } // GetNumbersAsFloat converts a list of pdf objects representing floats or integers to a slice of // float64 values. 
func GetNumbersAsFloat(objects []PdfObject) (floats []float64, err error) { for _, obj := range objects { val, err := GetNumberAsFloat(obj) if err != nil { return nil, err } floats = append(floats, val) } return floats, nil } // GetNumberAsInt64 returns the contents of `obj` as an int64 if it is an integer or float, or an // error if it isn't. This is for cases where expecting an integer, but some implementations // actually store the number in a floating point format. func GetNumberAsInt64(obj PdfObject) (int64, error) { switch t := obj.(type) { case *PdfObjectFloat: common.Log.Debug("Number expected as integer was stored as float (type casting used)") return int64(*t), nil case *PdfObjectInteger: return int64(*t), nil } return 0, ErrNotANumber } // getNumberAsFloatOrNull returns the contents of `obj` as a *float if it is an integer or float, // or nil if it `obj` is nil. In other cases an error is returned. func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) { switch t := obj.(type) { case *PdfObjectFloat: val := float64(*t) return &val, nil case *PdfObjectInteger: val := float64(*t) return &val, nil case *PdfObjectNull: return nil, nil } return nil, ErrNotANumber } // GetAsFloat64Slice returns the array as []float64 slice. // Returns an error if not entirely numeric (only PdfObjectIntegers, PdfObjectFloats). func (array *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) { slice := []float64{} for _, obj := range array.Elements() { number, err := GetNumberAsFloat(TraceToDirectObject(obj)) if err != nil { return nil, fmt.Errorf("Array element not a number") } slice = append(slice, number) } return slice, nil } // Merge merges in key/values from another dictionary. Overwriting if has same keys. func (d *PdfObjectDictionary) Merge(another *PdfObjectDictionary) { if another != nil { for _, key := range another.Keys() { val := another.Get(key) d.Set(key, val) } } } // String returns a string describing `d`. 
func (d *PdfObjectDictionary) String() string { outStr := "Dict(" for _, k := range d.keys { v := d.dict[k] outStr += fmt.Sprintf("\"%s\": %s, ", k, v.String()) } outStr += ")" return outStr } // DefaultWriteString outputs the object as it is to be written to file. func (d *PdfObjectDictionary) DefaultWriteString() string { outStr := "<<" for _, k := range d.keys { v := d.dict[k] common.Log.Trace("Writing k: %s %T %v %v", k, v, k, v) outStr += k.DefaultWriteString() outStr += " " outStr += v.DefaultWriteString() } outStr += ">>" return outStr } // Set sets the dictionary's key -> val mapping entry. Overwrites if key already set. func (d *PdfObjectDictionary) Set(key PdfObjectName, val PdfObject) { found := false for _, k := range d.keys { if k == key { found = true break } } if !found { d.keys = append(d.keys, key) } d.dict[key] = val } // Get returns the PdfObject corresponding to the specified key. // Returns a nil value if the key is not set. // // The design is such that we only return 1 value. // The reason is that, it will be easy to do type casts such as // name, ok := dict.Get("mykey").(*PdfObjectName) // if !ok .... func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject { val, has := d.dict[key] if !has { return nil } return val } // Keys returns the list of keys in the dictionary. func (d *PdfObjectDictionary) Keys() []PdfObjectName { return d.keys } // Remove removes an element specified by key. func (d *PdfObjectDictionary) Remove(key PdfObjectName) { idx := -1 for i, k := range d.keys { if k == key { idx = i break } } if idx >= 0 { // Found. Remove from key list and map. d.keys = append(d.keys[:idx], d.keys[idx+1:]...) delete(d.dict, key) } } // SetIfNotNil sets the dictionary's key -> val mapping entry -IF- val is not nil. // Note that we take care to perform a type switch. Otherwise if we would supply a nil value // of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus // would get set. 
func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) { if val != nil { switch t := val.(type) { case *PdfObjectName: if t != nil { d.Set(key, val) } case *PdfObjectDictionary: if t != nil { d.Set(key, val) } case *PdfObjectStream: if t != nil { d.Set(key, val) } case *PdfObjectString: if t != nil { d.Set(key, val) } case *PdfObjectNull: if t != nil { d.Set(key, val) } case *PdfObjectInteger: if t != nil { d.Set(key, val) } case *PdfObjectArray: if t != nil { d.Set(key, val) } case *PdfObjectBool: if t != nil { d.Set(key, val) } case *PdfObjectFloat: if t != nil { d.Set(key, val) } case *PdfObjectReference: if t != nil { d.Set(key, val) } case *PdfIndirectObject: if t != nil { d.Set(key, val) } default: common.Log.Error("ERROR: Unknown type: %T - should never happen!", val) } } } // String returns a string describing `ref`. func (ref *PdfObjectReference) String() string { return fmt.Sprintf("Ref(%d %d)", ref.ObjectNumber, ref.GenerationNumber) } // DefaultWriteString outputs the object as it is to be written to file. func (ref *PdfObjectReference) DefaultWriteString() string { return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber) } // String returns a string describing `ind`. func (ind *PdfIndirectObject) String() string { // Avoid printing out the object, can cause problems with circular // references. return fmt.Sprintf("IObject:%d", (*ind).ObjectNumber) } // DefaultWriteString outputs the object as it is to be written to file. func (ind *PdfIndirectObject) DefaultWriteString() string { outStr := fmt.Sprintf("%d 0 R", (*ind).ObjectNumber) return outStr } // String returns a string describing `stream`. func (stream *PdfObjectStream) String() string { return fmt.Sprintf("Object stream %d: %s", stream.ObjectNumber, stream.PdfObjectDictionary) } // DefaultWriteString outputs the object as it is to be written to file. 
func (stream *PdfObjectStream) DefaultWriteString() string { outStr := fmt.Sprintf("%d 0 R", (*stream).ObjectNumber) return outStr } // String returns a string describing `null`. func (null *PdfObjectNull) String() string { return "null" } // DefaultWriteString outputs the object as it is to be written to file. func (null *PdfObjectNull) DefaultWriteString() string { return "null" } // Handy functions to work with primitive objects. // TraceMaxDepth specifies the maximum recursion depth allowed. const TraceMaxDepth = 20 // TraceToDirectObject traces a PdfObject to a direct object. For example direct objects contained // in indirect objects (can be double referenced even). // // Note: This function does not trace/resolve references. That needs to be done beforehand. func TraceToDirectObject(obj PdfObject) PdfObject { iobj, isIndirectObj := obj.(*PdfIndirectObject) depth := 0 for isIndirectObj { obj = iobj.PdfObject iobj, isIndirectObj = obj.(*PdfIndirectObject) depth++ if depth > TraceMaxDepth { common.Log.Error("Trace depth level beyond 20 - error!") return nil } } return obj } // Convenience methods for converting PdfObject to underlying types. // GetBool returns the *PdfObjectBool object that is represented by a PdfObject directly or indirectly // within an indirect object. The bool flag indicates whether a match was found. func GetBool(obj PdfObject) (bo *PdfObjectBool, found bool) { bo, found = TraceToDirectObject(obj).(*PdfObjectBool) return bo, found } // GetBoolVal returns the bool value within a *PdObjectBool represented by an PdfObject interface directly or indirectly. // If the PdfObject does not represent a bool value, a default value of false is returned (found = false also). 
func GetBoolVal(obj PdfObject) (b bool, found bool) { bo, found := TraceToDirectObject(obj).(*PdfObjectBool) if found { return bool(*bo), true } return false, false } // GetInt returns the *PdfObjectBool object that is represented by a PdfObject either directly or indirectly // within an indirect object. The bool flag indicates whether a match was found. func GetInt(obj PdfObject) (into *PdfObjectInteger, found bool) { into, found = TraceToDirectObject(obj).(*PdfObjectInteger) return into, found } // GetIntVal returns the int value represented by the PdfObject directly or indirectly if contained within an // indirect object. On type mismatch the found bool flag returned is false and a nil pointer is returned. func GetIntVal(obj PdfObject) (val int, found bool) { into, found := TraceToDirectObject(obj).(*PdfObjectInteger) if found { return int(*into), true } return 0, false } // GetFloat returns the *PdfObjectFloat represented by the PdfObject directly or indirectly within an indirect // object. On type mismatch the found bool flag is false and a nil pointer is returned. func GetFloat(obj PdfObject) (fo *PdfObjectFloat, found bool) { fo, found = TraceToDirectObject(obj).(*PdfObjectFloat) return fo, found } // GetFloatVal returns the float64 value represented by the PdfObject directly or indirectly if contained within an // indirect object. On type mismatch the found bool flag returned is false and a nil pointer is returned. func GetFloatVal(obj PdfObject) (val float64, found bool) { fo, found := TraceToDirectObject(obj).(*PdfObjectFloat) if found { return float64(*fo), true } return 0, false } // GetString returns the *PdfObjectString represented by the PdfObject directly or indirectly within an indirect // object. On type mismatch the found bool flag is false and a nil pointer is returned. 
func GetString(obj PdfObject) (so *PdfObjectString, found bool) { so, found = TraceToDirectObject(obj).(*PdfObjectString) return so, found } // GetStringVal returns the string value represented by the PdfObject directly or indirectly if // contained within an indirect object. On type mismatch the found bool flag returned is false and // an empty string is returned. func GetStringVal(obj PdfObject) (val string, found bool) { so, found := TraceToDirectObject(obj).(*PdfObjectString) if found { return so.Str(), true } return } // GetStringVal is like GetStringVal except that it returns the string as a []byte. // It is for convenience. func GetStringBytes(obj PdfObject) (val []byte, found bool) { so, found := TraceToDirectObject(obj).(*PdfObjectString) if found { return so.Bytes(), true } return } // GetName returns the *PdfObjectName represented by the PdfObject directly or indirectly within an indirect // object. On type mismatch the found bool flag is false and a nil pointer is returned. func GetName(obj PdfObject) (name *PdfObjectName, found bool) { name, found = TraceToDirectObject(obj).(*PdfObjectName) return name, found } // GetNameVal returns the string value represented by the PdfObject directly or indirectly if // contained within an indirect object. On type mismatch the found bool flag returned is false and // an empty string is returned. func GetNameVal(obj PdfObject) (val string, found bool) { name, found := TraceToDirectObject(obj).(*PdfObjectName) if found { return string(*name), true } return } // GetArray returns the *PdfObjectArray represented by the PdfObject directly or indirectly within an indirect // object. On type mismatch the found bool flag is false and a nil pointer is returned. func GetArray(obj PdfObject) (arr *PdfObjectArray, found bool) { arr, found = TraceToDirectObject(obj).(*PdfObjectArray) return arr, found } // GetDict returns the *PdfObjectDictionary represented by the PdfObject directly or indirectly within an indirect // object. 
On type mismatch the found bool flag is false and a nil pointer is returned. func GetDict(obj PdfObject) (dict *PdfObjectDictionary, found bool) { dict, found = TraceToDirectObject(obj).(*PdfObjectDictionary) return dict, found } // GetIndirect returns the *PdfIndirectObject represented by the PdfObject. On type mismatch the found bool flag is // false and a nil pointer is returned. func GetIndirect(obj PdfObject) (ind *PdfIndirectObject, found bool) { ind, found = obj.(*PdfIndirectObject) return ind, found } // GetStream returns the *PdfObjectStream represented by the PdfObject. On type mismatch the found bool flag is // false and a nil pointer is returned. func GetStream(obj PdfObject) (stream *PdfObjectStream, found bool) { stream, found = obj.(*PdfObjectStream) return stream, found }