diff --git a/pdf/core/crossrefs.go b/pdf/core/crossrefs.go index ebca6ffa..d0b5667a 100644 --- a/pdf/core/crossrefs.go +++ b/pdf/core/crossrefs.go @@ -316,7 +316,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf if parser.crypter != nil { // Mark as decrypted (inside object stream) for caching. // and avoid decrypting decrypted object. - parser.crypter.DecryptedObjects[optr] = true + parser.crypter.decryptedObjects[optr] = true } return optr, true, nil } else { diff --git a/pdf/core/crypt.go b/pdf/core/crypt.go index 3d74b044..3402d4b0 100644 --- a/pdf/core/crypt.go +++ b/pdf/core/crypt.go @@ -6,168 +6,313 @@ package core import ( - "bytes" - "crypto/aes" - "crypto/cipher" "crypto/md5" "crypto/rand" - "crypto/rc4" - "crypto/sha256" - "crypto/sha512" - "encoding/binary" "errors" "fmt" - "hash" - "io" - "math" + "time" "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core/security" + crypto "github.com/unidoc/unidoc/pdf/core/security/crypt" ) +// EncryptInfo contains information generated by the document encrypter. +type EncryptInfo struct { + // Version is the minimal PDF version that supports the specified encryption algorithm. + Version + // Encrypt is an encryption dictionary that contains all necessary parameters. + // It should be stored in all copies of the document trailer. + Encrypt *PdfObjectDictionary + // ID0 and ID1 are IDs used in the trailer. Older algorithms such as RC4 use them for encryption. + ID0, ID1 string +} + +// PdfCryptNewEncrypt makes the document crypt handler based on a specified crypt filter. +func PdfCryptNewEncrypt(cf crypto.Filter, userPass, ownerPass []byte, perm security.Permissions) (*PdfCrypt, *EncryptInfo, error) { + crypter := &PdfCrypt{ + encryptedObjects: make(map[PdfObject]bool), + cryptFilters: make(cryptFilters), + encryptStd: security.StdEncryptDict{ + P: perm, + EncryptMetadata: true, + }, + } + var vers Version + if cf != nil { + v := cf.PDFVersion() + vers.Major, vers.Minor = v[0], v[1] + + V, R := cf.HandlerVersion() + crypter.encrypt.V = V + crypter.encryptStd.R = R + + crypter.encrypt.Length = cf.KeyLength() * 8 + } + const ( + defaultFilter = stdCryptFilter + ) + crypter.cryptFilters[defaultFilter] = cf + if crypter.encrypt.V >= 4 { + crypter.streamFilter = defaultFilter + crypter.stringFilter = defaultFilter + } + ed := crypter.newEncryptDict() + + // Prepare the ID object for the trailer. + hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850))) + id0 := string(hashcode[:]) + b := make([]byte, 100) + rand.Read(b) + hashcode = md5.Sum(b) + id1 := string(hashcode[:]) + common.Log.Trace("Random b: % x", b) + + common.Log.Trace("Gen Id 0: % x", id0) + + crypter.id0 = string(id0) + + err := crypter.generateParams(userPass, ownerPass) + if err != nil { + return nil, nil, err + } + // encode parameters generated by the Standard security handler + encodeEncryptStd(&crypter.encryptStd, ed) + if crypter.encrypt.V >= 4 { + if err := crypter.saveCryptFilters(ed); err != nil { + return nil, nil, err + } + } + + return crypter, &EncryptInfo{ + Version: vers, + Encrypt: ed, + ID0: id0, ID1: id1, + }, nil + } + // PdfCrypt provides PDF encryption/decryption support. // The PDF standard supports encryption of strings and streams (Section 7.6). // TODO (v3): Consider unexporting.
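Reviewer note: a minimal usage sketch of the new `PdfCryptNewEncrypt` constructor above. Everything it calls (`crypto.NewFilterV2`, `PdfCryptNewEncrypt`, the `EncryptInfo` fields) appears in this patch; the permission mask is an illustrative value assembled from the bit positions of the removed `AccessPermissions`/`GetP` code, not a named constant from the security package.

```go
package core

import (
	"fmt"

	"github.com/unidoc/unidoc/pdf/core/security"
	crypto "github.com/unidoc/unidoc/pdf/core/security/crypt"
)

// newRC4Encrypter sketches how a writer could obtain a document encrypter.
func newRC4Encrypter(userPass, ownerPass []byte) (*PdfCrypt, *EncryptInfo, error) {
	// Legacy RC4 filter with a 16-byte (128-bit) key, as used by newCryptFiltersV2.
	cf := crypto.NewFilterV2(16)

	// Illustrative permission mask: all user operations allowed
	// (bit positions taken from the removed GetP mapping).
	perm := security.Permissions(1<<2 | 1<<3 | 1<<4 | 1<<5 | 1<<8 | 1<<9 | 1<<10 | 1<<11)

	crypter, info, err := PdfCryptNewEncrypt(cf, userPass, ownerPass, perm)
	if err != nil {
		return nil, nil, err
	}

	// info.Encrypt is the /Encrypt dictionary to store with the trailer;
	// info.ID0 and info.ID1 are the two entries of the trailer /ID array.
	fmt.Printf("minimal PDF version: %d.%d\n", info.Major, info.Minor)
	return crypter, info, nil
}
```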
type PdfCrypt struct { - Filter string - Subfilter string - V int - Length int - R int - O []byte - U []byte - OE []byte // R=6 - UE []byte // R=6 - P int // TODO (v3): uint32 - Perms []byte // R=6 - EncryptMetadata bool - Id0 string - EncryptionKey []byte - DecryptedObjects map[PdfObject]bool - EncryptedObjects map[PdfObject]bool - Authenticated bool + encrypt encryptDict + encryptStd security.StdEncryptDict + + id0 string + encryptionKey []byte + decryptedObjects map[PdfObject]bool + encryptedObjects map[PdfObject]bool + authenticated bool // Crypt filters (V4). - CryptFilters CryptFilters - StreamFilter string - StringFilter string + cryptFilters cryptFilters + streamFilter string + stringFilter string parser *PdfParser decryptedObjNum map[int]struct{} - ivAESZero []byte // a zero buffer used as an initialization vector for AES } -// AccessPermissions is a list of access permissions for a PDF file. -type AccessPermissions struct { - Printing bool - Modify bool - ExtractGraphics bool - Annotate bool +// encodeEncryptStd encodes fields of standard security handler to an Encrypt dictionary. +func encodeEncryptStd(d *security.StdEncryptDict, ed *PdfObjectDictionary) { + ed.Set("R", MakeInteger(int64(d.R))) + ed.Set("P", MakeInteger(int64(d.P))) - // Allow form filling, if annotation is disabled? If annotation enabled, is not looked at. - FillForms bool - DisabilityExtract bool // not clear what this means! - - // Allow rotating, editing page order. - RotateInsert bool - - // Limit print quality (lowres), assuming Printing is true. - FullPrintQuality bool -} - -const padding = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF" + - "\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C" + - "\xA9\xFE\x64\x53\x69\x7A" - -// StandardCryptFilter is a default name for a standard crypt filter. -const StandardCryptFilter = "StdCF" - -// CryptFilter represents information from a CryptFilter dictionary. -// TODO (v3): Replace with cryptFilterMethod interface. -type CryptFilter struct { - Cfm string - Length int - cfm cryptFilterMethod -} - -func (cf CryptFilter) getCFM() (cryptFilterMethod, error) { - // TODO (v3): remove this method and access cf.cfm directly - if cf.cfm != nil { - return cf.cfm, nil - } - // There is a non-zero chance that someone relies on the ability to - // add crypt filters manually using the library. - // So if we hit such case - be nice and find a filter by name. - return getCryptFilterMethod(cf.Cfm) -} - -// Encryption filters names. -// Table 25, CFM (page 92) -const ( - CryptFilterNone = "None" // do not decrypt data - CryptFilterV2 = "V2" // RC4-based filter - CryptFilterAESV2 = "AESV2" // AES-based filter (128 bit key, PDF 1.6) - CryptFilterAESV3 = "AESV3" // AES-based filter (256 bit key, PDF 2.0) -) - -func newCryptFiltersV2(length int) CryptFilters { - return CryptFilters{ - StandardCryptFilter: NewCryptFilterV2(length), + ed.Set("O", MakeStringFromBytes(d.O)) + ed.Set("U", MakeStringFromBytes(d.U)) + if d.R >= 5 { + ed.Set("OE", MakeStringFromBytes(d.OE)) + ed.Set("UE", MakeStringFromBytes(d.UE)) + ed.Set("EncryptMetadata", MakeBool(d.EncryptMetadata)) + if d.R > 5 { + ed.Set("Perms", MakeStringFromBytes(d.Perms)) + } } } -// NewCryptFilterV2 creates a RC4-based filter with a specified key length (in bytes). -func NewCryptFilterV2(length int) CryptFilter { - // TODO (v3): Unexport. - return CryptFilter{ - Cfm: CryptFilterV2, - Length: length, - cfm: cryptFilterV2{}, - } -} - -// NewCryptFilterAESV2 creates an AES-based filter with a 128 bit key (AESV2). 
-func NewCryptFilterAESV2() CryptFilter { - // TODO (v3): Unexport. - return CryptFilter{ - Cfm: CryptFilterAESV2, - Length: 16, - cfm: cryptFilterAESV2{}, - } -} - -// NewCryptFilterAESV3 creates an AES-based filter with a 256 bit key (AESV3). -func NewCryptFilterAESV3() CryptFilter { - // TODO (v3): Unexport. - return CryptFilter{ - Cfm: CryptFilterAESV3, - Length: 32, - cfm: cryptFilterAESV3{}, - } -} - -// CryptFilters is a map of crypt filter name and underlying CryptFilter info. -// TODO (v3): Unexport. -type CryptFilters map[string]CryptFilter - -func (m CryptFilters) byName(cfm string) (cryptFilterMethod, error) { - cf, ok := m[cfm] +// decodeEncryptStd decodes fields of standard security handler from an Encrypt dictionary. +func decodeEncryptStd(d *security.StdEncryptDict, ed *PdfObjectDictionary) error { + // TODO(dennwc): this code is too verbose; maybe use reflection to populate fields and validate afterwards? + R, ok := ed.Get("R").(*PdfObjectInteger) if !ok { - err := fmt.Errorf("Unsupported crypt filter (%s)", cfm) - common.Log.Debug("%s", err) - return nil, err + return errors.New("encrypt dictionary missing R") } - f, err := cf.getCFM() - if err != nil { - common.Log.Debug("%s", err) - return nil, err + // TODO(dennwc): according to spec, R should be validated according to V value + if *R < 2 || *R > 6 { + return fmt.Errorf("invalid R (%d)", *R) } - return f, nil + d.R = int(*R) + + O, ok := ed.GetString("O") + if !ok { + return errors.New("encrypt dictionary missing O") + } + if d.R == 5 || d.R == 6 { + // the spec says =48 bytes, but Acrobat pads them out longer + if len(O) < 48 { + return fmt.Errorf("Length(O) < 48 (%d)", len(O)) + } + } else if len(O) != 32 { + return fmt.Errorf("Length(O) != 32 (%d)", len(O)) + } + d.O = []byte(O) + + U, ok := ed.GetString("U") + if !ok { + return errors.New("encrypt dictionary missing U") + } + if d.R == 5 || d.R == 6 { + // the spec says =48 bytes, but Acrobat pads them out longer + if len(U) < 48 { + return fmt.Errorf("Length(U) < 48 (%d)", len(U)) + } + } else if len(U) != 32 { + // Strictly this does not cause an error. + // If O is OK and others then can still read the file. + common.Log.Debug("Warning: Length(U) != 32 (%d)", len(U)) + //return crypter, errors.New("Length(U) != 32") + } + d.U = []byte(U) + + if d.R >= 5 { + OE, ok := ed.GetString("OE") + if !ok { + return errors.New("encrypt dictionary missing OE") + } else if len(OE) != 32 { + return fmt.Errorf("Length(OE) != 32 (%d)", len(OE)) + } + d.OE = []byte(OE) + + UE, ok := ed.GetString("UE") + if !ok { + return errors.New("encrypt dictionary missing UE") + } else if len(UE) != 32 { + return fmt.Errorf("Length(UE) != 32 (%d)", len(UE)) + } + d.UE = []byte(UE) + } + + P, ok := ed.Get("P").(*PdfObjectInteger) + if !ok { + return errors.New("encrypt dictionary missing permissions attr") + } + d.P = security.Permissions(*P) + + if d.R == 6 { + Perms, ok := ed.GetString("Perms") + if !ok { + return errors.New("encrypt dictionary missing Perms") + } else if len(Perms) != 16 { + return fmt.Errorf("Length(Perms) != 16 (%d)", len(Perms)) + } + d.Perms = []byte(Perms) + } + + if em, ok := ed.Get("EncryptMetadata").(*PdfObjectBool); ok { + d.EncryptMetadata = bool(*em) + } else { + d.EncryptMetadata = true // True by default. + } + return nil } -// LoadCryptFilters loads crypt filter information from the encryption dictionary (V>=4). -// TODO (v3): Unexport. 
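A hedged, in-package sketch of how the new `encodeEncryptStd`/`decodeEncryptStd` pair above round-trips the standard security handler fields. The 32-byte O/U values and the permission mask below are dummy placeholders, not real password digests.

```go
package core

import "github.com/unidoc/unidoc/pdf/core/security"

// encryptDictRoundTrip encodes StdEncryptDict fields into an Encrypt
// dictionary and decodes them back, as the writer and reader paths now do.
func encryptDictRoundTrip() (*security.StdEncryptDict, error) {
	in := security.StdEncryptDict{
		R:               3,                            // RC4, revision 3
		P:               security.Permissions(0x0ffc), // placeholder mask
		O:               make([]byte, 32),             // dummy owner digest
		U:               make([]byte, 32),             // dummy user digest
		EncryptMetadata: true,
	}

	ed := MakeDict()
	encodeEncryptStd(&in, ed)

	var out security.StdEncryptDict
	if err := decodeEncryptStd(&out, ed); err != nil {
		return nil, err
	}
	// out now carries the same R, P, O and U values; EncryptMetadata
	// defaults to true when the entry is absent (R < 5).
	return &out, nil
}
```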
-func (crypt *PdfCrypt) LoadCryptFilters(ed *PdfObjectDictionary) error { - crypt.CryptFilters = CryptFilters{} +func decodeCryptFilter(cf *crypto.FilterDict, d *PdfObjectDictionary) error { + // If Type present, should be CryptFilter. + if typename, ok := d.Get("Type").(*PdfObjectName); ok { + if string(*typename) != "CryptFilter" { + return fmt.Errorf("CF dict type != CryptFilter (%s)", typename) + } + } + + // Method. + name, ok := d.Get("CFM").(*PdfObjectName) + if !ok { + return fmt.Errorf("Unsupported crypt filter (None)") + } + cf.CFM = string(*name) + + // Auth event + if event, ok := d.Get("AuthEvent").(*PdfObjectName); ok { + cf.AuthEvent = security.AuthEvent(*event) + } else { + cf.AuthEvent = security.EventDocOpen + } + + if length, ok := d.Get("Length").(*PdfObjectInteger); ok { + cf.Length = int(*length) + } + return nil +} + +func (crypt *PdfCrypt) newEncryptDict() *PdfObjectDictionary { + // Generate the encryption dictionary. + ed := MakeDict() + ed.Set("Filter", MakeName("Standard")) + ed.Set("V", MakeInteger(int64(crypt.encrypt.V))) + ed.Set("Length", MakeInteger(int64(crypt.encrypt.Length))) + return ed +} + +// String returns a descriptive information string about the encryption method used. +func (crypt *PdfCrypt) String() string { + if crypt == nil { + return "" + } + // TODO(dennwc): define a String method on CF + str := crypt.encrypt.Filter + " - " + + if crypt.encrypt.V == 0 { + str += "Undocumented algorithm" + } else if crypt.encrypt.V == 1 { + // RC4 or AES (bits: 40) + str += "RC4: 40 bits" + } else if crypt.encrypt.V == 2 { + str += fmt.Sprintf("RC4: %d bits", crypt.encrypt.Length) + } else if crypt.encrypt.V == 3 { + str += "Unpublished algorithm" + } else if crypt.encrypt.V >= 4 { + // Look at CF, StmF, StrF + str += fmt.Sprintf("Stream filter: %s - String filter: %s", crypt.streamFilter, crypt.stringFilter) + str += "; Crypt filters:" + for name, cf := range crypt.cryptFilters { + str += fmt.Sprintf(" - %s: %s (%d)", name, cf.Name(), cf.KeyLength()) + } + } + perms := crypt.GetAccessPermissions() + str += fmt.Sprintf(" - %#v", perms) + + return str +} + +// encryptDict is a set of field common to all encryption dictionaries. +type encryptDict struct { + Filter string // (Required) The name of the preferred security handler for this document. + V int // (Required) A code specifying the algorithm to be used in encrypting and decrypting the document. + SubFilter string // Completely specifies the format and interpretation of the encryption dictionary. + Length int // The length of the encryption key, in bits. + + StmF string // The filter that shall be used by default when decrypting streams. + StrF string // The filter that shall be used when decrypting all strings in the document. + EFF string // The filter that shall be used when decrypting embedded file streams. + + CF map[string]crypto.FilterDict // Crypt filters dictionary. +} + +// stdCryptFilter is a default name for a standard crypt filter. +const stdCryptFilter = "StdCF" + +func newCryptFiltersV2(length int) cryptFilters { + return cryptFilters{ + stdCryptFilter: crypto.NewFilterV2(length), + } +} + +// cryptFilters is a map of crypt filter name and underlying CryptFilter info. +type cryptFilters map[string]crypto.Filter + +// loadCryptFilters loads crypt filter information from the encryption dictionary (V>=4). 
+func (crypt *PdfCrypt) loadCryptFilters(ed *PdfObjectDictionary) error { + crypt.cryptFilters = cryptFilters{} obj := ed.Get("CF") obj = TraceToDirectObject(obj) // XXX may need to resolve reference... @@ -208,109 +353,79 @@ func (crypt *PdfCrypt) LoadCryptFilters(ed *PdfObjectDictionary) error { continue } - // If Type present, should be CryptFilter. - if typename, ok := dict.Get("Type").(*PdfObjectName); ok { - if string(*typename) != "CryptFilter" { - return fmt.Errorf("CF dict type != CryptFilter (%s)", typename) - } + var cfd crypto.FilterDict + if err := decodeCryptFilter(&cfd, dict); err != nil { + return err } - - cf := CryptFilter{} - - // Method. - cfmName, ok := dict.Get("CFM").(*PdfObjectName) - if !ok { - return fmt.Errorf("Unsupported crypt filter (None)") - } - cf.Cfm = string(*cfmName) - - cfm, err := getCryptFilterMethod(cf.Cfm) + cf, err := crypto.NewFilter(cfd) if err != nil { return err } - cf.cfm = cfm - - // Length. - cf.Length = 0 - length, ok := dict.Get("Length").(*PdfObjectInteger) - if ok { - // TODO(dennwc): pass length to getCryptFilterMethod and allow filter to validate it - if *length%8 != 0 { - return fmt.Errorf("Crypt filter length not multiple of 8 (%d)", *length) - } - - // Standard security handler expresses the length in multiples of 8 (16 means 128) - // We only deal with standard so far. (Public key not supported yet). - if *length < 5 || *length > 16 { - if *length == 64 || *length == 128 { - common.Log.Debug("STANDARD VIOLATION: Crypt Length appears to be in bits rather than bytes - assuming bits (%d)", *length) - *length /= 8 - } else if !(*length == 32 && cf.Cfm == CryptFilterAESV3) { - return fmt.Errorf("Crypt filter length not in range 40 - 128 bit (%d)", *length) - } - } - cf.Length = int(*length) - } - - crypt.CryptFilters[string(name)] = cf + crypt.cryptFilters[string(name)] = cf } // Cannot be overwritten. - crypt.CryptFilters["Identity"] = CryptFilter{} + crypt.cryptFilters["Identity"] = crypto.NewIdentity() // StrF strings filter. - crypt.StringFilter = "Identity" + crypt.stringFilter = "Identity" if strf, ok := ed.Get("StrF").(*PdfObjectName); ok { - if _, exists := crypt.CryptFilters[string(*strf)]; !exists { + if _, exists := crypt.cryptFilters[string(*strf)]; !exists { return fmt.Errorf("Crypt filter for StrF not specified in CF dictionary (%s)", *strf) } - crypt.StringFilter = string(*strf) + crypt.stringFilter = string(*strf) } // StmF streams filter. - crypt.StreamFilter = "Identity" + crypt.streamFilter = "Identity" if stmf, ok := ed.Get("StmF").(*PdfObjectName); ok { - if _, exists := crypt.CryptFilters[string(*stmf)]; !exists { + if _, exists := crypt.cryptFilters[string(*stmf)]; !exists { return fmt.Errorf("Crypt filter for StmF not specified in CF dictionary (%s)", *stmf) } - crypt.StreamFilter = string(*stmf) + crypt.streamFilter = string(*stmf) } return nil } -// SaveCryptFilters saves crypt filter information to the encryption dictionary (V>=4). -// TODO (v3): Unexport. -func (crypt *PdfCrypt) SaveCryptFilters(ed *PdfObjectDictionary) error { - if crypt.V < 4 { +func encodeCryptFilter(cf crypto.Filter, event security.AuthEvent) *PdfObjectDictionary { + if event == "" { + event = security.EventDocOpen + } + v := MakeDict() + v.Set("Type", MakeName("CryptFilter")) // optional + v.Set("AuthEvent", MakeName(string(event))) + v.Set("CFM", MakeName(cf.Name())) + v.Set("Length", MakeInteger(int64(cf.KeyLength()))) + return v +} + +// saveCryptFilters saves crypt filter information to the encryption dictionary (V>=4). 
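A sketch of what the `saveCryptFilters` method just below produces for a single filter registered under the default StdCF name. The exact /CFM and /Length values come from the filter's `Name()`/`KeyLength()`, so the dictionary shown in the trailing comment is only approximate.

```go
package core

import crypto "github.com/unidoc/unidoc/pdf/core/security/crypt"

// saveCryptFiltersExample builds a V4 crypt with one registered filter and
// dumps its crypt-filter configuration into an Encrypt dictionary.
func saveCryptFiltersExample() (*PdfObjectDictionary, error) {
	crypt := &PdfCrypt{
		encrypt: encryptDict{V: 4}, // saveCryptFilters rejects V < 4
		cryptFilters: cryptFilters{
			stdCryptFilter: crypto.NewFilterV2(16),
		},
		streamFilter: stdCryptFilter,
		stringFilter: stdCryptFilter,
	}

	ed := MakeDict()
	if err := crypt.saveCryptFilters(ed); err != nil {
		return nil, err
	}
	// ed now holds roughly:
	//   /CF << /StdCF << /Type /CryptFilter /AuthEvent /DocOpen /CFM ... /Length ... >> >>
	//   /StmF /StdCF
	//   /StrF /StdCF
	return ed, nil
}
```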
+func (crypt *PdfCrypt) saveCryptFilters(ed *PdfObjectDictionary) error { + if crypt.encrypt.V < 4 { return errors.New("can only be used with V>=4") } cf := MakeDict() ed.Set("CF", cf) - for name, filter := range crypt.CryptFilters { + for name, filter := range crypt.cryptFilters { if name == "Identity" { continue } - v := MakeDict() + v := encodeCryptFilter(filter, "") cf.Set(PdfObjectName(name), v) - - v.Set("Type", MakeName("CryptFilter")) - v.Set("AuthEvent", MakeName("DocOpen")) - v.Set("CFM", MakeName(string(filter.Cfm))) - v.Set("Length", MakeInteger(int64(filter.Length))) } - ed.Set("StrF", MakeName(crypt.StringFilter)) - ed.Set("StmF", MakeName(crypt.StreamFilter)) + ed.Set("StrF", MakeName(crypt.stringFilter)) + ed.Set("StmF", MakeName(crypt.streamFilter)) return nil } -// PdfCryptMakeNew makes the document crypt handler based on the encryption dictionary +// PdfCryptNewDecrypt makes the document crypt handler based on the encryption dictionary // and trailer dictionary. Returns an error on failure to process. -func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCrypt, error) { - crypter := PdfCrypt{ - Authenticated: false, - DecryptedObjects: make(map[PdfObject]bool), - EncryptedObjects: make(map[PdfObject]bool), +func PdfCryptNewDecrypt(parser *PdfParser, ed, trailer *PdfObjectDictionary) (*PdfCrypt, error) { + crypter := &PdfCrypt{ + authenticated: false, + decryptedObjects: make(map[PdfObject]bool), + encryptedObjects: make(map[PdfObject]bool), decryptedObjNum: make(map[int]struct{}), parser: parser, } @@ -324,11 +439,10 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr common.Log.Debug("ERROR Unsupported filter (%s)", *filter) return crypter, errors.New("Unsupported Filter") } - crypter.Filter = string(*filter) + crypter.encrypt.Filter = string(*filter) - subfilter, ok := ed.Get("SubFilter").(*PdfObjectString) - if ok { - crypter.Subfilter = subfilter.Str() + if subfilter, ok := ed.Get("SubFilter").(*PdfObjectString); ok { + crypter.encrypt.SubFilter = subfilter.Str() common.Log.Debug("Using subfilter %s", subfilter) } @@ -337,20 +451,20 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr common.Log.Debug("ERROR Invalid encryption length") return crypter, errors.New("Invalid encryption length") } - crypter.Length = int(*L) + crypter.encrypt.Length = int(*L) } else { - crypter.Length = 40 + crypter.encrypt.Length = 40 } - crypter.V = 0 + crypter.encrypt.V = 0 if v, ok := ed.Get("V").(*PdfObjectInteger); ok { V := int(*v) - crypter.V = V + crypter.encrypt.V = V if V >= 1 && V <= 2 { // Default algorithm is V2. 
- crypter.CryptFilters = newCryptFiltersV2(crypter.Length) + crypter.cryptFilters = newCryptFiltersV2(crypter.encrypt.Length) } else if V >= 4 && V <= 5 { - if err := crypter.LoadCryptFilters(ed); err != nil { + if err := crypter.loadCryptFilters(ed); err != nil { return crypter, err } } else { @@ -359,89 +473,9 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr } } - R, ok := ed.Get("R").(*PdfObjectInteger) - if !ok { - return crypter, errors.New("Encrypt dictionary missing R") - } - // TODO(dennwc): according to spec, R should be validated according to V value - if *R < 2 || *R > 6 { - return crypter, fmt.Errorf("Invalid R (%d)", *R) - } - crypter.R = int(*R) - - O, ok := ed.Get("O").(*PdfObjectString) - if !ok { - return crypter, errors.New("Encrypt dictionary missing O") - } - if crypter.R == 5 || crypter.R == 6 { - // the spec says =48 bytes, but Acrobat pads them out longer - if len(O.Str()) < 48 { - return crypter, fmt.Errorf("Length(O) < 48 (%d)", len(O.Str())) - } - } else if len(O.Str()) != 32 { - return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(O.Str())) - } - crypter.O = O.Bytes() - - U, ok := ed.Get("U").(*PdfObjectString) - if !ok { - return crypter, errors.New("Encrypt dictionary missing U") - } - if crypter.R == 5 || crypter.R == 6 { - // the spec says =48 bytes, but Acrobat pads them out longer - if len(U.Str()) < 48 { - return crypter, fmt.Errorf("Length(U) < 48 (%d)", len(U.Str())) - } - } else if len(U.Str()) != 32 { - // Strictly this does not cause an error. - // If O is OK and others then can still read the file. - common.Log.Debug("Warning: Length(U) != 32 (%d)", len(U.Str())) - //return crypter, errors.New("Length(U) != 32") - } - crypter.U = U.Bytes() - - if crypter.R >= 5 { - OE, ok := ed.Get("OE").(*PdfObjectString) - if !ok { - return crypter, errors.New("Encrypt dictionary missing OE") - } - if len(OE.Str()) != 32 { - return crypter, fmt.Errorf("Length(OE) != 32 (%d)", len(OE.Str())) - } - crypter.OE = OE.Bytes() - - UE, ok := ed.Get("UE").(*PdfObjectString) - if !ok { - return crypter, errors.New("Encrypt dictionary missing UE") - } - if len(UE.Str()) != 32 { - return crypter, fmt.Errorf("Length(UE) != 32 (%d)", len(UE.Str())) - } - crypter.UE = UE.Bytes() - } - - P, ok := ed.Get("P").(*PdfObjectInteger) - if !ok { - return crypter, errors.New("Encrypt dictionary missing permissions attr") - } - crypter.P = int(*P) - - if crypter.R == 6 { - Perms, ok := ed.Get("Perms").(*PdfObjectString) - if !ok { - return crypter, errors.New("Encrypt dictionary missing Perms") - } - if len(Perms.Str()) != 16 { - return crypter, fmt.Errorf("Length(Perms) != 16 (%d)", len(Perms.Str())) - } - crypter.Perms = Perms.Bytes() - } - - em, ok := ed.Get("EncryptMetadata").(*PdfObjectBool) - if ok { - crypter.EncryptMetadata = bool(*em) - } else { - crypter.EncryptMetadata = true // True by default. + // decode Standard security handler parameters + if err := decodeEncryptStd(&crypter.encryptStd, ed); err != nil { + return crypter, err } // Default: empty ID. @@ -457,115 +491,37 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr } else { common.Log.Debug("Trailer ID array missing or invalid!") } - crypter.Id0 = id0 + crypter.id0 = id0 return crypter, nil } // GetAccessPermissions returns the PDF access permissions as an AccessPermissions object. 
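GetAccessPermissions now returns the raw `security.Permissions` mask instead of the removed `AccessPermissions` struct. A helper along these lines, using the bit positions preserved in the removed `GetP` code below, is a sketch of how callers could keep interpreting it; any named permission constants in the security package are outside this diff.

```go
package core

import "github.com/unidoc/unidoc/pdf/core/security"

// describePermissions expands a raw permission mask into readable flags.
// Bit positions follow the removed AccessPermissions/GetP mapping.
func describePermissions(p security.Permissions) []string {
	flags := []struct {
		bit  uint
		name string
	}{
		{2, "print"},                   // bit 3
		{3, "modify"},                  // bit 4
		{4, "extract graphics"},        // bit 5
		{5, "annotate"},                // bit 6
		{8, "fill forms"},              // bit 9
		{9, "extract (accessibility)"}, // bit 10
		{10, "rotate/insert pages"},    // bit 11
		{11, "full print quality"},     // bit 12
	}
	var out []string
	for _, f := range flags {
		if uint32(p)&(1<<f.bit) != 0 {
			out = append(out, f.name)
		}
	}
	return out
}
```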
-func (crypt *PdfCrypt) GetAccessPermissions() AccessPermissions { - perms := AccessPermissions{} - - P := crypt.P - if P&(1<<2) > 0 { - perms.Printing = true - } - if P&(1<<3) > 0 { - perms.Modify = true - } - if P&(1<<4) > 0 { - perms.ExtractGraphics = true - } - if P&(1<<5) > 0 { - perms.Annotate = true - } - if P&(1<<8) > 0 { - perms.FillForms = true - } - if P&(1<<9) > 0 { - perms.DisabilityExtract = true - } - if P&(1<<10) > 0 { - perms.RotateInsert = true - } - if P&(1<<11) > 0 { - perms.FullPrintQuality = true - } - return perms +func (crypt *PdfCrypt) GetAccessPermissions() security.Permissions { + return crypt.encryptStd.P } -// GetP returns the P entry to be used in Encrypt dictionary based on AccessPermissions settings. -func (perms AccessPermissions) GetP() int32 { - var P int32 = 0 - - if perms.Printing { // bit 3 - P |= (1 << 2) +func (crypt *PdfCrypt) securityHandler() security.StdHandler { + if crypt.encryptStd.R >= 5 { + return security.NewHandlerR6() } - if perms.Modify { // bit 4 - P |= (1 << 3) - } - if perms.ExtractGraphics { // bit 5 - P |= (1 << 4) - } - if perms.Annotate { // bit 6 - P |= (1 << 5) - } - if perms.FillForms { - P |= (1 << 8) // bit 9 - } - if perms.DisabilityExtract { - P |= (1 << 9) // bit 10 - } - if perms.RotateInsert { - P |= (1 << 10) // bit 11 - } - if perms.FullPrintQuality { - P |= (1 << 11) // bit 12 - } - return P + return security.NewHandlerR4(crypt.id0, crypt.encrypt.Length) } // Check whether the specified password can be used to decrypt the document. +// Also build the encryption/decryption key. func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) { - // Also build the encryption/decryption key. - - crypt.Authenticated = false - if crypt.R >= 5 { - authenticated, err := crypt.alg2a(password) - if err != nil { - return false, err - } - crypt.Authenticated = authenticated - return authenticated, err - } - - // Try user password. - common.Log.Trace("Debugging authentication - user pass") - authenticated, err := crypt.alg6(password) + crypt.authenticated = false + h := crypt.securityHandler() + fkey, perm, err := h.Authenticate(&crypt.encryptStd, password) if err != nil { return false, err + } else if perm == 0 || len(fkey) == 0 { + return false, nil } - if authenticated { - common.Log.Trace("this.Authenticated = True") - crypt.Authenticated = true - return true, nil - } - - // Try owner password also. - // May not be necessary if only want to get all contents. - // (user pass needs to be known or empty). - common.Log.Trace("Debugging authentication - owner pass") - authenticated, err = crypt.alg7(password) - if err != nil { - return false, err - } - if authenticated { - common.Log.Trace("this.Authenticated = True") - crypt.Authenticated = true - return true, nil - } - - return false, nil + crypt.authenticated = true + crypt.encryptionKey = fkey + return true, nil } // Check access rights and permissions for a specified password. If either user/owner password is specified, @@ -574,87 +530,24 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) { // The bool flag indicates that the user can access and can view the file. // The AccessPermissions shows what access the user has for editing etc. // An error is returned if there was a problem performing the authentication. -func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissions, error) { - perms := AccessPermissions{} - - // Try owner password -> full rights. 
- var ( - isOwner bool - err error - ) - if crypt.R >= 5 { - var h []byte - h, err = crypt.alg12(password) - if err != nil { - return false, perms, err - } - isOwner = len(h) != 0 - } else { - isOwner, err = crypt.alg7(password) - } +func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, security.Permissions, error) { + h := crypt.securityHandler() + // TODO(dennwc): it computes an encryption key as well; if necessary, define a new interface method to optimize this + fkey, perm, err := h.Authenticate(&crypt.encryptStd, password) if err != nil { - return false, perms, err + return false, 0, err + } else if perm == 0 || len(fkey) == 0 { + return false, 0, nil } - if isOwner { - // owner -> full rights. - perms.Annotate = true - perms.DisabilityExtract = true - perms.ExtractGraphics = true - perms.FillForms = true - perms.FullPrintQuality = true - perms.Modify = true - perms.Printing = true - perms.RotateInsert = true - return true, perms, nil - } - - // Try user password. - var isUser bool - if crypt.R >= 5 { - var h []byte - h, err = crypt.alg11(password) - if err != nil { - return false, perms, err - } - isUser = len(h) != 0 - } else { - isUser, err = crypt.alg6(password) - } - if err != nil { - return false, perms, err - } - if isUser { - // User password specified correctly -> access granted with specified permissions. - return true, crypt.GetAccessPermissions(), nil - } - - // Cannot even view the file. - return false, perms, nil -} - -func (crypt *PdfCrypt) paddedPass(pass []byte) []byte { - key := make([]byte, 32) - if len(pass) >= 32 { - for i := 0; i < 32; i++ { - key[i] = pass[i] - } - } else { - for i := 0; i < len(pass); i++ { - key[i] = pass[i] - } - for i := len(pass); i < 32; i++ { - key[i] = padding[i-len(pass)] - } - } - return key + return true, perm, nil } // Generates a key for encrypting a specific object based on the // object and generation number, as well as the document encryption key. func (crypt *PdfCrypt) makeKey(filter string, objNum, genNum uint32, ekey []byte) ([]byte, error) { - f, err := crypt.CryptFilters.byName(filter) - if err != nil { - return nil, err + f, ok := crypt.cryptFilters[filter] + if !ok { + return nil, fmt.Errorf("Unknown crypt filter (%s)", filter) } return f.MakeKey(objNum, genNum, ekey) } @@ -667,14 +560,14 @@ var encryptDictKeys = []PdfObjectName{ // Check if object has already been processed. func (crypt *PdfCrypt) isDecrypted(obj PdfObject) bool { - _, ok := crypt.DecryptedObjects[obj] + _, ok := crypt.decryptedObjects[obj] if ok { common.Log.Trace("Already decrypted") return true } switch obj := obj.(type) { case *PdfObjectStream: - if crypt.R != 5 { + if crypt.encryptStd.R != 5 { if name, ok := obj.Get("Type").(*PdfObjectName); ok && *name == "XRef" { return true // Cross-reference streams should not be encrypted } @@ -707,9 +600,9 @@ func (crypt *PdfCrypt) isDecrypted(obj PdfObject) bool { // Decrypt a buffer with a selected crypt filter. 
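`makeKey` above delegates per-object key derivation to the selected filter. For the legacy V2/AESV2 filters this is the classic Algorithm 1 derivation that the deleted crypt_filters.go implemented as `makeKeyV2`; a standalone sketch of that derivation:

```go
package core

import "crypto/md5"

// legacyObjectKey mirrors the removed makeKeyV2: append the 3 low-order bytes
// of the object number and 2 low-order bytes of the generation number to the
// file key (plus "sAlT" for AES), MD5 the result and truncate to
// min(len(fkey)+5, 16) bytes. AESV3 (R=6) skips this and reuses fkey directly.
func legacyObjectKey(fkey []byte, objNum, genNum uint32, isAES bool) []byte {
	key := make([]byte, 0, len(fkey)+9)
	key = append(key, fkey...)
	key = append(key,
		byte(objNum), byte(objNum>>8), byte(objNum>>16),
		byte(genNum), byte(genNum>>8),
	)
	if isAES {
		key = append(key, 's', 'A', 'l', 'T')
	}
	sum := md5.Sum(key)
	n := len(fkey) + 5
	if n > 16 {
		n = 16
	}
	return sum[:n]
}
```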
func (crypt *PdfCrypt) decryptBytes(buf []byte, filter string, okey []byte) ([]byte, error) { common.Log.Trace("Decrypt bytes") - f, err := crypt.CryptFilters.byName(filter) - if err != nil { - return nil, err + f, ok := crypt.cryptFilters[filter] + if !ok { + return nil, fmt.Errorf("Unknown crypt filter (%s)", filter) } return f.DecryptBytes(buf, okey) } @@ -727,7 +620,7 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) switch obj := obj.(type) { case *PdfIndirectObject: - crypt.DecryptedObjects[obj] = true + crypt.decryptedObjects[obj] = true common.Log.Trace("Decrypting indirect %d %d obj!", obj.ObjectNumber, obj.GenerationNumber) @@ -741,10 +634,10 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) return nil case *PdfObjectStream: // Mark as decrypted first to avoid recursive issues. - crypt.DecryptedObjects[obj] = true + crypt.decryptedObjects[obj] = true dict := obj.PdfObjectDictionary - if crypt.R != 5 { + if crypt.encryptStd.R != 5 { if s, ok := dict.Get("Type").(*PdfObjectName); ok && *s == "XRef" { return nil // Cross-reference streams should not be encrypted } @@ -757,10 +650,10 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) // TODO: Check for crypt filter (V4). // The Crypt filter shall be the first filter in the Filter array entry. - streamFilter := StandardCryptFilter // Default RC4. - if crypt.V >= 4 { - streamFilter = crypt.StreamFilter - common.Log.Trace("this.StreamFilter = %s", crypt.StreamFilter) + streamFilter := stdCryptFilter // Default RC4. + if crypt.encrypt.V >= 4 { + streamFilter = crypt.streamFilter + common.Log.Trace("this.streamFilter = %s", crypt.streamFilter) if filters, ok := dict.Get("Filter").(*PdfObjectArray); ok { // Crypt filter can only be the first entry. @@ -773,7 +666,7 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) // Check if valid crypt filter specified in the decode params. if decodeParams, ok := dict.Get("DecodeParms").(*PdfObjectDictionary); ok { if filterName, ok := decodeParams.Get("Name").(*PdfObjectName); ok { - if _, ok := crypt.CryptFilters[string(*filterName)]; ok { + if _, ok := crypt.cryptFilters[string(*filterName)]; ok { common.Log.Trace("Using stream filter %s", *filterName) streamFilter = string(*filterName) } @@ -795,7 +688,7 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) return err } - okey, err := crypt.makeKey(streamFilter, uint32(objNum), uint32(genNum), crypt.EncryptionKey) + okey, err := crypt.makeKey(streamFilter, uint32(objNum), uint32(genNum), crypt.encryptionKey) if err != nil { return err } @@ -811,18 +704,18 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) case *PdfObjectString: common.Log.Trace("Decrypting string!") - stringFilter := StandardCryptFilter - if crypt.V >= 4 { + stringFilter := stdCryptFilter + if crypt.encrypt.V >= 4 { // Currently only support Identity / RC4. - common.Log.Trace("with %s filter", crypt.StringFilter) - if crypt.StringFilter == "Identity" { + common.Log.Trace("with %s filter", crypt.stringFilter) + if crypt.stringFilter == "Identity" { // Identity: pass unchanged: No action. 
return nil } - stringFilter = crypt.StringFilter + stringFilter = crypt.stringFilter } - key, err := crypt.makeKey(stringFilter, uint32(parentObjNum), uint32(parentGenNum), crypt.EncryptionKey) + key, err := crypt.makeKey(stringFilter, uint32(parentObjNum), uint32(parentGenNum), crypt.encryptionKey) if err != nil { return err } @@ -881,7 +774,7 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) // Check if object has already been processed. func (crypt *PdfCrypt) isEncrypted(obj PdfObject) bool { - _, ok := crypt.EncryptedObjects[obj] + _, ok := crypt.encryptedObjects[obj] if ok { common.Log.Trace("Already encrypted") return true @@ -894,9 +787,9 @@ func (crypt *PdfCrypt) isEncrypted(obj PdfObject) bool { // Encrypt a buffer with the specified crypt filter and key. func (crypt *PdfCrypt) encryptBytes(buf []byte, filter string, okey []byte) ([]byte, error) { common.Log.Trace("Encrypt bytes") - f, err := crypt.CryptFilters.byName(filter) - if err != nil { - return nil, err + f, ok := crypt.cryptFilters[filter] + if !ok { + return nil, fmt.Errorf("Unknown crypt filter (%s)", filter) } return f.EncryptBytes(buf, okey) } @@ -913,7 +806,7 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) } switch obj := obj.(type) { case *PdfIndirectObject: - crypt.EncryptedObjects[obj] = true + crypt.encryptedObjects[obj] = true common.Log.Trace("Encrypting indirect %d %d obj!", obj.ObjectNumber, obj.GenerationNumber) @@ -926,7 +819,7 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) } return nil case *PdfObjectStream: - crypt.EncryptedObjects[obj] = true + crypt.encryptedObjects[obj] = true dict := obj.PdfObjectDictionary if s, ok := dict.Get("Type").(*PdfObjectName); ok && *s == "XRef" { @@ -940,12 +833,12 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) // TODO: Check for crypt filter (V4). // The Crypt filter shall be the first filter in the Filter array entry. - streamFilter := StandardCryptFilter // Default RC4. - if crypt.V >= 4 { + streamFilter := stdCryptFilter // Default RC4. + if crypt.encrypt.V >= 4 { // For now. Need to change when we add support for more than // Identity / RC4. - streamFilter = crypt.StreamFilter - common.Log.Trace("this.StreamFilter = %s", crypt.StreamFilter) + streamFilter = crypt.streamFilter + common.Log.Trace("this.streamFilter = %s", crypt.streamFilter) if filters, ok := dict.Get("Filter").(*PdfObjectArray); ok { // Crypt filter can only be the first entry. @@ -958,7 +851,7 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) // Check if valid crypt filter specified in the decode params. 
if decodeParams, ok := dict.Get("DecodeParms").(*PdfObjectDictionary); ok { if filterName, ok := decodeParams.Get("Name").(*PdfObjectName); ok { - if _, ok := crypt.CryptFilters[string(*filterName)]; ok { + if _, ok := crypt.cryptFilters[string(*filterName)]; ok { common.Log.Trace("Using stream filter %s", *filterName) streamFilter = string(*filterName) } @@ -980,7 +873,7 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) return err } - okey, err := crypt.makeKey(streamFilter, uint32(objNum), uint32(genNum), crypt.EncryptionKey) + okey, err := crypt.makeKey(streamFilter, uint32(objNum), uint32(genNum), crypt.encryptionKey) if err != nil { return err } @@ -996,17 +889,17 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) case *PdfObjectString: common.Log.Trace("Encrypting string!") - stringFilter := StandardCryptFilter - if crypt.V >= 4 { - common.Log.Trace("with %s filter", crypt.StringFilter) - if crypt.StringFilter == "Identity" { + stringFilter := stdCryptFilter + if crypt.encrypt.V >= 4 { + common.Log.Trace("with %s filter", crypt.stringFilter) + if crypt.stringFilter == "Identity" { // Identity: pass unchanged: No action. return nil } - stringFilter = crypt.StringFilter + stringFilter = crypt.stringFilter } - key, err := crypt.makeKey(stringFilter, uint32(parentObjNum), uint32(parentGenNum), crypt.EncryptionKey) + key, err := crypt.makeKey(stringFilter, uint32(parentObjNum), uint32(parentGenNum), crypt.encryptionKey) if err != nil { return err } @@ -1062,710 +955,13 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) return nil } -// aesZeroIV allocates a zero-filled buffer that serves as an initialization vector for AESv3. -func (crypt *PdfCrypt) aesZeroIV() []byte { - if crypt.ivAESZero == nil { - crypt.ivAESZero = make([]byte, aes.BlockSize) - } - return crypt.ivAESZero -} - -// alg2a retrieves the encryption key from an encrypted document (R >= 5). -// It returns false if the password was wrong. -// 7.6.4.3.2 Algorithm 2.A (page 83) -func (crypt *PdfCrypt) alg2a(pass []byte) (bool, error) { - // O & U: 32 byte hash + 8 byte Validation Salt + 8 byte Key Salt - - // step a: Unicode normalization - // TODO(dennwc): make sure that UTF-8 strings are normalized - - // step b: truncate to 127 bytes - if len(pass) > 127 { - pass = pass[:127] - } - - // step c: test pass against the owner key - h, err := crypt.alg12(pass) +// generateParams generates encryption parameters for specified passwords. 
+func (crypt *PdfCrypt) generateParams(upass, opass []byte) error { + h := crypt.securityHandler() + ekey, err := h.GenerateParams(&crypt.encryptStd, opass, upass) if err != nil { - return false, err - } - var ( - data []byte // data to hash - ekey []byte // encrypted file key - ukey []byte // user key; set only when using owner's password - ) - if len(h) != 0 { - // owner password valid - - // step d: compute an intermediate owner key - str := make([]byte, len(pass)+8+48) - i := copy(str, pass) - i += copy(str[i:], crypt.O[40:48]) // owner Key Salt - i += copy(str[i:], crypt.U[0:48]) - - data = str - ekey = crypt.OE - ukey = crypt.U[0:48] - } else { - // check user password - h, err = crypt.alg11(pass) - if err == nil && len(h) == 0 { - // try default password - h, err = crypt.alg11([]byte("")) - } - if err != nil { - return false, err - } else if len(h) == 0 { - // wrong password - return false, nil - } - // step e: compute an intermediate user key - str := make([]byte, len(pass)+8) - i := copy(str, pass) - i += copy(str[i:], crypt.U[40:48]) // user Key Salt - - data = str - ekey = crypt.UE - ukey = nil - } - ekey = ekey[:32] - - // intermediate key - ikey := crypt.alg2b(data, pass, ukey) - - ac, err := aes.NewCipher(ikey[:32]) - if err != nil { - panic(err) - } - - iv := crypt.aesZeroIV() - cbc := cipher.NewCBCDecrypter(ac, iv) - fkey := make([]byte, 32) - cbc.CryptBlocks(fkey, ekey) - - crypt.EncryptionKey = fkey - - if crypt.R == 5 { - return true, nil - } - - return crypt.alg13(fkey) -} - -// alg2b computes a hash for R=5 and R=6. -func (crypt *PdfCrypt) alg2b(data, pwd, userKey []byte) []byte { - if crypt.R == 5 { - return alg2b_R5(data) - } - return alg2b(data, pwd, userKey) -} - -// alg2b_R5 computes a hash for R=5, used in a deprecated extension. -// It's used the same way as a hash described in Algorithm 2.B, but it doesn't use the original password -// and the user key to calculate the hash. -func alg2b_R5(data []byte) []byte { - h := sha256.New() - h.Write(data) - return h.Sum(nil) -} - -// repeat repeats first n bytes of buf until the end of the buffer. -// It assumes that the length of buf is a multiple of n. -func repeat(buf []byte, n int) { - bp := n - for bp < len(buf) { - copy(buf[bp:], buf[:bp]) - bp *= 2 - } -} - -// alg2b computes a hash for R=6. 
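The alg2a code removed above (now handled behind `security.NewHandlerR6`) recovers the 32-byte file key by decrypting UE or OE with the intermediate key derived from the password and key salt. A standalone sketch of that final unwrap step, assuming the intermediate key has already been computed:

```go
package core

import (
	"crypto/aes"
	"crypto/cipher"
)

// unwrapFileKey performs the last step of the removed alg2a: AES-256-CBC
// decrypt the 32-byte UE (user path) or OE (owner path) value with the
// intermediate key and an all-zero IV, yielding the file encryption key.
func unwrapFileKey(intermediateKey, encryptedKey []byte) []byte {
	block, err := aes.NewCipher(intermediateKey[:32])
	if err != nil {
		panic(err) // key length is fixed at 32 bytes here
	}
	iv := make([]byte, aes.BlockSize) // zero IV, as in the removed aesZeroIV
	fkey := make([]byte, 32)
	cipher.NewCBCDecrypter(block, iv).CryptBlocks(fkey, encryptedKey[:32])
	return fkey
}
```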
-// 7.6.4.3.3 Algorithm 2.B (page 83) -func alg2b(data, pwd, userKey []byte) []byte { - var ( - s256, s384, s512 hash.Hash - ) - s256 = sha256.New() - hbuf := make([]byte, 64) - - h := s256 - h.Write(data) - K := h.Sum(hbuf[:0]) - - buf := make([]byte, 64*(127+64+48)) - - round := func(rnd int) (E []byte) { - // step a: repeat pass+K 64 times - n := len(pwd) + len(K) + len(userKey) - part := buf[:n] - i := copy(part, pwd) - i += copy(part[i:], K[:]) - i += copy(part[i:], userKey) - if i != n { - panic("wrong size") - } - K1 := buf[:n*64] - repeat(K1, n) - - // step b: encrypt K1 with AES-128 CBC - ac, err := aes.NewCipher(K[0:16]) - if err != nil { - panic(err) - } - cbc := cipher.NewCBCEncrypter(ac, K[16:32]) - cbc.CryptBlocks(K1, K1) - E = K1 - - // step c: use 16 bytes of E as big-endian int, select the next hash - b := 0 - for i := 0; i < 16; i++ { - b += int(E[i] % 3) - } - var h hash.Hash - switch b % 3 { - case 0: - h = s256 - case 1: - if s384 == nil { - s384 = sha512.New384() - } - h = s384 - case 2: - if s512 == nil { - s512 = sha512.New() - } - h = s512 - } - - // step d: take the hash of E, use as a new K - h.Reset() - h.Write(E) - K = h.Sum(hbuf[:0]) - - return E - } - - for i := 0; ; { - E := round(i) - b := uint8(E[len(E)-1]) - // from the spec, it appears that i should be incremented after - // the test, but that doesn't match what Adobe does - i++ - if i >= 64 && b <= uint8(i-32) { - break - } - } - return K[:32] -} - -// alg2 computes an encryption key. -func (crypt *PdfCrypt) alg2(pass []byte) []byte { - common.Log.Trace("alg2") - key := crypt.paddedPass(pass) - - h := md5.New() - h.Write(key) - - // Pass O. - h.Write(crypt.O) - - // Pass P (Lower order byte first). - var p uint32 = uint32(crypt.P) - var pb = []byte{} - for i := 0; i < 4; i++ { - pb = append(pb, byte(((p >> uint(8*i)) & 0xff))) - } - h.Write(pb) - common.Log.Trace("go P: % x", pb) - - // Pass ID[0] from the trailer - h.Write([]byte(crypt.Id0)) - - common.Log.Trace("this.R = %d encryptMetadata %v", crypt.R, crypt.EncryptMetadata) - if (crypt.R >= 4) && !crypt.EncryptMetadata { - h.Write([]byte{0xff, 0xff, 0xff, 0xff}) - } - hashb := h.Sum(nil) - - if crypt.R >= 3 { - for i := 0; i < 50; i++ { - h = md5.New() - h.Write(hashb[0 : crypt.Length/8]) - hashb = h.Sum(nil) - } - } - - if crypt.R >= 3 { - return hashb[0 : crypt.Length/8] - } - - return hashb[0:5] -} - -// Create the RC4 encryption key. -func (crypt *PdfCrypt) alg3Key(pass []byte) []byte { - h := md5.New() - okey := crypt.paddedPass(pass) - h.Write(okey) - - if crypt.R >= 3 { - for i := 0; i < 50; i++ { - hashb := h.Sum(nil) - h = md5.New() - h.Write(hashb) - } - } - - encKey := h.Sum(nil) - if crypt.R == 2 { - encKey = encKey[0:5] - } else { - encKey = encKey[0 : crypt.Length/8] - } - return encKey -} - -// Alg3 computes the encryption dictionary’s O (owner password) value. -func (crypt *PdfCrypt) Alg3(upass, opass []byte) (string, error) { - // Return O string val. 
- O := "" - - var encKey []byte - if len(opass) > 0 { - encKey = crypt.alg3Key(opass) - } else { - encKey = crypt.alg3Key(upass) - } - - ociph, err := rc4.NewCipher(encKey) - if err != nil { - return O, errors.New("Failed rc4 ciph") - } - - ukey := crypt.paddedPass(upass) - encrypted := make([]byte, len(ukey)) - ociph.XORKeyStream(encrypted, ukey) - - if crypt.R >= 3 { - encKey2 := make([]byte, len(encKey)) - for i := 0; i < 19; i++ { - for j := 0; j < len(encKey); j++ { - encKey2[j] = encKey[j] ^ byte(i+1) - } - ciph, err := rc4.NewCipher(encKey2) - if err != nil { - return O, errors.New("Failed rc4 ciph") - } - ciph.XORKeyStream(encrypted, encrypted) - } - } - - O = string(encrypted) - return O, nil -} - -// alg4 computes the encryption dictionary’s U (user password) value (Security handlers of revision 2). -func (crypt *PdfCrypt) alg4(upass []byte) (string, []byte, error) { - U := "" - - ekey := crypt.alg2(upass) - ciph, err := rc4.NewCipher(ekey) - if err != nil { - return U, ekey, errors.New("Failed rc4 ciph") - } - - s := []byte(padding) - encrypted := make([]byte, len(s)) - ciph.XORKeyStream(encrypted, s) - - U = string(encrypted) - return U, ekey, nil -} - -// Alg5 computes the encryption dictionary’s U (user password) value (Security handlers of revision 3 or greater). -func (crypt *PdfCrypt) Alg5(upass []byte) (string, []byte, error) { - U := "" - - ekey := crypt.alg2(upass) - - h := md5.New() - h.Write([]byte(padding)) - h.Write([]byte(crypt.Id0)) - hash := h.Sum(nil) - - common.Log.Trace("Alg5") - common.Log.Trace("ekey: % x", ekey) - common.Log.Trace("ID: % x", crypt.Id0) - - if len(hash) != 16 { - return U, ekey, errors.New("Hash length not 16 bytes") - } - - ciph, err := rc4.NewCipher(ekey) - if err != nil { - return U, ekey, errors.New("Failed rc4 ciph") - } - encrypted := make([]byte, 16) - ciph.XORKeyStream(encrypted, hash) - - // Do the following 19 times: Take the output from the previous - // invocation of the RC4 function and pass it as input to a new - // invocation of the function; use an encryption key generated by - // taking each byte of the original encryption key obtained in step - // (a) and performing an XOR (exclusive or) operation between that - // byte and the single-byte value of the iteration counter (from 1 to 19). - ekey2 := make([]byte, len(ekey)) - for i := 0; i < 19; i++ { - for j := 0; j < len(ekey); j++ { - ekey2[j] = ekey[j] ^ byte(i+1) - } - ciph, err = rc4.NewCipher(ekey2) - if err != nil { - return U, ekey, errors.New("Failed rc4 ciph") - } - ciph.XORKeyStream(encrypted, encrypted) - common.Log.Trace("i = %d, ekey: % x", i, ekey2) - common.Log.Trace("i = %d -> % x", i, encrypted) - } - - bb := make([]byte, 32) - for i := 0; i < 16; i++ { - bb[i] = encrypted[i] - } - - // Append 16 bytes of arbitrary padding to the output from the final - // invocation of the RC4 function and store the 32-byte result as - // the value of the U entry in the encryption dictionary. - _, err = rand.Read(bb[16:32]) - if err != nil { - return U, ekey, errors.New("Failed to gen rand number") - } - - U = string(bb) - return U, ekey, nil -} - -// alg6 authenticates the user password. 
-func (crypt *PdfCrypt) alg6(upass []byte) (bool, error) { - var uo string - var err error - var key []byte - if crypt.R == 2 { - uo, key, err = crypt.alg4(upass) - } else if crypt.R >= 3 { - uo, key, err = crypt.Alg5(upass) - } else { - return false, errors.New("invalid R") - } - - if err != nil { - return false, err - } - - common.Log.Trace("check: % x == % x ?", string(uo), string(crypt.U)) - - uGen := string(uo) // Generated U from specified pass. - uDoc := string(crypt.U) // U from the document. - if crypt.R >= 3 { - // comparing on the first 16 bytes in the case of security - // handlers of revision 3 or greater), - if len(uGen) > 16 { - uGen = uGen[0:16] - } - if len(uDoc) > 16 { - uDoc = uDoc[0:16] - } - } - - if uGen == uDoc { - crypt.EncryptionKey = key - return true, nil - } - - return false, nil -} - -// alg7 authenticates the owner password. -func (crypt *PdfCrypt) alg7(opass []byte) (bool, error) { - encKey := crypt.alg3Key(opass) - - decrypted := make([]byte, len(crypt.O)) - if crypt.R == 2 { - ciph, err := rc4.NewCipher(encKey) - if err != nil { - return false, errors.New("Failed cipher") - } - ciph.XORKeyStream(decrypted, crypt.O) - } else if crypt.R >= 3 { - s := append([]byte{}, crypt.O...) - for i := 0; i < 20; i++ { - //newKey := encKey - newKey := append([]byte{}, encKey...) - for j := 0; j < len(encKey); j++ { - newKey[j] ^= byte(19 - i) - } - ciph, err := rc4.NewCipher(newKey) - if err != nil { - return false, errors.New("Failed cipher") - } - ciph.XORKeyStream(decrypted, s) - s = append([]byte{}, decrypted...) - } - } else { - return false, errors.New("invalid R") - } - - auth, err := crypt.alg6(decrypted) - if err != nil { - return false, nil - } - - return auth, nil -} - -// GenerateParams generates encryption parameters for specified passwords. -// Can be called only for R>=5. -func (crypt *PdfCrypt) GenerateParams(upass, opass []byte) error { - if crypt.R < 5 { - // TODO(dennwc): move code for R<5 from PdfWriter.Encrypt - return errors.New("can be used only for R>=5") - } - crypt.EncryptionKey = make([]byte, 32) - if _, err := io.ReadFull(rand.Reader, crypt.EncryptionKey); err != nil { return err } - return crypt.generateR6(upass, opass) -} - -// generateR6 is the algorithm opposite to alg2a (R>=5). -// It generates U,O,UE,OE,Perms fields using AESv3 encryption. -// There is no algorithm number assigned to this function in the spec. -func (crypt *PdfCrypt) generateR6(upass, opass []byte) error { - // all these field will be populated by functions below - crypt.U = nil - crypt.O = nil - crypt.UE = nil - crypt.OE = nil - crypt.Perms = nil // populated only for R=6 - - if len(upass) > 127 { - upass = upass[:127] - } - if len(opass) > 127 { - opass = opass[:127] - } - // generate U and UE - if err := crypt.alg8(upass); err != nil { - return err - } - // generate O and OE - if err := crypt.alg9(opass); err != nil { - return err - } - if crypt.R == 5 { - return nil - } - // generate Perms - return crypt.alg10() -} - -// alg8 computes the encryption dictionary's U (user password) and UE (user encryption) values (R>=5). 
-// 7.6.4.4.6 Algorithm 8 (page 86) -func (crypt *PdfCrypt) alg8(upass []byte) error { - // step a: compute U (user password) - var rbuf [16]byte - if _, err := io.ReadFull(rand.Reader, rbuf[:]); err != nil { - return err - } - valSalt := rbuf[0:8] - keySalt := rbuf[8:16] - - str := make([]byte, len(upass)+len(valSalt)) - i := copy(str, upass) - i += copy(str[i:], valSalt) - - h := crypt.alg2b(str, upass, nil) - - U := make([]byte, len(h)+len(valSalt)+len(keySalt)) - i = copy(U, h[:32]) - i += copy(U[i:], valSalt) - i += copy(U[i:], keySalt) - - crypt.U = U - - // step b: compute UE (user encryption) - - // str still contains a password, reuse it - i = len(upass) - i += copy(str[i:], keySalt) - - h = crypt.alg2b(str, upass, nil) - - ac, err := aes.NewCipher(h[:32]) - if err != nil { - panic(err) - } - - iv := crypt.aesZeroIV() - cbc := cipher.NewCBCEncrypter(ac, iv) - UE := make([]byte, 32) - cbc.CryptBlocks(UE, crypt.EncryptionKey[:32]) - crypt.UE = UE - + crypt.encryptionKey = ekey return nil } - -// alg9 computes the encryption dictionary's O (owner password) and OE (owner encryption) values (R>=5). -// 7.6.4.4.7 Algorithm 9 (page 86) -func (crypt *PdfCrypt) alg9(opass []byte) error { - // step a: compute O (owner password) - var rbuf [16]byte - if _, err := io.ReadFull(rand.Reader, rbuf[:]); err != nil { - return err - } - valSalt := rbuf[0:8] - keySalt := rbuf[8:16] - userKey := crypt.U[:48] - - str := make([]byte, len(opass)+len(valSalt)+len(userKey)) - i := copy(str, opass) - i += copy(str[i:], valSalt) - i += copy(str[i:], userKey) - - h := crypt.alg2b(str, opass, userKey) - - O := make([]byte, len(h)+len(valSalt)+len(keySalt)) - i = copy(O, h[:32]) - i += copy(O[i:], valSalt) - i += copy(O[i:], keySalt) - - crypt.O = O - - // step b: compute OE (owner encryption) - - // str still contains a password and a user key - reuse both, but overwrite the salt - i = len(opass) - i += copy(str[i:], keySalt) - // i += len(userKey) - - h = crypt.alg2b(str, opass, userKey) - - ac, err := aes.NewCipher(h[:32]) - if err != nil { - panic(err) - } - - iv := crypt.aesZeroIV() - cbc := cipher.NewCBCEncrypter(ac, iv) - OE := make([]byte, 32) - cbc.CryptBlocks(OE, crypt.EncryptionKey[:32]) - crypt.OE = OE - - return nil -} - -// alg10 computes the encryption dictionary's Perms (permissions) value (R=6). -// 7.6.4.4.8 Algorithm 10 (page 87) -func (crypt *PdfCrypt) alg10() error { - // step a: extend permissions to 64 bits - perms := uint64(uint32(crypt.P)) | (math.MaxUint32 << 32) - - // step b: record permissions - Perms := make([]byte, 16) - binary.LittleEndian.PutUint64(Perms[:8], perms) - - // step c: record EncryptMetadata - if crypt.EncryptMetadata { - Perms[8] = 'T' - } else { - Perms[8] = 'F' - } - - // step d: write "adb" magic - copy(Perms[9:12], "adb") - - // step e: write 4 bytes of random data - - // spec doesn't specify them as generated "from a strong random source", - // but we will use the cryptographic random generator anyway - if _, err := io.ReadFull(rand.Reader, Perms[12:16]); err != nil { - return err - } - - // step f: encrypt permissions - ac, err := aes.NewCipher(crypt.EncryptionKey[:32]) - if err != nil { - panic(err) - } - - ecb := newECBEncrypter(ac) - ecb.CryptBlocks(Perms, Perms) - - crypt.Perms = Perms[:16] - return nil -} - -// alg11 authenticates the user password (R >= 5) and returns the hash. 
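For reference, the layout of the 16-byte Perms value that the removed alg10 above builds before AES-ECB encrypting it with the file key; a standalone sketch of the plaintext block only:

```go
package core

import (
	"crypto/rand"
	"encoding/binary"
	"io"
	"math"
)

// permsBlock mirrors the removed alg10 before encryption: bytes 0-7 carry P
// widened to 64 bits with the high half forced to all ones (little endian),
// byte 8 is 'T' or 'F' for EncryptMetadata, bytes 9-11 are the "adb" magic
// and bytes 12-15 are random filler. alg10 then AES-256-ECB encrypts this.
func permsBlock(p int32, encryptMetadata bool) ([]byte, error) {
	b := make([]byte, 16)
	binary.LittleEndian.PutUint64(b[:8], uint64(uint32(p))|(math.MaxUint32<<32))
	if encryptMetadata {
		b[8] = 'T'
	} else {
		b[8] = 'F'
	}
	copy(b[9:12], "adb")
	if _, err := io.ReadFull(rand.Reader, b[12:16]); err != nil {
		return nil, err
	}
	return b, nil
}
```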
-func (crypt *PdfCrypt) alg11(upass []byte) ([]byte, error) { - str := make([]byte, len(upass)+8) - i := copy(str, upass) - i += copy(str[i:], crypt.U[32:40]) // user Validation Salt - - h := crypt.alg2b(str, upass, nil) - h = h[:32] - if !bytes.Equal(h, crypt.U[:32]) { - return nil, nil - } - return h, nil -} - -// alg12 authenticates the owner password (R >= 5) and returns the hash. -// 7.6.4.4.10 Algorithm 12 (page 87) -func (crypt *PdfCrypt) alg12(opass []byte) ([]byte, error) { - str := make([]byte, len(opass)+8+48) - i := copy(str, opass) - i += copy(str[i:], crypt.O[32:40]) // owner Validation Salt - i += copy(str[i:], crypt.U[0:48]) - - h := crypt.alg2b(str, opass, crypt.U[0:48]) - h = h[:32] - if !bytes.Equal(h, crypt.O[:32]) { - return nil, nil - } - return h, nil -} - -// alg13 validates user permissions (P+EncryptMetadata vs Perms) for R=6. -// 7.6.4.4.11 Algorithm 13 (page 87) -func (crypt *PdfCrypt) alg13(fkey []byte) (bool, error) { - perms := make([]byte, 16) - copy(perms, crypt.Perms[:16]) - - ac, err := aes.NewCipher(fkey[:32]) - if err != nil { - panic(err) - } - - ecb := newECBDecrypter(ac) - ecb.CryptBlocks(perms, perms) - - if !bytes.Equal(perms[9:12], []byte("adb")) { - return false, errors.New("decoded permissions are invalid") - } - p := int(int32(binary.LittleEndian.Uint32(perms[0:4]))) - if p != crypt.P { - return false, errors.New("permissions validation failed") - } - encMeta := true - if perms[8] == 'T' { - encMeta = true - } else if perms[8] == 'F' { - encMeta = false - } else { - return false, errors.New("decoded metadata encryption flag is invalid") - } - if encMeta != crypt.EncryptMetadata { - return false, errors.New("metadata encryption validation failed") - } - return true, nil -} diff --git a/pdf/core/crypt_filters.go b/pdf/core/crypt_filters.go deleted file mode 100644 index 1e68a409..00000000 --- a/pdf/core/crypt_filters.go +++ /dev/null @@ -1,266 +0,0 @@ -package core - -import ( - "crypto/aes" - "crypto/cipher" - "crypto/md5" - "crypto/rand" - "crypto/rc4" - "fmt" - "io" - - "github.com/unidoc/unidoc/common" -) - -var ( - cryptMethods = make(map[string]cryptFilterMethod) -) - -// registerCryptFilterMethod registers a CFM. -func registerCryptFilterMethod(m cryptFilterMethod) { - cryptMethods[m.CFM()] = m -} - -// getCryptFilterMethod check if a CFM with a specified name is supported an returns its implementation. -func getCryptFilterMethod(name string) (cryptFilterMethod, error) { - f := cryptMethods[name] - if f == nil { - return nil, fmt.Errorf("unsupported crypt filter: %q", name) - } - return f, nil -} - -func init() { - // register supported crypt filter methods - registerCryptFilterMethod(cryptFilterV2{}) - registerCryptFilterMethod(cryptFilterAESV2{}) - registerCryptFilterMethod(cryptFilterAESV3{}) -} - -// cryptFilterMethod is a common interface for crypt filter methods. -type cryptFilterMethod interface { - // CFM returns a name of the filter that should be used in CFM field of Encrypt dictionary. - CFM() string - // MakeKey generates a object encryption key based on file encryption key and object numbers. - // Used only for legacy filters - AESV3 doesn't change the key for each object. - MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error) - // EncryptBytes encrypts a buffer using object encryption key, as returned by MakeKey. - // Implementation may reuse a buffer and encrypt data in-place. - EncryptBytes(p []byte, okey []byte) ([]byte, error) - // DecryptBytes decrypts a buffer using object encryption key, as returned by MakeKey. 
- // Implementation may reuse a buffer and decrypt data in-place. - DecryptBytes(p []byte, okey []byte) ([]byte, error) -} - -// makeKeyV2 is a common object key generation shared by V2 and AESV2 crypt filters. -func makeKeyV2(objNum, genNum uint32, ekey []byte, isAES bool) ([]byte, error) { - key := make([]byte, len(ekey)+5) - for i := 0; i < len(ekey); i++ { - key[i] = ekey[i] - } - for i := 0; i < 3; i++ { - b := byte((objNum >> uint32(8*i)) & 0xff) - key[i+len(ekey)] = b - } - for i := 0; i < 2; i++ { - b := byte((genNum >> uint32(8*i)) & 0xff) - key[i+len(ekey)+3] = b - } - if isAES { - // If using the AES algorithm, extend the encryption key an - // additional 4 bytes by adding the value “sAlT”, which - // corresponds to the hexadecimal values 0x73, 0x41, 0x6C, 0x54. - key = append(key, 0x73) - key = append(key, 0x41) - key = append(key, 0x6C) - key = append(key, 0x54) - } - - // Take the MD5. - h := md5.New() - h.Write(key) - hashb := h.Sum(nil) - - if len(ekey)+5 < 16 { - return hashb[0 : len(ekey)+5], nil - } - - return hashb, nil -} - -// cryptFilterV2 is a RC4-based filter -type cryptFilterV2 struct{} - -func (cryptFilterV2) CFM() string { - return CryptFilterV2 -} - -func (f cryptFilterV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) { - return makeKeyV2(objNum, genNum, ekey, false) -} - -func (cryptFilterV2) EncryptBytes(buf []byte, okey []byte) ([]byte, error) { - // Standard RC4 algorithm. - ciph, err := rc4.NewCipher(okey) - if err != nil { - return nil, err - } - common.Log.Trace("RC4 Encrypt: % x", buf) - ciph.XORKeyStream(buf, buf) - common.Log.Trace("to: % x", buf) - return buf, nil -} - -func (cryptFilterV2) DecryptBytes(buf []byte, okey []byte) ([]byte, error) { - // Standard RC4 algorithm. - ciph, err := rc4.NewCipher(okey) - if err != nil { - return nil, err - } - common.Log.Trace("RC4 Decrypt: % x", buf) - ciph.XORKeyStream(buf, buf) - common.Log.Trace("to: % x", buf) - return buf, nil -} - -// cryptFilterAES implements a generic AES encryption and decryption algorithm used by AESV2 and AESV3 filter methods. -type cryptFilterAES struct{} - -func (cryptFilterAES) EncryptBytes(buf []byte, okey []byte) ([]byte, error) { - // Strings and streams encrypted with AES shall use a padding - // scheme that is described in Internet RFC 2898, PKCS #5: - // Password-Based Cryptography Specification Version 2.0; see - // the Bibliography. For an original message length of M, - // the pad shall consist of 16 - (M mod 16) bytes whose value - // shall also be 16 - (M mod 16). - // - // A 9-byte message has a pad of 7 bytes, each with the value - // 0x07. The pad can be unambiguously removed to determine the - // original message length when decrypting. Note that the pad is - // present when M is evenly divisible by 16; it contains 16 bytes - // of 0x10. - - ciph, err := aes.NewCipher(okey) - if err != nil { - return nil, err - } - - common.Log.Trace("AES Encrypt (%d): % x", len(buf), buf) - - // If using the AES algorithm, the Cipher Block Chaining (CBC) - // mode, which requires an initialization vector, is used. The - // block size parameter is set to 16 bytes, and the initialization - // vector is a 16-byte random number that is stored as the first - // 16 bytes of the encrypted stream or string. - - const block = aes.BlockSize // 16 - - pad := block - len(buf)%block - for i := 0; i < pad; i++ { - buf = append(buf, byte(pad)) - } - common.Log.Trace("Padded to %d bytes", len(buf)) - - // Generate random 16 bytes, place in beginning of buffer. 
- ciphertext := make([]byte, block+len(buf)) - iv := ciphertext[:block] - if _, err := io.ReadFull(rand.Reader, iv); err != nil { - return nil, err - } - - mode := cipher.NewCBCEncrypter(ciph, iv) - mode.CryptBlocks(ciphertext[block:], buf) - - buf = ciphertext - common.Log.Trace("to (%d): % x", len(buf), buf) - - return buf, nil -} - -func (cryptFilterAES) DecryptBytes(buf []byte, okey []byte) ([]byte, error) { - // Strings and streams encrypted with AES shall use a padding - // scheme that is described in Internet RFC 2898, PKCS #5: - // Password-Based Cryptography Specification Version 2.0; see - // the Bibliography. For an original message length of M, - // the pad shall consist of 16 - (M mod 16) bytes whose value - // shall also be 16 - (M mod 16). - // - // A 9-byte message has a pad of 7 bytes, each with the value - // 0x07. The pad can be unambiguously removed to determine the - // original message length when decrypting. Note that the pad is - // present when M is evenly divisible by 16; it contains 16 bytes - // of 0x10. - - ciph, err := aes.NewCipher(okey) - if err != nil { - return nil, err - } - - // If using the AES algorithm, the Cipher Block Chaining (CBC) - // mode, which requires an initialization vector, is used. The - // block size parameter is set to 16 bytes, and the initialization - // vector is a 16-byte random number that is stored as the first - // 16 bytes of the encrypted stream or string. - if len(buf) < 16 { - common.Log.Debug("ERROR AES invalid buf %s", buf) - return buf, fmt.Errorf("AES: Buf len < 16 (%d)", len(buf)) - } - - iv := buf[:16] - buf = buf[16:] - - if len(buf)%16 != 0 { - common.Log.Debug(" iv (%d): % x", len(iv), iv) - common.Log.Debug("buf (%d): % x", len(buf), buf) - return buf, fmt.Errorf("AES buf length not multiple of 16 (%d)", len(buf)) - } - - mode := cipher.NewCBCDecrypter(ciph, iv) - - common.Log.Trace("AES Decrypt (%d): % x", len(buf), buf) - common.Log.Trace("chop AES Decrypt (%d): % x", len(buf), buf) - mode.CryptBlocks(buf, buf) - common.Log.Trace("to (%d): % x", len(buf), buf) - - if len(buf) == 0 { - common.Log.Trace("Empty buf, returning empty string") - return buf, nil - } - - // The padded length is indicated by the last values. Remove those. 
- - padLen := int(buf[len(buf)-1]) - if padLen >= len(buf) { - common.Log.Debug("Illegal pad length") - return buf, fmt.Errorf("Invalid pad length") - } - buf = buf[:len(buf)-padLen] - - return buf, nil -} - -// cryptFilterAESV2 is an AES-based filter (128 bit key, PDF 1.6) -type cryptFilterAESV2 struct { - cryptFilterAES -} - -func (cryptFilterAESV2) CFM() string { - return CryptFilterAESV2 -} - -func (cryptFilterAESV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) { - return makeKeyV2(objNum, genNum, ekey, true) -} - -// cryptFilterAESV3 is an AES-based filter (256 bit key, PDF 2.0) -type cryptFilterAESV3 struct { - cryptFilterAES -} - -func (cryptFilterAESV3) CFM() string { - return CryptFilterAESV3 -} - -func (cryptFilterAESV3) MakeKey(_, _ uint32, ekey []byte) ([]byte, error) { - return ekey, nil -} diff --git a/pdf/core/crypt_test.go b/pdf/core/crypt_test.go index bc5e577e..6cb56a62 100644 --- a/pdf/core/crypt_test.go +++ b/pdf/core/crypt_test.go @@ -8,162 +8,42 @@ package core import ( - "bytes" - "fmt" - "math" - "math/rand" - "strings" "testing" - "time" "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core/security" ) func init() { common.SetLogger(common.ConsoleLogger{}) } -func TestPadding(t *testing.T) { - crypter := PdfCrypt{} - - // Case 1 empty pass, should match padded string. - key := crypter.paddedPass([]byte("")) - if len(key) != 32 { - t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key)) - } - if key[0] != 0x28 { - t.Errorf("key[0] != 0x28 (%q in %q)", key[0], key) - } - if key[31] != 0x7A { - t.Errorf("key[31] != 0x7A (%q in %q)", key[31], key) - } - - // Case 2, non empty pass. - key = crypter.paddedPass([]byte("bla")) - if len(key) != 32 { - t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key)) - } - if string(key[0:3]) != "bla" { - t.Errorf("Expecting start with bla (%s)", key) - } - if key[3] != 0x28 { - t.Errorf("key[3] != 0x28 (%q in %q)", key[3], key) - } - if key[31] != 0x64 { - t.Errorf("key[31] != 0x64 (%q in %q)", key[31], key) - } -} - -// Test algorithm 2. -func TestAlg2(t *testing.T) { - crypter := PdfCrypt{} - crypter.V = 2 - crypter.R = 3 - crypter.P = -3904 - crypter.Id0 = string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24, - 0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}) - crypter.O = []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, - 0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, - 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, - 0x72, 0x6A, 0x8C, 0xDB} - crypter.Length = 128 - crypter.EncryptMetadata = true - - key := crypter.alg2([]byte("")) - - keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca, - 0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5} - - if string(key) != string(keyExp) { - common.Log.Debug(" Key (%d): % x", len(key), key) - common.Log.Debug("KeyExp (%d): % x", len(keyExp), keyExp) - t.Errorf("alg2 -> key != expected\n") - } - -} - -// Test algorithm 3. 
-func TestAlg3(t *testing.T) { - crypter := PdfCrypt{} - crypter.V = 2 - crypter.R = 3 - crypter.P = -3904 - crypter.Id0 = string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24, - 0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}) - Oexp := []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, - 0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, - 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, - 0x72, 0x6A, 0x8C, 0xDB} - crypter.Length = 128 - crypter.EncryptMetadata = true - - O, err := crypter.Alg3([]byte(""), []byte("test")) - if err != nil { - t.Errorf("crypt alg3 error %s", err) - return - } - - if string(O) != string(Oexp) { - common.Log.Debug(" O (%d): % x", len(O), O) - common.Log.Debug("Oexp (%d): % x", len(Oexp), Oexp) - t.Errorf("alg3 -> key != expected") - } -} - -// Test algorithm 5 for computing dictionary's U (user password) value -// valid for R >= 3. -func TestAlg5(t *testing.T) { - crypter := PdfCrypt{} - crypter.V = 2 - crypter.R = 3 - crypter.P = -3904 - crypter.Id0 = string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24, - 0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}) - crypter.O = []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, - 0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, - 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, - 0x72, 0x6A, 0x8C, 0xDB} - crypter.Length = 128 - crypter.EncryptMetadata = true - - U, _, err := crypter.Alg5([]byte("")) - if err != nil { - t.Errorf("Error %s", err) - return - } - - Uexp := []byte{0x59, 0x66, 0x38, 0x6c, 0x76, 0xfe, 0x95, 0x7d, 0x3d, - 0x0d, 0x14, 0x3d, 0x36, 0xfd, 0x01, 0x3d, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} - - if string(U[0:16]) != string(Uexp[0:16]) { - common.Log.Info(" U (%d): % x", len(U), U) - common.Log.Info("Uexp (%d): % x", len(Uexp), Uexp) - t.Errorf("U != expected\n") - } -} - // Test decrypting. Example with V=2, R=3, using standard algorithm. func TestDecryption1(t *testing.T) { - crypter := PdfCrypt{} - crypter.DecryptedObjects = map[PdfObject]bool{} - // Default algorithm is V2 (RC4). 
- crypter.CryptFilters = newCryptFiltersV2(crypter.Length) - crypter.V = 2 - crypter.R = 3 - crypter.P = -3904 - crypter.Id0 = string([]byte{0x5f, 0x91, 0xff, 0xf2, 0x00, 0x88, 0x13, - 0x5f, 0x30, 0x24, 0xd1, 0x0f, 0x28, 0x31, 0xc6, 0xfa}) - crypter.O = []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, - 0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, - 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, - 0x72, 0x6A, 0x8C, 0xDB} - crypter.U = []byte{0xED, 0x5B, 0xA7, 0x76, 0xFD, 0xD8, 0xE3, 0x89, - 0x4F, 0x54, 0x05, 0xC1, 0x3B, 0xFD, 0x86, 0xCF, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00} - crypter.Length = 128 - crypter.EncryptMetadata = true + crypter := PdfCrypt{ + encrypt: encryptDict{ + V: 2, + Length: 128, + }, + encryptStd: security.StdEncryptDict{ + R: 3, + P: 0xfffff0c0, + EncryptMetadata: true, + O: []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, + 0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, + 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, + 0x72, 0x6A, 0x8C, 0xDB}, + U: []byte{0xED, 0x5B, 0xA7, 0x76, 0xFD, 0xD8, 0xE3, 0x89, + 0x4F, 0x54, 0x05, 0xC1, 0x3B, 0xFD, 0x86, 0xCF, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00}, + }, + id0: string([]byte{0x5f, 0x91, 0xff, 0xf2, 0x00, 0x88, 0x13, + 0x5f, 0x30, 0x24, 0xd1, 0x0f, 0x28, 0x31, 0xc6, 0xfa}), + // Default algorithm is V2 (RC4). + cryptFilters: newCryptFiltersV2(128), + decryptedObjects: make(map[PdfObject]bool), + } streamData := []byte{0xBC, 0x89, 0x86, 0x8B, 0x3E, 0xCF, 0x24, 0x1C, 0xC4, 0x88, 0xF3, 0x60, 0x74, 0x8A, 0x22, 0xE3, 0xAD, 0xF4, 0x48, @@ -215,117 +95,3 @@ func TestDecryption1(t *testing.T) { return } } - -func BenchmarkAlg2b(b *testing.B) { - // hash runs a variable number of rounds, so we need to have a - // deterministic random source to make benchmark results comparable - r := rand.New(rand.NewSource(1234567)) - const n = 20 - pass := make([]byte, n) - r.Read(pass) - data := make([]byte, n+8+48) - r.Read(data) - user := make([]byte, 48) - r.Read(user) - - b.ResetTimer() - b.ReportAllocs() - for i := 0; i < b.N; i++ { - _ = alg2b(data, pass, user) - } -} - -func TestAESv3(t *testing.T) { - const keySize = 32 - - seed := time.Now().UnixNano() - rand := rand.New(rand.NewSource(seed)) - - var cases = []struct { - Name string - EncMeta bool - UserPass string - OwnerPass string - }{ - { - Name: "simple", EncMeta: true, - UserPass: "user", OwnerPass: "owner", - }, - { - Name: "utf8", EncMeta: false, - UserPass: "æøå-u", OwnerPass: "æøå-o", - }, - { - Name: "long", EncMeta: true, - UserPass: strings.Repeat("user", 80), - OwnerPass: strings.Repeat("owner", 80), - }, - } - - const ( - perms = 0x12345678 - ) - - for _, R := range []int{5, 6} { - R := R - t.Run(fmt.Sprintf("R=%d", R), func(t *testing.T) { - for _, c := range cases { - c := c - t.Run(c.Name, func(t *testing.T) { - fkey := make([]byte, keySize) - rand.Read(fkey) - - crypt := &PdfCrypt{ - V: 5, R: R, - P: perms, - EncryptionKey: append([]byte{}, fkey...), - EncryptMetadata: c.EncMeta, - } - - // generate encryption parameters - err := crypt.generateR6([]byte(c.UserPass), []byte(c.OwnerPass)) - if err != nil { - t.Fatal("Failed to encrypt:", err) - } - - // Perms and EncryptMetadata are checked as a part of alg2a - - // decrypt using user password - crypt.EncryptionKey = nil - ok, err := crypt.alg2a([]byte(c.UserPass)) - if err != nil || !ok { - t.Error("Failed to authenticate user pass:", 
err) - } else if !bytes.Equal(crypt.EncryptionKey, fkey) { - t.Error("wrong encryption key") - } - - // decrypt using owner password - crypt.EncryptionKey = nil - ok, err = crypt.alg2a([]byte(c.OwnerPass)) - if err != nil || !ok { - t.Error("Failed to authenticate owner pass:", err) - } else if !bytes.Equal(crypt.EncryptionKey, fkey) { - t.Error("wrong encryption key") - } - - // try to elevate user permissions - crypt.P = math.MaxUint32 - - crypt.EncryptionKey = nil - ok, err = crypt.alg2a([]byte(c.UserPass)) - if R == 5 { - // it's actually possible with R=5, since Perms is not generated - if err != nil || !ok { - t.Error("Failed to authenticate user pass:", err) - } - } else { - // not possible in R=6, should return an error - if err == nil || ok { - t.Error("was able to elevate permissions with R=6") - } - } - }) - } - }) - } -} diff --git a/pdf/core/parser.go b/pdf/core/parser.go index f98adc82..e56ec1f2 100755 --- a/pdf/core/parser.go +++ b/pdf/core/parser.go @@ -18,6 +18,7 @@ import ( "strings" "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core/security" ) // Regular Expressions for parsing and identifying object signatures. @@ -53,7 +54,7 @@ type PdfParser struct { streamLengthReferenceLookupInProgress map[int64]bool } -// Version holds the PDF version information for a file parsed by PdfParser +// Version represents a version of a PDF standard. type Version struct { Major int Minor int @@ -76,7 +77,7 @@ func (parser *PdfParser) GetCrypter() *PdfCrypt { // IsAuthenticated returns true if the PDF has already been authenticated for accessing. func (parser *PdfParser) IsAuthenticated() bool { - return parser.crypter.Authenticated + return parser.crypter.authenticated } // GetTrailer returns the PDFs trailer dictionary. The trailer dictionary is typically the starting point for a PDF, @@ -1604,7 +1605,7 @@ func (parser *PdfParser) IsEncrypted() (bool, error) { return false, fmt.Errorf("unsupported type: %T", e) } - crypter, err := PdfCryptMakeNew(parser, dict, parser.trailer) + crypter, err := PdfCryptNewDecrypt(parser, dict, parser.trailer) if err != nil { return false, err } @@ -1618,11 +1619,11 @@ func (parser *PdfParser) IsEncrypted() (bool, error) { case *PdfObjectReference: crypter.decryptedObjNum[int(f.ObjectNumber)] = struct{}{} case *PdfIndirectObject: - crypter.DecryptedObjects[f] = true + crypter.decryptedObjects[f] = true crypter.decryptedObjNum[int(f.ObjectNumber)] = struct{}{} } } - parser.crypter = &crypter + parser.crypter = crypter common.Log.Trace("Crypter object %b", crypter) return true, nil } @@ -1642,6 +1643,7 @@ func (parser *PdfParser) Decrypt(password []byte) (bool, error) { } if !authenticated { + // TODO(dennwc): R6 handler will try it automatically, make R4 do the same authenticated, err = parser.crypter.authenticate([]byte("")) } @@ -1654,21 +1656,11 @@ func (parser *PdfParser) Decrypt(password []byte) (bool, error) { // The bool flag indicates that the user can access and view the file. // The AccessPermissions shows what access the user has for editing etc. // An error is returned if there was a problem performing the authentication. -func (parser *PdfParser) CheckAccessRights(password []byte) (bool, AccessPermissions, error) { +func (parser *PdfParser) CheckAccessRights(password []byte) (bool, security.Permissions, error) { // Also build the encryption/decryption key. if parser.crypter == nil { // If the crypter is not set, the file is not encrypted and we can assume full access permissions. 
- perms := AccessPermissions{} - perms.Printing = true - perms.Modify = true - perms.FillForms = true - perms.RotateInsert = true - perms.ExtractGraphics = true - perms.DisabilityExtract = true - perms.Annotate = true - perms.FullPrintQuality = true - return true, perms, nil + return true, security.PermOwner, nil } - return parser.crypter.checkAccessRights(password) } diff --git a/pdf/core/primitives.go b/pdf/core/primitives.go index 75614280..08553550 100644 --- a/pdf/core/primitives.go +++ b/pdf/core/primitives.go @@ -78,6 +78,13 @@ type PdfObjectStream struct { Stream []byte } +// PdfObjectStreams represents the primitive PDF object streams. +// 7.5.7 Object Streams (page 45). +type PdfObjectStreams struct { + PdfObjectReference + vec []PdfObject +} + // MakeDict creates and returns an empty PdfObjectDictionary. func MakeDict() *PdfObjectDictionary { d := &PdfObjectDictionary{} @@ -219,6 +226,16 @@ func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error return stream, nil } +// MakeObjectStreams creates an PdfObjectStreams from a list of PdfObjects. +func MakeObjectStreams(objects ...PdfObject) *PdfObjectStreams { + streams := &PdfObjectStreams{} + streams.vec = []PdfObject{} + for _, obj := range objects { + streams.vec = append(streams.vec, obj) + } + return streams +} + // String returns the state of the bool as "true" or "false". func (bool *PdfObjectBool) String() string { if *bool { @@ -617,6 +634,16 @@ func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject { return val } +// GetString is a helper for Get that returns a string value. +// Returns false if the key is missing or a value is not a string. +func (d *PdfObjectDictionary) GetString(key PdfObjectName) (string, bool) { + val, ok := d.dict[key].(*PdfObjectString) + if !ok { + return "", false + } + return val.Str(), true +} + // Keys returns the list of keys in the dictionary. func (d *PdfObjectDictionary) Keys() []PdfObjectName { return d.keys @@ -896,3 +923,53 @@ func GetStream(obj PdfObject) (stream *PdfObjectStream, found bool) { stream, found = obj.(*PdfObjectStream) return stream, found } + +// GetObjectStreams returns the *PdfObjectStreams represented by the PdfObject. On type mismatch the found bool flag is +// false and a nil pointer is returned. +func GetObjectStreams(obj PdfObject) (objStream *PdfObjectStreams, found bool) { + objStream, found = obj.(*PdfObjectStreams) + return objStream, found +} + +// Append appends PdfObject(s) to the streams. +func (streams *PdfObjectStreams) Append(objects ...PdfObject) { + if streams == nil { + common.Log.Debug("Warn - Attempt to append to a nil streams") + return + } + if streams.vec == nil { + streams.vec = []PdfObject{} + } + + for _, obj := range objects { + streams.vec = append(streams.vec, obj) + } +} + +// Elements returns a slice of the PdfObject elements in the array. +// Preferred over accessing the array directly as type may be changed in future major versions (v3). +func (streams *PdfObjectStreams) Elements() []PdfObject { + if streams == nil { + return nil + } + return streams.vec +} + +// String returns a string describing `streams`. +func (streams *PdfObjectStreams) String() string { + return fmt.Sprintf("Object stream %d", streams.ObjectNumber) +} + +// Len returns the number of elements in the streams. +func (streams *PdfObjectStreams) Len() int { + if streams == nil { + return 0 + } + return len(streams.vec) +} + +// DefaultWriteString outputs the object as it is to be written to file. 
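// A minimal usage sketch of the new PdfObjectStreams primitive and the
// GetString dictionary helper, assuming only the API shown in this diff
// (MakeObjectStreams, Append, Elements, Len, GetObjectStreams, GetString)
// plus the pre-existing MakeInteger/MakeName/MakeString/MakeDict helpers;
// the object values below are made up for illustration.
package main

import (
	"fmt"

	"github.com/unidoc/unidoc/pdf/core"
)

func main() {
	// Collect a few objects into an object-stream container (7.5.7).
	objs := core.MakeObjectStreams(core.MakeInteger(1), core.MakeName("Example"))
	objs.Append(core.MakeString("third element"))
	fmt.Println(objs.Len()) // 3

	// Type-safe retrieval mirrors the other Get* helpers in primitives.go.
	if streams, ok := core.GetObjectStreams(objs); ok {
		for _, o := range streams.Elements() {
			fmt.Printf("%T\n", o)
		}
	}

	// GetString reports ok only when the key holds a *PdfObjectString.
	d := core.MakeDict()
	d.Set("Title", core.MakeString("hello"))
	if title, ok := d.GetString("Title"); ok {
		fmt.Println(title) // hello
	}
}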
+func (streams *PdfObjectStreams) DefaultWriteString() string { + outStr := fmt.Sprintf("%d 0 R", (*streams).ObjectNumber) + return outStr +} diff --git a/pdf/core/security/auth.go b/pdf/core/security/auth.go new file mode 100644 index 00000000..c199c410 --- /dev/null +++ b/pdf/core/security/auth.go @@ -0,0 +1,16 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package security + +// AuthEvent is an event type that triggers authentication. +type AuthEvent string + +const ( + // EventDocOpen is an event triggered when opening the document. + EventDocOpen = AuthEvent("DocOpen") + // EventEFOpen is an event triggered when accessing an embedded file. + EventEFOpen = AuthEvent("EFOpen") +) diff --git a/pdf/core/security/crypt/filter_aesv2.go b/pdf/core/security/crypt/filter_aesv2.go new file mode 100644 index 00000000..dc6b95d7 --- /dev/null +++ b/pdf/core/security/crypt/filter_aesv2.go @@ -0,0 +1,61 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package crypt + +import "fmt" + +func init() { + registerFilter("AESV2", newFilterAESV2) +} + +// NewFilterAESV2 creates an AES-based filter with a 128 bit key (AESV2). +func NewFilterAESV2() Filter { + f, err := newFilterAESV2(FilterDict{}) + if err != nil { + panic(err) + } + return f +} + +func newFilterAESV2(d FilterDict) (Filter, error) { + if d.Length != 0 && d.Length != 16 { + return nil, fmt.Errorf("Invalid AESV2 crypt filter length (%d)", d.Length) + } + return filterAESV2{}, nil +} + +var _ Filter = filterAESV2{} + +// filterAESV2 is an AES-based filter (128 bit key, PDF 1.6) +type filterAESV2 struct { + filterAES +} + +// PDFVersion implements Filter interface. +func (filterAESV2) PDFVersion() [2]int { + return [2]int{1, 5} +} + +// HandlerVersion implements Filter interface. +func (filterAESV2) HandlerVersion() (V, R int) { + V, R = 4, 4 + return +} + +// Name implements Filter interface. +func (filterAESV2) Name() string { + return "AESV2" +} + +// KeyLength implements Filter interface. +func (filterAESV2) KeyLength() int { + return 128 / 8 +} + +// MakeKey implements Filter interface. +func (filterAESV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) { + return makeKeyV2(objNum, genNum, ekey, true) +} diff --git a/pdf/core/security/crypt/filter_aesv3.go b/pdf/core/security/crypt/filter_aesv3.go new file mode 100644 index 00000000..5af52b39 --- /dev/null +++ b/pdf/core/security/crypt/filter_aesv3.go @@ -0,0 +1,185 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package crypt + +import ( + "crypto/aes" + "crypto/cipher" + "crypto/rand" + "fmt" + "io" + + "github.com/unidoc/unidoc/common" +) + +func init() { + registerFilter("AESV3", newFilterAESV3) +} + +// NewFilterAESV3 creates an AES-based filter with a 256 bit key (AESV3). +func NewFilterAESV3() Filter { + f, err := newFilterAESV3(FilterDict{}) + if err != nil { + panic(err) + } + return f +} + +func newFilterAESV3(d FilterDict) (Filter, error) { + if d.Length != 0 && d.Length != 32 { + return nil, fmt.Errorf("Invalid AESV3 crypt filter length (%d)", d.Length) + } + return filterAESV3{}, nil +} + +// filterAES implements a generic AES encryption and decryption algorithm used by AESV2 and AESV3 filter methods. 
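// A minimal round-trip sketch for the AESV2 filter defined above, assuming
// only the Filter API from this diff; the 16-byte file key and the object
// and generation numbers (12, 0) are made-up example values.
package main

import (
	"fmt"

	"github.com/unidoc/unidoc/pdf/core/security/crypt"
)

func main() {
	f := crypt.NewFilterAESV2() // 128-bit AES-CBC, introduced with PDF 1.6

	fkey := make([]byte, f.KeyLength()) // stand-in for the document encryption key
	// Per-object key: MD5 over the file key, the low 3 bytes of the object
	// number, the low 2 bytes of the generation number and the "sAlT" marker.
	okey, err := f.MakeKey(12, 0, fkey)
	if err != nil {
		panic(err)
	}

	enc, err := f.EncryptBytes([]byte("Hello, PDF"), okey) // random IV + padded ciphertext
	if err != nil {
		panic(err)
	}
	dec, err := f.DecryptBytes(enc, okey)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", dec) // Hello, PDF
}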
+type filterAES struct{} + +func (filterAES) EncryptBytes(buf []byte, okey []byte) ([]byte, error) { + // Strings and streams encrypted with AES shall use a padding + // scheme that is described in Internet RFC 2898, PKCS #5: + // Password-Based Cryptography Specification Version 2.0; see + // the Bibliography. For an original message length of M, + // the pad shall consist of 16 - (M mod 16) bytes whose value + // shall also be 16 - (M mod 16). + // + // A 9-byte message has a pad of 7 bytes, each with the value + // 0x07. The pad can be unambiguously removed to determine the + // original message length when decrypting. Note that the pad is + // present when M is evenly divisible by 16; it contains 16 bytes + // of 0x10. + + ciph, err := aes.NewCipher(okey) + if err != nil { + return nil, err + } + + common.Log.Trace("AES Encrypt (%d): % x", len(buf), buf) + + // If using the AES algorithm, the Cipher Block Chaining (CBC) + // mode, which requires an initialization vector, is used. The + // block size parameter is set to 16 bytes, and the initialization + // vector is a 16-byte random number that is stored as the first + // 16 bytes of the encrypted stream or string. + + const block = aes.BlockSize // 16 + + pad := block - len(buf)%block + for i := 0; i < pad; i++ { + buf = append(buf, byte(pad)) + } + common.Log.Trace("Padded to %d bytes", len(buf)) + + // Generate random 16 bytes, place in beginning of buffer. + ciphertext := make([]byte, block+len(buf)) + iv := ciphertext[:block] + if _, err := io.ReadFull(rand.Reader, iv); err != nil { + return nil, err + } + + mode := cipher.NewCBCEncrypter(ciph, iv) + mode.CryptBlocks(ciphertext[block:], buf) + + buf = ciphertext + common.Log.Trace("to (%d): % x", len(buf), buf) + + return buf, nil +} + +func (filterAES) DecryptBytes(buf []byte, okey []byte) ([]byte, error) { + // Strings and streams encrypted with AES shall use a padding + // scheme that is described in Internet RFC 2898, PKCS #5: + // Password-Based Cryptography Specification Version 2.0; see + // the Bibliography. For an original message length of M, + // the pad shall consist of 16 - (M mod 16) bytes whose value + // shall also be 16 - (M mod 16). + // + // A 9-byte message has a pad of 7 bytes, each with the value + // 0x07. The pad can be unambiguously removed to determine the + // original message length when decrypting. Note that the pad is + // present when M is evenly divisible by 16; it contains 16 bytes + // of 0x10. + + ciph, err := aes.NewCipher(okey) + if err != nil { + return nil, err + } + + // If using the AES algorithm, the Cipher Block Chaining (CBC) + // mode, which requires an initialization vector, is used. The + // block size parameter is set to 16 bytes, and the initialization + // vector is a 16-byte random number that is stored as the first + // 16 bytes of the encrypted stream or string. 
+ if len(buf) < 16 { + common.Log.Debug("ERROR AES invalid buf %s", buf) + return buf, fmt.Errorf("AES: Buf len < 16 (%d)", len(buf)) + } + + iv := buf[:16] + buf = buf[16:] + + if len(buf)%16 != 0 { + common.Log.Debug(" iv (%d): % x", len(iv), iv) + common.Log.Debug("buf (%d): % x", len(buf), buf) + return buf, fmt.Errorf("AES buf length not multiple of 16 (%d)", len(buf)) + } + + mode := cipher.NewCBCDecrypter(ciph, iv) + + common.Log.Trace("AES Decrypt (%d): % x", len(buf), buf) + common.Log.Trace("chop AES Decrypt (%d): % x", len(buf), buf) + mode.CryptBlocks(buf, buf) + common.Log.Trace("to (%d): % x", len(buf), buf) + + if len(buf) == 0 { + common.Log.Trace("Empty buf, returning empty string") + return buf, nil + } + + // The padded length is indicated by the last values. Remove those. + + padLen := int(buf[len(buf)-1]) + if padLen >= len(buf) { + common.Log.Debug("Illegal pad length") + return buf, fmt.Errorf("Invalid pad length") + } + buf = buf[:len(buf)-padLen] + + return buf, nil +} + +var _ Filter = filterAESV3{} + +// filterAESV3 is an AES-based filter (256 bit key, PDF 2.0) +type filterAESV3 struct { + filterAES +} + +// PDFVersion implements Filter interface. +func (filterAESV3) PDFVersion() [2]int { + return [2]int{2, 0} +} + +// HandlerVersion implements Filter interface. +func (filterAESV3) HandlerVersion() (V, R int) { + V, R = 5, 6 + return +} + +// Name implements Filter interface. +func (filterAESV3) Name() string { + return "AESV3" +} + +// KeyLength implements Filter interface. +func (filterAESV3) KeyLength() int { + return 256 / 8 +} + +// MakeKey implements Filter interface. +func (filterAESV3) MakeKey(_, _ uint32, ekey []byte) ([]byte, error) { + return ekey, nil // document encryption key == object encryption key +} diff --git a/pdf/core/security/crypt/filter_v2.go b/pdf/core/security/crypt/filter_v2.go new file mode 100644 index 00000000..6f7f3f99 --- /dev/null +++ b/pdf/core/security/crypt/filter_v2.go @@ -0,0 +1,140 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package crypt + +import ( + "crypto/md5" + "crypto/rc4" + "fmt" + + "github.com/unidoc/unidoc/common" +) + +func init() { + registerFilter("V2", newFilterV2) +} + +// NewFilterV2 creates a RC4-based filter with a specified key length (in bytes). +func NewFilterV2(length int) Filter { + f, err := newFilterV2(FilterDict{Length: length}) + if err != nil { + panic(err) + } + return f +} + +// newFilterV2 creates a RC4-based filter from a Filter dictionary. +func newFilterV2(d FilterDict) (Filter, error) { + if d.Length%8 != 0 { + return nil, fmt.Errorf("Crypt filter length not multiple of 8 (%d)", d.Length) + } + // Standard security handler expresses the length in multiples of 8 (16 means 128) + // We only deal with standard so far. (Public key not supported yet). + if d.Length < 5 || d.Length > 16 { + if d.Length == 40 || d.Length == 64 || d.Length == 128 { + common.Log.Debug("STANDARD VIOLATION: Crypt Length appears to be in bits rather than bytes - assuming bits (%d)", d.Length) + d.Length /= 8 + } else { + return nil, fmt.Errorf("Crypt filter length not in range 40 - 128 bit (%d)", d.Length) + } + } + return filterV2{length: d.Length}, nil +} + +// makeKeyV2 is a common object key generation shared by V2 and AESV2 crypt filters. 
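// A small sketch of how the V2 (RC4) filter above is constructed, assuming
// only the API from this diff; NewFilterV2 takes the key length in bytes
// (16 for a 128-bit key), while newFilterV2 additionally tolerates
// non-conforming files that store 40/64/128 (bits) by dividing by 8.
package main

import (
	"fmt"

	"github.com/unidoc/unidoc/pdf/core/security/crypt"
)

func main() {
	f := crypt.NewFilterV2(16) // 16 bytes == 128-bit RC4 key
	fmt.Println(f.Name(), f.KeyLength()) // V2 16

	// The per-object key derivation (makeKeyV2 below) mixes the object and
	// generation numbers into the file key and hashes the result with MD5.
	fkey := make([]byte, 16) // stand-in file key
	okey, err := f.MakeKey(7, 0, fkey)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(okey)) // 16
}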
+func makeKeyV2(objNum, genNum uint32, ekey []byte, isAES bool) ([]byte, error) { + key := make([]byte, len(ekey)+5) + for i := 0; i < len(ekey); i++ { + key[i] = ekey[i] + } + for i := 0; i < 3; i++ { + b := byte((objNum >> uint32(8*i)) & 0xff) + key[i+len(ekey)] = b + } + for i := 0; i < 2; i++ { + b := byte((genNum >> uint32(8*i)) & 0xff) + key[i+len(ekey)+3] = b + } + if isAES { + // If using the AES algorithm, extend the encryption key an + // additional 4 bytes by adding the value “sAlT”, which + // corresponds to the hexadecimal values 0x73, 0x41, 0x6C, 0x54. + key = append(key, 0x73) + key = append(key, 0x41) + key = append(key, 0x6C) + key = append(key, 0x54) + } + + // Take the MD5. + h := md5.New() + h.Write(key) + hashb := h.Sum(nil) + + if len(ekey)+5 < 16 { + return hashb[0 : len(ekey)+5], nil + } + + return hashb, nil +} + +var _ Filter = filterV2{} + +// filterV2 is a RC4-based filter +type filterV2 struct { + length int +} + +// PDFVersion implements Filter interface. +func (f filterV2) PDFVersion() [2]int { + return [2]int{} // TODO(dennwc): unspecified; check what it should be +} + +// HandlerVersion implements Filter interface. +func (f filterV2) HandlerVersion() (V, R int) { + V, R = 2, 3 + return +} + +// Name implements Filter interface. +func (filterV2) Name() string { + return "V2" +} + +// KeyLength implements Filter interface. +func (f filterV2) KeyLength() int { + return f.length +} + +// MakeKey implements Filter interface. +func (f filterV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) { + return makeKeyV2(objNum, genNum, ekey, false) +} + +// EncryptBytes implements Filter interface. +func (filterV2) EncryptBytes(buf []byte, okey []byte) ([]byte, error) { + // Standard RC4 algorithm. + ciph, err := rc4.NewCipher(okey) + if err != nil { + return nil, err + } + common.Log.Trace("RC4 Encrypt: % x", buf) + ciph.XORKeyStream(buf, buf) + common.Log.Trace("to: % x", buf) + return buf, nil +} + +// DecryptBytes implements Filter interface. +func (filterV2) DecryptBytes(buf []byte, okey []byte) ([]byte, error) { + // Standard RC4 algorithm. + ciph, err := rc4.NewCipher(okey) + if err != nil { + return nil, err + } + common.Log.Trace("RC4 Decrypt: % x", buf) + ciph.XORKeyStream(buf, buf) + common.Log.Trace("to: % x", buf) + return buf, nil +} diff --git a/pdf/core/security/crypt/filters.go b/pdf/core/security/crypt/filters.go new file mode 100644 index 00000000..3f275710 --- /dev/null +++ b/pdf/core/security/crypt/filters.go @@ -0,0 +1,113 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package crypt + +import ( + "fmt" + + "github.com/unidoc/unidoc/pdf/core/security" +) + +var ( + filterMethods = make(map[string]filterFunc) +) + +// filterFunc is used to construct crypt filters from CryptFilter dictionary +type filterFunc func(d FilterDict) (Filter, error) + +// Filter is a common interface for crypt filter methods. +type Filter interface { + // Name returns a name of the filter that should be used in CFM field of Encrypt dictionary. + Name() string + // KeyLength returns a length of the encryption key in bytes. + KeyLength() int + // PDFVersion reports the minimal version of PDF document that introduced this filter. + PDFVersion() [2]int + // HandlerVersion reports V and R parameters that should be used for this filter. + HandlerVersion() (V, R int) + // MakeKey generates a object encryption key based on file encryption key and object numbers. 
+ // Used only for legacy filters - AESV3 doesn't change the key for each object. + MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error) + // EncryptBytes encrypts a buffer using object encryption key, as returned by MakeKey. + // Implementation may reuse a buffer and encrypt data in-place. + EncryptBytes(p []byte, okey []byte) ([]byte, error) + // DecryptBytes decrypts a buffer using object encryption key, as returned by MakeKey. + // Implementation may reuse a buffer and decrypt data in-place. + DecryptBytes(p []byte, okey []byte) ([]byte, error) +} + +// NewFilter creates CryptFilter from a corresponding dictionary. +func NewFilter(d FilterDict) (Filter, error) { + fnc, err := getFilter(d.CFM) + if err != nil { + return nil, err + } + cf, err := fnc(d) + if err != nil { + return nil, err + } + return cf, nil +} + +// NewIdentity creates an identity filter that bypasses all data without changes. +func NewIdentity() Filter { + return filterIdentity{} +} + +// FilterDict represents information from a CryptFilter dictionary. +type FilterDict struct { + CFM string // The method used, if any, by the PDF reader to decrypt data. + AuthEvent security.AuthEvent + Length int // in bytes +} + +// registerFilter register supported crypt filter methods. +// Table 25, CFM (page 92) +func registerFilter(name string, fnc filterFunc) { + if _, ok := filterMethods[name]; ok { + panic("already registered") + } + filterMethods[name] = fnc +} + +// getFilter check if a CFM with a specified name is supported an returns its implementation. +func getFilter(name string) (filterFunc, error) { + f := filterMethods[string(name)] + if f == nil { + return nil, fmt.Errorf("unsupported crypt filter: %q", name) + } + return f, nil +} + +type filterIdentity struct{} + +func (filterIdentity) PDFVersion() [2]int { + return [2]int{} +} + +func (filterIdentity) HandlerVersion() (V, R int) { + return +} + +func (filterIdentity) Name() string { + return "Identity" +} + +func (filterIdentity) KeyLength() int { + return 0 +} + +func (filterIdentity) MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error) { + return fkey, nil +} + +func (filterIdentity) EncryptBytes(p []byte, okey []byte) ([]byte, error) { + return p, nil +} + +func (filterIdentity) DecryptBytes(p []byte, okey []byte) ([]byte, error) { + return p, nil +} diff --git a/pdf/core/ecb.go b/pdf/core/security/ecb.go similarity index 98% rename from pdf/core/ecb.go rename to pdf/core/security/ecb.go index b96cdfa3..20aeebfd 100644 --- a/pdf/core/ecb.go +++ b/pdf/core/security/ecb.go @@ -3,7 +3,7 @@ * file 'LICENSE.md', which is part of this source code package. */ -package core +package security import "crypto/cipher" diff --git a/pdf/core/security/handlers.go b/pdf/core/security/handlers.go new file mode 100644 index 00000000..a72e6f2f --- /dev/null +++ b/pdf/core/security/handlers.go @@ -0,0 +1,32 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package security + +// StdHandler is an interface for standard security handlers. +type StdHandler interface { + // GenerateParams uses owner and user passwords to set encryption parameters and generate an encryption key. + // It assumes that R, P and EncryptMetadata are already set. + GenerateParams(d *StdEncryptDict, ownerPass, userPass []byte) ([]byte, error) + + // Authenticate uses encryption dictionary parameters and the password to calculate + // the document encryption key. 
It also returns permissions that should be granted to a user. + // In case of failed authentication, it returns empty key and zero permissions with no error. + Authenticate(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) +} + +// StdEncryptDict is a set of additional fields used in standard encryption dictionary. +type StdEncryptDict struct { + R int // (Required) A number specifying which revision of the standard security handler shall be used. + + P Permissions + EncryptMetadata bool // Indicates whether the document-level metadata stream shall be encrypted. + + // set by security handlers: + + O, U []byte + OE, UE []byte // R=6 + Perms []byte // An encrypted copy of P (16 bytes). Used to verify permissions. R=6 +} diff --git a/pdf/core/security/permissions.go b/pdf/core/security/permissions.go new file mode 100644 index 00000000..5bb569a0 --- /dev/null +++ b/pdf/core/security/permissions.go @@ -0,0 +1,38 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package security + +import "math" + +// Permissions is a bitmask of access permissions for a PDF file. +type Permissions uint32 + +const ( + // PermOwner grants all permissions. + PermOwner = Permissions(math.MaxUint32) + + // PermPrinting allows printing the document with a low quality. + PermPrinting = Permissions(1 << 2) + // PermModify allows to modify the document. + PermModify = Permissions(1 << 3) + // PermExtractGraphics allows to extract graphics from the document. + PermExtractGraphics = Permissions(1 << 4) + // PermAnnotate allows annotating the document. + PermAnnotate = Permissions(1 << 5) + // PermFillForms allow form filling, if annotation is disabled? If annotation enabled, is not looked at. + PermFillForms = Permissions(1 << 8) + // PermDisabilityExtract allows to extract graphics in accessibility mode. + PermDisabilityExtract = Permissions(1 << 9) + // PermRotateInsert allows rotating, editing page order. + PermRotateInsert = Permissions(1 << 10) + // PermFullPrintQuality limits print quality (lowres), assuming Printing bit is set. + PermFullPrintQuality = Permissions(1 << 11) +) + +// Allowed checks if a set of permissions can be granted. +func (p Permissions) Allowed(p2 Permissions) bool { + return p&p2 == p2 +} diff --git a/pdf/core/security/standard_r4.go b/pdf/core/security/standard_r4.go new file mode 100644 index 00000000..c4d655c6 --- /dev/null +++ b/pdf/core/security/standard_r4.go @@ -0,0 +1,356 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package security + +import ( + "bytes" + "crypto/md5" + "crypto/rand" + "crypto/rc4" + "errors" + + "github.com/unidoc/unidoc/common" +) + +var _ StdHandler = stdHandlerR4{} + +const padding = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF" + + "\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C" + + "\xA9\xFE\x64\x53\x69\x7A" + +// NewHandlerR4 creates a new standard security handler for R<=4. +func NewHandlerR4(id0 string, length int) StdHandler { + return stdHandlerR4{ID0: id0, Length: length} +} + +// stdHandlerR4 is a standard security handler for R<=4. +// It uses RC4 and MD5 to generate encryption parameters. +// This legacy handler also requires Length parameter from +// Encrypt dictionary and ID0 from the trailer. 
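// A minimal sketch of the Permissions bitmask defined above, which replaces
// the old AccessPermissions struct; the granted mask below is made up, but
// it is the same type now returned by PdfParser.CheckAccessRights and by
// StdHandler.Authenticate.
package main

import (
	"fmt"

	"github.com/unidoc/unidoc/pdf/core/security"
)

func main() {
	// A document that only allows printing and form filling.
	granted := security.PermPrinting | security.PermFillForms

	fmt.Println(granted.Allowed(security.PermPrinting)) // true
	fmt.Println(granted.Allowed(security.PermModify))   // false
	fmt.Println(granted.Allowed(security.PermPrinting | security.PermFillForms)) // true: every requested bit is present

	// The owner mask grants everything.
	fmt.Println(security.PermOwner.Allowed(security.PermRotateInsert)) // true
}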
+type stdHandlerR4 struct { + Length int + ID0 string +} + +func (sh stdHandlerR4) paddedPass(pass []byte) []byte { + key := make([]byte, 32) + if len(pass) >= 32 { + for i := 0; i < 32; i++ { + key[i] = pass[i] + } + } else { + for i := 0; i < len(pass); i++ { + key[i] = pass[i] + } + for i := len(pass); i < 32; i++ { + key[i] = padding[i-len(pass)] + } + } + return key +} + +// alg2 computes an encryption key. +func (sh stdHandlerR4) alg2(d *StdEncryptDict, pass []byte) []byte { + common.Log.Trace("alg2") + key := sh.paddedPass(pass) + + h := md5.New() + h.Write(key) + + // Pass O. + h.Write(d.O) + + // Pass P (Lower order byte first). + var p = uint32(d.P) + var pb []byte + for i := 0; i < 4; i++ { + pb = append(pb, byte(((p >> uint(8*i)) & 0xff))) + } + h.Write(pb) + common.Log.Trace("go P: % x", pb) + + // Pass ID[0] from the trailer + h.Write([]byte(sh.ID0)) + + common.Log.Trace("this.R = %d encryptMetadata %v", d.R, d.EncryptMetadata) + if (d.R >= 4) && !d.EncryptMetadata { + h.Write([]byte{0xff, 0xff, 0xff, 0xff}) + } + hashb := h.Sum(nil) + + if d.R >= 3 { + for i := 0; i < 50; i++ { + h = md5.New() + h.Write(hashb[0 : sh.Length/8]) + hashb = h.Sum(nil) + } + } + + if d.R >= 3 { + return hashb[0 : sh.Length/8] + } + + return hashb[0:5] +} + +// Create the RC4 encryption key. +func (sh stdHandlerR4) alg3Key(R int, pass []byte) []byte { + h := md5.New() + okey := sh.paddedPass(pass) + h.Write(okey) + + if R >= 3 { + for i := 0; i < 50; i++ { + hashb := h.Sum(nil) + h = md5.New() + h.Write(hashb) + } + } + + encKey := h.Sum(nil) + if R == 2 { + encKey = encKey[0:5] + } else { + encKey = encKey[0 : sh.Length/8] + } + return encKey +} + +// alg3 computes the encryption dictionary’s O (owner password) value. +func (sh stdHandlerR4) alg3(R int, upass, opass []byte) ([]byte, error) { + var encKey []byte + if len(opass) > 0 { + encKey = sh.alg3Key(R, opass) + } else { + encKey = sh.alg3Key(R, upass) + } + + ociph, err := rc4.NewCipher(encKey) + if err != nil { + return nil, errors.New("Failed rc4 ciph") + } + + ukey := sh.paddedPass(upass) + encrypted := make([]byte, len(ukey)) + ociph.XORKeyStream(encrypted, ukey) + + if R >= 3 { + encKey2 := make([]byte, len(encKey)) + for i := 0; i < 19; i++ { + for j := 0; j < len(encKey); j++ { + encKey2[j] = encKey[j] ^ byte(i+1) + } + ciph, err := rc4.NewCipher(encKey2) + if err != nil { + return nil, errors.New("Failed rc4 ciph") + } + ciph.XORKeyStream(encrypted, encrypted) + } + } + return encrypted, nil +} + +// alg4 computes the encryption dictionary’s U (user password) value (Security handlers of revision 2). +func (sh stdHandlerR4) alg4(ekey []byte, upass []byte) ([]byte, error) { + ciph, err := rc4.NewCipher(ekey) + if err != nil { + return nil, errors.New("Failed rc4 ciph") + } + + s := []byte(padding) + encrypted := make([]byte, len(s)) + ciph.XORKeyStream(encrypted, s) + return encrypted, nil +} + +// alg5 computes the encryption dictionary’s U (user password) value (Security handlers of revision 3 or greater). 
+func (sh stdHandlerR4) alg5(ekey []byte, upass []byte) ([]byte, error) { + h := md5.New() + h.Write([]byte(padding)) + h.Write([]byte(sh.ID0)) + hash := h.Sum(nil) + + common.Log.Trace("alg5") + common.Log.Trace("ekey: % x", ekey) + common.Log.Trace("ID: % x", sh.ID0) + + if len(hash) != 16 { + return nil, errors.New("Hash length not 16 bytes") + } + + ciph, err := rc4.NewCipher(ekey) + if err != nil { + return nil, errors.New("Failed rc4 ciph") + } + encrypted := make([]byte, 16) + ciph.XORKeyStream(encrypted, hash) + + // Do the following 19 times: Take the output from the previous + // invocation of the RC4 function and pass it as input to a new + // invocation of the function; use an encryption key generated by + // taking each byte of the original encryption key obtained in step + // (a) and performing an XOR (exclusive or) operation between that + // byte and the single-byte value of the iteration counter (from 1 to 19). + ekey2 := make([]byte, len(ekey)) + for i := 0; i < 19; i++ { + for j := 0; j < len(ekey); j++ { + ekey2[j] = ekey[j] ^ byte(i+1) + } + ciph, err = rc4.NewCipher(ekey2) + if err != nil { + return nil, errors.New("Failed rc4 ciph") + } + ciph.XORKeyStream(encrypted, encrypted) + common.Log.Trace("i = %d, ekey: % x", i, ekey2) + common.Log.Trace("i = %d -> % x", i, encrypted) + } + + bb := make([]byte, 32) + for i := 0; i < 16; i++ { + bb[i] = encrypted[i] + } + + // Append 16 bytes of arbitrary padding to the output from the final + // invocation of the RC4 function and store the 32-byte result as + // the value of the U entry in the encryption dictionary. + _, err = rand.Read(bb[16:32]) + if err != nil { + return nil, errors.New("Failed to gen rand number") + } + return bb, nil +} + +// alg6 authenticates the user password and returns the document encryption key. +// It returns an nil key in case authentication failed. +func (sh stdHandlerR4) alg6(d *StdEncryptDict, upass []byte) ([]byte, error) { + var ( + uo []byte + err error + ) + ekey := sh.alg2(d, upass) + if d.R == 2 { + uo, err = sh.alg4(ekey, upass) + } else if d.R >= 3 { + uo, err = sh.alg5(ekey, upass) + } else { + return nil, errors.New("invalid R") + } + if err != nil { + return nil, err + } + + common.Log.Trace("check: % x == % x ?", string(uo), string(d.U)) + + uGen := uo // Generated U from specified pass. + uDoc := d.U // U from the document. + if d.R >= 3 { + // comparing on the first 16 bytes in the case of security + // handlers of revision 3 or greater), + if len(uGen) > 16 { + uGen = uGen[0:16] + } + if len(uDoc) > 16 { + uDoc = uDoc[0:16] + } + } + + if !bytes.Equal(uGen, uDoc) { + return nil, nil + } + return ekey, nil +} + +// alg7 authenticates the owner password and returns the document encryption key. +// It returns an nil key in case authentication failed. +func (sh stdHandlerR4) alg7(d *StdEncryptDict, opass []byte) ([]byte, error) { + encKey := sh.alg3Key(d.R, opass) + + decrypted := make([]byte, len(d.O)) + if d.R == 2 { + ciph, err := rc4.NewCipher(encKey) + if err != nil { + return nil, errors.New("Failed cipher") + } + ciph.XORKeyStream(decrypted, d.O) + } else if d.R >= 3 { + s := append([]byte{}, d.O...) + for i := 0; i < 20; i++ { + //newKey := encKey + newKey := append([]byte{}, encKey...) + for j := 0; j < len(encKey); j++ { + newKey[j] ^= byte(19 - i) + } + ciph, err := rc4.NewCipher(newKey) + if err != nil { + return nil, errors.New("Failed cipher") + } + ciph.XORKeyStream(decrypted, s) + s = append([]byte{}, decrypted...) 
+ } + } else { + return nil, errors.New("invalid R") + } + + ekey, err := sh.alg6(d, decrypted) + if err != nil { + // TODO(dennwc): this doesn't look right, but it was in the old code + return nil, nil + } + return ekey, nil +} + +// GenerateParams generates and sets O and U parameters for the encryption dictionary. +// It expects R, P and EncryptMetadata fields to be set. +func (sh stdHandlerR4) GenerateParams(d *StdEncryptDict, opass, upass []byte) ([]byte, error) { + // Make the O and U objects. + O, err := sh.alg3(d.R, upass, opass) + if err != nil { + common.Log.Debug("ERROR: Error generating O for encryption (%s)", err) + return nil, err + } + d.O = O + common.Log.Trace("gen O: % x", O) + // requires O + ekey := sh.alg2(d, upass) + + U, err := sh.alg5(ekey, upass) + if err != nil { + common.Log.Debug("ERROR: Error generating O for encryption (%s)", err) + return nil, err + } + d.U = U + common.Log.Trace("gen U: % x", U) + return ekey, nil +} + +// Authenticate implements StdHandler interface. +func (sh stdHandlerR4) Authenticate(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) { + // Try owner password. + // May not be necessary if only want to get all contents. + // (user pass needs to be known or empty). + common.Log.Trace("Debugging authentication - owner pass") + ekey, err := sh.alg7(d, pass) + if err != nil { + return nil, 0, err + } + if ekey != nil { + common.Log.Trace("this.authenticated = True") + return ekey, PermOwner, nil + } + + // Try user password. + common.Log.Trace("Debugging authentication - user pass") + ekey, err = sh.alg6(d, pass) + if err != nil { + return nil, 0, err + } + if ekey != nil { + common.Log.Trace("this.authenticated = True") + return ekey, d.P, nil + } + // Cannot even view the file. + return nil, 0, nil +} diff --git a/pdf/core/security/standard_r4_test.go b/pdf/core/security/standard_r4_test.go new file mode 100644 index 00000000..441b7dd7 --- /dev/null +++ b/pdf/core/security/standard_r4_test.go @@ -0,0 +1,139 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package security + +import ( + "github.com/unidoc/unidoc/common" + "testing" +) + +func init() { + common.SetLogger(common.ConsoleLogger{}) +} + +func TestR4Padding(t *testing.T) { + sh := stdHandlerR4{} + + // Case 1 empty pass, should match padded string. + key := sh.paddedPass([]byte("")) + if len(key) != 32 { + t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key)) + } + if key[0] != 0x28 { + t.Errorf("key[0] != 0x28 (%q in %q)", key[0], key) + } + if key[31] != 0x7A { + t.Errorf("key[31] != 0x7A (%q in %q)", key[31], key) + } + + // Case 2, non empty pass. + key = sh.paddedPass([]byte("bla")) + if len(key) != 32 { + t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key)) + } + if string(key[0:3]) != "bla" { + t.Errorf("Expecting start with bla (%s)", key) + } + if key[3] != 0x28 { + t.Errorf("key[3] != 0x28 (%q in %q)", key[3], key) + } + if key[31] != 0x64 { + t.Errorf("key[31] != 0x64 (%q in %q)", key[31], key) + } +} + +// Test algorithm 2. 
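// A minimal end-to-end sketch of the R<=4 standard security handler through
// the StdHandler interface, assuming only the API in this diff; the document
// ID, passwords and permission mask are hypothetical (a real ID0 comes from
// the trailer and Length from the Encrypt dictionary, in bits).
package main

import (
	"bytes"
	"fmt"

	"github.com/unidoc/unidoc/pdf/core/security"
)

func main() {
	d := &security.StdEncryptDict{
		R:               3,
		P:               security.PermPrinting,
		EncryptMetadata: true,
	}
	sh := security.NewHandlerR4("hypothetical-id0", 128)

	// GenerateParams fills d.O and d.U and returns the file encryption key.
	ekey, err := sh.GenerateParams(d, []byte("owner-pass"), []byte("user-pass"))
	if err != nil {
		panic(err)
	}

	// A reader authenticates with the user password, recovers the same key
	// and is granted the permissions recorded in P.
	key, perm, err := sh.Authenticate(d, []byte("user-pass"))
	if err != nil {
		panic(err)
	}
	fmt.Println(bytes.Equal(key, ekey), perm.Allowed(security.PermPrinting)) // true true
}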
+func TestAlg2(t *testing.T) { + sh := stdHandlerR4{ + // V: 2, + ID0: string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24, + 0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}), + Length: 128, + } + d := &StdEncryptDict{ + R: 3, + P: 0xfffff0c0, + EncryptMetadata: true, + O: []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, + 0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, + 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, + 0x72, 0x6A, 0x8C, 0xDB}, + } + + key := sh.alg2(d, []byte("")) + + keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca, + 0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5} + + if string(key) != string(keyExp) { + common.Log.Debug(" Key (%d): % x", len(key), key) + common.Log.Debug("KeyExp (%d): % x", len(keyExp), keyExp) + t.Errorf("alg2 -> key != expected\n") + } +} + +// Test algorithm 3. +func TestAlg3(t *testing.T) { + sh := stdHandlerR4{ + // V: 2, + ID0: string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24, + 0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}), + Length: 128, + } + + Oexp := []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, + 0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, + 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, + 0x72, 0x6A, 0x8C, 0xDB} + O, err := sh.alg3(3, []byte(""), []byte("test")) + if err != nil { + t.Errorf("crypt alg3 error %s", err) + return + } + + if string(O) != string(Oexp) { + common.Log.Debug(" O (%d): % x", len(O), O) + common.Log.Debug("Oexp (%d): % x", len(Oexp), Oexp) + t.Errorf("alg3 -> key != expected") + } +} + +// Test algorithm 5 for computing dictionary's U (user password) value +// valid for R >= 3. +func TestAlg5(t *testing.T) { + sh := stdHandlerR4{ + // V: 2, + ID0: string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24, + 0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}), + Length: 128, + } + d := &StdEncryptDict{ + R: 3, + P: 0xfffff0c0, + EncryptMetadata: true, + O: []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B, + 0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51, + 0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86, + 0x72, 0x6A, 0x8C, 0xDB}, + } + + ekey := sh.alg2(d, []byte("")) + U, err := sh.alg5(ekey, []byte("")) + if err != nil { + t.Errorf("Error %s", err) + return + } + + Uexp := []byte{0x59, 0x66, 0x38, 0x6c, 0x76, 0xfe, 0x95, 0x7d, 0x3d, + 0x0d, 0x14, 0x3d, 0x36, 0xfd, 0x01, 0x3d, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} + + if string(U[0:16]) != string(Uexp[0:16]) { + common.Log.Info(" U (%d): % x", len(U), U) + common.Log.Info("Uexp (%d): % x", len(Uexp), Uexp) + t.Errorf("U != expected\n") + } +} diff --git a/pdf/core/security/standard_r6.go b/pdf/core/security/standard_r6.go new file mode 100644 index 00000000..3ead14c9 --- /dev/null +++ b/pdf/core/security/standard_r6.go @@ -0,0 +1,469 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package security + +import ( + "bytes" + "crypto/aes" + "crypto/cipher" + "crypto/rand" + "crypto/sha256" + "crypto/sha512" + "encoding/binary" + "errors" + "hash" + "io" + "math" +) + +var _ StdHandler = stdHandlerR6{} + +// NewHandlerR6 creates a new standard security handler for R=5 and R=6. +func NewHandlerR6() StdHandler { + return stdHandlerR6{} +} + +// stdHandlerR6 is an implementation of standard security handler with R=5 and R=6. 
+// Both revisions are expected to be used with AES encryption filters. +type stdHandlerR6 struct{} + +// alg2a retrieves the encryption key from an encrypted document (R >= 5). +// 7.6.4.3.2 Algorithm 2.A (page 83) +func (sh stdHandlerR6) alg2a(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) { + // O & U: 32 byte hash + 8 byte Validation Salt + 8 byte Key Salt + + // step a: Unicode normalization + // TODO(dennwc): make sure that UTF-8 strings are normalized + + // step b: truncate to 127 bytes + if len(pass) > 127 { + pass = pass[:127] + } + + // step c: test pass against the owner key + h, err := sh.alg12(d, pass) + if err != nil { + return nil, 0, err + } + var ( + data []byte // data to hash + ekey []byte // encrypted file key + ukey []byte // user key; set only when using owner's password + ) + var perm Permissions + if len(h) != 0 { + // owner password valid + perm = PermOwner + + // step d: compute an intermediate owner key + str := make([]byte, len(pass)+8+48) + i := copy(str, pass) + i += copy(str[i:], d.O[40:48]) // owner Key Salt + i += copy(str[i:], d.U[0:48]) + + data = str + ekey = d.OE + ukey = d.U[0:48] + } else { + // check user password + h, err = sh.alg11(d, pass) + if err == nil && len(h) == 0 { + // try default password + h, err = sh.alg11(d, []byte("")) + } + if err != nil { + return nil, 0, err + } else if len(h) == 0 { + // wrong password + return nil, 0, nil + } + perm = d.P + // step e: compute an intermediate user key + str := make([]byte, len(pass)+8) + i := copy(str, pass) + i += copy(str[i:], d.U[40:48]) // user Key Salt + + data = str + ekey = d.UE + ukey = nil + } + ekey = ekey[:32] + + // intermediate key + ikey := sh.alg2b(d.R, data, pass, ukey) + + ac, err := aes.NewCipher(ikey[:32]) + if err != nil { + return nil, 0, err + } + + iv := make([]byte, aes.BlockSize) + cbc := cipher.NewCBCDecrypter(ac, iv) + fkey := make([]byte, 32) + cbc.CryptBlocks(fkey, ekey) + + if d.R == 5 { + return fkey, perm, nil + } + // validate permissions + err = sh.alg13(d, fkey) + if err != nil { + return nil, 0, err + } + return fkey, perm, nil +} + +// alg2bR5 computes a hash for R=5, used in a deprecated extension. +// It's used the same way as a hash described in Algorithm 2.B, but it doesn't use the original password +// and the user key to calculate the hash. +func alg2bR5(data []byte) []byte { + h := sha256.New() + h.Write(data) + return h.Sum(nil) +} + +// repeat repeats first n bytes of buf until the end of the buffer. +// It assumes that the length of buf is a multiple of n. +func repeat(buf []byte, n int) { + bp := n + for bp < len(buf) { + copy(buf[bp:], buf[:bp]) + bp *= 2 + } +} + +// alg2b computes a hash for R=6. 
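// A small standalone illustration of the repeat helper above (copied here so
// the snippet runs on its own): it tiles the first n bytes across the buffer
// by doubling the filled region on each copy, which is how Algorithm 2.B
// builds the 64 repetitions of (password + K + user key) cheaply.
package main

import "fmt"

// repeat mirrors the unexported helper defined above.
func repeat(buf []byte, n int) {
	bp := n
	for bp < len(buf) {
		copy(buf[bp:], buf[:bp])
		bp *= 2
	}
}

func main() {
	buf := make([]byte, 12)
	copy(buf, "abc")
	repeat(buf, 3)
	fmt.Printf("%s\n", buf) // abcabcabcabc
}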
+// 7.6.4.3.3 Algorithm 2.B (page 83) +func alg2b(data, pwd, userKey []byte) []byte { + var ( + s256, s384, s512 hash.Hash + ) + s256 = sha256.New() + hbuf := make([]byte, 64) + + h := s256 + h.Write(data) + K := h.Sum(hbuf[:0]) + + buf := make([]byte, 64*(127+64+48)) + + round := func(rnd int) (E []byte) { + // step a: repeat pass+K 64 times + n := len(pwd) + len(K) + len(userKey) + part := buf[:n] + i := copy(part, pwd) + i += copy(part[i:], K[:]) + i += copy(part[i:], userKey) + if i != n { + panic("wrong size") + } + K1 := buf[:n*64] + repeat(K1, n) + + // step b: encrypt K1 with AES-128 CBC + ac, err := aes.NewCipher(K[0:16]) + if err != nil { + panic(err) + } + cbc := cipher.NewCBCEncrypter(ac, K[16:32]) + cbc.CryptBlocks(K1, K1) + E = K1 + + // step c: use 16 bytes of E as big-endian int, select the next hash + b := 0 + for i := 0; i < 16; i++ { + b += int(E[i] % 3) + } + var h hash.Hash + switch b % 3 { + case 0: + h = s256 + case 1: + if s384 == nil { + s384 = sha512.New384() + } + h = s384 + case 2: + if s512 == nil { + s512 = sha512.New() + } + h = s512 + } + + // step d: take the hash of E, use as a new K + h.Reset() + h.Write(E) + K = h.Sum(hbuf[:0]) + + return E + } + + for i := 0; ; { + E := round(i) + b := uint8(E[len(E)-1]) + // from the spec, it appears that i should be incremented after + // the test, but that doesn't match what Adobe does + i++ + if i >= 64 && b <= uint8(i-32) { + break + } + } + return K[:32] +} + +// alg2b computes a hash for R=5 and R=6. +func (sh stdHandlerR6) alg2b(R int, data, pwd, userKey []byte) []byte { + if R == 5 { + return alg2bR5(data) + } + return alg2b(data, pwd, userKey) +} + +// alg8 computes the encryption dictionary's U (user password) and UE (user encryption) values (R>=5). +// 7.6.4.4.6 Algorithm 8 (page 86) +func (sh stdHandlerR6) alg8(d *StdEncryptDict, ekey []byte, upass []byte) error { + // step a: compute U (user password) + var rbuf [16]byte + if _, err := io.ReadFull(rand.Reader, rbuf[:]); err != nil { + return err + } + valSalt := rbuf[0:8] + keySalt := rbuf[8:16] + + str := make([]byte, len(upass)+len(valSalt)) + i := copy(str, upass) + i += copy(str[i:], valSalt) + + h := sh.alg2b(d.R, str, upass, nil) + + U := make([]byte, len(h)+len(valSalt)+len(keySalt)) + i = copy(U, h[:32]) + i += copy(U[i:], valSalt) + i += copy(U[i:], keySalt) + + d.U = U + + // step b: compute UE (user encryption) + + // str still contains a password, reuse it + i = len(upass) + i += copy(str[i:], keySalt) + + h = sh.alg2b(d.R, str, upass, nil) + + ac, err := aes.NewCipher(h[:32]) + if err != nil { + panic(err) + } + + iv := make([]byte, aes.BlockSize) + cbc := cipher.NewCBCEncrypter(ac, iv) + UE := make([]byte, 32) + cbc.CryptBlocks(UE, ekey[:32]) + d.UE = UE + + return nil +} + +// alg9 computes the encryption dictionary's O (owner password) and OE (owner encryption) values (R>=5). 
+// 7.6.4.4.7 Algorithm 9 (page 86) +func (sh stdHandlerR6) alg9(d *StdEncryptDict, ekey []byte, opass []byte) error { + // step a: compute O (owner password) + var rbuf [16]byte + if _, err := io.ReadFull(rand.Reader, rbuf[:]); err != nil { + return err + } + valSalt := rbuf[0:8] + keySalt := rbuf[8:16] + userKey := d.U[:48] + + str := make([]byte, len(opass)+len(valSalt)+len(userKey)) + i := copy(str, opass) + i += copy(str[i:], valSalt) + i += copy(str[i:], userKey) + + h := sh.alg2b(d.R, str, opass, userKey) + + O := make([]byte, len(h)+len(valSalt)+len(keySalt)) + i = copy(O, h[:32]) + i += copy(O[i:], valSalt) + i += copy(O[i:], keySalt) + + d.O = O + + // step b: compute OE (owner encryption) + + // str still contains a password and a user key - reuse both, but overwrite the salt + i = len(opass) + i += copy(str[i:], keySalt) + // i += len(userKey) + + h = sh.alg2b(d.R, str, opass, userKey) + + ac, err := aes.NewCipher(h[:32]) + if err != nil { + panic(err) + } + + iv := make([]byte, aes.BlockSize) + cbc := cipher.NewCBCEncrypter(ac, iv) + OE := make([]byte, 32) + cbc.CryptBlocks(OE, ekey[:32]) + d.OE = OE + + return nil +} + +// alg10 computes the encryption dictionary's Perms (permissions) value (R=6). +// 7.6.4.4.8 Algorithm 10 (page 87) +func (sh stdHandlerR6) alg10(d *StdEncryptDict, ekey []byte) error { + // step a: extend permissions to 64 bits + perms := uint64(uint32(d.P)) | (math.MaxUint32 << 32) + + // step b: record permissions + Perms := make([]byte, 16) + binary.LittleEndian.PutUint64(Perms[:8], perms) + + // step c: record EncryptMetadata + if d.EncryptMetadata { + Perms[8] = 'T' + } else { + Perms[8] = 'F' + } + + // step d: write "adb" magic + copy(Perms[9:12], "adb") + + // step e: write 4 bytes of random data + + // spec doesn't specify them as generated "from a strong random source", + // but we will use the cryptographic random generator anyway + if _, err := io.ReadFull(rand.Reader, Perms[12:16]); err != nil { + return err + } + + // step f: encrypt permissions + ac, err := aes.NewCipher(ekey[:32]) + if err != nil { + panic(err) + } + + ecb := newECBEncrypter(ac) + ecb.CryptBlocks(Perms, Perms) + + d.Perms = Perms[:16] + return nil +} + +// alg11 authenticates the user password (R >= 5) and returns the hash. +func (sh stdHandlerR6) alg11(d *StdEncryptDict, upass []byte) ([]byte, error) { + str := make([]byte, len(upass)+8) + i := copy(str, upass) + i += copy(str[i:], d.U[32:40]) // user Validation Salt + + h := sh.alg2b(d.R, str, upass, nil) + h = h[:32] + if !bytes.Equal(h, d.U[:32]) { + return nil, nil + } + return h, nil +} + +// alg12 authenticates the owner password (R >= 5) and returns the hash. +// 7.6.4.4.10 Algorithm 12 (page 87) +func (sh stdHandlerR6) alg12(d *StdEncryptDict, opass []byte) ([]byte, error) { + str := make([]byte, len(opass)+8+48) + i := copy(str, opass) + i += copy(str[i:], d.O[32:40]) // owner Validation Salt + i += copy(str[i:], d.U[0:48]) + + h := sh.alg2b(d.R, str, opass, d.U[0:48]) + h = h[:32] + if !bytes.Equal(h, d.O[:32]) { + return nil, nil + } + return h, nil +} + +// alg13 validates user permissions (P+EncryptMetadata vs Perms) for R=6. 
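+// Perms is the AES-ECB encryption (under the file key) of a 16-byte block laid out by alg10:
+// bytes 0-7 hold the extended permissions (little endian), byte 8 is 'T' or 'F' for
+// EncryptMetadata, bytes 9-11 are the "adb" magic and bytes 12-15 are random padding.
+// alg13 decrypts that block and checks it against the P and EncryptMetadata fields.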
+// 7.6.4.4.11 Algorithm 13 (page 87)
+func (sh stdHandlerR6) alg13(d *StdEncryptDict, fkey []byte) error {
+	perms := make([]byte, 16)
+	copy(perms, d.Perms[:16])
+
+	ac, err := aes.NewCipher(fkey[:32])
+	if err != nil {
+		panic(err)
+	}
+
+	ecb := newECBDecrypter(ac)
+	ecb.CryptBlocks(perms, perms)
+
+	if !bytes.Equal(perms[9:12], []byte("adb")) {
+		return errors.New("decoded permissions are invalid")
+	}
+	p := Permissions(binary.LittleEndian.Uint32(perms[0:4]))
+	if p != d.P {
+		return errors.New("permissions validation failed")
+	}
+	encMeta := true
+	if perms[8] == 'T' {
+		encMeta = true
+	} else if perms[8] == 'F' {
+		encMeta = false
+	} else {
+		return errors.New("decoded metadata encryption flag is invalid")
+	}
+	if encMeta != d.EncryptMetadata {
+		return errors.New("metadata encryption validation failed")
+	}
+	return nil
+}
+
+// GenerateParams is the inverse of alg2a (R>=5).
+// It generates the U, O, UE, OE and Perms fields using AESv3 encryption.
+// There is no algorithm number assigned to this function in the spec.
+// It expects the R, P and EncryptMetadata fields to be set.
+func (sh stdHandlerR6) GenerateParams(d *StdEncryptDict, opass, upass []byte) ([]byte, error) {
+	ekey := make([]byte, 32)
+	if _, err := io.ReadFull(rand.Reader, ekey); err != nil {
+		return nil, err
+	}
+	// all these fields will be populated by the functions below
+	d.U = nil
+	d.O = nil
+	d.UE = nil
+	d.OE = nil
+	d.Perms = nil // populated only for R=6
+
+	if len(upass) > 127 {
+		upass = upass[:127]
+	}
+	if len(opass) > 127 {
+		opass = opass[:127]
+	}
+	// generate U and UE
+	if err := sh.alg8(d, ekey, upass); err != nil {
+		return nil, err
+	}
+	// generate O and OE
+	if err := sh.alg9(d, ekey, opass); err != nil {
+		return nil, err
+	}
+	if d.R == 5 {
+		return ekey, nil
+	}
+	// generate Perms
+	if err := sh.alg10(d, ekey); err != nil {
+		return nil, err
+	}
+	return ekey, nil
+}
+
+// Authenticate implements the StdHandler interface.
+func (sh stdHandlerR6) Authenticate(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) {
+	return sh.alg2a(d, pass)
+}
diff --git a/pdf/core/security/standard_r6_test.go b/pdf/core/security/standard_r6_test.go
new file mode 100644
index 00000000..1e230c69
--- /dev/null
+++ b/pdf/core/security/standard_r6_test.go
@@ -0,0 +1,116 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */ + +package security + +import ( + "bytes" + "fmt" + "math/rand" + "strings" + "testing" +) + +func BenchmarkAlg2b(b *testing.B) { + // hash runs a variable number of rounds, so we need to have a + // deterministic random source to make benchmark results comparable + r := rand.New(rand.NewSource(1234567)) + const n = 20 + pass := make([]byte, n) + r.Read(pass) + data := make([]byte, n+8+48) + r.Read(data) + user := make([]byte, 48) + r.Read(user) + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = alg2b(data, pass, user) + } +} + +func TestStdHandlerR6(t *testing.T) { + var cases = []struct { + Name string + EncMeta bool + UserPass string + OwnerPass string + }{ + { + Name: "simple", EncMeta: true, + UserPass: "user", OwnerPass: "owner", + }, + { + Name: "utf8", EncMeta: false, + UserPass: "æøå-u", OwnerPass: "æøå-o", + }, + { + Name: "long", EncMeta: true, + UserPass: strings.Repeat("user", 80), + OwnerPass: strings.Repeat("owner", 80), + }, + } + + const ( + perms = 0x12345678 + ) + + for _, R := range []int{5, 6} { + R := R + t.Run(fmt.Sprintf("R=%d", R), func(t *testing.T) { + for _, c := range cases { + c := c + t.Run(c.Name, func(t *testing.T) { + sh := stdHandlerR6{} // V=5 + d := &StdEncryptDict{ + R: R, P: perms, + EncryptMetadata: c.EncMeta, + } + + // generate encryption parameters + ekey, err := sh.GenerateParams(d, []byte(c.OwnerPass), []byte(c.UserPass)) + if err != nil { + t.Fatal("Failed to encrypt:", err) + } + + // Perms and EncryptMetadata are checked as a part of alg2a + + // decrypt using user password + key, uperm, err := sh.alg2a(d, []byte(c.UserPass)) + if err != nil || uperm != perms { + t.Error("Failed to authenticate user pass:", err) + } else if !bytes.Equal(ekey, key) { + t.Error("wrong encryption key") + } + + // decrypt using owner password + key, uperm, err = sh.alg2a(d, []byte(c.OwnerPass)) + if err != nil || uperm != PermOwner { + t.Error("Failed to authenticate owner pass:", err, uperm) + } else if !bytes.Equal(ekey, key) { + t.Error("wrong encryption key") + } + + // try to elevate user permissions + d.P = PermOwner + + key, uperm, err = sh.alg2a(d, []byte(c.UserPass)) + if R == 5 { + // it's actually possible with R=5, since Perms is not generated + if err != nil || uperm != PermOwner { + t.Error("Failed to authenticate user pass:", err) + } + } else { + // not possible in R=6, should return an error + if err == nil || uperm == PermOwner { + t.Error("was able to elevate permissions with R=6") + } + } + }) + } + }) + } +} diff --git a/pdf/creator/chapters.go b/pdf/creator/chapters.go index 4e991251..2502ab0c 100644 --- a/pdf/creator/chapters.go +++ b/pdf/creator/chapters.go @@ -8,6 +8,7 @@ package creator import ( "errors" "fmt" + "strconv" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/model" @@ -40,7 +41,7 @@ type Chapter struct { margins margins // Reference to the creator's TOC. - toc *TableOfContents + toc *TOC } // NewChapter creates a new chapter with the specified title as the heading. @@ -149,7 +150,12 @@ func (chap *Chapter) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, if chap.includeInTOC { // Add to TOC. - chap.toc.add(chap.title, chap.number, 0, ctx.Page) + chapNumber := "" + if chap.number != 0 { + chapNumber = strconv.Itoa(chap.number) + "." 
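+			// e.g. chapter 2 produces the TOC entry number "2."; the title and page are added below.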
+ } + + chap.toc.Add(chapNumber, chap.title, strconv.Itoa(ctx.Page), 1) } for _, d := range chap.contents { diff --git a/pdf/creator/creator.go b/pdf/creator/creator.go index 6d9cf7ac..89b15a37 100644 --- a/pdf/creator/creator.go +++ b/pdf/creator/creator.go @@ -9,6 +9,7 @@ import ( "errors" "io" "os" + "strconv" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/model" @@ -33,17 +34,23 @@ type Creator struct { // Hooks. genFrontPageFunc func(args FrontpageFunctionArgs) - genTableOfContentFunc func(toc *TableOfContents) (*Chapter, error) + genTableOfContentFunc func(toc *TOC) error drawHeaderFunc func(header *Block, args HeaderFunctionArgs) drawFooterFunc func(footer *Block, args FooterFunctionArgs) pdfWriterAccessFunc func(writer *model.PdfWriter) error finalized bool - toc *TableOfContents + // The table of contents. + toc *TOC + + // Controls whether a table of contents will be added. + AddTOC bool // Forms. acroForm *model.PdfAcroForm + + optimizer model.Optimizer } // SetForms adds an Acroform to a PDF file. Sets the specified form for writing. @@ -96,11 +103,21 @@ func New() *Creator { c.pageMargins.top = m c.pageMargins.bottom = m - c.toc = newTableOfContents() + c.toc = NewTOC("Table of Contents") return c } +// SetOptimizer sets the optimizer to optimize PDF before writing. +func (c *Creator) SetOptimizer(optimizer model.Optimizer) { + c.optimizer = optimizer +} + +// GetOptimizer returns current PDF optimizer. +func (c *Creator) GetOptimizer() model.Optimizer { + return c.optimizer +} + // SetPageMargins sets the page margins: left, right, top, bottom. // The default page margins are 10% of document width. func (c *Creator) SetPageMargins(left, right, top, bottom float64) { @@ -120,6 +137,21 @@ func (c *Creator) Height() float64 { return c.pageHeight } +// TOC returns the table of contents component of the creator. +func (c *Creator) TOC() *TOC { + return c.toc +} + +// SetTOC sets the table of content component of the creator. +// This method should be used when building a custom table of contents. +func (c *Creator) SetTOC(toc *TOC) { + if toc == nil { + return + } + + c.toc = toc +} + func (c *Creator) setActivePage(p *model.PdfPage) { c.activePage = p } @@ -182,7 +214,7 @@ func (c *Creator) CreateFrontPage(genFrontPageFunc func(args FrontpageFunctionAr } // CreateTableOfContents sets a function to generate table of contents. -func (c *Creator) CreateTableOfContents(genTOCFunc func(toc *TableOfContents) (*Chapter, error)) { +func (c *Creator) CreateTableOfContents(genTOCFunc func(toc *TOC) error) { c.genTableOfContentFunc = genTOCFunc } @@ -271,8 +303,8 @@ func (c *Creator) Context() DrawContext { return c.context } -// Call before writing out. Takes care of adding headers and footers, as well as generating front -// Page and table of contents. +// Call before writing out. Takes care of adding headers and footers, as well +// as generating front Page and table of contents. func (c *Creator) finalize() error { totPages := len(c.pages) @@ -281,16 +313,18 @@ func (c *Creator) finalize() error { if c.genFrontPageFunc != nil { genpages++ } - if c.genTableOfContentFunc != nil { + if c.AddTOC { c.initContext() c.context.Page = genpages + 1 - ch, err := c.genTableOfContentFunc(c.toc) - if err != nil { - return err + + if c.genTableOfContentFunc != nil { + if err := c.genTableOfContentFunc(c.toc); err != nil { + return err + } } // Make an estimate of the number of pages. 
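+		// The TOC blocks are generated here only to count how many pages the TOC itself
+		// occupies; the page numbers recorded in the lines are then shifted by that amount
+		// (plus the front page), and the blocks that are actually written out are generated
+		// again further down.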
- blocks, _, err := ch.GeneratePageBlocks(c.context) + blocks, _, err := c.toc.GeneratePageBlocks(c.context) if err != nil { common.Log.Debug("Failed to generate blocks: %v", err) return err @@ -298,12 +332,15 @@ func (c *Creator) finalize() error { genpages += len(blocks) // Update the table of content Page numbers, accounting for front Page and TOC. - for idx := range c.toc.entries { - c.toc.entries[idx].PageNumber += genpages - } + lines := c.toc.Lines() + for _, line := range lines { + pageNum, err := strconv.Atoi(line.Page.Text) + if err != nil { + continue + } - // Remove the TOC chapter entry. - c.toc.entries = c.toc.entries[:len(c.toc.entries)-1] + line.Page.Text = strconv.Itoa(pageNum + genpages) + } } hasFrontPage := false @@ -323,17 +360,17 @@ func (c *Creator) finalize() error { hasFrontPage = true } - if c.genTableOfContentFunc != nil { + if c.AddTOC { c.initContext() - ch, err := c.genTableOfContentFunc(c.toc) - if err != nil { - common.Log.Debug("Error generating TOC: %v", err) - return err - } - ch.SetShowNumbering(false) - ch.SetIncludeInTOC(false) - blocks, _, _ := ch.GeneratePageBlocks(c.context) + if c.genTableOfContentFunc != nil { + if err := c.genTableOfContentFunc(c.toc); err != nil { + common.Log.Debug("Error generating TOC: %v", err) + return err + } + } + + blocks, _, _ := c.toc.GeneratePageBlocks(c.context) tocpages := []*model.PdfPage{} for _, block := range blocks { block.SetPos(0, 0) @@ -459,13 +496,15 @@ func (c *Creator) Draw(d Drawable) error { return nil } -// Write output of creator to io.WriteSeeker interface. -func (c *Creator) Write(ws io.WriteSeeker) error { +// Write output of creator to io.Writer interface. +func (c *Creator) Write(ws io.Writer) error { if !c.finalized { c.finalize() } pdfWriter := model.NewPdfWriter() + pdfWriter.SetOptimizer(c.optimizer) + // Form fields. if c.acroForm != nil { err := pdfWriter.SetForms(c.acroForm) diff --git a/pdf/creator/creator_test.go b/pdf/creator/creator_test.go index b7bc44ef..2e707641 100644 --- a/pdf/creator/creator_test.go +++ b/pdf/creator/creator_test.go @@ -10,10 +10,12 @@ package creator // if every detail is correct. import ( + "bytes" "fmt" goimage "image" "io/ioutil" "math" + "os" "testing" "github.com/boombuler/barcode" @@ -23,6 +25,7 @@ import ( "github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/internal/textencoding" "github.com/unidoc/unidoc/pdf/model" + "github.com/unidoc/unidoc/pdf/model/optimize" ) func init() { @@ -757,6 +760,17 @@ func TestChapterMargins(t *testing.T) { func TestSubchaptersSimple(t *testing.T) { c := New() + // Enable table of contents and set the style of the lines. + c.AddTOC = true + + lineStyle := NewTextStyle() + lineStyle.Font = model.NewStandard14FontMustCompile(model.HelveticaBold) + + toc := c.TOC() + toc.SetLineStyle(lineStyle) + toc.SetLineMargins(0, 0, 3, 3) + + // Add chapters. ch1 := c.NewChapter("Introduction") subchap1 := c.NewSubchapter(ch1, "The fundamentals of the mastery of the most genious experiment of all times in modern world history. The story of the maker and the maker bot and the genius cow.") subchap1.SetMargins(0, 0, 5, 0) @@ -814,47 +828,33 @@ func TestSubchaptersSimple(t *testing.T) { c.Draw(p) }) - // Set a function to create the table of contents. - // Should be able to wrap.. 
- c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) { - ch := c.NewChapter("Table of contents") - ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5)) - ch.GetHeading().SetFontSize(28) - ch.GetHeading().SetMargins(0, 0, 0, 30) + // The table of contents is created automatically if the + // AddTOC property of the creator is set to true. + // This function is used just to customize the style of the TOC. + c.CreateTableOfContents(func(toc *TOC) error { + // Set style of TOC heading just before render. + style := NewTextStyle() + style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5) + style.FontSize = 20 - table := NewTable(2) // 2 column table. - // Default, equal column sizes (4x0.25)... - table.SetColumnWidths(0.9, 0.1) + toc.SetHeading("Table of Contents", style) - for _, entry := range toc.entries { - // Col 1. Chapter number, title. - var str string - if entry.Subchapter == 0 { - str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title) - } else { - str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title) - } - p := NewParagraph(str) - p.SetFontSize(14) - cell := table.NewCell() - cell.SetContent(p) - // Set the paragraph width to the cell width. - p.SetWidth(cell.Width(c.Context())) - table.SetRowHeight(table.CurRow(), p.Height()*1.2) + // Set style of TOC lines just before render. + lineStyle := NewTextStyle() + lineStyle.FontSize = 14 - // Col 1. Page number. - p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber)) - p.SetFontSize(14) - cell = table.NewCell() - cell.SetContent(p) - } - err := ch.Add(table) - if err != nil { - fmt.Printf("Error adding table: %v\n", err) - return nil, err + pageStyle := lineStyle + pageStyle.Font = model.NewStandard14FontMustCompile(model.HelveticaBold) + + lines := toc.Lines() + for _, line := range lines { + line.SetStyle(lineStyle) + + // Make page part bold. + line.Page.Style = pageStyle } - return ch, nil + return nil }) err := c.WriteToFile("/tmp/3_subchapters_simple.pdf") @@ -867,6 +867,19 @@ func TestSubchaptersSimple(t *testing.T) { func TestSubchapters(t *testing.T) { c := New() + // Enable table of contents and set the style of the lines. + c.AddTOC = true + + lineStyle := NewTextStyle() + lineStyle.Font = model.NewStandard14FontMustCompile(model.Helvetica) + lineStyle.FontSize = 14 + lineStyle.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5) + + toc := c.TOC() + toc.SetLineStyle(lineStyle) + toc.SetLineMargins(0, 0, 3, 3) + + // Add chapters. ch1 := c.NewChapter("Introduction") subchap1 := c.NewSubchapter(ch1, "The fundamentals") subchap1.SetMargins(0, 0, 5, 0) @@ -928,46 +941,28 @@ func TestSubchapters(t *testing.T) { c.Draw(p) }) - // Set a function to create the table of contents. - c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) { - ch := c.NewChapter("Table of contents") - ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5)) - ch.GetHeading().SetFontSize(28) - ch.GetHeading().SetMargins(0, 0, 0, 30) + // The table of contents is created automatically if the + // AddTOC property of the creator is set to true. + // This function is used just to customize the style of the TOC. + c.CreateTableOfContents(func(toc *TOC) error { + // Set style of TOC heading just before render. + style := NewTextStyle() + style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5) + style.FontSize = 20 - table := NewTable(2) - // Default, equal column sizes (4x0.25)... 
- table.SetColumnWidths(0.9, 0.1) + toc.SetHeading("Table of Contents", style) - for _, entry := range toc.entries { - // Col 1. Chapter number, title. - var str string - if entry.Subchapter == 0 { - str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title) - } else { - str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title) - } - p := NewParagraph(str) - p.SetFontSize(14) - cell := table.NewCell() - cell.SetContent(p) - // Set the paragraph width to the cell width. - p.SetWidth(cell.Width(c.Context())) - table.SetRowHeight(table.CurRow(), p.Height()*1.2) + // Set style of TOC lines just before render. + pageStyle := NewTextStyle() + pageStyle.Font = model.NewStandard14FontMustCompile(model.HelveticaBold) + pageStyle.FontSize = 10 - // Col 1. Page number. - p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber)) - p.SetFontSize(14) - cell = table.NewCell() - cell.SetContent(p) - } - err := ch.Add(table) - if err != nil { - fmt.Printf("Error adding table: %v\n", err) - return nil, err + lines := toc.Lines() + for _, line := range lines { + line.Page.Style = pageStyle } - return ch, nil + return nil }) addHeadersAndFooters(c) @@ -2132,4 +2127,775 @@ func TestEncrypting1(t *testing.T) { t.Errorf("Fail: %v\n", err) return } + + // Try reading generated PDF and ensure encryption is OK. + // Try writing out to memory and opening with password. + var buf bytes.Buffer + err = c.Write(&buf) + if err != nil { + t.Fatalf("Error: %v", err) + } + r, err := model.NewPdfReader(bytes.NewReader(buf.Bytes())) + if err != nil { + t.Fatalf("Error: %v", err) + } + isEnc, err := r.IsEncrypted() + if err != nil { + t.Fatalf("Error: %v", err) + } + if !isEnc { + t.Fatalf("Error: Should be encrypted") + } + ok, err := r.Decrypt([]byte("password")) + if err != nil { + t.Fatalf("Error: %v", err) + } + if !ok { + t.Fatalf("Failed to decrypt") + } + numpages, err := r.GetNumPages() + if err != nil { + t.Fatalf("Error: %v", err) + } + if numpages <= 0 { + t.Fatalf("Pages should be 1+") + } +} + +// TestOptimizeCombineDuplicateStreams tests optimizing PDFs to reduce output file size. +func TestOptimizeCombineDuplicateStreams(t *testing.T) { + c := createPdf4Optimization(t) + + err := c.WriteToFile("/tmp/7_combine_duplicate_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createPdf4Optimization(t) + + c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateStreams: true})) + + err = c.WriteToFile("/tmp/7_combine_duplicate_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/7_combine_duplicate_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/7_combine_duplicate_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestOptimizeImageQuality tests optimizing PDFs to reduce output file size. +func TestOptimizeImageQuality(t *testing.T) { + c := New() + + imgDataJpeg, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + imgJpeg, err := NewImageFromData(imgDataJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + // JPEG encoder (DCT) with quality factor 70. 
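+	// (The quality factor configured below is 100; the optimizer set further down
+	// recompresses the images at ImageQuality 20 to shrink the output.)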
+ encoder := core.NewDCTEncoder() + encoder.Quality = 100 + encoder.Width = int(imgJpeg.Width()) + encoder.Height = int(imgJpeg.Height()) + imgJpeg.SetEncoder(encoder) + + imgJpeg.SetPos(250, 350) + imgJpeg.Scale(0.25, 0.25) + + err = c.Draw(imgJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + err = c.WriteToFile("/tmp/8_image_quality_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.SetOptimizer(optimize.New(optimize.Options{ImageQuality: 20})) + + err = c.WriteToFile("/tmp/8_image_quality_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/8_image_quality_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/8_image_quality_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +func createPdf4Optimization(t *testing.T) *Creator { + c := New() + + p := NewParagraph("Test text1") + // Change to times bold font (default is helvetica). + font, err := model.NewStandard14Font(model.CourierBold) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + p.SetFont(font) + p.SetPos(15, 15) + _ = c.Draw(p) + + imgData, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img, err := NewImageFromData(imgData) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img.SetPos(0, 100) + img.ScaleToWidth(1.0 * c.Width()) + + err = c.Draw(img) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img1, err := NewImageFromData(imgData) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img1.SetPos(0, 200) + img1.ScaleToWidth(1.0 * c.Width()) + + err = c.Draw(img1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + imgData2, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img2, err := NewImageFromData(imgData2) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img2.SetPos(0, 500) + img2.ScaleToWidth(1.0 * c.Width()) + + c.NewPage() + p = NewParagraph("Test text2") + // Change to times bold font (default is helvetica). + font, err = model.NewStandard14Font(model.Helvetica) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + p.SetFont(font) + p.SetPos(15, 15) + _ = c.Draw(p) + + err = c.Draw(img2) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + return c +} + +// TestOptimizeUseObjectStreams tests optimizing PDFs to reduce output file size. 
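+// Object streams (PDF 1.5+) pack multiple non-stream objects into a single compressed
+// stream, which typically reduces cross-reference and per-object overhead.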
+func TestOptimizeUseObjectStreams(t *testing.T) { + c := createPdf4Optimization(t) + + err := c.WriteToFile("/tmp/9_use_object_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createPdf4Optimization(t) + c.SetOptimizer(optimize.New(optimize.Options{UseObjectStreams: true})) + + err = c.WriteToFile("/tmp/9_use_object_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/9_use_object_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/9_use_object_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestCombineDuplicateDirectObjects tests optimizing PDFs to reduce output file size. +func TestCombineDuplicateDirectObjects(t *testing.T) { + + createDoc := func() *Creator { + c := New() + c.AddTOC = true + + ch1 := c.NewChapter("Introduction") + subchap1 := c.NewSubchapter(ch1, "The fundamentals") + subchap1.SetMargins(0, 0, 5, 0) + + //subCh1 := NewSubchapter(ch1, "Workflow") + + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " + + "eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + p.SetTextAlignment(TextAlignmentJustify) + p.SetMargins(0, 0, 5, 0) + + for j := 0; j < 2; j++ { + subchap1.Add(p) + } + + subchap2 := c.NewSubchapter(ch1, "Mechanism") + subchap2.SetMargins(0, 0, 5, 0) + for j := 0; j < 3; j++ { + subchap2.Add(p) + } + + subchap3 := c.NewSubchapter(ch1, "Discussion") + subchap3.SetMargins(0, 0, 5, 0) + for j := 0; j < 4; j++ { + subchap3.Add(p) + } + + subchap4 := c.NewSubchapter(ch1, "Conclusion") + subchap4.SetMargins(0, 0, 5, 0) + for j := 0; j < 3; j++ { + subchap4.Add(p) + } + c.Draw(ch1) + + for i := 0; i < 5; i++ { + ch2 := c.NewChapter("References") + ch2.SetMargins(1, 1, 1, 1) + for j := 0; j < 13; j++ { + ch2.Add(p) + } + metadata := core.MakeDict() + metadata.Set(core.PdfObjectName("TEST"), core.MakeString("---------------- ## ----------------")) + c.Draw(ch2) + c.getActivePage().Metadata = metadata + } + + // Set a function to create the front Page. + c.CreateFrontPage(func(args FrontpageFunctionArgs) { + p := NewParagraph("Example Report") + p.SetWidth(c.Width()) + p.SetTextAlignment(TextAlignmentCenter) + p.SetFontSize(32) + p.SetPos(0, 300) + c.Draw(p) + + p.SetFontSize(22) + p.SetText("Example Report Data Results") + p.SetPos(0, 340) + c.Draw(p) + }) + + // The table of contents is created automatically if the + // AddTOC property of the creator is set to true. + // This function is used just to customize the style of the TOC. 
+ c.CreateTableOfContents(func(toc *TOC) error { + style := NewTextStyle() + style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5) + style.FontSize = 20 + + toc.SetHeading("Table of Contents", style) + return nil + }) + + addHeadersAndFooters(c) + return c + } + + c := createDoc() + + err := c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createDoc() + c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateDirectObjects: true})) + + err = c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestOptimizeImagePPI tests optimizing PDFs to reduce output file size. +func TestOptimizeImagePPI(t *testing.T) { + c := New() + + imgDataJpeg, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + imgJpeg, err := NewImageFromData(imgDataJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + // JPEG encoder (DCT) with quality factor 100. + encoder := core.NewDCTEncoder() + encoder.Quality = 100 + encoder.Width = int(imgJpeg.Width()) + encoder.Height = int(imgJpeg.Height()) + imgJpeg.SetEncoder(encoder) + + imgJpeg.SetPos(250, 350) + imgJpeg.Scale(0.25, 0.25) + + err = c.Draw(imgJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.NewPage() + + imgData, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + } + + img, err := NewImageFromData(imgData) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + } + + img.SetPos(0, 100) + img.ScaleToWidth(0.1 * c.Width()) + + err = c.Draw(img) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + } + + err = c.Draw(imgJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + err = c.WriteToFile("/tmp/11_image_ppi_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.SetOptimizer(optimize.New(optimize.Options{ImageUpperPPI: 144})) + + err = c.WriteToFile("/tmp/11_image_ppi_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/11_image_ppi_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/11_image_ppi_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestCombineIdenticalIndirectObjects tests optimizing PDFs to reduce output file size. 
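+// The document built below repeats the same paragraph content across many chapters, so
+// merging identical indirect objects should have a measurable effect on the output size.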
+func TestCombineIdenticalIndirectObjects(t *testing.T) { + c := New() + c.AddTOC = true + + ch1 := c.NewChapter("Introduction") + subchap1 := c.NewSubchapter(ch1, "The fundamentals") + subchap1.SetMargins(0, 0, 5, 0) + + //subCh1 := NewSubchapter(ch1, "Workflow") + + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " + + "eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + p.SetTextAlignment(TextAlignmentJustify) + p.SetMargins(0, 0, 5, 0) + for j := 0; j < 5; j++ { + subchap1.Add(p) + } + + subchap2 := c.NewSubchapter(ch1, "Mechanism") + subchap2.SetMargins(0, 0, 5, 0) + for j := 0; j < 15; j++ { + subchap2.Add(p) + } + + subchap3 := c.NewSubchapter(ch1, "Discussion") + subchap3.SetMargins(0, 0, 5, 0) + for j := 0; j < 19; j++ { + subchap3.Add(p) + } + + subchap4 := c.NewSubchapter(ch1, "Conclusion") + subchap4.SetMargins(0, 0, 5, 0) + for j := 0; j < 23; j++ { + subchap4.Add(p) + } + + c.Draw(ch1) + + for i := 0; i < 50; i++ { + ch2 := c.NewChapter("References") + for j := 0; j < 13; j++ { + ch2.Add(p) + } + + c.Draw(ch2) + } + + // Set a function to create the front Page. + c.CreateFrontPage(func(args FrontpageFunctionArgs) { + p := NewParagraph("Example Report") + p.SetWidth(c.Width()) + p.SetTextAlignment(TextAlignmentCenter) + p.SetFontSize(32) + p.SetPos(0, 300) + c.Draw(p) + + p.SetFontSize(22) + p.SetText("Example Report Data Results") + p.SetPos(0, 340) + c.Draw(p) + }) + + // The table of contents is created automatically if the + // AddTOC property of the creator is set to true. + // This function is used just to customize the style of the TOC. + c.CreateTableOfContents(func(toc *TOC) error { + style := NewTextStyle() + style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5) + style.FontSize = 20 + + toc.SetHeading("Table of Contents", style) + return nil + }) + + addHeadersAndFooters(c) + + err := c.WriteToFile("/tmp/12_identical_indirect_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.SetOptimizer(optimize.New(optimize.Options{CombineIdenticalIndirectObjects: true})) + + err = c.WriteToFile("/tmp/12_identical_indirect_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/12_identical_indirect_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/12_identical_indirect_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestCompressStreams tests optimizing PDFs to reduce output file size. +func TestCompressStreams(t *testing.T) { + createDoc := func() *Creator { + c := New() + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt" + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore" + + "eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + + p.SetMargins(0, 0, 5, 0) + c.Draw(p) + //c.NewPage() + + page := c.pages[0] + // Need to add Arial to the page resources to avoid generating invalid PDF (avoid build fail). + times := model.NewStandard14FontMustCompile(model.TimesRoman) + page.Resources.SetFontByName("Times", times.ToPdfObject()) + page.AddContentStreamByString(`BT +/Times 56 Tf +20 600 Td +(The multiline example text)Tj +/Times 30 Tf +0 30 Td +60 TL +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +ET`) + return c + } + + c := createDoc() + + err := c.WriteToFile("/tmp/13_compress_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createDoc() + c.SetOptimizer(optimize.New(optimize.Options{CompressStreams: true})) + + err = c.WriteToFile("/tmp/13_compress_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/13_compress_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/13_compress_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestAllOptimizations tests optimizing PDFs to reduce output file size. +func TestAllOptimizations(t *testing.T) { + + createDoc := func() *Creator { + c := New() + c.AddTOC = true + + ch1 := c.NewChapter("Introduction") + subchap1 := c.NewSubchapter(ch1, "The fundamentals") + subchap1.SetMargins(0, 0, 5, 0) + + //subCh1 := NewSubchapter(ch1, "Workflow") + + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " + + "eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + p.SetTextAlignment(TextAlignmentJustify) + p.SetMargins(0, 0, 5, 0) + for j := 0; j < 7; j++ { + subchap1.Add(p) + } + + subchap2 := c.NewSubchapter(ch1, "Mechanism") + subchap2.SetMargins(0, 0, 5, 0) + for j := 0; j < 15; j++ { + subchap2.Add(p) + } + + subchap3 := c.NewSubchapter(ch1, "Discussion") + subchap3.SetMargins(0, 0, 5, 0) + for j := 0; j < 19; j++ { + subchap3.Add(p) + } + + subchap4 := c.NewSubchapter(ch1, "Conclusion") + subchap4.SetMargins(0, 0, 5, 0) + for j := 0; j < 23; j++ { + subchap4.Add(p) + } + + c.Draw(ch1) + + for i := 0; i < 50; i++ { + ch2 := c.NewChapter("References") + for j := 0; j < 13; j++ { + ch2.Add(p) + } + + c.Draw(ch2) + } + + // Set a function to create the front Page. 
+ c.CreateFrontPage(func(args FrontpageFunctionArgs) { + p := NewParagraph("Example Report") + p.SetWidth(c.Width()) + p.SetTextAlignment(TextAlignmentCenter) + p.SetFontSize(32) + p.SetPos(0, 300) + c.Draw(p) + + p.SetFontSize(22) + p.SetText("Example Report Data Results") + p.SetPos(0, 340) + c.Draw(p) + }) + + // The table of contents is created automatically if the + // AddTOC property of the creator is set to true. + // This function is used just to customize the style of the TOC. + c.CreateTableOfContents(func(toc *TOC) error { + style := NewTextStyle() + style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5) + style.FontSize = 20 + + toc.SetHeading("Table of Contents", style) + + return nil + }) + + addHeadersAndFooters(c) + return c + } + + c := createDoc() + + err := c.WriteToFile("/tmp/14_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createDoc() + c.SetOptimizer(optimize.New(optimize.Options{ + CombineDuplicateDirectObjects: true, + CombineIdenticalIndirectObjects: true, + ImageUpperPPI: 50.0, + UseObjectStreams: true, + ImageQuality: 50, + CombineDuplicateStreams: true, + CompressStreams: true, + })) + + err = c.WriteToFile("/tmp/14_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/14_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/14_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } } diff --git a/pdf/creator/division_test.go b/pdf/creator/division_test.go index dbf2888e..dcea574f 100644 --- a/pdf/creator/division_test.go +++ b/pdf/creator/division_test.go @@ -139,13 +139,14 @@ func TestDivInline(t *testing.T) { style.Color = ColorRGBFrom8bit(0, 0, 255) s := NewStyledParagraph("This styled paragraph should ", style) + s.SetEnableWrap(false) style.Color = ColorRGBFrom8bit(255, 0, 0) s.Append("fit", style) style.Color = ColorRGBFrom8bit(0, 255, 0) style.Font = fontBold - s.Append(" right in.", style) + s.Append(" in.", style) div.Add(s) diff --git a/pdf/creator/paragraph.go b/pdf/creator/paragraph.go index 77b1dcf6..3b8ee8da 100644 --- a/pdf/creator/paragraph.go +++ b/pdf/creator/paragraph.go @@ -85,7 +85,7 @@ func NewParagraph(text string) *Paragraph { // TODO: Can we wrap intellectually, only if given width is known? - p.enableWrap = false + p.enableWrap = true p.defaultWrap = true p.SetColor(ColorRGBFrom8bit(0, 0, 0)) p.alignment = TextAlignmentLeft @@ -187,13 +187,12 @@ func (p *Paragraph) GetMargins() (float64, float64, float64, float64) { // text can extend to prior to wrapping over to next line. func (p *Paragraph) SetWidth(width float64) { p.wrapWidth = width - p.enableWrap = true p.wrapText() } // Width returns the width of the Paragraph. func (p *Paragraph) Width() float64 { - if p.enableWrap { + if p.enableWrap && int(p.wrapWidth) > 0 { return p.wrapWidth } return p.getTextWidth() / 1000.0 @@ -236,10 +235,54 @@ func (p *Paragraph) getTextWidth() float64 { return w } +// getTextLineWidth calculates the text width of a provided line of text. 
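+// The width is returned in 1000ths of a point (font size multiplied by the glyph Wx metric),
+// matching getTextWidth, so callers divide by 1000; a negative value signals a missing glyph
+// or missing glyph metrics.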
+func (p *Paragraph) getTextLineWidth(line string) float64 { + var width float64 + for _, r := range line { + glyph, found := p.textFont.Encoder().RuneToGlyph(r) + if !found { + common.Log.Debug("ERROR: Glyph not found for rune: 0x%04x=%c", r, r) + return -1 // XXX/FIXME: return error. + } + + // Ignore newline for this.. Handles as if all in one line. + if glyph == "controlLF" { + continue + } + + metrics, found := p.textFont.GetGlyphCharMetrics(glyph) + if !found { + common.Log.Debug("ERROR: Glyph char metrics not found! %q (rune 0x%04x=%c)", glyph, r, r) + return -1 // XXX/FIXME: return error. + } + + width += p.fontSize * metrics.Wx + } + + return width +} + +// getMaxLineWidth returns the width of the longest line of text in the paragraph. +func (p *Paragraph) getMaxLineWidth() float64 { + if p.textLines == nil || len(p.textLines) == 0 { + p.wrapText() + } + + var width float64 + for _, line := range p.textLines { + w := p.getTextLineWidth(line) + if w > width { + width = w + } + } + + return width +} + // Simple algorithm to wrap the text into lines (greedy algorithm - fill the lines). // XXX/TODO: Consider the Knuth/Plass algorithm or an alternative. func (p *Paragraph) wrapText() error { - if !p.enableWrap { + if !p.enableWrap || int(p.wrapWidth) <= 0 { p.textLines = []string{p.text} return nil } @@ -367,7 +410,7 @@ func (p *Paragraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, } } else { // Absolute. - if p.wrapWidth == 0 { + if int(p.wrapWidth) <= 0 { // Use necessary space. p.SetWidth(p.getTextWidth()) } diff --git a/pdf/creator/styled_paragraph.go b/pdf/creator/styled_paragraph.go index 698f83ac..86707a4a 100644 --- a/pdf/creator/styled_paragraph.go +++ b/pdf/creator/styled_paragraph.go @@ -20,10 +20,10 @@ import ( // StyledParagraph represents text drawn with a specified font and can wrap across lines and pages. // By default occupies the available width in the drawing context. type StyledParagraph struct { - // Text chunks with styles that compose the paragraph + // Text chunks with styles that compose the paragraph. chunks []TextChunk - // Style used for the paragraph for spacing and offsets + // Style used for the paragraph for spacing and offsets. defaultStyle TextStyle // The text encoder which can convert the text (as runes) into a series of glyphs and get character metrics. @@ -63,6 +63,9 @@ type StyledParagraph struct { // Text chunk lines after wrapping to available width. lines [][]TextChunk + + // Before render callback. + beforeRender func(p *StyledParagraph, ctx DrawContext) } // NewStyledParagraph creates a new styled paragraph. @@ -104,6 +107,23 @@ func (p *StyledParagraph) Append(text string, style TextStyle) { p.wrapText() } +// Insert adds a new text chunk at the specified position in the paragraph. +func (p *StyledParagraph) Insert(index uint, text string, style TextStyle) { + l := uint(len(p.chunks)) + if index > l { + index = l + } + + chunk := TextChunk{ + Text: text, + Style: style, + } + chunk.Style.Font.SetEncoder(p.encoder) + + p.chunks = append(p.chunks[:index], append([]TextChunk{chunk}, p.chunks[index:]...)...) + p.wrapText() +} + // Reset sets the entire text and also the style of the paragraph // to those specified. It behaves as if the paragraph was a new one. func (p *StyledParagraph) Reset(text string, style TextStyle) { @@ -173,7 +193,7 @@ func (p *StyledParagraph) SetWidth(width float64) { // Width returns the width of the Paragraph. 
func (p *StyledParagraph) Width() float64 { - if p.enableWrap { + if p.enableWrap && int(p.wrapWidth) > 0 { return p.wrapWidth } @@ -238,6 +258,58 @@ func (p *StyledParagraph) getTextWidth() float64 { return width } +// getTextLineWidth calculates the text width of a provided collection of text chunks. +func (p *StyledParagraph) getTextLineWidth(line []TextChunk) float64 { + var width float64 + for _, chunk := range line { + style := &chunk.Style + + for _, r := range chunk.Text { + glyph, found := p.encoder.RuneToGlyph(r) + if !found { + common.Log.Debug("Error! Glyph not found for rune: %s\n", r) + + // XXX/FIXME: return error. + return -1 + } + + // Ignore newline for this.. Handles as if all in one line. + if glyph == "controlLF" { + continue + } + + metrics, found := style.Font.GetGlyphCharMetrics(glyph) + if !found { + common.Log.Debug("Glyph char metrics not found! %s\n", glyph) + + // XXX/FIXME: return error. + return -1 + } + + width += style.FontSize * metrics.Wx + } + } + + return width +} + +// getMaxLineWidth returns the width of the longest line of text in the paragraph. +func (p *StyledParagraph) getMaxLineWidth() float64 { + if p.lines == nil || len(p.lines) == 0 { + p.wrapText() + } + + var width float64 + for _, line := range p.lines { + w := p.getTextLineWidth(line) + if w > width { + width = w + } + } + + return width +} + // getTextHeight calculates the text height as if all in one line (not taking wrapping into account). func (p *StyledParagraph) getTextHeight() float64 { var height float64 @@ -255,7 +327,7 @@ func (p *StyledParagraph) getTextHeight() float64 { // fill the lines. // XXX/TODO: Consider the Knuth/Plass algorithm or an alternative. func (p *StyledParagraph) wrapText() error { - if !p.enableWrap { + if !p.enableWrap || int(p.wrapWidth) <= 0 { p.lines = [][]TextChunk{p.chunks} return nil } @@ -281,7 +353,7 @@ func (p *StyledParagraph) wrapText() error { } // newline wrapping. - if glyph == "controllf" { + if glyph == "controlLF" { // moves to next line. line = append(line, TextChunk{ Text: strings.TrimRightFunc(string(part), unicode.IsSpace), @@ -407,7 +479,7 @@ func (p *StyledParagraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCon } } else { // Absolute. - if p.wrapWidth == 0 { + if int(p.wrapWidth) <= 0 { // Use necessary space. p.SetWidth(p.getTextWidth()) } @@ -415,6 +487,10 @@ func (p *StyledParagraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCon ctx.Y = p.yPos } + if p.beforeRender != nil { + p.beforeRender(p, ctx) + } + // Place the Paragraph on the template at position (x,y) based on the ctx. ctx, err := drawStyledParagraphOnBlock(blk, p, ctx) if err != nil { @@ -434,7 +510,7 @@ func (p *StyledParagraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCon // Draw block on specified location on Page, adding to the content stream. func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) (DrawContext, error) { - // Find first free index for the font resources of the paragraph + // Find first free index for the font resources of the paragraph. num := 1 fontName := core.PdfObjectName(fmt.Sprintf("Font%d", num)) for blk.resources.HasFontByName(fontName) { @@ -442,7 +518,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) fontName = core.PdfObjectName(fmt.Sprintf("Font%d", num)) } - // Add default font to the page resources + // Add default font to the page resources. 
err := blk.resources.SetFontByName(fontName, p.defaultStyle.Font.ToPdfObject()) if err != nil { return ctx, err @@ -455,7 +531,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) // Wrap the text into lines. p.wrapText() - // Add the fonts of all chunks to the page resources + // Add the fonts of all chunks to the page resources. fonts := [][]core.PdfObjectName{} for _, line := range p.lines { @@ -539,19 +615,22 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) spaces += chunkSpaces } - // Add line shifts + // Add line shifts. objs := []core.PdfObject{} + + wrapWidth := p.wrapWidth * 1000.0 if p.alignment == TextAlignmentJustify { - // Not to justify last line. + // Do not justify last line. if spaces > 0 && !isLastLine { - spaceWidth = (p.wrapWidth*1000.0 - width) / float64(spaces) / defaultFontSize + spaceWidth = (wrapWidth - width) / float64(spaces) / defaultFontSize } } else if p.alignment == TextAlignmentCenter { - // Start with a shift. - shift := (p.wrapWidth*1000.0 - width - spaceWidth) / 2 / defaultFontSize + // Start with an offset of half of the remaining line space. + shift := (wrapWidth - width - spaceWidth) / 2 / defaultFontSize objs = append(objs, core.MakeFloat(-shift)) } else if p.alignment == TextAlignmentRight { - shift := (p.wrapWidth*1000.0 - width - spaceWidth) / defaultFontSize + // Push the text at the end of the line. + shift := (wrapWidth - width - spaceWidth) / defaultFontSize objs = append(objs, core.MakeFloat(-shift)) } @@ -561,7 +640,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) Add_TJ(objs...) } - // Render line text chunks + // Render line text chunks. for k, chunk := range line { style := &chunk.Style diff --git a/pdf/creator/styled_paragraph_test.go b/pdf/creator/styled_paragraph_test.go index 38bc5d1d..c756fd8c 100644 --- a/pdf/creator/styled_paragraph_test.go +++ b/pdf/creator/styled_paragraph_test.go @@ -128,6 +128,111 @@ func TestParagraphRegularVsStyled(t *testing.T) { cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) cell.SetContent(s) + // Test table cell alignment. + style = NewTextStyle() + + // Test left alignment with paragraph wrapping enabled. + p = NewParagraph("Wrap enabled. This text should be left aligned.") + p.SetEnableWrap(true) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft) + cell.SetContent(p) + + s = NewStyledParagraph("Wrap enabled. This text should be left aligned.", style) + s.SetEnableWrap(true) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft) + cell.SetContent(s) + + // Test left alignment with paragraph wrapping disabled. + p = NewParagraph("Wrap disabled. This text should be left aligned.") + p.SetEnableWrap(false) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft) + cell.SetContent(p) + + s = NewStyledParagraph("Wrap disabled. This text should be left aligned.", style) + s.SetEnableWrap(false) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft) + cell.SetContent(s) + + // Test center alignment with paragraph wrapping enabled. + p = NewParagraph("Wrap enabled. 
This text should be center aligned.") + p.SetEnableWrap(true) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter) + cell.SetContent(p) + + s = NewStyledParagraph("Wrap enabled. This text should be center aligned.", style) + s.SetEnableWrap(true) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter) + cell.SetContent(s) + + // Test center alignment with paragraph wrapping disabled. + p = NewParagraph("Wrap disabled. This text should be center aligned.") + p.SetEnableWrap(false) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter) + cell.SetContent(p) + + s = NewStyledParagraph("Wrap disabled. This text should be center aligned.", style) + s.SetEnableWrap(false) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter) + cell.SetContent(s) + + // Test right alignment with paragraph wrapping enabled. + p = NewParagraph("Wrap enabled. This text should be right aligned.") + p.SetEnableWrap(true) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentRight) + cell.SetContent(p) + + s = NewStyledParagraph("Wrap enabled. This text should be right aligned.", style) + s.SetEnableWrap(true) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentRight) + cell.SetContent(s) + + // Test right alignment with paragraph wrapping disabled. + p = NewParagraph("Wrap disabled. This text should be right aligned.") + p.SetEnableWrap(false) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentRight) + cell.SetContent(p) + + s = NewStyledParagraph("Wrap disabled. This text should be right aligned.", style) + s.SetEnableWrap(false) + + cell = table.NewCell() + cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1) + cell.SetHorizontalAlignment(CellHorizontalAlignmentRight) + cell.SetContent(s) + // Draw table. err = c.Draw(table) if err != nil { diff --git a/pdf/creator/subchapter.go b/pdf/creator/subchapter.go index 81758b55..49371617 100644 --- a/pdf/creator/subchapter.go +++ b/pdf/creator/subchapter.go @@ -7,6 +7,7 @@ package creator import ( "fmt" + "strconv" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/model" @@ -38,7 +39,7 @@ type Subchapter struct { margins margins // Reference to the creator's TOC. - toc *TableOfContents + toc *TOC } // NewSubchapter creates a new Subchapter under Chapter ch with specified title. @@ -154,7 +155,19 @@ func (subchap *Subchapter) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCo } if subchap.includeInTOC { // Add to TOC. - subchap.toc.add(subchap.title, subchap.chapterNum, subchap.subchapterNum, ctx.Page) + subchapNumber := "" + if subchap.chapterNum != 0 { + subchapNumber = strconv.Itoa(subchap.chapterNum) + } + if subchap.subchapterNum != 0 { + if subchapNumber != "" { + subchapNumber += "." + } + + subchapNumber += strconv.Itoa(subchap.subchapterNum) + "." 
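+			// e.g. chapter 2, subchapter 3 yields the TOC entry number "2.3.".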
+ } + + subchap.toc.Add(subchapNumber, subchap.title, strconv.Itoa(ctx.Page), 2) } for _, d := range subchap.contents { diff --git a/pdf/creator/table.go b/pdf/creator/table.go index 97d6f7e1..d7deca77 100644 --- a/pdf/creator/table.go +++ b/pdf/creator/table.go @@ -362,8 +362,20 @@ func (table *Table) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, } if cell.content != nil { + // content width. + cw := cell.content.Width() + switch t := cell.content.(type) { + case *Paragraph: + if t.enableWrap { + cw = t.getMaxLineWidth() / 1000.0 + } + case *StyledParagraph: + if t.enableWrap { + cw = t.getMaxLineWidth() / 1000.0 + } + } + // Account for horizontal alignment: - cw := cell.content.Width() // content width. switch cell.horizontalAlignment { case CellHorizontalAlignmentLeft: // Account for indent. @@ -691,15 +703,15 @@ func (cell *TableCell) SetContent(vd VectorDrawable) error { switch t := vd.(type) { case *Paragraph: if t.defaultWrap { - // Default paragraph settings in table: no wrapping. - t.enableWrap = false // No wrapping. + // Enable wrapping by default. + t.enableWrap = true } cell.content = vd case *StyledParagraph: if t.defaultWrap { - // Default styled paragraph settings in table: no wrapping. - t.enableWrap = false // No wrapping. + // Enable wrapping by default. + t.enableWrap = true } cell.content = vd diff --git a/pdf/creator/text_chunk.go b/pdf/creator/text_chunk.go new file mode 100644 index 00000000..061d3174 --- /dev/null +++ b/pdf/creator/text_chunk.go @@ -0,0 +1,15 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package creator + +// TextChunk represents a chunk of text along with a particular style. +type TextChunk struct { + // The text that is being rendered in the PDF. + Text string + + // The style of the text being rendered. + Style TextStyle +} diff --git a/pdf/creator/text_style.go b/pdf/creator/text_style.go index 71446827..4ec1e9a3 100644 --- a/pdf/creator/text_style.go +++ b/pdf/creator/text_style.go @@ -35,9 +35,3 @@ func NewTextStyle() TextStyle { FontSize: 10, } } - -// TextChunk represents a chunk of text along with a particular style. -type TextChunk struct { - Text string - Style TextStyle -} diff --git a/pdf/creator/toc.go b/pdf/creator/toc.go index 108ee460..082b7cb8 100644 --- a/pdf/creator/toc.go +++ b/pdf/creator/toc.go @@ -5,39 +5,227 @@ package creator -// TableOfContents provides an overview over chapters and subchapters when creating a document with Creator. -type TableOfContents struct { - entries []TableOfContentsEntry +import "github.com/unidoc/unidoc/pdf/model/fonts" + +// TOC represents a table of contents component. +// It consists of a paragraph heading and a collection of +// table of contents lines. +// The representation of a table of contents line is as follows: +// [number] [title] [separator] [page] +// e.g.: Chapter1 Introduction ........... 1 +type TOC struct { + // The heading of the table of contents. + heading *StyledParagraph + + // The lines of the table of contents. + lines []*TOCLine + + // The style of the number part of new TOC lines. + lineNumberStyle TextStyle + + // The style of the title part of new TOC lines. + lineTitleStyle TextStyle + + // The style of the separator part of new TOC lines. + lineSeparatorStyle TextStyle + + // The style of the page part of new TOC lines. + linePageStyle TextStyle + + // The separator for new TOC lines. 
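+	// NewTOC initializes the separator to "." so that new lines render as: title ..... page.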
+ lineSeparator string + + // The amount of space an indentation level occupies in a TOC line. + lineLevelOffset float64 + + // The margins of new TOC lines. + lineMargins margins + + // Positioning: relative/absolute. + positioning positioning } -// Make a new table of contents. -func newTableOfContents() *TableOfContents { - toc := TableOfContents{} - toc.entries = []TableOfContentsEntry{} - return &toc +// NewTOC creates a new table of contents. +func NewTOC(title string) *TOC { + headingStyle := NewTextStyle() + headingStyle.Font = fonts.NewFontHelveticaBold() + headingStyle.FontSize = 14 + + heading := NewStyledParagraph(title, headingStyle) + heading.SetEnableWrap(true) + heading.SetTextAlignment(TextAlignmentLeft) + heading.SetMargins(0, 0, 0, 5) + + lineStyle := NewTextStyle() + + return &TOC{ + heading: heading, + lines: []*TOCLine{}, + lineNumberStyle: lineStyle, + lineTitleStyle: lineStyle, + lineSeparatorStyle: lineStyle, + linePageStyle: lineStyle, + lineSeparator: ".", + lineLevelOffset: 10, + lineMargins: margins{0, 0, 2, 2}, + positioning: positionRelative, + } } -// Entries returns the table of content entries. -func (toc *TableOfContents) Entries() []TableOfContentsEntry { - return toc.entries +// Heading returns the heading component of the table of contents. +func (t *TOC) Heading() *StyledParagraph { + return t.heading } -// Add a TOC entry. -func (toc *TableOfContents) add(title string, chapter, subchapter, pageNum int) { - entry := TableOfContentsEntry{} - entry.Title = title - entry.Chapter = chapter - entry.Subchapter = subchapter - entry.PageNumber = pageNum - - toc.entries = append(toc.entries, entry) +// Lines returns all the lines the table of contents has. +func (t *TOC) Lines() []*TOCLine { + return t.lines } -// TableOfContentsEntry defines a single entry in the TableOfContents. -// Each entry has a title, chapter number, sub chapter (0 if chapter) and the page number. -type TableOfContentsEntry struct { - Title string - Chapter int - Subchapter int // 0 if chapter - PageNumber int // Page number +// SetHeading sets the text and the style of the heading of the TOC component. +func (t *TOC) SetHeading(text string, style TextStyle) { + t.heading.Reset(text, style) +} + +// Add adds a new line with the default style to the table of contents. +func (t *TOC) Add(number, title, page string, level uint) *TOCLine { + tl := t.AddLine(NewStyledTOCLine( + TextChunk{ + Text: number, + Style: t.lineNumberStyle, + }, + TextChunk{ + Text: title, + Style: t.lineTitleStyle, + }, + TextChunk{ + Text: page, + Style: t.linePageStyle, + }, + level, + )) + + if tl == nil { + return nil + } + + // Set line margins. + m := &t.lineMargins + tl.SetMargins(m.left, m.right, m.top, m.bottom) + + // Set line level offset. + tl.SetLevelOffset(t.lineLevelOffset) + + // Set line separator text and style. + tl.Separator.Text = t.lineSeparator + tl.Separator.Style = t.lineSeparatorStyle + + return tl +} + +// AddLine adds a new line with the provided style to the table of contents. +func (t *TOC) AddLine(line *TOCLine) *TOCLine { + if line == nil { + return nil + } + + t.lines = append(t.lines, line) + return line +} + +// SetLineSeparator sets the separator for all new lines of the table of contents. +func (t *TOC) SetLineSeparator(separator string) { + t.lineSeparator = separator +} + +// SetLineMargins sets the margins for all new lines of the table of contents. 
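// Illustrative sketch, not part of this change: minimal use of the TOC
// component defined above. Add returns the created TOCLine, so individual
// lines can still be customized after insertion.
func exampleBasicTOC() *TOC {
    toc := NewTOC("Table of Contents")
    toc.SetLineSeparator(".")

    toc.Add("1", "Introduction", "1", 1)
    line := toc.Add("1.1", "Background", "2", 2)
    line.SetMargins(0, 0, 5, 0)

    return toc
}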
+func (t *TOC) SetLineMargins(left, right, top, bottom float64) { + m := &t.lineMargins + + m.left = left + m.right = right + m.top = top + m.bottom = bottom +} + +// SetLineStyle sets the style for all the line components: number, title, +// separator, page. The style is applied only for new lines added to the +// TOC component. +func (t *TOC) SetLineStyle(style TextStyle) { + t.SetLineNumberStyle(style) + t.SetLineTitleStyle(style) + t.SetLineSeparatorStyle(style) + t.SetLinePageStyle(style) +} + +// SetLineNumberStyle sets the style for the numbers part of all new lines +// of the table of contents. +func (t *TOC) SetLineNumberStyle(style TextStyle) { + t.lineNumberStyle = style +} + +// SetLineTitleStyle sets the style for the title part of all new lines +// of the table of contents. +func (t *TOC) SetLineTitleStyle(style TextStyle) { + t.lineTitleStyle = style +} + +// SetLineSeparatorStyle sets the style for the separator part of all new +// lines of the table of contents. +func (t *TOC) SetLineSeparatorStyle(style TextStyle) { + t.lineSeparatorStyle = style +} + +// SetLinePageStyle sets the style for the page part of all new lines +// of the table of contents. +func (t *TOC) SetLinePageStyle(style TextStyle) { + t.linePageStyle = style +} + +// SetLineLevelOffset sets the amount of space an indentation level occupies +// for all new lines of the table of contents. +func (t *TOC) SetLineLevelOffset(levelOffset float64) { + t.lineLevelOffset = levelOffset +} + +// GeneratePageBlocks generate the Page blocks. Multiple blocks are generated +// if the contents wrap over multiple pages. +func (t *TOC) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) { + origCtx := ctx + + // Generate heading blocks. + blocks, ctx, err := t.heading.GeneratePageBlocks(ctx) + if err != nil { + return blocks, ctx, err + } + + // Generate blocks for the table of contents lines. + for _, line := range t.lines { + newBlocks, c, err := line.GeneratePageBlocks(ctx) + if err != nil { + return blocks, ctx, err + } + if len(newBlocks) < 1 { + continue + } + + // The first block is always appended to the last. + blocks[len(blocks)-1].mergeBlocks(newBlocks[0]) + blocks = append(blocks, newBlocks[1:]...) + + ctx = c + } + + if t.positioning.isRelative() { + // Move back X to same start of line. + ctx.X = origCtx.X + } + + if t.positioning.isAbsolute() { + // If absolute: return original context. + return blocks, origCtx, nil + + } + + return blocks, ctx, nil } diff --git a/pdf/creator/toc_line.go b/pdf/creator/toc_line.go new file mode 100644 index 00000000..4b0f85db --- /dev/null +++ b/pdf/creator/toc_line.go @@ -0,0 +1,225 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package creator + +import ( + "strings" +) + +// TOCLine represents a line in a table of contents. +// The component can be used both in the context of a +// table of contents component and as a standalone component. +// The representation of a table of contents line is as follows: +// [number] [title] [separator] [page] +// e.g.: Chapter1 Introduction ........... 1 +type TOCLine struct { + // The underlyng styled paragraph used to render the TOC line. + sp *StyledParagraph + + // Holds the text and style of the number part of the TOC line. + Number TextChunk + + // Holds the text and style of the title part of the TOC line. + Title TextChunk + + // Holds the text and style of the separator part of the TOC line. 
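// Illustrative sketch, not part of this change: the SetLine*Style setters
// above only affect lines added afterwards, so styles can be switched while
// the TOC is being built, as the advanced test below does. fonts refers to the
// pdf/model/fonts package that toc.go already imports.
func exampleStyledTOC() *TOC {
    toc := NewTOC("Contents")

    bold := NewTextStyle()
    bold.Font = fonts.NewFontHelveticaBold()
    toc.SetLineTitleStyle(bold)
    toc.Add("1", "Bold title", "1", 1)

    regular := NewTextStyle()
    toc.SetLineTitleStyle(regular)
    toc.Add("1.1", "Regular title", "2", 2)

    return toc
}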
+ Separator TextChunk + + // Holds the text and style of the page part of the TOC line. + Page TextChunk + + // The left margin of the TOC line. + offset float64 + + // The indentation level of the TOC line. + level uint + + // The amount of space an indentation level occupies. + levelOffset float64 + + // Positioning: relative/absolute. + positioning positioning +} + +// NewTOCLine creates a new table of contents line with the default style. +func NewTOCLine(number, title, page string, level uint) *TOCLine { + style := NewTextStyle() + + return NewStyledTOCLine( + TextChunk{ + Text: number, + Style: style, + }, + TextChunk{ + Text: title, + Style: style, + }, + TextChunk{ + Text: page, + Style: style, + }, + level, + ) +} + +// NewStyledTOCLine creates a new table of contents line with the provided style. +func NewStyledTOCLine(number, title, page TextChunk, level uint) *TOCLine { + style := NewTextStyle() + + sp := NewStyledParagraph("", style) + sp.SetEnableWrap(true) + sp.SetTextAlignment(TextAlignmentLeft) + sp.SetMargins(0, 0, 2, 2) + + tl := &TOCLine{ + sp: sp, + Number: number, + Title: title, + Page: page, + Separator: TextChunk{ + Text: ".", + Style: style, + }, + offset: 0, + level: level, + levelOffset: 10, + positioning: positionRelative, + } + + sp.margins.left = tl.offset + float64(tl.level-1)*tl.levelOffset + sp.beforeRender = tl.prepareParagraph + return tl +} + +// SetStyle sets the style for all the line components: number, title, +// separator, page. +func (tl *TOCLine) SetStyle(style TextStyle) { + tl.Number.Style = style + tl.Title.Style = style + tl.Separator.Style = style + tl.Page.Style = style +} + +// Level returns the indentation level of the TOC line. +func (tl *TOCLine) Level() uint { + return tl.level +} + +// SetLevel sets the indentation level of the TOC line. +func (tl *TOCLine) SetLevel(level uint) { + tl.level = level + tl.sp.margins.left = tl.offset + float64(tl.level-1)*tl.levelOffset +} + +// LevelOffset returns the amount of space an indentation level occupies. +func (tl *TOCLine) LevelOffset() float64 { + return tl.levelOffset +} + +// SetLevelOffset sets the amount of space an indentation level occupies. +func (tl *TOCLine) SetLevelOffset(levelOffset float64) { + tl.levelOffset = levelOffset + tl.sp.margins.left = tl.offset + float64(tl.level-1)*tl.levelOffset +} + +// GetMargins returns the margins of the TOC line: left, right, top, bottom. +func (tl *TOCLine) GetMargins() (float64, float64, float64, float64) { + m := &tl.sp.margins + return tl.offset, m.right, m.top, m.bottom +} + +// SetMargins sets the margins TOC line. +func (tl *TOCLine) SetMargins(left, right, top, bottom float64) { + tl.offset = left + + m := &tl.sp.margins + m.left = tl.offset + float64(tl.level-1)*tl.levelOffset + m.right = right + m.top = top + m.bottom = bottom +} + +// prepareParagraph generates and adds all the components of the TOC line +// to the underlying paragraph. +func (tl *TOCLine) prepareParagraph(sp *StyledParagraph, ctx DrawContext) { + // Add text chunks to the paragraph. + title := tl.Title.Text + if tl.Number.Text != "" { + title = " " + title + } + title += " " + + page := tl.Page.Text + if page != "" { + page = " " + page + } + + sp.chunks = []TextChunk{ + tl.Number, + TextChunk{ + Text: title, + Style: tl.Title.Style, + }, + TextChunk{ + Text: page, + Style: tl.Page.Style, + }, + } + + sp.SetEncoder(sp.encoder) + sp.wrapText() + + // Insert separator. 
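// Illustrative sketch, not part of this change: a TOCLine can also be used as
// a standalone component and drawn directly, assuming (as its
// GeneratePageBlocks method indicates) that it satisfies the creator's
// Drawable interface.
func exampleStandaloneTOCLine(c *Creator) error {
    line := NewTOCLine("2.1", "Results", "14", 2)
    line.SetLevelOffset(12)
    line.SetMargins(0, 0, 2, 2)
    return c.Draw(line)
}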
+ l := len(sp.lines) + if l == 0 { + return + } + + availWidth := ctx.Width*1000 - sp.getTextLineWidth(sp.lines[l-1]) + sepWidth := sp.getTextLineWidth([]TextChunk{tl.Separator}) + sepCount := int(availWidth / sepWidth) + sepText := strings.Repeat(tl.Separator.Text, sepCount) + sepStyle := tl.Separator.Style + + sp.Insert(2, sepText, sepStyle) + + // Push page numbers to the end of the line. + availWidth = availWidth - float64(sepCount)*sepWidth + if availWidth > 500 { + spaceMetrics, found := sepStyle.Font.GetGlyphCharMetrics("space") + if found && availWidth > spaceMetrics.Wx { + spaces := int(availWidth / spaceMetrics.Wx) + if spaces > 0 { + style := sepStyle + style.FontSize = 1 + sp.Insert(2, strings.Repeat(" ", spaces), style) + } + } + } +} + +// GeneratePageBlocks generate the Page blocks. Multiple blocks are generated +// if the contents wrap over multiple pages. +func (tl *TOCLine) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) { + origCtx := ctx + + blocks, ctx, err := tl.sp.GeneratePageBlocks(ctx) + if err != nil { + return blocks, ctx, err + } + + if tl.positioning.isRelative() { + // Move back X to same start of line. + ctx.X = origCtx.X + } + + if tl.positioning.isAbsolute() { + // If absolute: return original context. + return blocks, origCtx, nil + } + + return blocks, ctx, nil +} diff --git a/pdf/creator/toc_test.go b/pdf/creator/toc_test.go new file mode 100644 index 00000000..12234dfc --- /dev/null +++ b/pdf/creator/toc_test.go @@ -0,0 +1,109 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ +package creator + +import ( + "testing" + + "github.com/unidoc/unidoc/pdf/model/fonts" +) + +func TestTOCAdvanced(t *testing.T) { + fontHelvetica := fonts.NewFontHelvetica() + fontHelveticaBold := fonts.NewFontHelveticaBold() + + c := New() + c.NewPage() + + toc := NewTOC("Table of Contents") + + // Set separator and margins for all the lines. + toc.SetLineSeparator(".") + toc.SetLineMargins(0, 0, 2, 2) + toc.SetLineLevelOffset(12) + + // Set style for all line numbers. + style := NewTextStyle() + style.Font = fontHelveticaBold + style.Color = ColorRGBFrom8bit(100, 100, 100) + toc.SetLineNumberStyle(style) + + // Set style for all line pages. + style.Font = fontHelveticaBold + style.Color = ColorRGBFrom8bit(0, 0, 0) + toc.SetLinePageStyle(style) + + // Set style for all line titles. + style.Font = fontHelveticaBold + toc.SetLineTitleStyle(style) + + // Set style for all line separators. + style.Font = fontHelvetica + style.FontSize = 9 + toc.SetLineSeparatorStyle(style) + + // Add TOC lines. + tl := toc.Add("", "Abstract", "i", 1) + tl.Title.Style.Font = fontHelveticaBold + tl.SetMargins(0, 0, 5, 5) + + toc.Add("", "Aknowledgements", "ii", 1) + toc.Add("", "Table of Contents", "iii", 1) + + // Customize line style. + red := ColorRGBFrom8bit(255, 0, 0) + + tl = toc.Add("Chapter 1:", "Introduction", "1", 1) + tl.Title.Style.Font = fontHelveticaBold + tl.Title.Style.Color = red + tl.Number.Style.Color = red + tl.Page.Style.Color = red + tl.Separator.Style.Color = red + + // Set style for all line titles. + style.Font = fontHelvetica + style.FontSize = 10 + toc.SetLineTitleStyle(style) + + // Set another style for the line page part. 
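// Illustrative sketch, not part of this change: the dot-fill logic in
// prepareParagraph above works in text-space units (point widths scaled by
// 1000). Assuming Helvetica at size 10, where "." is roughly 2.78 pt wide
// (sepWidth = 2780), a remaining line width of 300 pt (availWidth = 300000)
// yields about 107 separators; any residue wider than 0.5 pt is then padded
// with spaces rendered at font size 1 so the page number stays flush right.
func exampleSeparatorCount(availWidth, sepWidth float64) int {
    return int(availWidth / sepWidth) // e.g. 300000 / 2780 -> 107
}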
+ style.Font = fontHelvetica + toc.SetLinePageStyle(style) + + toc.Add("1.1", "Second Harmonic Generation (SHG)", "1", 2) + toc.Add("1.1.1", "Nonlinear induced polarization", "1", 3) + toc.Add("1.1.2", "Phase matching of the fundamental and emission waves", "2", 3) + toc.Add("1.1.3", "Collagen as an intrinsic biomarker for SHG generation", "3", 3) + toc.Add("1.1.4", "Second harmonic imaging microscopy", "6", 3) + toc.Add("1.2", "Light propagation in tissues", "8", 2) + toc.Add("1.2.1", "Radiative transfer equation for modeling light propagation in tissue", "8", 3) + toc.Add("1.2.2", "Monte Carlo method as a convenient and flexible solution to the RTE for modeling light transport\nin multi layered tissues", "10", 3) + toc.Add("1.2.3", "Measurement of optical properties", "15", 3) + toc.Add("1.2.4", "Analytical solution of light scattering: The Born aproximation", "19", 3) + toc.Add("1.2.5", "Refractive index corellation functions to describe light scattering in tissue", "21", 3) + toc.Add("1.3", "SHG creation and emission directionality", "24", 2) + toc.Add("1.4", "Combining SGH creation and emission directionality", "26", 2) + toc.Add("1.5", "Utilizing light to characterize tissue structure", "26", 2) + + // Make line page part bold again. + style.Font = fontHelveticaBold + toc.SetLinePageStyle(style) + + // Customize line style. + tl = toc.Add("", "References", "28", 1) + tl.Title.Style.Font = fontHelveticaBold + tl.Separator.Style.Font = fontHelveticaBold + tl.SetMargins(0, 0, 5, 0) + + err := c.Draw(toc) + if err != nil { + t.Fatalf("Error drawing: %v", err) + } + + // Write output file. + err = c.WriteToFile("/tmp/toc_advanced.pdf") + if err != nil { + t.Fatalf("Fail: %v\n", err) + } +} diff --git a/pdf/model/colorspace.go b/pdf/model/colorspace.go index 4ee7b8d5..7c9ead58 100644 --- a/pdf/model/colorspace.go +++ b/pdf/model/colorspace.go @@ -151,9 +151,9 @@ func NewPdfColorspaceFromPdfObject(obj PdfObject) (PdfColorspace, error) { return nil, errors.New("Type error") } -// determineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and +// DetermineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and // an error on failure. If the colorspace was not found, will return an empty string. -func determineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) { +func DetermineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) { var csName *PdfObjectName var csArray *PdfObjectArray @@ -2179,7 +2179,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS obj = array.Get(1) // Base cs cannot be another /Indexed or /Pattern space. - baseName, err := determineColorspaceNameFromPdfObject(obj) + baseName, err := DetermineColorspaceNameFromPdfObject(obj) if baseName == "Indexed" || baseName == "Pattern" { common.Log.Debug("Error: Indexed colorspace cannot have Indexed/Pattern CS as base (%v)", baseName) return nil, ErrRangeError diff --git a/pdf/model/optimize/chain.go b/pdf/model/optimize/chain.go new file mode 100644 index 00000000..e12ce2b9 --- /dev/null +++ b/pdf/model/optimize/chain.go @@ -0,0 +1,34 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model" +) + +// Chain allows to use sequence of optimizers. 
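// Illustrative sketch, not part of this change: exporting
// DetermineColorspaceNameFromPdfObject lets code outside the model package,
// such as the image optimizers added below, inspect a colorspace entry.
// Assumes the core, model and fmt imports; csObj stands for a ColorSpace value
// taken from an image dictionary.
func exampleColorComponents(csObj core.PdfObject) (int, error) {
    name, err := model.DetermineColorspaceNameFromPdfObject(csObj)
    if err != nil {
        return 0, err
    }
    switch name {
    case "DeviceRGB":
        return 3, nil
    case "DeviceGray":
        return 1, nil
    }
    return 0, fmt.Errorf("unsupported colorspace %s", name)
}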
+// It implements interface model.Optimizer. +type Chain struct { + optimizers []model.Optimizer +} + +// Append appends optimizers to the chain. +func (c *Chain) Append(optimizers ...model.Optimizer) { + c.optimizers = append(c.optimizers, optimizers...) +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (c *Chain) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + optimizedObjects = objects + for _, optimizer := range c.optimizers { + optimizedObjects, err = optimizer.Optimize(optimizedObjects) + if err != nil { + return optimizedObjects, err + } + } + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/combine_duplicate_direct_objects.go b/pdf/model/optimize/combine_duplicate_direct_objects.go new file mode 100644 index 00000000..5486b5c5 --- /dev/null +++ b/pdf/model/optimize/combine_duplicate_direct_objects.go @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "crypto/md5" + + "github.com/unidoc/unidoc/pdf/core" +) + +// CombineDuplicateDirectObjects combines duplicated direct objects by its data hash. +// It implements interface model.Optimizer. +type CombineDuplicateDirectObjects struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (dup *CombineDuplicateDirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + updateObjectNumbers(objects) + dictsByHash := make(map[string][]*core.PdfObjectDictionary) + var processDict func(pDict *core.PdfObjectDictionary) + + processDict = func(pDict *core.PdfObjectDictionary) { + for _, key := range pDict.Keys() { + obj := pDict.Get(key) + if dict, isDictObj := obj.(*core.PdfObjectDictionary); isDictObj { + hasher := md5.New() + hasher.Write([]byte(dict.DefaultWriteString())) + hash := string(hasher.Sum(nil)) + dictsByHash[hash] = append(dictsByHash[hash], dict) + processDict(dict) + } + } + } + + for _, obj := range objects { + ind, isIndirectObj := obj.(*core.PdfIndirectObject) + if !isIndirectObj { + continue + } + if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj { + processDict(dict) + } + } + + indirects := make([]core.PdfObject, 0, len(dictsByHash)) + replaceTable := make(map[core.PdfObject]core.PdfObject) + + for _, dicts := range dictsByHash { + if len(dicts) < 2 { + continue + } + dict := core.MakeDict() + dict.Merge(dicts[0]) + ind := core.MakeIndirectObject(dict) + indirects = append(indirects, ind) + for i := 0; i < len(dicts); i++ { + dict := dicts[i] + replaceTable[dict] = ind + } + } + + optimizedObjects = make([]core.PdfObject, len(objects)) + copy(optimizedObjects, objects) + optimizedObjects = append(indirects, optimizedObjects...) + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/combine_duplicate_streams.go b/pdf/model/optimize/combine_duplicate_streams.go new file mode 100644 index 00000000..a43b6e15 --- /dev/null +++ b/pdf/model/optimize/combine_duplicate_streams.go @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "crypto/md5" + + "github.com/unidoc/unidoc/pdf/core" +) + +// CombineDuplicateStreams combines duplicated streams by its data hash. +// It implements interface model.Optimizer. 
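// Illustrative sketch, not part of this change: the optimizers in this package
// are composable; a Chain runs them in the order they were appended and itself
// satisfies model.Optimizer. Assumes the optimize and model imports.
func exampleChain() model.Optimizer {
    chain := new(optimize.Chain)
    chain.Append(new(optimize.CombineDuplicateStreams))
    chain.Append(new(optimize.CompressStreams))
    return chain
}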
+type CombineDuplicateStreams struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (dup *CombineDuplicateStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + replaceTable := make(map[core.PdfObject]core.PdfObject) + toDelete := make(map[core.PdfObject]struct{}) + streamsByHash := make(map[string][]*core.PdfObjectStream) + for _, obj := range objects { + if stream, isStreamObj := obj.(*core.PdfObjectStream); isStreamObj { + hasher := md5.New() + hasher.Write([]byte(stream.Stream)) + hash := string(hasher.Sum(nil)) + streamsByHash[hash] = append(streamsByHash[hash], stream) + } + } + for _, streams := range streamsByHash { + if len(streams) < 2 { + continue + } + firstStream := streams[0] + for i := 1; i < len(streams); i++ { + stream := streams[i] + replaceTable[stream] = firstStream + toDelete[stream] = struct{}{} + } + } + + optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete)) + for _, obj := range objects { + if _, found := toDelete[obj]; found { + continue + } + optimizedObjects = append(optimizedObjects, obj) + } + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/combine_identical_indirect_objects.go b/pdf/model/optimize/combine_identical_indirect_objects.go new file mode 100644 index 00000000..7c4f492e --- /dev/null +++ b/pdf/model/optimize/combine_identical_indirect_objects.go @@ -0,0 +1,65 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "crypto/md5" + + "github.com/unidoc/unidoc/pdf/core" +) + +// CombineIdenticalIndirectObjects combines identical indirect objects. +// It implements interface model.Optimizer. +type CombineIdenticalIndirectObjects struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. 
+func (c *CombineIdenticalIndirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + updateObjectNumbers(objects) + replaceTable := make(map[core.PdfObject]core.PdfObject) + toDelete := make(map[core.PdfObject]struct{}) + + indWithDictByHash := make(map[string][]*core.PdfIndirectObject) + + for _, obj := range objects { + ind, isIndirectObj := obj.(*core.PdfIndirectObject) + if !isIndirectObj { + continue + } + if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj { + if name, isName := dict.Get("Type").(*core.PdfObjectName); isName && *name == "Page" { + continue + } + hasher := md5.New() + hasher.Write([]byte(dict.DefaultWriteString())) + + hash := string(hasher.Sum(nil)) + indWithDictByHash[hash] = append(indWithDictByHash[hash], ind) + } + } + + for _, dicts := range indWithDictByHash { + if len(dicts) < 2 { + continue + } + firstDict := dicts[0] + for i := 1; i < len(dicts); i++ { + dict := dicts[i] + replaceTable[dict] = firstDict + toDelete[dict] = struct{}{} + } + } + + optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete)) + for _, obj := range objects { + if _, found := toDelete[obj]; found { + continue + } + optimizedObjects = append(optimizedObjects, obj) + } + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/compress_streams.go b/pdf/model/optimize/compress_streams.go new file mode 100644 index 00000000..e58f96a0 --- /dev/null +++ b/pdf/model/optimize/compress_streams.go @@ -0,0 +1,45 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// CompressStreams compresses uncompressed streams. +// It implements interface model.Optimizer. +type CompressStreams struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (c *CompressStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + optimizedObjects = make([]core.PdfObject, len(objects)) + copy(optimizedObjects, objects) + for _, obj := range objects { + stream, isStreamObj := core.GetStream(obj) + if !isStreamObj { + continue + } + if _, found := core.GetName(stream.PdfObjectDictionary.Get("Filter")); found { + continue + } + encoder := core.NewLZWEncoder() + encoder.EarlyChange = 0 + var data []byte + data, err = encoder.EncodeBytes(stream.Stream) + if err != nil { + return optimizedObjects, err + } + dict := encoder.MakeStreamDict() + // compare compressed and uncompressed sizes + if len(data)+len(dict.DefaultWriteString()) < len(stream.Stream) { + stream.Stream = data + stream.PdfObjectDictionary.Merge(dict) + stream.PdfObjectDictionary.Set("Length", core.MakeInteger(int64(len(stream.Stream)))) + } + } + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/image.go b/pdf/model/optimize/image.go new file mode 100644 index 00000000..6a7fc704 --- /dev/null +++ b/pdf/model/optimize/image.go @@ -0,0 +1,138 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model" +) + +// Image optimizes images by rewrite images into JPEG format with quality equals to ImageQuality. +// TODO(a5i): Add support for inline images. 
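// Illustrative sketch, not part of this change: both duplicate-combining
// optimizers above key objects by an MD5 hash of their serialized form, so
// checking whether two dictionaries would be merged reduces to comparing
// hashes. Assumes the crypto/md5 and core imports.
func exampleSameDict(a, b *core.PdfObjectDictionary) bool {
    ha := md5.Sum([]byte(a.DefaultWriteString()))
    hb := md5.Sum([]byte(b.DefaultWriteString()))
    return ha == hb
}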
+// It implements interface model.Optimizer. +type Image struct { + ImageQuality int +} + +// imageInfo is information about an image. +type imageInfo struct { + ColorSpace core.PdfObjectName + BitsPerComponent int + ColorComponents int + Width int + Height int + Stream *core.PdfObjectStream + PPI float64 +} + +// findImages returns images from objects. +func findImages(objects []core.PdfObject) []*imageInfo { + subTypeKey := core.PdfObjectName("Subtype") + streamProcessed := make(map[*core.PdfObjectStream]struct{}) + var err error + var images []*imageInfo + for _, obj := range objects { + stream, ok := core.GetStream(obj) + if !ok { + continue + } + if _, found := streamProcessed[stream]; found { + continue + } + streamProcessed[stream] = struct{}{} + subTypeValue := stream.PdfObjectDictionary.Get(subTypeKey) + subType, ok := core.GetName(subTypeValue) + if !ok || string(*subType) != "Image" { + continue + } + img := &imageInfo{BitsPerComponent: 8, Stream: stream} + if img.ColorSpace, err = model.DetermineColorspaceNameFromPdfObject(stream.PdfObjectDictionary.Get("ColorSpace")); err != nil { + common.Log.Error("Error determine color space %s", err) + continue + } + if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("BitsPerComponent")); ok { + img.BitsPerComponent = val + } + if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Width")); ok { + img.Width = val + } + if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Height")); ok { + img.Height = val + } + + switch img.ColorSpace { + case "DeviceRGB": + img.ColorComponents = 3 + case "DeviceGray": + img.ColorComponents = 1 + default: + common.Log.Warning("Optimization is not supported for color space %s", img.ColorSpace) + continue + } + images = append(images, img) + } + return images +} + +// Optimize optimizes PDF objects to decrease PDF size. 
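// Illustrative sketch, not part of this change: the Image optimizer re-encodes
// DeviceRGB and DeviceGray image streams as DCT (JPEG) at the requested
// quality; a quality of zero or less leaves the objects untouched. Assumes the
// optimize and core imports.
func exampleImageOptimizer(objects []core.PdfObject) ([]core.PdfObject, error) {
    opt := &optimize.Image{ImageQuality: 80}
    return opt.Optimize(objects)
}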
+func (i *Image) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + if i.ImageQuality <= 0 { + return objects, nil + } + images := findImages(objects) + if len(images) == 0 { + return objects, nil + } + + replaceTable := make(map[core.PdfObject]core.PdfObject) + imageMasks := make(map[core.PdfObject]struct{}) + for _, img := range images { + obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask")) + imageMasks[obj] = struct{}{} + } + + for index, img := range images { + stream := img.Stream + if _, isMask := imageMasks[stream]; isMask { + continue + } + streamEncoder, err := core.NewEncoderFromStream(stream) + if err != nil { + common.Log.Warning("Error get encoder for the image stream %s") + continue + } + data, err := streamEncoder.DecodeStream(stream) + if err != nil { + common.Log.Warning("Error decode the image stream %s") + continue + } + encoder := core.NewDCTEncoder() + encoder.ColorComponents = img.ColorComponents + encoder.Quality = i.ImageQuality + encoder.BitsPerComponent = img.BitsPerComponent + encoder.Width = img.Width + encoder.Height = img.Height + streamData, err := encoder.EncodeBytes(data) + if err != nil { + return nil, err + } + newStream := &core.PdfObjectStream{Stream: streamData} + newStream.PdfObjectReference = stream.PdfObjectReference + newStream.PdfObjectDictionary = core.MakeDict() + newStream.PdfObjectDictionary.Merge(stream.PdfObjectDictionary) + fn := core.PdfObjectName(encoder.GetFilterName()) + newStream.PdfObjectDictionary.Set(core.PdfObjectName("Filter"), &fn) + ln := core.PdfObjectInteger(int64(len(streamData))) + newStream.PdfObjectDictionary.Set(core.PdfObjectName("Length"), &ln) + replaceTable[stream] = newStream + images[index].Stream = newStream + } + optimizedObjects = make([]core.PdfObject, len(objects)) + copy(optimizedObjects, objects) + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/image_ppi.go b/pdf/model/optimize/image_ppi.go new file mode 100644 index 00000000..2996845e --- /dev/null +++ b/pdf/model/optimize/image_ppi.go @@ -0,0 +1,203 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "fmt" + "image" + "math" + + "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/contentstream" + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model" + "golang.org/x/image/draw" +) + +// ImagePPI optimizes images by scaling images such that the PPI (pixels per inch) is never higher than ImageUpperPPI. +// TODO(a5i): Add support for inline images. +// It implements interface model.Optimizer. 
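// Illustrative sketch, not part of this change: ImagePPI downsamples images
// that are painted at more than ImageUpperPPI pixels per inch on the page;
// a threshold of zero or less disables the pass. Assumes the optimize and core
// imports.
func examplePPIOptimizer(objects []core.PdfObject) ([]core.PdfObject, error) {
    opt := &optimize.ImagePPI{ImageUpperPPI: 150}
    return opt.Optimize(objects)
}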
+type ImagePPI struct { + ImageUpperPPI float64 +} + +func scaleImage(stream *core.PdfObjectStream, scale float64) error { + xImg, err := model.NewXObjectImageFromStream(stream) + if err != nil { + return err + } + i, err := xImg.ToImage() + if err != nil { + return err + } + goimg, err := i.ToGoImage() + if err != nil { + return err + } + + newW := int(math.RoundToEven(float64(i.Width) * scale)) + newH := int(math.RoundToEven(float64(i.Height) * scale)) + rect := image.Rect(0, 0, newW, newH) + var newImage draw.Image + switch xImg.ColorSpace.String() { + case "DeviceRGB": + newImage = image.NewRGBA(rect) + case "DeviceGray": + newImage = image.NewGray(rect) + default: + return fmt.Errorf("Optimization is not supported for color space %s", xImg.ColorSpace.String()) + } + draw.CatmullRom.Scale(newImage, newImage.Bounds(), goimg, goimg.Bounds(), draw.Over, &draw.Options{}) + i, err = model.ImageHandling.NewImageFromGoImage(newImage) + if err != nil { + return err + } + xImg.SetImage(i, xImg.ColorSpace) + xImg.ToPdfObject() + return nil +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (i *ImagePPI) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + if i.ImageUpperPPI <= 0 { + return objects, nil + } + images := findImages(objects) + if len(images) == 0 { + return objects, nil + } + imageMasks := make(map[core.PdfObject]struct{}) + for _, img := range images { + obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask")) + imageMasks[obj] = struct{}{} + } + imageByStream := make(map[*core.PdfObjectStream]*imageInfo) + for _, img := range images { + imageByStream[img.Stream] = img + } + var catalog *core.PdfObjectDictionary + for _, obj := range objects { + if dict, isDict := core.GetDict(obj); catalog == nil && isDict { + if tp, ok := core.GetName(dict.Get(core.PdfObjectName("Type"))); ok && *tp == "Catalog" { + catalog = dict + } + } + } + if catalog == nil { + return objects, nil + } + pages, hasPages := core.GetDict(catalog.Get(core.PdfObjectName("Pages"))) + if !hasPages { + return objects, nil + } + kids, hasKids := core.GetArray(pages.Get(core.PdfObjectName("Kids"))) + if !hasKids { + return objects, nil + } + imageByName := make(map[string]*imageInfo) + + for _, pageObj := range kids.Elements() { + page, ok := core.GetDict(pageObj) + if !ok { + continue + } + contents, hasContents := core.GetArray(page.Get("Contents")) + if !hasContents { + continue + } + resources, hasResources := core.GetDict(page.Get("Resources")) + if !hasResources { + continue + } + xObject, hasXObject := core.GetDict(resources.Get("XObject")) + if !hasXObject { + continue + } + xObjectKeys := xObject.Keys() + for _, key := range xObjectKeys { + if stream, isStream := core.GetStream(xObject.Get(key)); isStream { + if img, found := imageByStream[stream]; found { + imageByName[string(key)] = img + } + } + } + for _, obj := range contents.Elements() { + if stream, isStream := core.GetStream(obj); isStream { + streamEncoder, err := core.NewEncoderFromStream(stream) + if err != nil { + return nil, err + } + data, err := streamEncoder.DecodeStream(stream) + if err != nil { + return nil, err + } + + p := contentstream.NewContentStreamParser(string(data)) + operations, err := p.Parse() + if err != nil { + return nil, err + } + scaleX, scaleY := 1.0, 1.0 + for _, operation := range *operations { + if operation.Operand == "Q" { + scaleX, scaleY = 1.0, 1.0 + } + if operation.Operand == "cm" && len(operation.Params) == 6 { + if sx, ok := 
core.GetFloatVal(operation.Params[0]); ok { + scaleX = scaleX * sx + } + if sy, ok := core.GetFloatVal(operation.Params[3]); ok { + scaleY = scaleY * sy + } + if sx, ok := core.GetIntVal(operation.Params[0]); ok { + scaleX = scaleX * float64(sx) + } + if sy, ok := core.GetIntVal(operation.Params[3]); ok { + scaleY = scaleY * float64(sy) + } + } + if operation.Operand == "Do" && len(operation.Params) == 1 { + name, ok := core.GetName(operation.Params[0]) + if !ok { + continue + } + if img, found := imageByName[string(*name)]; found { + wInch, hInch := scaleX/72.0, scaleY/72.0 + xPPI, yPPI := float64(img.Width)/wInch, float64(img.Height)/hInch + if wInch == 0 || hInch == 0 { + xPPI = 72.0 + yPPI = 72.0 + } + img.PPI = math.Max(img.PPI, xPPI) + img.PPI = math.Max(img.PPI, yPPI) + } + } + } + } + } + } + + for _, img := range images { + if _, isMask := imageMasks[img.Stream]; isMask { + continue + } + if img.PPI <= i.ImageUpperPPI { + continue + } + scale := i.ImageUpperPPI / img.PPI + if err := scaleImage(img.Stream, scale); err != nil { + common.Log.Debug("Error scale image keep original image: %s", err) + } else { + if mask, hasMask := core.GetStream(img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))); hasMask { + if err := scaleImage(mask, scale); err != nil { + return nil, err + } + } + } + } + + return objects, nil +} diff --git a/pdf/model/optimize/object_streams.go b/pdf/model/optimize/object_streams.go new file mode 100644 index 00000000..4f03b699 --- /dev/null +++ b/pdf/model/optimize/object_streams.go @@ -0,0 +1,40 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// ObjectStreams groups PDF objects to object streams. +// It implements interface model.Optimizer. +type ObjectStreams struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (o *ObjectStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + objStream := &core.PdfObjectStreams{} + skippedObjects := make([]core.PdfObject, 0, len(objects)) + for _, obj := range objects { + if io, isIndirectObj := obj.(*core.PdfIndirectObject); isIndirectObj && io.GenerationNumber == 0 { + objStream.Append(obj) + } else { + skippedObjects = append(skippedObjects, obj) + } + } + if objStream.Len() == 0 { + return skippedObjects, nil + } + + optimizedObjects = make([]core.PdfObject, 0, len(skippedObjects)+objStream.Len()+1) + if objStream.Len() > 1 { + optimizedObjects = append(optimizedObjects, objStream) + } + optimizedObjects = append(optimizedObjects, objStream.Elements()...) + optimizedObjects = append(optimizedObjects, skippedObjects...) + + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/optimize_test.go b/pdf/model/optimize/optimize_test.go new file mode 100644 index 00000000..efe95aa1 --- /dev/null +++ b/pdf/model/optimize/optimize_test.go @@ -0,0 +1,212 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize_test + +import ( + "bytes" + "fmt" + "io" + "testing" + + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model/optimize" +) + +// parseIndirectObjects parses a sequence of indirect/stream objects sequentially from a `rawpdf` text. 
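// Illustrative sketch, not part of this change: the PPI estimate above comes
// from the current transformation matrix. A cm operator with a scale of 144
// maps the unit image square to 144 pt, i.e. 144/72 = 2 inches, so a 600 pixel
// wide image drawn that way is at 600/2 = 300 PPI and would be downsampled
// when ImageUpperPPI is, say, 150.
func examplePPI(pixels int, cmScale float64) float64 {
    inches := cmScale / 72.0 // cm operands are in points, 72 points per inch
    return float64(pixels) / inches
}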
+func parseIndirectObjects(rawpdf string) ([]core.PdfObject, error) { + p := core.NewParserFromString(rawpdf) + indirects := []core.PdfObject{} + for { + obj, err := p.ParseIndirectObject() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + + indirects = append(indirects, obj) + } + + return indirects, nil +} + +// debugObjects prints objects in a readable fashion, convenient when debugging. +func debugObjects(objects []core.PdfObject) string { + var buf bytes.Buffer + + for _, obj := range objects { + switch t := obj.(type) { + case *core.PdfIndirectObject: + buf.WriteString(fmt.Sprintf("%d 0 obj\n", t.ObjectNumber)) + buf.WriteString(fmt.Sprintf(" %s\n", t.PdfObject.String())) + } + } + return buf.String() +} + +func TestOptimizeIdenticalIndirects1(t *testing.T) { + rawpdf := ` +1 0 obj +<< + /Name (1234) +>> +endobj +2 0 obj +<< /Name (1234) >> +endobj +` + objects, err := parseIndirectObjects(rawpdf) + if err != nil { + t.Fatalf("Error: %v", err) + } + + if len(objects) != 2 { + t.Fatalf("len(objects) != 2 (%d)", len(objects)) + } + + // Combine duplicate direct objects - Expect unchanged results. + { + opt := optimize.CombineDuplicateDirectObjects{} + optObjects, err := opt.Optimize(objects) + if err != nil { + t.Fatalf("Error: %v", err) + } + if len(optObjects) != 2 { + t.Fatalf("len(optObjects1) != 2 (%d)", len(optObjects)) + } + } + + // Combine indirect objects should go from 2 to 1. + { + opt := optimize.CombineIdenticalIndirectObjects{} + optObjects, err := opt.Optimize(objects) + if err != nil { + t.Fatalf("Error: %v", err) + } + if len(optObjects) != 1 { + t.Fatalf("len(optObjects1) != 1 (%d)", len(optObjects)) + } + } +} + +// More complex case, where has a reference, where as the other does not. +// Expecting this NOT to work as we don't currently support this case. +// TODO: Add support for this. +func TestOptimizeIdenticalIndirectsUnsupported1(t *testing.T) { + rawpdf := ` +1 0 obj +(1234) +endobj +2 0 obj +<< + /Name (1234) +>> +endobj +3 0 obj +<< /Name 1 0 R >> +endobj +` + objects, err := parseIndirectObjects(rawpdf) + if err != nil { + t.Fatalf("Error: %v", err) + } + + if len(objects) != 3 { + t.Fatalf("len(objects) != 2 (%d)", len(objects)) + } + + // Combine duplicate direct objects - Expect unchanged results. + { + opt := optimize.CombineDuplicateDirectObjects{} + optObjects, err := opt.Optimize(objects) + if err != nil { + t.Fatalf("Error: %v", err) + } + if len(optObjects) != 3 { + t.Fatalf("len(optObjects1) != 2 (%d)", len(optObjects)) + } + } + + // Combine indirect objects should go from 3 to 2. + { + opt := optimize.CombineIdenticalIndirectObjects{} + optObjects, err := opt.Optimize(objects) + if err != nil { + t.Fatalf("Error: %v", err) + } + if len(optObjects) != 3 { // TODO: Add support. IF IDEAL: would be 2. + t.Fatalf("len(optObjects1) != 2 (%d)", len(optObjects)) + } + } +} + +// Showcases problem with sequence of CombineDuplicateDirectObjects followed by CombineIdenticalIndirectObjects +// if object numbers are not updated between steps (due to non-unique object numbering and reference strings). 
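// Illustrative sketch, not part of this change: in normal use the individual
// optimizers are not invoked directly; the New constructor in optimizer.go
// below assembles them into a Chain from an Options value, with the image
// passes first and stream compression last. Assumes the optimize and model
// imports.
func exampleOptimizerFromOptions() model.Optimizer {
    return optimize.New(optimize.Options{
        CombineDuplicateDirectObjects:   true,
        CombineIdenticalIndirectObjects: true,
        CombineDuplicateStreams:         true,
        UseObjectStreams:                true,
        CompressStreams:                 true,
        ImageQuality:                    90,
        ImageUpperPPI:                   300,
    })
}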
+func TestOptimizationSequence1(t *testing.T) { + rawpdf := ` +1 0 obj +<< + /Inner << /Color (red) >> +>> +endobj +2 0 obj +<< + /Inner << /Color (red) >> + /Other (abc) +>> +endobj +3 0 obj +<< + /Inner << /Color (blue) >> + /Other (abc) +>> +endobj +4 0 obj +<< + /Inner << /Color (blue) >> +>> +endobj +` + objects, err := parseIndirectObjects(rawpdf) + if err != nil { + t.Fatalf("Error: %v", err) + } + if len(objects) != 4 { + t.Fatalf("len(objects) != 4 (%d)", len(objects)) + } + debugstr1 := debugObjects(objects) + + // 1. Combine duplicate direct objects. + // Expect that 2 new indirect objects will be added, as two of the inner dictionaries are identical. + opt := optimize.CombineDuplicateDirectObjects{} + optObjects, err := opt.Optimize(objects) + if err != nil { + t.Fatalf("Error: %v", err) + } + if len(optObjects) != 6 { + t.Fatalf("len(optObjects) != 6 (%d)", len(optObjects)) + } + debugstr2 := debugObjects(optObjects) + + // 2. Combine indirect objects. + // Should not make any difference here unless there was a problem. + opt2 := optimize.CombineIdenticalIndirectObjects{} + optObjects, err = opt2.Optimize(optObjects) + if err != nil { + t.Fatalf("Error: %v", err) + } + debugstr3 := debugObjects(optObjects) + fmt.Println("==Original") + fmt.Println(debugstr1) + fmt.Println("==After CombineDuplicateDirectObjects") + fmt.Println(debugstr2) + fmt.Println("==After CombineIdenticalIndirectObjects") + fmt.Println(debugstr3) + if len(optObjects) != 6 { + t.Fatalf("len(optObjects) != 6 (%d)", len(optObjects)) + } +} diff --git a/pdf/model/optimize/optimizer.go b/pdf/model/optimize/optimizer.go new file mode 100644 index 00000000..12daeb7a --- /dev/null +++ b/pdf/model/optimize/optimizer.go @@ -0,0 +1,102 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// New creates a optimizers chain from options. +func New(options Options) *Chain { + chain := new(Chain) + if options.ImageUpperPPI > 0 { + imageOptimizer := new(ImagePPI) + imageOptimizer.ImageUpperPPI = options.ImageUpperPPI + chain.Append(imageOptimizer) + } + if options.ImageQuality > 0 { + imageOptimizer := new(Image) + imageOptimizer.ImageQuality = options.ImageQuality + chain.Append(imageOptimizer) + } + if options.CombineDuplicateDirectObjects { + chain.Append(new(CombineDuplicateDirectObjects)) + } + if options.CombineDuplicateStreams { + chain.Append(new(CombineDuplicateStreams)) + } + if options.CombineIdenticalIndirectObjects { + chain.Append(new(CombineIdenticalIndirectObjects)) + } + if options.UseObjectStreams { + chain.Append(new(ObjectStreams)) + } + if options.CompressStreams { + chain.Append(new(CompressStreams)) + } + return chain +} + +// replaceObjectsInPlace replaces objects. objTo will be modified by the process. 
+func replaceObjectsInPlace(objects []core.PdfObject, objTo map[core.PdfObject]core.PdfObject) { + if objTo == nil || len(objTo) == 0 { + return + } + for i, obj := range objects { + if to, found := objTo[obj]; found { + objects[i] = to + continue + } + objTo[obj] = obj + switch t := obj.(type) { + case *core.PdfObjectArray: + values := make([]core.PdfObject, t.Len()) + copy(values, t.Elements()) + replaceObjectsInPlace(values, objTo) + for i, obj := range values { + t.Set(i, obj) + } + case *core.PdfObjectStreams: + replaceObjectsInPlace(t.Elements(), objTo) + case *core.PdfObjectStream: + values := []core.PdfObject{t.PdfObjectDictionary} + replaceObjectsInPlace(values, objTo) + t.PdfObjectDictionary = values[0].(*core.PdfObjectDictionary) + case *core.PdfObjectDictionary: + keys := t.Keys() + values := make([]core.PdfObject, len(keys)) + for i, key := range keys { + values[i] = t.Get(key) + } + replaceObjectsInPlace(values, objTo) + for i, key := range keys { + t.Set(key, values[i]) + } + case *core.PdfIndirectObject: + values := []core.PdfObject{t.PdfObject} + replaceObjectsInPlace(values, objTo) + t.PdfObject = values[0] + } + } +} + +// Update all the object numbers prior to get hash of objects. +func updateObjectNumbers(objects []core.PdfObject) { + // Update numbers + for idx, obj := range objects { + switch o := obj.(type) { + case *core.PdfIndirectObject: + o.ObjectNumber = int64(idx + 1) + o.GenerationNumber = 0 + case *core.PdfObjectStream: + o.ObjectNumber = int64(idx + 1) + o.GenerationNumber = 0 + case *core.PdfObjectStreams: + o.ObjectNumber = int64(idx + 1) + o.GenerationNumber = 0 + } + } +} diff --git a/pdf/model/optimize/options.go b/pdf/model/optimize/options.go new file mode 100644 index 00000000..db024510 --- /dev/null +++ b/pdf/model/optimize/options.go @@ -0,0 +1,17 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +// Options describes PDF optimization parameters. +type Options struct { + CombineDuplicateStreams bool + CombineDuplicateDirectObjects bool + ImageUpperPPI float64 + ImageQuality int + UseObjectStreams bool + CombineIdenticalIndirectObjects bool + CompressStreams bool +} diff --git a/pdf/model/optimizer.go b/pdf/model/optimizer.go new file mode 100644 index 00000000..c9961297 --- /dev/null +++ b/pdf/model/optimizer.go @@ -0,0 +1,18 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package model + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// Optimizer is the interface that performs optimization of PDF object structure for output writing. +// +// Optimize receives a slice of input `objects`, performs optimization, including removing, replacing objects and +// output the optimized slice of objects. +type Optimizer interface { + Optimize(objects []core.PdfObject) ([]core.PdfObject, error) +} diff --git a/pdf/model/reader.go b/pdf/model/reader.go index bab18f2f..665411b6 100755 --- a/pdf/model/reader.go +++ b/pdf/model/reader.go @@ -13,6 +13,7 @@ import ( "github.com/unidoc/unidoc/common" . "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/core/security" ) // PdfReader represents a PDF file reader. 
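// Illustrative sketch, not part of this change: any type with this single
// Optimize method satisfies the model.Optimizer interface defined above, so
// callers can plug custom passes in alongside the ones from the optimize
// package. Assumes the core import.
type exampleNoopOptimizer struct{}

func (exampleNoopOptimizer) Optimize(objects []core.PdfObject) ([]core.PdfObject, error) {
    // A no-op pass: return the object slice unchanged.
    return objects, nil
}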
It is a frontend to the lower level parsing mechanism and provides @@ -79,29 +80,7 @@ func (this *PdfReader) IsEncrypted() (bool, error) { // GetEncryptionMethod returns a descriptive information string about the encryption method used. func (this *PdfReader) GetEncryptionMethod() string { crypter := this.parser.GetCrypter() - str := crypter.Filter + " - " - - if crypter.V == 0 { - str += "Undocumented algorithm" - } else if crypter.V == 1 { - // RC4 or AES (bits: 40) - str += "RC4: 40 bits" - } else if crypter.V == 2 { - str += fmt.Sprintf("RC4: %d bits", crypter.Length) - } else if crypter.V == 3 { - str += "Unpublished algorithm" - } else if crypter.V >= 4 { - // Look at CF, StmF, StrF - str += fmt.Sprintf("Stream filter: %s - String filter: %s", crypter.StreamFilter, crypter.StringFilter) - str += "; Crypt filters:" - for name, cf := range crypter.CryptFilters { - str += fmt.Sprintf(" - %s: %s (%d)", name, cf.Cfm, cf.Length) - } - } - perms := crypter.GetAccessPermissions() - str += fmt.Sprintf(" - %#v", perms) - - return str + return crypter.String() } // Decrypt decrypts the PDF file with a specified password. Also tries to @@ -132,7 +111,7 @@ func (this *PdfReader) Decrypt(password []byte) (bool, error) { // The bool flag indicates that the user can access and view the file. // The AccessPermissions shows what access the user has for editing etc. // An error is returned if there was a problem performing the authentication. -func (this *PdfReader) CheckAccessRights(password []byte) (bool, AccessPermissions, error) { +func (this *PdfReader) CheckAccessRights(password []byte) (bool, security.Permissions, error) { return this.parser.CheckAccessRights(password) } diff --git a/pdf/model/writer.go b/pdf/model/writer.go index bf7a5fc9..627b8c3b 100644 --- a/pdf/model/writer.go +++ b/pdf/model/writer.go @@ -10,21 +10,31 @@ package model import ( "bufio" - "crypto/md5" - "crypto/rand" + "bytes" + "encoding/binary" "errors" "fmt" "io" - "math" "strings" - "time" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common/license" . "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/core/security" + "github.com/unidoc/unidoc/pdf/core/security/crypt" "github.com/unidoc/unidoc/pdf/model/fonts" ) +type crossReference struct { + Type int + // Type 1 + Offset int64 + Generation int64 // and Type 0 + // Type 2 + ObjectNumber int // and Type 0 + Index int +} + var pdfCreator = "" func getPdfProducer() string { @@ -79,6 +89,9 @@ type PdfWriter struct { // Forms. acroForm *PdfAcroForm + + optimizer Optimizer + crossReferenceMap map[int]crossReference } // NewPdfWriter initializes a new PdfWriter. @@ -132,6 +145,111 @@ func NewPdfWriter() PdfWriter { return w } +// copyObject creates deep copy of the Pdf object and +// fills objectToObjectCopyMap to replace the old object to the copy of object if needed. +// Parameter objectToObjectCopyMap is needed to replace object references to its copies. +// Because many objects can contain references to another objects like pages to images. 
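// Illustrative sketch, not part of this change: CheckAccessRights now reports
// permissions as a security.Permissions value (this diff itself only names
// security.PermOwner). Reader construction and the fmt import are assumed from
// the existing API, not introduced here.
func exampleCheckRights(reader *model.PdfReader, password []byte) error {
    ok, perms, err := reader.CheckAccessRights(password)
    if err != nil {
        return err
    }
    fmt.Printf("authenticated: %v, permission flags: %v\n", ok, perms)
    return nil
}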
+func copyObject(obj PdfObject, objectToObjectCopyMap map[PdfObject]PdfObject) PdfObject { + if newObj, ok := objectToObjectCopyMap[obj]; ok { + return newObj + } + + switch t := obj.(type) { + case *PdfObjectArray: + newObj := &PdfObjectArray{} + objectToObjectCopyMap[obj] = newObj + for _, val := range t.Elements() { + newObj.Append(copyObject(val, objectToObjectCopyMap)) + } + return newObj + case *PdfObjectStreams: + newObj := &PdfObjectStreams{PdfObjectReference: t.PdfObjectReference} + objectToObjectCopyMap[obj] = newObj + for _, val := range t.Elements() { + newObj.Append(copyObject(val, objectToObjectCopyMap)) + } + return newObj + case *PdfObjectStream: + newObj := &PdfObjectStream{ + Stream: t.Stream, + PdfObjectReference: t.PdfObjectReference, + } + objectToObjectCopyMap[obj] = newObj + newObj.PdfObjectDictionary = copyObject(t.PdfObjectDictionary, objectToObjectCopyMap).(*PdfObjectDictionary) + return newObj + case *PdfObjectDictionary: + newObj := MakeDict() + objectToObjectCopyMap[obj] = newObj + for _, key := range t.Keys() { + val := t.Get(key) + newObj.Set(key, copyObject(val, objectToObjectCopyMap)) + } + return newObj + case *PdfIndirectObject: + newObj := &PdfIndirectObject{ + PdfObjectReference: t.PdfObjectReference, + } + objectToObjectCopyMap[obj] = newObj + newObj.PdfObject = copyObject(t.PdfObject, objectToObjectCopyMap) + return newObj + case *PdfObjectString: + newObj := &PdfObjectString{} + *newObj = *t + objectToObjectCopyMap[obj] = newObj + return newObj + case *PdfObjectName: + newObj := PdfObjectName(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectNull: + newObj := PdfObjectNull{} + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectInteger: + newObj := PdfObjectInteger(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectReference: + newObj := PdfObjectReference(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectFloat: + newObj := PdfObjectFloat(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectBool: + newObj := PdfObjectBool(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + default: + common.Log.Info("TODO(a5i): implement copyObject for %+v", obj) + } + // return other objects as is + return obj +} + +// copyObjects makes objects copy and set as working. +func (this *PdfWriter) copyObjects() { + objectToObjectCopyMap := make(map[PdfObject]PdfObject) + objects := make([]PdfObject, len(this.objects)) + objectsMap := make(map[PdfObject]bool) + for i, obj := range this.objects { + newObject := copyObject(obj, objectToObjectCopyMap) + objects[i] = newObject + if this.objectsMap[obj] { + objectsMap[newObject] = true + } + } + + this.objects = objects + this.objectsMap = objectsMap + this.infoObj = copyObject(this.infoObj, objectToObjectCopyMap).(*PdfIndirectObject) + this.root = copyObject(this.root, objectToObjectCopyMap).(*PdfIndirectObject) + if this.encryptObj != nil { + this.encryptObj = copyObject(this.encryptObj, objectToObjectCopyMap).(*PdfIndirectObject) + } +} + // Set the PDF version of the output file. func (this *PdfWriter) SetVersion(majorVersion, minorVersion int) { this.majorVersion = majorVersion @@ -152,6 +270,16 @@ func (this *PdfWriter) SetOCProperties(ocProperties PdfObject) error { return nil } +// SetOptimizer sets the optimizer to optimize PDF before writing. +func (this *PdfWriter) SetOptimizer(optimizer Optimizer) { + this.optimizer = optimizer +} + +// GetOptimizer returns current PDF optimizer. 
+func (this *PdfWriter) GetOptimizer() Optimizer { + return this.optimizer +} + func (this *PdfWriter) hasObject(obj PdfObject) bool { // Check if already added. for _, o := range this.objects { @@ -438,6 +566,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) { common.Log.Trace("Write obj #%d\n", num) if pobj, isIndirect := obj.(*PdfIndirectObject); isIndirect { + this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber} outStr := fmt.Sprintf("%d 0 obj\n", num) outStr += pobj.PdfObject.DefaultWriteString() outStr += "\nendobj\n" @@ -448,6 +577,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) { // XXX/TODO: Add a default encoder if Filter not specified? // Still need to make sure is encrypted. if pobj, isStream := obj.(*PdfObjectStream); isStream { + this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber} outStr := fmt.Sprintf("%d 0 obj\n", num) outStr += pobj.PdfObjectDictionary.DefaultWriteString() outStr += "\nstream\n" @@ -457,6 +587,46 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) { return } + if ostreams, isObjStreams := obj.(*PdfObjectStreams); isObjStreams { + this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: ostreams.GenerationNumber} + outStr := fmt.Sprintf("%d 0 obj\n", num) + var offsets []string + var objData string + var offset int64 + + for index, obj := range ostreams.Elements() { + io, isIndirect := obj.(*PdfIndirectObject) + if !isIndirect { + common.Log.Error("Object streams N %d contains non indirect pdf object %v", num, obj) + } + data := io.PdfObject.DefaultWriteString() + " " + objData = objData + data + offsets = append(offsets, fmt.Sprintf("%d %d", io.ObjectNumber, offset)) + this.crossReferenceMap[int(io.ObjectNumber)] = crossReference{Type: 2, ObjectNumber: num, Index: index} + offset = offset + int64(len([]byte(data))) + } + offsetsStr := strings.Join(offsets, " ") + " " + encoder := NewFlateEncoder() + //encoder := NewRawEncoder() + dict := encoder.MakeStreamDict() + dict.Set(PdfObjectName("Type"), MakeName("ObjStm")) + n := int64(ostreams.Len()) + dict.Set(PdfObjectName("N"), MakeInteger(n)) + first := int64(len(offsetsStr)) + dict.Set(PdfObjectName("First"), MakeInteger(first)) + + data, _ := encoder.EncodeBytes([]byte(offsetsStr + objData)) + length := int64(len(data)) + + dict.Set(PdfObjectName("Length"), MakeInteger(length)) + outStr += dict.DefaultWriteString() + outStr += "\nstream\n" + this.writeString(outStr) + this.writeBytes(data) + this.writeString("\nendstream\nendobj\n") + return + } + this.writer.WriteString(obj.DefaultWriteString()) } @@ -464,20 +634,23 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) { func (this *PdfWriter) updateObjectNumbers() { // Update numbers for idx, obj := range this.objects { - if io, isIndirect := obj.(*PdfIndirectObject); isIndirect { - io.ObjectNumber = int64(idx + 1) - io.GenerationNumber = 0 - } - if so, isStream := obj.(*PdfObjectStream); isStream { - so.ObjectNumber = int64(idx + 1) - so.GenerationNumber = 0 + switch o := obj.(type) { + case *PdfIndirectObject: + o.ObjectNumber = int64(idx + 1) + o.GenerationNumber = 0 + case *PdfObjectStream: + o.ObjectNumber = int64(idx + 1) + o.GenerationNumber = 0 + case *PdfObjectStreams: + o.ObjectNumber = int64(idx + 1) + o.GenerationNumber = 0 } } } // EncryptOptions represents encryption options for an output PDF. 
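// Illustrative sketch, not part of this change: the object stream written
// above stores a header of "objectNumber offset" pairs followed by the
// serialized objects; N is the object count and First is the byte offset of
// the first object. For two small dictionaries numbered 3 and 4 the
// uncompressed stream body would look roughly like this (the writer actually
// Flate-compresses it):
//
//   << /Type /ObjStm /N 2 /First 9 /Filter /FlateDecode /Length ... >>
//   stream
//   3 0 4 11 << /A 1 >> << /B 2 >>
//   endstream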
type EncryptOptions struct { - Permissions AccessPermissions + Permissions security.Permissions Algorithm EncryptionAlgorithm } @@ -495,121 +668,40 @@ const ( // Encrypt encrypts the output file with a specified user/owner password. func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptions) error { - crypter := PdfCrypt{} - this.crypter = &crypter - - crypter.EncryptedObjects = map[PdfObject]bool{} - - crypter.CryptFilters = CryptFilters{} - algo := RC4_128bit if options != nil { algo = options.Algorithm } + perm := security.PermOwner + if options != nil { + perm = options.Permissions + } - var cf CryptFilter + var cf crypt.Filter switch algo { case RC4_128bit: - crypter.V = 2 - crypter.R = 3 - cf = NewCryptFilterV2(16) + cf = crypt.NewFilterV2(16) case AES_128bit: - this.SetVersion(1, 5) - crypter.V = 4 - crypter.R = 4 - cf = NewCryptFilterAESV2() + cf = crypt.NewFilterAESV2() case AES_256bit: - this.SetVersion(2, 0) - crypter.V = 5 - crypter.R = 6 // TODO(dennwc): a way to set R=5? - cf = NewCryptFilterAESV3() + cf = crypt.NewFilterAESV3() default: return fmt.Errorf("unsupported algorithm: %v", options.Algorithm) } - crypter.Length = cf.Length * 8 - - const ( - defaultFilter = StandardCryptFilter - ) - crypter.CryptFilters[defaultFilter] = cf - if crypter.V >= 4 { - crypter.StreamFilter = defaultFilter - crypter.StringFilter = defaultFilter + crypter, info, err := PdfCryptNewEncrypt(cf, userPass, ownerPass, perm) + if err != nil { + return err } - - // Set - crypter.P = math.MaxUint32 - crypter.EncryptMetadata = true - if options != nil { - crypter.P = int(options.Permissions.GetP()) + this.crypter = crypter + if info.Major != 0 { + this.SetVersion(info.Major, info.Minor) } + this.encryptDict = info.Encrypt - // Generate the encryption dictionary. - ed := MakeDict() - ed.Set("Filter", MakeName("Standard")) - ed.Set("P", MakeInteger(int64(crypter.P))) - ed.Set("V", MakeInteger(int64(crypter.V))) - ed.Set("R", MakeInteger(int64(crypter.R))) - ed.Set("Length", MakeInteger(int64(crypter.Length))) - this.encryptDict = ed - - // Prepare the ID object for the trailer. - hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850))) - id0 := string(hashcode[:]) - b := make([]byte, 100) - rand.Read(b) - hashcode = md5.Sum(b) - id1 := string(hashcode[:]) - common.Log.Trace("Random b: % x", b) - - this.ids = MakeArray(MakeHexString(id0), MakeHexString(id1)) - common.Log.Trace("Gen Id 0: % x", id0) - - // Generate encryption parameters - if crypter.R < 5 { - crypter.Id0 = string(id0) - - // Make the O and U objects. 
- O, err := crypter.Alg3(userPass, ownerPass) - if err != nil { - common.Log.Debug("ERROR: Error generating O for encryption (%s)", err) - return err - } - crypter.O = []byte(O) - common.Log.Trace("gen O: % x", O) - U, key, err := crypter.Alg5(userPass) - if err != nil { - common.Log.Debug("ERROR: Error generating O for encryption (%s)", err) - return err - } - common.Log.Trace("gen U: % x", U) - crypter.U = []byte(U) - crypter.EncryptionKey = key - - ed.Set("O", MakeHexString(O)) - ed.Set("U", MakeHexString(U)) - } else { // R >= 5 - err := crypter.GenerateParams(userPass, ownerPass) - if err != nil { - return err - } - ed.Set("O", MakeString(string(crypter.O))) - ed.Set("U", MakeString(string(crypter.U))) - ed.Set("OE", MakeString(string(crypter.OE))) - ed.Set("UE", MakeString(string(crypter.UE))) - ed.Set("EncryptMetadata", MakeBool(crypter.EncryptMetadata)) - if crypter.R > 5 { - ed.Set("Perms", MakeString(string(crypter.Perms))) - } - } - if crypter.V >= 4 { - if err := crypter.SaveCryptFilters(ed); err != nil { - return err - } - } + this.ids = MakeArray(MakeHexString(info.ID0), MakeHexString(info.ID1)) // Make an object to contain the encryption dictionary. - io := MakeIndirectObject(ed) + io := MakeIndirectObject(info.Encrypt) this.encryptObj = io this.addObject(io) @@ -687,23 +779,54 @@ func (this *PdfWriter) Write(writer io.Writer) error { // Set version in the catalog. this.catalog.Set("Version", MakeName(fmt.Sprintf("%d.%d", this.majorVersion, this.minorVersion))) + // Make a copy of objects prior to optimizing as this can alter the objects. + this.copyObjects() + + if this.optimizer != nil { + var err error + this.objects, err = this.optimizer.Optimize(this.objects) + if err != nil { + return err + } + } + w := bufio.NewWriter(writer) this.writer = w this.writePos = 0 + useCrossReferenceStream := this.majorVersion > 1 || (this.majorVersion == 1 && this.minorVersion > 4) + objectsInObjectStreams := make(map[PdfObject]bool) + if !useCrossReferenceStream { + for _, obj := range this.objects { + if objStm, isObjectStreams := obj.(*PdfObjectStreams); isObjectStreams { + useCrossReferenceStream = true + for _, obj := range objStm.Elements() { + objectsInObjectStreams[obj] = true + if io, isIndirectObj := obj.(*PdfIndirectObject); isIndirectObj { + objectsInObjectStreams[io.PdfObject] = true + } + } + } + } + } + + if useCrossReferenceStream && this.majorVersion == 1 && this.minorVersion < 5 { + this.minorVersion = 5 + } this.writeString(fmt.Sprintf("%%PDF-%d.%d\n", this.majorVersion, this.minorVersion)) this.writeString("%âãÏÓ\n") this.updateObjectNumbers() - offsets := []int64{} - // Write objects common.Log.Trace("Writing %d obj", len(this.objects)) + this.crossReferenceMap = make(map[int]crossReference) + this.crossReferenceMap[0] = crossReference{Type: 0, ObjectNumber: 0, Generation: 0xFFFF} for idx, obj := range this.objects { + if skip := objectsInObjectStreams[obj]; skip { + continue + } common.Log.Trace("Writing %d", idx) - offset := this.writePos - offsets = append(offsets, offset) // Encrypt prior to writing. // Encrypt dictionary should not be encrypted. @@ -713,41 +836,90 @@ func (this *PdfWriter) Write(writer io.Writer) error { common.Log.Debug("ERROR: Failed encrypting (%s)", err) return err } - } this.writeObject(idx+1, obj) } xrefOffset := this.writePos - // Write xref table. 
- this.writeString("xref\r\n") - outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.objects)+1) - this.writeString(outStr) - outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535) - this.writeString(outStr) - for _, offset := range offsets { - outStr = fmt.Sprintf("%.10d %.5d n\r\n", offset, 0) - this.writeString(outStr) - } + if useCrossReferenceStream { + + crossObjNumber := len(this.crossReferenceMap) + this.crossReferenceMap[crossObjNumber] = crossReference{Type: 1, ObjectNumber: crossObjNumber, Offset: xrefOffset} + crossReferenceData := bytes.NewBuffer(nil) + + for idx := 0; idx < len(this.crossReferenceMap); idx++ { + ref := this.crossReferenceMap[idx] + switch ref.Type { + case 0: + binary.Write(crossReferenceData, binary.BigEndian, byte(0)) + binary.Write(crossReferenceData, binary.BigEndian, uint32(0)) + binary.Write(crossReferenceData, binary.BigEndian, uint16(0xFFFF)) + case 1: + binary.Write(crossReferenceData, binary.BigEndian, byte(1)) + binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.Offset)) + binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Generation)) + case 2: + binary.Write(crossReferenceData, binary.BigEndian, byte(2)) + binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.ObjectNumber)) + binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Index)) + } + } + crossReferenceStream, err := MakeStream(crossReferenceData.Bytes(), NewFlateEncoder()) + if err != nil { + return err + } + crossReferenceStream.ObjectNumber = int64(crossObjNumber) + crossReferenceStream.PdfObjectDictionary.Set("Type", MakeName("XRef")) + crossReferenceStream.PdfObjectDictionary.Set("W", MakeArray(MakeInteger(1), MakeInteger(4), MakeInteger(2))) + crossReferenceStream.PdfObjectDictionary.Set("Index", MakeArray(MakeInteger(0), MakeInteger(crossReferenceStream.ObjectNumber+1))) + crossReferenceStream.PdfObjectDictionary.Set("Size", MakeInteger(crossReferenceStream.ObjectNumber+1)) + crossReferenceStream.PdfObjectDictionary.Set("Info", this.infoObj) + crossReferenceStream.PdfObjectDictionary.Set("Root", this.root) + // If encrypted! + if this.crypter != nil { + crossReferenceStream.Set("Encrypt", this.encryptObj) + crossReferenceStream.Set("ID", this.ids) + common.Log.Trace("Ids: %s", this.ids) + } + + this.writeObject(int(crossReferenceStream.ObjectNumber), crossReferenceStream) + + } else { + this.writeString("xref\r\n") + outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.crossReferenceMap)) + this.writeString(outStr) + for idx := 0; idx < len(this.crossReferenceMap); idx++ { + ref := this.crossReferenceMap[idx] + switch ref.Type { + case 0: + outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535) + this.writeString(outStr) + case 1: + outStr = fmt.Sprintf("%.10d %.5d n\r\n", ref.Offset, 0) + this.writeString(outStr) + } + } + + // Generate & write trailer + trailer := MakeDict() + trailer.Set("Info", this.infoObj) + trailer.Set("Root", this.root) + trailer.Set("Size", MakeInteger(int64(len(this.objects)+1))) + // If encrypted! + if this.crypter != nil { + trailer.Set("Encrypt", this.encryptObj) + trailer.Set("ID", this.ids) + common.Log.Trace("Ids: %s", this.ids) + } + this.writeString("trailer\n") + this.writeString(trailer.DefaultWriteString()) + this.writeString("\n") - // Generate & write trailer - trailer := MakeDict() - trailer.Set("Info", this.infoObj) - trailer.Set("Root", this.root) - trailer.Set("Size", MakeInteger(int64(len(this.objects)+1))) - // If encrypted! 
- if this.crypter != nil { - trailer.Set("Encrypt", this.encryptObj) - trailer.Set("ID", this.ids) - common.Log.Trace("Ids: %s", this.ids) } - this.writeString("trailer\n") - this.writeString(trailer.DefaultWriteString()) - this.writeString("\n") // Make offset reference. - outStr = fmt.Sprintf("startxref\n%d\n", xrefOffset) + outStr := fmt.Sprintf("startxref\n%d\n", xrefOffset) this.writeString(outStr) this.writeString("%%EOF\n")
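Note on the cross-reference stream written above: each entry is packed into the fixed-width layout declared by W = [1 4 2], i.e. one byte for the entry type, four bytes for the second field (the byte offset for a type 1 entry, or the containing object stream number for a type 2 entry), and two bytes for the third field (the generation for type 1, or the index within the object stream for type 2). Below is a minimal, self-contained sketch of that packing using only the standard library; the offsets and object numbers are illustrative, not taken from a real file.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	buf := bytes.NewBuffer(nil)

	// Type 0 entry: head of the free list (object 0), generation 0xFFFF.
	binary.Write(buf, binary.BigEndian, byte(0))
	binary.Write(buf, binary.BigEndian, uint32(0))
	binary.Write(buf, binary.BigEndian, uint16(0xFFFF))

	// Type 1 entry: a regular object stored at byte offset 15 in the file.
	binary.Write(buf, binary.BigEndian, byte(1))
	binary.Write(buf, binary.BigEndian, uint32(15))
	binary.Write(buf, binary.BigEndian, uint16(0))

	// Type 2 entry: an object stored as element 3 of object stream number 7.
	binary.Write(buf, binary.BigEndian, byte(2))
	binary.Write(buf, binary.BigEndian, uint32(7))
	binary.Write(buf, binary.BigEndian, uint16(3))

	// Three entries of 1+4+2 = 7 bytes each; binary.Write to a bytes.Buffer
	// never returns an error, so the errors are ignored here as in the diff.
	fmt.Printf("% x\n", buf.Bytes())
}

The stream dictionary's Index and Size entries then state which object numbers these packed records describe; the writer above emits a single section starting at object 0.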