Merge branch 'v3' into v3-enhance-forms

Gunnsteinn Hall 2018-10-13 11:32:20 +00:00
commit 3454ec67dc
47 changed files with 5247 additions and 2074 deletions


@@ -316,7 +316,7 @@ func (parser *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (Pdf
if parser.crypter != nil {
// Mark as decrypted (inside object stream) for caching.
// and avoid decrypting decrypted object.
parser.crypter.DecryptedObjects[optr] = true
parser.crypter.decryptedObjects[optr] = true
}
return optr, true, nil
} else {

File diff suppressed because it is too large.


@@ -1,266 +0,0 @@
package core
import (
"crypto/aes"
"crypto/cipher"
"crypto/md5"
"crypto/rand"
"crypto/rc4"
"fmt"
"io"
"github.com/unidoc/unidoc/common"
)
var (
cryptMethods = make(map[string]cryptFilterMethod)
)
// registerCryptFilterMethod registers a CFM.
func registerCryptFilterMethod(m cryptFilterMethod) {
cryptMethods[m.CFM()] = m
}
// getCryptFilterMethod checks if a CFM with a specified name is supported and returns its implementation.
func getCryptFilterMethod(name string) (cryptFilterMethod, error) {
f := cryptMethods[name]
if f == nil {
return nil, fmt.Errorf("unsupported crypt filter: %q", name)
}
return f, nil
}
func init() {
// register supported crypt filter methods
registerCryptFilterMethod(cryptFilterV2{})
registerCryptFilterMethod(cryptFilterAESV2{})
registerCryptFilterMethod(cryptFilterAESV3{})
}
// cryptFilterMethod is a common interface for crypt filter methods.
type cryptFilterMethod interface {
// CFM returns a name of the filter that should be used in CFM field of Encrypt dictionary.
CFM() string
// MakeKey generates an object encryption key based on the file encryption key and object numbers.
// Used only for legacy filters - AESV3 doesn't change the key for each object.
MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error)
// EncryptBytes encrypts a buffer using object encryption key, as returned by MakeKey.
// Implementation may reuse a buffer and encrypt data in-place.
EncryptBytes(p []byte, okey []byte) ([]byte, error)
// DecryptBytes decrypts a buffer using object encryption key, as returned by MakeKey.
// Implementation may reuse a buffer and decrypt data in-place.
DecryptBytes(p []byte, okey []byte) ([]byte, error)
}
// makeKeyV2 is a common object key generation shared by V2 and AESV2 crypt filters.
func makeKeyV2(objNum, genNum uint32, ekey []byte, isAES bool) ([]byte, error) {
key := make([]byte, len(ekey)+5)
for i := 0; i < len(ekey); i++ {
key[i] = ekey[i]
}
for i := 0; i < 3; i++ {
b := byte((objNum >> uint32(8*i)) & 0xff)
key[i+len(ekey)] = b
}
for i := 0; i < 2; i++ {
b := byte((genNum >> uint32(8*i)) & 0xff)
key[i+len(ekey)+3] = b
}
if isAES {
// If using the AES algorithm, extend the encryption key an
// additional 4 bytes by adding the value “sAlT”, which
// corresponds to the hexadecimal values 0x73, 0x41, 0x6C, 0x54.
key = append(key, 0x73)
key = append(key, 0x41)
key = append(key, 0x6C)
key = append(key, 0x54)
}
// Take the MD5.
h := md5.New()
h.Write(key)
hashb := h.Sum(nil)
if len(ekey)+5 < 16 {
return hashb[0 : len(ekey)+5], nil
}
return hashb, nil
}
// cryptFilterV2 is a RC4-based filter
type cryptFilterV2 struct{}
func (cryptFilterV2) CFM() string {
return CryptFilterV2
}
func (f cryptFilterV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) {
return makeKeyV2(objNum, genNum, ekey, false)
}
func (cryptFilterV2) EncryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Standard RC4 algorithm.
ciph, err := rc4.NewCipher(okey)
if err != nil {
return nil, err
}
common.Log.Trace("RC4 Encrypt: % x", buf)
ciph.XORKeyStream(buf, buf)
common.Log.Trace("to: % x", buf)
return buf, nil
}
func (cryptFilterV2) DecryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Standard RC4 algorithm.
ciph, err := rc4.NewCipher(okey)
if err != nil {
return nil, err
}
common.Log.Trace("RC4 Decrypt: % x", buf)
ciph.XORKeyStream(buf, buf)
common.Log.Trace("to: % x", buf)
return buf, nil
}
// cryptFilterAES implements a generic AES encryption and decryption algorithm used by AESV2 and AESV3 filter methods.
type cryptFilterAES struct{}
func (cryptFilterAES) EncryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Strings and streams encrypted with AES shall use a padding
// scheme that is described in Internet RFC 2898, PKCS #5:
// Password-Based Cryptography Specification Version 2.0; see
// the Bibliography. For an original message length of M,
// the pad shall consist of 16 - (M mod 16) bytes whose value
// shall also be 16 - (M mod 16).
//
// A 9-byte message has a pad of 7 bytes, each with the value
// 0x07. The pad can be unambiguously removed to determine the
// original message length when decrypting. Note that the pad is
// present when M is evenly divisible by 16; it contains 16 bytes
// of 0x10.
ciph, err := aes.NewCipher(okey)
if err != nil {
return nil, err
}
common.Log.Trace("AES Encrypt (%d): % x", len(buf), buf)
// If using the AES algorithm, the Cipher Block Chaining (CBC)
// mode, which requires an initialization vector, is used. The
// block size parameter is set to 16 bytes, and the initialization
// vector is a 16-byte random number that is stored as the first
// 16 bytes of the encrypted stream or string.
const block = aes.BlockSize // 16
pad := block - len(buf)%block
for i := 0; i < pad; i++ {
buf = append(buf, byte(pad))
}
common.Log.Trace("Padded to %d bytes", len(buf))
// Generate random 16 bytes, place in beginning of buffer.
ciphertext := make([]byte, block+len(buf))
iv := ciphertext[:block]
if _, err := io.ReadFull(rand.Reader, iv); err != nil {
return nil, err
}
mode := cipher.NewCBCEncrypter(ciph, iv)
mode.CryptBlocks(ciphertext[block:], buf)
buf = ciphertext
common.Log.Trace("to (%d): % x", len(buf), buf)
return buf, nil
}
func (cryptFilterAES) DecryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Strings and streams encrypted with AES shall use a padding
// scheme that is described in Internet RFC 2898, PKCS #5:
// Password-Based Cryptography Specification Version 2.0; see
// the Bibliography. For an original message length of M,
// the pad shall consist of 16 - (M mod 16) bytes whose value
// shall also be 16 - (M mod 16).
//
// A 9-byte message has a pad of 7 bytes, each with the value
// 0x07. The pad can be unambiguously removed to determine the
// original message length when decrypting. Note that the pad is
// present when M is evenly divisible by 16; it contains 16 bytes
// of 0x10.
ciph, err := aes.NewCipher(okey)
if err != nil {
return nil, err
}
// If using the AES algorithm, the Cipher Block Chaining (CBC)
// mode, which requires an initialization vector, is used. The
// block size parameter is set to 16 bytes, and the initialization
// vector is a 16-byte random number that is stored as the first
// 16 bytes of the encrypted stream or string.
if len(buf) < 16 {
common.Log.Debug("ERROR AES invalid buf %s", buf)
return buf, fmt.Errorf("AES: Buf len < 16 (%d)", len(buf))
}
iv := buf[:16]
buf = buf[16:]
if len(buf)%16 != 0 {
common.Log.Debug(" iv (%d): % x", len(iv), iv)
common.Log.Debug("buf (%d): % x", len(buf), buf)
return buf, fmt.Errorf("AES buf length not multiple of 16 (%d)", len(buf))
}
mode := cipher.NewCBCDecrypter(ciph, iv)
common.Log.Trace("AES Decrypt (%d): % x", len(buf), buf)
common.Log.Trace("chop AES Decrypt (%d): % x", len(buf), buf)
mode.CryptBlocks(buf, buf)
common.Log.Trace("to (%d): % x", len(buf), buf)
if len(buf) == 0 {
common.Log.Trace("Empty buf, returning empty string")
return buf, nil
}
// The padded length is indicated by the last values. Remove those.
padLen := int(buf[len(buf)-1])
if padLen >= len(buf) {
common.Log.Debug("Illegal pad length")
return buf, fmt.Errorf("Invalid pad length")
}
buf = buf[:len(buf)-padLen]
return buf, nil
}
// cryptFilterAESV2 is an AES-based filter (128 bit key, PDF 1.6)
type cryptFilterAESV2 struct {
cryptFilterAES
}
func (cryptFilterAESV2) CFM() string {
return CryptFilterAESV2
}
func (cryptFilterAESV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) {
return makeKeyV2(objNum, genNum, ekey, true)
}
// cryptFilterAESV3 is an AES-based filter (256 bit key, PDF 2.0)
type cryptFilterAESV3 struct {
cryptFilterAES
}
func (cryptFilterAESV3) CFM() string {
return CryptFilterAESV3
}
func (cryptFilterAESV3) MakeKey(_, _ uint32, ekey []byte) ([]byte, error) {
return ekey, nil
}


@@ -8,162 +8,42 @@
package core
import (
"bytes"
"fmt"
"math"
"math/rand"
"strings"
"testing"
"time"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core/security"
)
func init() {
common.SetLogger(common.ConsoleLogger{})
}
func TestPadding(t *testing.T) {
crypter := PdfCrypt{}
// Case 1 empty pass, should match padded string.
key := crypter.paddedPass([]byte(""))
if len(key) != 32 {
t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key))
}
if key[0] != 0x28 {
t.Errorf("key[0] != 0x28 (%q in %q)", key[0], key)
}
if key[31] != 0x7A {
t.Errorf("key[31] != 0x7A (%q in %q)", key[31], key)
}
// Case 2, non empty pass.
key = crypter.paddedPass([]byte("bla"))
if len(key) != 32 {
t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key))
}
if string(key[0:3]) != "bla" {
t.Errorf("Expecting start with bla (%s)", key)
}
if key[3] != 0x28 {
t.Errorf("key[3] != 0x28 (%q in %q)", key[3], key)
}
if key[31] != 0x64 {
t.Errorf("key[31] != 0x64 (%q in %q)", key[31], key)
}
}
// Test algorithm 2.
func TestAlg2(t *testing.T) {
crypter := PdfCrypt{}
crypter.V = 2
crypter.R = 3
crypter.P = -3904
crypter.Id0 = string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24,
0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4})
crypter.O = []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB}
crypter.Length = 128
crypter.EncryptMetadata = true
key := crypter.alg2([]byte(""))
keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca,
0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5}
if string(key) != string(keyExp) {
common.Log.Debug(" Key (%d): % x", len(key), key)
common.Log.Debug("KeyExp (%d): % x", len(keyExp), keyExp)
t.Errorf("alg2 -> key != expected\n")
}
}
// Test algorithm 3.
func TestAlg3(t *testing.T) {
crypter := PdfCrypt{}
crypter.V = 2
crypter.R = 3
crypter.P = -3904
crypter.Id0 = string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24,
0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4})
Oexp := []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB}
crypter.Length = 128
crypter.EncryptMetadata = true
O, err := crypter.Alg3([]byte(""), []byte("test"))
if err != nil {
t.Errorf("crypt alg3 error %s", err)
return
}
if string(O) != string(Oexp) {
common.Log.Debug(" O (%d): % x", len(O), O)
common.Log.Debug("Oexp (%d): % x", len(Oexp), Oexp)
t.Errorf("alg3 -> key != expected")
}
}
// Test algorithm 5 for computing dictionary's U (user password) value
// valid for R >= 3.
func TestAlg5(t *testing.T) {
crypter := PdfCrypt{}
crypter.V = 2
crypter.R = 3
crypter.P = -3904
crypter.Id0 = string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24,
0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4})
crypter.O = []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB}
crypter.Length = 128
crypter.EncryptMetadata = true
U, _, err := crypter.Alg5([]byte(""))
if err != nil {
t.Errorf("Error %s", err)
return
}
Uexp := []byte{0x59, 0x66, 0x38, 0x6c, 0x76, 0xfe, 0x95, 0x7d, 0x3d,
0x0d, 0x14, 0x3d, 0x36, 0xfd, 0x01, 0x3d, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
if string(U[0:16]) != string(Uexp[0:16]) {
common.Log.Info(" U (%d): % x", len(U), U)
common.Log.Info("Uexp (%d): % x", len(Uexp), Uexp)
t.Errorf("U != expected\n")
}
}
// Test decrypting. Example with V=2, R=3, using standard algorithm.
func TestDecryption1(t *testing.T) {
crypter := PdfCrypt{}
crypter.DecryptedObjects = map[PdfObject]bool{}
// Default algorithm is V2 (RC4).
crypter.CryptFilters = newCryptFiltersV2(crypter.Length)
crypter.V = 2
crypter.R = 3
crypter.P = -3904
crypter.Id0 = string([]byte{0x5f, 0x91, 0xff, 0xf2, 0x00, 0x88, 0x13,
0x5f, 0x30, 0x24, 0xd1, 0x0f, 0x28, 0x31, 0xc6, 0xfa})
crypter.O = []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB}
crypter.U = []byte{0xED, 0x5B, 0xA7, 0x76, 0xFD, 0xD8, 0xE3, 0x89,
0x4F, 0x54, 0x05, 0xC1, 0x3B, 0xFD, 0x86, 0xCF, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00}
crypter.Length = 128
crypter.EncryptMetadata = true
crypter := PdfCrypt{
encrypt: encryptDict{
V: 2,
Length: 128,
},
encryptStd: security.StdEncryptDict{
R: 3,
P: 0xfffff0c0,
EncryptMetadata: true,
O: []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB},
U: []byte{0xED, 0x5B, 0xA7, 0x76, 0xFD, 0xD8, 0xE3, 0x89,
0x4F, 0x54, 0x05, 0xC1, 0x3B, 0xFD, 0x86, 0xCF, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00},
},
id0: string([]byte{0x5f, 0x91, 0xff, 0xf2, 0x00, 0x88, 0x13,
0x5f, 0x30, 0x24, 0xd1, 0x0f, 0x28, 0x31, 0xc6, 0xfa}),
// Default algorithm is V2 (RC4).
cryptFilters: newCryptFiltersV2(128),
decryptedObjects: make(map[PdfObject]bool),
}
streamData := []byte{0xBC, 0x89, 0x86, 0x8B, 0x3E, 0xCF, 0x24, 0x1C,
0xC4, 0x88, 0xF3, 0x60, 0x74, 0x8A, 0x22, 0xE3, 0xAD, 0xF4, 0x48,
@@ -215,117 +95,3 @@ func TestDecryption1(t *testing.T) {
return
}
}
func BenchmarkAlg2b(b *testing.B) {
// hash runs a variable number of rounds, so we need to have a
// deterministic random source to make benchmark results comparable
r := rand.New(rand.NewSource(1234567))
const n = 20
pass := make([]byte, n)
r.Read(pass)
data := make([]byte, n+8+48)
r.Read(data)
user := make([]byte, 48)
r.Read(user)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = alg2b(data, pass, user)
}
}
func TestAESv3(t *testing.T) {
const keySize = 32
seed := time.Now().UnixNano()
rand := rand.New(rand.NewSource(seed))
var cases = []struct {
Name string
EncMeta bool
UserPass string
OwnerPass string
}{
{
Name: "simple", EncMeta: true,
UserPass: "user", OwnerPass: "owner",
},
{
Name: "utf8", EncMeta: false,
UserPass: "æøå-u", OwnerPass: "æøå-o",
},
{
Name: "long", EncMeta: true,
UserPass: strings.Repeat("user", 80),
OwnerPass: strings.Repeat("owner", 80),
},
}
const (
perms = 0x12345678
)
for _, R := range []int{5, 6} {
R := R
t.Run(fmt.Sprintf("R=%d", R), func(t *testing.T) {
for _, c := range cases {
c := c
t.Run(c.Name, func(t *testing.T) {
fkey := make([]byte, keySize)
rand.Read(fkey)
crypt := &PdfCrypt{
V: 5, R: R,
P: perms,
EncryptionKey: append([]byte{}, fkey...),
EncryptMetadata: c.EncMeta,
}
// generate encryption parameters
err := crypt.generateR6([]byte(c.UserPass), []byte(c.OwnerPass))
if err != nil {
t.Fatal("Failed to encrypt:", err)
}
// Perms and EncryptMetadata are checked as a part of alg2a
// decrypt using user password
crypt.EncryptionKey = nil
ok, err := crypt.alg2a([]byte(c.UserPass))
if err != nil || !ok {
t.Error("Failed to authenticate user pass:", err)
} else if !bytes.Equal(crypt.EncryptionKey, fkey) {
t.Error("wrong encryption key")
}
// decrypt using owner password
crypt.EncryptionKey = nil
ok, err = crypt.alg2a([]byte(c.OwnerPass))
if err != nil || !ok {
t.Error("Failed to authenticate owner pass:", err)
} else if !bytes.Equal(crypt.EncryptionKey, fkey) {
t.Error("wrong encryption key")
}
// try to elevate user permissions
crypt.P = math.MaxUint32
crypt.EncryptionKey = nil
ok, err = crypt.alg2a([]byte(c.UserPass))
if R == 5 {
// it's actually possible with R=5, since Perms is not generated
if err != nil || !ok {
t.Error("Failed to authenticate user pass:", err)
}
} else {
// not possible in R=6, should return an error
if err == nil || ok {
t.Error("was able to elevate permissions with R=6")
}
}
})
}
})
}
}


@@ -18,6 +18,7 @@ import (
"strings"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core/security"
)
// Regular Expressions for parsing and identifying object signatures.
@@ -53,7 +54,7 @@ type PdfParser struct {
streamLengthReferenceLookupInProgress map[int64]bool
}
// Version holds the PDF version information for a file parsed by PdfParser
// Version represents a version of a PDF standard.
type Version struct {
Major int
Minor int
@@ -76,7 +77,7 @@ func (parser *PdfParser) GetCrypter() *PdfCrypt {
// IsAuthenticated returns true if the PDF has already been authenticated for accessing.
func (parser *PdfParser) IsAuthenticated() bool {
return parser.crypter.Authenticated
return parser.crypter.authenticated
}
// GetTrailer returns the PDF's trailer dictionary. The trailer dictionary is typically the starting point for a PDF,
@@ -1604,7 +1605,7 @@ func (parser *PdfParser) IsEncrypted() (bool, error) {
return false, fmt.Errorf("unsupported type: %T", e)
}
crypter, err := PdfCryptMakeNew(parser, dict, parser.trailer)
crypter, err := PdfCryptNewDecrypt(parser, dict, parser.trailer)
if err != nil {
return false, err
}
@@ -1618,11 +1619,11 @@ func (parser *PdfParser) IsEncrypted() (bool, error) {
case *PdfObjectReference:
crypter.decryptedObjNum[int(f.ObjectNumber)] = struct{}{}
case *PdfIndirectObject:
crypter.DecryptedObjects[f] = true
crypter.decryptedObjects[f] = true
crypter.decryptedObjNum[int(f.ObjectNumber)] = struct{}{}
}
}
parser.crypter = &crypter
parser.crypter = crypter
common.Log.Trace("Crypter object %b", crypter)
return true, nil
}
@@ -1642,6 +1643,7 @@ func (parser *PdfParser) Decrypt(password []byte) (bool, error) {
}
if !authenticated {
// TODO(dennwc): R6 handler will try it automatically, make R4 do the same
authenticated, err = parser.crypter.authenticate([]byte(""))
}
@@ -1654,21 +1656,11 @@
// The bool flag indicates that the user can access and view the file.
// The AccessPermissions shows what access the user has for editing etc.
// An error is returned if there was a problem performing the authentication.
func (parser *PdfParser) CheckAccessRights(password []byte) (bool, AccessPermissions, error) {
func (parser *PdfParser) CheckAccessRights(password []byte) (bool, security.Permissions, error) {
// Also build the encryption/decryption key.
if parser.crypter == nil {
// If the crypter is not set, the file is not encrypted and we can assume full access permissions.
perms := AccessPermissions{}
perms.Printing = true
perms.Modify = true
perms.FillForms = true
perms.RotateInsert = true
perms.ExtractGraphics = true
perms.DisabilityExtract = true
perms.Annotate = true
perms.FullPrintQuality = true
return true, perms, nil
return true, security.PermOwner, nil
}
return parser.crypter.checkAccessRights(password)
}
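Editorial aside, not part of the commit: the reworked CheckAccessRights now reports security.Permissions instead of the old AccessPermissions struct. A minimal caller sketch, assuming core.NewParser for construction and a placeholder file path and password:

package main

import (
    "fmt"
    "os"

    pdfcore "github.com/unidoc/unidoc/pdf/core"
    "github.com/unidoc/unidoc/pdf/core/security"
)

// checkPrinting reports whether the given password grants printing rights.
func checkPrinting(path string, pass []byte) (bool, error) {
    f, err := os.Open(path)
    if err != nil {
        return false, err
    }
    defer f.Close()

    parser, err := pdfcore.NewParser(f)
    if err != nil {
        return false, err
    }
    ok, perms, err := parser.CheckAccessRights(pass)
    if err != nil {
        return false, err
    }
    return ok && perms.Allowed(security.PermPrinting), nil
}

func main() {
    ok, err := checkPrinting("input.pdf", []byte("user-password"))
    fmt.Println(ok, err)
}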


@@ -78,6 +78,13 @@ type PdfObjectStream struct {
Stream []byte
}
// PdfObjectStreams represents the primitive PDF object streams.
// 7.5.7 Object Streams (page 45).
type PdfObjectStreams struct {
PdfObjectReference
vec []PdfObject
}
// MakeDict creates and returns an empty PdfObjectDictionary.
func MakeDict() *PdfObjectDictionary {
d := &PdfObjectDictionary{}
@@ -219,6 +226,16 @@ func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error
return stream, nil
}
// MakeObjectStreams creates a PdfObjectStreams from a list of PdfObjects.
func MakeObjectStreams(objects ...PdfObject) *PdfObjectStreams {
streams := &PdfObjectStreams{}
streams.vec = []PdfObject{}
for _, obj := range objects {
streams.vec = append(streams.vec, obj)
}
return streams
}
// String returns the state of the bool as "true" or "false".
func (bool *PdfObjectBool) String() string {
if *bool {
@@ -617,6 +634,16 @@ func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject {
return val
}
// GetString is a helper for Get that returns a string value.
// Returns false if the key is missing or a value is not a string.
func (d *PdfObjectDictionary) GetString(key PdfObjectName) (string, bool) {
val, ok := d.dict[key].(*PdfObjectString)
if !ok {
return "", false
}
return val.Str(), true
}
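// Editorial note, not part of the commit: a hypothetical use of the new GetString helper,
// relying on the existing MakeDict/MakeString/Set primitives:
//
//    d := MakeDict()
//    d.Set("Title", MakeString("example"))
//    if title, ok := d.GetString("Title"); ok {
//        fmt.Println(title) // "example"
//    }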
// Keys returns the list of keys in the dictionary.
func (d *PdfObjectDictionary) Keys() []PdfObjectName {
return d.keys
@@ -896,3 +923,53 @@ func GetStream(obj PdfObject) (stream *PdfObjectStream, found bool) {
stream, found = obj.(*PdfObjectStream)
return stream, found
}
// GetObjectStreams returns the *PdfObjectStreams represented by the PdfObject. On type mismatch the found bool flag is
// false and a nil pointer is returned.
func GetObjectStreams(obj PdfObject) (objStream *PdfObjectStreams, found bool) {
objStream, found = obj.(*PdfObjectStreams)
return objStream, found
}
// Append appends PdfObject(s) to the streams.
func (streams *PdfObjectStreams) Append(objects ...PdfObject) {
if streams == nil {
common.Log.Debug("Warn - Attempt to append to a nil streams")
return
}
if streams.vec == nil {
streams.vec = []PdfObject{}
}
for _, obj := range objects {
streams.vec = append(streams.vec, obj)
}
}
// Elements returns a slice of the PdfObject elements in the streams.
// Preferred over accessing the streams directly as the type may change in future major versions (v3).
func (streams *PdfObjectStreams) Elements() []PdfObject {
if streams == nil {
return nil
}
return streams.vec
}
// String returns a string describing `streams`.
func (streams *PdfObjectStreams) String() string {
return fmt.Sprintf("Object stream %d", streams.ObjectNumber)
}
// Len returns the number of elements in the streams.
func (streams *PdfObjectStreams) Len() int {
if streams == nil {
return 0
}
return len(streams.vec)
}
// DefaultWriteString outputs the object as it is to be written to file.
func (streams *PdfObjectStreams) DefaultWriteString() string {
outStr := fmt.Sprintf("%d 0 R", (*streams).ObjectNumber)
return outStr
}
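A short editorial sketch, not part of the commit, of the new PdfObjectStreams API; the objects placed in it are arbitrary placeholders:

package main

import (
    "fmt"

    "github.com/unidoc/unidoc/pdf/core"
)

func main() {
    // Collect a few primitives into an object stream container.
    streams := core.MakeObjectStreams(core.MakeDict(), core.MakeString("hello"))
    streams.Append(core.MakeInteger(42))

    fmt.Println(streams.Len()) // 3
    for _, obj := range streams.Elements() {
        fmt.Printf("%T\n", obj)
    }
}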

pdf/core/security/auth.go Normal file

@@ -0,0 +1,16 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
// AuthEvent is an event type that triggers authentication.
type AuthEvent string
const (
// EventDocOpen is an event triggered when opening the document.
EventDocOpen = AuthEvent("DocOpen")
// EventEFOpen is an event triggered when accessing an embedded file.
EventEFOpen = AuthEvent("EFOpen")
)


@@ -0,0 +1,61 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package crypt
import "fmt"
func init() {
registerFilter("AESV2", newFilterAESV2)
}
// NewFilterAESV2 creates an AES-based filter with a 128 bit key (AESV2).
func NewFilterAESV2() Filter {
f, err := newFilterAESV2(FilterDict{})
if err != nil {
panic(err)
}
return f
}
func newFilterAESV2(d FilterDict) (Filter, error) {
if d.Length != 0 && d.Length != 16 {
return nil, fmt.Errorf("Invalid AESV2 crypt filter length (%d)", d.Length)
}
return filterAESV2{}, nil
}
var _ Filter = filterAESV2{}
// filterAESV2 is an AES-based filter (128 bit key, PDF 1.6)
type filterAESV2 struct {
filterAES
}
// PDFVersion implements Filter interface.
func (filterAESV2) PDFVersion() [2]int {
return [2]int{1, 5}
}
// HandlerVersion implements Filter interface.
func (filterAESV2) HandlerVersion() (V, R int) {
V, R = 4, 4
return
}
// Name implements Filter interface.
func (filterAESV2) Name() string {
return "AESV2"
}
// KeyLength implements Filter interface.
func (filterAESV2) KeyLength() int {
return 128 / 8
}
// MakeKey implements Filter interface.
func (filterAESV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) {
return makeKeyV2(objNum, genNum, ekey, true)
}


@@ -0,0 +1,185 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package crypt
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"fmt"
"io"
"github.com/unidoc/unidoc/common"
)
func init() {
registerFilter("AESV3", newFilterAESV3)
}
// NewFilterAESV3 creates an AES-based filter with a 256 bit key (AESV3).
func NewFilterAESV3() Filter {
f, err := newFilterAESV3(FilterDict{})
if err != nil {
panic(err)
}
return f
}
func newFilterAESV3(d FilterDict) (Filter, error) {
if d.Length != 0 && d.Length != 32 {
return nil, fmt.Errorf("Invalid AESV3 crypt filter length (%d)", d.Length)
}
return filterAESV3{}, nil
}
// filterAES implements a generic AES encryption and decryption algorithm used by AESV2 and AESV3 filter methods.
type filterAES struct{}
func (filterAES) EncryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Strings and streams encrypted with AES shall use a padding
// scheme that is described in Internet RFC 2898, PKCS #5:
// Password-Based Cryptography Specification Version 2.0; see
// the Bibliography. For an original message length of M,
// the pad shall consist of 16 - (M mod 16) bytes whose value
// shall also be 16 - (M mod 16).
//
// A 9-byte message has a pad of 7 bytes, each with the value
// 0x07. The pad can be unambiguously removed to determine the
// original message length when decrypting. Note that the pad is
// present when M is evenly divisible by 16; it contains 16 bytes
// of 0x10.
ciph, err := aes.NewCipher(okey)
if err != nil {
return nil, err
}
common.Log.Trace("AES Encrypt (%d): % x", len(buf), buf)
// If using the AES algorithm, the Cipher Block Chaining (CBC)
// mode, which requires an initialization vector, is used. The
// block size parameter is set to 16 bytes, and the initialization
// vector is a 16-byte random number that is stored as the first
// 16 bytes of the encrypted stream or string.
const block = aes.BlockSize // 16
pad := block - len(buf)%block
for i := 0; i < pad; i++ {
buf = append(buf, byte(pad))
}
common.Log.Trace("Padded to %d bytes", len(buf))
// Generate random 16 bytes, place in beginning of buffer.
ciphertext := make([]byte, block+len(buf))
iv := ciphertext[:block]
if _, err := io.ReadFull(rand.Reader, iv); err != nil {
return nil, err
}
mode := cipher.NewCBCEncrypter(ciph, iv)
mode.CryptBlocks(ciphertext[block:], buf)
buf = ciphertext
common.Log.Trace("to (%d): % x", len(buf), buf)
return buf, nil
}
func (filterAES) DecryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Strings and streams encrypted with AES shall use a padding
// scheme that is described in Internet RFC 2898, PKCS #5:
// Password-Based Cryptography Specification Version 2.0; see
// the Bibliography. For an original message length of M,
// the pad shall consist of 16 - (M mod 16) bytes whose value
// shall also be 16 - (M mod 16).
//
// A 9-byte message has a pad of 7 bytes, each with the value
// 0x07. The pad can be unambiguously removed to determine the
// original message length when decrypting. Note that the pad is
// present when M is evenly divisible by 16; it contains 16 bytes
// of 0x10.
ciph, err := aes.NewCipher(okey)
if err != nil {
return nil, err
}
// If using the AES algorithm, the Cipher Block Chaining (CBC)
// mode, which requires an initialization vector, is used. The
// block size parameter is set to 16 bytes, and the initialization
// vector is a 16-byte random number that is stored as the first
// 16 bytes of the encrypted stream or string.
if len(buf) < 16 {
common.Log.Debug("ERROR AES invalid buf %s", buf)
return buf, fmt.Errorf("AES: Buf len < 16 (%d)", len(buf))
}
iv := buf[:16]
buf = buf[16:]
if len(buf)%16 != 0 {
common.Log.Debug(" iv (%d): % x", len(iv), iv)
common.Log.Debug("buf (%d): % x", len(buf), buf)
return buf, fmt.Errorf("AES buf length not multiple of 16 (%d)", len(buf))
}
mode := cipher.NewCBCDecrypter(ciph, iv)
common.Log.Trace("AES Decrypt (%d): % x", len(buf), buf)
common.Log.Trace("chop AES Decrypt (%d): % x", len(buf), buf)
mode.CryptBlocks(buf, buf)
common.Log.Trace("to (%d): % x", len(buf), buf)
if len(buf) == 0 {
common.Log.Trace("Empty buf, returning empty string")
return buf, nil
}
// The padded length is indicated by the last values. Remove those.
padLen := int(buf[len(buf)-1])
if padLen >= len(buf) {
common.Log.Debug("Illegal pad length")
return buf, fmt.Errorf("Invalid pad length")
}
buf = buf[:len(buf)-padLen]
return buf, nil
}
var _ Filter = filterAESV3{}
// filterAESV3 is an AES-based filter (256 bit key, PDF 2.0)
type filterAESV3 struct {
filterAES
}
// PDFVersion implements Filter interface.
func (filterAESV3) PDFVersion() [2]int {
return [2]int{2, 0}
}
// HandlerVersion implements Filter interface.
func (filterAESV3) HandlerVersion() (V, R int) {
V, R = 5, 6
return
}
// Name implements Filter interface.
func (filterAESV3) Name() string {
return "AESV3"
}
// KeyLength implements Filter interface.
func (filterAESV3) KeyLength() int {
return 256 / 8
}
// MakeKey implements Filter interface.
func (filterAESV3) MakeKey(_, _ uint32, ekey []byte) ([]byte, error) {
return ekey, nil // document encryption key == object encryption key
}
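An editorial round-trip sketch for the AESV3 filter, not part of the commit; the key is a random placeholder and the import path assumes the new package lives under pdf/core/security/crypt:

package main

import (
    "bytes"
    "crypto/rand"
    "fmt"

    "github.com/unidoc/unidoc/pdf/core/security/crypt"
)

func main() {
    f := crypt.NewFilterAESV3()

    key := make([]byte, 32) // with AESV3 the file key doubles as the object key
    if _, err := rand.Read(key); err != nil {
        panic(err)
    }

    msg := []byte("attack at dawn") // 14 bytes: padded to 16, then a 16-byte IV is prepended
    enc, err := f.EncryptBytes(append([]byte{}, msg...), key)
    if err != nil {
        panic(err)
    }
    dec, err := f.DecryptBytes(enc, key)
    if err != nil {
        panic(err)
    }
    fmt.Println(len(enc), bytes.Equal(dec, msg)) // 32 true
}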


@@ -0,0 +1,140 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package crypt
import (
"crypto/md5"
"crypto/rc4"
"fmt"
"github.com/unidoc/unidoc/common"
)
func init() {
registerFilter("V2", newFilterV2)
}
// NewFilterV2 creates a RC4-based filter with a specified key length (in bytes).
func NewFilterV2(length int) Filter {
f, err := newFilterV2(FilterDict{Length: length})
if err != nil {
panic(err)
}
return f
}
// newFilterV2 creates a RC4-based filter from a Filter dictionary.
func newFilterV2(d FilterDict) (Filter, error) {
if d.Length%8 != 0 {
return nil, fmt.Errorf("Crypt filter length not multiple of 8 (%d)", d.Length)
}
// Standard security handler expresses the length in multiples of 8 (16 means 128)
// We only deal with standard so far. (Public key not supported yet).
if d.Length < 5 || d.Length > 16 {
if d.Length == 40 || d.Length == 64 || d.Length == 128 {
common.Log.Debug("STANDARD VIOLATION: Crypt Length appears to be in bits rather than bytes - assuming bits (%d)", d.Length)
d.Length /= 8
} else {
return nil, fmt.Errorf("Crypt filter length not in range 40 - 128 bit (%d)", d.Length)
}
}
return filterV2{length: d.Length}, nil
}
// makeKeyV2 is a common object key generation shared by V2 and AESV2 crypt filters.
func makeKeyV2(objNum, genNum uint32, ekey []byte, isAES bool) ([]byte, error) {
key := make([]byte, len(ekey)+5)
for i := 0; i < len(ekey); i++ {
key[i] = ekey[i]
}
for i := 0; i < 3; i++ {
b := byte((objNum >> uint32(8*i)) & 0xff)
key[i+len(ekey)] = b
}
for i := 0; i < 2; i++ {
b := byte((genNum >> uint32(8*i)) & 0xff)
key[i+len(ekey)+3] = b
}
if isAES {
// If using the AES algorithm, extend the encryption key an
// additional 4 bytes by adding the value “sAlT”, which
// corresponds to the hexadecimal values 0x73, 0x41, 0x6C, 0x54.
key = append(key, 0x73)
key = append(key, 0x41)
key = append(key, 0x6C)
key = append(key, 0x54)
}
// Take the MD5.
h := md5.New()
h.Write(key)
hashb := h.Sum(nil)
if len(ekey)+5 < 16 {
return hashb[0 : len(ekey)+5], nil
}
return hashb, nil
}
var _ Filter = filterV2{}
// filterV2 is a RC4-based filter
type filterV2 struct {
length int
}
// PDFVersion implements Filter interface.
func (f filterV2) PDFVersion() [2]int {
return [2]int{} // TODO(dennwc): unspecified; check what it should be
}
// HandlerVersion implements Filter interface.
func (f filterV2) HandlerVersion() (V, R int) {
V, R = 2, 3
return
}
// Name implements Filter interface.
func (filterV2) Name() string {
return "V2"
}
// KeyLength implements Filter interface.
func (f filterV2) KeyLength() int {
return f.length
}
// MakeKey implements Filter interface.
func (f filterV2) MakeKey(objNum, genNum uint32, ekey []byte) ([]byte, error) {
return makeKeyV2(objNum, genNum, ekey, false)
}
// EncryptBytes implements Filter interface.
func (filterV2) EncryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Standard RC4 algorithm.
ciph, err := rc4.NewCipher(okey)
if err != nil {
return nil, err
}
common.Log.Trace("RC4 Encrypt: % x", buf)
ciph.XORKeyStream(buf, buf)
common.Log.Trace("to: % x", buf)
return buf, nil
}
// DecryptBytes implements Filter interface.
func (filterV2) DecryptBytes(buf []byte, okey []byte) ([]byte, error) {
// Standard RC4 algorithm.
ciph, err := rc4.NewCipher(okey)
if err != nil {
return nil, err
}
common.Log.Trace("RC4 Decrypt: % x", buf)
ciph.XORKeyStream(buf, buf)
common.Log.Trace("to: % x", buf)
return buf, nil
}
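A matching editorial sketch for the legacy V2 (RC4) filter, not part of the commit, showing the per-object key derivation; the file key and object numbers are placeholders, and the import path is assumed as above:

package main

import (
    "bytes"
    "fmt"

    "github.com/unidoc/unidoc/pdf/core/security/crypt"
)

func main() {
    f := crypt.NewFilterV2(16) // 128-bit RC4 filter

    fkey := make([]byte, 16) // placeholder file encryption key
    // makeKeyV2: MD5(fkey || objNum[3 bytes LE] || genNum[2 bytes LE]), truncated for short keys.
    okey, err := f.MakeKey(12, 0, fkey)
    if err != nil {
        panic(err)
    }

    msg := []byte("stream data")
    enc, _ := f.EncryptBytes(append([]byte{}, msg...), okey)
    dec, _ := f.DecryptBytes(enc, okey)
    fmt.Println(bytes.Equal(dec, msg)) // true: RC4 decryption is the same XOR stream
}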


@@ -0,0 +1,113 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package crypt
import (
"fmt"
"github.com/unidoc/unidoc/pdf/core/security"
)
var (
filterMethods = make(map[string]filterFunc)
)
// filterFunc is used to construct crypt filters from CryptFilter dictionary
type filterFunc func(d FilterDict) (Filter, error)
// Filter is a common interface for crypt filter methods.
type Filter interface {
// Name returns a name of the filter that should be used in CFM field of Encrypt dictionary.
Name() string
// KeyLength returns a length of the encryption key in bytes.
KeyLength() int
// PDFVersion reports the minimal version of PDF document that introduced this filter.
PDFVersion() [2]int
// HandlerVersion reports V and R parameters that should be used for this filter.
HandlerVersion() (V, R int)
// MakeKey generates an object encryption key based on the file encryption key and object numbers.
// Used only for legacy filters - AESV3 doesn't change the key for each object.
MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error)
// EncryptBytes encrypts a buffer using object encryption key, as returned by MakeKey.
// Implementation may reuse a buffer and encrypt data in-place.
EncryptBytes(p []byte, okey []byte) ([]byte, error)
// DecryptBytes decrypts a buffer using object encryption key, as returned by MakeKey.
// Implementation may reuse a buffer and decrypt data in-place.
DecryptBytes(p []byte, okey []byte) ([]byte, error)
}
// NewFilter creates CryptFilter from a corresponding dictionary.
func NewFilter(d FilterDict) (Filter, error) {
fnc, err := getFilter(d.CFM)
if err != nil {
return nil, err
}
cf, err := fnc(d)
if err != nil {
return nil, err
}
return cf, nil
}
// NewIdentity creates an identity filter that bypasses all data without changes.
func NewIdentity() Filter {
return filterIdentity{}
}
// FilterDict represents information from a CryptFilter dictionary.
type FilterDict struct {
CFM string // The method used, if any, by the PDF reader to decrypt data.
AuthEvent security.AuthEvent
Length int // in bytes
}
// registerFilter registers a supported crypt filter method.
// Table 25, CFM (page 92)
func registerFilter(name string, fnc filterFunc) {
if _, ok := filterMethods[name]; ok {
panic("already registered")
}
filterMethods[name] = fnc
}
// getFilter checks if a CFM with a specified name is supported and returns its implementation.
func getFilter(name string) (filterFunc, error) {
f := filterMethods[string(name)]
if f == nil {
return nil, fmt.Errorf("unsupported crypt filter: %q", name)
}
return f, nil
}
type filterIdentity struct{}
func (filterIdentity) PDFVersion() [2]int {
return [2]int{}
}
func (filterIdentity) HandlerVersion() (V, R int) {
return
}
func (filterIdentity) Name() string {
return "Identity"
}
func (filterIdentity) KeyLength() int {
return 0
}
func (filterIdentity) MakeKey(objNum, genNum uint32, fkey []byte) ([]byte, error) {
return fkey, nil
}
func (filterIdentity) EncryptBytes(p []byte, okey []byte) ([]byte, error) {
return p, nil
}
func (filterIdentity) DecryptBytes(p []byte, okey []byte) ([]byte, error) {
return p, nil
}
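To tie the registry together, an editorial sketch, not part of the commit, of building filters from CryptFilter dictionaries (import path assumed as above):

package main

import (
    "fmt"

    "github.com/unidoc/unidoc/pdf/core/security/crypt"
)

func main() {
    // A registered CFM name dispatches to its constructor.
    f, err := crypt.NewFilter(crypt.FilterDict{CFM: "AESV2", Length: 16})
    if err != nil {
        panic(err)
    }
    fmt.Println(f.Name(), f.KeyLength()) // AESV2 16

    // An unknown CFM name is rejected by the registry.
    if _, err := crypt.NewFilter(crypt.FilterDict{CFM: "NotAFilter"}); err != nil {
        fmt.Println(err) // unsupported crypt filter: "NotAFilter"
    }
}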


@@ -3,7 +3,7 @@
* file 'LICENSE.md', which is part of this source code package.
*/
package core
package security
import "crypto/cipher"


@@ -0,0 +1,32 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
// StdHandler is an interface for standard security handlers.
type StdHandler interface {
// GenerateParams uses owner and user passwords to set encryption parameters and generate an encryption key.
// It assumes that R, P and EncryptMetadata are already set.
GenerateParams(d *StdEncryptDict, ownerPass, userPass []byte) ([]byte, error)
// Authenticate uses encryption dictionary parameters and the password to calculate
// the document encryption key. It also returns permissions that should be granted to a user.
// In case of failed authentication, it returns empty key and zero permissions with no error.
Authenticate(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error)
}
// StdEncryptDict is a set of additional fields used in standard encryption dictionary.
type StdEncryptDict struct {
R int // (Required) A number specifying which revision of the standard security handler shall be used.
P Permissions
EncryptMetadata bool // Indicates whether the document-level metadata stream shall be encrypted.
// set by security handlers:
O, U []byte
OE, UE []byte // R=6
Perms []byte // An encrypted copy of P (16 bytes). Used to verify permissions. R=6
}
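For context, an editorial sketch, not part of the commit, of the GenerateParams/Authenticate round trip, using the R<=4 handler added later in this commit; the trailer ID and passwords are placeholders:

package main

import (
    "bytes"
    "fmt"

    "github.com/unidoc/unidoc/pdf/core/security"
)

func main() {
    d := &security.StdEncryptDict{R: 3, P: security.PermPrinting, EncryptMetadata: true}
    h := security.NewHandlerR4("0123456789abcdef", 128) // placeholder trailer ID[0], 128-bit key

    // Writing: derive O/U and the file encryption key from the passwords.
    ekey, err := h.GenerateParams(d, []byte("owner"), []byte("user"))
    if err != nil {
        panic(err)
    }

    // Reading: the user password recovers the same key plus the granted permissions.
    key, perms, err := h.Authenticate(d, []byte("user"))
    if err != nil {
        panic(err)
    }
    fmt.Println(bytes.Equal(key, ekey), perms.Allowed(security.PermPrinting)) // true true
}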


@@ -0,0 +1,38 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
import "math"
// Permissions is a bitmask of access permissions for a PDF file.
type Permissions uint32
const (
// PermOwner grants all permissions.
PermOwner = Permissions(math.MaxUint32)
// PermPrinting allows printing the document (at low quality unless PermFullPrintQuality is also granted).
PermPrinting = Permissions(1 << 2)
// PermModify allows modifying the document.
PermModify = Permissions(1 << 3)
// PermExtractGraphics allows extracting graphics from the document.
PermExtractGraphics = Permissions(1 << 4)
// PermAnnotate allows annotating the document.
PermAnnotate = Permissions(1 << 5)
// PermFillForms allows form filling even when annotation (PermAnnotate) is disabled; if annotation is enabled, this flag is not consulted.
PermFillForms = Permissions(1 << 8)
// PermDisabilityExtract allows extracting graphics in accessibility mode.
PermDisabilityExtract = Permissions(1 << 9)
// PermRotateInsert allows rotating pages and editing the page order.
PermRotateInsert = Permissions(1 << 10)
// PermFullPrintQuality allows printing at full quality; without it (and with PermPrinting set) only low-resolution printing is permitted.
PermFullPrintQuality = Permissions(1 << 11)
)
// Allowed checks if a set of permissions can be granted.
func (p Permissions) Allowed(p2 Permissions) bool {
return p&p2 == p2
}
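A tiny editorial example, not part of the commit, of querying the bitmask:

package main

import (
    "fmt"

    "github.com/unidoc/unidoc/pdf/core/security"
)

func main() {
    granted := security.PermPrinting | security.PermFillForms
    fmt.Println(granted.Allowed(security.PermPrinting)) // true
    fmt.Println(granted.Allowed(security.PermModify))   // false
    // Allowed requires every requested bit to be present.
    fmt.Println(granted.Allowed(security.PermPrinting | security.PermModify)) // false
}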


@@ -0,0 +1,356 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
import (
"bytes"
"crypto/md5"
"crypto/rand"
"crypto/rc4"
"errors"
"github.com/unidoc/unidoc/common"
)
var _ StdHandler = stdHandlerR4{}
const padding = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF" +
"\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C" +
"\xA9\xFE\x64\x53\x69\x7A"
// NewHandlerR4 creates a new standard security handler for R<=4.
func NewHandlerR4(id0 string, length int) StdHandler {
return stdHandlerR4{ID0: id0, Length: length}
}
// stdHandlerR4 is a standard security handler for R<=4.
// It uses RC4 and MD5 to generate encryption parameters.
// This legacy handler also requires Length parameter from
// Encrypt dictionary and ID0 from the trailer.
type stdHandlerR4 struct {
Length int
ID0 string
}
func (sh stdHandlerR4) paddedPass(pass []byte) []byte {
key := make([]byte, 32)
if len(pass) >= 32 {
for i := 0; i < 32; i++ {
key[i] = pass[i]
}
} else {
for i := 0; i < len(pass); i++ {
key[i] = pass[i]
}
for i := len(pass); i < 32; i++ {
key[i] = padding[i-len(pass)]
}
}
return key
}
// alg2 computes an encryption key.
func (sh stdHandlerR4) alg2(d *StdEncryptDict, pass []byte) []byte {
common.Log.Trace("alg2")
key := sh.paddedPass(pass)
h := md5.New()
h.Write(key)
// Pass O.
h.Write(d.O)
// Pass P (Lower order byte first).
var p = uint32(d.P)
var pb []byte
for i := 0; i < 4; i++ {
pb = append(pb, byte(((p >> uint(8*i)) & 0xff)))
}
h.Write(pb)
common.Log.Trace("go P: % x", pb)
// Pass ID[0] from the trailer
h.Write([]byte(sh.ID0))
common.Log.Trace("this.R = %d encryptMetadata %v", d.R, d.EncryptMetadata)
if (d.R >= 4) && !d.EncryptMetadata {
h.Write([]byte{0xff, 0xff, 0xff, 0xff})
}
hashb := h.Sum(nil)
if d.R >= 3 {
for i := 0; i < 50; i++ {
h = md5.New()
h.Write(hashb[0 : sh.Length/8])
hashb = h.Sum(nil)
}
}
if d.R >= 3 {
return hashb[0 : sh.Length/8]
}
return hashb[0:5]
}
// Create the RC4 encryption key.
func (sh stdHandlerR4) alg3Key(R int, pass []byte) []byte {
h := md5.New()
okey := sh.paddedPass(pass)
h.Write(okey)
if R >= 3 {
for i := 0; i < 50; i++ {
hashb := h.Sum(nil)
h = md5.New()
h.Write(hashb)
}
}
encKey := h.Sum(nil)
if R == 2 {
encKey = encKey[0:5]
} else {
encKey = encKey[0 : sh.Length/8]
}
return encKey
}
// alg3 computes the encryption dictionary's O (owner password) value.
func (sh stdHandlerR4) alg3(R int, upass, opass []byte) ([]byte, error) {
var encKey []byte
if len(opass) > 0 {
encKey = sh.alg3Key(R, opass)
} else {
encKey = sh.alg3Key(R, upass)
}
ociph, err := rc4.NewCipher(encKey)
if err != nil {
return nil, errors.New("Failed rc4 ciph")
}
ukey := sh.paddedPass(upass)
encrypted := make([]byte, len(ukey))
ociph.XORKeyStream(encrypted, ukey)
if R >= 3 {
encKey2 := make([]byte, len(encKey))
for i := 0; i < 19; i++ {
for j := 0; j < len(encKey); j++ {
encKey2[j] = encKey[j] ^ byte(i+1)
}
ciph, err := rc4.NewCipher(encKey2)
if err != nil {
return nil, errors.New("Failed rc4 ciph")
}
ciph.XORKeyStream(encrypted, encrypted)
}
}
return encrypted, nil
}
// alg4 computes the encryption dictionary's U (user password) value (Security handlers of revision 2).
func (sh stdHandlerR4) alg4(ekey []byte, upass []byte) ([]byte, error) {
ciph, err := rc4.NewCipher(ekey)
if err != nil {
return nil, errors.New("Failed rc4 ciph")
}
s := []byte(padding)
encrypted := make([]byte, len(s))
ciph.XORKeyStream(encrypted, s)
return encrypted, nil
}
// alg5 computes the encryption dictionary's U (user password) value (Security handlers of revision 3 or greater).
func (sh stdHandlerR4) alg5(ekey []byte, upass []byte) ([]byte, error) {
h := md5.New()
h.Write([]byte(padding))
h.Write([]byte(sh.ID0))
hash := h.Sum(nil)
common.Log.Trace("alg5")
common.Log.Trace("ekey: % x", ekey)
common.Log.Trace("ID: % x", sh.ID0)
if len(hash) != 16 {
return nil, errors.New("Hash length not 16 bytes")
}
ciph, err := rc4.NewCipher(ekey)
if err != nil {
return nil, errors.New("Failed rc4 ciph")
}
encrypted := make([]byte, 16)
ciph.XORKeyStream(encrypted, hash)
// Do the following 19 times: Take the output from the previous
// invocation of the RC4 function and pass it as input to a new
// invocation of the function; use an encryption key generated by
// taking each byte of the original encryption key obtained in step
// (a) and performing an XOR (exclusive or) operation between that
// byte and the single-byte value of the iteration counter (from 1 to 19).
ekey2 := make([]byte, len(ekey))
for i := 0; i < 19; i++ {
for j := 0; j < len(ekey); j++ {
ekey2[j] = ekey[j] ^ byte(i+1)
}
ciph, err = rc4.NewCipher(ekey2)
if err != nil {
return nil, errors.New("Failed rc4 ciph")
}
ciph.XORKeyStream(encrypted, encrypted)
common.Log.Trace("i = %d, ekey: % x", i, ekey2)
common.Log.Trace("i = %d -> % x", i, encrypted)
}
bb := make([]byte, 32)
for i := 0; i < 16; i++ {
bb[i] = encrypted[i]
}
// Append 16 bytes of arbitrary padding to the output from the final
// invocation of the RC4 function and store the 32-byte result as
// the value of the U entry in the encryption dictionary.
_, err = rand.Read(bb[16:32])
if err != nil {
return nil, errors.New("Failed to gen rand number")
}
return bb, nil
}
// alg6 authenticates the user password and returns the document encryption key.
// It returns a nil key in case authentication failed.
func (sh stdHandlerR4) alg6(d *StdEncryptDict, upass []byte) ([]byte, error) {
var (
uo []byte
err error
)
ekey := sh.alg2(d, upass)
if d.R == 2 {
uo, err = sh.alg4(ekey, upass)
} else if d.R >= 3 {
uo, err = sh.alg5(ekey, upass)
} else {
return nil, errors.New("invalid R")
}
if err != nil {
return nil, err
}
common.Log.Trace("check: % x == % x ?", string(uo), string(d.U))
uGen := uo // Generated U from specified pass.
uDoc := d.U // U from the document.
if d.R >= 3 {
// Compare only the first 16 bytes in the case of security
// handlers of revision 3 or greater.
if len(uGen) > 16 {
uGen = uGen[0:16]
}
if len(uDoc) > 16 {
uDoc = uDoc[0:16]
}
}
if !bytes.Equal(uGen, uDoc) {
return nil, nil
}
return ekey, nil
}
// alg7 authenticates the owner password and returns the document encryption key.
// It returns a nil key in case authentication failed.
func (sh stdHandlerR4) alg7(d *StdEncryptDict, opass []byte) ([]byte, error) {
encKey := sh.alg3Key(d.R, opass)
decrypted := make([]byte, len(d.O))
if d.R == 2 {
ciph, err := rc4.NewCipher(encKey)
if err != nil {
return nil, errors.New("Failed cipher")
}
ciph.XORKeyStream(decrypted, d.O)
} else if d.R >= 3 {
s := append([]byte{}, d.O...)
for i := 0; i < 20; i++ {
//newKey := encKey
newKey := append([]byte{}, encKey...)
for j := 0; j < len(encKey); j++ {
newKey[j] ^= byte(19 - i)
}
ciph, err := rc4.NewCipher(newKey)
if err != nil {
return nil, errors.New("Failed cipher")
}
ciph.XORKeyStream(decrypted, s)
s = append([]byte{}, decrypted...)
}
} else {
return nil, errors.New("invalid R")
}
ekey, err := sh.alg6(d, decrypted)
if err != nil {
// TODO(dennwc): this doesn't look right, but it was in the old code
return nil, nil
}
return ekey, nil
}
// GenerateParams generates and sets O and U parameters for the encryption dictionary.
// It expects R, P and EncryptMetadata fields to be set.
func (sh stdHandlerR4) GenerateParams(d *StdEncryptDict, opass, upass []byte) ([]byte, error) {
// Make the O and U objects.
O, err := sh.alg3(d.R, upass, opass)
if err != nil {
common.Log.Debug("ERROR: Error generating O for encryption (%s)", err)
return nil, err
}
d.O = O
common.Log.Trace("gen O: % x", O)
// requires O
ekey := sh.alg2(d, upass)
U, err := sh.alg5(ekey, upass)
if err != nil {
common.Log.Debug("ERROR: Error generating O for encryption (%s)", err)
return nil, err
}
d.U = U
common.Log.Trace("gen U: % x", U)
return ekey, nil
}
// Authenticate implements StdHandler interface.
func (sh stdHandlerR4) Authenticate(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) {
// Try owner password.
// May not be necessary if only want to get all contents.
// (user pass needs to be known or empty).
common.Log.Trace("Debugging authentication - owner pass")
ekey, err := sh.alg7(d, pass)
if err != nil {
return nil, 0, err
}
if ekey != nil {
common.Log.Trace("this.authenticated = True")
return ekey, PermOwner, nil
}
// Try user password.
common.Log.Trace("Debugging authentication - user pass")
ekey, err = sh.alg6(d, pass)
if err != nil {
return nil, 0, err
}
if ekey != nil {
common.Log.Trace("this.authenticated = True")
return ekey, d.P, nil
}
// Cannot even view the file.
return nil, 0, nil
}


@@ -0,0 +1,139 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
import (
"github.com/unidoc/unidoc/common"
"testing"
)
func init() {
common.SetLogger(common.ConsoleLogger{})
}
func TestR4Padding(t *testing.T) {
sh := stdHandlerR4{}
// Case 1 empty pass, should match padded string.
key := sh.paddedPass([]byte(""))
if len(key) != 32 {
t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key))
}
if key[0] != 0x28 {
t.Errorf("key[0] != 0x28 (%q in %q)", key[0], key)
}
if key[31] != 0x7A {
t.Errorf("key[31] != 0x7A (%q in %q)", key[31], key)
}
// Case 2, non empty pass.
key = sh.paddedPass([]byte("bla"))
if len(key) != 32 {
t.Errorf("Fail, expected padded pass length = 32 (%d)", len(key))
}
if string(key[0:3]) != "bla" {
t.Errorf("Expecting start with bla (%s)", key)
}
if key[3] != 0x28 {
t.Errorf("key[3] != 0x28 (%q in %q)", key[3], key)
}
if key[31] != 0x64 {
t.Errorf("key[31] != 0x64 (%q in %q)", key[31], key)
}
}
// Test algorithm 2.
func TestAlg2(t *testing.T) {
sh := stdHandlerR4{
// V: 2,
ID0: string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24,
0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}),
Length: 128,
}
d := &StdEncryptDict{
R: 3,
P: 0xfffff0c0,
EncryptMetadata: true,
O: []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB},
}
key := sh.alg2(d, []byte(""))
keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca,
0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5}
if string(key) != string(keyExp) {
common.Log.Debug(" Key (%d): % x", len(key), key)
common.Log.Debug("KeyExp (%d): % x", len(keyExp), keyExp)
t.Errorf("alg2 -> key != expected\n")
}
}
// Test algorithm 3.
func TestAlg3(t *testing.T) {
sh := stdHandlerR4{
// V: 2,
ID0: string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24,
0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}),
Length: 128,
}
Oexp := []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x0d, 0x64, 0xA9, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB}
O, err := sh.alg3(3, []byte(""), []byte("test"))
if err != nil {
t.Errorf("crypt alg3 error %s", err)
return
}
if string(O) != string(Oexp) {
common.Log.Debug(" O (%d): % x", len(O), O)
common.Log.Debug("Oexp (%d): % x", len(Oexp), Oexp)
t.Errorf("alg3 -> key != expected")
}
}
// Test algorithm 5 for computing dictionary's U (user password) value
// valid for R >= 3.
func TestAlg5(t *testing.T) {
sh := stdHandlerR4{
// V: 2,
ID0: string([]byte{0x4e, 0x00, 0x99, 0xe5, 0x36, 0x78, 0x93, 0x24,
0xff, 0xd5, 0x82, 0xe4, 0xec, 0x0e, 0xa3, 0xb4}),
Length: 128,
}
d := &StdEncryptDict{
R: 3,
P: 0xfffff0c0,
EncryptMetadata: true,
O: []byte{0xE6, 0x00, 0xEC, 0xC2, 0x02, 0x88, 0xAD, 0x8B,
0x5C, 0x72, 0x64, 0xA9, 0x5C, 0x29, 0xC6, 0xA8, 0x3E, 0xE2, 0x51,
0x76, 0x79, 0xAA, 0x02, 0x18, 0xBE, 0xCE, 0xEA, 0x8B, 0x79, 0x86,
0x72, 0x6A, 0x8C, 0xDB},
}
ekey := sh.alg2(d, []byte(""))
U, err := sh.alg5(ekey, []byte(""))
if err != nil {
t.Errorf("Error %s", err)
return
}
Uexp := []byte{0x59, 0x66, 0x38, 0x6c, 0x76, 0xfe, 0x95, 0x7d, 0x3d,
0x0d, 0x14, 0x3d, 0x36, 0xfd, 0x01, 0x3d, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
if string(U[0:16]) != string(Uexp[0:16]) {
common.Log.Info(" U (%d): % x", len(U), U)
common.Log.Info("Uexp (%d): % x", len(Uexp), Uexp)
t.Errorf("U != expected\n")
}
}


@@ -0,0 +1,469 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
import (
"bytes"
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"crypto/sha256"
"crypto/sha512"
"encoding/binary"
"errors"
"hash"
"io"
"math"
)
var _ StdHandler = stdHandlerR6{}
// NewHandlerR6 creates a new standard security handler for R=5 and R=6.
func NewHandlerR6() StdHandler {
return stdHandlerR6{}
}
// stdHandlerR6 is an implementation of standard security handler with R=5 and R=6.
// Both revisions are expected to be used with AES encryption filters.
type stdHandlerR6 struct{}
// alg2a retrieves the encryption key from an encrypted document (R >= 5).
// 7.6.4.3.2 Algorithm 2.A (page 83)
func (sh stdHandlerR6) alg2a(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) {
// O & U: 32 byte hash + 8 byte Validation Salt + 8 byte Key Salt
// step a: Unicode normalization
// TODO(dennwc): make sure that UTF-8 strings are normalized
// step b: truncate to 127 bytes
if len(pass) > 127 {
pass = pass[:127]
}
// step c: test pass against the owner key
h, err := sh.alg12(d, pass)
if err != nil {
return nil, 0, err
}
var (
data []byte // data to hash
ekey []byte // encrypted file key
ukey []byte // user key; set only when using owner's password
)
var perm Permissions
if len(h) != 0 {
// owner password valid
perm = PermOwner
// step d: compute an intermediate owner key
str := make([]byte, len(pass)+8+48)
i := copy(str, pass)
i += copy(str[i:], d.O[40:48]) // owner Key Salt
i += copy(str[i:], d.U[0:48])
data = str
ekey = d.OE
ukey = d.U[0:48]
} else {
// check user password
h, err = sh.alg11(d, pass)
if err == nil && len(h) == 0 {
// try default password
h, err = sh.alg11(d, []byte(""))
}
if err != nil {
return nil, 0, err
} else if len(h) == 0 {
// wrong password
return nil, 0, nil
}
perm = d.P
// step e: compute an intermediate user key
str := make([]byte, len(pass)+8)
i := copy(str, pass)
i += copy(str[i:], d.U[40:48]) // user Key Salt
data = str
ekey = d.UE
ukey = nil
}
ekey = ekey[:32]
// intermediate key
ikey := sh.alg2b(d.R, data, pass, ukey)
ac, err := aes.NewCipher(ikey[:32])
if err != nil {
return nil, 0, err
}
iv := make([]byte, aes.BlockSize)
cbc := cipher.NewCBCDecrypter(ac, iv)
fkey := make([]byte, 32)
cbc.CryptBlocks(fkey, ekey)
if d.R == 5 {
return fkey, perm, nil
}
// validate permissions
err = sh.alg13(d, fkey)
if err != nil {
return nil, 0, err
}
return fkey, perm, nil
}
// alg2bR5 computes a hash for R=5, used in a deprecated extension.
// It's used the same way as a hash described in Algorithm 2.B, but it doesn't use the original password
// and the user key to calculate the hash.
func alg2bR5(data []byte) []byte {
h := sha256.New()
h.Write(data)
return h.Sum(nil)
}
// repeat repeats first n bytes of buf until the end of the buffer.
// It assumes that the length of buf is a multiple of n.
func repeat(buf []byte, n int) {
bp := n
for bp < len(buf) {
copy(buf[bp:], buf[:bp])
bp *= 2
}
}
// alg2b computes a hash for R=6.
// 7.6.4.3.3 Algorithm 2.B (page 83)
func alg2b(data, pwd, userKey []byte) []byte {
var (
s256, s384, s512 hash.Hash
)
s256 = sha256.New()
hbuf := make([]byte, 64)
h := s256
h.Write(data)
K := h.Sum(hbuf[:0])
buf := make([]byte, 64*(127+64+48))
round := func(rnd int) (E []byte) {
// step a: repeat pass+K 64 times
n := len(pwd) + len(K) + len(userKey)
part := buf[:n]
i := copy(part, pwd)
i += copy(part[i:], K[:])
i += copy(part[i:], userKey)
if i != n {
panic("wrong size")
}
K1 := buf[:n*64]
repeat(K1, n)
// step b: encrypt K1 with AES-128 CBC
ac, err := aes.NewCipher(K[0:16])
if err != nil {
panic(err)
}
cbc := cipher.NewCBCEncrypter(ac, K[16:32])
cbc.CryptBlocks(K1, K1)
E = K1
// step c: use 16 bytes of E as big-endian int, select the next hash
b := 0
for i := 0; i < 16; i++ {
b += int(E[i] % 3)
}
var h hash.Hash
switch b % 3 {
case 0:
h = s256
case 1:
if s384 == nil {
s384 = sha512.New384()
}
h = s384
case 2:
if s512 == nil {
s512 = sha512.New()
}
h = s512
}
// step d: take the hash of E, use as a new K
h.Reset()
h.Write(E)
K = h.Sum(hbuf[:0])
return E
}
for i := 0; ; {
E := round(i)
b := uint8(E[len(E)-1])
// from the spec, it appears that i should be incremented after
// the test, but that doesn't match what Adobe does
i++
if i >= 64 && b <= uint8(i-32) {
break
}
}
return K[:32]
}
// alg2b computes a hash for R=5 and R=6.
func (sh stdHandlerR6) alg2b(R int, data, pwd, userKey []byte) []byte {
if R == 5 {
return alg2bR5(data)
}
return alg2b(data, pwd, userKey)
}
// alg8 computes the encryption dictionary's U (user password) and UE (user encryption) values (R>=5).
// 7.6.4.4.6 Algorithm 8 (page 86)
func (sh stdHandlerR6) alg8(d *StdEncryptDict, ekey []byte, upass []byte) error {
// step a: compute U (user password)
var rbuf [16]byte
if _, err := io.ReadFull(rand.Reader, rbuf[:]); err != nil {
return err
}
valSalt := rbuf[0:8]
keySalt := rbuf[8:16]
str := make([]byte, len(upass)+len(valSalt))
i := copy(str, upass)
i += copy(str[i:], valSalt)
h := sh.alg2b(d.R, str, upass, nil)
U := make([]byte, len(h)+len(valSalt)+len(keySalt))
i = copy(U, h[:32])
i += copy(U[i:], valSalt)
i += copy(U[i:], keySalt)
d.U = U
// step b: compute UE (user encryption)
// str still contains a password, reuse it
i = len(upass)
i += copy(str[i:], keySalt)
h = sh.alg2b(d.R, str, upass, nil)
ac, err := aes.NewCipher(h[:32])
if err != nil {
panic(err)
}
iv := make([]byte, aes.BlockSize)
cbc := cipher.NewCBCEncrypter(ac, iv)
UE := make([]byte, 32)
cbc.CryptBlocks(UE, ekey[:32])
d.UE = UE
return nil
}
// alg9 computes the encryption dictionary's O (owner password) and OE (owner encryption) values (R>=5).
// 7.6.4.4.7 Algorithm 9 (page 86)
func (sh stdHandlerR6) alg9(d *StdEncryptDict, ekey []byte, opass []byte) error {
// step a: compute O (owner password)
var rbuf [16]byte
if _, err := io.ReadFull(rand.Reader, rbuf[:]); err != nil {
return err
}
valSalt := rbuf[0:8]
keySalt := rbuf[8:16]
userKey := d.U[:48]
str := make([]byte, len(opass)+len(valSalt)+len(userKey))
i := copy(str, opass)
i += copy(str[i:], valSalt)
i += copy(str[i:], userKey)
h := sh.alg2b(d.R, str, opass, userKey)
O := make([]byte, len(h)+len(valSalt)+len(keySalt))
i = copy(O, h[:32])
i += copy(O[i:], valSalt)
i += copy(O[i:], keySalt)
d.O = O
// step b: compute OE (owner encryption)
// str still contains a password and a user key - reuse both, but overwrite the salt
i = len(opass)
i += copy(str[i:], keySalt)
// i += len(userKey)
h = sh.alg2b(d.R, str, opass, userKey)
ac, err := aes.NewCipher(h[:32])
if err != nil {
panic(err)
}
iv := make([]byte, aes.BlockSize)
cbc := cipher.NewCBCEncrypter(ac, iv)
OE := make([]byte, 32)
cbc.CryptBlocks(OE, ekey[:32])
d.OE = OE
return nil
}
// alg10 computes the encryption dictionary's Perms (permissions) value (R=6).
// 7.6.4.4.8 Algorithm 10 (page 87)
func (sh stdHandlerR6) alg10(d *StdEncryptDict, ekey []byte) error {
// step a: extend permissions to 64 bits
perms := uint64(uint32(d.P)) | (math.MaxUint32 << 32)
// step b: record permissions
Perms := make([]byte, 16)
binary.LittleEndian.PutUint64(Perms[:8], perms)
// step c: record EncryptMetadata
if d.EncryptMetadata {
Perms[8] = 'T'
} else {
Perms[8] = 'F'
}
// step d: write "adb" magic
copy(Perms[9:12], "adb")
// step e: write 4 bytes of random data
// the spec does not require these bytes to come from a strong random source,
// but we use the cryptographic random generator anyway
if _, err := io.ReadFull(rand.Reader, Perms[12:16]); err != nil {
return err
}
// step f: encrypt permissions
ac, err := aes.NewCipher(ekey[:32])
if err != nil {
panic(err)
}
ecb := newECBEncrypter(ac)
ecb.CryptBlocks(Perms, Perms)
d.Perms = Perms[:16]
return nil
}
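// Editorial note: before encryption, the 16-byte Perms block assembled above
// is laid out as
//   [0:8]   extended permissions, little-endian
//   [8]     'T'/'F' EncryptMetadata flag
//   [9:12]  "adb" magic
//   [12:16] random padding
// and is then encrypted in place with AES-256 in ECB mode under the file key.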
// alg11 authenticates the user password (R >= 5) and returns the hash.
func (sh stdHandlerR6) alg11(d *StdEncryptDict, upass []byte) ([]byte, error) {
str := make([]byte, len(upass)+8)
i := copy(str, upass)
i += copy(str[i:], d.U[32:40]) // user Validation Salt
h := sh.alg2b(d.R, str, upass, nil)
h = h[:32]
if !bytes.Equal(h, d.U[:32]) {
return nil, nil
}
return h, nil
}
// alg12 authenticates the owner password (R >= 5) and returns the hash.
// 7.6.4.4.10 Algorithm 12 (page 87)
func (sh stdHandlerR6) alg12(d *StdEncryptDict, opass []byte) ([]byte, error) {
str := make([]byte, len(opass)+8+48)
i := copy(str, opass)
i += copy(str[i:], d.O[32:40]) // owner Validation Salt
i += copy(str[i:], d.U[0:48])
h := sh.alg2b(d.R, str, opass, d.U[0:48])
h = h[:32]
if !bytes.Equal(h, d.O[:32]) {
return nil, nil
}
return h, nil
}
// alg13 validates user permissions (P+EncryptMetadata vs Perms) for R=6.
// 7.6.4.4.11 Algorithm 13 (page 87)
func (sh stdHandlerR6) alg13(d *StdEncryptDict, fkey []byte) error {
perms := make([]byte, 16)
copy(perms, d.Perms[:16])
ac, err := aes.NewCipher(fkey[:32])
if err != nil {
panic(err)
}
ecb := newECBDecrypter(ac)
ecb.CryptBlocks(perms, perms)
if !bytes.Equal(perms[9:12], []byte("adb")) {
return errors.New("decoded permissions are invalid")
}
p := Permissions(binary.LittleEndian.Uint32(perms[0:4]))
if p != d.P {
return errors.New("permissions validation failed")
}
encMeta := true
if perms[8] == 'T' {
encMeta = true
} else if perms[8] == 'F' {
encMeta = false
} else {
return errors.New("decoded metadata encryption flag is invalid")
}
if encMeta != d.EncryptMetadata {
return errors.New("metadata encryption validation failed")
}
return nil
}
// GenerateParams is the algorithm opposite to alg2a (R>=5).
// It generates U,O,UE,OE,Perms fields using AESv3 encryption.
// There is no algorithm number assigned to this function in the spec.
// It expects R, P and EncryptMetadata fields to be set.
func (sh stdHandlerR6) GenerateParams(d *StdEncryptDict, opass, upass []byte) ([]byte, error) {
ekey := make([]byte, 32)
if _, err := io.ReadFull(rand.Reader, ekey); err != nil {
return nil, err
}
// all of these fields will be populated by the functions below
d.U = nil
d.O = nil
d.UE = nil
d.OE = nil
d.Perms = nil // populated only for R=6
if len(upass) > 127 {
upass = upass[:127]
}
if len(opass) > 127 {
opass = opass[:127]
}
// generate U and UE
if err := sh.alg8(d, ekey, upass); err != nil {
return nil, err
}
// generate O and OE
if err := sh.alg9(d, ekey, opass); err != nil {
return nil, err
}
if d.R == 5 {
return ekey, nil
}
// generate Perms
if err := sh.alg10(d, ekey); err != nil {
return nil, err
}
return ekey, nil
}
// Authenticate implements StdHandler interface.
func (sh stdHandlerR6) Authenticate(d *StdEncryptDict, pass []byte) ([]byte, Permissions, error) {
return sh.alg2a(d, pass)
}

View File

@ -0,0 +1,116 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package security
import (
"bytes"
"fmt"
"math/rand"
"strings"
"testing"
)
func BenchmarkAlg2b(b *testing.B) {
// hash runs a variable number of rounds, so we need to have a
// deterministic random source to make benchmark results comparable
r := rand.New(rand.NewSource(1234567))
const n = 20
pass := make([]byte, n)
r.Read(pass)
data := make([]byte, n+8+48)
r.Read(data)
user := make([]byte, 48)
r.Read(user)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = alg2b(data, pass, user)
}
}
func TestStdHandlerR6(t *testing.T) {
var cases = []struct {
Name string
EncMeta bool
UserPass string
OwnerPass string
}{
{
Name: "simple", EncMeta: true,
UserPass: "user", OwnerPass: "owner",
},
{
Name: "utf8", EncMeta: false,
UserPass: "æøå-u", OwnerPass: "æøå-o",
},
{
Name: "long", EncMeta: true,
UserPass: strings.Repeat("user", 80),
OwnerPass: strings.Repeat("owner", 80),
},
}
const (
perms = 0x12345678
)
for _, R := range []int{5, 6} {
R := R
t.Run(fmt.Sprintf("R=%d", R), func(t *testing.T) {
for _, c := range cases {
c := c
t.Run(c.Name, func(t *testing.T) {
sh := stdHandlerR6{} // V=5
d := &StdEncryptDict{
R: R, P: perms,
EncryptMetadata: c.EncMeta,
}
// generate encryption parameters
ekey, err := sh.GenerateParams(d, []byte(c.OwnerPass), []byte(c.UserPass))
if err != nil {
t.Fatal("Failed to encrypt:", err)
}
// Perms and EncryptMetadata are checked as a part of alg2a
// decrypt using user password
key, uperm, err := sh.alg2a(d, []byte(c.UserPass))
if err != nil || uperm != perms {
t.Error("Failed to authenticate user pass:", err)
} else if !bytes.Equal(ekey, key) {
t.Error("wrong encryption key")
}
// decrypt using owner password
key, uperm, err = sh.alg2a(d, []byte(c.OwnerPass))
if err != nil || uperm != PermOwner {
t.Error("Failed to authenticate owner pass:", err, uperm)
} else if !bytes.Equal(ekey, key) {
t.Error("wrong encryption key")
}
// try to elevate user permissions
d.P = PermOwner
key, uperm, err = sh.alg2a(d, []byte(c.UserPass))
if R == 5 {
// it's actually possible with R=5, since Perms is not generated
if err != nil || uperm != PermOwner {
t.Error("Failed to authenticate user pass:", err)
}
} else {
// not possible in R=6, should return an error
if err == nil || uperm == PermOwner {
t.Error("was able to elevate permissions with R=6")
}
}
})
}
})
}
}

View File

@ -8,6 +8,7 @@ package creator
import (
"errors"
"fmt"
"strconv"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/model"
@ -40,7 +41,7 @@ type Chapter struct {
margins margins
// Reference to the creator's TOC.
toc *TableOfContents
toc *TOC
}
// NewChapter creates a new chapter with the specified title as the heading.
@ -149,7 +150,12 @@ func (chap *Chapter) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext,
if chap.includeInTOC {
// Add to TOC.
chap.toc.add(chap.title, chap.number, 0, ctx.Page)
chapNumber := ""
if chap.number != 0 {
chapNumber = strconv.Itoa(chap.number) + "."
}
chap.toc.Add(chapNumber, chap.title, strconv.Itoa(ctx.Page), 1)
}
for _, d := range chap.contents {

View File

@ -9,6 +9,7 @@ import (
"errors"
"io"
"os"
"strconv"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/model"
@ -33,17 +34,23 @@ type Creator struct {
// Hooks.
genFrontPageFunc func(args FrontpageFunctionArgs)
genTableOfContentFunc func(toc *TableOfContents) (*Chapter, error)
genTableOfContentFunc func(toc *TOC) error
drawHeaderFunc func(header *Block, args HeaderFunctionArgs)
drawFooterFunc func(footer *Block, args FooterFunctionArgs)
pdfWriterAccessFunc func(writer *model.PdfWriter) error
finalized bool
toc *TableOfContents
// The table of contents.
toc *TOC
// Controls whether a table of contents will be added.
AddTOC bool
// Forms.
acroForm *model.PdfAcroForm
optimizer model.Optimizer
}
// SetForms adds an Acroform to a PDF file. Sets the specified form for writing.
@ -96,11 +103,21 @@ func New() *Creator {
c.pageMargins.top = m
c.pageMargins.bottom = m
c.toc = newTableOfContents()
c.toc = NewTOC("Table of Contents")
return c
}
// SetOptimizer sets the optimizer to optimize PDF before writing.
func (c *Creator) SetOptimizer(optimizer model.Optimizer) {
c.optimizer = optimizer
}
// GetOptimizer returns current PDF optimizer.
func (c *Creator) GetOptimizer() model.Optimizer {
return c.optimizer
}
// SetPageMargins sets the page margins: left, right, top, bottom.
// The default page margins are 10% of document width.
func (c *Creator) SetPageMargins(left, right, top, bottom float64) {
@ -120,6 +137,21 @@ func (c *Creator) Height() float64 {
return c.pageHeight
}
// TOC returns the table of contents component of the creator.
func (c *Creator) TOC() *TOC {
return c.toc
}
// SetTOC sets the table of content component of the creator.
// This method should be used when building a custom table of contents.
func (c *Creator) SetTOC(toc *TOC) {
if toc == nil {
return
}
c.toc = toc
}
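// Editorial sketch (assumed usage of the API above): building a custom table
// of contents and attaching it to the creator:
//
//	c := New()
//	c.AddTOC = true
//	toc := NewTOC("Contents")
//	toc.Add("1.", "Introduction", "3", 1)
//	c.SetTOC(toc)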
func (c *Creator) setActivePage(p *model.PdfPage) {
c.activePage = p
}
@ -182,7 +214,7 @@ func (c *Creator) CreateFrontPage(genFrontPageFunc func(args FrontpageFunctionAr
}
// CreateTableOfContents sets a function to generate table of contents.
func (c *Creator) CreateTableOfContents(genTOCFunc func(toc *TableOfContents) (*Chapter, error)) {
func (c *Creator) CreateTableOfContents(genTOCFunc func(toc *TOC) error) {
c.genTableOfContentFunc = genTOCFunc
}
@ -271,8 +303,8 @@ func (c *Creator) Context() DrawContext {
return c.context
}
// Call before writing out. Takes care of adding headers and footers, as well as generating front
// Page and table of contents.
// Call before writing out. Takes care of adding headers and footers, as well
// as generating front Page and table of contents.
func (c *Creator) finalize() error {
totPages := len(c.pages)
@ -281,16 +313,18 @@ func (c *Creator) finalize() error {
if c.genFrontPageFunc != nil {
genpages++
}
if c.genTableOfContentFunc != nil {
if c.AddTOC {
c.initContext()
c.context.Page = genpages + 1
ch, err := c.genTableOfContentFunc(c.toc)
if err != nil {
return err
if c.genTableOfContentFunc != nil {
if err := c.genTableOfContentFunc(c.toc); err != nil {
return err
}
}
// Make an estimate of the number of pages.
blocks, _, err := ch.GeneratePageBlocks(c.context)
blocks, _, err := c.toc.GeneratePageBlocks(c.context)
if err != nil {
common.Log.Debug("Failed to generate blocks: %v", err)
return err
@ -298,12 +332,15 @@ func (c *Creator) finalize() error {
genpages += len(blocks)
// Update the table of contents Page numbers, accounting for front Page and TOC.
for idx := range c.toc.entries {
c.toc.entries[idx].PageNumber += genpages
}
lines := c.toc.Lines()
for _, line := range lines {
pageNum, err := strconv.Atoi(line.Page.Text)
if err != nil {
continue
}
// Remove the TOC chapter entry.
c.toc.entries = c.toc.entries[:len(c.toc.entries)-1]
line.Page.Text = strconv.Itoa(pageNum + genpages)
}
}
hasFrontPage := false
@ -323,17 +360,17 @@ func (c *Creator) finalize() error {
hasFrontPage = true
}
if c.genTableOfContentFunc != nil {
if c.AddTOC {
c.initContext()
ch, err := c.genTableOfContentFunc(c.toc)
if err != nil {
common.Log.Debug("Error generating TOC: %v", err)
return err
}
ch.SetShowNumbering(false)
ch.SetIncludeInTOC(false)
blocks, _, _ := ch.GeneratePageBlocks(c.context)
if c.genTableOfContentFunc != nil {
if err := c.genTableOfContentFunc(c.toc); err != nil {
common.Log.Debug("Error generating TOC: %v", err)
return err
}
}
blocks, _, _ := c.toc.GeneratePageBlocks(c.context)
tocpages := []*model.PdfPage{}
for _, block := range blocks {
block.SetPos(0, 0)
@ -459,13 +496,15 @@ func (c *Creator) Draw(d Drawable) error {
return nil
}
// Write output of creator to io.WriteSeeker interface.
func (c *Creator) Write(ws io.WriteSeeker) error {
// Write output of creator to io.Writer interface.
func (c *Creator) Write(ws io.Writer) error {
if !c.finalized {
c.finalize()
}
pdfWriter := model.NewPdfWriter()
pdfWriter.SetOptimizer(c.optimizer)
// Form fields.
if c.acroForm != nil {
err := pdfWriter.SetForms(c.acroForm)

View File

@ -10,10 +10,12 @@ package creator
// if every detail is correct.
import (
"bytes"
"fmt"
goimage "image"
"io/ioutil"
"math"
"os"
"testing"
"github.com/boombuler/barcode"
@ -23,6 +25,7 @@ import (
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/internal/textencoding"
"github.com/unidoc/unidoc/pdf/model"
"github.com/unidoc/unidoc/pdf/model/optimize"
)
func init() {
@ -757,6 +760,17 @@ func TestChapterMargins(t *testing.T) {
func TestSubchaptersSimple(t *testing.T) {
c := New()
// Enable table of contents and set the style of the lines.
c.AddTOC = true
lineStyle := NewTextStyle()
lineStyle.Font = model.NewStandard14FontMustCompile(model.HelveticaBold)
toc := c.TOC()
toc.SetLineStyle(lineStyle)
toc.SetLineMargins(0, 0, 3, 3)
// Add chapters.
ch1 := c.NewChapter("Introduction")
subchap1 := c.NewSubchapter(ch1, "The fundamentals of the mastery of the most genius experiment of all times in modern world history. The story of the maker and the maker bot and the genius cow.")
subchap1.SetMargins(0, 0, 5, 0)
@ -814,47 +828,33 @@ func TestSubchaptersSimple(t *testing.T) {
c.Draw(p)
})
// Set a function to create the table of contents.
// Should be able to wrap..
c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) {
ch := c.NewChapter("Table of contents")
ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5))
ch.GetHeading().SetFontSize(28)
ch.GetHeading().SetMargins(0, 0, 0, 30)
// The table of contents is created automatically if the
// AddTOC property of the creator is set to true.
// This function is used just to customize the style of the TOC.
c.CreateTableOfContents(func(toc *TOC) error {
// Set style of TOC heading just before render.
style := NewTextStyle()
style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5)
style.FontSize = 20
table := NewTable(2) // 2 column table.
// Default, equal column sizes (4x0.25)...
table.SetColumnWidths(0.9, 0.1)
toc.SetHeading("Table of Contents", style)
for _, entry := range toc.entries {
// Col 1. Chapter number, title.
var str string
if entry.Subchapter == 0 {
str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title)
} else {
str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title)
}
p := NewParagraph(str)
p.SetFontSize(14)
cell := table.NewCell()
cell.SetContent(p)
// Set the paragraph width to the cell width.
p.SetWidth(cell.Width(c.Context()))
table.SetRowHeight(table.CurRow(), p.Height()*1.2)
// Set style of TOC lines just before render.
lineStyle := NewTextStyle()
lineStyle.FontSize = 14
// Col 1. Page number.
p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber))
p.SetFontSize(14)
cell = table.NewCell()
cell.SetContent(p)
}
err := ch.Add(table)
if err != nil {
fmt.Printf("Error adding table: %v\n", err)
return nil, err
pageStyle := lineStyle
pageStyle.Font = model.NewStandard14FontMustCompile(model.HelveticaBold)
lines := toc.Lines()
for _, line := range lines {
line.SetStyle(lineStyle)
// Make page part bold.
line.Page.Style = pageStyle
}
return ch, nil
return nil
})
err := c.WriteToFile("/tmp/3_subchapters_simple.pdf")
@ -867,6 +867,19 @@ func TestSubchaptersSimple(t *testing.T) {
func TestSubchapters(t *testing.T) {
c := New()
// Enable table of contents and set the style of the lines.
c.AddTOC = true
lineStyle := NewTextStyle()
lineStyle.Font = model.NewStandard14FontMustCompile(model.Helvetica)
lineStyle.FontSize = 14
lineStyle.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5)
toc := c.TOC()
toc.SetLineStyle(lineStyle)
toc.SetLineMargins(0, 0, 3, 3)
// Add chapters.
ch1 := c.NewChapter("Introduction")
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
subchap1.SetMargins(0, 0, 5, 0)
@ -928,46 +941,28 @@ func TestSubchapters(t *testing.T) {
c.Draw(p)
})
// Set a function to create the table of contents.
c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) {
ch := c.NewChapter("Table of contents")
ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5))
ch.GetHeading().SetFontSize(28)
ch.GetHeading().SetMargins(0, 0, 0, 30)
// The table of contents is created automatically if the
// AddTOC property of the creator is set to true.
// This function is used just to customize the style of the TOC.
c.CreateTableOfContents(func(toc *TOC) error {
// Set style of TOC heading just before render.
style := NewTextStyle()
style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5)
style.FontSize = 20
table := NewTable(2)
// Default, equal column sizes (4x0.25)...
table.SetColumnWidths(0.9, 0.1)
toc.SetHeading("Table of Contents", style)
for _, entry := range toc.entries {
// Col 1. Chapter number, title.
var str string
if entry.Subchapter == 0 {
str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title)
} else {
str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title)
}
p := NewParagraph(str)
p.SetFontSize(14)
cell := table.NewCell()
cell.SetContent(p)
// Set the paragraph width to the cell width.
p.SetWidth(cell.Width(c.Context()))
table.SetRowHeight(table.CurRow(), p.Height()*1.2)
// Set style of TOC lines just before render.
pageStyle := NewTextStyle()
pageStyle.Font = model.NewStandard14FontMustCompile(model.HelveticaBold)
pageStyle.FontSize = 10
// Col 1. Page number.
p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber))
p.SetFontSize(14)
cell = table.NewCell()
cell.SetContent(p)
}
err := ch.Add(table)
if err != nil {
fmt.Printf("Error adding table: %v\n", err)
return nil, err
lines := toc.Lines()
for _, line := range lines {
line.Page.Style = pageStyle
}
return ch, nil
return nil
})
addHeadersAndFooters(c)
@ -2132,4 +2127,775 @@ func TestEncrypting1(t *testing.T) {
t.Errorf("Fail: %v\n", err)
return
}
// Try reading generated PDF and ensure encryption is OK.
// Try writing out to memory and opening with password.
var buf bytes.Buffer
err = c.Write(&buf)
if err != nil {
t.Fatalf("Error: %v", err)
}
r, err := model.NewPdfReader(bytes.NewReader(buf.Bytes()))
if err != nil {
t.Fatalf("Error: %v", err)
}
isEnc, err := r.IsEncrypted()
if err != nil {
t.Fatalf("Error: %v", err)
}
if !isEnc {
t.Fatalf("Error: Should be encrypted")
}
ok, err := r.Decrypt([]byte("password"))
if err != nil {
t.Fatalf("Error: %v", err)
}
if !ok {
t.Fatalf("Failed to decrypt")
}
numpages, err := r.GetNumPages()
if err != nil {
t.Fatalf("Error: %v", err)
}
if numpages <= 0 {
t.Fatalf("Pages should be 1+")
}
}
// TestOptimizeCombineDuplicateStreams tests optimizing PDFs to reduce output file size.
func TestOptimizeCombineDuplicateStreams(t *testing.T) {
c := createPdf4Optimization(t)
err := c.WriteToFile("/tmp/7_combine_duplicate_streams_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c = createPdf4Optimization(t)
c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateStreams: true}))
err = c.WriteToFile("/tmp/7_combine_duplicate_streams_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/7_combine_duplicate_streams_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/7_combine_duplicate_streams_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
// TestOptimizeImageQuality tests optimizing PDFs to reduce output file size.
func TestOptimizeImageQuality(t *testing.T) {
c := New()
imgDataJpeg, err := ioutil.ReadFile(testImageFile1)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
imgJpeg, err := NewImageFromData(imgDataJpeg)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
// JPEG encoder (DCT) with quality factor 100.
encoder := core.NewDCTEncoder()
encoder.Quality = 100
encoder.Width = int(imgJpeg.Width())
encoder.Height = int(imgJpeg.Height())
imgJpeg.SetEncoder(encoder)
imgJpeg.SetPos(250, 350)
imgJpeg.Scale(0.25, 0.25)
err = c.Draw(imgJpeg)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
err = c.WriteToFile("/tmp/8_image_quality_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c.SetOptimizer(optimize.New(optimize.Options{ImageQuality: 20}))
err = c.WriteToFile("/tmp/8_image_quality_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/8_image_quality_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/8_image_quality_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
func createPdf4Optimization(t *testing.T) *Creator {
c := New()
p := NewParagraph("Test text1")
// Change to the Courier Bold font (default is Helvetica).
font, err := model.NewStandard14Font(model.CourierBold)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
p.SetFont(font)
p.SetPos(15, 15)
_ = c.Draw(p)
imgData, err := ioutil.ReadFile(testImageFile1)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
img, err := NewImageFromData(imgData)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
img.SetPos(0, 100)
img.ScaleToWidth(1.0 * c.Width())
err = c.Draw(img)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
img1, err := NewImageFromData(imgData)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
img1.SetPos(0, 200)
img1.ScaleToWidth(1.0 * c.Width())
err = c.Draw(img1)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
imgData2, err := ioutil.ReadFile(testImageFile1)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
img2, err := NewImageFromData(imgData2)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
img2.SetPos(0, 500)
img2.ScaleToWidth(1.0 * c.Width())
c.NewPage()
p = NewParagraph("Test text2")
// Set the font explicitly to Helvetica (the default).
font, err = model.NewStandard14Font(model.Helvetica)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
p.SetFont(font)
p.SetPos(15, 15)
_ = c.Draw(p)
err = c.Draw(img2)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
return nil
}
return c
}
// TestOptimizeUseObjectStreams tests optimizing PDFs to reduce output file size.
func TestOptimizeUseObjectStreams(t *testing.T) {
c := createPdf4Optimization(t)
err := c.WriteToFile("/tmp/9_use_object_streams_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c = createPdf4Optimization(t)
c.SetOptimizer(optimize.New(optimize.Options{UseObjectStreams: true}))
err = c.WriteToFile("/tmp/9_use_object_streams_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/9_use_object_streams_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/9_use_object_streams_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
// TestCombineDuplicateDirectObjects tests optimizing PDFs to reduce output file size.
func TestCombineDuplicateDirectObjects(t *testing.T) {
createDoc := func() *Creator {
c := New()
c.AddTOC = true
ch1 := c.NewChapter("Introduction")
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
subchap1.SetMargins(0, 0, 5, 0)
//subCh1 := NewSubchapter(ch1, "Workflow")
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " +
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " +
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
"mollit anim id est laborum.")
p.SetTextAlignment(TextAlignmentJustify)
p.SetMargins(0, 0, 5, 0)
for j := 0; j < 2; j++ {
subchap1.Add(p)
}
subchap2 := c.NewSubchapter(ch1, "Mechanism")
subchap2.SetMargins(0, 0, 5, 0)
for j := 0; j < 3; j++ {
subchap2.Add(p)
}
subchap3 := c.NewSubchapter(ch1, "Discussion")
subchap3.SetMargins(0, 0, 5, 0)
for j := 0; j < 4; j++ {
subchap3.Add(p)
}
subchap4 := c.NewSubchapter(ch1, "Conclusion")
subchap4.SetMargins(0, 0, 5, 0)
for j := 0; j < 3; j++ {
subchap4.Add(p)
}
c.Draw(ch1)
for i := 0; i < 5; i++ {
ch2 := c.NewChapter("References")
ch2.SetMargins(1, 1, 1, 1)
for j := 0; j < 13; j++ {
ch2.Add(p)
}
metadata := core.MakeDict()
metadata.Set(core.PdfObjectName("TEST"), core.MakeString("---------------- ## ----------------"))
c.Draw(ch2)
c.getActivePage().Metadata = metadata
}
// Set a function to create the front Page.
c.CreateFrontPage(func(args FrontpageFunctionArgs) {
p := NewParagraph("Example Report")
p.SetWidth(c.Width())
p.SetTextAlignment(TextAlignmentCenter)
p.SetFontSize(32)
p.SetPos(0, 300)
c.Draw(p)
p.SetFontSize(22)
p.SetText("Example Report Data Results")
p.SetPos(0, 340)
c.Draw(p)
})
// The table of contents is created automatically if the
// AddTOC property of the creator is set to true.
// This function is used just to customize the style of the TOC.
c.CreateTableOfContents(func(toc *TOC) error {
style := NewTextStyle()
style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5)
style.FontSize = 20
toc.SetHeading("Table of Contents", style)
return nil
})
addHeadersAndFooters(c)
return c
}
c := createDoc()
err := c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c = createDoc()
c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateDirectObjects: true}))
err = c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
// TestOptimizeImagePPI tests optimizing PDFs to reduce output file size.
func TestOptimizeImagePPI(t *testing.T) {
c := New()
imgDataJpeg, err := ioutil.ReadFile(testImageFile1)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
imgJpeg, err := NewImageFromData(imgDataJpeg)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
// JPEG encoder (DCT) with quality factor 100.
encoder := core.NewDCTEncoder()
encoder.Quality = 100
encoder.Width = int(imgJpeg.Width())
encoder.Height = int(imgJpeg.Height())
imgJpeg.SetEncoder(encoder)
imgJpeg.SetPos(250, 350)
imgJpeg.Scale(0.25, 0.25)
err = c.Draw(imgJpeg)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c.NewPage()
imgData, err := ioutil.ReadFile(testImageFile1)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
}
img, err := NewImageFromData(imgData)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
}
img.SetPos(0, 100)
img.ScaleToWidth(0.1 * c.Width())
err = c.Draw(img)
if err != nil {
t.Errorf("Fail: %v\n", err)
t.FailNow()
}
err = c.Draw(imgJpeg)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
err = c.WriteToFile("/tmp/11_image_ppi_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c.SetOptimizer(optimize.New(optimize.Options{ImageUpperPPI: 144}))
err = c.WriteToFile("/tmp/11_image_ppi_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/11_image_ppi_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/11_image_ppi_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
// TestCombineIdenticalIndirectObjects tests optimizing PDFs to reduce output file size.
func TestCombineIdenticalIndirectObjects(t *testing.T) {
c := New()
c.AddTOC = true
ch1 := c.NewChapter("Introduction")
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
subchap1.SetMargins(0, 0, 5, 0)
//subCh1 := NewSubchapter(ch1, "Workflow")
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " +
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " +
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
"mollit anim id est laborum.")
p.SetTextAlignment(TextAlignmentJustify)
p.SetMargins(0, 0, 5, 0)
for j := 0; j < 5; j++ {
subchap1.Add(p)
}
subchap2 := c.NewSubchapter(ch1, "Mechanism")
subchap2.SetMargins(0, 0, 5, 0)
for j := 0; j < 15; j++ {
subchap2.Add(p)
}
subchap3 := c.NewSubchapter(ch1, "Discussion")
subchap3.SetMargins(0, 0, 5, 0)
for j := 0; j < 19; j++ {
subchap3.Add(p)
}
subchap4 := c.NewSubchapter(ch1, "Conclusion")
subchap4.SetMargins(0, 0, 5, 0)
for j := 0; j < 23; j++ {
subchap4.Add(p)
}
c.Draw(ch1)
for i := 0; i < 50; i++ {
ch2 := c.NewChapter("References")
for j := 0; j < 13; j++ {
ch2.Add(p)
}
c.Draw(ch2)
}
// Set a function to create the front Page.
c.CreateFrontPage(func(args FrontpageFunctionArgs) {
p := NewParagraph("Example Report")
p.SetWidth(c.Width())
p.SetTextAlignment(TextAlignmentCenter)
p.SetFontSize(32)
p.SetPos(0, 300)
c.Draw(p)
p.SetFontSize(22)
p.SetText("Example Report Data Results")
p.SetPos(0, 340)
c.Draw(p)
})
// The table of contents is created automatically if the
// AddTOC property of the creator is set to true.
// This function is used just to customize the style of the TOC.
c.CreateTableOfContents(func(toc *TOC) error {
style := NewTextStyle()
style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5)
style.FontSize = 20
toc.SetHeading("Table of Contents", style)
return nil
})
addHeadersAndFooters(c)
err := c.WriteToFile("/tmp/12_identical_indirect_objects_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c.SetOptimizer(optimize.New(optimize.Options{CombineIdenticalIndirectObjects: true}))
err = c.WriteToFile("/tmp/12_identical_indirect_objects_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/12_identical_indirect_objects_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/12_identical_indirect_objects_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
// TestCompressStreams tests optimizing PDFs to reduce output file size.
func TestCompressStreams(t *testing.T) {
createDoc := func() *Creator {
c := New()
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt" +
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore" +
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
"mollit anim id est laborum.")
p.SetMargins(0, 0, 5, 0)
c.Draw(p)
//c.NewPage()
page := c.pages[0]
// Need to add Times to the page resources to avoid generating an invalid PDF.
times := model.NewStandard14FontMustCompile(model.TimesRoman)
page.Resources.SetFontByName("Times", times.ToPdfObject())
page.AddContentStreamByString(`BT
/Times 56 Tf
20 600 Td
(The multiline example text)Tj
/Times 30 Tf
0 30 Td
60 TL
(example text)'
(example text)'
(example text)'
(example text)'
(example text)'
(example text)'
(example text)'
(example text)'
ET`)
return c
}
c := createDoc()
err := c.WriteToFile("/tmp/13_compress_streams_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c = createDoc()
c.SetOptimizer(optimize.New(optimize.Options{CompressStreams: true}))
err = c.WriteToFile("/tmp/13_compress_streams_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/13_compress_streams_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/13_compress_streams_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}
// TestAllOptimizations tests optimizing PDFs to reduce output file size.
func TestAllOptimizations(t *testing.T) {
createDoc := func() *Creator {
c := New()
c.AddTOC = true
ch1 := c.NewChapter("Introduction")
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
subchap1.SetMargins(0, 0, 5, 0)
//subCh1 := NewSubchapter(ch1, "Workflow")
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " +
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " +
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
"mollit anim id est laborum.")
p.SetTextAlignment(TextAlignmentJustify)
p.SetMargins(0, 0, 5, 0)
for j := 0; j < 7; j++ {
subchap1.Add(p)
}
subchap2 := c.NewSubchapter(ch1, "Mechanism")
subchap2.SetMargins(0, 0, 5, 0)
for j := 0; j < 15; j++ {
subchap2.Add(p)
}
subchap3 := c.NewSubchapter(ch1, "Discussion")
subchap3.SetMargins(0, 0, 5, 0)
for j := 0; j < 19; j++ {
subchap3.Add(p)
}
subchap4 := c.NewSubchapter(ch1, "Conclusion")
subchap4.SetMargins(0, 0, 5, 0)
for j := 0; j < 23; j++ {
subchap4.Add(p)
}
c.Draw(ch1)
for i := 0; i < 50; i++ {
ch2 := c.NewChapter("References")
for j := 0; j < 13; j++ {
ch2.Add(p)
}
c.Draw(ch2)
}
// Set a function to create the front Page.
c.CreateFrontPage(func(args FrontpageFunctionArgs) {
p := NewParagraph("Example Report")
p.SetWidth(c.Width())
p.SetTextAlignment(TextAlignmentCenter)
p.SetFontSize(32)
p.SetPos(0, 300)
c.Draw(p)
p.SetFontSize(22)
p.SetText("Example Report Data Results")
p.SetPos(0, 340)
c.Draw(p)
})
// The table of contents is created automatically if the
// AddTOC property of the creator is set to true.
// This function is used just to customize the style of the TOC.
c.CreateTableOfContents(func(toc *TOC) error {
style := NewTextStyle()
style.Color = ColorRGBFromArithmetic(0.5, 0.5, 0.5)
style.FontSize = 20
toc.SetHeading("Table of Contents", style)
return nil
})
addHeadersAndFooters(c)
return c
}
c := createDoc()
err := c.WriteToFile("/tmp/14_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
c = createDoc()
c.SetOptimizer(optimize.New(optimize.Options{
CombineDuplicateDirectObjects: true,
CombineIdenticalIndirectObjects: true,
ImageUpperPPI: 50.0,
UseObjectStreams: true,
ImageQuality: 50,
CombineDuplicateStreams: true,
CompressStreams: true,
}))
err = c.WriteToFile("/tmp/14_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfo, err := os.Stat("/tmp/14_not_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
fileInfoOptimized, err := os.Stat("/tmp/14_optimized.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
if fileInfoOptimized.Size() >= fileInfo.Size() {
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
}
}

View File

@ -139,13 +139,14 @@ func TestDivInline(t *testing.T) {
style.Color = ColorRGBFrom8bit(0, 0, 255)
s := NewStyledParagraph("This styled paragraph should ", style)
s.SetEnableWrap(false)
style.Color = ColorRGBFrom8bit(255, 0, 0)
s.Append("fit", style)
style.Color = ColorRGBFrom8bit(0, 255, 0)
style.Font = fontBold
s.Append(" right in.", style)
s.Append(" in.", style)
div.Add(s)

View File

@ -85,7 +85,7 @@ func NewParagraph(text string) *Paragraph {
// TODO: Can we wrap intelligently, only if the given width is known?
p.enableWrap = false
p.enableWrap = true
p.defaultWrap = true
p.SetColor(ColorRGBFrom8bit(0, 0, 0))
p.alignment = TextAlignmentLeft
@ -187,13 +187,12 @@ func (p *Paragraph) GetMargins() (float64, float64, float64, float64) {
// text can extend to prior to wrapping over to next line.
func (p *Paragraph) SetWidth(width float64) {
p.wrapWidth = width
p.enableWrap = true
p.wrapText()
}
// Width returns the width of the Paragraph.
func (p *Paragraph) Width() float64 {
if p.enableWrap {
if p.enableWrap && int(p.wrapWidth) > 0 {
return p.wrapWidth
}
return p.getTextWidth() / 1000.0
@ -236,10 +235,54 @@ func (p *Paragraph) getTextWidth() float64 {
return w
}
// getTextLineWidth calculates the text width of a provided line of text.
func (p *Paragraph) getTextLineWidth(line string) float64 {
var width float64
for _, r := range line {
glyph, found := p.textFont.Encoder().RuneToGlyph(r)
if !found {
common.Log.Debug("ERROR: Glyph not found for rune: 0x%04x=%c", r, r)
return -1 // XXX/FIXME: return error.
}
// Ignore newlines for this calculation; handle the text as if it were all on one line.
if glyph == "controlLF" {
continue
}
metrics, found := p.textFont.GetGlyphCharMetrics(glyph)
if !found {
common.Log.Debug("ERROR: Glyph char metrics not found! %q (rune 0x%04x=%c)", glyph, r, r)
return -1 // XXX/FIXME: return error.
}
width += p.fontSize * metrics.Wx
}
return width
}
// getMaxLineWidth returns the width of the longest line of text in the paragraph.
func (p *Paragraph) getMaxLineWidth() float64 {
if p.textLines == nil || len(p.textLines) == 0 {
p.wrapText()
}
var width float64
for _, line := range p.textLines {
w := p.getTextLineWidth(line)
if w > width {
width = w
}
}
return width
}
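// Editorial note: getTextLineWidth and getMaxLineWidth return widths in
// 1000ths of text-space units (glyph widths scaled by the font size); callers
// such as the table layout divide the result by 1000.0 before using it.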
// Simple algorithm to wrap the text into lines (greedy algorithm - fill the lines).
// XXX/TODO: Consider the Knuth/Plass algorithm or an alternative.
func (p *Paragraph) wrapText() error {
if !p.enableWrap {
if !p.enableWrap || int(p.wrapWidth) <= 0 {
p.textLines = []string{p.text}
return nil
}
@ -367,7 +410,7 @@ func (p *Paragraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext,
}
} else {
// Absolute.
if p.wrapWidth == 0 {
if int(p.wrapWidth) <= 0 {
// Use necessary space.
p.SetWidth(p.getTextWidth())
}

View File

@ -20,10 +20,10 @@ import (
// StyledParagraph represents text drawn with a specified font and can wrap across lines and pages.
// By default occupies the available width in the drawing context.
type StyledParagraph struct {
// Text chunks with styles that compose the paragraph
// Text chunks with styles that compose the paragraph.
chunks []TextChunk
// Style used for the paragraph for spacing and offsets
// Style used for the paragraph for spacing and offsets.
defaultStyle TextStyle
// The text encoder which can convert the text (as runes) into a series of glyphs and get character metrics.
@ -63,6 +63,9 @@ type StyledParagraph struct {
// Text chunk lines after wrapping to available width.
lines [][]TextChunk
// Before render callback.
beforeRender func(p *StyledParagraph, ctx DrawContext)
}
// NewStyledParagraph creates a new styled paragraph.
@ -104,6 +107,23 @@ func (p *StyledParagraph) Append(text string, style TextStyle) {
p.wrapText()
}
// Insert adds a new text chunk at the specified position in the paragraph.
func (p *StyledParagraph) Insert(index uint, text string, style TextStyle) {
l := uint(len(p.chunks))
if index > l {
index = l
}
chunk := TextChunk{
Text: text,
Style: style,
}
chunk.Style.Font.SetEncoder(p.encoder)
p.chunks = append(p.chunks[:index], append([]TextChunk{chunk}, p.chunks[index:]...)...)
p.wrapText()
}
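// Editorial sketch (assumed usage, mirroring Append above):
//
//	sp := NewStyledParagraph("world", NewTextStyle())
//	sp.Insert(0, "hello ", NewTextStyle()) // chunk order becomes: "hello ", "world"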
// Reset sets the entire text and also the style of the paragraph
// to those specified. It behaves as if the paragraph was a new one.
func (p *StyledParagraph) Reset(text string, style TextStyle) {
@ -173,7 +193,7 @@ func (p *StyledParagraph) SetWidth(width float64) {
// Width returns the width of the Paragraph.
func (p *StyledParagraph) Width() float64 {
if p.enableWrap {
if p.enableWrap && int(p.wrapWidth) > 0 {
return p.wrapWidth
}
@ -238,6 +258,58 @@ func (p *StyledParagraph) getTextWidth() float64 {
return width
}
// getTextLineWidth calculates the text width of a provided collection of text chunks.
func (p *StyledParagraph) getTextLineWidth(line []TextChunk) float64 {
var width float64
for _, chunk := range line {
style := &chunk.Style
for _, r := range chunk.Text {
glyph, found := p.encoder.RuneToGlyph(r)
if !found {
common.Log.Debug("Error! Glyph not found for rune: %s\n", r)
// XXX/FIXME: return error.
return -1
}
// Ignore newlines for this calculation; handle the text as if it were all on one line.
if glyph == "controlLF" {
continue
}
metrics, found := style.Font.GetGlyphCharMetrics(glyph)
if !found {
common.Log.Debug("Glyph char metrics not found! %s\n", glyph)
// XXX/FIXME: return error.
return -1
}
width += style.FontSize * metrics.Wx
}
}
return width
}
// getMaxLineWidth returns the width of the longest line of text in the paragraph.
func (p *StyledParagraph) getMaxLineWidth() float64 {
if p.lines == nil || len(p.lines) == 0 {
p.wrapText()
}
var width float64
for _, line := range p.lines {
w := p.getTextLineWidth(line)
if w > width {
width = w
}
}
return width
}
// getTextHeight calculates the text height as if all in one line (not taking wrapping into account).
func (p *StyledParagraph) getTextHeight() float64 {
var height float64
@ -255,7 +327,7 @@ func (p *StyledParagraph) getTextHeight() float64 {
// fill the lines.
// XXX/TODO: Consider the Knuth/Plass algorithm or an alternative.
func (p *StyledParagraph) wrapText() error {
if !p.enableWrap {
if !p.enableWrap || int(p.wrapWidth) <= 0 {
p.lines = [][]TextChunk{p.chunks}
return nil
}
@ -281,7 +353,7 @@ func (p *StyledParagraph) wrapText() error {
}
// newline wrapping.
if glyph == "controllf" {
if glyph == "controlLF" {
// moves to next line.
line = append(line, TextChunk{
Text: strings.TrimRightFunc(string(part), unicode.IsSpace),
@ -407,7 +479,7 @@ func (p *StyledParagraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCon
}
} else {
// Absolute.
if p.wrapWidth == 0 {
if int(p.wrapWidth) <= 0 {
// Use necessary space.
p.SetWidth(p.getTextWidth())
}
@ -415,6 +487,10 @@ func (p *StyledParagraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCon
ctx.Y = p.yPos
}
if p.beforeRender != nil {
p.beforeRender(p, ctx)
}
// Place the Paragraph on the template at position (x,y) based on the ctx.
ctx, err := drawStyledParagraphOnBlock(blk, p, ctx)
if err != nil {
@ -434,7 +510,7 @@ func (p *StyledParagraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCon
// Draw block on specified location on Page, adding to the content stream.
func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) (DrawContext, error) {
// Find first free index for the font resources of the paragraph
// Find first free index for the font resources of the paragraph.
num := 1
fontName := core.PdfObjectName(fmt.Sprintf("Font%d", num))
for blk.resources.HasFontByName(fontName) {
@ -442,7 +518,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext)
fontName = core.PdfObjectName(fmt.Sprintf("Font%d", num))
}
// Add default font to the page resources
// Add default font to the page resources.
err := blk.resources.SetFontByName(fontName, p.defaultStyle.Font.ToPdfObject())
if err != nil {
return ctx, err
@ -455,7 +531,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext)
// Wrap the text into lines.
p.wrapText()
// Add the fonts of all chunks to the page resources
// Add the fonts of all chunks to the page resources.
fonts := [][]core.PdfObjectName{}
for _, line := range p.lines {
@ -539,19 +615,22 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext)
spaces += chunkSpaces
}
// Add line shifts
// Add line shifts.
objs := []core.PdfObject{}
wrapWidth := p.wrapWidth * 1000.0
if p.alignment == TextAlignmentJustify {
// Not to justify last line.
// Do not justify last line.
if spaces > 0 && !isLastLine {
spaceWidth = (p.wrapWidth*1000.0 - width) / float64(spaces) / defaultFontSize
spaceWidth = (wrapWidth - width) / float64(spaces) / defaultFontSize
}
} else if p.alignment == TextAlignmentCenter {
// Start with a shift.
shift := (p.wrapWidth*1000.0 - width - spaceWidth) / 2 / defaultFontSize
// Start with an offset of half of the remaining line space.
shift := (wrapWidth - width - spaceWidth) / 2 / defaultFontSize
objs = append(objs, core.MakeFloat(-shift))
} else if p.alignment == TextAlignmentRight {
shift := (p.wrapWidth*1000.0 - width - spaceWidth) / defaultFontSize
// Push the text at the end of the line.
shift := (wrapWidth - width - spaceWidth) / defaultFontSize
objs = append(objs, core.MakeFloat(-shift))
}
@ -561,7 +640,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext)
Add_TJ(objs...)
}
// Render line text chunks
// Render line text chunks.
for k, chunk := range line {
style := &chunk.Style

View File

@ -128,6 +128,111 @@ func TestParagraphRegularVsStyled(t *testing.T) {
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetContent(s)
// Test table cell alignment.
style = NewTextStyle()
// Test left alignment with paragraph wrapping enabled.
p = NewParagraph("Wrap enabled. This text should be left aligned.")
p.SetEnableWrap(true)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft)
cell.SetContent(p)
s = NewStyledParagraph("Wrap enabled. This text should be left aligned.", style)
s.SetEnableWrap(true)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft)
cell.SetContent(s)
// Test left alignment with paragraph wrapping disabled.
p = NewParagraph("Wrap disabled. This text should be left aligned.")
p.SetEnableWrap(false)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft)
cell.SetContent(p)
s = NewStyledParagraph("Wrap disabled. This text should be left aligned.", style)
s.SetEnableWrap(false)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentLeft)
cell.SetContent(s)
// Test center alignment with paragraph wrapping enabled.
p = NewParagraph("Wrap enabled. This text should be center aligned.")
p.SetEnableWrap(true)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter)
cell.SetContent(p)
s = NewStyledParagraph("Wrap enabled. This text should be center aligned.", style)
s.SetEnableWrap(true)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter)
cell.SetContent(s)
// Test center alignment with paragraph wrapping disabled.
p = NewParagraph("Wrap disabled. This text should be center aligned.")
p.SetEnableWrap(false)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter)
cell.SetContent(p)
s = NewStyledParagraph("Wrap disabled. This text should be center aligned.", style)
s.SetEnableWrap(false)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentCenter)
cell.SetContent(s)
// Test right alignment with paragraph wrapping enabled.
p = NewParagraph("Wrap enabled. This text should be right aligned.")
p.SetEnableWrap(true)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentRight)
cell.SetContent(p)
s = NewStyledParagraph("Wrap enabled. This text should be right aligned.", style)
s.SetEnableWrap(true)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentRight)
cell.SetContent(s)
// Test right alignment with paragraph wrapping disabled.
p = NewParagraph("Wrap disabled. This text should be right aligned.")
p.SetEnableWrap(false)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentRight)
cell.SetContent(p)
s = NewStyledParagraph("Wrap disabled. This text should be right aligned.", style)
s.SetEnableWrap(false)
cell = table.NewCell()
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
cell.SetHorizontalAlignment(CellHorizontalAlignmentRight)
cell.SetContent(s)
// Draw table.
err = c.Draw(table)
if err != nil {

View File

@ -7,6 +7,7 @@ package creator
import (
"fmt"
"strconv"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/model"
@ -38,7 +39,7 @@ type Subchapter struct {
margins margins
// Reference to the creator's TOC.
toc *TableOfContents
toc *TOC
}
// NewSubchapter creates a new Subchapter under Chapter ch with specified title.
@ -154,7 +155,19 @@ func (subchap *Subchapter) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawCo
}
if subchap.includeInTOC {
// Add to TOC.
subchap.toc.add(subchap.title, subchap.chapterNum, subchap.subchapterNum, ctx.Page)
subchapNumber := ""
if subchap.chapterNum != 0 {
subchapNumber = strconv.Itoa(subchap.chapterNum)
}
if subchap.subchapterNum != 0 {
if subchapNumber != "" {
subchapNumber += "."
}
subchapNumber += strconv.Itoa(subchap.subchapterNum) + "."
}
subchap.toc.Add(subchapNumber, subchap.title, strconv.Itoa(ctx.Page), 2)
}
for _, d := range subchap.contents {

View File

@ -362,8 +362,20 @@ func (table *Table) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext,
}
if cell.content != nil {
// content width.
cw := cell.content.Width()
switch t := cell.content.(type) {
case *Paragraph:
if t.enableWrap {
cw = t.getMaxLineWidth() / 1000.0
}
case *StyledParagraph:
if t.enableWrap {
cw = t.getMaxLineWidth() / 1000.0
}
}
// Account for horizontal alignment:
cw := cell.content.Width() // content width.
switch cell.horizontalAlignment {
case CellHorizontalAlignmentLeft:
// Account for indent.
@ -691,15 +703,15 @@ func (cell *TableCell) SetContent(vd VectorDrawable) error {
switch t := vd.(type) {
case *Paragraph:
if t.defaultWrap {
// Default paragraph settings in table: no wrapping.
t.enableWrap = false // No wrapping.
// Enable wrapping by default.
t.enableWrap = true
}
cell.content = vd
case *StyledParagraph:
if t.defaultWrap {
// Default styled paragraph settings in table: no wrapping.
t.enableWrap = false // No wrapping.
// Enable wrapping by default.
t.enableWrap = true
}
cell.content = vd

15
pdf/creator/text_chunk.go Normal file
View File

@ -0,0 +1,15 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
// TextChunk represents a chunk of text along with a particular style.
type TextChunk struct {
// The text that is being rendered in the PDF.
Text string
// The style of the text being rendered.
Style TextStyle
}

View File

@ -35,9 +35,3 @@ func NewTextStyle() TextStyle {
FontSize: 10,
}
}
// TextChunk represents a chunk of text along with a particular style.
type TextChunk struct {
Text string
Style TextStyle
}

View File

@ -5,39 +5,227 @@
package creator
// TableOfContents provides an overview over chapters and subchapters when creating a document with Creator.
type TableOfContents struct {
entries []TableOfContentsEntry
import "github.com/unidoc/unidoc/pdf/model/fonts"
// TOC represents a table of contents component.
// It consists of a paragraph heading and a collection of
// table of contents lines.
// The representation of a table of contents line is as follows:
// [number] [title] [separator] [page]
// e.g.: Chapter1 Introduction ........... 1
type TOC struct {
// The heading of the table of contents.
heading *StyledParagraph
// The lines of the table of contents.
lines []*TOCLine
// The style of the number part of new TOC lines.
lineNumberStyle TextStyle
// The style of the title part of new TOC lines.
lineTitleStyle TextStyle
// The style of the separator part of new TOC lines.
lineSeparatorStyle TextStyle
// The style of the page part of new TOC lines.
linePageStyle TextStyle
// The separator for new TOC lines.
lineSeparator string
// The amount of space an indentation level occupies in a TOC line.
lineLevelOffset float64
// The margins of new TOC lines.
lineMargins margins
// Positioning: relative/absolute.
positioning positioning
}
// Make a new table of contents.
func newTableOfContents() *TableOfContents {
toc := TableOfContents{}
toc.entries = []TableOfContentsEntry{}
return &toc
// NewTOC creates a new table of contents.
func NewTOC(title string) *TOC {
headingStyle := NewTextStyle()
headingStyle.Font = fonts.NewFontHelveticaBold()
headingStyle.FontSize = 14
heading := NewStyledParagraph(title, headingStyle)
heading.SetEnableWrap(true)
heading.SetTextAlignment(TextAlignmentLeft)
heading.SetMargins(0, 0, 0, 5)
lineStyle := NewTextStyle()
return &TOC{
heading: heading,
lines: []*TOCLine{},
lineNumberStyle: lineStyle,
lineTitleStyle: lineStyle,
lineSeparatorStyle: lineStyle,
linePageStyle: lineStyle,
lineSeparator: ".",
lineLevelOffset: 10,
lineMargins: margins{0, 0, 2, 2},
positioning: positionRelative,
}
}
// Entries returns the table of content entries.
func (toc *TableOfContents) Entries() []TableOfContentsEntry {
return toc.entries
// Heading returns the heading component of the table of contents.
func (t *TOC) Heading() *StyledParagraph {
return t.heading
}
// Add a TOC entry.
func (toc *TableOfContents) add(title string, chapter, subchapter, pageNum int) {
entry := TableOfContentsEntry{}
entry.Title = title
entry.Chapter = chapter
entry.Subchapter = subchapter
entry.PageNumber = pageNum
toc.entries = append(toc.entries, entry)
// Lines returns all the lines the table of contents has.
func (t *TOC) Lines() []*TOCLine {
return t.lines
}
// TableOfContentsEntry defines a single entry in the TableOfContents.
// Each entry has a title, chapter number, sub chapter (0 if chapter) and the page number.
type TableOfContentsEntry struct {
Title string
Chapter int
Subchapter int // 0 if chapter
PageNumber int // Page number
// SetHeading sets the text and the style of the heading of the TOC component.
func (t *TOC) SetHeading(text string, style TextStyle) {
t.heading.Reset(text, style)
}
// Add adds a new line with the default style to the table of contents.
func (t *TOC) Add(number, title, page string, level uint) *TOCLine {
tl := t.AddLine(NewStyledTOCLine(
TextChunk{
Text: number,
Style: t.lineNumberStyle,
},
TextChunk{
Text: title,
Style: t.lineTitleStyle,
},
TextChunk{
Text: page,
Style: t.linePageStyle,
},
level,
))
if tl == nil {
return nil
}
// Set line margins.
m := &t.lineMargins
tl.SetMargins(m.left, m.right, m.top, m.bottom)
// Set line level offset.
tl.SetLevelOffset(t.lineLevelOffset)
// Set line separator text and style.
tl.Separator.Text = t.lineSeparator
tl.Separator.Style = t.lineSeparatorStyle
return tl
}
// AddLine adds a new line with the provided style to the table of contents.
func (t *TOC) AddLine(line *TOCLine) *TOCLine {
if line == nil {
return nil
}
t.lines = append(t.lines, line)
return line
}
// SetLineSeparator sets the separator for all new lines of the table of contents.
func (t *TOC) SetLineSeparator(separator string) {
t.lineSeparator = separator
}
// SetLineMargins sets the margins for all new lines of the table of contents.
func (t *TOC) SetLineMargins(left, right, top, bottom float64) {
m := &t.lineMargins
m.left = left
m.right = right
m.top = top
m.bottom = bottom
}
// SetLineStyle sets the style for all the line components: number, title,
// separator, page. The style is applied only for new lines added to the
// TOC component.
func (t *TOC) SetLineStyle(style TextStyle) {
t.SetLineNumberStyle(style)
t.SetLineTitleStyle(style)
t.SetLineSeparatorStyle(style)
t.SetLinePageStyle(style)
}
// SetLineNumberStyle sets the style for the numbers part of all new lines
// of the table of contents.
func (t *TOC) SetLineNumberStyle(style TextStyle) {
t.lineNumberStyle = style
}
// SetLineTitleStyle sets the style for the title part of all new lines
// of the table of contents.
func (t *TOC) SetLineTitleStyle(style TextStyle) {
t.lineTitleStyle = style
}
// SetLineSeparatorStyle sets the style for the separator part of all new
// lines of the table of contents.
func (t *TOC) SetLineSeparatorStyle(style TextStyle) {
t.lineSeparatorStyle = style
}
// SetLinePageStyle sets the style for the page part of all new lines
// of the table of contents.
func (t *TOC) SetLinePageStyle(style TextStyle) {
t.linePageStyle = style
}
// SetLineLevelOffset sets the amount of space an indentation level occupies
// for all new lines of the table of contents.
func (t *TOC) SetLineLevelOffset(levelOffset float64) {
t.lineLevelOffset = levelOffset
}
// GeneratePageBlocks generates the page blocks. Multiple blocks are generated
// if the contents wrap over multiple pages.
func (t *TOC) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
origCtx := ctx
// Generate heading blocks.
blocks, ctx, err := t.heading.GeneratePageBlocks(ctx)
if err != nil {
return blocks, ctx, err
}
// Generate blocks for the table of contents lines.
for _, line := range t.lines {
newBlocks, c, err := line.GeneratePageBlocks(ctx)
if err != nil {
return blocks, ctx, err
}
if len(newBlocks) < 1 {
continue
}
// The first block is always appended to the last.
blocks[len(blocks)-1].mergeBlocks(newBlocks[0])
blocks = append(blocks, newBlocks[1:]...)
ctx = c
}
if t.positioning.isRelative() {
// Move back X to same start of line.
ctx.X = origCtx.X
}
if t.positioning.isAbsolute() {
// If absolute: return original context.
return blocks, origCtx, nil
}
return blocks, ctx, nil
}
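For illustration, a minimal usage sketch of the TOC component defined above, assuming the exported creator API shown in this diff (the entry data and the output path are illustrative only):

package main

import (
    "log"

    "github.com/unidoc/unidoc/pdf/creator"
)

func main() {
    c := creator.New()
    c.NewPage()

    // Build a small table of contents using the default line style.
    toc := creator.NewTOC("Table of Contents")
    toc.SetLineSeparator(".")
    toc.SetLineLevelOffset(12)
    toc.Add("1.", "Introduction", "1", 1)
    toc.Add("1.1", "Background", "2", 2)

    if err := c.Draw(toc); err != nil {
        log.Fatal(err)
    }
    // Output path is illustrative.
    if err := c.WriteToFile("toc_example.pdf"); err != nil {
        log.Fatal(err)
    }
}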

225
pdf/creator/toc_line.go Normal file
View File

@ -0,0 +1,225 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
import (
"strings"
)
// TOCLine represents a line in a table of contents.
// The component can be used both in the context of a
// table of contents component and as a standalone component.
// The representation of a table of contents line is as follows:
// [number] [title] [separator] [page]
// e.g.: Chapter1 Introduction ........... 1
type TOCLine struct {
// The underlying styled paragraph used to render the TOC line.
sp *StyledParagraph
// Holds the text and style of the number part of the TOC line.
Number TextChunk
// Holds the text and style of the title part of the TOC line.
Title TextChunk
// Holds the text and style of the separator part of the TOC line.
Separator TextChunk
// Holds the text and style of the page part of the TOC line.
Page TextChunk
// The left margin of the TOC line.
offset float64
// The indentation level of the TOC line.
level uint
// The amount of space an indentation level occupies.
levelOffset float64
// Positioning: relative/absolute.
positioning positioning
}
// NewTOCLine creates a new table of contents line with the default style.
func NewTOCLine(number, title, page string, level uint) *TOCLine {
style := NewTextStyle()
return NewStyledTOCLine(
TextChunk{
Text: number,
Style: style,
},
TextChunk{
Text: title,
Style: style,
},
TextChunk{
Text: page,
Style: style,
},
level,
)
}
// NewStyledTOCLine creates a new table of contents line with the provided style.
func NewStyledTOCLine(number, title, page TextChunk, level uint) *TOCLine {
style := NewTextStyle()
sp := NewStyledParagraph("", style)
sp.SetEnableWrap(true)
sp.SetTextAlignment(TextAlignmentLeft)
sp.SetMargins(0, 0, 2, 2)
tl := &TOCLine{
sp: sp,
Number: number,
Title: title,
Page: page,
Separator: TextChunk{
Text: ".",
Style: style,
},
offset: 0,
level: level,
levelOffset: 10,
positioning: positionRelative,
}
sp.margins.left = tl.offset + float64(tl.level-1)*tl.levelOffset
sp.beforeRender = tl.prepareParagraph
return tl
}
// SetStyle sets the style for all the line components: number, title,
// separator, page.
func (tl *TOCLine) SetStyle(style TextStyle) {
tl.Number.Style = style
tl.Title.Style = style
tl.Separator.Style = style
tl.Page.Style = style
}
// Level returns the indentation level of the TOC line.
func (tl *TOCLine) Level() uint {
return tl.level
}
// SetLevel sets the indentation level of the TOC line.
func (tl *TOCLine) SetLevel(level uint) {
tl.level = level
tl.sp.margins.left = tl.offset + float64(tl.level-1)*tl.levelOffset
}
// LevelOffset returns the amount of space an indentation level occupies.
func (tl *TOCLine) LevelOffset() float64 {
return tl.levelOffset
}
// SetLevelOffset sets the amount of space an indentation level occupies.
func (tl *TOCLine) SetLevelOffset(levelOffset float64) {
tl.levelOffset = levelOffset
tl.sp.margins.left = tl.offset + float64(tl.level-1)*tl.levelOffset
}
// GetMargins returns the margins of the TOC line: left, right, top, bottom.
func (tl *TOCLine) GetMargins() (float64, float64, float64, float64) {
m := &tl.sp.margins
return tl.offset, m.right, m.top, m.bottom
}
// SetMargins sets the margins of the TOC line.
func (tl *TOCLine) SetMargins(left, right, top, bottom float64) {
tl.offset = left
m := &tl.sp.margins
m.left = tl.offset + float64(tl.level-1)*tl.levelOffset
m.right = right
m.top = top
m.bottom = bottom
}
// prepareParagraph generates and adds all the components of the TOC line
// to the underlying paragraph.
func (tl *TOCLine) prepareParagraph(sp *StyledParagraph, ctx DrawContext) {
// Add text chunks to the paragraph.
title := tl.Title.Text
if tl.Number.Text != "" {
title = " " + title
}
title += " "
page := tl.Page.Text
if page != "" {
page = " " + page
}
sp.chunks = []TextChunk{
tl.Number,
TextChunk{
Text: title,
Style: tl.Title.Style,
},
TextChunk{
Text: page,
Style: tl.Page.Style,
},
}
sp.SetEncoder(sp.encoder)
sp.wrapText()
// Insert separator.
l := len(sp.lines)
if l == 0 {
return
}
availWidth := ctx.Width*1000 - sp.getTextLineWidth(sp.lines[l-1])
sepWidth := sp.getTextLineWidth([]TextChunk{tl.Separator})
sepCount := int(availWidth / sepWidth)
sepText := strings.Repeat(tl.Separator.Text, sepCount)
sepStyle := tl.Separator.Style
sp.Insert(2, sepText, sepStyle)
// Push page numbers to the end of the line.
availWidth = availWidth - float64(sepCount)*sepWidth
if availWidth > 500 {
spaceMetrics, found := sepStyle.Font.GetGlyphCharMetrics("space")
if found && availWidth > spaceMetrics.Wx {
spaces := int(availWidth / spaceMetrics.Wx)
if spaces > 0 {
style := sepStyle
style.FontSize = 1
sp.Insert(2, strings.Repeat(" ", spaces), style)
}
}
}
}
// GeneratePageBlocks generates the page blocks. Multiple blocks are generated
// if the contents wrap over multiple pages.
func (tl *TOCLine) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
origCtx := ctx
blocks, ctx, err := tl.sp.GeneratePageBlocks(ctx)
if err != nil {
return blocks, ctx, err
}
if tl.positioning.isRelative() {
// Move back X to same start of line.
ctx.X = origCtx.X
}
if tl.positioning.isAbsolute() {
// If absolute: return original context.
return blocks, origCtx, nil
}
return blocks, ctx, nil
}

109
pdf/creator/toc_test.go Normal file
View File

@ -0,0 +1,109 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
import (
"testing"
"github.com/unidoc/unidoc/pdf/model/fonts"
)
func TestTOCAdvanced(t *testing.T) {
fontHelvetica := fonts.NewFontHelvetica()
fontHelveticaBold := fonts.NewFontHelveticaBold()
c := New()
c.NewPage()
toc := NewTOC("Table of Contents")
// Set separator and margins for all the lines.
toc.SetLineSeparator(".")
toc.SetLineMargins(0, 0, 2, 2)
toc.SetLineLevelOffset(12)
// Set style for all line numbers.
style := NewTextStyle()
style.Font = fontHelveticaBold
style.Color = ColorRGBFrom8bit(100, 100, 100)
toc.SetLineNumberStyle(style)
// Set style for all line pages.
style.Font = fontHelveticaBold
style.Color = ColorRGBFrom8bit(0, 0, 0)
toc.SetLinePageStyle(style)
// Set style for all line titles.
style.Font = fontHelveticaBold
toc.SetLineTitleStyle(style)
// Set style for all line separators.
style.Font = fontHelvetica
style.FontSize = 9
toc.SetLineSeparatorStyle(style)
// Add TOC lines.
tl := toc.Add("", "Abstract", "i", 1)
tl.Title.Style.Font = fontHelveticaBold
tl.SetMargins(0, 0, 5, 5)
toc.Add("", "Aknowledgements", "ii", 1)
toc.Add("", "Table of Contents", "iii", 1)
// Customize line style.
red := ColorRGBFrom8bit(255, 0, 0)
tl = toc.Add("Chapter 1:", "Introduction", "1", 1)
tl.Title.Style.Font = fontHelveticaBold
tl.Title.Style.Color = red
tl.Number.Style.Color = red
tl.Page.Style.Color = red
tl.Separator.Style.Color = red
// Set style for all line titles.
style.Font = fontHelvetica
style.FontSize = 10
toc.SetLineTitleStyle(style)
// Set another style for the line page part.
style.Font = fontHelvetica
toc.SetLinePageStyle(style)
toc.Add("1.1", "Second Harmonic Generation (SHG)", "1", 2)
toc.Add("1.1.1", "Nonlinear induced polarization", "1", 3)
toc.Add("1.1.2", "Phase matching of the fundamental and emission waves", "2", 3)
toc.Add("1.1.3", "Collagen as an intrinsic biomarker for SHG generation", "3", 3)
toc.Add("1.1.4", "Second harmonic imaging microscopy", "6", 3)
toc.Add("1.2", "Light propagation in tissues", "8", 2)
toc.Add("1.2.1", "Radiative transfer equation for modeling light propagation in tissue", "8", 3)
toc.Add("1.2.2", "Monte Carlo method as a convenient and flexible solution to the RTE for modeling light transport\nin multi layered tissues", "10", 3)
toc.Add("1.2.3", "Measurement of optical properties", "15", 3)
toc.Add("1.2.4", "Analytical solution of light scattering: The Born aproximation", "19", 3)
toc.Add("1.2.5", "Refractive index corellation functions to describe light scattering in tissue", "21", 3)
toc.Add("1.3", "SHG creation and emission directionality", "24", 2)
toc.Add("1.4", "Combining SGH creation and emission directionality", "26", 2)
toc.Add("1.5", "Utilizing light to characterize tissue structure", "26", 2)
// Make line page part bold again.
style.Font = fontHelveticaBold
toc.SetLinePageStyle(style)
// Customize line style.
tl = toc.Add("", "References", "28", 1)
tl.Title.Style.Font = fontHelveticaBold
tl.Separator.Style.Font = fontHelveticaBold
tl.SetMargins(0, 0, 5, 0)
err := c.Draw(toc)
if err != nil {
t.Fatalf("Error drawing: %v", err)
}
// Write output file.
err = c.WriteToFile("/tmp/toc_advanced.pdf")
if err != nil {
t.Fatalf("Fail: %v\n", err)
}
}

View File

@ -151,9 +151,9 @@ func NewPdfColorspaceFromPdfObject(obj PdfObject) (PdfColorspace, error) {
return nil, errors.New("Type error")
}
// determineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and
// DetermineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and
// an error on failure. If the colorspace was not found, will return an empty string.
func determineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) {
func DetermineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) {
var csName *PdfObjectName
var csArray *PdfObjectArray
@ -2179,7 +2179,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS
obj = array.Get(1)
// Base cs cannot be another /Indexed or /Pattern space.
baseName, err := determineColorspaceNameFromPdfObject(obj)
baseName, err := DetermineColorspaceNameFromPdfObject(obj)
if baseName == "Indexed" || baseName == "Pattern" {
common.Log.Debug("Error: Indexed colorspace cannot have Indexed/Pattern CS as base (%v)", baseName)
return nil, ErrRangeError

View File

@ -0,0 +1,34 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
)
// Chain allows using a sequence of optimizers.
// It implements interface model.Optimizer.
type Chain struct {
optimizers []model.Optimizer
}
// Append appends optimizers to the chain.
func (c *Chain) Append(optimizers ...model.Optimizer) {
c.optimizers = append(c.optimizers, optimizers...)
}
// Optimize optimizes PDF objects to decrease PDF size.
func (c *Chain) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
optimizedObjects = objects
for _, optimizer := range c.optimizers {
optimizedObjects, err = optimizer.Optimize(optimizedObjects)
if err != nil {
return optimizedObjects, err
}
}
return optimizedObjects, nil
}
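A hedged sketch of assembling a Chain by hand; the particular optimizers chosen here are illustrative, and in practice the chain is normally built via optimize.New, shown later in this diff. The package name pdfopt is hypothetical.

package pdfopt // hypothetical helper package

import (
    "github.com/unidoc/unidoc/pdf/core"
    "github.com/unidoc/unidoc/pdf/model/optimize"
)

// OptimizeObjects runs a hand-assembled optimizer chain over a slice of PDF objects.
// The particular set of optimizers used here is illustrative only.
func OptimizeObjects(objects []core.PdfObject) ([]core.PdfObject, error) {
    chain := new(optimize.Chain)
    chain.Append(new(optimize.CombineDuplicateStreams))
    chain.Append(new(optimize.CombineDuplicateDirectObjects))
    chain.Append(new(optimize.CompressStreams))
    return chain.Optimize(objects)
}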

View File

@ -0,0 +1,70 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"crypto/md5"
"github.com/unidoc/unidoc/pdf/core"
)
// CombineDuplicateDirectObjects combines duplicate direct objects by their data hash.
// It implements interface model.Optimizer.
type CombineDuplicateDirectObjects struct {
}
// Optimize optimizes PDF objects to decrease PDF size.
func (dup *CombineDuplicateDirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
updateObjectNumbers(objects)
dictsByHash := make(map[string][]*core.PdfObjectDictionary)
var processDict func(pDict *core.PdfObjectDictionary)
processDict = func(pDict *core.PdfObjectDictionary) {
for _, key := range pDict.Keys() {
obj := pDict.Get(key)
if dict, isDictObj := obj.(*core.PdfObjectDictionary); isDictObj {
hasher := md5.New()
hasher.Write([]byte(dict.DefaultWriteString()))
hash := string(hasher.Sum(nil))
dictsByHash[hash] = append(dictsByHash[hash], dict)
processDict(dict)
}
}
}
for _, obj := range objects {
ind, isIndirectObj := obj.(*core.PdfIndirectObject)
if !isIndirectObj {
continue
}
if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj {
processDict(dict)
}
}
indirects := make([]core.PdfObject, 0, len(dictsByHash))
replaceTable := make(map[core.PdfObject]core.PdfObject)
for _, dicts := range dictsByHash {
if len(dicts) < 2 {
continue
}
dict := core.MakeDict()
dict.Merge(dicts[0])
ind := core.MakeIndirectObject(dict)
indirects = append(indirects, ind)
for i := 0; i < len(dicts); i++ {
dict := dicts[i]
replaceTable[dict] = ind
}
}
optimizedObjects = make([]core.PdfObject, len(objects))
copy(optimizedObjects, objects)
optimizedObjects = append(indirects, optimizedObjects...)
replaceObjectsInPlace(optimizedObjects, replaceTable)
return optimizedObjects, nil
}

View File

@ -0,0 +1,53 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"crypto/md5"
"github.com/unidoc/unidoc/pdf/core"
)
// CombineDuplicateStreams combines duplicate streams by their data hash.
// It implements interface model.Optimizer.
type CombineDuplicateStreams struct {
}
// Optimize optimizes PDF objects to decrease PDF size.
func (dup *CombineDuplicateStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
replaceTable := make(map[core.PdfObject]core.PdfObject)
toDelete := make(map[core.PdfObject]struct{})
streamsByHash := make(map[string][]*core.PdfObjectStream)
for _, obj := range objects {
if stream, isStreamObj := obj.(*core.PdfObjectStream); isStreamObj {
hasher := md5.New()
hasher.Write([]byte(stream.Stream))
hash := string(hasher.Sum(nil))
streamsByHash[hash] = append(streamsByHash[hash], stream)
}
}
for _, streams := range streamsByHash {
if len(streams) < 2 {
continue
}
firstStream := streams[0]
for i := 1; i < len(streams); i++ {
stream := streams[i]
replaceTable[stream] = firstStream
toDelete[stream] = struct{}{}
}
}
optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete))
for _, obj := range objects {
if _, found := toDelete[obj]; found {
continue
}
optimizedObjects = append(optimizedObjects, obj)
}
replaceObjectsInPlace(optimizedObjects, replaceTable)
return optimizedObjects, nil
}

View File

@ -0,0 +1,65 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"crypto/md5"
"github.com/unidoc/unidoc/pdf/core"
)
// CombineIdenticalIndirectObjects combines identical indirect objects.
// It implements interface model.Optimizer.
type CombineIdenticalIndirectObjects struct {
}
// Optimize optimizes PDF objects to decrease PDF size.
func (c *CombineIdenticalIndirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
updateObjectNumbers(objects)
replaceTable := make(map[core.PdfObject]core.PdfObject)
toDelete := make(map[core.PdfObject]struct{})
indWithDictByHash := make(map[string][]*core.PdfIndirectObject)
for _, obj := range objects {
ind, isIndirectObj := obj.(*core.PdfIndirectObject)
if !isIndirectObj {
continue
}
if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj {
if name, isName := dict.Get("Type").(*core.PdfObjectName); isName && *name == "Page" {
continue
}
hasher := md5.New()
hasher.Write([]byte(dict.DefaultWriteString()))
hash := string(hasher.Sum(nil))
indWithDictByHash[hash] = append(indWithDictByHash[hash], ind)
}
}
for _, dicts := range indWithDictByHash {
if len(dicts) < 2 {
continue
}
firstDict := dicts[0]
for i := 1; i < len(dicts); i++ {
dict := dicts[i]
replaceTable[dict] = firstDict
toDelete[dict] = struct{}{}
}
}
optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete))
for _, obj := range objects {
if _, found := toDelete[obj]; found {
continue
}
optimizedObjects = append(optimizedObjects, obj)
}
replaceObjectsInPlace(optimizedObjects, replaceTable)
return optimizedObjects, nil
}

View File

@ -0,0 +1,45 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"github.com/unidoc/unidoc/pdf/core"
)
// CompressStreams compresses uncompressed streams.
// It implements interface model.Optimizer.
type CompressStreams struct {
}
// Optimize optimizes PDF objects to decrease PDF size.
func (c *CompressStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
optimizedObjects = make([]core.PdfObject, len(objects))
copy(optimizedObjects, objects)
for _, obj := range objects {
stream, isStreamObj := core.GetStream(obj)
if !isStreamObj {
continue
}
if _, found := core.GetName(stream.PdfObjectDictionary.Get("Filter")); found {
continue
}
encoder := core.NewLZWEncoder()
encoder.EarlyChange = 0
var data []byte
data, err = encoder.EncodeBytes(stream.Stream)
if err != nil {
return optimizedObjects, err
}
dict := encoder.MakeStreamDict()
// compare compressed and uncompressed sizes
if len(data)+len(dict.DefaultWriteString()) < len(stream.Stream) {
stream.Stream = data
stream.PdfObjectDictionary.Merge(dict)
stream.PdfObjectDictionary.Set("Length", core.MakeInteger(int64(len(stream.Stream))))
}
}
return optimizedObjects, nil
}

138
pdf/model/optimize/image.go Normal file
View File

@ -0,0 +1,138 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
)
// Image optimizes images by re-encoding them as JPEG with quality equal to ImageQuality.
// TODO(a5i): Add support for inline images.
// It implements interface model.Optimizer.
type Image struct {
ImageQuality int
}
// imageInfo is information about an image.
type imageInfo struct {
ColorSpace core.PdfObjectName
BitsPerComponent int
ColorComponents int
Width int
Height int
Stream *core.PdfObjectStream
PPI float64
}
// findImages returns images from objects.
func findImages(objects []core.PdfObject) []*imageInfo {
subTypeKey := core.PdfObjectName("Subtype")
streamProcessed := make(map[*core.PdfObjectStream]struct{})
var err error
var images []*imageInfo
for _, obj := range objects {
stream, ok := core.GetStream(obj)
if !ok {
continue
}
if _, found := streamProcessed[stream]; found {
continue
}
streamProcessed[stream] = struct{}{}
subTypeValue := stream.PdfObjectDictionary.Get(subTypeKey)
subType, ok := core.GetName(subTypeValue)
if !ok || string(*subType) != "Image" {
continue
}
img := &imageInfo{BitsPerComponent: 8, Stream: stream}
if img.ColorSpace, err = model.DetermineColorspaceNameFromPdfObject(stream.PdfObjectDictionary.Get("ColorSpace")); err != nil {
common.Log.Error("Error determine color space %s", err)
continue
}
if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("BitsPerComponent")); ok {
img.BitsPerComponent = val
}
if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Width")); ok {
img.Width = val
}
if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Height")); ok {
img.Height = val
}
switch img.ColorSpace {
case "DeviceRGB":
img.ColorComponents = 3
case "DeviceGray":
img.ColorComponents = 1
default:
common.Log.Warning("Optimization is not supported for color space %s", img.ColorSpace)
continue
}
images = append(images, img)
}
return images
}
// Optimize optimizes PDF objects to decrease PDF size.
func (i *Image) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
if i.ImageQuality <= 0 {
return objects, nil
}
images := findImages(objects)
if len(images) == 0 {
return objects, nil
}
replaceTable := make(map[core.PdfObject]core.PdfObject)
imageMasks := make(map[core.PdfObject]struct{})
for _, img := range images {
obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))
imageMasks[obj] = struct{}{}
}
for index, img := range images {
stream := img.Stream
if _, isMask := imageMasks[stream]; isMask {
continue
}
streamEncoder, err := core.NewEncoderFromStream(stream)
if err != nil {
common.Log.Warning("Error get encoder for the image stream %s")
continue
}
data, err := streamEncoder.DecodeStream(stream)
if err != nil {
common.Log.Warning("Error decode the image stream %s")
continue
}
encoder := core.NewDCTEncoder()
encoder.ColorComponents = img.ColorComponents
encoder.Quality = i.ImageQuality
encoder.BitsPerComponent = img.BitsPerComponent
encoder.Width = img.Width
encoder.Height = img.Height
streamData, err := encoder.EncodeBytes(data)
if err != nil {
return nil, err
}
newStream := &core.PdfObjectStream{Stream: streamData}
newStream.PdfObjectReference = stream.PdfObjectReference
newStream.PdfObjectDictionary = core.MakeDict()
newStream.PdfObjectDictionary.Merge(stream.PdfObjectDictionary)
fn := core.PdfObjectName(encoder.GetFilterName())
newStream.PdfObjectDictionary.Set(core.PdfObjectName("Filter"), &fn)
ln := core.PdfObjectInteger(int64(len(streamData)))
newStream.PdfObjectDictionary.Set(core.PdfObjectName("Length"), &ln)
replaceTable[stream] = newStream
images[index].Stream = newStream
}
optimizedObjects = make([]core.PdfObject, len(objects))
copy(optimizedObjects, objects)
replaceObjectsInPlace(optimizedObjects, replaceTable)
return optimizedObjects, nil
}

View File

@ -0,0 +1,203 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"fmt"
"image"
"math"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/contentstream"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
"golang.org/x/image/draw"
)
// ImagePPI optimizes images by scaling them so that the PPI (pixels per inch) never exceeds ImageUpperPPI.
// TODO(a5i): Add support for inline images.
// It implements interface model.Optimizer.
type ImagePPI struct {
ImageUpperPPI float64
}
func scaleImage(stream *core.PdfObjectStream, scale float64) error {
xImg, err := model.NewXObjectImageFromStream(stream)
if err != nil {
return err
}
i, err := xImg.ToImage()
if err != nil {
return err
}
goimg, err := i.ToGoImage()
if err != nil {
return err
}
newW := int(math.RoundToEven(float64(i.Width) * scale))
newH := int(math.RoundToEven(float64(i.Height) * scale))
rect := image.Rect(0, 0, newW, newH)
var newImage draw.Image
switch xImg.ColorSpace.String() {
case "DeviceRGB":
newImage = image.NewRGBA(rect)
case "DeviceGray":
newImage = image.NewGray(rect)
default:
return fmt.Errorf("Optimization is not supported for color space %s", xImg.ColorSpace.String())
}
draw.CatmullRom.Scale(newImage, newImage.Bounds(), goimg, goimg.Bounds(), draw.Over, &draw.Options{})
i, err = model.ImageHandling.NewImageFromGoImage(newImage)
if err != nil {
return err
}
xImg.SetImage(i, xImg.ColorSpace)
xImg.ToPdfObject()
return nil
}
// Optimize optimizes PDF objects to decrease PDF size.
func (i *ImagePPI) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
if i.ImageUpperPPI <= 0 {
return objects, nil
}
images := findImages(objects)
if len(images) == 0 {
return objects, nil
}
imageMasks := make(map[core.PdfObject]struct{})
for _, img := range images {
obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))
imageMasks[obj] = struct{}{}
}
imageByStream := make(map[*core.PdfObjectStream]*imageInfo)
for _, img := range images {
imageByStream[img.Stream] = img
}
var catalog *core.PdfObjectDictionary
for _, obj := range objects {
if dict, isDict := core.GetDict(obj); catalog == nil && isDict {
if tp, ok := core.GetName(dict.Get(core.PdfObjectName("Type"))); ok && *tp == "Catalog" {
catalog = dict
}
}
}
if catalog == nil {
return objects, nil
}
pages, hasPages := core.GetDict(catalog.Get(core.PdfObjectName("Pages")))
if !hasPages {
return objects, nil
}
kids, hasKids := core.GetArray(pages.Get(core.PdfObjectName("Kids")))
if !hasKids {
return objects, nil
}
imageByName := make(map[string]*imageInfo)
for _, pageObj := range kids.Elements() {
page, ok := core.GetDict(pageObj)
if !ok {
continue
}
contents, hasContents := core.GetArray(page.Get("Contents"))
if !hasContents {
continue
}
resources, hasResources := core.GetDict(page.Get("Resources"))
if !hasResources {
continue
}
xObject, hasXObject := core.GetDict(resources.Get("XObject"))
if !hasXObject {
continue
}
xObjectKeys := xObject.Keys()
for _, key := range xObjectKeys {
if stream, isStream := core.GetStream(xObject.Get(key)); isStream {
if img, found := imageByStream[stream]; found {
imageByName[string(key)] = img
}
}
}
for _, obj := range contents.Elements() {
if stream, isStream := core.GetStream(obj); isStream {
streamEncoder, err := core.NewEncoderFromStream(stream)
if err != nil {
return nil, err
}
data, err := streamEncoder.DecodeStream(stream)
if err != nil {
return nil, err
}
p := contentstream.NewContentStreamParser(string(data))
operations, err := p.Parse()
if err != nil {
return nil, err
}
scaleX, scaleY := 1.0, 1.0
for _, operation := range *operations {
if operation.Operand == "Q" {
scaleX, scaleY = 1.0, 1.0
}
if operation.Operand == "cm" && len(operation.Params) == 6 {
if sx, ok := core.GetFloatVal(operation.Params[0]); ok {
scaleX = scaleX * sx
}
if sy, ok := core.GetFloatVal(operation.Params[3]); ok {
scaleY = scaleY * sy
}
if sx, ok := core.GetIntVal(operation.Params[0]); ok {
scaleX = scaleX * float64(sx)
}
if sy, ok := core.GetIntVal(operation.Params[3]); ok {
scaleY = scaleY * float64(sy)
}
}
if operation.Operand == "Do" && len(operation.Params) == 1 {
name, ok := core.GetName(operation.Params[0])
if !ok {
continue
}
if img, found := imageByName[string(*name)]; found {
wInch, hInch := scaleX/72.0, scaleY/72.0
xPPI, yPPI := float64(img.Width)/wInch, float64(img.Height)/hInch
if wInch == 0 || hInch == 0 {
xPPI = 72.0
yPPI = 72.0
}
img.PPI = math.Max(img.PPI, xPPI)
img.PPI = math.Max(img.PPI, yPPI)
}
}
}
}
}
}
for _, img := range images {
if _, isMask := imageMasks[img.Stream]; isMask {
continue
}
if img.PPI <= i.ImageUpperPPI {
continue
}
scale := i.ImageUpperPPI / img.PPI
if err := scaleImage(img.Stream, scale); err != nil {
common.Log.Debug("Error scale image keep original image: %s", err)
} else {
if mask, hasMask := core.GetStream(img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))); hasMask {
if err := scaleImage(mask, scale); err != nil {
return nil, err
}
}
}
}
return objects, nil
}

View File

@ -0,0 +1,40 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"github.com/unidoc/unidoc/pdf/core"
)
// ObjectStreams groups PDF objects into object streams.
// It implements interface model.Optimizer.
type ObjectStreams struct {
}
// Optimize optimizes PDF objects to decrease PDF size.
func (o *ObjectStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
objStream := &core.PdfObjectStreams{}
skippedObjects := make([]core.PdfObject, 0, len(objects))
for _, obj := range objects {
if io, isIndirectObj := obj.(*core.PdfIndirectObject); isIndirectObj && io.GenerationNumber == 0 {
objStream.Append(obj)
} else {
skippedObjects = append(skippedObjects, obj)
}
}
if objStream.Len() == 0 {
return skippedObjects, nil
}
optimizedObjects = make([]core.PdfObject, 0, len(skippedObjects)+objStream.Len()+1)
if objStream.Len() > 1 {
optimizedObjects = append(optimizedObjects, objStream)
}
optimizedObjects = append(optimizedObjects, objStream.Elements()...)
optimizedObjects = append(optimizedObjects, skippedObjects...)
return optimizedObjects, nil
}

View File

@ -0,0 +1,212 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize_test
import (
"bytes"
"fmt"
"io"
"testing"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model/optimize"
)
// parseIndirectObjects parses a sequence of indirect/stream objects from the `rawpdf` text.
func parseIndirectObjects(rawpdf string) ([]core.PdfObject, error) {
p := core.NewParserFromString(rawpdf)
indirects := []core.PdfObject{}
for {
obj, err := p.ParseIndirectObject()
if err != nil {
if err == io.EOF {
break
}
return nil, err
}
indirects = append(indirects, obj)
}
return indirects, nil
}
// debugObjects prints objects in a readable fashion, convenient when debugging.
func debugObjects(objects []core.PdfObject) string {
var buf bytes.Buffer
for _, obj := range objects {
switch t := obj.(type) {
case *core.PdfIndirectObject:
buf.WriteString(fmt.Sprintf("%d 0 obj\n", t.ObjectNumber))
buf.WriteString(fmt.Sprintf(" %s\n", t.PdfObject.String()))
}
}
return buf.String()
}
func TestOptimizeIdenticalIndirects1(t *testing.T) {
rawpdf := `
1 0 obj
<<
/Name (1234)
>>
endobj
2 0 obj
<< /Name (1234) >>
endobj
`
objects, err := parseIndirectObjects(rawpdf)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(objects) != 2 {
t.Fatalf("len(objects) != 2 (%d)", len(objects))
}
// Combine duplicate direct objects - Expect unchanged results.
{
opt := optimize.CombineDuplicateDirectObjects{}
optObjects, err := opt.Optimize(objects)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(optObjects) != 2 {
t.Fatalf("len(optObjects1) != 2 (%d)", len(optObjects))
}
}
// Combine indirect objects should go from 2 to 1.
{
opt := optimize.CombineIdenticalIndirectObjects{}
optObjects, err := opt.Optimize(objects)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(optObjects) != 1 {
t.Fatalf("len(optObjects1) != 1 (%d)", len(optObjects))
}
}
}
// More complex case, where one object has a reference whereas the other does not.
// Expecting this NOT to work as we don't currently support this case.
// TODO: Add support for this.
func TestOptimizeIdenticalIndirectsUnsupported1(t *testing.T) {
rawpdf := `
1 0 obj
(1234)
endobj
2 0 obj
<<
/Name (1234)
>>
endobj
3 0 obj
<< /Name 1 0 R >>
endobj
`
objects, err := parseIndirectObjects(rawpdf)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(objects) != 3 {
t.Fatalf("len(objects) != 2 (%d)", len(objects))
}
// Combine duplicate direct objects - Expect unchanged results.
{
opt := optimize.CombineDuplicateDirectObjects{}
optObjects, err := opt.Optimize(objects)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(optObjects) != 3 {
t.Fatalf("len(optObjects1) != 2 (%d)", len(optObjects))
}
}
// Combining indirect objects would ideally go from 3 to 2, but this case is not yet supported.
{
opt := optimize.CombineIdenticalIndirectObjects{}
optObjects, err := opt.Optimize(objects)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(optObjects) != 3 { // TODO: Add support. If ideal, this would be 2.
t.Fatalf("len(optObjects) != 3 (%d)", len(optObjects))
}
}
}
// Showcases a problem with the sequence CombineDuplicateDirectObjects followed by CombineIdenticalIndirectObjects
// when object numbers are not updated between steps (due to non-unique object numbering and reference strings).
func TestOptimizationSequence1(t *testing.T) {
rawpdf := `
1 0 obj
<<
/Inner << /Color (red) >>
>>
endobj
2 0 obj
<<
/Inner << /Color (red) >>
/Other (abc)
>>
endobj
3 0 obj
<<
/Inner << /Color (blue) >>
/Other (abc)
>>
endobj
4 0 obj
<<
/Inner << /Color (blue) >>
>>
endobj
`
objects, err := parseIndirectObjects(rawpdf)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(objects) != 4 {
t.Fatalf("len(objects) != 4 (%d)", len(objects))
}
debugstr1 := debugObjects(objects)
// 1. Combine duplicate direct objects.
// Expect that 2 new indirect objects will be added, as two of the inner dictionaries are identical.
opt := optimize.CombineDuplicateDirectObjects{}
optObjects, err := opt.Optimize(objects)
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(optObjects) != 6 {
t.Fatalf("len(optObjects) != 6 (%d)", len(optObjects))
}
debugstr2 := debugObjects(optObjects)
// 2. Combine indirect objects.
// Should not make any difference here unless there was a problem.
opt2 := optimize.CombineIdenticalIndirectObjects{}
optObjects, err = opt2.Optimize(optObjects)
if err != nil {
t.Fatalf("Error: %v", err)
}
debugstr3 := debugObjects(optObjects)
fmt.Println("==Original")
fmt.Println(debugstr1)
fmt.Println("==After CombineDuplicateDirectObjects")
fmt.Println(debugstr2)
fmt.Println("==After CombineIdenticalIndirectObjects")
fmt.Println(debugstr3)
if len(optObjects) != 6 {
t.Fatalf("len(optObjects) != 6 (%d)", len(optObjects))
}
}

View File

@ -0,0 +1,102 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"github.com/unidoc/unidoc/pdf/core"
)
// New creates an optimizer chain from the given options.
func New(options Options) *Chain {
chain := new(Chain)
if options.ImageUpperPPI > 0 {
imageOptimizer := new(ImagePPI)
imageOptimizer.ImageUpperPPI = options.ImageUpperPPI
chain.Append(imageOptimizer)
}
if options.ImageQuality > 0 {
imageOptimizer := new(Image)
imageOptimizer.ImageQuality = options.ImageQuality
chain.Append(imageOptimizer)
}
if options.CombineDuplicateDirectObjects {
chain.Append(new(CombineDuplicateDirectObjects))
}
if options.CombineDuplicateStreams {
chain.Append(new(CombineDuplicateStreams))
}
if options.CombineIdenticalIndirectObjects {
chain.Append(new(CombineIdenticalIndirectObjects))
}
if options.UseObjectStreams {
chain.Append(new(ObjectStreams))
}
if options.CompressStreams {
chain.Append(new(CompressStreams))
}
return chain
}
// replaceObjectsInPlace replaces objects according to the objTo map. objTo is modified during the process.
func replaceObjectsInPlace(objects []core.PdfObject, objTo map[core.PdfObject]core.PdfObject) {
if objTo == nil || len(objTo) == 0 {
return
}
for i, obj := range objects {
if to, found := objTo[obj]; found {
objects[i] = to
continue
}
objTo[obj] = obj
switch t := obj.(type) {
case *core.PdfObjectArray:
values := make([]core.PdfObject, t.Len())
copy(values, t.Elements())
replaceObjectsInPlace(values, objTo)
for i, obj := range values {
t.Set(i, obj)
}
case *core.PdfObjectStreams:
replaceObjectsInPlace(t.Elements(), objTo)
case *core.PdfObjectStream:
values := []core.PdfObject{t.PdfObjectDictionary}
replaceObjectsInPlace(values, objTo)
t.PdfObjectDictionary = values[0].(*core.PdfObjectDictionary)
case *core.PdfObjectDictionary:
keys := t.Keys()
values := make([]core.PdfObject, len(keys))
for i, key := range keys {
values[i] = t.Get(key)
}
replaceObjectsInPlace(values, objTo)
for i, key := range keys {
t.Set(key, values[i])
}
case *core.PdfIndirectObject:
values := []core.PdfObject{t.PdfObject}
replaceObjectsInPlace(values, objTo)
t.PdfObject = values[0]
}
}
}
// updateObjectNumbers updates all the object numbers prior to hashing the objects.
func updateObjectNumbers(objects []core.PdfObject) {
// Update numbers
for idx, obj := range objects {
switch o := obj.(type) {
case *core.PdfIndirectObject:
o.ObjectNumber = int64(idx + 1)
o.GenerationNumber = 0
case *core.PdfObjectStream:
o.ObjectNumber = int64(idx + 1)
o.GenerationNumber = 0
case *core.PdfObjectStreams:
o.ObjectNumber = int64(idx + 1)
o.GenerationNumber = 0
}
}
}

View File

@ -0,0 +1,17 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
// Options describes PDF optimization parameters.
type Options struct {
CombineDuplicateStreams bool
CombineDuplicateDirectObjects bool
ImageUpperPPI float64
ImageQuality int
UseObjectStreams bool
CombineIdenticalIndirectObjects bool
CompressStreams bool
}
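A sketch of the typical end-to-end usage, wiring optimize.New(Options) into a PdfWriter via SetOptimizer (added later in this diff). The option values and output path are illustrative, not recommendations.

package main

import (
    "log"
    "os"

    "github.com/unidoc/unidoc/pdf/model"
    "github.com/unidoc/unidoc/pdf/model/optimize"
)

func main() {
    w := model.NewPdfWriter()
    // ... pages would normally be added to the writer here ...

    // Option values are illustrative.
    w.SetOptimizer(optimize.New(optimize.Options{
        CombineDuplicateStreams:         true,
        CombineDuplicateDirectObjects:   true,
        CombineIdenticalIndirectObjects: true,
        UseObjectStreams:                true,
        CompressStreams:                 true,
        ImageQuality:                    80,
        ImageUpperPPI:                   150,
    }))

    f, err := os.Create("optimized.pdf")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    if err := w.Write(f); err != nil {
        log.Fatal(err)
    }
}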

18
pdf/model/optimizer.go Normal file
View File

@ -0,0 +1,18 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"github.com/unidoc/unidoc/pdf/core"
)
// Optimizer is the interface that performs optimization of PDF object structure for output writing.
//
// Optimize receives a slice of input `objects`, performs optimization, including removing and replacing objects,
// and outputs the optimized slice of objects.
type Optimizer interface {
Optimize(objects []core.PdfObject) ([]core.PdfObject, error)
}
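To illustrate the interface shape, a minimal sketch of a custom Optimizer that passes objects through unchanged; the type and package names are hypothetical.

package myoptimizer // hypothetical package name

import (
    "github.com/unidoc/unidoc/pdf/core"
)

// PassThrough implements model.Optimizer and returns the objects unchanged.
// A real optimizer would remove or replace objects before returning the slice.
type PassThrough struct{}

// Optimize returns the input objects as-is.
func (PassThrough) Optimize(objects []core.PdfObject) ([]core.PdfObject, error) {
    return objects, nil
}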

View File

@ -13,6 +13,7 @@ import (
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/core/security"
)
// PdfReader represents a PDF file reader. It is a frontend to the lower level parsing mechanism and provides
@ -79,29 +80,7 @@ func (this *PdfReader) IsEncrypted() (bool, error) {
// GetEncryptionMethod returns a descriptive information string about the encryption method used.
func (this *PdfReader) GetEncryptionMethod() string {
crypter := this.parser.GetCrypter()
str := crypter.Filter + " - "
if crypter.V == 0 {
str += "Undocumented algorithm"
} else if crypter.V == 1 {
// RC4 or AES (bits: 40)
str += "RC4: 40 bits"
} else if crypter.V == 2 {
str += fmt.Sprintf("RC4: %d bits", crypter.Length)
} else if crypter.V == 3 {
str += "Unpublished algorithm"
} else if crypter.V >= 4 {
// Look at CF, StmF, StrF
str += fmt.Sprintf("Stream filter: %s - String filter: %s", crypter.StreamFilter, crypter.StringFilter)
str += "; Crypt filters:"
for name, cf := range crypter.CryptFilters {
str += fmt.Sprintf(" - %s: %s (%d)", name, cf.Cfm, cf.Length)
}
}
perms := crypter.GetAccessPermissions()
str += fmt.Sprintf(" - %#v", perms)
return str
return crypter.String()
}
// Decrypt decrypts the PDF file with a specified password. Also tries to
@ -132,7 +111,7 @@ func (this *PdfReader) Decrypt(password []byte) (bool, error) {
// The bool flag indicates that the user can access and view the file.
// The AccessPermissions shows what access the user has for editing etc.
// An error is returned if there was a problem performing the authentication.
func (this *PdfReader) CheckAccessRights(password []byte) (bool, AccessPermissions, error) {
func (this *PdfReader) CheckAccessRights(password []byte) (bool, security.Permissions, error) {
return this.parser.CheckAccessRights(password)
}

View File

@ -10,21 +10,31 @@ package model
import (
"bufio"
"crypto/md5"
"crypto/rand"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"strings"
"time"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/common/license"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/core/security"
"github.com/unidoc/unidoc/pdf/core/security/crypt"
"github.com/unidoc/unidoc/pdf/model/fonts"
)
// crossReference describes a single entry in the cross-reference table/stream.
type crossReference struct {
Type int
// Type 1 (in-use object)
Offset int64
Generation int64 // and Type 0 (free object)
// Type 2 (object stored in an object stream)
ObjectNumber int // and Type 0
Index int
}
var pdfCreator = ""
func getPdfProducer() string {
@ -79,6 +89,9 @@ type PdfWriter struct {
// Forms.
acroForm *PdfAcroForm
optimizer Optimizer
crossReferenceMap map[int]crossReference
}
// NewPdfWriter initializes a new PdfWriter.
@ -132,6 +145,111 @@ func NewPdfWriter() PdfWriter {
return w
}
// copyObject creates a deep copy of the PDF object and
// fills objectToObjectCopyMap, mapping the old object to its copy.
// The objectToObjectCopyMap parameter is needed so that references to an object can be
// replaced with references to its copy, since many objects contain references to other objects (e.g. pages to images).
func copyObject(obj PdfObject, objectToObjectCopyMap map[PdfObject]PdfObject) PdfObject {
if newObj, ok := objectToObjectCopyMap[obj]; ok {
return newObj
}
switch t := obj.(type) {
case *PdfObjectArray:
newObj := &PdfObjectArray{}
objectToObjectCopyMap[obj] = newObj
for _, val := range t.Elements() {
newObj.Append(copyObject(val, objectToObjectCopyMap))
}
return newObj
case *PdfObjectStreams:
newObj := &PdfObjectStreams{PdfObjectReference: t.PdfObjectReference}
objectToObjectCopyMap[obj] = newObj
for _, val := range t.Elements() {
newObj.Append(copyObject(val, objectToObjectCopyMap))
}
return newObj
case *PdfObjectStream:
newObj := &PdfObjectStream{
Stream: t.Stream,
PdfObjectReference: t.PdfObjectReference,
}
objectToObjectCopyMap[obj] = newObj
newObj.PdfObjectDictionary = copyObject(t.PdfObjectDictionary, objectToObjectCopyMap).(*PdfObjectDictionary)
return newObj
case *PdfObjectDictionary:
newObj := MakeDict()
objectToObjectCopyMap[obj] = newObj
for _, key := range t.Keys() {
val := t.Get(key)
newObj.Set(key, copyObject(val, objectToObjectCopyMap))
}
return newObj
case *PdfIndirectObject:
newObj := &PdfIndirectObject{
PdfObjectReference: t.PdfObjectReference,
}
objectToObjectCopyMap[obj] = newObj
newObj.PdfObject = copyObject(t.PdfObject, objectToObjectCopyMap)
return newObj
case *PdfObjectString:
newObj := &PdfObjectString{}
*newObj = *t
objectToObjectCopyMap[obj] = newObj
return newObj
case *PdfObjectName:
newObj := PdfObjectName(*t)
objectToObjectCopyMap[obj] = &newObj
return &newObj
case *PdfObjectNull:
newObj := PdfObjectNull{}
objectToObjectCopyMap[obj] = &newObj
return &newObj
case *PdfObjectInteger:
newObj := PdfObjectInteger(*t)
objectToObjectCopyMap[obj] = &newObj
return &newObj
case *PdfObjectReference:
newObj := PdfObjectReference(*t)
objectToObjectCopyMap[obj] = &newObj
return &newObj
case *PdfObjectFloat:
newObj := PdfObjectFloat(*t)
objectToObjectCopyMap[obj] = &newObj
return &newObj
case *PdfObjectBool:
newObj := PdfObjectBool(*t)
objectToObjectCopyMap[obj] = &newObj
return &newObj
default:
common.Log.Info("TODO(a5i): implement copyObject for %+v", obj)
}
// return other objects as is
return obj
}
// copyObjects makes a copy of all objects and sets the copies as the working set.
func (this *PdfWriter) copyObjects() {
objectToObjectCopyMap := make(map[PdfObject]PdfObject)
objects := make([]PdfObject, len(this.objects))
objectsMap := make(map[PdfObject]bool)
for i, obj := range this.objects {
newObject := copyObject(obj, objectToObjectCopyMap)
objects[i] = newObject
if this.objectsMap[obj] {
objectsMap[newObject] = true
}
}
this.objects = objects
this.objectsMap = objectsMap
this.infoObj = copyObject(this.infoObj, objectToObjectCopyMap).(*PdfIndirectObject)
this.root = copyObject(this.root, objectToObjectCopyMap).(*PdfIndirectObject)
if this.encryptObj != nil {
this.encryptObj = copyObject(this.encryptObj, objectToObjectCopyMap).(*PdfIndirectObject)
}
}
// Set the PDF version of the output file.
func (this *PdfWriter) SetVersion(majorVersion, minorVersion int) {
this.majorVersion = majorVersion
@ -152,6 +270,16 @@ func (this *PdfWriter) SetOCProperties(ocProperties PdfObject) error {
return nil
}
// SetOptimizer sets the optimizer to optimize PDF before writing.
func (this *PdfWriter) SetOptimizer(optimizer Optimizer) {
this.optimizer = optimizer
}
// GetOptimizer returns current PDF optimizer.
func (this *PdfWriter) GetOptimizer() Optimizer {
return this.optimizer
}
func (this *PdfWriter) hasObject(obj PdfObject) bool {
// Check if already added.
for _, o := range this.objects {
@ -438,6 +566,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
common.Log.Trace("Write obj #%d\n", num)
if pobj, isIndirect := obj.(*PdfIndirectObject); isIndirect {
this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber}
outStr := fmt.Sprintf("%d 0 obj\n", num)
outStr += pobj.PdfObject.DefaultWriteString()
outStr += "\nendobj\n"
@ -448,6 +577,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
// XXX/TODO: Add a default encoder if Filter not specified?
// Still need to make sure is encrypted.
if pobj, isStream := obj.(*PdfObjectStream); isStream {
this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber}
outStr := fmt.Sprintf("%d 0 obj\n", num)
outStr += pobj.PdfObjectDictionary.DefaultWriteString()
outStr += "\nstream\n"
@ -457,6 +587,46 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
return
}
if ostreams, isObjStreams := obj.(*PdfObjectStreams); isObjStreams {
this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: ostreams.GenerationNumber}
outStr := fmt.Sprintf("%d 0 obj\n", num)
var offsets []string
var objData string
var offset int64
for index, obj := range ostreams.Elements() {
io, isIndirect := obj.(*PdfIndirectObject)
if !isIndirect {
common.Log.Error("Object streams N %d contains non indirect pdf object %v", num, obj)
}
data := io.PdfObject.DefaultWriteString() + " "
objData = objData + data
offsets = append(offsets, fmt.Sprintf("%d %d", io.ObjectNumber, offset))
this.crossReferenceMap[int(io.ObjectNumber)] = crossReference{Type: 2, ObjectNumber: num, Index: index}
offset = offset + int64(len([]byte(data)))
}
offsetsStr := strings.Join(offsets, " ") + " "
encoder := NewFlateEncoder()
//encoder := NewRawEncoder()
dict := encoder.MakeStreamDict()
dict.Set(PdfObjectName("Type"), MakeName("ObjStm"))
n := int64(ostreams.Len())
dict.Set(PdfObjectName("N"), MakeInteger(n))
first := int64(len(offsetsStr))
dict.Set(PdfObjectName("First"), MakeInteger(first))
data, _ := encoder.EncodeBytes([]byte(offsetsStr + objData))
length := int64(len(data))
dict.Set(PdfObjectName("Length"), MakeInteger(length))
outStr += dict.DefaultWriteString()
outStr += "\nstream\n"
this.writeString(outStr)
this.writeBytes(data)
this.writeString("\nendstream\nendobj\n")
return
}
this.writer.WriteString(obj.DefaultWriteString())
}
@ -464,20 +634,23 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
func (this *PdfWriter) updateObjectNumbers() {
// Update numbers
for idx, obj := range this.objects {
if io, isIndirect := obj.(*PdfIndirectObject); isIndirect {
io.ObjectNumber = int64(idx + 1)
io.GenerationNumber = 0
}
if so, isStream := obj.(*PdfObjectStream); isStream {
so.ObjectNumber = int64(idx + 1)
so.GenerationNumber = 0
switch o := obj.(type) {
case *PdfIndirectObject:
o.ObjectNumber = int64(idx + 1)
o.GenerationNumber = 0
case *PdfObjectStream:
o.ObjectNumber = int64(idx + 1)
o.GenerationNumber = 0
case *PdfObjectStreams:
o.ObjectNumber = int64(idx + 1)
o.GenerationNumber = 0
}
}
}
// EncryptOptions represents encryption options for an output PDF.
type EncryptOptions struct {
Permissions AccessPermissions
Permissions security.Permissions
Algorithm EncryptionAlgorithm
}
@ -495,121 +668,40 @@ const (
// Encrypt encrypts the output file with a specified user/owner password.
func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptions) error {
crypter := PdfCrypt{}
this.crypter = &crypter
crypter.EncryptedObjects = map[PdfObject]bool{}
crypter.CryptFilters = CryptFilters{}
algo := RC4_128bit
if options != nil {
algo = options.Algorithm
}
perm := security.PermOwner
if options != nil {
perm = options.Permissions
}
var cf CryptFilter
var cf crypt.Filter
switch algo {
case RC4_128bit:
crypter.V = 2
crypter.R = 3
cf = NewCryptFilterV2(16)
cf = crypt.NewFilterV2(16)
case AES_128bit:
this.SetVersion(1, 5)
crypter.V = 4
crypter.R = 4
cf = NewCryptFilterAESV2()
cf = crypt.NewFilterAESV2()
case AES_256bit:
this.SetVersion(2, 0)
crypter.V = 5
crypter.R = 6 // TODO(dennwc): a way to set R=5?
cf = NewCryptFilterAESV3()
cf = crypt.NewFilterAESV3()
default:
return fmt.Errorf("unsupported algorithm: %v", options.Algorithm)
}
crypter.Length = cf.Length * 8
const (
defaultFilter = StandardCryptFilter
)
crypter.CryptFilters[defaultFilter] = cf
if crypter.V >= 4 {
crypter.StreamFilter = defaultFilter
crypter.StringFilter = defaultFilter
crypter, info, err := PdfCryptNewEncrypt(cf, userPass, ownerPass, perm)
if err != nil {
return err
}
// Set
crypter.P = math.MaxUint32
crypter.EncryptMetadata = true
if options != nil {
crypter.P = int(options.Permissions.GetP())
this.crypter = crypter
if info.Major != 0 {
this.SetVersion(info.Major, info.Minor)
}
this.encryptDict = info.Encrypt
// Generate the encryption dictionary.
ed := MakeDict()
ed.Set("Filter", MakeName("Standard"))
ed.Set("P", MakeInteger(int64(crypter.P)))
ed.Set("V", MakeInteger(int64(crypter.V)))
ed.Set("R", MakeInteger(int64(crypter.R)))
ed.Set("Length", MakeInteger(int64(crypter.Length)))
this.encryptDict = ed
// Prepare the ID object for the trailer.
hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850)))
id0 := string(hashcode[:])
b := make([]byte, 100)
rand.Read(b)
hashcode = md5.Sum(b)
id1 := string(hashcode[:])
common.Log.Trace("Random b: % x", b)
this.ids = MakeArray(MakeHexString(id0), MakeHexString(id1))
common.Log.Trace("Gen Id 0: % x", id0)
// Generate encryption parameters
if crypter.R < 5 {
crypter.Id0 = string(id0)
// Make the O and U objects.
O, err := crypter.Alg3(userPass, ownerPass)
if err != nil {
common.Log.Debug("ERROR: Error generating O for encryption (%s)", err)
return err
}
crypter.O = []byte(O)
common.Log.Trace("gen O: % x", O)
U, key, err := crypter.Alg5(userPass)
if err != nil {
common.Log.Debug("ERROR: Error generating O for encryption (%s)", err)
return err
}
common.Log.Trace("gen U: % x", U)
crypter.U = []byte(U)
crypter.EncryptionKey = key
ed.Set("O", MakeHexString(O))
ed.Set("U", MakeHexString(U))
} else { // R >= 5
err := crypter.GenerateParams(userPass, ownerPass)
if err != nil {
return err
}
ed.Set("O", MakeString(string(crypter.O)))
ed.Set("U", MakeString(string(crypter.U)))
ed.Set("OE", MakeString(string(crypter.OE)))
ed.Set("UE", MakeString(string(crypter.UE)))
ed.Set("EncryptMetadata", MakeBool(crypter.EncryptMetadata))
if crypter.R > 5 {
ed.Set("Perms", MakeString(string(crypter.Perms)))
}
}
if crypter.V >= 4 {
if err := crypter.SaveCryptFilters(ed); err != nil {
return err
}
}
this.ids = MakeArray(MakeHexString(info.ID0), MakeHexString(info.ID1))
// Make an object to contain the encryption dictionary.
io := MakeIndirectObject(ed)
io := MakeIndirectObject(info.Encrypt)
this.encryptObj = io
this.addObject(io)
@ -687,23 +779,54 @@ func (this *PdfWriter) Write(writer io.Writer) error {
// Set version in the catalog.
this.catalog.Set("Version", MakeName(fmt.Sprintf("%d.%d", this.majorVersion, this.minorVersion)))
// Make a copy of objects prior to optimizing as this can alter the objects.
this.copyObjects()
if this.optimizer != nil {
var err error
this.objects, err = this.optimizer.Optimize(this.objects)
if err != nil {
return err
}
}
w := bufio.NewWriter(writer)
this.writer = w
this.writePos = 0
useCrossReferenceStream := this.majorVersion > 1 || (this.majorVersion == 1 && this.minorVersion > 4)
objectsInObjectStreams := make(map[PdfObject]bool)
if !useCrossReferenceStream {
for _, obj := range this.objects {
if objStm, isObjectStreams := obj.(*PdfObjectStreams); isObjectStreams {
useCrossReferenceStream = true
for _, obj := range objStm.Elements() {
objectsInObjectStreams[obj] = true
if io, isIndirectObj := obj.(*PdfIndirectObject); isIndirectObj {
objectsInObjectStreams[io.PdfObject] = true
}
}
}
}
}
if useCrossReferenceStream && this.majorVersion == 1 && this.minorVersion < 5 {
this.minorVersion = 5
}
this.writeString(fmt.Sprintf("%%PDF-%d.%d\n", this.majorVersion, this.minorVersion))
this.writeString("%âãÏÓ\n")
this.updateObjectNumbers()
offsets := []int64{}
// Write objects
common.Log.Trace("Writing %d obj", len(this.objects))
this.crossReferenceMap = make(map[int]crossReference)
this.crossReferenceMap[0] = crossReference{Type: 0, ObjectNumber: 0, Generation: 0xFFFF}
for idx, obj := range this.objects {
if skip := objectsInObjectStreams[obj]; skip {
continue
}
common.Log.Trace("Writing %d", idx)
offset := this.writePos
offsets = append(offsets, offset)
// Encrypt prior to writing.
// Encrypt dictionary should not be encrypted.
@ -713,41 +836,90 @@ func (this *PdfWriter) Write(writer io.Writer) error {
common.Log.Debug("ERROR: Failed encrypting (%s)", err)
return err
}
}
this.writeObject(idx+1, obj)
}
xrefOffset := this.writePos
// Write xref table.
this.writeString("xref\r\n")
outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.objects)+1)
this.writeString(outStr)
outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535)
this.writeString(outStr)
for _, offset := range offsets {
outStr = fmt.Sprintf("%.10d %.5d n\r\n", offset, 0)
this.writeString(outStr)
}
if useCrossReferenceStream {
crossObjNumber := len(this.crossReferenceMap)
this.crossReferenceMap[crossObjNumber] = crossReference{Type: 1, ObjectNumber: crossObjNumber, Offset: xrefOffset}
crossReferenceData := bytes.NewBuffer(nil)
for idx := 0; idx < len(this.crossReferenceMap); idx++ {
ref := this.crossReferenceMap[idx]
switch ref.Type {
case 0:
binary.Write(crossReferenceData, binary.BigEndian, byte(0))
binary.Write(crossReferenceData, binary.BigEndian, uint32(0))
binary.Write(crossReferenceData, binary.BigEndian, uint16(0xFFFF))
case 1:
binary.Write(crossReferenceData, binary.BigEndian, byte(1))
binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.Offset))
binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Generation))
case 2:
binary.Write(crossReferenceData, binary.BigEndian, byte(2))
binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.ObjectNumber))
binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Index))
}
}
crossReferenceStream, err := MakeStream(crossReferenceData.Bytes(), NewFlateEncoder())
if err != nil {
return err
}
crossReferenceStream.ObjectNumber = int64(crossObjNumber)
crossReferenceStream.PdfObjectDictionary.Set("Type", MakeName("XRef"))
crossReferenceStream.PdfObjectDictionary.Set("W", MakeArray(MakeInteger(1), MakeInteger(4), MakeInteger(2)))
crossReferenceStream.PdfObjectDictionary.Set("Index", MakeArray(MakeInteger(0), MakeInteger(crossReferenceStream.ObjectNumber+1)))
crossReferenceStream.PdfObjectDictionary.Set("Size", MakeInteger(crossReferenceStream.ObjectNumber+1))
crossReferenceStream.PdfObjectDictionary.Set("Info", this.infoObj)
crossReferenceStream.PdfObjectDictionary.Set("Root", this.root)
// If encrypted!
if this.crypter != nil {
crossReferenceStream.Set("Encrypt", this.encryptObj)
crossReferenceStream.Set("ID", this.ids)
common.Log.Trace("Ids: %s", this.ids)
}
this.writeObject(int(crossReferenceStream.ObjectNumber), crossReferenceStream)
} else {
this.writeString("xref\r\n")
outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.crossReferenceMap))
this.writeString(outStr)
for idx := 0; idx < len(this.crossReferenceMap); idx++ {
ref := this.crossReferenceMap[idx]
switch ref.Type {
case 0:
outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535)
this.writeString(outStr)
case 1:
outStr = fmt.Sprintf("%.10d %.5d n\r\n", ref.Offset, 0)
this.writeString(outStr)
}
}
// Generate & write trailer
trailer := MakeDict()
trailer.Set("Info", this.infoObj)
trailer.Set("Root", this.root)
trailer.Set("Size", MakeInteger(int64(len(this.objects)+1)))
// If encrypted!
if this.crypter != nil {
trailer.Set("Encrypt", this.encryptObj)
trailer.Set("ID", this.ids)
common.Log.Trace("Ids: %s", this.ids)
}
this.writeString("trailer\n")
this.writeString(trailer.DefaultWriteString())
this.writeString("\n")
// Generate & write trailer
trailer := MakeDict()
trailer.Set("Info", this.infoObj)
trailer.Set("Root", this.root)
trailer.Set("Size", MakeInteger(int64(len(this.objects)+1)))
// If encrypted!
if this.crypter != nil {
trailer.Set("Encrypt", this.encryptObj)
trailer.Set("ID", this.ids)
common.Log.Trace("Ids: %s", this.ids)
}
this.writeString("trailer\n")
this.writeString(trailer.DefaultWriteString())
this.writeString("\n")
// Make offset reference.
outStr = fmt.Sprintf("startxref\n%d\n", xrefOffset)
outStr := fmt.Sprintf("startxref\n%d\n", xrefOffset)
this.writeString(outStr)
this.writeString("%%EOF\n")