unipdf/pdf/model/appender.go

685 lines
19 KiB
Go
Raw Normal View History

2018-12-11 16:06:34 +03:00
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
2018-12-19 18:36:15 +03:00
"time"
2018-12-11 16:06:34 +03:00
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
)
// PdfAppender appends new Pdf content to an existing Pdf document. On Write the original bytes are copied
// through and only the objects collected as new or changed are written after them.
type PdfAppender struct {
	// rs is the stream of the source document, taken from the reader passed to NewPdfAppender.
	rs io.ReadSeeker
	// parser is the parser of Reader.
	parser *core.PdfParser
	// roReader is a second reader created over rs; objects owned by its parser are treated as unchanged.
	roReader *PdfReader

	// Reader is the reader the appender was created from.
	Reader *PdfReader

	// pages is the working page list; it starts as the page list of roReader.
	pages []*PdfPage
	// acroForm is the form to emit; it starts as the form of roReader.
	acroForm *PdfAcroForm

	// xrefs is the cross reference table obtained from parser.
	xrefs core.XrefTable
	// greatestObjNum is the largest object number reported by Reader.
	greatestObjNum int

	// newObjects lists the objects to be appended, in discovery order.
	newObjects []core.PdfObject
	// hasNewObject is the set of objects already present in newObjects.
	hasNewObject map[core.PdfObject]struct{}
}
// getPageResources flattens the named entries of the page resource categories into a single map.
// Categories are visited in a fixed order, so an entry of a later category replaces an entry with
// the same name from an earlier one. Categories that are nil or not dictionaries are skipped.
func getPageResources(p *PdfPage) map[core.PdfObjectName]core.PdfObject {
	flattened := make(map[core.PdfObjectName]core.PdfObject)
	res := p.Resources
	if res == nil {
		return flattened
	}

	categories := []core.PdfObject{
		res.Font,
		res.ExtGState,
		res.XObject,
		res.Pattern,
		res.Shading,
		res.ProcSet,
		res.Properties,
	}
	for _, category := range categories {
		if category == nil {
			continue
		}
		entries, isDict := core.GetDict(category)
		if !isDict {
			continue
		}
		for _, name := range entries.Keys() {
			flattened[name] = entries.Get(name)
		}
	}
	return flattened
}
// NewPdfAppender creates a new Pdf appender from a Pdf reader.
// It rewinds the reader's stream and parses it a second time into a read-only reader.
func NewPdfAppender(reader *PdfReader) (*PdfAppender, error) {
	appender := &PdfAppender{
		rs:     reader.rs,
		Reader: reader,
		parser: reader.parser,
	}

	if _, err := appender.rs.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	// A second, read-only reader is kept next to the original one. This costs extra memory, but there is
	// no other way to tell whether objects of the original reader were modified. After a merge or replace
	// a page mixes objects of the read-only reader with other objects; the read-only ones are not appended
	// to the output. The check is cheap: only the owning parser of each indirect object is compared.
	roReader, err := NewPdfReader(appender.rs)
	if err != nil {
		return nil, err
	}
	appender.roReader = roReader

	for _, num := range reader.GetObjectNums() {
		if num > appender.greatestObjNum {
			appender.greatestObjNum = num
		}
	}

	appender.xrefs = appender.parser.GetXrefTable()
	appender.hasNewObject = make(map[core.PdfObject]struct{})
	appender.pages = append(appender.pages, roReader.PageList...)
	appender.acroForm = roReader.AcroForm

	return appender, nil
}
func (a *PdfAppender) addNewObjects(obj core.PdfObject) {
if _, ok := a.hasNewObject[obj]; ok || obj == nil {
return
}
switch v := obj.(type) {
case *core.PdfIndirectObject:
// Check the object is changing.
// If the indirect object has not the readonly parser then the object is changed.
if v.GetParser() != a.roReader.parser {
a.newObjects = append(a.newObjects, obj)
a.hasNewObject[obj] = struct{}{}
a.addNewObjects(v.PdfObject)
}
case *core.PdfObjectArray:
for _, o := range v.Elements() {
a.addNewObjects(o)
}
case *core.PdfObjectDictionary:
for _, key := range v.Keys() {
a.addNewObjects(v.Get(key))
}
case *core.PdfObjectStreams:
// Check the object is changing.
// If the indirect object has not the readonly parser then the object is changed.
if v.GetParser() != a.roReader.parser {
for _, o := range v.Elements() {
a.addNewObjects(o)
}
}
case *core.PdfObjectStream:
// Check the object is changing.
// If the indirect object has the readonly parser then the object is not changed.
if v.GetParser() == a.roReader.parser {
return
}
// If the indirect object has not the origin parser then the object may be changed orr not.
if v.GetParser() == a.Reader.parser {
// Check data is not changed.
if streamObj, err := a.roReader.parser.LookupByReference(v.PdfObjectReference); err == nil {
var isNotChanged bool
if stream, ok := core.GetStream(streamObj); ok && bytes.Equal(stream.Stream, v.Stream) {
isNotChanged = true
}
if dict, ok := core.GetDict(streamObj); isNotChanged && ok {
isNotChanged = dict.WriteString() == v.PdfObjectDictionary.WriteString()
2018-12-11 16:06:34 +03:00
}
if isNotChanged {
return
}
}
}
a.newObjects = append(a.newObjects, obj)
a.hasNewObject[obj] = struct{}{}
a.addNewObjects(v.PdfObjectDictionary)
}
}
// mergeResources adds the named resources of src to dest and returns the merged dictionary.
// Both dest and src are expected to be resource dictionaries (directly or via an indirect object).
// resourcesRenameMap maps a resource name in src to the name it must be stored under in the result;
// names that are not in the map are stored unchanged.
//
// If src is nil or is not a dictionary, dest is returned untouched. If dest is nil or is not a
// dictionary, the entries are merged into a new, empty dictionary. The rename map is applied in that
// case as well, so the result always matches the names used by the rewritten content streams.
func (a *PdfAppender) mergeResources(dest, src core.PdfObject, resourcesRenameMap map[core.PdfObjectName]core.PdfObjectName) core.PdfObject {
	if src == nil {
		return dest
	}
	srcDict, ok := core.GetDict(src)
	if !ok {
		return dest
	}

	var destDict *core.PdfObjectDictionary
	if dest != nil {
		if dict, isDict := core.GetDict(dest); isDict {
			destDict = dict
		} else {
			common.Log.Error("Error resource is not a dictionary")
		}
	}
	if destDict == nil {
		// Start from a fresh dictionary instead of handing back src itself: src belongs to the
		// page being merged and still carries the names from before renaming.
		destDict = core.MakeDict()
	}

	for _, key := range srcDict.Keys() {
		target := key
		if newKey, found := resourcesRenameMap[key]; found {
			target = newKey
		}
		destDict.Set(target, srcDict.Get(key))
	}
	return destDict
}
// MergePageWith appends the content of page to the page with number pageNum (1-based) of the source
// Pdf file.
//
// Resource names of page that clash with names already used by the source page are renamed, and the
// content streams of page are rewritten to use the new names. Content streams, annotations and resource
// dictionaries of page are then added to the source page, and the source media box is grown so that it
// also covers the media box of page.
//
// It returns an error if the page number is out of range, or if content streams or media boxes can not
// be read or written.
func (a *PdfAppender) MergePageWith(pageNum int, page *PdfPage) error {
	pageIndex := pageNum - 1
	if pageIndex < 0 || pageIndex >= len(a.pages) || a.pages[pageIndex] == nil {
		return fmt.Errorf("ERROR: Page dictionary %d not found in the source document", pageNum)
	}
	srcPage := a.pages[pageIndex]

	// A page still owned by the read-only reader must be duplicated before it is modified.
	if srcPage.primitive != nil && srcPage.primitive.GetParser() == a.roReader.parser {
		srcPage = srcPage.Duplicate()
		a.pages[pageIndex] = srcPage
	}

	page = page.Duplicate()
	procPage(page)

	srcResources := getPageResources(srcPage)
	pageResources := getPageResources(page)

	// Pick a replacement for every clashing name. A replacement must be unused on the source page,
	// unused on the merged page and not already handed out to another renamed resource.
	resourcesRenameMap := make(map[core.PdfObjectName]core.PdfObjectName)
	assigned := make(map[core.PdfObjectName]struct{})
	for key := range pageResources {
		if _, clash := srcResources[key]; !clash {
			continue
		}
		for i := 1; ; i++ {
			candidate := core.PdfObjectName(string(key) + strconv.Itoa(i))
			if _, taken := srcResources[candidate]; taken {
				continue
			}
			if _, taken := pageResources[candidate]; taken {
				continue
			}
			if _, taken := assigned[candidate]; taken {
				continue
			}
			resourcesRenameMap[key] = candidate
			assigned[candidate] = struct{}{}
			break
		}
	}

	contentStreams, err := page.GetContentStreams()
	if err != nil {
		return err
	}
	srcContentStreams, err := srcPage.GetContentStreams()
	if err != nil {
		return err
	}

	for i, stream := range contentStreams {
		contentStreams[i] = renameContentStreamNames(stream, resourcesRenameMap)
	}

	srcContentStreams = append(srcContentStreams, contentStreams...)
	if err := srcPage.SetContentStreams(srcContentStreams, core.NewFlateEncoder()); err != nil {
		return err
	}

	srcPage.Annotations = append(srcPage.Annotations, page.Annotations...)

	if srcPage.Resources == nil {
		srcPage.Resources = NewPdfPageResources()
	}
	if page.Resources != nil {
		srcPage.Resources.Font = a.mergeResources(srcPage.Resources.Font, page.Resources.Font, resourcesRenameMap)
		srcPage.Resources.XObject = a.mergeResources(srcPage.Resources.XObject, page.Resources.XObject, resourcesRenameMap)
		srcPage.Resources.Properties = a.mergeResources(srcPage.Resources.Properties, page.Resources.Properties, resourcesRenameMap)
		if srcPage.Resources.ProcSet == nil {
			srcPage.Resources.ProcSet = page.Resources.ProcSet
		}
		srcPage.Resources.Shading = a.mergeResources(srcPage.Resources.Shading, page.Resources.Shading, resourcesRenameMap)
		srcPage.Resources.ExtGState = a.mergeResources(srcPage.Resources.ExtGState, page.Resources.ExtGState, resourcesRenameMap)
		// Pattern names take part in the clash detection above, so the dictionary has to be merged too.
		srcPage.Resources.Pattern = a.mergeResources(srcPage.Resources.Pattern, page.Resources.Pattern, resourcesRenameMap)
	}

	srcMediaBox, err := srcPage.GetMediaBox()
	if err != nil {
		return err
	}
	pageMediaBox, err := page.GetMediaBox()
	if err != nil {
		return err
	}

	// Grow the source media box to the union of both boxes.
	var mediaBoxChanged bool
	if srcMediaBox.Llx > pageMediaBox.Llx {
		srcMediaBox.Llx = pageMediaBox.Llx
		mediaBoxChanged = true
	}
	if srcMediaBox.Lly > pageMediaBox.Lly {
		srcMediaBox.Lly = pageMediaBox.Lly
		mediaBoxChanged = true
	}
	if srcMediaBox.Urx < pageMediaBox.Urx {
		srcMediaBox.Urx = pageMediaBox.Urx
		mediaBoxChanged = true
	}
	if srcMediaBox.Ury < pageMediaBox.Ury {
		srcMediaBox.Ury = pageMediaBox.Ury
		mediaBoxChanged = true
	}
	if mediaBoxChanged {
		srcPage.MediaBox = srcMediaBox
	}

	return nil
}

// renameContentStreamNames returns stream with every name token ("/Name") whose name is a key of
// renames replaced by the mapped name. Only whole tokens are replaced, in a single pass, so a rename of
// "F1" does not touch "/F10" and the result of one rename is never renamed again.
//
// NOTE(review): the stream is scanned as plain text; a "/" inside a string literal or inline image data
// is treated like any other name token, as the previous plain text replacement did.
func renameContentStreamNames(stream string, renames map[core.PdfObjectName]core.PdfObjectName) string {
	if len(renames) == 0 {
		return stream
	}

	// White-space and delimiter characters that end a name token.
	const nameTerminators = "\x00\t\n\f\r ()<>[]{}/%"

	var out bytes.Buffer
	out.Grow(len(stream))

	rest := stream
	for {
		slash := strings.IndexByte(rest, '/')
		if slash < 0 {
			out.WriteString(rest)
			break
		}
		// Copy everything up to and including the solidus, then look at the name that follows it.
		out.WriteString(rest[:slash+1])
		rest = rest[slash+1:]

		end := strings.IndexAny(rest, nameTerminators)
		if end < 0 {
			end = len(rest)
		}
		if renamed, found := renames[core.PdfObjectName(rest[:end])]; found {
			out.WriteString(string(renamed))
		} else {
			out.WriteString(rest[:end])
		}
		rest = rest[end:]
	}
	return out.String()
}
// AddPages adds pages to end of the source Pdf. Each page is duplicated and processed with procPage
// before it is stored, so the caller's page is not the one kept by the appender.
func (a *PdfAppender) AddPages(pages ...*PdfPage) {
	for i := range pages {
		dup := pages[i].Duplicate()
		procPage(dup)
		a.pages = append(a.pages, dup)
	}
}
// RemovePage removes a page by number (1-based). Remaining pages that are still owned by the
// read-only reader are duplicated and processed with procPage. A page number outside the page list
// removes nothing.
func (a *PdfAppender) RemovePage(pageNum int) {
	skip := pageNum - 1
	kept := make([]*PdfPage, 0, len(a.pages))

	for idx := range a.pages {
		if idx == skip {
			continue
		}
		current := a.pages[idx]
		ownedByReadOnly := current.primitive != nil && current.primitive.GetParser() == a.roReader.parser
		if ownedByReadOnly {
			current = current.Duplicate()
			procPage(current)
		}
		kept = append(kept, current)
	}

	a.pages = kept
}
// ReplacePage replaces the page with number pageNum (1-based) by a processed duplicate of page.
// A page number outside the page list is ignored.
func (a *PdfAppender) ReplacePage(pageNum int, page *PdfPage) {
	target := pageNum - 1
	if target < 0 || target >= len(a.pages) {
		return
	}

	replacement := page.Duplicate()
	procPage(replacement)
	a.pages[target] = replacement
}
2018-12-19 18:36:15 +03:00
// Sign adds a signature field to the page with number pageNum (1-based) and registers the signature
// for writing. The signature dictionary is initialised by handler; the signature value itself is
// produced when the document is written.
//
// It returns the AcroForm carrying the new field and the appearance (field) created for the
// signature. An error is returned if handler is nil, if the page does not exist or if handler fails to
// initialise the signature.
func (a *PdfAppender) Sign(pageNum int, handler SignatureHandler) (acroForm *PdfAcroForm, appearance *PdfAppearance, err error) {
	if handler == nil {
		return nil, nil, errors.New("signature handler is nil")
	}

	pageIndex := pageNum - 1
	if pageIndex < 0 || pageIndex >= len(a.pages) || a.pages[pageIndex] == nil {
		return nil, nil, fmt.Errorf("page %d not found", pageNum)
	}
	page := a.pages[pageIndex].Duplicate()
	if page == nil {
		return nil, nil, fmt.Errorf("page %d not found", pageNum)
	}

	// NOTE(review): when the original reader has a form it is modified in place below - confirm that
	// this is intended.
	acroForm = a.Reader.AcroForm
	if acroForm == nil {
		acroForm = NewPdfAcroForm()
	}

	// TODO add more checks before set the fields
	acroForm.SigFlags = core.MakeInteger(3)
	acroForm.DA = core.MakeString("/F1 0 Tf 0 g")
	n2ResourcesFont := core.MakeDict()
	n2ResourcesFont.Set("F1", DefaultFont().ToPdfObject())
	acroForm.DR = NewPdfPageResources()
	acroForm.DR.Font = n2ResourcesFont

	sig := NewPdfSignature()
	sig.M = core.MakeString(time.Now().Format("D:20060102150405-07'00'"))
	sig.Type = core.MakeName("Sig")
	// NOTE(review): the reason is a hard-coded placeholder; it can not be changed through this API.
	sig.Reason = core.MakeString("Test1")
	if err = handler.InitSignature(sig); err != nil {
		return nil, nil, err
	}
	a.addNewObjects(sig.container)

	appearance = NewPdfAppearance()
	fields := append(acroForm.AllFields(), appearance.PdfField)
	acroForm.Fields = &fields

	procPage(page)

	appearance.FT = core.MakeName("Sig")
	appearance.V = sig.ToPdfObject()
	appearance.T = core.MakeString("Signature1")
	appearance.F = core.MakeInteger(132)
	appearance.P = page.ToPdfObject()
	// All-zero rectangle.
	appearance.Rect = core.MakeArray(core.MakeInteger(0), core.MakeInteger(0), core.MakeInteger(0),
		core.MakeInteger(0))
	appearance.Signature = sig

	a.pages[pageIndex] = page
	a.ReplaceAcroForm(acroForm)

	return acroForm, appearance, nil
}
2018-12-11 16:06:34 +03:00
// ReplaceAcroForm sets the form that the appender emits in place of the form of the original
// document. Write passes it to the Pdf writer when it is non-nil and differs from the form of the
// read-only reader.
func (a *PdfAppender) ReplaceAcroForm(acroForm *PdfAcroForm) {
	a.acroForm = acroForm
}
// Write writes the Appender output to io.Writer.
func (a *PdfAppender) Write(w io.Writer) error {
writer := NewPdfWriter()
pagesDict, ok := core.GetDict(writer.pages)
if !ok {
return errors.New("Invalid Pages obj (not a dict)")
}
kids, ok := pagesDict.Get("Kids").(*core.PdfObjectArray)
if !ok {
return errors.New("Invalid Pages Kids obj (not an array)")
}
pageCount, ok := pagesDict.Get("Count").(*core.PdfObjectInteger)
if !ok {
return errors.New("Invalid Pages Count object (not an integer)")
}
parser := a.roReader.parser
trailer := parser.GetTrailer()
if trailer == nil {
return fmt.Errorf("Missing trailer")
}
// Catalog.
root, ok := trailer.Get("Root").(*core.PdfObjectReference)
if !ok {
return fmt.Errorf("Invalid Root (trailer: %s)", trailer)
}
oc, err := parser.LookupByReference(*root)
if err != nil {
return err
}
catalog, ok := core.GetDict(oc)
if !ok {
common.Log.Debug("ERROR: Missing catalog: (root %q) (trailer %s)", oc, *trailer)
return errors.New("Missing catalog")
}
for _, key := range catalog.Keys() {
if writer.catalog.Get(key) == nil {
obj := catalog.Get(key)
writer.catalog.Set(key, obj)
}
}
inheritedFields := []core.PdfObjectName{"Resources", "MediaBox", "CropBox", "Rotate"}
for _, p := range a.pages {
// Update the count.
obj := p.ToPdfObject()
*pageCount = *pageCount + 1
// Check the object is not changing.
// If the indirect object has the parser which equals to the readonly then the object is not changed.
if ind, ok := obj.(*core.PdfIndirectObject); ok && ind.GetParser() == a.roReader.parser {
kids.Append(&ind.PdfObjectReference)
continue
}
if pDict, ok := core.GetDict(obj); ok {
parent, hasParent := pDict.Get("Parent").(*core.PdfIndirectObject)
for hasParent {
common.Log.Trace("Page Parent: %T", parent)
parentDict, ok := parent.PdfObject.(*core.PdfObjectDictionary)
if !ok {
return errors.New("Invalid Parent object")
}
for _, field := range inheritedFields {
common.Log.Trace("Field %s", field)
if pDict.Get(field) != nil {
common.Log.Trace("- page has already")
continue
}
if obj := parentDict.Get(field); obj != nil {
// Parent has the field. Inherit, pass to the new page.
common.Log.Trace("Inheriting field %s", field)
pDict.Set(field, obj)
}
}
parent, hasParent = parentDict.Get("Parent").(*core.PdfIndirectObject)
common.Log.Trace("Next parent: %T", parentDict.Get("Parent"))
}
pDict.Set("Parent", writer.pages)
}
a.addNewObjects(obj)
kids.Append(obj)
}
if a.acroForm != nil && a.acroForm != a.roReader.AcroForm {
writer.SetForms(a.acroForm)
}
2018-12-19 18:36:15 +03:00
if _, err := a.rs.Seek(0, io.SeekStart); err != nil {
return err
}
digestWriters := make(map[SignatureHandler]io.Writer)
byteRange := core.MakeArray()
for _, obj := range a.newObjects {
if ind, found := core.GetIndirect(obj); found {
if sigDict, found := ind.PdfObject.(*pdfSignDictionary); found {
handler := *sigDict.handler
// TODO fix it
digestWriters[handler], _ = handler.NewDigest(sigDict.signature)
byteRange.Append(core.MakeInteger(0xfffff), core.MakeInteger(0xfffff))
}
}
}
if byteRange.Len() > 0 {
byteRange.Append(core.MakeInteger(0xfffff), core.MakeInteger(0xfffff))
}
for _, obj := range a.newObjects {
if ind, found := core.GetIndirect(obj); found {
if sigDict, found := ind.PdfObject.(*pdfSignDictionary); found {
sigDict.Set("ByteRange", byteRange)
}
}
}
hasSigDict := len(digestWriters) > 0
var reader io.Reader = a.rs
if hasSigDict {
writers := make([]io.Writer, 0, len(digestWriters))
for _, hash := range digestWriters {
writers = append(writers, hash)
}
//hashSha1 := sha1.New() // if needed
reader = io.TeeReader(a.rs, io.MultiWriter(writers...))
}
offset, err := io.Copy(w, reader)
if err != nil {
return err
}
2018-12-11 16:06:34 +03:00
if len(a.newObjects) == 0 {
return nil
}
writer.writeOffset = offset
writer.ObjNumOffset = a.greatestObjNum
writer.appendMode = true
writer.appendToXrefs = a.xrefs
2018-12-19 18:36:15 +03:00
writer.minorVersion = 7
2018-12-11 16:06:34 +03:00
for _, obj := range a.newObjects {
writer.addObject(obj)
}
2018-12-19 18:36:15 +03:00
writerW := w
if hasSigDict {
writerW = bytes.NewBuffer(nil)
}
if err := writer.Write(writerW); err != nil {
2018-12-11 16:06:34 +03:00
return err
}
2018-12-19 18:36:15 +03:00
if hasSigDict {
bufferData := writerW.(*bytes.Buffer).Bytes()
byteRange := core.MakeArray()
var sigDicts []*pdfSignDictionary
var lastPosition int64
for _, obj := range writer.objects {
if ind, found := core.GetIndirect(obj); found {
if sigDict, found := ind.PdfObject.(*pdfSignDictionary); found {
sigDicts = append(sigDicts, sigDict)
newPosition := sigDict.fileOffset + int64(sigDict.contentsOffsetStart)
byteRange.Append(
core.MakeInteger(lastPosition),
core.MakeInteger(newPosition-lastPosition),
)
lastPosition = sigDict.fileOffset + int64(sigDict.contentsOffsetEnd)
}
}
}
byteRange.Append(
core.MakeInteger(lastPosition),
core.MakeInteger(offset+int64(len(bufferData))-lastPosition),
)
// set the ByteRange value
byteRangeData := []byte(byteRange.WriteString())
for _, sigDict := range sigDicts {
bufferOffset := int(sigDict.fileOffset - offset)
for i := sigDict.byteRangeOffsetStart; i < sigDict.byteRangeOffsetEnd; i++ {
bufferData[bufferOffset+i] = ' '
}
dst := bufferData[bufferOffset+sigDict.byteRangeOffsetStart : bufferOffset+sigDict.byteRangeOffsetEnd]
copy(dst, byteRangeData)
}
var prevOffset int
for _, sigDict := range sigDicts {
bufferOffset := int(sigDict.fileOffset - offset)
data := bufferData[prevOffset : bufferOffset+sigDict.contentsOffsetStart]
handler := *sigDict.handler
digestWriters[handler].Write(data)
prevOffset = bufferOffset + sigDict.contentsOffsetEnd
}
for _, sigDict := range sigDicts {
data := bufferData[prevOffset:]
handler := *sigDict.handler
digestWriters[handler].Write(data)
}
for _, sigDict := range sigDicts {
bufferOffset := int(sigDict.fileOffset - offset)
handler := *sigDict.handler
digest := digestWriters[handler]
if err := handler.Sign(sigDict.signature, digest); err != nil {
return err
}
contents := []byte(sigDict.signature.Contents.WriteString())
for i := sigDict.byteRangeOffsetStart; i < sigDict.byteRangeOffsetEnd; i++ {
bufferData[bufferOffset+i] = ' '
}
for i := sigDict.contentsOffsetStart; i < sigDict.contentsOffsetEnd; i++ {
bufferData[bufferOffset+i] = ' '
}
dst := bufferData[bufferOffset+sigDict.byteRangeOffsetStart : bufferOffset+sigDict.byteRangeOffsetEnd]
copy(dst, byteRangeData)
dst = bufferData[bufferOffset+sigDict.contentsOffsetStart : bufferOffset+sigDict.contentsOffsetEnd]
copy(dst, contents)
}
writerW = bytes.NewBuffer(bufferData)
}
if buffer, ok := writerW.(*bytes.Buffer); ok {
_, err = io.Copy(w, buffer)
}
2018-12-11 16:06:34 +03:00
return err
}
// WriteToFile writes the Appender output to file specified by path. The file is created, or
// truncated if it already exists. An error from closing the file is reported when writing itself
// succeeded, so that a failed flush to disk does not go unnoticed.
func (a *PdfAppender) WriteToFile(outputPath string) (err error) {
	fWrite, err := os.Create(outputPath)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the write error if there is one; otherwise surface the close error.
		if closeErr := fWrite.Close(); err == nil {
			err = closeErr
		}
	}()
	return a.Write(fWrite)
}