V3: Write page annots from loaded PDFs (#452)

* Add Annots when serializing page object
* Handle multiple dicts with pending objects per object (writer)
This commit is contained in:
Gunnsteinn Hall 2019-04-28 12:57:11 +00:00 committed by GitHub
parent dcc0723e70
commit 359d6965de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 134 additions and 19 deletions

View File

@ -306,6 +306,8 @@ type PdfAnnotationWidget struct {
AA core.PdfObject
BS core.PdfObject
Parent core.PdfObject
parent *PdfField
}
// PdfAnnotationWatermark represents Watermark annotations.
@ -1796,7 +1798,12 @@ func (widget *PdfAnnotationWidget) ToPdfObject() core.PdfObject {
d.SetIfNotNil("A", widget.A)
d.SetIfNotNil("AA", widget.AA)
d.SetIfNotNil("BS", widget.BS)
d.SetIfNotNil("Parent", widget.Parent)
if widget.parent != nil {
d.SetIfNotNil("Parent", widget.parent.GetContainingPdfObject())
} else if widget.Parent != nil {
d.SetIfNotNil("Parent", widget.Parent)
}
return container
}

View File

@ -547,16 +547,16 @@ func (f *PdfField) SetFlag(flag FieldFlag) {
// newPdfFieldFromIndirectObject loads a field from an indirect object containing the field dictionary.
func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObject, parent *PdfField) (*PdfField, error) {
d, isDict := container.PdfObject.(*core.PdfObjectDictionary)
if !isDict {
return nil, fmt.Errorf("PdfField indirect object not containing a dictionary")
}
// If already processed and cached - return processed model.
if field, cached := r.modelManager.GetModelFromPrimitive(container).(*PdfField); cached {
return field, nil
}
d, isDict := core.GetDict(container)
if !isDict {
return nil, fmt.Errorf("PdfField indirect object not containing a dictionary")
}
field := NewPdfField()
// Field type (required in terminal fields).
@ -661,7 +661,8 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
if !ok {
return nil, errors.New("invalid widget annotation")
}
widget.Parent = field.GetContainingPdfObject()
widget.parent = field
widget.Parent = field.container
field.Annotations = append(field.Annotations, widget)
@ -695,6 +696,7 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
if !ok {
return nil, ErrTypeCheck
}
wa.parent = field
field.Annotations = append(field.Annotations, wa)
} else {
childf, err := r.newPdfFieldFromIndirectObject(container, field)

View File

@ -495,6 +495,8 @@ func (p *PdfPage) GetPageDict() *core.PdfObjectDictionary {
}
}
d.Set("Annots", arr)
} else if p.Annots != nil {
d.SetIfNotNil("Annots", p.Annots)
}
return d

View File

@ -157,7 +157,7 @@ type PdfWriter struct {
// for writing.
// The map stores the object and the dictionaries it is contained in.
// This is the only way we can access the dictionary entries later.
pendingObjects map[core.PdfObject]*core.PdfObjectDictionary
pendingObjects map[core.PdfObject][]*core.PdfObjectDictionary
// Forms.
acroForm *PdfAcroForm
@ -179,7 +179,7 @@ func NewPdfWriter() PdfWriter {
w.objectsMap = map[core.PdfObject]struct{}{}
w.objects = []core.PdfObject{}
w.pendingObjects = map[core.PdfObject]*core.PdfObjectDictionary{}
w.pendingObjects = map[core.PdfObject][]*core.PdfObjectDictionary{}
w.traversed = map[core.PdfObject]struct{}{}
// PDF Version. Can be changed if using more advanced features in PDF.
@ -466,8 +466,8 @@ func (w *PdfWriter) addObjects(obj core.PdfObject) error {
}
if hasObj := w.hasObject(v); !hasObj {
common.Log.Debug("Parent obj is missing!! %T %p %v", v, v, v)
w.pendingObjects[v] = dict
common.Log.Debug("Parent obj not added yet!! %T %p %v", v, v, v)
w.pendingObjects[v] = append(w.pendingObjects[v], dict)
// Although it is missing at this point, it could be added later...
}
// How to handle the parent? Make sure it is present?
@ -889,15 +889,17 @@ func (w *PdfWriter) Write(writer io.Writer) error {
}
// Check pending objects prior to write.
for pendingObj, pendingObjDict := range w.pendingObjects {
for pendingObj, pendingObjDicts := range w.pendingObjects {
if !w.hasObject(pendingObj) {
common.Log.Debug("ERROR Pending object %+v %T (%p) never added for writing", pendingObj, pendingObj, pendingObj)
for _, key := range pendingObjDict.Keys() {
val := pendingObjDict.Get(key)
if val == pendingObj {
common.Log.Debug("Pending object found! and replaced with null")
pendingObjDict.Set(key, core.MakeNull())
break
common.Log.Debug("WARN Pending object %+v %T (%p) never added for writing", pendingObj, pendingObj, pendingObj)
for _, pendingObjDict := range pendingObjDicts {
for _, key := range pendingObjDict.Keys() {
val := pendingObjDict.Get(key)
if val == pendingObj {
common.Log.Debug("Pending object found! and replaced with null")
pendingObjDict.Set(key, core.MakeNull())
break
}
}
}
}

102
pdf/model/writer_test.go Normal file
View File

@ -0,0 +1,102 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"bytes"
"os"
"testing"
"github.com/stretchr/testify/require"
"github.com/unidoc/unidoc/pdf/core"
)
// TestReadWriteAnnotations loads a PDF form whose page carries widget
// annotations, then round-trips the page through PdfWriter twice: first with
// the AcroForm attached and then without it. After the initial load and after
// each reload the form and annotation state is verified.
func TestReadWriteAnnotations(t *testing.T) {
	file, err := os.Open(`testdata/OoPdfFormExample.pdf`)
	require.NoError(t, err)
	defer file.Close()

	reader, err := NewPdfReaderLazy(file)
	require.NoError(t, err)

	// verify asserts the expected AcroForm and page annotation state of a
	// loaded document. formExpected selects whether the AcroForm (and thus
	// the widget's parent field) should be present.
	verify := func(r *PdfReader, formExpected bool) {
		// AcroForm and its fields.
		if !formExpected {
			require.Nil(t, r.AcroForm)
		} else {
			require.NotNil(t, r.AcroForm)
			allFields := r.AcroForm.AllFields()
			require.Len(t, allFields, 17)
			require.Nil(t, allFields[0].Parent)
		}

		// Page count and page annotations.
		pageCount, err := r.GetNumPages()
		require.NoError(t, err)
		require.Equal(t, 1, pageCount)

		firstPage, err := r.GetPage(1)
		require.NoError(t, err)
		require.NotNil(t, firstPage.Annots)

		pageAnnots, err := firstPage.GetAnnotations()
		require.NoError(t, err)
		require.Len(t, pageAnnots, 17)

		// The first annotation must be a widget; its parent linkage depends
		// on whether the form was written out alongside the page.
		widget, isWidget := pageAnnots[0].GetContext().(*PdfAnnotationWidget)
		require.True(t, isWidget)
		if !formExpected {
			require.Nil(t, widget.parent)
			require.True(t, core.IsNullObject(widget.Parent))
		} else {
			require.NotNil(t, widget.parent)
			require.NotNil(t, widget.Parent)
		}
	}

	// roundTrip writes page 1 of src to an in-memory buffer, optionally
	// together with src's AcroForm, and returns a reader over the result.
	roundTrip := func(src *PdfReader, withForm bool) *PdfReader {
		writer := NewPdfWriter()

		srcPage, err := src.GetPage(1)
		require.NoError(t, err)

		err = writer.AddPage(srcPage)
		require.NoError(t, err)

		if withForm {
			err = writer.SetForms(src.AcroForm)
			require.NoError(t, err)
		}

		var out bytes.Buffer
		err = writer.Write(&out)
		require.NoError(t, err)

		reloaded, err := NewPdfReaderLazy(bytes.NewReader(out.Bytes()))
		require.NoError(t, err)
		return reloaded
	}

	// Freshly loaded document.
	verify(reader, true)

	// Write out and reload. With the AcroForm in place.
	reader = roundTrip(reader, true)
	verify(reader, true)

	// Write out and reload without setting the AcroForm. Note that the
	// source here is the already round-tripped document.
	reader = roundTrip(reader, false)
	verify(reader, false)
}