Merge branch 'development' of https://github.com/unidoc/unipdf into cmap

This commit is contained in:
Peter Williams 2020-05-20 19:40:30 +10:00
commit 6103fb8ea3
3 changed files with 85 additions and 0 deletions

View File

@ -343,6 +343,12 @@ type PdfAnnotationWidget struct {
processing bool // Used in ToPdfObject serialization to avoid infinite loops for merged-in annots.
}
// Field reports the form field that owns this widget annotation.
// NOTE: nil is returned when the widget has no parent form field, or when
// the parent form field has not been parsed.
func (widget *PdfAnnotationWidget) Field() *PdfField {
	owner := widget.parent
	return owner
}
// PdfAnnotationWatermark represents Watermark annotations.
// (Section 12.5.6.22).
type PdfAnnotationWatermark struct {

View File

@ -6,6 +6,7 @@
package model
import (
"os"
"testing"
"github.com/stretchr/testify/require"
@ -160,3 +161,18 @@ endobj
_ = raw
t.Skip("Not implemented yet")
}
// TestRepairAcroForm checks that RepairAcroForm can rebuild the AcroForm
// fields of a document: the parsed field list is saved and cleared, the form
// is repaired, and the rebuilt list is compared against the saved one
// (ignoring order).
func TestRepairAcroForm(t *testing.T) {
	f, err := os.Open("./testdata/OoPdfFormExample.pdf")
	require.NoError(t, err)
	defer f.Close()

	reader, err := NewPdfReader(f)
	require.NoError(t, err)

	// Guard the dereferences below so that a document without a parsed form
	// fails the test cleanly instead of panicking.
	require.NotNil(t, reader.AcroForm)
	require.NotNil(t, reader.AcroForm.Fields)
	original := *reader.AcroForm.Fields
	reader.AcroForm.Fields = nil

	require.NoError(t, reader.RepairAcroForm(nil))

	// RepairAcroForm leaves Fields unset when it collects no fields, so
	// check the pointer before dereferencing it.
	require.NotNil(t, reader.AcroForm.Fields)
	repaired := *reader.AcroForm.Fields
	require.ElementsMatch(t, original, repaired)
}

View File

@ -507,6 +507,69 @@ func (r *PdfReader) GetOutlines() (*Outline, error) {
return outline, nil
}
// AcroFormRepairOptions holds the options used when rebuilding the AcroForm.
// It currently declares no fields.
type AcroFormRepairOptions struct{}
// RepairAcroForm reconstructs the AcroForm field list from the widget
// annotations found on the pages of the document. The opts parameter may be
// nil, in which case the default options are used.
// NOTE: opts is declared so that options can be added in the future; for now
// the default options are always applied, whatever is passed in.
func (r *PdfReader) RepairAcroForm(opts *AcroFormRepairOptions) error {
	// resolveField picks the form field for a widget annotation. It prefers
	// the already parsed parent, then tries to parse the widget's Parent
	// object, then the widget's own container. Parse failures are logged and
	// the next candidate is tried.
	resolveField := func(widget *PdfAnnotationWidget) *PdfField {
		if widget.parent != nil {
			return widget.parent
		}

		var (
			resolved *PdfField
			err      error
		)
		if parentObj, isIndirect := core.GetIndirect(widget.Parent); isIndirect {
			if resolved, err = r.newPdfFieldFromIndirectObject(parentObj, nil); err == nil {
				return resolved
			}
			common.Log.Debug("WARN: could not parse form field %+v: %v", parentObj, err)
		}
		if widget.container != nil {
			if resolved, err = r.newPdfFieldFromIndirectObject(widget.container, nil); err == nil {
				return resolved
			}
			common.Log.Debug("WARN: could not parse form field %+v: %v", widget.container, err)
		}

		// Every candidate failed (or none was available): hand back whatever
		// the last parse attempt produced.
		return resolved
	}

	var rebuilt []*PdfField
	// seen records the containers of the fields collected so far, so that a
	// field shared by several widgets is only added once.
	seen := map[*core.PdfIndirectObject]struct{}{}
	for _, page := range r.PageList {
		annotations, err := page.GetAnnotations()
		if err != nil {
			return err
		}

		for _, annotation := range annotations {
			// Only widget annotations take part in the rebuild.
			widget, isWidget := annotation.GetContext().(*PdfAnnotationWidget)
			if !isWidget {
				continue
			}

			field := resolveField(widget)
			if field == nil {
				continue
			}
			if _, dup := seen[field.container]; dup {
				continue
			}
			seen[field.container] = struct{}{}
			rebuilt = append(rebuilt, field)
		}
	}

	// Nothing collected: leave the reader's AcroForm as it is.
	if len(rebuilt) == 0 {
		return nil
	}

	if r.AcroForm == nil {
		r.AcroForm = NewPdfAcroForm()
	}
	r.AcroForm.Fields = &rebuilt
	return nil
}
// loadForms loads the AcroForm.
func (r *PdfReader) loadForms() (*PdfAcroForm, error) {
if r.parser.GetCrypter() != nil && !r.parser.IsAuthenticated() {