Improvements in field handling and utility functions

This commit is contained in:
Gunnsteinn Hall 2018-07-04 23:02:34 +00:00
parent 11ec4d42e3
commit 178727ca05
4 changed files with 145 additions and 21 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
* -crlf

View File

@ -9,6 +9,7 @@ import (
"bytes" "bytes"
"errors" "errors"
"fmt" "fmt"
"strings"
"github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/core"
@ -73,6 +74,74 @@ func (flag FieldFlag) Has(fl FieldFlag) bool {
return (flag.Mask() & fl.Mask()) > 0 return (flag.Mask() & fl.Mask()) > 0
} }
// String returns a string representation of what flags are set.
//
// A flag equal to FieldFlagClear yields "Clear". Otherwise the names of all
// set flags are joined with "|" in the fixed order of the table below, e.g.
// "ReadOnly|Required". If none of the listed flags is set, the result is the
// empty string.
func (flag FieldFlag) String() string {
	if flag == FieldFlagClear {
		return "Clear"
	}

	// Flag bits paired with their printable names, in output order.
	labels := []struct {
		bit  FieldFlag
		name string
	}{
		{FieldFlagReadOnly, "ReadOnly"},
		// Previously reported as "ReadOnly", which made required fields
		// indistinguishable from read-only ones in the output.
		{FieldFlagRequired, "Required"},
		{FieldFlagNoExport, "NoExport"},
		{FieldFlagNoToggleToOff, "NoToggleToOff"},
		{FieldFlagRadio, "Radio"},
		{FieldFlagPushbutton, "Pushbutton"},
		{FieldFlagRadiosInUnision, "RadiosInUnision"},
		{FieldFlagMultiline, "Multiline"},
		{FieldFlagPassword, "Password"},
		{FieldFlagFileSelect, "FileSelect"},
		{FieldFlagDoNotScroll, "DoNotScroll"},
		{FieldFlagComb, "Comb"},
		{FieldFlagRichText, "RichText"},
		{FieldFlagDoNotSpellCheck, "DoNotSpellCheck"},
		{FieldFlagCombo, "Combo"},
		{FieldFlagEdit, "Edit"},
		{FieldFlagSort, "Sort"},
		{FieldFlagMultiSelect, "MultiSelect"},
		{FieldFlagCommitOnSelChange, "CommitOnSelChange"},
	}

	var set []string
	for _, l := range labels {
		if flag&l.bit > 0 {
			set = append(set, l.name)
		}
	}
	return strings.Join(set, "|")
}
// PdfField contains the common attributes of a form field. The context object contains the specific field data // PdfField contains the common attributes of a form field. The context object contains the specific field data
// which can represent a button, text, choice or signature. // which can represent a button, text, choice or signature.
// The PdfField is typically not used directly, but is encapsulated by the more specific field types such as // The PdfField is typically not used directly, but is encapsulated by the more specific field types such as
@ -83,11 +152,10 @@ type PdfField struct {
isTerminal *bool // If set: indicates whether is a terminal field (if null, may not be determined yet). isTerminal *bool // If set: indicates whether is a terminal field (if null, may not be determined yet).
Parent *PdfField Parent *PdfField
Annotations []*PdfAnnotation Annotations []*PdfAnnotationWidget
Kids []*PdfField Kids []*PdfField
FT *core.PdfObjectName FT *core.PdfObjectName
//Kids *core.PdfObjectArray
T *core.PdfObjectString T *core.PdfObjectString
TU *core.PdfObjectString TU *core.PdfObjectString
TM *core.PdfObjectString TM *core.PdfObjectString
@ -382,7 +450,7 @@ type PdfFieldSignature struct {
} }
// ToPdfObject returns an indirect object containing the signature field dictionary. // ToPdfObject returns an indirect object containing the signature field dictionary.
func (sig *PdfFieldSignature) ToPdfObject() *core.PdfIndirectObject { func (sig *PdfFieldSignature) ToPdfObject() core.PdfObject {
// Set general field attributes // Set general field attributes
sig.PdfField.ToPdfObject() sig.PdfField.ToPdfObject()
container := sig.container container := sig.container
@ -463,7 +531,7 @@ func (f *PdfField) Flags() FieldFlag {
common.Log.Debug("Error evaluating flags via inheritance: %v", err) common.Log.Debug("Error evaluating flags via inheritance: %v", err)
} }
if !found { if !found {
common.Log.Debug("No field flags found") common.Log.Trace("No field flags found - assume clear")
} }
return flags return flags
@ -549,7 +617,15 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
} }
ctx.PdfField = field ctx.PdfField = field
field.context = ctx field.context = ctx
case "Sig":
ctx, err := newPdfFieldSignatureFromDict(d)
if err != nil {
return nil, err
}
ctx.PdfField = field
field.context = ctx
default: default:
common.Log.Debug("Unsupported field type %s", *field.FT)
return nil, errors.New("Unsupported field type") return nil, errors.New("Unsupported field type")
} }
} }
@ -566,10 +642,10 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
field.Parent = parent field.Parent = parent
} }
field.Annotations = []*PdfAnnotation{} field.Annotations = []*PdfAnnotationWidget{}
// Has a merged-in widget annotation? // Has a merged-in widget annotation?
if name := d.GetDirect("Subtype").(*core.PdfObjectName); name != nil { if name, _ := d.GetDirect("Subtype").(*core.PdfObjectName); name != nil {
if *name == "Widget" { if *name == "Widget" {
// Is a merged field / widget dict. // Is a merged field / widget dict.
@ -585,7 +661,7 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
} }
widget.Parent = field.GetContainingPdfObject() widget.Parent = field.GetContainingPdfObject()
field.Annotations = append(field.Annotations, annot) field.Annotations = append(field.Annotations, widget)
return field, nil return field, nil
} }
@ -613,12 +689,16 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
// Widget annotations contain key Subtype with value equal to /Widget. Otherwise are assumed to be fields. // Widget annotations contain key Subtype with value equal to /Widget. Otherwise are assumed to be fields.
if name, has := dict.GetDirect("Subtype").(*core.PdfObjectName); has && *name == "Widget" { if name, has := dict.GetDirect("Subtype").(*core.PdfObjectName); has && *name == "Widget" {
widg, err := r.newPdfAnnotationFromIndirectObject(container) annot, err := r.newPdfAnnotationFromIndirectObject(container)
if err != nil { if err != nil {
common.Log.Debug("Error loading widget annotation for field: %v", err) common.Log.Debug("Error loading widget annotation for field: %v", err)
return nil, err return nil, err
} }
field.Annotations = append(field.Annotations, widg) wa, ok := annot.context.(*PdfAnnotationWidget)
if !ok {
return nil, ErrTypeCheck
}
field.Annotations = append(field.Annotations, wa)
} else { } else {
childf, err := r.newPdfFieldFromIndirectObject(container, field) childf, err := r.newPdfFieldFromIndirectObject(container, field)
if err != nil { if err != nil {
@ -642,7 +722,7 @@ func newPdfFieldTextFromDict(d *core.PdfObjectDictionary) (*PdfFieldText, error)
textf.DS, _ = d.GetDirect("DS").(*core.PdfObjectString) textf.DS, _ = d.GetDirect("DS").(*core.PdfObjectString)
textf.RV = d.Get("RV") textf.RV = d.Get("RV")
// TODO: MaxLen should be loaded for other fields too? // TODO: MaxLen should be loaded for other fields too?
textf.MaxLen = d.Get("MaxLen").(*core.PdfObjectInteger) textf.MaxLen, _ = d.Get("MaxLen").(*core.PdfObjectInteger)
return textf, nil return textf, nil
} }
@ -663,3 +743,13 @@ func newPdfFieldButtonFromDict(d *core.PdfObjectDictionary) (*PdfFieldButton, er
buttonf.Opt, _ = d.GetDirect("Opt").(*core.PdfObjectArray) buttonf.Opt, _ = d.GetDirect("Opt").(*core.PdfObjectArray)
return buttonf, nil return buttonf, nil
} }
// newPdfFieldSignatureFromDict builds a PdfFieldSignature (signature field) from the field dictionary d.
// Only the signature-specific entries V, Lock and SV are read here; the generic field attributes are
// handled by the caller. The returned error is always nil.
func newPdfFieldSignatureFromDict(d *core.PdfObjectDictionary) (*PdfFieldSignature, error) {
	// indirect fetches the entry for key; any value that is not a
	// *core.PdfIndirectObject yields a nil pointer.
	indirect := func(key string) *core.PdfIndirectObject {
		obj, _ := d.Get(key).(*core.PdfIndirectObject)
		return obj
	}

	sigf := new(PdfFieldSignature)
	sigf.V = indirect("V")
	sigf.Lock = indirect("Lock")
	sigf.SV = indirect("SV")
	return sigf, nil
}

View File

@ -7,6 +7,7 @@ package model
import ( import (
"fmt" "fmt"
"github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/core"
) )
@ -19,8 +20,7 @@ Ch = choice
Sig = signature Sig = signature
*/ */
// PdfAcroForm represents the AcroForm dictionary used for representation of forms // PdfAcroForm represents the AcroForm dictionary used for representation of form data in PDF.
// in PDF.
type PdfAcroForm struct { type PdfAcroForm struct {
Fields *[]*PdfField Fields *[]*PdfField
NeedAppearances *core.PdfObjectBool NeedAppearances *core.PdfObjectBool
@ -31,7 +31,7 @@ type PdfAcroForm struct {
Q *core.PdfObjectInteger Q *core.PdfObjectInteger
XFA core.PdfObject XFA core.PdfObject
primitive *core.PdfIndirectObject container *core.PdfIndirectObject
} }
// NewPdfAcroForm returns a new PdfAcroForm with an initialized container (indirect object).
@ -40,11 +40,31 @@ func NewPdfAcroForm() *PdfAcroForm {
container := &core.PdfIndirectObject{} container := &core.PdfIndirectObject{}
container.PdfObject = core.MakeDict() container.PdfObject = core.MakeDict()
acroForm.container = container
acroForm.primitive = container
return acroForm return acroForm
} }
// flattenFields returns the field hierarchy rooted at field as a flat list.
// The list is in depth-first pre-order: field itself comes first, and each kid
// is followed by all of its own descendants before the next sibling.
func flattenFields(field *PdfField) []*PdfField {
	flat := []*PdfField{field}

	// walk appends every kid, then descends into that kid's own Kids.
	var walk func(kids []*PdfField)
	walk = func(kids []*PdfField) {
		for i := range kids {
			flat = append(flat, kids[i])
			walk(kids[i].Kids)
		}
	}
	walk(field.Kids)

	return flat
}
// AllFields returns every field of the form as one flat list: each top-level
// entry of form.Fields is followed by the fields flattenFields yields below it.
// The result is an empty, non-nil slice when form.Fields is nil.
func (form *PdfAcroForm) AllFields() []*PdfField {
	all := make([]*PdfField, 0)
	if form.Fields == nil {
		return all
	}

	roots := *form.Fields
	for i := 0; i < len(roots); i++ {
		all = append(all, flattenFields(roots[i])...)
	}
	return all
}
// newPdfAcroFormFromDict is used when loading forms from PDF files. // newPdfAcroFormFromDict is used when loading forms from PDF files.
func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcroForm, error) { func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcroForm, error) {
acroForm := NewPdfAcroForm() acroForm := NewPdfAcroForm()
@ -154,13 +174,13 @@ func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcr
// GetContainingPdfObject returns the container of the PdfAcroForm (indirect object). // GetContainingPdfObject returns the container of the PdfAcroForm (indirect object).
func (this *PdfAcroForm) GetContainingPdfObject() core.PdfObject { func (this *PdfAcroForm) GetContainingPdfObject() core.PdfObject {
return this.primitive return this.container
} }
// ToPdfObject converts PdfAcroForm to a PdfObject, i.e. an indirect object containing the // ToPdfObject converts PdfAcroForm to a PdfObject, i.e. an indirect object containing the
// AcroForm dictionary. // AcroForm dictionary.
func (this *PdfAcroForm) ToPdfObject() core.PdfObject { func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
container := this.primitive container := this.container
dict := container.PdfObject.(*core.PdfObjectDictionary) dict := container.PdfObject.(*core.PdfObjectDictionary)
if this.Fields != nil { if this.Fields != nil {
@ -176,7 +196,6 @@ func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
} }
if this.SigFlags != nil { if this.SigFlags != nil {
dict.Set("SigFlags", this.SigFlags) dict.Set("SigFlags", this.SigFlags)
} }
if this.CO != nil { if this.CO != nil {
dict.Set("CO", this.CO) dict.Set("CO", this.CO)
@ -196,5 +215,3 @@ func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
return container return container
} }

View File

@ -695,7 +695,8 @@ func (this *PdfReader) traverseObjectData(o PdfObject) error {
return nil return nil
} }
// Get a page by the page number. Indirect object with type /Page. // GetPageAsIndirectObject returns the indirect object representing a page for a given page number.
// Indirect object with type /Page.
func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error) { func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error) {
if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
return nil, fmt.Errorf("File needs to be decrypted first") return nil, fmt.Errorf("File needs to be decrypted first")
@ -706,6 +707,7 @@ func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error
page := this.pageList[pageNumber-1] page := this.pageList[pageNumber-1]
// Look up all references related to page and load everything. // Look up all references related to page and load everything.
// XXX/TODO: Use of traverse object data will be limited when lazy-loading is supported.
err := this.traverseObjectData(page) err := this.traverseObjectData(page)
if err != nil { if err != nil {
return nil, err return nil, err
@ -716,6 +718,20 @@ func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error
return page, nil return page, nil
} }
// PageFromIndirectObject looks up ind among the reader's page objects and returns the
// matching PdfPage together with its 1-based page number.
// An error is returned when the two internal page lists differ in length, or when ind
// is not one of the reader's page objects.
func (r *PdfReader) PageFromIndirectObject(ind *PdfIndirectObject) (*PdfPage, int, error) {
	// Both lists are indexed in parallel, so they must have the same length.
	if len(r.pageList) != len(r.PageList) {
		return nil, 0, errors.New("page list invalid")
	}

	for idx := 0; idx < len(r.pageList); idx++ {
		if r.pageList[idx] != ind {
			continue
		}
		return r.PageList[idx], idx + 1, nil
	}

	return nil, 0, errors.New("Page not found")
}
// Get a page by the page number. // Get a page by the page number.
// Returns the PdfPage entry. // Returns the PdfPage entry.
func (this *PdfReader) GetPage(pageNumber int) (*PdfPage, error) { func (this *PdfReader) GetPage(pageNumber int) (*PdfPage, error) {