diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..aecf2503
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+* -crlf
diff --git a/pdf/model/fields.go b/pdf/model/fields.go
index f8668291..13dd9cef 100644
--- a/pdf/model/fields.go
+++ b/pdf/model/fields.go
@@ -9,6 +9,7 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
+	"strings"
 
 	"github.com/unidoc/unidoc/common"
 	"github.com/unidoc/unidoc/pdf/core"
@@ -73,6 +74,74 @@ func (flag FieldFlag) Has(fl FieldFlag) bool {
 	return (flag.Mask() & fl.Mask()) > 0
 }
 
+// String returns the names of the flags set in flag joined by "|", or "Clear" if flag equals FieldFlagClear.
+func (flag FieldFlag) String() string {
+	s := ""
+	if flag == FieldFlagClear {
+		s = "Clear"
+		return s
+	}
+	if flag&FieldFlagReadOnly > 0 {
+		s += "|ReadOnly"
+	}
+	if flag&FieldFlagRequired > 0 {
+		s += "|Required"
+	}
+	if flag&FieldFlagNoExport > 0 {
+		s += "|NoExport"
+	}
+	if flag&FieldFlagNoToggleToOff > 0 {
+		s += "|NoToggleToOff"
+	}
+	if flag&FieldFlagRadio > 0 {
+		s += "|Radio"
+	}
+	if flag&FieldFlagPushbutton > 0 {
+		s += "|Pushbutton"
+	}
+	if flag&FieldFlagRadiosInUnision > 0 {
+		s += "|RadiosInUnision"
+	}
+	if flag&FieldFlagMultiline > 0 {
+		s += "|Multiline"
+	}
+	if flag&FieldFlagPassword > 0 {
+		s += "|Password"
+	}
+	if flag&FieldFlagFileSelect > 0 {
+		s += "|FileSelect"
+	}
+	if flag&FieldFlagDoNotScroll > 0 {
+		s += "|DoNotScroll"
+	}
+	if flag&FieldFlagComb > 0 {
+		s += "|Comb"
+	}
+	if flag&FieldFlagRichText > 0 {
+		s += "|RichText"
+	}
+	if flag&FieldFlagDoNotSpellCheck > 0 {
+		s += "|DoNotSpellCheck"
+	}
+	if flag&FieldFlagCombo > 0 {
+		s += "|Combo"
+	}
+	if flag&FieldFlagEdit > 0 {
+		s += "|Edit"
+	}
+	if flag&FieldFlagSort > 0 {
+		s += "|Sort"
+	}
+	if flag&FieldFlagMultiSelect > 0 {
+		s += "|MultiSelect"
+	}
+	if flag&FieldFlagCommitOnSelChange > 0 {
+		s += "|CommitOnSelChange"
+	}
+
+	return strings.Trim(s, "|")
+}
+
 // PdfField contains the common attributes of a form field.
The context object contains the specific field data // which can represent a button, text, choice or signature. // The PdfField is typically not used directly, but is encapsulated by the more specific field types such as @@ -83,11 +152,10 @@ type PdfField struct { isTerminal *bool // If set: indicates whether is a terminal field (if null, may not be determined yet). Parent *PdfField - Annotations []*PdfAnnotation + Annotations []*PdfAnnotationWidget Kids []*PdfField FT *core.PdfObjectName - //Kids *core.PdfObjectArray T *core.PdfObjectString TU *core.PdfObjectString TM *core.PdfObjectString @@ -382,7 +450,7 @@ type PdfFieldSignature struct { } // ToPdfObject returns an indirect object containing the signature field dictionary. -func (sig *PdfFieldSignature) ToPdfObject() *core.PdfIndirectObject { +func (sig *PdfFieldSignature) ToPdfObject() core.PdfObject { // Set general field attributes sig.PdfField.ToPdfObject() container := sig.container @@ -463,7 +531,7 @@ func (f *PdfField) Flags() FieldFlag { common.Log.Debug("Error evaluating flags via inheritance: %v", err) } if !found { - common.Log.Debug("No field flags found") + common.Log.Trace("No field flags found - assume clear") } return flags @@ -549,7 +617,15 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj } ctx.PdfField = field field.context = ctx + case "Sig": + ctx, err := newPdfFieldSignatureFromDict(d) + if err != nil { + return nil, err + } + ctx.PdfField = field + field.context = ctx default: + common.Log.Debug("Unsupported field type %s", *field.FT) return nil, errors.New("Unsupported field type") } } @@ -566,10 +642,10 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj field.Parent = parent } - field.Annotations = []*PdfAnnotation{} + field.Annotations = []*PdfAnnotationWidget{} // Has a merged-in widget annotation? 
-	if name := d.GetDirect("Subtype").(*core.PdfObjectName); name != nil {
+	if name, _ := d.GetDirect("Subtype").(*core.PdfObjectName); name != nil {
 		if *name == "Widget" {
 			// Is a merged field / widget dict.
 
@@ -585,7 +661,7 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
 			}
 
 			widget.Parent = field.GetContainingPdfObject()
-			field.Annotations = append(field.Annotations, annot)
+			field.Annotations = append(field.Annotations, widget)
 
 			return field, nil
 		}
@@ -613,12 +689,16 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
 
 			// Widget annotations contain key Subtype with value equal to /Widget. Otherwise are assumed to be fields.
 			if name, has := dict.GetDirect("Subtype").(*core.PdfObjectName); has && *name == "Widget" {
-				widg, err := r.newPdfAnnotationFromIndirectObject(container)
+				annot, err := r.newPdfAnnotationFromIndirectObject(container)
 				if err != nil {
 					common.Log.Debug("Error loading widget annotation for field: %v", err)
 					return nil, err
 				}
-				field.Annotations = append(field.Annotations, widg)
+				wa, ok := annot.context.(*PdfAnnotationWidget)
+				if !ok {
+					return nil, ErrTypeCheck
+				}
+				field.Annotations = append(field.Annotations, wa)
 			} else {
 				childf, err := r.newPdfFieldFromIndirectObject(container, field)
 				if err != nil {
@@ -642,7 +722,7 @@ func newPdfFieldTextFromDict(d *core.PdfObjectDictionary) (*PdfFieldText, error)
 	textf.DS, _ = d.GetDirect("DS").(*core.PdfObjectString)
 	textf.RV = d.Get("RV")
 	// TODO: MaxLen should be loaded for other fields too?
-	textf.MaxLen = d.Get("MaxLen").(*core.PdfObjectInteger)
+	textf.MaxLen, _ = d.Get("MaxLen").(*core.PdfObjectInteger)
 	return textf, nil
 }
 
@@ -663,3 +743,13 @@ func newPdfFieldButtonFromDict(d *core.PdfObjectDictionary) (*PdfFieldButton, er
 	buttonf.Opt, _ = d.GetDirect("Opt").(*core.PdfObjectArray)
 	return buttonf, nil
 }
+
+// newPdfFieldSignatureFromDict returns a new PdfFieldSignature (representing a signature field) loaded from a dictionary.
+// This function loads only the signature-specific fields (called by a more generic field loader).
+func newPdfFieldSignatureFromDict(d *core.PdfObjectDictionary) (*PdfFieldSignature, error) {
+	sigf := &PdfFieldSignature{}
+	sigf.V, _ = d.Get("V").(*core.PdfIndirectObject)
+	sigf.Lock, _ = d.Get("Lock").(*core.PdfIndirectObject)
+	sigf.SV, _ = d.Get("SV").(*core.PdfIndirectObject)
+	return sigf, nil
+}
diff --git a/pdf/model/form.go b/pdf/model/form.go
index c6c1e4a0..3e1a93cb 100644
--- a/pdf/model/form.go
+++ b/pdf/model/form.go
@@ -7,6 +7,7 @@ package model
 
 import (
 	"fmt"
+
 	"github.com/unidoc/unidoc/common"
 	"github.com/unidoc/unidoc/pdf/core"
 )
@@ -17,10 +18,9 @@ Btn = button
 Tx = text
 Ch = choice
 Sig = signature
- */
+*/
 
-// PdfAcroForm represents the AcroForm dictionary used for representation of forms
-// in PDF.
+// PdfAcroForm represents the AcroForm dictionary used for representation of form data in PDF.
 type PdfAcroForm struct {
 	Fields          *[]*PdfField
 	NeedAppearances *core.PdfObjectBool
@@ -31,7 +31,7 @@ type PdfAcroForm struct {
 	Q               *core.PdfObjectInteger
 	XFA             core.PdfObject
 
-	primitive *core.PdfIndirectObject
+	container *core.PdfIndirectObject
 }
 
 // NewPdfAcroForm returns a new PdfAcroForm with an intialized container (indirect object).
@@ -40,11 +40,31 @@ func NewPdfAcroForm() *PdfAcroForm {
 
 	container := &core.PdfIndirectObject{}
 	container.PdfObject = core.MakeDict()
+	acroForm.container = container
 
-	acroForm.primitive = container
 	return acroForm
 }
 
+// flattenFields returns field followed by all of its descendants (Kids), depth-first in pre-order.
+func flattenFields(field *PdfField) []*PdfField {
+	list := []*PdfField{field}
+	for _, k := range field.Kids {
+		list = append(list, flattenFields(k)...)
+	}
+	return list
+}
+
+// AllFields returns a flattened list of all fields in the form.
+func (form *PdfAcroForm) AllFields() []*PdfField {
+	fields := []*PdfField{}
+	if form.Fields != nil {
+		for _, field := range *form.Fields {
+			fields = append(fields, flattenFields(field)...)
+		}
+	}
+	return fields
+}
+
 // newPdfAcroFormFromDict is used when loading forms from PDF files.
 func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcroForm, error) {
 	acroForm := NewPdfAcroForm()
@@ -154,13 +174,13 @@ func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcr
 
 // GetContainingPdfObject returns the container of the PdfAcroForm (indirect object).
 func (this *PdfAcroForm) GetContainingPdfObject() core.PdfObject {
-	return this.primitive
+	return this.container
 }
 
 // ToPdfObject converts PdfAcroForm to a PdfObject, i.e. an indirect object containing the
 // AcroForm dictionary.
 func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
-	container := this.primitive
+	container := this.container
 	dict := container.PdfObject.(*core.PdfObjectDictionary)
 
 	if this.Fields != nil {
@@ -176,7 +196,6 @@ func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
 	}
 	if this.SigFlags != nil {
 		dict.Set("SigFlags", this.SigFlags)
-
 	}
 	if this.CO != nil {
 		dict.Set("CO", this.CO)
@@ -196,5 +215,3 @@ func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
 
 	return container
 }
-
-
diff --git a/pdf/model/reader.go b/pdf/model/reader.go
index 6482c30e..a34630a5 100644
--- a/pdf/model/reader.go
+++ b/pdf/model/reader.go
@@ -695,7 +695,8 @@ func (this *PdfReader) traverseObjectData(o PdfObject) error {
 	return nil
 }
 
-// Get a page by the page number. Indirect object with type /Page.
+// GetPageAsIndirectObject returns the indirect object representing a page for a given page number.
+// Indirect object with type /Page.
 func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error) {
 	if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
 		return nil, fmt.Errorf("File needs to be decrypted first")
@@ -706,6 +707,7 @@ func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error
 	page := this.pageList[pageNumber-1]
 
 	// Look up all references related to page and load everything.
+	// XXX/TODO: Use of traverse object data will be limited when lazy-loading is supported.
 	err := this.traverseObjectData(page)
 	if err != nil {
 		return nil, err
@@ -716,6 +718,20 @@ func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error
 	return page, nil
 }
 
+// PageFromIndirectObject finds the page whose indirect object is ind and returns it with its 1-based page number.
+func (r *PdfReader) PageFromIndirectObject(ind *PdfIndirectObject) (*PdfPage, int, error) {
+	if len(r.PageList) != len(r.pageList) {
+		return nil, 0, errors.New("page list invalid")
+	}
+
+	for idx := range r.pageList {
+		if r.pageList[idx] == ind {
+			return r.PageList[idx], idx + 1, nil
+		}
+	}
+	return nil, 0, errors.New("Page not found")
+}
+
 // Get a page by the page number.
 // Returns the PdfPage entry.
 func (this *PdfReader) GetPage(pageNumber int) (*PdfPage, error) {