diff --git a/pdf/forms.go b/pdf/forms.go index 47a8a8d2..d82c82ba 100644 --- a/pdf/forms.go +++ b/pdf/forms.go @@ -19,7 +19,7 @@ import ( ) type PdfAcroForm struct { - Fields []*PdfField + Fields *[]*PdfField NeedAppearances PdfObject SigFlags PdfObject CO PdfObject @@ -29,24 +29,149 @@ type PdfAcroForm struct { XFA PdfObject } -type PdfWidgetAnnotation { - Subtype PdfObject - H PdfObject - MK PdfObject - A PdfObject - AA PdfObject - BS PdfObject - Parent *PdfIndirectObject // Max 1 parent; Gets tricky for both form and annotation refs? Seems to usually refer to the page one. +func (r *PdfReader) newPdfAcroFormFromDict(d PdfObjectDictionary) (*PdfAcroForm, error) { + acroForm := PdfAcroForm{} + + if obj, has := d["Fields"]; has { + obj, err := r.traceToObject(obj) + if err != nil { + return nil, err + } + fieldArray, ok := TraceToDirectObject(obj).(*PdfObjectArray) + if !ok { + return nil, fmt.Errorf("Fields not an array (%T)", obj) + } + + fields := []*PdfField{} + for _, obj := range fieldArray { + obj, err := r.traceToObject(obj) + if err != nil { + return nil, err + } + fDict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary) + if !ok { + return nil, fmt.Errorf("Invalid Fields entry: %T", obj) + } + field := newPdfFieldFromDict(fDict) + fields = append(fields, field) + } + acroForm.Fields = &fields + } + + if obj, has := d["NeedAppearances"]; has { + acroForm.NeedAppearances = obj + } + if obj, has := d["SigFlags"]; has { + acroForm.SigFlags = obj + } + if obj, has := d["CO"]; has { + acroForm.CO = obj + } + if obj, has := d["DR"]; has { + acroForm.DR = obj + } + if obj, has := d["DA"]; has { + acroForm.DA = obj + } + if obj, has := d["Q"]; has { + acroForm.Q = obj + } + if obj, has := d["XFA"]; has { + acroForm.XFA = obj + } + + return &acroForm, nil +} + +func (this *PdfAcroForm) ToPdfObject() PdfObject { + +} + +func (r *PdfReader) newPdfFieldDict(d PdfObjectDictionary) (*PdfField, error) { + field := PdfField{} + + // Field type (required in terminal 
fields). + // Can be /Btn /Tx /Ch /Sig + // Required for a terminal field (inheritable). + if obj, has := d["FT"]; has { + obj, err = r.traceToObject(obj) + if err != nil { + return nil, err + } + name, ok := obj.(*PdfObjectName) + if !ok { + return nil, fmt.Errorf("Invalid type of FT field (%T)", obj) + } + + acroForm.FT = name + } + + // In a non-terminal field, the Kids array shall refer to field dictionaries that are immediate descendants of this field. + // In a terminal field, the Kids array ordinarily shall refer to one or more separate widget annotations that are associated + // with this field. However, if there is only one associated widget annotation, and its contents have been merged into the field + // dictionary, Kids shall be omitted. + + // Terminal field if: + // 1. Kids pointing to widget annotations (Kids[0].(*PdfField) casting fails) + // 2. Kids empty or missing and the dict has a Subtype equivalent to "Widget" + + if obj, has := d["Parent"]; has { + field.Parent = obj + } + if obj, has := d["Kids"]; has { + field.Kids = obj + } + + // Partial field name (Optional) + if obj, has := d["T"]; has { + field.T = obj + } + // Alternate description (Optional) + if obj, has := d["TU"]; has { + field.TU = obj + } + // Mapping name (Optional) + if obj, has := d["TM"]; has { + field.TM = obj + } + // Field flag. (Optional; inheritable) + if obj, has := d["Ff"]; has { + field.Ff = obj + } + // Value (Optional; inheritable) - Various types depending on the field type. 
+ if obj, has := d["V"]; has { + field.V = obj + } + // Default value for reset (Optional; inheritable) + if obj, has := d["DV"]; has { + field.DV = obj + } + // Additional actions dictionary (Optional) + if obj, has := d["AA"]; has { + field.AA = obj + } + + return &field, nil +} + +type PdfAnnotationWidget struct { + Subtype PdfObject // Widget (required) + H PdfObject + MK PdfObject + A PdfObject + AA PdfObject + BS PdfObject + Parent *PdfIndirectObject // Max 1 parent; Gets tricky for both form and annotation refs? Seems to usually refer to the page one. } type PdfField struct { + FT *PdfObjectName // field type Parent *PdfField // In a non-terminal field, the Kids array shall refer to field dictionaries that are immediate descendants of this field. // In a terminal field, the Kids array ordinarily shall refer to one or more separate widget annotations that are associated // with this field. However, if there is only one associated widget annotation, and its contents have been merged into the field // dictionary, Kids shall be omitted. - Kids []*PdfField - FT *PdfObjectString // field type + Kids PdfObject T PdfObject TU PdfObject TM PdfObject @@ -55,47 +180,38 @@ type PdfField struct { DV PdfObject AA PdfObject // Widget annotation can be merged in. + // PdfAnnotationWidget +} - // Variable text fields. +func (this *PdfField) ToPdfObject() { + // If Kids refer only to a single pdf widget annotation widget, then can merge it in. +} + +type PdfFieldVariableText struct { + PdfField DA PdfObject Q PdfObject DS PdfObject RV PdfObject +} + +type PdfFieldText struct { + PdfField // Text field MaxLen PdfObject // inheritable +} + +type PdfFieldChoice struct { + PdfField // Choice fields. Opt PdfObject TI PdfObject I PdfObject +} + +type PdfFieldSignature struct { + PdfField // Signature fields (Table 232). Lock PdfObject SV PdfObject - // Signature field lock dict (Table 233). 
- Type PdfObject // SigFieldLock - Action *PdfObjectName - Fields PdfObject - // Signature field seed value dictionary (Table 234) - //Type //SV - Ff - Filter - SubFilter - DigestMethod - V - Cert - Reasons - MDP - TimeStamp - LegalAttestation - AddRevInfo - // Certificate seed value dictionary (Table 235). - // Type //SVCert - // Ff - Subject - SubjectDN - KeyUsage - Issuer - OID - URL - URLType - // S } diff --git a/pdf/reader.go b/pdf/reader.go index b153c249..831f136e 100644 --- a/pdf/reader.go +++ b/pdf/reader.go @@ -393,6 +393,40 @@ func (this *PdfReader) GetForms() (*PdfObjectDictionary, error) { return formsDict, nil } +// XXX: Under construction. +func (this *PdfReader) LoadForms() error { + if this.parser.crypter != nil && !this.parser.crypter.authenticated { + return nil, fmt.Errorf("File need to be decrypted first") + } + + // Has forms? + catalog := this.catalog + obj, has := (*catalog)["AcroForm"] + if !has { + // Nothing to load. + return nil + } + var err error + obj, err = this.traceToObject(obj) + if err != nil { + return err + } + formsDict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary) + if !ok { + common.Log.Debug("Invalid AcroForm entry %T", obj) + common.Log.Debug("Does not have forms") + return nil + } + common.Log.Debug("Has Acro forms") + // Load it. + +} + +// Recursive build form field tree. +func (this *PdfReader) buildFieldTree(obj PdfObject) (*PdfOutlineTreeNode, error) { + // Describe how to do this first by hand. +} + func (this *PdfReader) lookupPageByObject(obj PdfObject) (*PdfPage, error) { // can be indirect, direct, or reference // look up the corresponding page