From d65df0b7b3b76e3c573f339bfc357fdc46c4b6c8 Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Tue, 2 Oct 2018 20:02:27 +0000 Subject: [PATCH] Make FDF form field value provider implementation. Remove unnecessary exports. --- pdf/fdf/doc.go | 7 +++ pdf/fdf/fielddata.go | 99 +++++++++++++++++++++++++++++++++++++++ pdf/fdf/fielddata_test.go | 47 +++++++++++++++++++ pdf/fdf/io.go | 8 ++-- pdf/fdf/parser.go | 56 +++++++++++----------- pdf/fdf/parser_test.go | 12 ++--- 6 files changed, 191 insertions(+), 38 deletions(-) create mode 100644 pdf/fdf/doc.go create mode 100644 pdf/fdf/fielddata.go create mode 100644 pdf/fdf/fielddata_test.go diff --git a/pdf/fdf/doc.go b/pdf/fdf/doc.go new file mode 100644 index 00000000..b4d6b325 --- /dev/null +++ b/pdf/fdf/doc.go @@ -0,0 +1,7 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +// Package fdf provides support for loading form field data from Forms Data Format (FDF) files. +package fdf diff --git a/pdf/fdf/fielddata.go b/pdf/fdf/fielddata.go new file mode 100644 index 00000000..da7c9e61 --- /dev/null +++ b/pdf/fdf/fielddata.go @@ -0,0 +1,99 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package fdf + +import ( + "errors" + "io" + "os" + "sort" + + "github.com/unidoc/unidoc/pdf/core" +) + +// FDFData represents forms data format (FDF) file data. +type FDFData struct { + root *core.PdfObjectDictionary + fields *core.PdfObjectArray +} + +// Load loads FDF form data from `r`. 
+func Load(r io.ReadSeeker) (*FDFData, error) { + p, err := newParser(r) + if err != nil { + return nil, err + } + + fdf := &FDFData{} + + fdfDict, err := p.Root() + if err != nil { + return nil, err + } + fdf.root = fdfDict + + fields, found := core.GetArray(fdfDict.Get("Fields")) + if !found { + return nil, errors.New("Fields missing") + } + fdf.fields = fields + + return fdf, nil +} + +// LoadFromPath loads FDF form data from file path `fdfPath`. +func LoadFromPath(fdfPath string) (*FDFData, error) { + f, err := os.Open(fdfPath) + if err != nil { + return nil, err + } + defer f.Close() + + return Load(f) +} + +// FieldDictionaries returns a map of field names to field dictionaries. +func (fdf *FDFData) FieldDictionaries() (map[string]*core.PdfObjectDictionary, error) { + fieldDataMap := map[string]*core.PdfObjectDictionary{} + + for i := 0; i < fdf.fields.Len(); i++ { + fieldDict, has := core.GetDict(fdf.fields.Get(i)) + if has { + // Key value field data. + t, _ := core.GetString(fieldDict.Get("T")) + if t != nil { + fieldDataMap[t.Str()] = fieldDict + } + } + } + + return fieldDataMap, nil +} + +// FieldValues implements interface model.FieldValueProvider. +// Returns a map of field names to values (PdfObjects). 
+func (fdf *FDFData) FieldValues() (map[string]core.PdfObject, error) { + fieldDictMap, err := fdf.FieldDictionaries() + if err != nil { + return nil, err + } + + keys := []string{} + for fieldName := range fieldDictMap { + keys = append(keys, fieldName) + } + sort.Strings(keys) + + fieldValMap := map[string]core.PdfObject{} + for _, fieldName := range keys { + fieldDict := fieldDictMap[fieldName] + val := core.TraceToDirectObject(fieldDict.Get("V")) + fieldValMap[fieldName] = val + + } + + return fieldValMap, nil +} diff --git a/pdf/fdf/fielddata_test.go b/pdf/fdf/fielddata_test.go new file mode 100644 index 00000000..dcb4227f --- /dev/null +++ b/pdf/fdf/fielddata_test.go @@ -0,0 +1,47 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package fdf + +import ( + "bytes" + "testing" +) + +func TestFDFDataLoading(t *testing.T) { + r := bytes.NewReader([]byte(fdfExample1)) + + fdfData, err := Load(r) + if err != nil { + t.Fatalf("Error: %v", err) + } + + fvalMap, err := fdfData.FieldValues() + if err != nil { + t.Fatalf("Error: %v", err) + } + + expectedVals := []struct { + Name string + Val string + }{ + {"Field1", "Test1"}, + {"Field2", "Test2"}, + } + + if len(fvalMap) != len(expectedVals) { + t.Fatalf("len(fvalMap) != %d (got %d)", len(expectedVals), len(fvalMap)) + } + + for _, exp := range expectedVals { + val, has := fvalMap[exp.Name] + if !has { + t.Fatalf("%s missing from map", exp.Name) + } + if val.String() != exp.Val { + t.Fatalf("val.String() != %s (got %s)", exp.Val, val.String()) + } + } +} diff --git a/pdf/fdf/io.go b/pdf/fdf/io.go index b300d0ed..c7313bb2 100644 --- a/pdf/fdf/io.go +++ b/pdf/fdf/io.go @@ -8,15 +8,15 @@ package fdf import ( "bufio" "errors" + "io" "os" "github.com/unidoc/unidoc/common" - "io" ) // readAtLeast reads at least n bytes into slice p. // Returns the number of bytes read (should always be == n), and an error on failure. 
-func (parser *FdfParser) readAtLeast(p []byte, n int) (int, error) { +func (parser *fdfParser) readAtLeast(p []byte, n int) (int, error) { remaining := n start := 0 numRounds := 0 @@ -34,14 +34,14 @@ func (parser *FdfParser) readAtLeast(p []byte, n int) (int, error) { } // getFileOffset returns the current file offset, accounting for buffered position. -func (parser *FdfParser) getFileOffset() int64 { +func (parser *fdfParser) getFileOffset() int64 { offset, _ := parser.rs.Seek(0, os.SEEK_CUR) offset -= int64(parser.reader.Buffered()) return offset } // setFileOffset seeks the file to an offset position. -func (parser *FdfParser) setFileOffset(offset int64) { +func (parser *fdfParser) setFileOffset(offset int64) { parser.rs.Seek(offset, io.SeekStart) parser.reader = bufio.NewReader(parser.rs) } diff --git a/pdf/fdf/parser.go b/pdf/fdf/parser.go index 92fbc520..a61ba96a 100644 --- a/pdf/fdf/parser.go +++ b/pdf/fdf/parser.go @@ -31,8 +31,8 @@ var reReference = regexp.MustCompile(`^\s*(\d+)\s+(\d+)\s+R`) var reIndirectObject = regexp.MustCompile(`(\d+)\s+(\d+)\s+obj`) var reTrailer = regexp.MustCompile(`trailer`) -// FdfParser parses a FDF file and provides access to the object structure of the FDF. -type FdfParser struct { +// fdfParser parses a FDF file and provides access to the object structure of the FDF. +type fdfParser struct { majorVersion int minorVersion int @@ -46,7 +46,7 @@ type FdfParser struct { } // Skip over any spaces. -func (parser *FdfParser) skipSpaces() (int, error) { +func (parser *fdfParser) skipSpaces() (int, error) { cnt := 0 for { b, err := parser.reader.ReadByte() @@ -65,7 +65,7 @@ func (parser *FdfParser) skipSpaces() (int, error) { } // Skip over comments and spaces. Can handle multi-line comments. 
-func (parser *FdfParser) skipComments() error { +func (parser *fdfParser) skipComments() error { if _, err := parser.skipSpaces(); err != nil { return err } @@ -95,7 +95,7 @@ func (parser *FdfParser) skipComments() error { } // Read a comment starting with '%'. -func (parser *FdfParser) readComment() (string, error) { +func (parser *fdfParser) readComment() (string, error) { var r bytes.Buffer _, err := parser.skipSpaces() @@ -126,7 +126,7 @@ func (parser *FdfParser) readComment() (string, error) { } // Read a single line of text from current position. -func (parser *FdfParser) readTextLine() (string, error) { +func (parser *fdfParser) readTextLine() (string, error) { var r bytes.Buffer for { bb, err := parser.reader.Peek(1) @@ -145,7 +145,7 @@ func (parser *FdfParser) readTextLine() (string, error) { } // Parse a name starting with '/'. -func (parser *FdfParser) parseName() (core.PdfObjectName, error) { +func (parser *fdfParser) parseName() (core.PdfObjectName, error) { var r bytes.Buffer nameStarted := false for { @@ -215,7 +215,7 @@ func (parser *FdfParser) parseName() (core.PdfObjectName, error) { // Nonetheless, we sometimes get numbers with exponential format, so // we will support it in the reader (no confusion with other types, so // no compromise). -func (parser *FdfParser) parseNumber() (core.PdfObject, error) { +func (parser *fdfParser) parseNumber() (core.PdfObject, error) { isFloat := false allowSigns := true var r bytes.Buffer @@ -267,7 +267,7 @@ func (parser *FdfParser) parseNumber() (core.PdfObject, error) { } // A string starts with '(' and ends with ')'. -func (parser *FdfParser) parseString() (*core.PdfObjectString, error) { +func (parser *fdfParser) parseString() (*core.PdfObjectString, error) { parser.reader.ReadByte() var r bytes.Buffer @@ -351,7 +351,7 @@ func (parser *FdfParser) parseString() (*core.PdfObjectString, error) { // Starts with '<' ends with '>'. // Currently not converting the hex codes to characters. 
-func (parser *FdfParser) parseHexString() (*core.PdfObjectString, error) { +func (parser *fdfParser) parseHexString() (*core.PdfObjectString, error) { parser.reader.ReadByte() var r bytes.Buffer @@ -381,7 +381,7 @@ func (parser *FdfParser) parseHexString() (*core.PdfObjectString, error) { } // Starts with '[' ends with ']'. Can contain any kinds of direct objects. -func (parser *FdfParser) parseArray() (*core.PdfObjectArray, error) { +func (parser *fdfParser) parseArray() (*core.PdfObjectArray, error) { arr := core.MakeArray() parser.reader.ReadByte() @@ -410,7 +410,7 @@ func (parser *FdfParser) parseArray() (*core.PdfObjectArray, error) { } // Parse bool object. -func (parser *FdfParser) parseBool() (core.PdfObjectBool, error) { +func (parser *fdfParser) parseBool() (core.PdfObjectBool, error) { bb, err := parser.reader.Peek(4) if err != nil { return core.PdfObjectBool(false), err @@ -451,14 +451,14 @@ func parseReference(refStr string) (core.PdfObjectReference, error) { } // Parse null object. -func (parser *FdfParser) parseNull() (core.PdfObjectNull, error) { +func (parser *fdfParser) parseNull() (core.PdfObjectNull, error) { _, err := parser.reader.Discard(4) return core.PdfObjectNull{}, err } // Detect the signature at the current file position and parse // the corresponding object. 
-func (parser *FdfParser) parseObject() (core.PdfObject, error) { +func (parser *fdfParser) parseObject() (core.PdfObject, error) { common.Log.Trace("Read direct object") parser.skipSpaces() for { @@ -544,7 +544,7 @@ func (parser *FdfParser) parseObject() (core.PdfObject, error) { } // Reads and parses a FDF dictionary object enclosed with '<<' and '>>' -func (parser *FdfParser) parseDict() (*core.PdfObjectDictionary, error) { +func (parser *fdfParser) parseDict() (*core.PdfObjectDictionary, error) { common.Log.Trace("Reading FDF Dict!") dict := core.MakeDict() @@ -616,7 +616,7 @@ func (parser *FdfParser) parseDict() (*core.PdfObjectDictionary, error) { // Parse the FDF version from the beginning of the file. // Returns the major and minor parts of the version. // E.g. for "FDF-1.4" would return 1 and 4. -func (parser *FdfParser) parseFdfVersion() (int, int, error) { +func (parser *fdfParser) parseFdfVersion() (int, int, error) { parser.rs.Seek(0, os.SEEK_SET) var offset int64 = 20 b := make([]byte, offset) @@ -651,7 +651,7 @@ func (parser *FdfParser) parseFdfVersion() (int, int, error) { // Look for EOF marker and seek to its beginning. // Define an offset position from the end of the file. -func (parser *FdfParser) seekToEOFMarker(fSize int64) error { +func (parser *fdfParser) seekToEOFMarker(fSize int64) error { // Define the starting point (from the end of the file) to search from. var offset int64 = 0 @@ -693,7 +693,7 @@ func (parser *FdfParser) seekToEOFMarker(fSize int64) error { // Parse an indirect object from the input stream. Can also be an object stream. // Returns the indirect object (*PdfIndirectObject) or the stream object (*PdfObjectStream). 
-func (parser *FdfParser) parseIndirectObject() (core.PdfObject, error) { +func (parser *fdfParser) parseIndirectObject() (core.PdfObject, error) { indirect := core.PdfIndirectObject{} common.Log.Trace("-Read indirect obj") @@ -840,10 +840,10 @@ func (parser *FdfParser) parseIndirectObject() (core.PdfObject, error) { return &indirect, nil } -// NewParserFromString parses an FDF from a string. +// newParserFromString parses an FDF from a string. // Useful for testing purposes. -func NewParserFromString(txt string) (*FdfParser, error) { - parser := FdfParser{} +func newParserFromString(txt string) (*fdfParser, error) { + parser := fdfParser{} buf := []byte(txt) bufReader := bytes.NewReader(buf) @@ -859,7 +859,7 @@ func NewParserFromString(txt string) (*FdfParser, error) { } // Root returns the Root of the FDF document. -func (parser *FdfParser) Root() (*core.PdfObjectDictionary, error) { +func (parser *fdfParser) Root() (*core.PdfObjectDictionary, error) { if parser.trailerDict != nil { if rootDict, ok := parser.trace(parser.trailerDict.Get("Root")).(*core.PdfObjectDictionary); ok { if fdfDict, ok := parser.trace(rootDict.Get("FDF")).(*core.PdfObjectDictionary); ok { @@ -886,10 +886,10 @@ func (parser *FdfParser) Root() (*core.PdfObjectDictionary, error) { return nil, errors.New("FDF not found") } -// NewParser creates a new parser for a FDF file via ReadSeeker. Loads the cross reference stream and trailer. +// newParser creates a new parser for a FDF file via ReadSeeker. Loads the cross reference stream and trailer. // An error is returned on failure. -func NewParser(rs io.ReadSeeker) (*FdfParser, error) { - parser := &FdfParser{} +func newParser(rs io.ReadSeeker) (*fdfParser, error) { + parser := &fdfParser{} parser.rs = rs parser.objCache = map[int64]core.PdfObject{} @@ -911,7 +911,7 @@ func NewParser(rs io.ReadSeeker) (*FdfParser, error) { } // trace resolves a PdfObject to direct object, looking up and resolving references as needed. 
-func (parser *FdfParser) trace(obj core.PdfObject) core.PdfObject { +func (parser *fdfParser) trace(obj core.PdfObject) core.PdfObject { switch t := obj.(type) { case *core.PdfObjectReference: indObj, ok := parser.objCache[t.ObjectNumber].(*core.PdfIndirectObject) @@ -929,7 +929,7 @@ func (parser *FdfParser) trace(obj core.PdfObject) core.PdfObject { } // parse runs through the file and parses indirect objects and loads into cache. -func (parser *FdfParser) parse() error { +func (parser *fdfParser) parse() error { // Go to beginning, reset reader. parser.rs.Seek(0, io.SeekStart) parser.reader = bufio.NewReader(parser.rs) @@ -981,7 +981,7 @@ func (parser *FdfParser) parse() error { // Called when Fdf version not found normally. Looks for the PDF version by scanning top-down. // %FDF-1.4 -func (parser *FdfParser) seekFdfVersionTopDown() (int, int, error) { +func (parser *fdfParser) seekFdfVersionTopDown() (int, int, error) { // Go to beginning, reset reader. parser.rs.Seek(0, os.SEEK_SET) parser.reader = bufio.NewReader(parser.rs) diff --git a/pdf/fdf/parser_test.go b/pdf/fdf/parser_test.go index e7613f94..96cb8a0c 100644 --- a/pdf/fdf/parser_test.go +++ b/pdf/fdf/parser_test.go @@ -1,10 +1,14 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + package fdf import ( "fmt" "testing" - "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/core" ) @@ -19,12 +23,8 @@ trailer %%EOF ` -func init() { - common.SetLogger(common.ConsoleLogger{LogLevel: common.LogLevelTrace}) -} - func TestFdfExample1(t *testing.T) { - fdfDoc, err := NewParserFromString(fdfExample1) + fdfDoc, err := newParserFromString(fdfExample1) if err != nil { t.Errorf("Error: %v", err) return