Add an FDF form field value provider implementation. Remove unnecessary exports.

This commit is contained in:
Gunnsteinn Hall 2018-10-02 20:02:27 +00:00
parent d9ae3d6d38
commit d65df0b7b3
6 changed files with 191 additions and 38 deletions

7
pdf/fdf/doc.go Normal file
View File

@ -0,0 +1,7 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
// Package fdf provides support for loading form field data from Forms Data Format (FDF) files.
package fdf

99
pdf/fdf/fielddata.go Normal file
View File

@ -0,0 +1,99 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package fdf
import (
"errors"
"io"
"os"
"sort"
"github.com/unidoc/unidoc/pdf/core"
)
// FDFData represents forms data format (FDF) file data.
// Both fields are populated by Load from the parsed document.
type FDFData struct {
// root is the FDF dictionary obtained from the parser's Root method.
root *core.PdfObjectDictionary
// fields is the array stored under the "Fields" key of root.
fields *core.PdfObjectArray
}
// Load reads FDF form data from the seekable stream `r`.
// It builds a parser over `r`, fetches the FDF root dictionary and looks up
// its "Fields" entry. A parser failure is returned unchanged; a "Fields"
// entry that core.GetArray does not report as an array yields an error.
func Load(r io.ReadSeeker) (*FDFData, error) {
	parser, err := newParser(r)
	if err != nil {
		return nil, err
	}

	root, err := parser.Root()
	if err != nil {
		return nil, err
	}

	fieldArr, ok := core.GetArray(root.Get("Fields"))
	if !ok {
		return nil, errors.New("Fields missing")
	}

	return &FDFData{root: root, fields: fieldArr}, nil
}
// LoadFromPath opens the file at `fdfPath` and loads FDF form data from it
// through Load. The file is closed when the function returns.
func LoadFromPath(fdfPath string) (*FDFData, error) {
	file, openErr := os.Open(fdfPath)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	return Load(file)
}
// FieldDictionaries returns a map of field names to field dictionaries.
// Entries of the fields array that are not dictionaries, or whose "T" entry
// does not yield a string, are skipped. The returned error is always nil.
func (fdf *FDFData) FieldDictionaries() (map[string]*core.PdfObjectDictionary, error) {
	byName := make(map[string]*core.PdfObjectDictionary)

	for idx, count := 0, fdf.fields.Len(); idx < count; idx++ {
		dict, isDict := core.GetDict(fdf.fields.Get(idx))
		if !isDict {
			continue
		}

		// The string under "T" is used as the map key for this dictionary.
		name, _ := core.GetString(dict.Get("T"))
		if name == nil {
			continue
		}
		byName[name.Str()] = dict
	}

	return byName, nil
}
// FieldValues implements interface model.FieldValueProvider.
// Returns a map of field names to values (PdfObjects); each value is the
// field dictionary's "V" entry passed through core.TraceToDirectObject.
func (fdf *FDFData) FieldValues() (map[string]core.PdfObject, error) {
	dicts, err := fdf.FieldDictionaries()
	if err != nil {
		return nil, err
	}

	// Collect the field names and visit them in sorted order.
	names := make([]string, 0, len(dicts))
	for name := range dicts {
		names = append(names, name)
	}
	sort.Strings(names)

	values := make(map[string]core.PdfObject, len(dicts))
	for _, name := range names {
		values[name] = core.TraceToDirectObject(dicts[name].Get("V"))
	}
	return values, nil
}

47
pdf/fdf/fielddata_test.go Normal file
View File

@ -0,0 +1,47 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package fdf
import (
"bytes"
"testing"
)
func TestFDFDataLoading(t *testing.T) {
r := bytes.NewReader([]byte(fdfExample1))
fdfData, err := Load(r)
if err != nil {
t.Fatalf("Error: %v", err)
}
fvalMap, err := fdfData.FieldValues()
if err != nil {
t.Fatalf("Error: %v", err)
}
expectedVals := []struct {
Name string
Val string
}{
{"Field1", "Test1"},
{"Field2", "Test2"},
}
if len(fvalMap) != len(expectedVals) {
t.Fatalf("len(fvalMap) != %d (got %d)", len(expectedVals), len(fvalMap))
}
for _, exp := range expectedVals {
val, has := fvalMap[exp.Name]
if !has {
t.Fatalf("%s missing from map", exp.Name)
}
if val.String() != exp.Val {
t.Fatalf("val.String() != %s (got %s)", exp.Val, val.String())
}
}
}

View File

@ -8,15 +8,15 @@ package fdf
import ( import (
"bufio" "bufio"
"errors" "errors"
"io"
"os" "os"
"github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common"
"io"
) )
// readAtLeast reads at least n bytes into slice p. // readAtLeast reads at least n bytes into slice p.
// Returns the number of bytes read (should always be == n), and an error on failure. // Returns the number of bytes read (should always be == n), and an error on failure.
func (parser *FdfParser) readAtLeast(p []byte, n int) (int, error) { func (parser *fdfParser) readAtLeast(p []byte, n int) (int, error) {
remaining := n remaining := n
start := 0 start := 0
numRounds := 0 numRounds := 0
@ -34,14 +34,14 @@ func (parser *FdfParser) readAtLeast(p []byte, n int) (int, error) {
} }
// getFileOffset returns the current file offset, accounting for buffered position. // getFileOffset returns the current file offset, accounting for buffered position.
func (parser *FdfParser) getFileOffset() int64 { func (parser *fdfParser) getFileOffset() int64 {
offset, _ := parser.rs.Seek(0, os.SEEK_CUR) offset, _ := parser.rs.Seek(0, os.SEEK_CUR)
offset -= int64(parser.reader.Buffered()) offset -= int64(parser.reader.Buffered())
return offset return offset
} }
// setFileOffset seeks the file to an offset position. // setFileOffset seeks the file to an offset position.
func (parser *FdfParser) setFileOffset(offset int64) { func (parser *fdfParser) setFileOffset(offset int64) {
parser.rs.Seek(offset, io.SeekStart) parser.rs.Seek(offset, io.SeekStart)
parser.reader = bufio.NewReader(parser.rs) parser.reader = bufio.NewReader(parser.rs)
} }

View File

@ -31,8 +31,8 @@ var reReference = regexp.MustCompile(`^\s*(\d+)\s+(\d+)\s+R`)
var reIndirectObject = regexp.MustCompile(`(\d+)\s+(\d+)\s+obj`) var reIndirectObject = regexp.MustCompile(`(\d+)\s+(\d+)\s+obj`)
var reTrailer = regexp.MustCompile(`trailer`) var reTrailer = regexp.MustCompile(`trailer`)
// FdfParser parses a FDF file and provides access to the object structure of the FDF. // fdfParser parses a FDF file and provides access to the object structure of the FDF.
type FdfParser struct { type fdfParser struct {
majorVersion int majorVersion int
minorVersion int minorVersion int
@ -46,7 +46,7 @@ type FdfParser struct {
} }
// Skip over any spaces. // Skip over any spaces.
func (parser *FdfParser) skipSpaces() (int, error) { func (parser *fdfParser) skipSpaces() (int, error) {
cnt := 0 cnt := 0
for { for {
b, err := parser.reader.ReadByte() b, err := parser.reader.ReadByte()
@ -65,7 +65,7 @@ func (parser *FdfParser) skipSpaces() (int, error) {
} }
// Skip over comments and spaces. Can handle multi-line comments. // Skip over comments and spaces. Can handle multi-line comments.
func (parser *FdfParser) skipComments() error { func (parser *fdfParser) skipComments() error {
if _, err := parser.skipSpaces(); err != nil { if _, err := parser.skipSpaces(); err != nil {
return err return err
} }
@ -95,7 +95,7 @@ func (parser *FdfParser) skipComments() error {
} }
// Read a comment starting with '%'. // Read a comment starting with '%'.
func (parser *FdfParser) readComment() (string, error) { func (parser *fdfParser) readComment() (string, error) {
var r bytes.Buffer var r bytes.Buffer
_, err := parser.skipSpaces() _, err := parser.skipSpaces()
@ -126,7 +126,7 @@ func (parser *FdfParser) readComment() (string, error) {
} }
// Read a single line of text from current position. // Read a single line of text from current position.
func (parser *FdfParser) readTextLine() (string, error) { func (parser *fdfParser) readTextLine() (string, error) {
var r bytes.Buffer var r bytes.Buffer
for { for {
bb, err := parser.reader.Peek(1) bb, err := parser.reader.Peek(1)
@ -145,7 +145,7 @@ func (parser *FdfParser) readTextLine() (string, error) {
} }
// Parse a name starting with '/'. // Parse a name starting with '/'.
func (parser *FdfParser) parseName() (core.PdfObjectName, error) { func (parser *fdfParser) parseName() (core.PdfObjectName, error) {
var r bytes.Buffer var r bytes.Buffer
nameStarted := false nameStarted := false
for { for {
@ -215,7 +215,7 @@ func (parser *FdfParser) parseName() (core.PdfObjectName, error) {
// Nonetheless, we sometimes get numbers with exponential format, so // Nonetheless, we sometimes get numbers with exponential format, so
// we will support it in the reader (no confusion with other types, so // we will support it in the reader (no confusion with other types, so
// no compromise). // no compromise).
func (parser *FdfParser) parseNumber() (core.PdfObject, error) { func (parser *fdfParser) parseNumber() (core.PdfObject, error) {
isFloat := false isFloat := false
allowSigns := true allowSigns := true
var r bytes.Buffer var r bytes.Buffer
@ -267,7 +267,7 @@ func (parser *FdfParser) parseNumber() (core.PdfObject, error) {
} }
// A string starts with '(' and ends with ')'. // A string starts with '(' and ends with ')'.
func (parser *FdfParser) parseString() (*core.PdfObjectString, error) { func (parser *fdfParser) parseString() (*core.PdfObjectString, error) {
parser.reader.ReadByte() parser.reader.ReadByte()
var r bytes.Buffer var r bytes.Buffer
@ -351,7 +351,7 @@ func (parser *FdfParser) parseString() (*core.PdfObjectString, error) {
// Starts with '<' ends with '>'. // Starts with '<' ends with '>'.
// Currently not converting the hex codes to characters. // Currently not converting the hex codes to characters.
func (parser *FdfParser) parseHexString() (*core.PdfObjectString, error) { func (parser *fdfParser) parseHexString() (*core.PdfObjectString, error) {
parser.reader.ReadByte() parser.reader.ReadByte()
var r bytes.Buffer var r bytes.Buffer
@ -381,7 +381,7 @@ func (parser *FdfParser) parseHexString() (*core.PdfObjectString, error) {
} }
// Starts with '[' ends with ']'. Can contain any kinds of direct objects. // Starts with '[' ends with ']'. Can contain any kinds of direct objects.
func (parser *FdfParser) parseArray() (*core.PdfObjectArray, error) { func (parser *fdfParser) parseArray() (*core.PdfObjectArray, error) {
arr := core.MakeArray() arr := core.MakeArray()
parser.reader.ReadByte() parser.reader.ReadByte()
@ -410,7 +410,7 @@ func (parser *FdfParser) parseArray() (*core.PdfObjectArray, error) {
} }
// Parse bool object. // Parse bool object.
func (parser *FdfParser) parseBool() (core.PdfObjectBool, error) { func (parser *fdfParser) parseBool() (core.PdfObjectBool, error) {
bb, err := parser.reader.Peek(4) bb, err := parser.reader.Peek(4)
if err != nil { if err != nil {
return core.PdfObjectBool(false), err return core.PdfObjectBool(false), err
@ -451,14 +451,14 @@ func parseReference(refStr string) (core.PdfObjectReference, error) {
} }
// Parse null object. // Parse null object.
func (parser *FdfParser) parseNull() (core.PdfObjectNull, error) { func (parser *fdfParser) parseNull() (core.PdfObjectNull, error) {
_, err := parser.reader.Discard(4) _, err := parser.reader.Discard(4)
return core.PdfObjectNull{}, err return core.PdfObjectNull{}, err
} }
// Detect the signature at the current file position and parse // Detect the signature at the current file position and parse
// the corresponding object. // the corresponding object.
func (parser *FdfParser) parseObject() (core.PdfObject, error) { func (parser *fdfParser) parseObject() (core.PdfObject, error) {
common.Log.Trace("Read direct object") common.Log.Trace("Read direct object")
parser.skipSpaces() parser.skipSpaces()
for { for {
@ -544,7 +544,7 @@ func (parser *FdfParser) parseObject() (core.PdfObject, error) {
} }
// Reads and parses a FDF dictionary object enclosed with '<<' and '>>' // Reads and parses a FDF dictionary object enclosed with '<<' and '>>'
func (parser *FdfParser) parseDict() (*core.PdfObjectDictionary, error) { func (parser *fdfParser) parseDict() (*core.PdfObjectDictionary, error) {
common.Log.Trace("Reading FDF Dict!") common.Log.Trace("Reading FDF Dict!")
dict := core.MakeDict() dict := core.MakeDict()
@ -616,7 +616,7 @@ func (parser *FdfParser) parseDict() (*core.PdfObjectDictionary, error) {
// Parse the FDF version from the beginning of the file. // Parse the FDF version from the beginning of the file.
// Returns the major and minor parts of the version. // Returns the major and minor parts of the version.
// E.g. for "FDF-1.4" would return 1 and 4. // E.g. for "FDF-1.4" would return 1 and 4.
func (parser *FdfParser) parseFdfVersion() (int, int, error) { func (parser *fdfParser) parseFdfVersion() (int, int, error) {
parser.rs.Seek(0, os.SEEK_SET) parser.rs.Seek(0, os.SEEK_SET)
var offset int64 = 20 var offset int64 = 20
b := make([]byte, offset) b := make([]byte, offset)
@ -651,7 +651,7 @@ func (parser *FdfParser) parseFdfVersion() (int, int, error) {
// Look for EOF marker and seek to its beginning. // Look for EOF marker and seek to its beginning.
// Define an offset position from the end of the file. // Define an offset position from the end of the file.
func (parser *FdfParser) seekToEOFMarker(fSize int64) error { func (parser *fdfParser) seekToEOFMarker(fSize int64) error {
// Define the starting point (from the end of the file) to search from. // Define the starting point (from the end of the file) to search from.
var offset int64 = 0 var offset int64 = 0
@ -693,7 +693,7 @@ func (parser *FdfParser) seekToEOFMarker(fSize int64) error {
// Parse an indirect object from the input stream. Can also be an object stream. // Parse an indirect object from the input stream. Can also be an object stream.
// Returns the indirect object (*PdfIndirectObject) or the stream object (*PdfObjectStream). // Returns the indirect object (*PdfIndirectObject) or the stream object (*PdfObjectStream).
func (parser *FdfParser) parseIndirectObject() (core.PdfObject, error) { func (parser *fdfParser) parseIndirectObject() (core.PdfObject, error) {
indirect := core.PdfIndirectObject{} indirect := core.PdfIndirectObject{}
common.Log.Trace("-Read indirect obj") common.Log.Trace("-Read indirect obj")
@ -840,10 +840,10 @@ func (parser *FdfParser) parseIndirectObject() (core.PdfObject, error) {
return &indirect, nil return &indirect, nil
} }
// NewParserFromString parses an FDF from a string. // newParserFromString parses an FDF from a string.
// Useful for testing purposes. // Useful for testing purposes.
func NewParserFromString(txt string) (*FdfParser, error) { func newParserFromString(txt string) (*fdfParser, error) {
parser := FdfParser{} parser := fdfParser{}
buf := []byte(txt) buf := []byte(txt)
bufReader := bytes.NewReader(buf) bufReader := bytes.NewReader(buf)
@ -859,7 +859,7 @@ func NewParserFromString(txt string) (*FdfParser, error) {
} }
// Root returns the Root of the FDF document. // Root returns the Root of the FDF document.
func (parser *FdfParser) Root() (*core.PdfObjectDictionary, error) { func (parser *fdfParser) Root() (*core.PdfObjectDictionary, error) {
if parser.trailerDict != nil { if parser.trailerDict != nil {
if rootDict, ok := parser.trace(parser.trailerDict.Get("Root")).(*core.PdfObjectDictionary); ok { if rootDict, ok := parser.trace(parser.trailerDict.Get("Root")).(*core.PdfObjectDictionary); ok {
if fdfDict, ok := parser.trace(rootDict.Get("FDF")).(*core.PdfObjectDictionary); ok { if fdfDict, ok := parser.trace(rootDict.Get("FDF")).(*core.PdfObjectDictionary); ok {
@ -886,10 +886,10 @@ func (parser *FdfParser) Root() (*core.PdfObjectDictionary, error) {
return nil, errors.New("FDF not found") return nil, errors.New("FDF not found")
} }
// NewParser creates a new parser for a FDF file via ReadSeeker. Loads the cross reference stream and trailer. // newParser creates a new parser for a FDF file via ReadSeeker. Loads the cross reference stream and trailer.
// An error is returned on failure. // An error is returned on failure.
func NewParser(rs io.ReadSeeker) (*FdfParser, error) { func newParser(rs io.ReadSeeker) (*fdfParser, error) {
parser := &FdfParser{} parser := &fdfParser{}
parser.rs = rs parser.rs = rs
parser.objCache = map[int64]core.PdfObject{} parser.objCache = map[int64]core.PdfObject{}
@ -911,7 +911,7 @@ func NewParser(rs io.ReadSeeker) (*FdfParser, error) {
} }
// trace resolves a PdfObject to direct object, looking up and resolving references as needed. // trace resolves a PdfObject to direct object, looking up and resolving references as needed.
func (parser *FdfParser) trace(obj core.PdfObject) core.PdfObject { func (parser *fdfParser) trace(obj core.PdfObject) core.PdfObject {
switch t := obj.(type) { switch t := obj.(type) {
case *core.PdfObjectReference: case *core.PdfObjectReference:
indObj, ok := parser.objCache[t.ObjectNumber].(*core.PdfIndirectObject) indObj, ok := parser.objCache[t.ObjectNumber].(*core.PdfIndirectObject)
@ -929,7 +929,7 @@ func (parser *FdfParser) trace(obj core.PdfObject) core.PdfObject {
} }
// parse runs through the file and parses indirect objects and loads into cache. // parse runs through the file and parses indirect objects and loads into cache.
func (parser *FdfParser) parse() error { func (parser *fdfParser) parse() error {
// Go to beginning, reset reader. // Go to beginning, reset reader.
parser.rs.Seek(0, io.SeekStart) parser.rs.Seek(0, io.SeekStart)
parser.reader = bufio.NewReader(parser.rs) parser.reader = bufio.NewReader(parser.rs)
@ -981,7 +981,7 @@ func (parser *FdfParser) parse() error {
// Called when Fdf version not found normally. Looks for the PDF version by scanning top-down. // Called when Fdf version not found normally. Looks for the PDF version by scanning top-down.
// %FDF-1.4 // %FDF-1.4
func (parser *FdfParser) seekFdfVersionTopDown() (int, int, error) { func (parser *fdfParser) seekFdfVersionTopDown() (int, int, error) {
// Go to beginning, reset reader. // Go to beginning, reset reader.
parser.rs.Seek(0, os.SEEK_SET) parser.rs.Seek(0, os.SEEK_SET)
parser.reader = bufio.NewReader(parser.rs) parser.reader = bufio.NewReader(parser.rs)

View File

@ -1,10 +1,14 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package fdf package fdf
import ( import (
"fmt" "fmt"
"testing" "testing"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/core"
) )
@ -19,12 +23,8 @@ trailer
%%EOF %%EOF
` `
func init() {
common.SetLogger(common.ConsoleLogger{LogLevel: common.LogLevelTrace})
}
func TestFdfExample1(t *testing.T) { func TestFdfExample1(t *testing.T) {
fdfDoc, err := NewParserFromString(fdfExample1) fdfDoc, err := newParserFromString(fdfExample1)
if err != nil { if err != nil {
t.Errorf("Error: %v", err) t.Errorf("Error: %v", err)
return return