diff --git a/pdf/core/parser.go b/pdf/core/parser.go index 6aac638f..50eeedaa 100644 --- a/pdf/core/parser.go +++ b/pdf/core/parser.go @@ -43,7 +43,7 @@ type PdfParser struct { xrefs XrefTable objstms ObjectStreams trailer *PdfObjectDictionary - ObjCache ObjectCache // TODO: Unexport (v3). + ObjCache ObjectCache // TODO: Unexport (v3). - May need access from testing. crypter *PdfCrypt repairsAttempted bool // Avoid multiple attempts for repair. @@ -579,6 +579,7 @@ func (parser *PdfParser) ParseDict() (*PdfObjectDictionary, error) { common.Log.Trace("Reading PDF Dict!") dict := MakeDict() + dict.parser = parser // Pass the '<<' c, _ := parser.reader.ReadByte() @@ -1321,7 +1322,9 @@ func (parser *PdfParser) ParseIndirectObject() (PdfObject, error) { common.Log.Trace("-Read indirect obj") bb, err := parser.reader.Peek(20) if err != nil { - common.Log.Debug("ERROR: Fail to read indirect obj") + if err != io.EOF { + common.Log.Debug("ERROR: Fail to read indirect obj") + } return &indirect, err } common.Log.Trace("(indirect obj peek \"%s\"", string(bb)) @@ -1493,6 +1496,7 @@ func (parser *PdfParser) ParseIndirectObject() (PdfObject, error) { // TODO: Unexport (v3) or move to test files, if needed by external test cases. func NewParserFromString(txt string) *PdfParser { parser := PdfParser{} + parser.ObjCache = ObjectCache{} buf := []byte(txt) bufReader := bytes.NewReader(buf) diff --git a/pdf/model/annotations.go b/pdf/model/annotations.go index 9fdde454..c8a374da 100644 --- a/pdf/model/annotations.go +++ b/pdf/model/annotations.go @@ -54,7 +54,7 @@ func (this *PdfAnnotation) String() string { return s } -// Additional elements for mark-up annotations. +// PdfAnnotationMarkup represents additional elements for mark-up annotations. type PdfAnnotationMarkup struct { T PdfObject Popup *PdfAnnotationPopup @@ -68,7 +68,7 @@ type PdfAnnotationMarkup struct { ExData PdfObject } -// Subtype: Text +// PdfAnnotationText represents a Text annotation. type PdfAnnotationText struct { *PdfAnnotation *PdfAnnotationMarkup diff --git a/pdf/model/form_test.go b/pdf/model/form_test.go new file mode 100644 index 00000000..0edb0dbd --- /dev/null +++ b/pdf/model/form_test.go @@ -0,0 +1,234 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package model + +import ( + "testing" + + "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core" +) + +func compareDictionariesDeep(d1, d2 *core.PdfObjectDictionary) bool { + if len(d1.Keys()) != len(d2.Keys()) { + common.Log.Debug("Dict entries mismatch (%d != %d)", len(d1.Keys()), len(d2.Keys())) + common.Log.Debug("Was '%s' vs '%s'", d1.DefaultWriteString(), d2.DefaultWriteString()) + return false + } + + for _, k := range d1.Keys() { + if k == "Parent" { + continue + } + v1 := core.TraceToDirectObject(d1.Get(k)) + v2 := core.TraceToDirectObject(d2.Get(k)) + + if v1 == nil { + common.Log.Debug("v1 is nil") + return false + } + if v2 == nil { + common.Log.Debug("v2 is nil") + return false + } + + switch t1 := v1.(type) { + case *core.PdfObjectDictionary: + t2, ok := v2.(*core.PdfObjectDictionary) + if !ok { + common.Log.Debug("Type mismatch %T vs %T", v1, v2) + return false + } + if !compareDictionariesDeep(t1, t2) { + return false + } + continue + + case *core.PdfObjectArray: + t2, ok := v2.(*core.PdfObjectArray) + if !ok { + common.Log.Debug("v2 not an array") + return false + } + if t1.Len() != t2.Len() { + common.Log.Debug("array length mismatch (%d != %d)", t1.Len(), t2.Len()) + return false + } + for i := 0; i < t1.Len(); i++ { + v1 := core.TraceToDirectObject(t1.Get(i)) + v2 := core.TraceToDirectObject(t2.Get(i)) + if d1, isD1 := v1.(*core.PdfObjectDictionary); isD1 { + d2, isD2 := v2.(*core.PdfObjectDictionary) + if !isD2 { + return false + } + if !compareDictionariesDeep(d1, d2) { + return false + } + } else { + if v1.DefaultWriteString() != v2.DefaultWriteString() { + common.Log.Debug("Mismatch '%s' != '%s'", v1.DefaultWriteString(), v2.DefaultWriteString()) + return false + } + } + } + continue + } + + if v1.String() != v2.String() { + common.Log.Debug("key=%s Mismatch! '%s' != '%s'", k, v1.String(), v2.String()) + common.Log.Debug("For '%T' - '%T'", v1, v2) + common.Log.Debug("For '%+v' - '%+v'", v1, v2) + return false + } + } + + return true +} + +// Test loading of a basic checkbox field with a merged-in annotation. +func TestCheckboxField1(t *testing.T) { + rawText := ` +1 0 obj +<< +/Type /Annot +/Subtype /Widget +/Rect [100 100 120 120] +/FT /Btn +/T (Urgent) +/V /Yes +/AS /Yes +/AP <> >> +>> +endobj + +2 0 obj +<> +stream +q +0 0 1 rg +BT +/ZaDb 12 Tf +0 0 Td +(4) Tj +ET +Q +endstream +endobj + +3 0 obj +<> +stream +q +0 0 1 rg +BT +/ZaDb 12 Tf +0 0 Td +(8) Tj +ET +Q +endstream +endobj + +4 0 obj +% Copy of obj 1 except not with merged-in annotation +<< +/FT /Btn +/T (Urgent) +/V /Yes +/Kids [5 0 R] +>> +endobj + +5 0 obj +<< +/Type /Annot +/Subtype /Widget +/Rect [100 100 120 120] +/AS /Yes +/AP <> >> +/Parent 4 0 R +>> +endobj +` + r := NewReaderForText(rawText) + + err := r.ParseIndObjSeries() + if err != nil { + t.Fatalf("Failed loading indirect object series: %v", err) + } + + // Load the field from object number 1. + obj, err := r.parser.LookupByNumber(1) + if err != nil { + t.Fatalf("Failed to parse indirect obj (%s)", err) + } + + ind, ok := obj.(*core.PdfIndirectObject) + if !ok { + t.Fatalf("Incorrect type (%T)", obj) + } + + field, err := r.newPdfFieldFromIndirectObject(ind, nil) + if err != nil { + t.Fatalf("Unable to load field (%v)", err) + return + } + + // Check properties of the field. + buttonf, ok := field.GetContext().(*PdfFieldButton) + if !ok { + t.Errorf("Field content incorrect (%T)", field.GetContext()) + return + } + if buttonf == nil { + t.Fatalf("buttonf is nil") + } + + if len(field.Kids) > 0 { + t.Fatalf("Field should not have kids") + } + + if len(field.Annotations) != 1 { + t.Fatalf("Field should have a single annotation") + } + + // Field -> PDF object. Regenerate the field dictionary and see if matches expectations. + // Reset the dictionaries for both field and annotation to avoid re-use during re-generation of PDF object. + field.container = core.MakeIndirectObject(core.MakeDict()) + field.Annotations[0].primitive = core.MakeIndirectObject(core.MakeDict()) + fieldPdfObj := field.ToPdfObject() + fieldDict, ok := fieldPdfObj.(*core.PdfIndirectObject).PdfObject.(*core.PdfObjectDictionary) + if !ok { + t.Fatalf("Type error") + } + + // Load the expected field dictionary (output). Slightly different than original as the input had + // a merged-in annotation. Our output does not currently merge annotations. + obj, err = r.parser.LookupByNumber(4) + if err != nil { + t.Fatalf("Error: %v", err) + } + + expDict, ok := obj.(*core.PdfIndirectObject).PdfObject.(*core.PdfObjectDictionary) + if !ok { + t.Fatalf("Unable to load expected dict") + } + + if !compareDictionariesDeep(expDict, fieldDict) { + t.Fatalf("Mismatch in expected and actual field dictionaries (deep)") + } +} diff --git a/pdf/model/fuzz_test.go b/pdf/model/fuzz_test.go index 2ddb9442..068e048d 100644 --- a/pdf/model/fuzz_test.go +++ b/pdf/model/fuzz_test.go @@ -8,7 +8,7 @@ import ( ) func init() { - common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace)) + common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug)) } // Test for an endless recursive loop in diff --git a/pdf/model/page_test.go b/pdf/model/page_test.go index 6b1e813c..225c83a2 100644 --- a/pdf/model/page_test.go +++ b/pdf/model/page_test.go @@ -6,19 +6,62 @@ package model import ( + "io" "testing" + "github.com/unidoc/unidoc/common" . "github.com/unidoc/unidoc/pdf/core" ) -/* -func makeReaderForText(txt string) *bufio.Reader { - buf := []byte(txt) - bufReader := bytes.NewReader(buf) - bufferedReader := bufio.NewReader(bufReader) - return bufferedReader +// NewReaderForText makes a new PdfReader for an input PDF content string. For use in testing. +func NewReaderForText(txt string) *PdfReader { + r := &PdfReader{} + r.traversed = map[PdfObject]bool{} + r.modelManager = NewModelManager() + + // Create the parser, loads the cross reference table and trailer. + parser := NewParserFromString(txt) + r.parser = parser + + return r +} + +// ParseIndObjSeries loads a series of indirect objects until it runs into an error. +// Fully loads the objects and traverses resolving references to *PdfIndirectObjects. +// For use in testing. +func (r *PdfReader) ParseIndObjSeries() error { + for { + obj, err := r.parser.ParseIndirectObject() + if err != nil { + if err != io.EOF { + common.Log.Debug("Error parsing indirect object: %v", err) + return err + } + break + } + + switch t := obj.(type) { + case *PdfObjectStream: + r.parser.ObjCache[int(t.ObjectNumber)] = t + case *PdfIndirectObject: + r.parser.ObjCache[int(t.ObjectNumber)] = t + default: + common.Log.Debug("Incorrect type for ind obj: %T", obj) + return ErrTypeCheck + } + } + + // Traverse the objects, resolving references to instances to PdfIndirectObject pointers. + for _, obj := range r.parser.ObjCache { + err := r.traverseObjectData(obj) + if err != nil { + common.Log.Debug("ERROR: Unable to traverse(%s)", err) + return err + } + } + + return nil } -*/ // Test PDF date parsing from string. func TestDateParse(t *testing.T) { @@ -221,8 +264,6 @@ func TestPdfPage1(t *testing.T) { >> endobj ` - //parser := PdfParser{} - //parser.reader = makeReaderForText(rawText) parser := NewParserFromString(rawText) obj, err := parser.ParseIndirectObject() diff --git a/pdf/model/reader.go b/pdf/model/reader.go index 152f020f..6482c30e 100644 --- a/pdf/model/reader.go +++ b/pdf/model/reader.go @@ -32,6 +32,7 @@ type PdfReader struct { traversed map[PdfObject]bool } +// NewPdfReader returns a new PdfReader for a specified data stream (ReadSeeker interface). func NewPdfReader(rs io.ReadSeeker) (*PdfReader, error) { pdfReader := &PdfReader{} pdfReader.traversed = map[PdfObject]bool{} @@ -140,7 +141,7 @@ func (this *PdfReader) loadStructure() error { // Catalog. root, ok := trailerDict.Get("Root").(*PdfObjectReference) if !ok { - return fmt.Errorf("Invalid Root (trailer: %s)", *trailerDict) + return fmt.Errorf("Invalid Root (trailer: %s)", trailerDict) } oc, err := this.parser.LookupByReference(*root) if err != nil {