Fix problems found by fuzzing in crossref handling and add a recursive loop guard in the reader (page building). Add test cases for these issues.

This commit is contained in:
Gunnsteinn Hall 2017-07-21 14:12:37 +00:00
parent d38a5ad1e0
commit 5f506b7427
6 changed files with 114 additions and 10 deletions

2
.gitignore vendored
View File

@ -49,3 +49,5 @@ temp/
buildinfo.json
pdf/font.go
fuzz.go

6
doc.go
View File

@ -14,9 +14,3 @@
//
package unidoc
import (
_ "github.com/unidoc/unidoc/common"
_ "github.com/unidoc/unidoc/license"
_ "github.com/unidoc/unidoc/pdf"
)

View File

@ -335,7 +335,11 @@ func (this *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
return nil, err
}
io, _ := o.(*PdfIndirectObject)
io, isInd := o.(*PdfIndirectObject)
if !isInd {
// Not indirect (must be a PdfObjectNull object)...
return o, nil
}
o = io.PdfObject
_, isRef = o.(*PdfObjectReference)
if isRef {

25
pdf/core/fuzz_test.go Normal file
View File

@ -0,0 +1,25 @@
package core
import "testing"
// Fuzz tests based on findings with go-fuzz.
// TestFuzzParserTrace1 is a regression test for a crash in
//   func (this *PdfParser) Trace(obj PdfObject) (PdfObject, error)
// when it is handed a reference to an object that does not exist.
func TestFuzzParserTrace1(t *testing.T) {
	var parser PdfParser
	parser.rs, parser.reader = makeReaderForText(" /Name")

	// Object number -1 cannot refer to any object in the parsed text.
	missing := &PdfObjectReference{ObjectNumber: -1}
	traced, err := parser.Trace(missing)

	// Tracing the dangling reference must not fail...
	if err != nil {
		t.Errorf("Fail, err != nil (%v)", err)
	}

	// ...and must yield a PdfObjectNull rather than crashing.
	_, isNull := traced.(*PdfObjectNull)
	if !isNull {
		t.Errorf("Fail, obj != PdfObjectNull (%T)", traced)
	}
}

71
pdf/model/fuzz_test.go Normal file
View File

@ -0,0 +1,71 @@
package model
import (
"testing"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
)
// init installs a console logger at trace level for the tests in this package.
func init() {
	logger := common.NewConsoleLogger(common.LogLevelTrace)
	common.SetLogger(logger)
}
// TestFuzzReaderBuildPageLoop is a regression test for an endless recursive loop in
//   func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject, traversedPageNodes map[PdfObject]bool) error
func TestFuzzReaderBuildPageLoop(t *testing.T) {
	/*
		The loop occurs when Pages entries point both forward and backward (which is illegal),
		so that following the Kids arrays never terminates.

		Example problem data:
		3 0 obj
		<< /Type /Pages /MediaBox [0 0 595 842] /Count 2 /Kids [ 2 0 R 12 0 R ] >>
		endobj
		2 0 obj
		<< /Type /Pages
		/Kids [3 0 R]
		/Count 1
		/MediaBox [0 0 300 144]
		>>
		endobj
		12 0 obj
		<<
		/Type /Page
		/Parent 3 0 R
		/Resources 15 0 R
		/Contents 13 0 R
		/MediaBox [0 0 595 842]
		>>
		endobj
	*/

	// Build two Pages nodes that list each other as their only kid.
	innerDict := core.MakeDict()
	innerDict.Set("Type", core.MakeName("Pages"))
	inner := core.MakeIndirectObject(innerDict)

	outerDict := core.MakeDict()
	outer := core.MakeIndirectObject(outerDict)
	outerDict.Set("Type", core.MakeName("Pages"))
	outerDict.Set("Kids", core.MakeArray(inner))
	innerDict.Set("Kids", core.MakeArray(outer))

	// A minimal reader with only the state populated that this test sets up.
	reader := PdfReader{
		traversed:    map[core.PdfObject]bool{},
		modelManager: NewModelManager(),
	}

	// The current behavior is to break the cycle and return nil (a debug message is logged),
	// so any error here is a failure.
	visited := map[core.PdfObject]bool{}
	if err := reader.buildPageList(outer, nil, visited); err != nil {
		t.Errorf("Fail: %v", err)
	}
}

View File

@ -192,7 +192,8 @@ func (this *PdfReader) loadStructure() error {
this.pageCount = int(*pageCount)
this.pageList = []*PdfIndirectObject{}
err = this.buildPageList(ppages, nil)
traversedPageNodes := map[PdfObject]bool{}
err = this.buildPageList(ppages, nil, traversedPageNodes)
if err != nil {
return err
}
@ -499,11 +500,17 @@ func (this *PdfReader) lookupPageByObject(obj PdfObject) (*PdfPage, error) {
// Build the page list by walking the page tree recursively,
// ex: Pages -> Pages -> Pages -> Page
// Nodes that were already traversed are skipped to avoid endless recursion.
func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject) error {
func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject, traversedPageNodes map[PdfObject]bool) error {
if node == nil {
return nil
}
if _, alreadyTraversed := traversedPageNodes[node]; alreadyTraversed {
common.Log.Debug("Cyclic recursion, skipping")
return nil
}
traversedPageNodes[node] = true
nodeDict, ok := node.PdfObject.(*PdfObjectDictionary)
if !ok {
return errors.New("Node not a dictionary")
@ -572,7 +579,7 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
return errors.New("Page not indirect object")
}
(*kids)[idx] = child
err = this.buildPageList(child, node)
err = this.buildPageList(child, node, traversedPageNodes)
if err != nil {
return err
}
@ -614,6 +621,7 @@ func (this *PdfReader) resolveReference(ref *PdfObjectReference) (PdfObject, boo
func (this *PdfReader) traverseObjectData(o PdfObject) error {
common.Log.Trace("Traverse object data")
if _, isTraversed := this.traversed[o]; isTraversed {
common.Log.Trace("-Already traversed...")
return nil
}
this.traversed[o] = true