mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Fix problems found by fuzzing in cross-reference handling and add a recursive-loop guard to the reader's page-list building. Add test cases for both issues.
This commit is contained in:
parent
d38a5ad1e0
commit
5f506b7427
2
.gitignore
vendored
2
.gitignore
vendored
@ -49,3 +49,5 @@ temp/
|
|||||||
buildinfo.json
|
buildinfo.json
|
||||||
|
|
||||||
pdf/font.go
|
pdf/font.go
|
||||||
|
|
||||||
|
fuzz.go
|
||||||
|
6
doc.go
6
doc.go
@ -14,9 +14,3 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
package unidoc
|
package unidoc
|
||||||
|
|
||||||
import (
|
|
||||||
_ "github.com/unidoc/unidoc/common"
|
|
||||||
_ "github.com/unidoc/unidoc/license"
|
|
||||||
_ "github.com/unidoc/unidoc/pdf"
|
|
||||||
)
|
|
||||||
|
@ -335,7 +335,11 @@ func (this *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
io, _ := o.(*PdfIndirectObject)
|
io, isInd := o.(*PdfIndirectObject)
|
||||||
|
if !isInd {
|
||||||
|
// Not indirect (must be a PdfObjectNull object)...
|
||||||
|
return o, nil
|
||||||
|
}
|
||||||
o = io.PdfObject
|
o = io.PdfObject
|
||||||
_, isRef = o.(*PdfObjectReference)
|
_, isRef = o.(*PdfObjectReference)
|
||||||
if isRef {
|
if isRef {
|
||||||
|
25
pdf/core/fuzz_test.go
Normal file
25
pdf/core/fuzz_test.go
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
package core
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// Fuzz tests based on findings with go-fuzz.
|
||||||
|
|
||||||
|
// Test for a crash in
|
||||||
|
// func (this *PdfParser) Trace(obj PdfObject) (PdfObject, error)
|
||||||
|
// when passing a reference to a non-existing object.
|
||||||
|
func TestFuzzParserTrace1(t *testing.T) {
|
||||||
|
parser := PdfParser{}
|
||||||
|
parser.rs, parser.reader = makeReaderForText(" /Name")
|
||||||
|
|
||||||
|
ref := &PdfObjectReference{ObjectNumber: -1}
|
||||||
|
obj, err := parser.Trace(ref)
|
||||||
|
|
||||||
|
// Should return a nil error and a PdfObjectNull object.
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Fail, err != nil (%v)", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, isNil := obj.(*PdfObjectNull); !isNil {
|
||||||
|
t.Errorf("Fail, obj != PdfObjectNull (%T)", obj)
|
||||||
|
}
|
||||||
|
}
|
71
pdf/model/fuzz_test.go
Normal file
71
pdf/model/fuzz_test.go
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
package model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/unidoc/unidoc/common"
|
||||||
|
"github.com/unidoc/unidoc/pdf/core"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test for an endless recursive loop in
|
||||||
|
// func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject) error
|
||||||
|
func TestFuzzReaderBuildPageLoop(t *testing.T) {
|
||||||
|
/*
|
||||||
|
The problem occurs when Pages entries point both forward and backward (which is illegal), causing
|
||||||
|
endless recursive looping.
|
||||||
|
|
||||||
|
Example problem data:
|
||||||
|
3 0 obj
|
||||||
|
<< /Type /Pages /MediaBox [0 0 595 842] /Count 2 /Kids [ 2 0 R 12 0 R ] >>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
|
||||||
|
2 0 obj
|
||||||
|
<< /Type /Pages
|
||||||
|
/Kids [3 0 R]
|
||||||
|
/Count 1
|
||||||
|
/MediaBox [0 0 300 144]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
12 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 3 0 R
|
||||||
|
/Resources 15 0 R
|
||||||
|
/Contents 13 0 R
|
||||||
|
/MediaBox [0 0 595 842]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
*/
|
||||||
|
|
||||||
|
pageDict := core.MakeDict()
|
||||||
|
pageDict.Set("Type", core.MakeName("Pages"))
|
||||||
|
page := core.MakeIndirectObject(pageDict)
|
||||||
|
|
||||||
|
pagesDict := core.MakeDict()
|
||||||
|
pages := core.MakeIndirectObject(pagesDict)
|
||||||
|
pagesDict.Set("Type", core.MakeName("Pages"))
|
||||||
|
pagesDict.Set("Kids", core.MakeArray(page))
|
||||||
|
|
||||||
|
pageDict.Set("Kids", core.MakeArray(pages))
|
||||||
|
|
||||||
|
// Make a dummy reader to test
|
||||||
|
dummyPdfReader := PdfReader{}
|
||||||
|
dummyPdfReader.traversed = map[core.PdfObject]bool{}
|
||||||
|
dummyPdfReader.modelManager = NewModelManager()
|
||||||
|
|
||||||
|
traversedPageNodes := map[core.PdfObject]bool{}
|
||||||
|
err := dummyPdfReader.buildPageList(pages, nil, traversedPageNodes)
|
||||||
|
|
||||||
|
// Current behavior is to detect the cycle, log a debug message, and return nil instead of recursing endlessly.
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Fail: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -192,7 +192,8 @@ func (this *PdfReader) loadStructure() error {
|
|||||||
this.pageCount = int(*pageCount)
|
this.pageCount = int(*pageCount)
|
||||||
this.pageList = []*PdfIndirectObject{}
|
this.pageList = []*PdfIndirectObject{}
|
||||||
|
|
||||||
err = this.buildPageList(ppages, nil)
|
traversedPageNodes := map[PdfObject]bool{}
|
||||||
|
err = this.buildPageList(ppages, nil, traversedPageNodes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -499,11 +500,17 @@ func (this *PdfReader) lookupPageByObject(obj PdfObject) (*PdfPage, error) {
|
|||||||
// Build the table of contents.
|
// Build the table of contents.
|
||||||
// tree, ex: Pages -> Pages -> Pages -> Page
|
// tree, ex: Pages -> Pages -> Pages -> Page
|
||||||
// Traverse through the whole thing recursively.
|
// Traverse through the whole thing recursively.
|
||||||
func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject) error {
|
func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject, traversedPageNodes map[PdfObject]bool) error {
|
||||||
if node == nil {
|
if node == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, alreadyTraversed := traversedPageNodes[node]; alreadyTraversed {
|
||||||
|
common.Log.Debug("Cyclic recursion, skipping")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
traversedPageNodes[node] = true
|
||||||
|
|
||||||
nodeDict, ok := node.PdfObject.(*PdfObjectDictionary)
|
nodeDict, ok := node.PdfObject.(*PdfObjectDictionary)
|
||||||
if !ok {
|
if !ok {
|
||||||
return errors.New("Node not a dictionary")
|
return errors.New("Node not a dictionary")
|
||||||
@ -572,7 +579,7 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
|
|||||||
return errors.New("Page not indirect object")
|
return errors.New("Page not indirect object")
|
||||||
}
|
}
|
||||||
(*kids)[idx] = child
|
(*kids)[idx] = child
|
||||||
err = this.buildPageList(child, node)
|
err = this.buildPageList(child, node, traversedPageNodes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -614,6 +621,7 @@ func (this *PdfReader) resolveReference(ref *PdfObjectReference) (PdfObject, boo
|
|||||||
func (this *PdfReader) traverseObjectData(o PdfObject) error {
|
func (this *PdfReader) traverseObjectData(o PdfObject) error {
|
||||||
common.Log.Trace("Traverse object data")
|
common.Log.Trace("Traverse object data")
|
||||||
if _, isTraversed := this.traversed[o]; isTraversed {
|
if _, isTraversed := this.traversed[o]; isTraversed {
|
||||||
|
common.Log.Trace("-Already traversed...")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
this.traversed[o] = true
|
this.traversed[o] = true
|
||||||
|
Loading…
x
Reference in New Issue
Block a user