Lazy loading improvements (#131)

* Simplify lazy loading logic, remove redundancy. Fixes, improved performance.
* Additional lazy reader fixes
* Make core.IsNullObject method resolve references
* Adapt appender test cases for lazy reader usage
This commit is contained in:
Gunnsteinn Hall 2019-07-24 21:19:13 +00:00 committed by GitHub
parent 1c32554f09
commit d2e7eda95f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 25 additions and 104 deletions

View File

@ -541,7 +541,7 @@ func GetNumberAsFloat(obj PdfObject) (float64, error) {
// IsNullObject returns true if `obj` is a PdfObjectNull.
//
// `obj` is first passed through TraceToDirectObject, so the type check is
// applied to the object that call yields rather than to `obj` itself.
func IsNullObject(obj PdfObject) bool {
	// A single declaration only: declaring `_, isNull :=` twice in the same
	// scope does not compile, since the second would introduce no new variable.
	_, isNull := TraceToDirectObject(obj).(*PdfObjectNull)
	return isNull
}

View File

@ -20,8 +20,8 @@ import (
"github.com/stretchr/testify/require"
"golang.org/x/crypto/pkcs12"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/annotator"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/model"
"github.com/unidoc/unipdf/v3/model/sighandler"
@ -1072,8 +1072,9 @@ func TestAppenderSignMultiple(t *testing.T) {
t.Fatalf("page annotations != %d (got %d)", i, len(annotations))
}
for j, annot := range annotations {
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
annotPage := core.ResolveReference(annot.P)
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
}
appender, err := model.NewPdfAppender(pdfReader)
@ -1314,8 +1315,9 @@ func TestAppenderSignMultipleAppearances(t *testing.T) {
t.Fatalf("page annotations != %d (got %d)", i, len(annotations))
}
for j, annot := range annotations {
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
annotPage := core.ResolveReference(annot.P)
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
}
appender, err := model.NewPdfAppender(pdfReader)

View File

@ -125,7 +125,7 @@ func (r *PdfReader) newPdfPageFromDict(p *core.PdfObjectDictionary) (*PdfPage, e
page.LastModified = &lastmod
}
if obj := d.Get("Resources"); obj != nil {
if obj := d.Get("Resources"); obj != nil && !core.IsNullObject(obj) {
dict, ok := core.GetDict(obj)
if !ok {
return nil, fmt.Errorf("invalid resource dictionary (%T)", obj)
@ -736,7 +736,7 @@ func (p *PdfPage) AddContentStreamByString(contentStr string) error {
if p.Contents == nil {
// If not set, place it directly.
p.Contents = stream
} else if contArray, isArray := core.TraceToDirectObject(p.Contents).(*core.PdfObjectArray); isArray {
} else if contArray, isArray := core.GetArray(p.Contents); isArray {
// If an array of content streams, append it.
contArray.Append(stream)
} else {

View File

@ -614,79 +614,7 @@ func (r *PdfReader) resolveReference(ref *core.PdfObjectReference) (core.PdfObje
* GH: Are we fully protected against circular references? (Add tests).
*/
// traverseObjectData walks the object `o` and resolves the references found
// inside it, returning the first error encountered.
//
// The work is delegated to core.ResolveReferencesDeep instead of a hand-written
// walk over indirect objects, streams, dictionaries and arrays in the reader.
func (r *PdfReader) traverseObjectData(o core.PdfObject) error {
	// r.traversed is handed to the resolver as the reader's record of objects
	// that have already been visited.
	// NOTE(review): presumably the resolver uses it to skip repeats and guard
	// against circular references - confirm against core.ResolveReferencesDeep.
	return core.ResolveReferencesDeep(o, r.traversed)
}
// PageFromIndirectObject returns the PdfPage and page number for a given indirect object.

View File

@ -58,7 +58,7 @@ func NewPdfPageResourcesFromDict(dict *core.PdfObjectDictionary) (*PdfPageResour
if obj := dict.Get("XObject"); obj != nil {
r.XObject = obj
}
if obj := dict.Get("Font"); obj != nil {
if obj := core.ResolveReference(dict.Get("Font")); obj != nil {
r.Font = obj
}
if obj := dict.Get("ProcSet"); obj != nil {

View File

@ -403,7 +403,7 @@ func (w *PdfWriter) SetOCProperties(ocProperties core.PdfObject) error {
common.Log.Trace("Setting OC Properties...")
dict.Set("OCProperties", ocProperties)
// Any risk of infinite loops?
w.addObjects(ocProperties)
return w.addObjects(ocProperties)
}
return nil
@ -418,8 +418,7 @@ func (w *PdfWriter) SetNamedDestinations(names core.PdfObject) error {
common.Log.Trace("Setting catalog Names...")
w.catalog.Set("Names", names)
w.addObjects(names)
return nil
return w.addObjects(names)
}
// SetOptimizer sets the optimizer to optimize PDF before writing.
@ -487,8 +486,7 @@ func (w *PdfWriter) addObjects(obj core.PdfObject) error {
common.Log.Trace("Dict")
common.Log.Trace("- %s", obj)
for _, k := range dict.Keys() {
v := dict.Get(k)
common.Log.Trace("Key %s", k)
v := core.ResolveReference(dict.Get(k))
if k != "Parent" {
err := w.addObjects(v)
if err != nil {
@ -526,7 +524,7 @@ func (w *PdfWriter) addObjects(obj core.PdfObject) error {
return errors.New("array is nil")
}
for _, v := range arr.Elements() {
err := w.addObjects(v)
err := w.addObjects(core.ResolveReference(v))
if err != nil {
return err
}
@ -548,34 +546,27 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
procPage(page)
obj := page.ToPdfObject()
// Resolve references if page reader is lazy.
if r := page.reader; r != nil && r.isLazy {
if err := core.ResolveReferencesDeep(obj, nil); err != nil {
return nil
}
}
common.Log.Trace("==========")
common.Log.Trace("Appending to page list %T", obj)
pageObj, ok := obj.(*core.PdfIndirectObject)
pageObj, ok := core.GetIndirect(obj)
if !ok {
return errors.New("page should be an indirect object")
}
common.Log.Trace("%s", pageObj)
common.Log.Trace("%s", pageObj.PdfObject)
pDict, ok := pageObj.PdfObject.(*core.PdfObjectDictionary)
pDict, ok := core.GetDict(pageObj.PdfObject)
if !ok {
return errors.New("page object should be a dictionary")
}
otype, ok := pDict.Get("Type").(*core.PdfObjectName)
otype, ok := core.GetName(pDict.Get("Type"))
if !ok {
return fmt.Errorf("page should have a Type key with a value of type name (%T)", pDict.Get("Type"))
}
if *otype != "Page" {
if otype.String() != "Page" {
return errors.New("field Type != Page (Required)")
}
@ -585,7 +576,7 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
common.Log.Trace("Page Parent: %T (%v)", pDict.Get("Parent"), hasParent)
for hasParent {
common.Log.Trace("Page Parent: %T", parent)
parentDict, ok := parent.PdfObject.(*core.PdfObjectDictionary)
parentDict, ok := core.GetDict(parent.PdfObject)
if !ok {
return errors.New("invalid Parent object")
}
@ -614,16 +605,16 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
pageObj.PdfObject = pDict
// Add to Pages.
pagesDict, ok := w.pages.PdfObject.(*core.PdfObjectDictionary)
pagesDict, ok := core.GetDict(w.pages.PdfObject)
if !ok {
return errors.New("invalid Pages obj (not a dict)")
}
kids, ok := pagesDict.Get("Kids").(*core.PdfObjectArray)
kids, ok := core.GetArray(pagesDict.Get("Kids"))
if !ok {
return errors.New("invalid Pages Kids obj (not an array)")
}
kids.Append(pageObj)
pageCount, ok := pagesDict.Get("Count").(*core.PdfObjectInteger)
pageCount, ok := core.GetInt(pagesDict.Get("Count"))
if !ok {
return errors.New("invalid Pages Count object (not an integer)")
}
@ -828,7 +819,7 @@ func (w *PdfWriter) updateObjectNumbers() {
o.ObjectNumber = objNum
o.GenerationNumber = 0
default:
common.Log.Debug("ERROR: Unknown type %T - skipping")
common.Log.Debug("ERROR: Unknown type %T - skipping", o)
continue
}