mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
Lazy loading improvements (#131)
* Simplify lazy loading logic, remove redundancy. Fixes, improved performance.
* Additional lazy reader fixes
* Make core.IsNullObject method resolve references
* Adapt appender test cases for lazy reader usage
This commit is contained in:
parent
1c32554f09
commit
d2e7eda95f
@ -541,7 +541,7 @@ func GetNumberAsFloat(obj PdfObject) (float64, error) {
|
||||
|
||||
// IsNullObject returns true if `obj` is a PdfObjectNull.
|
||||
func IsNullObject(obj PdfObject) bool {
|
||||
_, isNull := obj.(*PdfObjectNull)
|
||||
_, isNull := TraceToDirectObject(obj).(*PdfObjectNull)
|
||||
return isNull
|
||||
}
|
||||
|
||||
|
@ -20,8 +20,8 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/crypto/pkcs12"
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/annotator"
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/model"
|
||||
"github.com/unidoc/unipdf/v3/model/sighandler"
|
||||
@ -1072,8 +1072,9 @@ func TestAppenderSignMultiple(t *testing.T) {
|
||||
t.Fatalf("page annotations != %d (got %d)", i, len(annotations))
|
||||
}
|
||||
for j, annot := range annotations {
|
||||
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
|
||||
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
|
||||
annotPage := core.ResolveReference(annot.P)
|
||||
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
|
||||
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
|
||||
}
|
||||
|
||||
appender, err := model.NewPdfAppender(pdfReader)
|
||||
@ -1314,8 +1315,9 @@ func TestAppenderSignMultipleAppearances(t *testing.T) {
|
||||
t.Fatalf("page annotations != %d (got %d)", i, len(annotations))
|
||||
}
|
||||
for j, annot := range annotations {
|
||||
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
|
||||
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annot.P)
|
||||
annotPage := core.ResolveReference(annot.P)
|
||||
t.Logf("i=%d Annots page object equal? %v == %v?", j, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
|
||||
require.Equal(t, pdfReader.PageList[0].GetContainingPdfObject(), annotPage)
|
||||
}
|
||||
|
||||
appender, err := model.NewPdfAppender(pdfReader)
|
||||
|
@ -125,7 +125,7 @@ func (r *PdfReader) newPdfPageFromDict(p *core.PdfObjectDictionary) (*PdfPage, e
|
||||
page.LastModified = &lastmod
|
||||
}
|
||||
|
||||
if obj := d.Get("Resources"); obj != nil {
|
||||
if obj := d.Get("Resources"); obj != nil && !core.IsNullObject(obj) {
|
||||
dict, ok := core.GetDict(obj)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid resource dictionary (%T)", obj)
|
||||
@ -736,7 +736,7 @@ func (p *PdfPage) AddContentStreamByString(contentStr string) error {
|
||||
if p.Contents == nil {
|
||||
// If not set, place it directly.
|
||||
p.Contents = stream
|
||||
} else if contArray, isArray := core.TraceToDirectObject(p.Contents).(*core.PdfObjectArray); isArray {
|
||||
} else if contArray, isArray := core.GetArray(p.Contents); isArray {
|
||||
// If an array of content streams, append it.
|
||||
contArray.Append(stream)
|
||||
} else {
|
||||
|
@ -614,79 +614,7 @@ func (r *PdfReader) resolveReference(ref *core.PdfObjectReference) (core.PdfObje
|
||||
* GH: Are we fully protected against circular references? (Add tests).
|
||||
*/
|
||||
func (r *PdfReader) traverseObjectData(o core.PdfObject) error {
|
||||
common.Log.Trace("Traverse object data")
|
||||
if _, isTraversed := r.traversed[o]; isTraversed {
|
||||
common.Log.Trace("-Already traversed...")
|
||||
return nil
|
||||
}
|
||||
r.traversed[o] = struct{}{}
|
||||
|
||||
if io, isIndirectObj := o.(*core.PdfIndirectObject); isIndirectObj {
|
||||
common.Log.Trace("io: %s", io)
|
||||
common.Log.Trace("- %s", io.PdfObject)
|
||||
err := r.traverseObjectData(io.PdfObject)
|
||||
return err
|
||||
}
|
||||
|
||||
if so, isStreamObj := o.(*core.PdfObjectStream); isStreamObj {
|
||||
err := r.traverseObjectData(so.PdfObjectDictionary)
|
||||
return err
|
||||
}
|
||||
|
||||
if dict, isDict := o.(*core.PdfObjectDictionary); isDict {
|
||||
common.Log.Trace("- dict: %s", dict)
|
||||
for _, name := range dict.Keys() {
|
||||
v := dict.Get(name)
|
||||
if ref, isRef := v.(*core.PdfObjectReference); isRef {
|
||||
resolvedObj, _, err := r.resolveReference(ref)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dict.Set(name, resolvedObj)
|
||||
err = r.traverseObjectData(resolvedObj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
err := r.traverseObjectData(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if arr, isArray := o.(*core.PdfObjectArray); isArray {
|
||||
common.Log.Trace("- array: %s", arr)
|
||||
for idx, v := range arr.Elements() {
|
||||
if ref, isRef := v.(*core.PdfObjectReference); isRef {
|
||||
resolvedObj, _, err := r.resolveReference(ref)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
arr.Set(idx, resolvedObj)
|
||||
|
||||
err = r.traverseObjectData(resolvedObj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
err := r.traverseObjectData(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, isRef := o.(*core.PdfObjectReference); isRef {
|
||||
common.Log.Debug("ERROR: Reader tracing a reference!")
|
||||
return errors.New("reader tracing a reference")
|
||||
}
|
||||
|
||||
return nil
|
||||
return core.ResolveReferencesDeep(o, r.traversed)
|
||||
}
|
||||
|
||||
// PageFromIndirectObject returns the PdfPage and page number for a given indirect object.
|
||||
|
@ -58,7 +58,7 @@ func NewPdfPageResourcesFromDict(dict *core.PdfObjectDictionary) (*PdfPageResour
|
||||
if obj := dict.Get("XObject"); obj != nil {
|
||||
r.XObject = obj
|
||||
}
|
||||
if obj := dict.Get("Font"); obj != nil {
|
||||
if obj := core.ResolveReference(dict.Get("Font")); obj != nil {
|
||||
r.Font = obj
|
||||
}
|
||||
if obj := dict.Get("ProcSet"); obj != nil {
|
||||
|
@ -403,7 +403,7 @@ func (w *PdfWriter) SetOCProperties(ocProperties core.PdfObject) error {
|
||||
common.Log.Trace("Setting OC Properties...")
|
||||
dict.Set("OCProperties", ocProperties)
|
||||
// Any risk of infinite loops?
|
||||
w.addObjects(ocProperties)
|
||||
return w.addObjects(ocProperties)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -418,8 +418,7 @@ func (w *PdfWriter) SetNamedDestinations(names core.PdfObject) error {
|
||||
|
||||
common.Log.Trace("Setting catalog Names...")
|
||||
w.catalog.Set("Names", names)
|
||||
w.addObjects(names)
|
||||
return nil
|
||||
return w.addObjects(names)
|
||||
}
|
||||
|
||||
// SetOptimizer sets the optimizer to optimize PDF before writing.
|
||||
@ -487,8 +486,7 @@ func (w *PdfWriter) addObjects(obj core.PdfObject) error {
|
||||
common.Log.Trace("Dict")
|
||||
common.Log.Trace("- %s", obj)
|
||||
for _, k := range dict.Keys() {
|
||||
v := dict.Get(k)
|
||||
common.Log.Trace("Key %s", k)
|
||||
v := core.ResolveReference(dict.Get(k))
|
||||
if k != "Parent" {
|
||||
err := w.addObjects(v)
|
||||
if err != nil {
|
||||
@ -526,7 +524,7 @@ func (w *PdfWriter) addObjects(obj core.PdfObject) error {
|
||||
return errors.New("array is nil")
|
||||
}
|
||||
for _, v := range arr.Elements() {
|
||||
err := w.addObjects(v)
|
||||
err := w.addObjects(core.ResolveReference(v))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -548,34 +546,27 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
|
||||
procPage(page)
|
||||
obj := page.ToPdfObject()
|
||||
|
||||
// Resolve references if page reader is lazy.
|
||||
if r := page.reader; r != nil && r.isLazy {
|
||||
if err := core.ResolveReferencesDeep(obj, nil); err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
common.Log.Trace("==========")
|
||||
common.Log.Trace("Appending to page list %T", obj)
|
||||
|
||||
pageObj, ok := obj.(*core.PdfIndirectObject)
|
||||
pageObj, ok := core.GetIndirect(obj)
|
||||
if !ok {
|
||||
return errors.New("page should be an indirect object")
|
||||
}
|
||||
common.Log.Trace("%s", pageObj)
|
||||
common.Log.Trace("%s", pageObj.PdfObject)
|
||||
|
||||
pDict, ok := pageObj.PdfObject.(*core.PdfObjectDictionary)
|
||||
pDict, ok := core.GetDict(pageObj.PdfObject)
|
||||
if !ok {
|
||||
return errors.New("page object should be a dictionary")
|
||||
}
|
||||
|
||||
otype, ok := pDict.Get("Type").(*core.PdfObjectName)
|
||||
otype, ok := core.GetName(pDict.Get("Type"))
|
||||
if !ok {
|
||||
return fmt.Errorf("page should have a Type key with a value of type name (%T)", pDict.Get("Type"))
|
||||
|
||||
}
|
||||
if *otype != "Page" {
|
||||
if otype.String() != "Page" {
|
||||
return errors.New("field Type != Page (Required)")
|
||||
}
|
||||
|
||||
@ -585,7 +576,7 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
|
||||
common.Log.Trace("Page Parent: %T (%v)", pDict.Get("Parent"), hasParent)
|
||||
for hasParent {
|
||||
common.Log.Trace("Page Parent: %T", parent)
|
||||
parentDict, ok := parent.PdfObject.(*core.PdfObjectDictionary)
|
||||
parentDict, ok := core.GetDict(parent.PdfObject)
|
||||
if !ok {
|
||||
return errors.New("invalid Parent object")
|
||||
}
|
||||
@ -614,16 +605,16 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
|
||||
pageObj.PdfObject = pDict
|
||||
|
||||
// Add to Pages.
|
||||
pagesDict, ok := w.pages.PdfObject.(*core.PdfObjectDictionary)
|
||||
pagesDict, ok := core.GetDict(w.pages.PdfObject)
|
||||
if !ok {
|
||||
return errors.New("invalid Pages obj (not a dict)")
|
||||
}
|
||||
kids, ok := pagesDict.Get("Kids").(*core.PdfObjectArray)
|
||||
kids, ok := core.GetArray(pagesDict.Get("Kids"))
|
||||
if !ok {
|
||||
return errors.New("invalid Pages Kids obj (not an array)")
|
||||
}
|
||||
kids.Append(pageObj)
|
||||
pageCount, ok := pagesDict.Get("Count").(*core.PdfObjectInteger)
|
||||
pageCount, ok := core.GetInt(pagesDict.Get("Count"))
|
||||
if !ok {
|
||||
return errors.New("invalid Pages Count object (not an integer)")
|
||||
}
|
||||
@ -828,7 +819,7 @@ func (w *PdfWriter) updateObjectNumbers() {
|
||||
o.ObjectNumber = objNum
|
||||
o.GenerationNumber = 0
|
||||
default:
|
||||
common.Log.Debug("ERROR: Unknown type %T - skipping")
|
||||
common.Log.Debug("ERROR: Unknown type %T - skipping", o)
|
||||
continue
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user