From af5ea0bd0022eaf19d9ef2ca8466be78d9e57093 Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Mon, 5 Dec 2016 00:46:27 +0000 Subject: [PATCH] Improved outline handling (in progress) Structure element currently scrapped. Later to add support for that. Then will fix the SE of the outline item. Still a bit of testing and validation required. --- pdf/model/outlines.go | 49 +++++++++++++++--- pdf/model/page.go | 6 +++ pdf/model/reader.go | 118 ++++++++++++++++++++++++++---------------- pdf/model/writer.go | 5 +- 4 files changed, 122 insertions(+), 56 deletions(-) diff --git a/pdf/model/outlines.go b/pdf/model/outlines.go index 31e63637..1a665657 100644 --- a/pdf/model/outlines.go +++ b/pdf/model/outlines.go @@ -21,7 +21,8 @@ type PdfOutlineTreeNode struct { // PDF outline dictionary (Table 152 - p. 376). type PdfOutline struct { PdfOutlineTreeNode - Count *int64 + Parent *PdfOutlineTreeNode + Count *int64 primitive *PdfIndirectObject } @@ -85,8 +86,14 @@ func NewOutlineBookmark(title string, page *PdfIndirectObject) *PdfOutlineItem { } // Does not traverse the tree. -func newPdfOutlineFromDict(dict *PdfObjectDictionary) (*PdfOutline, error) { +func newPdfOutlineFromIndirectObject(container *PdfIndirectObject) (*PdfOutline, error) { + dict, isDict := container.PdfObject.(*PdfObjectDictionary) + if !isDict { + return nil, fmt.Errorf("Outline object not a dictionary") + } + outline := PdfOutline{} + outline.primitive = container outline.context = &outline if obj, hasType := (*dict)["Type"]; hasType { @@ -114,8 +121,14 @@ func newPdfOutlineFromDict(dict *PdfObjectDictionary) (*PdfOutline, error) { } // Does not traverse the tree. -func (this *PdfReader) newPdfOutlineItemFromDict(dict *PdfObjectDictionary) (*PdfOutlineItem, error) { +func (this *PdfReader) newPdfOutlineItemFromIndirectObject(container *PdfIndirectObject) (*PdfOutlineItem, error) { + dict, isDict := container.PdfObject.(*PdfObjectDictionary) + if !isDict { + return nil, fmt.Errorf("Outline object not a dictionary") + } + item := PdfOutlineItem{} + item.primitive = container item.context = &item // Title (required). @@ -164,11 +177,16 @@ func (this *PdfReader) newPdfOutlineItemFromDict(dict *PdfObjectDictionary) (*Pd return nil, err } } - if obj, hasKey := (*dict)["SE"]; hasKey { - item.SE, err = this.traceToObject(obj) - if err != nil { - return nil, err - } + if _, hasKey := (*dict)["SE"]; hasKey { + // XXX: To add structure element support. + // Currently not supporting structure elements. + item.SE = nil + /* + item.SE, err = this.traceToObject(obj) + if err != nil { + return nil, err + } + */ } if obj, hasKey := (*dict)["C"]; hasKey { item.C, err = this.traceToObject(obj) @@ -213,6 +231,7 @@ func (this *PdfOutline) GetContainingPdfObject() PdfObject { // Recursively build the Outline tree PDF object. func (this *PdfOutline) ToPdfObject() PdfObject { + fmt.Printf("Outline primitive: %+v\n", this.primitive) container := this.primitive dict := container.PdfObject.(*PdfObjectDictionary) @@ -227,6 +246,10 @@ func (this *PdfOutline) ToPdfObject() PdfObject { //PdfObjectConverterCache[this.Last.getOuter()] } + if this.Parent != nil { + dict.Set("Parent", this.Parent.getOuter().GetContainingPdfObject()) + } + return container } @@ -244,6 +267,16 @@ func (this *PdfOutlineItem) ToPdfObject() PdfObject { if this.A != nil { (*dict)["A"] = this.A } + if _, hasSE := (*dict)["SE"]; hasSE { + // XXX: Currently not supporting structure element hierarchy. + // Remove it. + delete(*dict, "SE") + } + /* + if this.SE != nil { + (*dict)["SE"] = this.SE + } + */ if this.C != nil { (*dict)["C"] = this.C } diff --git a/pdf/model/page.go b/pdf/model/page.go index 673c7737..0422a6ee 100644 --- a/pdf/model/page.go +++ b/pdf/model/page.go @@ -69,8 +69,14 @@ func NewPdfPage() *PdfPage { return &page } +func (this *PdfPage) setContainer(container *PdfIndirectObject) { + container.PdfObject = this.pageDict + this.primitive = container +} + // Build a PdfPage based on the underlying dictionary. // Used in loading existing PDF files. +// Note that a new container is created (indirect object). func (reader *PdfReader) newPdfPageFromDict(p *PdfObjectDictionary) (*PdfPage, error) { page := NewPdfPage() diff --git a/pdf/model/reader.go b/pdf/model/reader.go index 85b00d01..4ed8f7a0 100644 --- a/pdf/model/reader.go +++ b/pdf/model/reader.go @@ -255,7 +255,7 @@ func (this *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) { common.Log.Debug("Outline root dict: %v", dict) - outlineTree, err := this.buildOutlineTree(dict) + outlineTree, _, err := this.buildOutlineTree(outlineRoot, nil, nil) if err != nil { return nil, err } @@ -265,80 +265,105 @@ func (this *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) { } // Recursive build outline tree. -func (this *PdfReader) buildOutlineTree(obj PdfObject) (*PdfOutlineTreeNode, error) { - dict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary) - if !ok { - return nil, errors.New("Not a dictionary object") +// prev PdfObject, +// Input: The indirect object containing an Outlines or Outline item dictionary. +// Parent, Prev are the parent or previous node in the hierarchy. +// The function returns the corresponding tree node and the last node which is used +// for setting the Last pointer of the tree node structures. +func (this *PdfReader) buildOutlineTree(obj PdfObject, parent *PdfOutlineTreeNode, prev *PdfOutlineTreeNode) (*PdfOutlineTreeNode, *PdfOutlineTreeNode, error) { + container, isInd := obj.(*PdfIndirectObject) + if !isInd { + return nil, nil, fmt.Errorf("Outline container not an indirect object %T", obj) } - common.Log.Debug("build outline tree: dict: %v", dict) + dict, ok := container.PdfObject.(*PdfObjectDictionary) + if !ok { + return nil, nil, errors.New("Not a dictionary object") + } + common.Log.Debug("build outline tree: dict: %v (%v) p: %p", dict, container, container) if _, hasTitle := (*dict)["Title"]; hasTitle { - // Outline item has a title. - outlineItem, err := this.newPdfOutlineItemFromDict(dict) + // Outline item has a title. (required) + outlineItem, err := this.newPdfOutlineItemFromIndirectObject(container) if err != nil { - return nil, err + return nil, nil, err } + outlineItem.Parent = parent + outlineItem.Prev = prev + + if firstObj, hasChildren := (*dict)["First"]; hasChildren { + firstObj, err = this.traceToObject(firstObj) + if err != nil { + return nil, nil, err + } + if _, isNull := firstObj.(*PdfObjectNull); !isNull { + first, last, err := this.buildOutlineTree(firstObj, &outlineItem.PdfOutlineTreeNode, nil) + if err != nil { + return nil, nil, err + } + outlineItem.First = first + outlineItem.Last = last + } + } + // Resolve the reference to next if nextObj, hasNext := (*dict)["Next"]; hasNext { nextObj, err = this.traceToObject(nextObj) if err != nil { - return nil, err + return nil, nil, err } - nextObj = TraceToDirectObject(nextObj) if _, isNull := nextObj.(*PdfObjectNull); !isNull { - nextDict, ok := nextObj.(*PdfObjectDictionary) - if !ok { - return nil, fmt.Errorf("Next not a dictionary object (%T)", nextObj) - } - outlineItem.Next, err = this.buildOutlineTree(nextDict) + next, last, err := this.buildOutlineTree(nextObj, parent, &outlineItem.PdfOutlineTreeNode) if err != nil { - return nil, err + return nil, nil, err } + outlineItem.Next = next + return &outlineItem.PdfOutlineTreeNode, last, nil } } - if firstObj, hasChildren := (*dict)["First"]; hasChildren { - firstObj, err = this.traceToObject(firstObj) - if err != nil { - return nil, err - } - firstObj = TraceToDirectObject(firstObj) - if _, isNull := firstObj.(*PdfObjectNull); !isNull { - firstDict, ok := firstObj.(*PdfObjectDictionary) - if !ok { - return nil, fmt.Errorf("First not a dictionary object (%T)", firstObj) - } - outlineItem.First, err = this.buildOutlineTree(firstDict) - if err != nil { - return nil, err - } - } - } - return &outlineItem.PdfOutlineTreeNode, nil + + return &outlineItem.PdfOutlineTreeNode, &outlineItem.PdfOutlineTreeNode, nil } else { // Outline dictionary (structure element). - outline, err := newPdfOutlineFromDict(dict) + + outline, err := newPdfOutlineFromIndirectObject(container) if err != nil { - return nil, err + return nil, nil, err } + outline.Parent = parent + //outline.Prev = parent if firstObj, hasChildren := (*dict)["First"]; hasChildren { firstObj, err = this.traceToObject(firstObj) if err != nil { - return nil, err + return nil, nil, err } - firstObj = TraceToDirectObject(firstObj) if _, isNull := firstObj.(*PdfObjectNull); !isNull { - firstDict, ok := firstObj.(*PdfObjectDictionary) - if !ok { - return nil, fmt.Errorf("First not a dictionary object (%T)", firstObj) - } - outline.First, err = this.buildOutlineTree(firstDict) + first, last, err := this.buildOutlineTree(firstObj, &outline.PdfOutlineTreeNode, nil) if err != nil { - return nil, err + return nil, nil, err } + outline.First = first + outline.Last = last } } - return &outline.PdfOutlineTreeNode, nil + + /* + if nextObj, hasNext := (*dict)["Next"]; hasNext { + nextObj, err = this.traceToObject(nextObj) + if err != nil { + return nil, nil, err + } + if _, isNull := nextObj.(*PdfObjectNull); !isNull { + next, last, err := this.buildOutlineTree(nextObj, parent, &outline.PdfOutlineTreeNode) + if err != nil { + return nil, nil, err + } + outline.Next = next + return &outline.PdfOutlineTreeNode, last, nil + } + }*/ + + return &outline.PdfOutlineTreeNode, &outline.PdfOutlineTreeNode, nil } } @@ -494,6 +519,7 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec if err != nil { return err } + p.setContainer(node) if parent != nil { // Set the parent (in case missing or incorrect). diff --git a/pdf/model/writer.go b/pdf/model/writer.go index 35a15552..52cbdea9 100644 --- a/pdf/model/writer.go +++ b/pdf/model/writer.go @@ -154,7 +154,7 @@ func (this *PdfWriter) addObjects(obj PdfObject) error { if io, isIndirectObj := obj.(*PdfIndirectObject); isIndirectObj { common.Log.Debug("Indirect") - common.Log.Debug("- %s", obj) + common.Log.Debug("- %s (%p)", obj, io) common.Log.Debug("- %s", io.PdfObject) if this.addObject(io) { err := this.addObjects(io.PdfObject) @@ -561,8 +561,9 @@ func (this *PdfWriter) Write(ws io.WriteSeeker) error { common.Log.Debug("Write()") // Outlines. if this.outlineTree != nil { - common.Log.Debug("OutlineTree: %v", this.outlineTree) + common.Log.Debug("OutlineTree: %+v", this.outlineTree) outlines := this.outlineTree.ToPdfObject() + common.Log.Debug("Outlines: %+v (%T, p:%p)", outlines, outlines, outlines) (*this.catalog)["Outlines"] = outlines err := this.addObjects(outlines) if err != nil {