unipdf/pdf/model/reader.go

813 lines
23 KiB
Go
Raw Normal View History

/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */
package model
import (
"errors"
"fmt"
"io"
"strings"
2016-07-17 19:59:17 +00:00
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
)
2018-08-14 03:02:19 +00:00
// PdfReader represents a PDF file reader. It is a frontend to the lower level parsing mechanism and provides
// a higher level access to work with PDF structure and information, such as the page structure etc.
type PdfReader struct {
parser *PdfParser
root PdfObject
pages *PdfObjectDictionary
pageList []*PdfIndirectObject
PageList []*PdfPage
pageCount int
catalog *PdfObjectDictionary
2016-08-16 09:36:24 +00:00
outlineTree *PdfOutlineTreeNode
2016-09-05 09:57:16 +00:00
AcroForm *PdfAcroForm
2018-07-14 03:07:24 +00:00
modelManager *modelManager
// For tracking traversal (cache).
traversed map[PdfObject]bool
}
2018-08-14 03:02:19 +00:00
// NewPdfReader returns a new PdfReader for an input io.ReadSeeker interface. Can be used to read PDF from
// memory or file. Immediately loads and traverses the PDF structure including pages and page contents (if
// not encrypted).
func NewPdfReader(rs io.ReadSeeker) (*PdfReader, error) {
pdfReader := &PdfReader{}
pdfReader.traversed = map[PdfObject]bool{}
2018-07-14 03:07:24 +00:00
pdfReader.modelManager = newModelManager()
// Create the parser, loads the cross reference table and trailer.
parser, err := NewParser(rs)
if err != nil {
return nil, err
}
pdfReader.parser = parser
isEncrypted, err := pdfReader.IsEncrypted()
if err != nil {
return nil, err
}
// Load pdf doc structure if not encrypted.
if !isEncrypted {
err = pdfReader.loadStructure()
if err != nil {
return nil, err
}
}
return pdfReader, nil
}
// PdfVersion returns the version of the PDF file as reported by the underlying parser.
func (this *PdfReader) PdfVersion() string {
	return this.parser.PdfVersion()
}
2018-08-14 03:02:19 +00:00
// IsEncrypted returns true if the PDF file is encrypted. The check is delegated to the
// underlying parser, whose error (if any) is returned unchanged.
func (this *PdfReader) IsEncrypted() (bool, error) {
	return this.parser.IsEncrypted()
}
// GetEncryptionMethod returns a descriptive information string about the encryption method
// used. The string starts with the security handler filter name, followed by a description
// of the algorithm selected by the V value and finally the access permissions.
//
// An empty string is returned when the document has no crypter (i.e. it is not encrypted).
func (r *PdfReader) GetEncryptionMethod() string {
	crypter := r.parser.GetCrypter()
	if crypter == nil {
		// Not encrypted: there is nothing to describe.
		return ""
	}

	str := crypter.Filter + " - "

	switch {
	case crypter.V == 0:
		str += "Undocumented algorithm"
	case crypter.V == 1:
		// RC4 or AES (bits: 40)
		str += "RC4: 40 bits"
	case crypter.V == 2:
		str += fmt.Sprintf("RC4: %d bits", crypter.Length)
	case crypter.V == 3:
		str += "Unpublished algorithm"
	case crypter.V >= 4:
		// Look at CF, StmF, StrF
		str += fmt.Sprintf("Stream filter: %s - String filter: %s", crypter.StreamFilter, crypter.StringFilter)
		str += "; Crypt filters:"
		for name, cf := range crypter.CryptFilters {
			str += fmt.Sprintf(" - %s: %s (%d)", name, cf.Cfm, cf.Length)
		}
	}

	perms := crypter.GetAccessPermissions()
	str += fmt.Sprintf(" - %#v", perms)

	return str
}
// Decrypt decrypts the PDF file with a specified password. Also tries to
// decrypt with an empty password. Returns true if successful,
// false otherwise.
func (this *PdfReader) Decrypt(password []byte) (bool, error) {
success, err := this.parser.Decrypt(password)
if err != nil {
return false, err
}
if !success {
return false, nil
}
err = this.loadStructure()
if err != nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Fail to load structure (%s)", err)
return false, err
}
return true, nil
}
2018-08-14 03:02:19 +00:00
// CheckAccessRights checks access rights and permissions for a specified password. If either
// the user or the owner password is specified, full rights are granted; otherwise the access
// rights are specified by the Permissions flag.
//
// The bool flag indicates that the user can access and view the file.
// The AccessPermissions shows what access the user has for editing etc.
// An error is returned if there was a problem performing the authentication.
//
// The check is delegated to the underlying parser.
func (this *PdfReader) CheckAccessRights(password []byte) (bool, AccessPermissions, error) {
	return this.parser.CheckAccessRights(password)
}
// Loads the structure of the pdf file: pages, outlines, etc.
func (this *PdfReader) loadStructure() error {
if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
return fmt.Errorf("File need to be decrypted first")
}
trailerDict := this.parser.GetTrailer()
if trailerDict == nil {
return fmt.Errorf("Missing trailer")
}
// Catalog.
root, ok := trailerDict.Get("Root").(*PdfObjectReference)
if !ok {
return fmt.Errorf("Invalid Root (trailer: %s)", trailerDict)
}
oc, err := this.parser.LookupByReference(*root)
if err != nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Failed to read root element catalog: %s", err)
return err
}
pcatalog, ok := oc.(*PdfIndirectObject)
if !ok {
common.Log.Debug("ERROR: Missing catalog: (root %q) (trailer %s)", oc, *trailerDict)
return errors.New("Missing catalog")
}
catalog, ok := (*pcatalog).PdfObject.(*PdfObjectDictionary)
if !ok {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Invalid catalog (%s)", pcatalog.PdfObject)
return errors.New("Invalid catalog")
}
common.Log.Trace("Catalog: %s", catalog)
// Pages.
pagesRef, ok := catalog.Get("Pages").(*PdfObjectReference)
if !ok {
return errors.New("Pages in catalog should be a reference")
}
op, err := this.parser.LookupByReference(*pagesRef)
if err != nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Failed to read pages")
return err
}
ppages, ok := op.(*PdfIndirectObject)
if !ok {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Pages object invalid")
common.Log.Debug("op: %p", ppages)
return errors.New("Pages object invalid")
}
pages, ok := ppages.PdfObject.(*PdfObjectDictionary)
if !ok {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Pages object invalid (%s)", ppages)
return errors.New("Pages object invalid")
}
pageCount, ok := pages.Get("Count").(*PdfObjectInteger)
if !ok {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Pages count object invalid")
return errors.New("Pages count invalid")
}
this.root = root
this.catalog = catalog
this.pages = pages
this.pageCount = int(*pageCount)
this.pageList = []*PdfIndirectObject{}
traversedPageNodes := map[PdfObject]bool{}
err = this.buildPageList(ppages, nil, traversedPageNodes)
if err != nil {
return err
}
common.Log.Trace("---")
common.Log.Trace("TOC")
common.Log.Trace("Pages")
common.Log.Trace("%d: %s", len(this.pageList), this.pageList)
// Outlines.
2016-08-16 17:57:23 +00:00
this.outlineTree, err = this.loadOutlines()
2016-08-16 09:36:24 +00:00
if err != nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Failed to build outline tree (%s)", err)
2016-08-16 09:36:24 +00:00
return err
}
// Load interactive forms and fields.
this.AcroForm, err = this.loadForms()
2016-09-05 09:57:16 +00:00
if err != nil {
return err
}
return nil
}
// traceToObjectWrapper follows obj for as long as it is a reference and returns the first
// object that is not a reference. That object can be an indirect object or any direct pdf
// object. Every reference followed is recorded in refList so that a reference seen twice is
// reported as an error instead of being followed forever.
//
// Example circular reference.
// 1 0 obj << /Next 2 0 R >>
// 2 0 obj << /Next 1 0 R >>
func (r *PdfReader) traceToObjectWrapper(obj PdfObject, refList map[*PdfObjectReference]bool) (PdfObject, error) {
	for {
		ref, isRef := obj.(*PdfObjectReference)
		if !isRef {
			// Not a reference: this is the object we were looking for.
			return obj, nil
		}

		// Make sure not already visited (circular ref).
		if _, seen := refList[ref]; seen {
			return nil, errors.New("Circular reference")
		}
		refList[ref] = true

		target, err := r.parser.LookupByReference(*ref)
		if err != nil {
			return nil, err
		}
		obj = target
	}
}
// traceToObject resolves obj through any chain of references, starting with an empty set of
// visited references. See traceToObjectWrapper for the details.
func (r *PdfReader) traceToObject(obj PdfObject) (PdfObject, error) {
	return r.traceToObjectWrapper(obj, make(map[*PdfObjectReference]bool))
}
2016-08-16 17:57:23 +00:00
func (this *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) {
if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
return nil, fmt.Errorf("File need to be decrypted first")
}
2016-08-16 09:36:24 +00:00
// Has outlines? Otherwise return an empty outlines structure.
catalog := this.catalog
outlinesObj := catalog.Get("Outlines")
if outlinesObj == nil {
2016-08-19 09:13:12 +00:00
return nil, nil
}
common.Log.Trace("-Has outlines")
2016-08-16 09:36:24 +00:00
// Trace references to the object.
outlineRootObj, err := this.traceToObject(outlinesObj)
if err != nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Failed to read outlines")
return nil, err
}
common.Log.Trace("Outline root: %v", outlineRootObj)
if _, isNull := outlineRootObj.(*PdfObjectNull); isNull {
common.Log.Trace("Outline root is null - no outlines")
return nil, nil
}
outlineRoot, ok := outlineRootObj.(*PdfIndirectObject)
if !ok {
2016-08-19 09:13:12 +00:00
return nil, errors.New("Outline root should be an indirect object")
}
dict, ok := outlineRoot.PdfObject.(*PdfObjectDictionary)
if !ok {
2016-08-19 09:13:12 +00:00
return nil, errors.New("Outline indirect object should contain a dictionary")
}
common.Log.Trace("Outline root dict: %v", dict)
outlineTree, _, err := this.buildOutlineTree(outlineRoot, nil, nil)
2016-08-16 09:36:24 +00:00
if err != nil {
return nil, err
}
common.Log.Trace("Resulting outline tree: %v", outlineTree)
2016-08-16 17:57:23 +00:00
return outlineTree, nil
}
2016-08-16 17:57:23 +00:00
// Recursive build outline tree.
// prev PdfObject,
// Input: The indirect object containing an Outlines or Outline item dictionary.
// Parent, Prev are the parent or previous node in the hierarchy.
// The function returns the corresponding tree node and the last node which is used
// for setting the Last pointer of the tree node structures.
func (this *PdfReader) buildOutlineTree(obj PdfObject, parent *PdfOutlineTreeNode, prev *PdfOutlineTreeNode) (*PdfOutlineTreeNode, *PdfOutlineTreeNode, error) {
container, isInd := obj.(*PdfIndirectObject)
if !isInd {
return nil, nil, fmt.Errorf("Outline container not an indirect object %T", obj)
}
dict, ok := container.PdfObject.(*PdfObjectDictionary)
if !ok {
return nil, nil, errors.New("Not a dictionary object")
}
common.Log.Trace("build outline tree: dict: %v (%v) p: %p", dict, container, container)
if obj := dict.Get("Title"); obj != nil {
// Outline item has a title. (required)
outlineItem, err := this.newPdfOutlineItemFromIndirectObject(container)
2016-08-16 09:36:24 +00:00
if err != nil {
return nil, nil, err
2016-08-16 09:36:24 +00:00
}
outlineItem.Parent = parent
outlineItem.Prev = prev
if firstObj := dict.Get("First"); firstObj != nil {
firstObj, err = this.traceToObject(firstObj)
if err != nil {
return nil, nil, err
}
if _, isNull := firstObj.(*PdfObjectNull); !isNull {
first, last, err := this.buildOutlineTree(firstObj, &outlineItem.PdfOutlineTreeNode, nil)
2016-09-17 00:41:56 +00:00
if err != nil {
return nil, nil, err
2016-09-17 00:41:56 +00:00
}
outlineItem.First = first
outlineItem.Last = last
2016-08-16 09:36:24 +00:00
}
}
// Resolve the reference to next
if nextObj := dict.Get("Next"); nextObj != nil {
nextObj, err = this.traceToObject(nextObj)
if err != nil {
return nil, nil, err
}
if _, isNull := nextObj.(*PdfObjectNull); !isNull {
next, last, err := this.buildOutlineTree(nextObj, parent, &outlineItem.PdfOutlineTreeNode)
2016-09-17 00:41:56 +00:00
if err != nil {
return nil, nil, err
2016-09-17 00:41:56 +00:00
}
outlineItem.Next = next
return &outlineItem.PdfOutlineTreeNode, last, nil
2016-08-16 09:36:24 +00:00
}
}
return &outlineItem.PdfOutlineTreeNode, &outlineItem.PdfOutlineTreeNode, nil
2016-08-16 09:36:24 +00:00
} else {
// Outline dictionary (structure element).
outline, err := newPdfOutlineFromIndirectObject(container)
2016-08-16 09:36:24 +00:00
if err != nil {
return nil, nil, err
2016-08-16 09:36:24 +00:00
}
outline.Parent = parent
//outline.Prev = parent
2016-08-16 09:36:24 +00:00
if firstObj := dict.Get("First"); firstObj != nil {
// Has children...
firstObj, err = this.traceToObject(firstObj)
if err != nil {
return nil, nil, err
}
2016-09-17 00:41:56 +00:00
if _, isNull := firstObj.(*PdfObjectNull); !isNull {
first, last, err := this.buildOutlineTree(firstObj, &outline.PdfOutlineTreeNode, nil)
2016-09-17 00:41:56 +00:00
if err != nil {
return nil, nil, err
2016-09-17 00:41:56 +00:00
}
outline.First = first
outline.Last = last
2016-08-16 09:36:24 +00:00
}
}
/*
if nextObj, hasNext := (*dict)["Next"]; hasNext {
nextObj, err = this.traceToObject(nextObj)
if err != nil {
return nil, nil, err
}
if _, isNull := nextObj.(*PdfObjectNull); !isNull {
next, last, err := this.buildOutlineTree(nextObj, parent, &outline.PdfOutlineTreeNode)
if err != nil {
return nil, nil, err
}
outline.Next = next
return &outline.PdfOutlineTreeNode, last, nil
}
}*/
return &outline.PdfOutlineTreeNode, &outline.PdfOutlineTreeNode, nil
}
2016-08-16 09:36:24 +00:00
}
2018-08-14 03:02:19 +00:00
// GetOutlineTree returns the outline tree.
2016-08-18 18:15:57 +00:00
func (this *PdfReader) GetOutlineTree() *PdfOutlineTreeNode {
return this.outlineTree
}
2018-08-14 03:02:19 +00:00
// GetOutlinesFlattened returns a flattened list of tree nodes and titles.
func (this *PdfReader) GetOutlinesFlattened() ([]*PdfOutlineTreeNode, []string, error) {
outlineNodeList := []*PdfOutlineTreeNode{}
flattenedTitleList := []string{}
// Recursive flattening function.
var flattenFunc func(*PdfOutlineTreeNode, *[]*PdfOutlineTreeNode, *[]string, int)
flattenFunc = func(node *PdfOutlineTreeNode, outlineList *[]*PdfOutlineTreeNode, titleList *[]string, depth int) {
if node == nil {
return
}
if node.context == nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Missing node.context") // Should not happen ever.
return
}
2016-08-16 17:57:23 +00:00
if item, isItem := node.context.(*PdfOutlineItem); isItem {
*outlineList = append(*outlineList, &item.PdfOutlineTreeNode)
title := strings.Repeat(" ", depth*2) + item.Title.Str()
*titleList = append(*titleList, title)
if item.Next != nil {
flattenFunc(item.Next, outlineList, titleList, depth)
}
}
if node.First != nil {
title := strings.Repeat(" ", depth*2) + "+"
*titleList = append(*titleList, title)
flattenFunc(node.First, outlineList, titleList, depth+1)
}
}
flattenFunc(this.outlineTree, &outlineNodeList, &flattenedTitleList, 0)
return outlineNodeList, flattenedTitleList, nil
}
2018-08-14 03:02:19 +00:00
// loadForms loads the AcroForm.
func (this *PdfReader) loadForms() (*PdfAcroForm, error) {
if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
2016-08-25 08:01:15 +00:00
return nil, fmt.Errorf("File need to be decrypted first")
}
// Has forms?
catalog := this.catalog
obj := catalog.Get("AcroForm")
if obj == nil {
2016-08-25 08:01:15 +00:00
// Nothing to load.
2016-09-05 09:57:16 +00:00
return nil, nil
2016-08-25 08:01:15 +00:00
}
var err error
obj, err = this.traceToObject(obj)
if err != nil {
2016-09-05 09:57:16 +00:00
return nil, err
2016-08-25 08:01:15 +00:00
}
obj = TraceToDirectObject(obj)
if _, isNull := obj.(*PdfObjectNull); isNull {
common.Log.Trace("Acroform is a null object (empty)\n")
return nil, nil
}
formsDict, ok := obj.(*PdfObjectDictionary)
2016-08-25 08:01:15 +00:00
if !ok {
common.Log.Debug("Invalid AcroForm entry %T", obj)
common.Log.Debug("Does not have forms")
2016-09-05 09:57:16 +00:00
return nil, fmt.Errorf("Invalid acroform entry %T", obj)
2016-08-25 08:01:15 +00:00
}
common.Log.Trace("Has Acro forms")
2016-08-25 08:01:15 +00:00
// Load it.
// Ensure we have access to everything.
common.Log.Trace("Traverse the Acroforms structure")
err = this.traverseObjectData(formsDict)
if err != nil {
common.Log.Debug("ERROR: Unable to traverse AcroForms (%s)", err)
return nil, err
}
// Create the acro forms object.
acroForm, err := this.newPdfAcroFormFromDict(formsDict)
if err != nil {
return nil, err
}
2016-09-05 09:57:16 +00:00
return acroForm, nil
2016-08-25 08:01:15 +00:00
}
// lookupPageByObject is meant to look up the page corresponding to obj, which can be an
// indirect object, a direct object or a reference.
//
// NOTE: The lookup is not implemented; obj is ignored and the function always returns a
// "Page not found" error.
func (this *PdfReader) lookupPageByObject(obj PdfObject) (*PdfPage, error) {
	// can be indirect, direct, or reference
	// look up the corresponding page
	return nil, errors.New("Page not found")
}
// Build the table of contents.
// tree, ex: Pages -> Pages -> Pages -> Page
// Traverse through the whole thing recursively.
func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject, traversedPageNodes map[PdfObject]bool) error {
if node == nil {
return nil
}
if _, alreadyTraversed := traversedPageNodes[node]; alreadyTraversed {
common.Log.Debug("Cyclic recursion, skipping")
return nil
}
traversedPageNodes[node] = true
nodeDict, ok := node.PdfObject.(*PdfObjectDictionary)
if !ok {
return errors.New("Node not a dictionary")
}
objType, ok := (*nodeDict).Get("Type").(*PdfObjectName)
if !ok {
return errors.New("Node missing Type (Required)")
}
common.Log.Trace("buildPageList node type: %s", *objType)
if *objType == "Page" {
2016-08-16 09:36:24 +00:00
p, err := this.newPdfPageFromDict(nodeDict)
if err != nil {
return err
}
p.setContainer(node)
if parent != nil {
// Set the parent (in case missing or incorrect).
nodeDict.Set("Parent", parent)
}
this.pageList = append(this.pageList, node)
this.PageList = append(this.PageList, p)
return nil
}
if *objType != "Pages" {
2017-03-02 12:50:45 +00:00
common.Log.Debug("ERROR: Table of content containing non Page/Pages object! (%s)", objType)
return errors.New("Table of content containing non Page/Pages object!")
}
// A Pages object. Update the parent.
if parent != nil {
nodeDict.Set("Parent", parent)
}
// Resolve the object recursively.
err := this.traverseObjectData(node)
if err != nil {
return err
}
kidsObj, err := this.parser.Resolve(nodeDict.Get("Kids"))
if err != nil {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Failed loading Kids object")
return err
}
var kids *PdfObjectArray
kids, ok = kidsObj.(*PdfObjectArray)
if !ok {
kidsIndirect, isIndirect := kidsObj.(*PdfIndirectObject)
if !isIndirect {
return errors.New("Invalid Kids object")
}
kids, ok = kidsIndirect.PdfObject.(*PdfObjectArray)
if !ok {
return errors.New("Invalid Kids indirect object")
}
}
common.Log.Trace("Kids: %s", kids)
for idx, child := range kids.Elements() {
child, ok := child.(*PdfIndirectObject)
if !ok {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Page not indirect object - (%s)", child)
return errors.New("Page not indirect object")
}
kids.Set(idx, child)
err = this.buildPageList(child, node, traversedPageNodes)
if err != nil {
return err
}
}
return nil
}
// GetNumPages returns the number of pages in the document, i.e. the number of pages collected
// in the internal page list. An error is returned when the document has a crypter and has not
// been authenticated yet.
func (this *PdfReader) GetNumPages() (int, error) {
	if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
		return 0, fmt.Errorf("File need to be decrypted first")
	}
	return len(this.pageList), nil
}
// resolveReference returns the object ref points to. The parser's object cache, keyed by
// object number, is consulted first; on a miss the object is looked up through the parser and
// stored in the cache. The bool result tells whether the object came from the cache.
func (r *PdfReader) resolveReference(ref *PdfObjectReference) (PdfObject, bool, error) {
	objNum := int(ref.ObjectNumber)

	if cached, found := r.parser.ObjCache[objNum]; found {
		return cached, true, nil
	}

	common.Log.Trace("Reader Lookup ref: %s", ref)
	obj, err := r.parser.LookupByReference(*ref)
	if err != nil {
		return nil, false, err
	}
	r.parser.ObjCache[objNum] = obj

	return obj, false, nil
}
/*
* Recursively traverse through the page object data and look up
* references to indirect objects.
*
* GH: Are we fully protected against circular references? (Add tests).
*/
func (this *PdfReader) traverseObjectData(o PdfObject) error {
common.Log.Trace("Traverse object data")
if _, isTraversed := this.traversed[o]; isTraversed {
common.Log.Trace("-Already traversed...")
return nil
}
this.traversed[o] = true
if io, isIndirectObj := o.(*PdfIndirectObject); isIndirectObj {
common.Log.Trace("io: %s", io)
common.Log.Trace("- %s", io.PdfObject)
err := this.traverseObjectData(io.PdfObject)
return err
}
if so, isStreamObj := o.(*PdfObjectStream); isStreamObj {
err := this.traverseObjectData(so.PdfObjectDictionary)
return err
}
if dict, isDict := o.(*PdfObjectDictionary); isDict {
common.Log.Trace("- dict: %s", dict)
for _, name := range dict.Keys() {
v := dict.Get(name)
if ref, isRef := v.(*PdfObjectReference); isRef {
resolvedObj, _, err := this.resolveReference(ref)
if err != nil {
return err
}
dict.Set(name, resolvedObj)
err = this.traverseObjectData(resolvedObj)
if err != nil {
return err
}
} else {
err := this.traverseObjectData(v)
if err != nil {
return err
}
}
}
return nil
}
if arr, isArray := o.(*PdfObjectArray); isArray {
common.Log.Trace("- array: %s", arr)
for idx, v := range arr.Elements() {
if ref, isRef := v.(*PdfObjectReference); isRef {
resolvedObj, _, err := this.resolveReference(ref)
if err != nil {
return err
}
arr.Set(idx, resolvedObj)
err = this.traverseObjectData(resolvedObj)
if err != nil {
return err
}
} else {
err := this.traverseObjectData(v)
if err != nil {
return err
}
}
}
return nil
}
if _, isRef := o.(*PdfObjectReference); isRef {
2016-10-31 21:48:25 +00:00
common.Log.Debug("ERROR: Reader tracing a reference!")
return errors.New("Reader tracing a reference!")
}
return nil
}
// GetPageAsIndirectObject returns the indirect object representing a page for a given page
// number. Indirect object with type /Page.
//
// Page numbers start at 1. Before the page is returned all references reachable from it are
// resolved. An error is returned when the document still needs to be decrypted, when the page
// number is out of range, or when resolving the page data fails.
func (r *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error) {
	if r.parser.GetCrypter() != nil && !r.parser.IsAuthenticated() {
		return nil, fmt.Errorf("File needs to be decrypted first")
	}
	if pageNumber < 1 {
		// Guard against a negative index into the page list.
		return nil, fmt.Errorf("Page numbering must start at 1")
	}
	if len(r.pageList) < pageNumber {
		return nil, errors.New("Invalid page number (page count too short)")
	}
	page := r.pageList[pageNumber-1]

	// Look up all references related to page and load everything.
	// XXX/TODO: Use of traverse object data will be limited when lazy-loading is supported.
	err := r.traverseObjectData(page)
	if err != nil {
		return nil, err
	}

	common.Log.Trace("Page: %T %s", page, page)
	common.Log.Trace("- %T %s", page.PdfObject, page.PdfObject)

	return page, nil
}
// PageFromIndirectObject looks up the page whose container is ind and returns its PdfPage
// model together with its 1-based page number. An error is returned when the two internal
// page lists differ in length or when ind is not the container of any page.
func (r *PdfReader) PageFromIndirectObject(ind *PdfIndirectObject) (*PdfPage, int, error) {
	if len(r.pageList) != len(r.PageList) {
		return nil, 0, errors.New("page list invalid")
	}

	for idx := 0; idx < len(r.pageList); idx++ {
		if r.pageList[idx] != ind {
			continue
		}
		return r.PageList[idx], idx + 1, nil
	}

	return nil, 0, errors.New("Page not found")
}
2018-08-14 03:02:19 +00:00
// GetPage returns the PdfPage model for the specified page number.
//
// Page numbers start at 1. An error is returned when the document still needs to be
// decrypted, when the page number is out of range, or when the exported PageList holds fewer
// entries than the internal page list.
func (r *PdfReader) GetPage(pageNumber int) (*PdfPage, error) {
	if r.parser.GetCrypter() != nil && !r.parser.IsAuthenticated() {
		return nil, fmt.Errorf("File needs to be decrypted first")
	}
	if pageNumber < 1 {
		return nil, fmt.Errorf("Page numbering must start at 1")
	}
	if len(r.pageList) < pageNumber {
		return nil, errors.New("Invalid page number (page count too short)")
	}

	idx := pageNumber - 1
	if idx >= len(r.PageList) {
		// PageList is exported and is the slice actually indexed; never read past its end.
		return nil, errors.New("page list invalid")
	}

	return r.PageList[idx], nil
}
2018-08-14 03:02:19 +00:00
// GetOCProperties returns the optional content properties PdfObject, i.e. the OCProperties
// entry of the catalog with all references resolved.
//
// An error is returned when the document still needs to be decrypted (the catalog is only
// loaded once the document is readable), or when resolving the entry fails.
func (r *PdfReader) GetOCProperties() (PdfObject, error) {
	if r.parser.GetCrypter() != nil && !r.parser.IsAuthenticated() {
		return nil, fmt.Errorf("File needs to be decrypted first")
	}

	obj, err := r.traceToObject(r.catalog.Get("OCProperties"))
	if err != nil {
		return nil, err
	}

	// Resolve all references...
	// Should be pretty safe. Should not be referencing to pages or
	// any large structures. Local structures and references
	// to OC Groups.
	err = r.traverseObjectData(obj)
	if err != nil {
		return nil, err
	}

	return obj, nil
}
2018-08-14 03:02:19 +00:00
// Inspect inspects the object types, subtypes and content in the PDF file, returning a map of
// object type to number of instances of each. The inspection is delegated to the underlying
// parser.
func (this *PdfReader) Inspect() (map[string]int, error) {
	return this.parser.Inspect()
}
2017-08-05 00:56:05 +00:00
// GetObjectNums returns the object numbers of the PDF objects in the file, as reported by the
// underlying parser. Numbered objects are either indirect objects or stream objects.
//
// Example:
//     objNums := pdfReader.GetObjectNums()
// The underlying objects can then be accessed with
// pdfReader.GetIndirectObjectByNumber(objNums[0]) for the first available object.
func (r *PdfReader) GetObjectNums() []int {
	return r.parser.GetObjectNums()
}
2018-08-14 03:02:19 +00:00
// GetIndirectObjectByNumber looks up the PdfObject with the given object number through the
// underlying parser and returns it together with the parser's error, if any.
func (r *PdfReader) GetIndirectObjectByNumber(number int) (PdfObject, error) {
	return r.parser.LookupByNumber(number)
}
2017-03-02 12:50:45 +00:00
2018-08-14 03:02:19 +00:00
// GetTrailer returns the PDF's trailer dictionary.
2017-03-02 12:50:45 +00:00
func (this *PdfReader) GetTrailer() (*PdfObjectDictionary, error) {
trailerDict := this.parser.GetTrailer()
if trailerDict == nil {
return nil, errors.New("Trailer missing")
}
return trailerDict, nil
}