unipdf/pdf/model/page.go

835 lines
20 KiB
Go
Raw Normal View History

/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */
//
// Allow higher level manipulation of PDF files and pages.
// This can be continuously expanded to support more and more features.
// Generic handling can be done by defining elements as PdfObject which
// can later be replaced and fully defined.
//
package model
import (
"errors"
2016-12-13 21:42:27 +00:00
"fmt"
"strings"
. "github.com/unidoc/unidoc/pdf/core"
)
// PDF page object (7.7.3.3 - Table 30).
type PdfPage struct {
Parent PdfObject
LastModified *PdfDate
Resources *PdfPageResources
CropBox *PdfRectangle
MediaBox *PdfRectangle
BleedBox *PdfRectangle
TrimBox *PdfRectangle
ArtBox *PdfRectangle
BoxColorInfo PdfObject
Contents PdfObject
Rotate *int64
Group PdfObject
Thumb PdfObject
B PdfObject
Dur PdfObject
Trans PdfObject
AA PdfObject
Metadata PdfObject
PieceInfo PdfObject
StructParents PdfObject
ID PdfObject
PZ PdfObject
SeparationInfo PdfObject
Tabs PdfObject
TemplateInstantiated PdfObject
PresSteps PdfObject
UserUnit PdfObject
VP PdfObject
2016-09-07 17:56:45 +00:00
Annotations []*PdfAnnotation
// Primitive container.
pageDict *PdfObjectDictionary
primitive *PdfIndirectObject
}
// NewPdfPage returns an empty page. Its page dictionary is freshly allocated
// and already wrapped in a new indirect object container.
func NewPdfPage() *PdfPage {
	dict := &PdfObjectDictionary{}
	return &PdfPage{
		pageDict:  dict,
		primitive: &PdfIndirectObject{PdfObject: dict},
	}
}
// setContainer makes container the indirect object that holds this page:
// the container is pointed at the page dictionary and then stored as the
// page's primitive. container must not be nil.
func (this *PdfPage) setContainer(container *PdfIndirectObject) {
	container.PdfObject = this.pageDict
	this.primitive = container
}
// Build a PdfPage based on the underlying dictionary.
// Used in loading existing PDF files.
// Note that a new container is created (indirect object).
2016-08-16 09:36:24 +00:00
func (reader *PdfReader) newPdfPageFromDict(p *PdfObjectDictionary) (*PdfPage, error) {
page := NewPdfPage()
page.pageDict = p //XXX?
d := *p
pType, ok := d["Type"].(*PdfObjectName)
if !ok {
return nil, errors.New("Missing/Invalid Page dictionary Type")
}
if *pType != "Page" {
return nil, errors.New("Page dictionary Type != Page")
}
if obj, isDefined := d["Parent"]; isDefined {
page.Parent = obj
}
if obj, isDefined := d["LastModified"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
strObj, ok := TraceToDirectObject(obj).(*PdfObjectString)
if !ok {
return nil, errors.New("Page dictionary LastModified != string")
}
lastmod, err := NewPdfDate(string(*strObj))
if err != nil {
return nil, err
}
page.LastModified = &lastmod
}
if obj, isDefined := d["Resources"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
2016-08-16 09:36:24 +00:00
if err != nil {
return nil, err
}
dict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary)
2016-07-25 14:06:37 +00:00
if !ok {
2016-08-16 09:36:24 +00:00
return nil, fmt.Errorf("Invalid resource dictionary (%T)", obj)
2016-07-25 14:06:37 +00:00
}
page.Resources, err = NewPdfPageResourcesFromDict(dict)
if err != nil {
return nil, err
}
}
if obj, isDefined := d["MediaBox"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
boxArr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
if !ok {
return nil, errors.New("Page MediaBox not an array")
}
page.MediaBox, err = NewPdfRectangle(*boxArr)
if err != nil {
return nil, err
}
}
if obj, isDefined := d["CropBox"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
boxArr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
if !ok {
return nil, errors.New("Page CropBox not an array")
}
page.CropBox, err = NewPdfRectangle(*boxArr)
if err != nil {
return nil, err
}
}
if obj, isDefined := d["BleedBox"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
boxArr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
if !ok {
return nil, errors.New("Page BleedBox not an array")
}
page.BleedBox, err = NewPdfRectangle(*boxArr)
if err != nil {
return nil, err
}
}
if obj, isDefined := d["TrimBox"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
boxArr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
if !ok {
return nil, errors.New("Page TrimBox not an array")
}
page.TrimBox, err = NewPdfRectangle(*boxArr)
if err != nil {
return nil, err
}
}
if obj, isDefined := d["ArtBox"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
boxArr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
if !ok {
return nil, errors.New("Page ArtBox not an array")
}
page.ArtBox, err = NewPdfRectangle(*boxArr)
if err != nil {
return nil, err
}
}
if obj, isDefined := d["BoxColorInfo"]; isDefined {
page.BoxColorInfo = obj
}
if obj, isDefined := d["Contents"]; isDefined {
page.Contents = obj
}
if obj, isDefined := d["Rotate"]; isDefined {
var err error
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
iObj, ok := TraceToDirectObject(obj).(*PdfObjectInteger)
if !ok {
return nil, errors.New("Invalid Page Rotate object")
}
iVal := int64(*iObj)
page.Rotate = &iVal
}
if obj, isDefined := d["Group"]; isDefined {
page.Group = obj
}
if obj, isDefined := d["Thumb"]; isDefined {
page.Thumb = obj
}
if obj, isDefined := d["B"]; isDefined {
page.B = obj
}
if obj, isDefined := d["Dur"]; isDefined {
page.Dur = obj
}
if obj, isDefined := d["Trans"]; isDefined {
page.Trans = obj
}
//if obj, isDefined := d["Annots"]; isDefined {
// page.Annots = obj
//}
if obj, isDefined := d["AA"]; isDefined {
page.AA = obj
}
if obj, isDefined := d["Metadata"]; isDefined {
page.Metadata = obj
}
if obj, isDefined := d["PieceInfo"]; isDefined {
page.PieceInfo = obj
}
if obj, isDefined := d["StructParents"]; isDefined {
page.StructParents = obj
}
if obj, isDefined := d["ID"]; isDefined {
page.ID = obj
}
if obj, isDefined := d["PZ"]; isDefined {
page.PZ = obj
}
if obj, isDefined := d["SeparationInfo"]; isDefined {
page.SeparationInfo = obj
}
if obj, isDefined := d["Tabs"]; isDefined {
page.Tabs = obj
}
if obj, isDefined := d["TemplateInstantiated"]; isDefined {
page.TemplateInstantiated = obj
}
if obj, isDefined := d["PresSteps"]; isDefined {
page.PresSteps = obj
}
if obj, isDefined := d["UserUnit"]; isDefined {
page.UserUnit = obj
}
if obj, isDefined := d["VP"]; isDefined {
page.VP = obj
}
2016-09-07 17:56:45 +00:00
var err error
page.Annotations, err = reader.LoadAnnotations(&d)
if err != nil {
return nil, err
}
return page, nil
}
2016-09-07 17:56:45 +00:00
func (reader *PdfReader) LoadAnnotations(d *PdfObjectDictionary) ([]*PdfAnnotation, error) {
annotsObj, hasAnnots := (*d)["Annots"]
if !hasAnnots {
return nil, nil
}
var err error
annotsObj, err = reader.traceToObject(annotsObj)
if err != nil {
return nil, err
}
annotsArr, ok := TraceToDirectObject(annotsObj).(*PdfObjectArray)
if !ok {
return nil, fmt.Errorf("Annots not an array")
}
annotations := []*PdfAnnotation{}
for _, obj := range *annotsArr {
obj, err = reader.traceToObject(obj)
if err != nil {
return nil, err
}
// Technically all annotation dictionaries should be inside indirect objects.
// In reality, sometimes the annotation dictionary is inline within the Annots array.
if _, isNull := obj.(*PdfObjectNull); isNull {
// Can safely ignore.
continue
2016-09-07 17:56:45 +00:00
}
annotDict, isDict := obj.(*PdfObjectDictionary)
indirectObj, isIndirect := obj.(*PdfIndirectObject)
if isDict {
// Create a container; indirect object; around the dictionary.
indirectObj = &PdfIndirectObject{}
indirectObj.PdfObject = annotDict
} else {
if !isIndirect {
return nil, fmt.Errorf("Annotation not in an indirect object")
}
}
annot, err := reader.newPdfAnnotationFromIndirectObject(indirectObj)
2016-09-07 17:56:45 +00:00
if err != nil {
return nil, err
}
annotations = append(annotations, annot)
}
return annotations, nil
}
// GetMediaBox returns the inheritable media box, either from the page itself
// or from the nearest ancestor in the page tree that defines one.
//
// Ancestors are reached through the Parent entries and must be indirect
// objects holding dictionaries. An inherited MediaBox may itself be wrapped
// in an indirect object. An error is returned if no media box is defined, if
// an ancestor or its MediaBox has an unexpected type, or if the Parent chain
// loops back on itself.
func (this *PdfPage) GetMediaBox() (*PdfRectangle, error) {
	if this.MediaBox != nil {
		return this.MediaBox, nil
	}

	// Guards against malformed files whose Parent chain forms a cycle.
	visited := map[*PdfIndirectObject]bool{}

	node := this.Parent
	for node != nil {
		dictObj, ok := node.(*PdfIndirectObject)
		if !ok || dictObj == nil {
			return nil, errors.New("Invalid parent object")
		}
		if visited[dictObj] {
			return nil, errors.New("Cyclic parent reference in page tree")
		}
		visited[dictObj] = true

		dict, ok := dictObj.PdfObject.(*PdfObjectDictionary)
		if !ok || dict == nil {
			return nil, errors.New("Invalid parent objects dictionary")
		}

		if obj, hasMediaBox := (*dict)["MediaBox"]; hasMediaBox {
			arr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
			if !ok {
				return nil, errors.New("Invalid media box")
			}
			rect, err := NewPdfRectangle(*arr)
			if err != nil {
				return nil, err
			}
			return rect, nil
		}

		// Keep moving up the tree.
		node = (*dict)["Parent"]
	}

	return nil, errors.New("Media box not defined")
}
// GetResources returns the inheritable resources, either from the page itself
// or from the nearest ancestor in the page tree that defines them.
//
// Ancestors are reached through the Parent entries and must be indirect
// objects holding dictionaries. An inherited Resources entry may itself be
// wrapped in an indirect object. It returns nil, nil when neither the page
// nor any ancestor defines resources, so callers must check the result for
// nil. An error is returned if an ancestor or its Resources entry has an
// unexpected type, or if the Parent chain loops back on itself.
func (this *PdfPage) GetResources() (*PdfPageResources, error) {
	if this.Resources != nil {
		return this.Resources, nil
	}

	// Guards against malformed files whose Parent chain forms a cycle.
	visited := map[*PdfIndirectObject]bool{}

	node := this.Parent
	for node != nil {
		dictObj, ok := node.(*PdfIndirectObject)
		if !ok || dictObj == nil {
			return nil, errors.New("Invalid parent object")
		}
		if visited[dictObj] {
			return nil, errors.New("Cyclic parent reference in page tree")
		}
		visited[dictObj] = true

		dict, ok := dictObj.PdfObject.(*PdfObjectDictionary)
		if !ok || dict == nil {
			return nil, errors.New("Invalid parent objects dictionary")
		}

		if obj, hasResources := (*dict)["Resources"]; hasResources {
			prDict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary)
			if !ok {
				return nil, errors.New("Invalid resource dict!")
			}
			resources, err := NewPdfPageResourcesFromDict(prDict)
			if err != nil {
				return nil, err
			}
			return resources, nil
		}

		// Keep moving up the tree...
		node = (*dict)["Parent"]
	}

	// No resources defined...
	return nil, nil
}
// Convert the Page to a PDF object dictionary.
func (this *PdfPage) GetPageDict() *PdfObjectDictionary {
p := this.pageDict
2016-07-25 14:06:37 +00:00
(*p)["Type"] = MakeName("Page")
(*p)["Parent"] = this.Parent
if this.LastModified != nil {
p.Set("LastModified", this.LastModified.ToPdfObject())
}
2016-07-25 14:06:37 +00:00
if this.Resources != nil {
p.Set("Resources", this.Resources.ToPdfObject())
}
if this.CropBox != nil {
p.Set("CropBox", this.CropBox.ToPdfObject())
}
if this.MediaBox != nil {
p.Set("MediaBox", this.MediaBox.ToPdfObject())
}
if this.BleedBox != nil {
p.Set("BleedBox", this.BleedBox.ToPdfObject())
}
if this.TrimBox != nil {
p.Set("TrimBox", this.TrimBox.ToPdfObject())
}
if this.ArtBox != nil {
p.Set("ArtBox", this.ArtBox.ToPdfObject())
}
p.SetIfNotNil("BoxColorInfo", this.BoxColorInfo)
p.SetIfNotNil("Contents", this.Contents)
if this.Rotate != nil {
2016-07-25 14:06:37 +00:00
p.Set("Rotate", MakeInteger(*this.Rotate))
}
p.SetIfNotNil("Group", this.Group)
p.SetIfNotNil("Thumb", this.Thumb)
p.SetIfNotNil("B", this.B)
p.SetIfNotNil("Dur", this.Dur)
p.SetIfNotNil("Trans", this.Trans)
p.SetIfNotNil("AA", this.AA)
p.SetIfNotNil("Metadata", this.Metadata)
p.SetIfNotNil("PieceInfo", this.PieceInfo)
p.SetIfNotNil("StructParents", this.StructParents)
p.SetIfNotNil("ID", this.ID)
p.SetIfNotNil("PZ", this.PZ)
p.SetIfNotNil("SeparationInfo", this.SeparationInfo)
p.SetIfNotNil("Tabs", this.Tabs)
p.SetIfNotNil("TemplateInstantiated", this.TemplateInstantiated)
p.SetIfNotNil("PresSteps", this.PresSteps)
p.SetIfNotNil("UserUnit", this.UserUnit)
p.SetIfNotNil("VP", this.VP)
2016-09-07 17:56:45 +00:00
if this.Annotations != nil {
arr := PdfObjectArray{}
for _, annot := range this.Annotations {
if subannot := annot.GetContext(); subannot != nil {
arr = append(arr, subannot.ToPdfObject())
} else {
// Generic annotation dict (without subtype).
arr = append(arr, annot.ToPdfObject())
}
2016-09-07 17:56:45 +00:00
}
p.Set("Annots", &arr)
2016-09-07 17:56:45 +00:00
}
return p
}
2016-07-25 14:06:37 +00:00
// Get the page object as an indirect objects. Wraps the Page
// dictionary into an indirect object.
func (this *PdfPage) GetPageAsIndirectObject() *PdfIndirectObject {
return this.primitive
2016-07-25 14:06:37 +00:00
}
// GetContainingPdfObject returns the page's indirect object container as a
// generic PdfObject, without refreshing the page dictionary first.
func (this *PdfPage) GetContainingPdfObject() PdfObject {
	return this.primitive
}
// ToPdfObject refreshes the page dictionary from the struct fields and
// returns the page's indirect object container.
func (this *PdfPage) ToPdfObject() PdfObject {
	// Called for its side effect: brings the page dictionary up to date.
	this.GetPageDict()
	return this.primitive
}
2016-07-25 14:06:37 +00:00
// Add an image to the XObject resources.
func (this *PdfPage) AddImageResource(name PdfObjectName, ximg *XObjectImage) error {
if this.Resources == nil {
// Get the resources (is required, should be there..), can be defined by parent object.
resources, err := this.GetResources()
if err != nil {
return err
}
if resources != nil {
this.Resources = resources
} else {
this.Resources = NewPdfPageResources()
}
2016-07-25 14:06:37 +00:00
}
2016-07-25 14:06:37 +00:00
var xresDict *PdfObjectDictionary
if this.Resources.XObject == nil {
xresDict = &PdfObjectDictionary{}
this.Resources.XObject = xresDict
} else {
var ok bool
xresDict, ok = (this.Resources.XObject).(*PdfObjectDictionary)
if !ok {
return errors.New("Invalid xres dict type")
}
}
// Make a stream object container.
(*xresDict)[name] = ximg.ToPdfObject()
return nil
}
// HasImageResource reports whether the page's resources (own or inherited)
// contain an XObject entry with the given name.
//
// It returns false when the resources cannot be resolved, when no resources
// are defined at all, or when the XObject entry is not a dictionary.
func (this *PdfPage) HasImageResource(name PdfObjectName) bool {
	resources, err := this.GetResources()
	// GetResources returns nil, nil when no resources are defined.
	if err != nil || resources == nil {
		return false
	}

	xresDict, ok := resources.XObject.(*PdfObjectDictionary)
	if !ok || xresDict == nil {
		return false
	}

	_, has := (*xresDict)[name]
	return has
}
// HasFontByName reports whether the page's resources (own or inherited)
// contain a Font entry with the given name.
//
// It returns false when the resources cannot be resolved, when no resources
// are defined at all, or when the Font entry is not a dictionary.
func (this *PdfPage) HasFontByName(name PdfObjectName) bool {
	resources, err := this.GetResources()
	// GetResources returns nil, nil when no resources are defined.
	if err != nil || resources == nil {
		return false
	}

	fontDict, ok := resources.Font.(*PdfObjectDictionary)
	if !ok || fontDict == nil {
		return false
	}

	_, has := (*fontDict)[name]
	return has
}
2016-07-25 14:06:37 +00:00
// Add a graphics state to the XObject resources.
func (this *PdfPage) AddExtGState(name PdfObjectName, egs *PdfObjectDictionary) {
if this.Resources == nil {
//this.Resources = &PdfPageResources{}
this.Resources = NewPdfPageResources()
2016-07-25 14:06:37 +00:00
}
if this.Resources.ExtGState == nil {
this.Resources.ExtGState = &PdfObjectDictionary{}
}
egsDict := this.Resources.ExtGState.(*PdfObjectDictionary)
(*egsDict)[name] = egs
}
2016-07-30 00:27:21 +00:00
// Add a font dictionary to the Font resources.
func (this *PdfPage) AddFont(name PdfObjectName, font *PdfObjectDictionary) {
if this.Resources == nil {
//this.Resources = &PdfPageResources{}
this.Resources = NewPdfPageResources()
2016-07-30 00:27:21 +00:00
}
if this.Resources.Font == nil {
this.Resources.Font = &PdfObjectDictionary{}
}
fontDict := this.Resources.Font.(*PdfObjectDictionary)
(*fontDict)[name] = font
}
// WatermarkImageOptions controls how AddWatermarkImage places an image.
type WatermarkImageOptions struct {
	// Alpha is stored as both the CA and ca entries of the graphics state
	// used to draw the watermark.
	Alpha float64

	// FitToWidth draws the watermark as wide as the media box with no
	// horizontal offset. Otherwise the image's own width is used and the
	// watermark is centered horizontally.
	FitToWidth bool

	// PreserveAspectRatio derives the height from the drawn width and the
	// image's width/height ratio, and centers the watermark vertically.
	// Otherwise the watermark is as tall as the media box.
	PreserveAspectRatio bool
}
// Add a watermark to the page.
func (this *PdfPage) AddWatermarkImage(ximg *XObjectImage, opt WatermarkImageOptions) error {
bbox, err := this.GetMediaBox()
if err != nil {
return err
}
pWidth := bbox.Urx - bbox.Llx
pHeight := bbox.Ury - bbox.Lly
wWidth := float64(*ximg.Width)
xOffset := (float64(pWidth) - float64(wWidth)) / 2
if opt.FitToWidth {
wWidth = pWidth
xOffset = 0
}
2016-07-30 00:27:21 +00:00
wHeight := pHeight
yOffset := float64(0)
if opt.PreserveAspectRatio {
wHeight = wWidth * float64(*ximg.Height) / float64(*ximg.Width)
yOffset = (pHeight - wHeight) / 2
}
imgName := PdfObjectName("Imw0")
this.AddImageResource(imgName, ximg)
gs0 := PdfObjectDictionary{}
gs0["BM"] = MakeName("Normal")
gs0["CA"] = MakeFloat(opt.Alpha)
gs0["ca"] = MakeFloat(opt.Alpha)
this.AddExtGState("GS0", &gs0)
contentStr := fmt.Sprintf("q\n"+
"/GS0 gs\n"+
"%.0f 0 0 %.0f %.4f %.4f cm\n"+
2016-07-30 00:27:21 +00:00
"/%s Do\n"+
"Q", wWidth, wHeight, xOffset, yOffset, imgName)
2016-07-30 00:27:21 +00:00
this.AddContentStreamByString(contentStr)
return nil
}
2016-07-25 14:06:37 +00:00
// Add content stream by string. Puts the content string into a stream
// object and points the content stream towards it.
func (this *PdfPage) AddContentStreamByString(contentStr string) {
stream := PdfObjectStream{}
sDict := PdfObjectDictionary{}
stream.PdfObjectDictionary = &sDict
sDict["Length"] = MakeInteger(int64(len(contentStr)))
stream.Stream = []byte(contentStr)
2016-07-30 00:27:21 +00:00
if this.Contents == nil {
// If not set, place it directly.
this.Contents = &stream
} else if contArray, isArray := this.Contents.(*PdfObjectArray); isArray {
// If an array of content streams, append it.
*contArray = append(*contArray, &stream)
} else {
// Only 1 element in place. Wrap inside a new array and add the new one.
contArray := PdfObjectArray{}
contArray = append(contArray, this.Contents)
contArray = append(contArray, &stream)
this.Contents = &contArray
}
}
// Set the content streams based on a string array. Will make 1 object stream
// for each string and reference from the page Contents. Each stream will be
// encoded using the encoding specified by the StreamEncoder, if empty, will
// use identity encoding (raw data).
func (this *PdfPage) SetContentStreams(cStreams []string, encoder StreamEncoder) error {
if len(cStreams) == 0 {
this.Contents = nil
return nil
}
// If encoding is not set, use default raw encoder.
if encoder == nil {
encoder = NewRawEncoder()
}
streamObjs := []*PdfObjectStream{}
for _, cStream := range cStreams {
stream := &PdfObjectStream{}
// Make a new stream dict based on the encoding parameters.
sDict := encoder.MakeStreamDict()
encoded, err := encoder.EncodeBytes([]byte(cStream))
if err != nil {
return err
}
2016-07-30 00:27:21 +00:00
(*sDict)["Length"] = MakeInteger(int64(len(encoded)))
stream.PdfObjectDictionary = sDict
stream.Stream = []byte(encoded)
streamObjs = append(streamObjs, stream)
}
// Set the page contents.
// Point directly to the object stream if only one, or embed in an array.
if len(streamObjs) == 1 {
this.Contents = streamObjs[0]
} else {
contArray := PdfObjectArray{}
for _, streamObj := range streamObjs {
contArray = append(contArray, streamObj)
}
this.Contents = &contArray
}
return nil
2016-07-25 14:06:37 +00:00
}
2016-08-22 08:46:18 +00:00
func getContentStreamAsString(cstreamObj PdfObject) (string, error) {
if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectString); ok {
return string(*cstream), nil
}
if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectStream); ok {
buf, err := DecodeStream(cstream)
2016-08-22 08:46:18 +00:00
if err != nil {
return "", err
}
return string(buf), nil
}
return "", fmt.Errorf("Invalid content stream object holder (%T)", cstreamObj)
}
// Get Content Stream as an array of strings.
func (this *PdfPage) GetContentStreams() ([]string, error) {
if this.Contents == nil {
return nil, nil
}
contents := TraceToDirectObject(this.Contents)
if contArray, isArray := contents.(*PdfObjectArray); isArray {
2016-08-22 08:46:18 +00:00
// If an array of content streams, append it.
cstreams := []string{}
for _, cstreamObj := range *contArray {
cstreamStr, err := getContentStreamAsString(cstreamObj)
if err != nil {
return nil, err
}
cstreams = append(cstreams, cstreamStr)
}
return cstreams, nil
} else {
// Only 1 element in place. Wrap inside a new array and add the new one.
cstreamStr, err := getContentStreamAsString(contents)
2016-08-22 08:46:18 +00:00
if err != nil {
return nil, err
}
cstreams := []string{cstreamStr}
return cstreams, nil
}
}
// GetAllContentStreams returns all content streams of the page joined into a
// single string, with one space between consecutive streams.
func (this *PdfPage) GetAllContentStreams() (string, error) {
	parts, err := this.GetContentStreams()
	if err != nil {
		return "", err
	}

	joined := strings.Join(parts, " ")
	return joined, nil
}
// PdfPageResourcesColorspaces holds a set of named colorspaces.
// Every name in Names needs a matching entry in the Colorspaces map.
// The Names define the order in which entries are written out.
type PdfPageResourcesColorspaces struct {
	// Names lists the colorspace names in output order.
	Names []string
	// Colorspaces maps each name to its colorspace.
	Colorspaces map[string]PdfColorspace

	// container is the indirect object the colorspaces were loaded from;
	// nil when they were loaded from a direct dictionary.
	container *PdfIndirectObject
}
// newPdfPageResourcesColorspacesFromPdfObject loads named colorspaces from
// obj, which must be a dictionary or an indirect object holding one. When an
// indirect object is given it is remembered as the container. Names are
// recorded in the order the dictionary is iterated.
func newPdfPageResourcesColorspacesFromPdfObject(obj PdfObject) (*PdfPageResourcesColorspaces, error) {
	var container *PdfIndirectObject
	if indObj, isIndirect := obj.(*PdfIndirectObject); isIndirect {
		container = indObj
		obj = indObj.PdfObject
	}

	dict, ok := obj.(*PdfObjectDictionary)
	if !ok {
		return nil, errors.New("CS attribute type error")
	}

	colorspaces := &PdfPageResourcesColorspaces{
		Names:       []string{},
		Colorspaces: map[string]PdfColorspace{},
		container:   container,
	}
	for csName, csObj := range *dict {
		name := string(csName)
		cs, err := newPdfColorspaceFromPdfObject(csObj)
		if err != nil {
			return nil, err
		}
		colorspaces.Names = append(colorspaces.Names, name)
		colorspaces.Colorspaces[name] = cs
	}

	return colorspaces, nil
}
// ToPdfObject builds a new dictionary mapping each name in Names to its
// colorspace object. If the colorspaces were loaded from an indirect object,
// that container is pointed at the new dictionary and returned; otherwise
// the dictionary itself is returned.
func (this *PdfPageResourcesColorspaces) ToPdfObject() PdfObject {
	csDict := PdfObjectDictionary{}
	for _, name := range this.Names {
		csDict[PdfObjectName(name)] = this.Colorspaces[name].ToPdfObject()
	}

	if this.container == nil {
		return &csDict
	}

	this.container.PdfObject = &csDict
	return this.container
}