unipdf/pdf/model/page.go
2016-09-11 23:17:38 +00:00

832 lines
21 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
//
// Allow higher level manipulation of PDF files and pages.
// This can be continuously expanded to support more and more features.
// Generic handling can be done by defining elements as PdfObject which
// can later be replaced and fully defined.
//
package model
import (
"errors"
"fmt"
. "github.com/unidoc/unidoc/pdf/core"
)
// PdfPage models a PDF page object (7.7.3.3 - Table 30).
//
// Each exported field other than Annotations is named after the page
// dictionary entry it is read from in newPdfPageFromDict.  LastModified,
// Resources, the five page boxes and Rotate are held as parsed, typed values;
// every other entry is held as a raw PdfObject without further interpretation.
// A nil field means the entry was not present / has not been set.
type PdfPage struct {
Parent PdfObject
LastModified *PdfDate
Resources *PdfPageResources
CropBox *PdfRectangle
MediaBox *PdfRectangle
BleedBox *PdfRectangle
TrimBox *PdfRectangle
ArtBox *PdfRectangle
BoxColorInfo PdfObject
Contents PdfObject
Rotate *int64
Group PdfObject
Thumb PdfObject
B PdfObject
Dur PdfObject
Trans PdfObject
Annots PdfObject
AA PdfObject
Metadata PdfObject
PieceInfo PdfObject
StructParents PdfObject
ID PdfObject
PZ PdfObject
SeparationInfo PdfObject
Tabs PdfObject
TemplateInstantiated PdfObject
PresSteps PdfObject
UserUnit PdfObject
VP PdfObject
// Annotations holds the annotation models built by LoadAnnotations from the
// page dictionary's Annots entry (nil when the page has no Annots entry).
Annotations []*PdfAnnotation
// Primitive container: pageDict is the dictionary GetPageDict writes the
// fields into, and primitive is the indirect object wrapping that dictionary
// (both are created by NewPdfPage).
pageDict *PdfObjectDictionary
primitive *PdfIndirectObject
}
// NewPdfPage creates an empty page model.  The page gets a fresh, empty page
// dictionary and an indirect object container whose content is that
// dictionary; all other fields are left at their zero values.
func NewPdfPage() *PdfPage {
	pageDict := &PdfObjectDictionary{}

	wrapper := &PdfIndirectObject{}
	wrapper.PdfObject = pageDict

	newPage := &PdfPage{}
	newPage.pageDict = pageDict
	newPage.primitive = wrapper
	return newPage
}
// newPdfPageFromDict builds a PdfPage based on the underlying page dictionary.
// Used in loading existing PDF files.
//
// The dictionary must have a Type entry equal to the name "Page".
// LastModified, Resources, the five page boxes and Rotate are parsed into
// typed fields; Resources is resolved through reader.traceToObject, whereas
// the boxes and Rotate must be present as direct array / integer objects.
// All other known entries are copied over as raw PdfObjects, and finally the
// annotations are loaded through LoadAnnotations.
//
// Returns an error when the Type entry is missing or wrong, when one of the
// typed entries holds an object of an unexpected type, or when parsing a date,
// rectangle, resource dictionary or annotation fails.  No diagnostics are
// written to stdout; callers decide what to report.
func (reader *PdfReader) newPdfPageFromDict(p *PdfObjectDictionary) (*PdfPage, error) {
	page := NewPdfPage()
	d := *p

	pType, ok := d["Type"].(*PdfObjectName)
	if !ok {
		return nil, errors.New("Missing/Invalid Page dictionary Type")
	}
	if *pType != "Page" {
		return nil, errors.New("Page dictionary Type != Page")
	}

	if obj, isDefined := d["LastModified"]; isDefined {
		strObj, ok := obj.(*PdfObjectString)
		if !ok {
			return nil, errors.New("Page dictionary LastModified != string")
		}
		lastmod, err := NewPdfDate(string(*strObj))
		if err != nil {
			return nil, err
		}
		page.LastModified = &lastmod
	}

	if obj, isDefined := d["Resources"]; isDefined {
		// Resources may be given by reference; resolve before inspecting.
		obj, err := reader.traceToObject(obj)
		if err != nil {
			return nil, err
		}
		dict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary)
		if !ok {
			return nil, fmt.Errorf("Invalid resource dictionary (%T)", obj)
		}
		page.Resources, err = NewPdfPageResourcesFromDict(dict)
		if err != nil {
			return nil, err
		}
	}

	// Page boundary boxes: each one, when present, must be an array that
	// NewPdfRectangle accepts.  Checked in a fixed order so the first
	// offending entry determines the returned error.
	boxes := []struct {
		key PdfObjectName
		dst **PdfRectangle
	}{
		{"MediaBox", &page.MediaBox},
		{"CropBox", &page.CropBox},
		{"BleedBox", &page.BleedBox},
		{"TrimBox", &page.TrimBox},
		{"ArtBox", &page.ArtBox},
	}
	for _, box := range boxes {
		obj, isDefined := d[box.key]
		if !isDefined {
			continue
		}
		boxArr, ok := obj.(*PdfObjectArray)
		if !ok {
			return nil, errors.New("Page " + string(box.key) + " not an array")
		}
		rect, err := NewPdfRectangle(*boxArr)
		if err != nil {
			return nil, err
		}
		*box.dst = rect
	}

	if obj, isDefined := d["Rotate"]; isDefined {
		iObj, ok := obj.(*PdfObjectInteger)
		if !ok {
			return nil, errors.New("Invalid Page Rotate object")
		}
		iVal := int64(*iObj)
		page.Rotate = &iVal
	}

	// Entries that are carried over as-is, without interpretation.
	rawEntries := []struct {
		key PdfObjectName
		dst *PdfObject
	}{
		{"Parent", &page.Parent},
		{"BoxColorInfo", &page.BoxColorInfo},
		{"Contents", &page.Contents},
		{"Group", &page.Group},
		{"Thumb", &page.Thumb},
		{"B", &page.B},
		{"Dur", &page.Dur},
		{"Trans", &page.Trans},
		{"Annots", &page.Annots},
		{"AA", &page.AA},
		{"Metadata", &page.Metadata},
		{"PieceInfo", &page.PieceInfo},
		{"StructParents", &page.StructParents},
		{"ID", &page.ID},
		{"PZ", &page.PZ},
		{"SeparationInfo", &page.SeparationInfo},
		{"Tabs", &page.Tabs},
		{"TemplateInstantiated", &page.TemplateInstantiated},
		{"PresSteps", &page.PresSteps},
		{"UserUnit", &page.UserUnit},
		{"VP", &page.VP},
	}
	for _, entry := range rawEntries {
		if obj, isDefined := d[entry.key]; isDefined {
			*entry.dst = obj
		}
	}

	annotations, err := reader.LoadAnnotations(p)
	if err != nil {
		return nil, err
	}
	page.Annotations = annotations

	return page, nil
}
// LoadAnnotations builds annotation models from the Annots entry of the page
// dictionary d.
//
// Returns (nil, nil) when d has no Annots entry.  Otherwise the entry, and
// each of its elements, is resolved through reader.traceToObject; the entry
// must resolve to an array and every element to a dictionary, which is then
// converted with reader.newPdfAnnotationFromDict.  The first failure aborts
// loading and is returned as the error.  An empty Annots array yields an
// empty, non-nil slice.
func (reader *PdfReader) LoadAnnotations(d *PdfObjectDictionary) ([]*PdfAnnotation, error) {
	annotsObj, hasAnnots := (*d)["Annots"]
	if !hasAnnots {
		// A page without annotations is perfectly valid; nothing to report.
		return nil, nil
	}

	annotsObj, err := reader.traceToObject(annotsObj)
	if err != nil {
		return nil, err
	}
	annotsArr, ok := TraceToDirectObject(annotsObj).(*PdfObjectArray)
	if !ok {
		return nil, fmt.Errorf("Annots not an array")
	}

	annotations := []*PdfAnnotation{}
	for _, obj := range *annotsArr {
		obj, err := reader.traceToObject(obj)
		if err != nil {
			return nil, err
		}
		dict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary)
		if !ok {
			return nil, fmt.Errorf("Annotation not a dictionary")
		}
		annot, err := reader.newPdfAnnotationFromDict(dict)
		if err != nil {
			return nil, err
		}
		annotations = append(annotations, annot)
	}

	return annotations, nil
}
// GetMediaBox returns the media box that applies to the page.  The page's own
// MediaBox is used when set; otherwise the chain of Parent entries is walked
// upwards and the first MediaBox entry found is parsed and returned.
//
// Every ancestor must be an indirect object wrapping a dictionary, and a
// MediaBox entry must be an array; otherwise an error is returned.  An error
// is also returned when the chain ends without any MediaBox.
func (this *PdfPage) GetMediaBox() (*PdfRectangle, error) {
	if own := this.MediaBox; own != nil {
		return own, nil
	}

	for ancestor := this.Parent; ancestor != nil; {
		indirect, isIndirect := ancestor.(*PdfIndirectObject)
		if !isIndirect {
			return nil, errors.New("Invalid parent object")
		}

		parentDict, isDict := indirect.PdfObject.(*PdfObjectDictionary)
		if !isDict {
			return nil, errors.New("Invalid parent objects dictionary")
		}

		boxObj, found := (*parentDict)["MediaBox"]
		if !found {
			// Nothing at this level, continue one level up.
			ancestor = (*parentDict)["Parent"]
			continue
		}

		boxArr, isArray := boxObj.(*PdfObjectArray)
		if !isArray {
			return nil, errors.New("Invalid media box")
		}
		inherited, err := NewPdfRectangle(*boxArr)
		if err != nil {
			return nil, err
		}
		return inherited, nil
	}

	return nil, errors.New("Media box not defined")
}
// GetPageDict converts the page model to a PDF object dictionary.
//
// The page's own dictionary is updated in place from the model fields and
// returned.  Type is always set to the name "Page" and Parent is always
// written (even when nil); every other entry is written only when the
// corresponding field is non-nil.
//
// The annotation array is stored under the "Annots" key: the raw Annots
// object is written first and, when Annotations is non-nil, replaced by an
// array built from the annotation models so the models take precedence.
func (this *PdfPage) GetPageDict() *PdfObjectDictionary {
	p := this.pageDict
	(*p)["Type"] = MakeName("Page")
	(*p)["Parent"] = this.Parent

	if this.LastModified != nil {
		p.Set("LastModified", this.LastModified.ToPdfObject())
	}
	if this.Resources != nil {
		p.Set("Resources", this.Resources.ToPdfObject())
	}
	if this.CropBox != nil {
		p.Set("CropBox", this.CropBox.ToPdfObject())
	}
	if this.MediaBox != nil {
		p.Set("MediaBox", this.MediaBox.ToPdfObject())
	}
	if this.BleedBox != nil {
		p.Set("BleedBox", this.BleedBox.ToPdfObject())
	}
	if this.TrimBox != nil {
		p.Set("TrimBox", this.TrimBox.ToPdfObject())
	}
	if this.ArtBox != nil {
		p.Set("ArtBox", this.ArtBox.ToPdfObject())
	}
	p.SetIfNotNil("BoxColorInfo", this.BoxColorInfo)
	p.SetIfNotNil("Contents", this.Contents)
	if this.Rotate != nil {
		p.Set("Rotate", MakeInteger(*this.Rotate))
	}
	p.SetIfNotNil("Group", this.Group)
	p.SetIfNotNil("Thumb", this.Thumb)
	p.SetIfNotNil("B", this.B)
	p.SetIfNotNil("Dur", this.Dur)
	p.SetIfNotNil("Trans", this.Trans)
	// Previously written under the non-existent key "Annots2".
	p.SetIfNotNil("Annots", this.Annots)
	p.SetIfNotNil("AA", this.AA)
	p.SetIfNotNil("Metadata", this.Metadata)
	p.SetIfNotNil("PieceInfo", this.PieceInfo)
	p.SetIfNotNil("StructParents", this.StructParents)
	p.SetIfNotNil("ID", this.ID)
	p.SetIfNotNil("PZ", this.PZ)
	p.SetIfNotNil("SeparationInfo", this.SeparationInfo)
	p.SetIfNotNil("Tabs", this.Tabs)
	p.SetIfNotNil("TemplateInstantiated", this.TemplateInstantiated)
	p.SetIfNotNil("PresSteps", this.PresSteps)
	p.SetIfNotNil("UserUnit", this.UserUnit)
	p.SetIfNotNil("VP", this.VP)

	if this.Annotations != nil {
		// The modelled annotations override the raw Annots object above.
		arr := make(PdfObjectArray, 0, len(this.Annotations))
		for _, annot := range this.Annotations {
			arr = append(arr, annot.GetContext().ToPdfObject())
		}
		p.Set("Annots", &arr)
	}

	return p
}
// GetPageAsIndirectObject returns the indirect object that wraps the page
// dictionary.  The dictionary is returned as it currently stands; it is not
// refreshed from the model fields by this call.
func (this *PdfPage) GetPageAsIndirectObject() *PdfIndirectObject {
	wrapper := this.primitive
	return wrapper
}
// GetContainingPdfObject returns the page's primitive container (the indirect
// object wrapping the page dictionary) as a generic PdfObject.
func (this *PdfPage) GetContainingPdfObject() PdfObject {
	var container PdfObject = this.primitive
	return container
}
// ToPdfObject synchronizes the page dictionary with the model fields and
// returns the indirect object that contains it.
func (this *PdfPage) ToPdfObject() PdfObject {
	// Called for its side effect only: GetPageDict rewrites the dictionary
	// held inside the primitive container.
	this.GetPageDict()
	return this.primitive
}
// AddImageResource adds an image to the XObject resources of the page under
// the given resource name, replacing any existing entry with that name.
//
// Resources and the XObject dictionary are created on demand.  Resources is
// created through NewPdfPageResources so that its primitive dictionary is
// initialized; a bare &PdfPageResources{} would leave it nil and break the
// later conversion of the resources to a PDF object.
//
// Returns an error if an existing XObject entry is not a direct dictionary.
func (this *PdfPage) AddImageResource(name PdfObjectName, ximg *XObjectImage) error {
	if this.Resources == nil {
		this.Resources = NewPdfPageResources()
	}

	var xresDict *PdfObjectDictionary
	if this.Resources.XObject == nil {
		xresDict = &PdfObjectDictionary{}
		this.Resources.XObject = xresDict
	} else {
		var ok bool
		xresDict, ok = (this.Resources.XObject).(*PdfObjectDictionary)
		if !ok {
			return errors.New("Invalid xres dict type")
		}
	}

	// Store the image as its stream object.
	(*xresDict)[name] = ximg.ToPdfObject()
	return nil
}
// AddExtGState adds a graphics state dictionary to the ExtGState resources of
// the page under the given resource name, replacing any existing entry with
// that name.
//
// Resources and the ExtGState dictionary are created on demand.  Resources is
// created through NewPdfPageResources so that its primitive dictionary is
// initialized.
//
// NOTE: an existing ExtGState entry is assumed to be a direct dictionary; the
// type assertion panics otherwise, as the signature has no way to report it.
func (this *PdfPage) AddExtGState(name PdfObjectName, egs *PdfObjectDictionary) {
	if this.Resources == nil {
		this.Resources = NewPdfPageResources()
	}
	if this.Resources.ExtGState == nil {
		this.Resources.ExtGState = &PdfObjectDictionary{}
	}

	egsDict := this.Resources.ExtGState.(*PdfObjectDictionary)
	(*egsDict)[name] = egs
}
// AddFont adds a font dictionary to the Font resources of the page under the
// given resource name, replacing any existing entry with that name.
//
// Resources and the Font dictionary are created on demand.  Resources is
// created through NewPdfPageResources so that its primitive dictionary is
// initialized.
//
// NOTE: an existing Font entry is assumed to be a direct dictionary; the type
// assertion panics otherwise, as the signature has no way to report it.
func (this *PdfPage) AddFont(name PdfObjectName, font *PdfObjectDictionary) {
	if this.Resources == nil {
		this.Resources = NewPdfPageResources()
	}
	if this.Resources.Font == nil {
		this.Resources.Font = &PdfObjectDictionary{}
	}

	fontDict := this.Resources.Font.(*PdfObjectDictionary)
	(*fontDict)[name] = font
}
// WatermarkImageOptions controls how AddWatermarkImage places the image.
type WatermarkImageOptions struct {
// Alpha is written as both the CA and ca entries of the watermark's
// graphics state dictionary.
Alpha float64
// FitToWidth makes the watermark as wide as the page media box with no
// horizontal offset; otherwise the image's own width is used and it is
// centered horizontally.
FitToWidth bool
// PreserveAspectRatio derives the watermark height from its width and the
// image's height/width ratio and centers it vertically; otherwise the
// watermark is as high as the page media box.
PreserveAspectRatio bool
}
// AddWatermarkImage adds an image watermark to the page.
//
// The image is registered as XObject resource "Imw0", a graphics state "GS0"
// carrying opt.Alpha is registered, and a content stream that paints the image
// is added to the page contents.  Size and position are derived from the page
// media box (see GetMediaBox) and opt:
//   - width:  the image width, centered horizontally; or the page width at
//     offset 0 when opt.FitToWidth is set.
//   - height: the page height at offset 0; or width * imageHeight/imageWidth,
//     centered vertically, when opt.PreserveAspectRatio is set.
//
// Returns an error if the media box cannot be determined, if the image lacks a
// width or height (or has zero width while the aspect ratio is to be
// preserved), or if the image cannot be added to the page resources.
func (this *PdfPage) AddWatermarkImage(ximg *XObjectImage, opt WatermarkImageOptions) error {
	if ximg == nil || ximg.Width == nil || ximg.Height == nil {
		return errors.New("Invalid watermark image (missing width/height)")
	}

	bbox, err := this.GetMediaBox()
	if err != nil {
		return err
	}
	pWidth := bbox.Urx - bbox.Llx
	pHeight := bbox.Ury - bbox.Lly

	wWidth := float64(*ximg.Width)
	xOffset := (float64(pWidth) - float64(wWidth)) / 2
	if opt.FitToWidth {
		wWidth = pWidth
		xOffset = 0
	}

	wHeight := pHeight
	yOffset := float64(0)
	if opt.PreserveAspectRatio {
		if *ximg.Width == 0 {
			// Would otherwise divide by zero and emit NaN/Inf into the stream.
			return errors.New("Invalid watermark image (zero width)")
		}
		wHeight = wWidth * float64(*ximg.Height) / float64(*ximg.Width)
		yOffset = (pHeight - wHeight) / 2
	}

	imgName := PdfObjectName("Imw0")
	if err := this.AddImageResource(imgName, ximg); err != nil {
		return err
	}

	gs0 := PdfObjectDictionary{}
	gs0["BM"] = MakeName("Normal")
	gs0["CA"] = MakeFloat(opt.Alpha)
	gs0["ca"] = MakeFloat(opt.Alpha)
	this.AddExtGState("GS0", &gs0)

	// q/Q isolate the graphics state changes made for the watermark.
	contentStr := fmt.Sprintf("q\n"+
		"/GS0 gs\n"+
		"%.0f 0 0 %.0f %.4f %.4f cm\n"+
		"/%s Do\n"+
		"Q", wWidth, wHeight, xOffset, yOffset, imgName)
	this.AddContentStreamByString(contentStr)

	return nil
}
// AddContentStreamByString wraps contentStr in a new stream object (with its
// Length entry set to the byte length of the string) and adds that stream to
// the page contents:
//   - no contents yet: the stream becomes the contents;
//   - contents is an array: the stream is appended to it;
//   - anything else: contents becomes a new two-element array holding the
//     previous contents followed by the stream.
func (this *PdfPage) AddContentStreamByString(contentStr string) {
	streamDict := PdfObjectDictionary{}
	streamDict["Length"] = MakeInteger(int64(len(contentStr)))

	added := &PdfObjectStream{}
	added.PdfObjectDictionary = &streamDict
	added.Stream = []byte(contentStr)

	switch existing := this.Contents.(type) {
	case nil:
		this.Contents = added
	case *PdfObjectArray:
		*existing = append(*existing, added)
	default:
		combined := PdfObjectArray{existing, added}
		this.Contents = &combined
	}
}
// getContentStreamAsString returns the text of a content stream holder.  The
// object is first traced to its direct object: a string object is returned
// as-is, a stream object is decoded with DecodeStream.  Any other object type
// results in an error naming the type of the object that was passed in.
func getContentStreamAsString(cstreamObj PdfObject) (string, error) {
	switch holder := TraceToDirectObject(cstreamObj).(type) {
	case *PdfObjectString:
		return string(*holder), nil
	case *PdfObjectStream:
		decoded, err := DecodeStream(holder)
		if err != nil {
			return "", err
		}
		return string(decoded), nil
	}
	return "", fmt.Errorf("Invalid content stream object holder (%T)", cstreamObj)
}
// GetContentStreams returns the page's content streams as strings, one per
// stream.  Returns (nil, nil) when the page has no contents.  When the
// contents is an array every element is converted in order; otherwise the
// contents is treated as a single stream.  The first conversion failure is
// returned as the error.
func (this *PdfPage) GetContentStreams() ([]string, error) {
	switch contents := this.Contents.(type) {
	case nil:
		return nil, nil

	case *PdfObjectArray:
		// Several content streams: convert each one.
		texts := []string{}
		for _, element := range *contents {
			text, err := getContentStreamAsString(element)
			if err != nil {
				return nil, err
			}
			texts = append(texts, text)
		}
		return texts, nil

	default:
		// A single content stream.
		text, err := getContentStreamAsString(contents)
		if err != nil {
			return nil, err
		}
		return []string{text}, nil
	}
}
// PdfPageResources is the page resources model.
// Implements PdfModel.
//
// Each exported field holds the raw object of the resource dictionary entry
// with the same name; nil means the entry is absent.
type PdfPageResources struct {
ExtGState PdfObject
ColorSpace PdfObject
Pattern PdfObject
Shading PdfObject
XObject PdfObject
Font PdfObject
ProcSet PdfObject
// Primitive: the dictionary that ToPdfObject writes the fields into.  It is
// initialized by NewPdfPageResources.
primitive *PdfObjectDictionary
}
// NewPdfPageResources creates an empty resources model backed by a fresh,
// empty primitive dictionary.
func NewPdfPageResources() *PdfPageResources {
	return &PdfPageResources{
		primitive: &PdfObjectDictionary{},
	}
}
// NewPdfPageResourcesFromDict creates a resources model from a resource
// dictionary.  Every known entry present in dict is copied, as a raw object,
// into the field of the same name; entries that are absent leave the field
// nil.  The returned error is always nil.
func NewPdfPageResourcesFromDict(dict *PdfObjectDictionary) (*PdfPageResources, error) {
	res := NewPdfPageResources()

	entries := []struct {
		key    PdfObjectName
		target *PdfObject
	}{
		{"ExtGState", &res.ExtGState},
		{"ColorSpace", &res.ColorSpace},
		{"Pattern", &res.Pattern},
		{"Shading", &res.Shading},
		{"XObject", &res.XObject},
		{"Font", &res.Font},
		{"ProcSet", &res.ProcSet},
	}
	for _, entry := range entries {
		value, present := (*dict)[entry.key]
		if present {
			*entry.target = value
		}
	}

	return res, nil
}
// GetContainingPdfObject returns the primitive dictionary backing the
// resources model as a generic PdfObject.
func (r *PdfPageResources) GetContainingPdfObject() PdfObject {
	var container PdfObject = r.primitive
	return container
}
// ToPdfObject writes every field into the primitive dictionary, under the key
// of the same name, through SetIfNotNil and returns that dictionary.
func (r *PdfPageResources) ToPdfObject() PdfObject {
	resDict := r.primitive
	put := resDict.SetIfNotNil

	put("ExtGState", r.ExtGState)
	put("ColorSpace", r.ColorSpace)
	put("Pattern", r.Pattern)
	put("Shading", r.Shading)
	put("XObject", r.XObject)
	put("Font", r.Font)
	put("ProcSet", r.ProcSet)

	return resDict
}
// XObjectImage models an image XObject (Table 89 in 8.9.5.1).
// Implements PdfModel interface.
//
// Width, Height and BitsPerComponent are held as parsed integers (nil when
// not set); the other dictionary entries are held as raw PdfObjects under the
// field of the same name.
type XObjectImage struct {
Width *int64
Height *int64
ColorSpace PdfObject
BitsPerComponent *int64
Intent PdfObject
ImageMask PdfObject
Mask PdfObject
Decode PdfObject
Interpolate PdfObject
Alternatives PdfObject
SMask PdfObject
SMaskInData PdfObject
Name PdfObject
StructParent PdfObject
ID PdfObject
OPI PdfObject
Metadata PdfObject
OC PdfObject
// Stream holds the image data bytes that ToPdfObject places in the stream
// object.
Stream []byte
// Primitive: the stream object that ToPdfObject updates and returns.  It is
// initialized by NewXObjectImage.
primitive *PdfObjectStream
}
// NewXObjectImage creates an empty image XObject model backed by a fresh
// stream object that has an empty dictionary.
func NewXObjectImage() *XObjectImage {
	backing := &PdfObjectStream{}
	backing.PdfObjectDictionary = &PdfObjectDictionary{}

	return &XObjectImage{primitive: backing}
}
// NewXObjectImageFromImage creates a new image XObject model from an image
// object with default options: the given name, the image's data bytes, width
// and height, 8 bits per component and the DeviceRGB color space.  The
// returned error is always nil.
func NewXObjectImageFromImage(name PdfObjectName, img *Image) (*XObjectImage, error) {
	ximg := NewXObjectImage()
	ximg.Name = &name
	ximg.Stream = img.Data.Bytes()

	// Dimensions are copied into locals so the model owns its own values.
	width, height := img.Width, img.Height
	ximg.Width, ximg.Height = &width, &height

	// Defaults.
	bitsPerComponent := int64(8)
	ximg.BitsPerComponent = &bitsPerComponent
	ximg.ColorSpace = MakeName("DeviceRGB")

	return ximg, nil
}
// NewXObjectImageFromStream builds the image XObject model from a stream
// object.
//
// Width, Height and BitsPerComponent, when present in the stream dictionary,
// must be integer objects and are parsed into typed fields.  All other known
// entries are copied as raw objects into the field of the same name, and the
// stream's data becomes the model's Stream.  Entries absent from the
// dictionary leave the corresponding field nil.
//
// Returns an error if the stream has no dictionary or if one of the integer
// entries holds an object of another type.
func NewXObjectImageFromStream(stream PdfObjectStream) (*XObjectImage, error) {
	if stream.PdfObjectDictionary == nil {
		return nil, errors.New("Invalid image stream (missing dictionary)")
	}
	img := NewXObjectImage()
	dict := *(stream.PdfObjectDictionary)

	// readInt returns the integer stored under key, or nil when the key is
	// absent; errMsg is reported when the entry is not an integer object.
	readInt := func(key PdfObjectName, errMsg string) (*int64, error) {
		obj, isDefined := dict[key]
		if !isDefined {
			return nil, nil
		}
		iObj, ok := obj.(*PdfObjectInteger)
		if !ok {
			return nil, errors.New(errMsg)
		}
		iVal := int64(*iObj)
		return &iVal, nil
	}

	var err error
	if img.Width, err = readInt("Width", "Invalid image width object"); err != nil {
		return nil, err
	}
	if img.Height, err = readInt("Height", "Invalid image height object"); err != nil {
		return nil, err
	}
	// The message used to be a copy of the Height one, which misreported
	// which entry was at fault.
	if img.BitsPerComponent, err = readInt("BitsPerComponent", "Invalid image bits per component object"); err != nil {
		return nil, err
	}

	// Entries that are carried over as-is, without interpretation.
	rawEntries := []struct {
		key PdfObjectName
		dst *PdfObject
	}{
		{"ColorSpace", &img.ColorSpace},
		{"Intent", &img.Intent},
		{"ImageMask", &img.ImageMask},
		{"Mask", &img.Mask},
		{"Decode", &img.Decode},
		{"Interpolate", &img.Interpolate},
		{"Alternatives", &img.Alternatives},
		{"SMask", &img.SMask},
		{"SMaskInData", &img.SMaskInData},
		{"Name", &img.Name},
		{"StructParent", &img.StructParent},
		{"ID", &img.ID},
		{"OPI", &img.OPI},
		{"Metadata", &img.Metadata},
		{"OC", &img.OC},
	}
	for _, entry := range rawEntries {
		if obj, isDefined := dict[entry.key]; isDefined {
			*entry.dst = obj
		}
	}

	img.Stream = stream.Stream

	return img, nil
}
// GetContainingPdfObject returns the primitive stream object backing the
// image model as a generic PdfObject.
func (ximg *XObjectImage) GetContainingPdfObject() PdfObject {
	var container PdfObject = ximg.primitive
	return container
}
// ToPdfObject updates the primitive stream object from the model and returns
// it.
//
// The stream data is set to Stream and the dictionary receives Type
// "XObject", Subtype "Image", Filter "DCTDecode" and Length (the byte length
// of Stream).  Width, Height and BitsPerComponent are written when set; a nil
// Width or Height is skipped rather than dereferenced, since a model built by
// NewXObjectImageFromStream may lack them.  All remaining entries are written
// only when non-nil.
//
// NOTE(review): Filter is always written as DCTDecode, regardless of how the
// data in Stream is actually encoded - confirm callers only supply JPEG data.
func (ximg *XObjectImage) ToPdfObject() PdfObject {
	stream := ximg.primitive
	stream.Stream = ximg.Stream

	dict := stream.PdfObjectDictionary
	dict.Set("Type", MakeName("XObject"))
	dict.Set("Subtype", MakeName("Image"))
	if ximg.Width != nil {
		dict.Set("Width", MakeInteger(*(ximg.Width)))
	}
	if ximg.Height != nil {
		dict.Set("Height", MakeInteger(*(ximg.Height)))
	}
	dict.Set("Filter", MakeName("DCTDecode"))
	if ximg.BitsPerComponent != nil {
		dict.Set("BitsPerComponent", MakeInteger(*(ximg.BitsPerComponent)))
	}

	dict.SetIfNotNil("ColorSpace", ximg.ColorSpace)
	dict.SetIfNotNil("Intent", ximg.Intent)
	dict.SetIfNotNil("ImageMask", ximg.ImageMask)
	dict.SetIfNotNil("Mask", ximg.Mask)
	dict.SetIfNotNil("Decode", ximg.Decode)
	dict.SetIfNotNil("Interpolate", ximg.Interpolate)
	dict.SetIfNotNil("Alternatives", ximg.Alternatives)
	dict.SetIfNotNil("SMask", ximg.SMask)
	dict.SetIfNotNil("SMaskInData", ximg.SMaskInData)
	dict.SetIfNotNil("Name", ximg.Name)
	dict.SetIfNotNil("StructParent", ximg.StructParent)
	dict.SetIfNotNil("ID", ximg.ID)
	dict.SetIfNotNil("OPI", ximg.OPI)
	dict.SetIfNotNil("Metadata", ximg.Metadata)
	dict.SetIfNotNil("OC", ximg.OC)

	dict.Set("Length", MakeInteger(int64(len(ximg.Stream))))

	return stream
}