mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
420 lines
11 KiB
Go
420 lines
11 KiB
Go
![]() |
/*
|
||
|
* This file is subject to the terms and conditions defined in
|
||
|
* file 'LICENSE.txt', which is part of this source code package.
|
||
|
*/
|
||
|
|
||
|
//
|
||
|
// Allow higher level manipulation of PDF files and pages.
|
||
|
// This can be continuously expanded to support more and more features.
|
||
|
// Generic handling can be done by defining elements as PdfObject which
|
||
|
// can later be replaced and fully defined.
|
||
|
//
|
||
|
|
||
|
package pdf
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"regexp"
|
||
|
"strconv"
|
||
|
)
|
||
|
|
||
|
// PdfRectangle describes a rectangle by the coordinates of two of its
// corners: the lower left (ll) and the upper right (ur).
type PdfRectangle struct {
	// Lower left corner (ll).
	Llx, Lly float64

	// Upper right corner (ur).
	Urx, Ury float64
}
|
||
|
|
||
|
func getNumberAsFloat(obj PdfObject) (float64, error) {
|
||
|
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
||
|
return float64(*fObj), nil
|
||
|
}
|
||
|
|
||
|
if iObj, ok := obj.(*PdfObjectInteger); ok {
|
||
|
return float64(*iObj), nil
|
||
|
}
|
||
|
|
||
|
return 0, errors.New("Not a number")
|
||
|
}
|
||
|
|
||
|
// Create a PDF rectangle object based on an input array of 4 integers.
|
||
|
// Defining the lower left (LL) and upper right (UR) corners with
|
||
|
// floating point numbers.
|
||
|
func NewPdfRectangle(arr PdfObjectArray) (*PdfRectangle, error) {
|
||
|
rect := PdfRectangle{}
|
||
|
if len(arr) != 4 {
|
||
|
return nil, errors.New("Invalid rectangle array, len != 4")
|
||
|
}
|
||
|
|
||
|
var err error
|
||
|
rect.Llx, err = getNumberAsFloat(arr[0])
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
rect.Lly, err = getNumberAsFloat(arr[1])
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
rect.Urx, err = getNumberAsFloat(arr[2])
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
rect.Ury, err = getNumberAsFloat(arr[3])
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return &rect, nil
|
||
|
}
|
||
|
|
||
|
// Convert to a PDF object.
|
||
|
func (rect *PdfRectangle) ToPdfObject() PdfObject {
|
||
|
arr := PdfObjectArray{}
|
||
|
arr = append(arr, makeFloat(rect.Llx))
|
||
|
arr = append(arr, makeFloat(rect.Lly))
|
||
|
arr = append(arr, makeFloat(rect.Urx))
|
||
|
arr = append(arr, makeFloat(rect.Ury))
|
||
|
return &arr
|
||
|
}
|
||
|
|
||
|
// PdfDate holds the individual components of a PDF date.
// A date is a PDF string of the form:
// (D:YYYYMMDDHHmmSSOHH'mm)
type PdfDate struct {
	year          int64 // YYYY
	month         int64 // MM (01-12)
	day           int64 // DD (01-31)
	hour          int64 // HH (00-23)
	minute        int64 // mm (00-59)
	second        int64 // SS (00-59)
	utOffsetSign  byte  // O ('+' / '-' / 'Z')
	utOffsetHours int64 // HH' (00-23 followed by ')
	utOffsetMins  int64 // mm (00-59)
}

// reDate matches a fully specified PDF date string and captures its nine
// components: year, month, day, hour, minute, second, offset sign,
// offset hours and offset minutes.
//
// The offset sign class is written as [-+Z] with the hyphen first so that it
// is taken literally. Written as [+-Z] it would be the range '+'..'Z' and
// would wrongly accept digits, uppercase letters and other punctuation.
var reDate = regexp.MustCompile(`\s*D\s*:\s*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})([-+Z])(\d{2})'(\d{2})`)

// NewPdfDate makes a new PdfDate object from a PDF date string
// (see 7.9.4 Dates).
// format: "D: YYYYMMDDHHmmSSOHH'mm"
//
// All components must be present. The offset sign has to be one of
// '+', '-' or 'Z'. A zero PdfDate and an error are returned when dateStr
// does not contain a matching date.
func NewPdfDate(dateStr string) (PdfDate, error) {
	// A successful match always carries the full match plus the nine
	// capture groups, so no separate length check is required.
	m := reDate.FindStringSubmatch(dateStr)
	if m == nil {
		return PdfDate{}, errors.New("Invalid date string")
	}

	d := PdfDate{}

	// No need to handle err from ParseInt, as pre-validated via regexp.
	d.year, _ = strconv.ParseInt(m[1], 10, 32)
	d.month, _ = strconv.ParseInt(m[2], 10, 32)
	d.day, _ = strconv.ParseInt(m[3], 10, 32)
	d.hour, _ = strconv.ParseInt(m[4], 10, 32)
	d.minute, _ = strconv.ParseInt(m[5], 10, 32)
	d.second, _ = strconv.ParseInt(m[6], 10, 32)
	d.utOffsetSign = m[7][0]
	d.utOffsetHours, _ = strconv.ParseInt(m[8], 10, 32)
	d.utOffsetMins, _ = strconv.ParseInt(m[9], 10, 32)

	return d, nil
}
|
||
|
|
||
|
// Convert to a PDF string object.
|
||
|
func (date *PdfDate) ToPdfObject() PdfObject {
|
||
|
str := fmt.Sprintf("D:%.4d%.2d%.2d%.2d%.2d%.2d%c%.2d'%.2d'",
|
||
|
date.year, date.month, date.day, date.hour, date.minute, date.second,
|
||
|
date.utOffsetSign, date.utOffsetHours, date.utOffsetMins)
|
||
|
pdfStr := PdfObjectString(str)
|
||
|
return &pdfStr
|
||
|
}
|
||
|
|
||
|
// PDF page object (7.7.3.3 - Table 30).
|
||
|
type PdfPage struct {
|
||
|
Parent PdfObject
|
||
|
LastModified *PdfDate
|
||
|
Resources PdfObject
|
||
|
CropBox *PdfRectangle
|
||
|
MediaBox *PdfRectangle
|
||
|
BleedBox *PdfRectangle
|
||
|
TrimBox *PdfRectangle
|
||
|
ArtBox *PdfRectangle
|
||
|
BoxColorInfo PdfObject
|
||
|
Contents PdfObject
|
||
|
Rotate *int64
|
||
|
Group PdfObject
|
||
|
Thumb PdfObject
|
||
|
B PdfObject
|
||
|
Dur PdfObject
|
||
|
Trans PdfObject
|
||
|
Annots PdfObject
|
||
|
AA PdfObject
|
||
|
Metadata PdfObject
|
||
|
PieceInfo PdfObject
|
||
|
StructParents PdfObject
|
||
|
ID PdfObject
|
||
|
PZ PdfObject
|
||
|
SeparationInfo PdfObject
|
||
|
Tabs PdfObject
|
||
|
TemplateInstantiated PdfObject
|
||
|
PresSteps PdfObject
|
||
|
UserUnit PdfObject
|
||
|
VP PdfObject
|
||
|
}
|
||
|
|
||
|
// Build a PdfPage based on the underlying dictionary.
|
||
|
func NewPdfPage(p PdfObjectDictionary) (*PdfPage, error) {
|
||
|
page := PdfPage{}
|
||
|
|
||
|
pType, ok := p["Type"].(*PdfObjectName)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Missing/Invalid Page dictionary Type")
|
||
|
}
|
||
|
if *pType != "Page" {
|
||
|
return nil, errors.New("Page dictionary Type != Page")
|
||
|
}
|
||
|
|
||
|
if obj, isDefined := p["Parent"]; isDefined {
|
||
|
page.Parent = obj
|
||
|
}
|
||
|
|
||
|
if obj, isDefined := p["LastModified"]; isDefined {
|
||
|
strObj, ok := obj.(*PdfObjectString)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Page dictionary LastModified != string")
|
||
|
}
|
||
|
lastmod, err := NewPdfDate(string(*strObj))
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
page.LastModified = &lastmod
|
||
|
}
|
||
|
|
||
|
if obj, isDefined := p["Resources"]; isDefined {
|
||
|
page.Resources = obj
|
||
|
}
|
||
|
|
||
|
if obj, isDefined := p["MediaBox"]; isDefined {
|
||
|
boxArr, ok := obj.(*PdfObjectArray)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Page MediaBox not an array")
|
||
|
}
|
||
|
var err error
|
||
|
page.MediaBox, err = NewPdfRectangle(*boxArr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if obj, isDefined := p["CropBox"]; isDefined {
|
||
|
boxArr, ok := obj.(*PdfObjectArray)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Page CropBox not an array")
|
||
|
}
|
||
|
var err error
|
||
|
page.CropBox, err = NewPdfRectangle(*boxArr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if obj, isDefined := p["BleedBox"]; isDefined {
|
||
|
boxArr, ok := obj.(*PdfObjectArray)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Page BleedBox not an array")
|
||
|
}
|
||
|
var err error
|
||
|
page.BleedBox, err = NewPdfRectangle(*boxArr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if obj, isDefined := p["TrimBox"]; isDefined {
|
||
|
boxArr, ok := obj.(*PdfObjectArray)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Page TrimBox not an array")
|
||
|
}
|
||
|
var err error
|
||
|
page.TrimBox, err = NewPdfRectangle(*boxArr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if obj, isDefined := p["ArtBox"]; isDefined {
|
||
|
boxArr, ok := obj.(*PdfObjectArray)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Page ArtBox not an array")
|
||
|
}
|
||
|
var err error
|
||
|
page.ArtBox, err = NewPdfRectangle(*boxArr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if obj, isDefined := p["BoxColorInfo"]; isDefined {
|
||
|
page.BoxColorInfo = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Contents"]; isDefined {
|
||
|
page.Contents = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Rotate"]; isDefined {
|
||
|
iObj, ok := obj.(*PdfObjectInteger)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Invalid Page Rotate object")
|
||
|
}
|
||
|
iVal := int64(*iObj)
|
||
|
page.Rotate = &iVal
|
||
|
}
|
||
|
if obj, isDefined := p["Group"]; isDefined {
|
||
|
page.Group = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Thumb"]; isDefined {
|
||
|
page.Thumb = obj
|
||
|
}
|
||
|
if obj, isDefined := p["B"]; isDefined {
|
||
|
page.B = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Dur"]; isDefined {
|
||
|
page.Dur = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Trans"]; isDefined {
|
||
|
page.Trans = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Annots"]; isDefined {
|
||
|
page.Annots = obj
|
||
|
}
|
||
|
if obj, isDefined := p["AA"]; isDefined {
|
||
|
page.AA = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Metadata"]; isDefined {
|
||
|
page.Metadata = obj
|
||
|
}
|
||
|
if obj, isDefined := p["PieceInfo"]; isDefined {
|
||
|
page.PieceInfo = obj
|
||
|
}
|
||
|
if obj, isDefined := p["StructParents"]; isDefined {
|
||
|
page.StructParents = obj
|
||
|
}
|
||
|
if obj, isDefined := p["ID"]; isDefined {
|
||
|
page.ID = obj
|
||
|
}
|
||
|
if obj, isDefined := p["PZ"]; isDefined {
|
||
|
page.PZ = obj
|
||
|
}
|
||
|
if obj, isDefined := p["SeparationInfo"]; isDefined {
|
||
|
page.SeparationInfo = obj
|
||
|
}
|
||
|
if obj, isDefined := p["Tabs"]; isDefined {
|
||
|
page.Tabs = obj
|
||
|
}
|
||
|
if obj, isDefined := p["TemplateInstantiated"]; isDefined {
|
||
|
page.TemplateInstantiated = obj
|
||
|
}
|
||
|
if obj, isDefined := p["PresSteps"]; isDefined {
|
||
|
page.PresSteps = obj
|
||
|
}
|
||
|
if obj, isDefined := p["UserUnit"]; isDefined {
|
||
|
page.UserUnit = obj
|
||
|
}
|
||
|
if obj, isDefined := p["VP"]; isDefined {
|
||
|
page.VP = obj
|
||
|
}
|
||
|
|
||
|
return &page, nil
|
||
|
}
|
||
|
|
||
|
// Get the inheritable media box value, either from the page
|
||
|
// or a higher up page/pages struct.
|
||
|
func (this *PdfPage) GetMediaBox() (*PdfRectangle, error) {
|
||
|
if this.MediaBox != nil {
|
||
|
return this.MediaBox, nil
|
||
|
}
|
||
|
|
||
|
node := this.Parent
|
||
|
for node != nil {
|
||
|
dictObj, ok := node.(*PdfIndirectObject)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Invalid parent object")
|
||
|
}
|
||
|
|
||
|
dict, ok := dictObj.PdfObject.(*PdfObjectDictionary)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Invalid parent objects dictionary")
|
||
|
}
|
||
|
|
||
|
if obj, hasMediaBox := (*dict)["MediaBox"]; hasMediaBox {
|
||
|
arr, ok := obj.(*PdfObjectArray)
|
||
|
if !ok {
|
||
|
return nil, errors.New("Invalid media box")
|
||
|
}
|
||
|
rect, err := NewPdfRectangle(*arr)
|
||
|
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return rect, nil
|
||
|
}
|
||
|
|
||
|
node = (*dict)["Parent"]
|
||
|
}
|
||
|
|
||
|
return nil, errors.New("Media box not defined")
|
||
|
}
|
||
|
|
||
|
// Convert the Page to a PDF object dictionary.
|
||
|
func (this *PdfPage) GetPageDict() *PdfObjectDictionary {
|
||
|
p := &PdfObjectDictionary{}
|
||
|
(*p)["Type"] = makeName("Page")
|
||
|
(*p)["Parent"] = this.Parent
|
||
|
|
||
|
if this.LastModified != nil {
|
||
|
p.Set("LastModified", this.LastModified.ToPdfObject())
|
||
|
}
|
||
|
p.SetIfNotNil("Resources", this.Resources)
|
||
|
if this.CropBox != nil {
|
||
|
p.Set("CropBox", this.CropBox.ToPdfObject())
|
||
|
}
|
||
|
if this.MediaBox != nil {
|
||
|
p.Set("MediaBox", this.MediaBox.ToPdfObject())
|
||
|
}
|
||
|
if this.BleedBox != nil {
|
||
|
p.Set("BleedBox", this.BleedBox.ToPdfObject())
|
||
|
}
|
||
|
if this.TrimBox != nil {
|
||
|
p.Set("TrimBox", this.TrimBox.ToPdfObject())
|
||
|
}
|
||
|
if this.ArtBox != nil {
|
||
|
p.Set("ArtBox", this.ArtBox.ToPdfObject())
|
||
|
}
|
||
|
p.SetIfNotNil("BoxColorInfo", this.BoxColorInfo)
|
||
|
p.SetIfNotNil("Contents", this.Contents)
|
||
|
|
||
|
if this.Rotate != nil {
|
||
|
p.Set("Rotate", makeInteger(*this.Rotate))
|
||
|
}
|
||
|
|
||
|
p.SetIfNotNil("Group", this.Group)
|
||
|
p.SetIfNotNil("Thumb", this.Thumb)
|
||
|
p.SetIfNotNil("B", this.B)
|
||
|
p.SetIfNotNil("Dur", this.Dur)
|
||
|
p.SetIfNotNil("Trans", this.Trans)
|
||
|
p.SetIfNotNil("Annots", this.Annots)
|
||
|
p.SetIfNotNil("AA", this.AA)
|
||
|
p.SetIfNotNil("Metadata", this.Metadata)
|
||
|
p.SetIfNotNil("PieceInfo", this.PieceInfo)
|
||
|
p.SetIfNotNil("StructParents", this.StructParents)
|
||
|
p.SetIfNotNil("ID", this.ID)
|
||
|
p.SetIfNotNil("PZ", this.PZ)
|
||
|
p.SetIfNotNil("SeparationInfo", this.SeparationInfo)
|
||
|
p.SetIfNotNil("Tabs", this.Tabs)
|
||
|
p.SetIfNotNil("TemplateInstantiated", this.TemplateInstantiated)
|
||
|
p.SetIfNotNil("PresSteps", this.PresSteps)
|
||
|
p.SetIfNotNil("UserUnit", this.UserUnit)
|
||
|
p.SetIfNotNil("VP", this.VP)
|
||
|
|
||
|
return p
|
||
|
}
|