unipdf/model/functions.go
2020-04-15 21:05:20 +00:00

862 lines
22 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"errors"
"math"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/internal/sampling"
"github.com/unidoc/unipdf/v3/ps"
)
// PdfFunction interface represents the common methods of a function in PDF.
// It is implemented in this file by PdfFunctionType0, PdfFunctionType2, PdfFunctionType3 and
// PdfFunctionType4.
type PdfFunction interface {
// Evaluate runs the function on the given input values and returns the output values.
Evaluate([]float64) ([]float64, error)
// ToPdfObject returns the PDF object (stream, indirect object or dictionary) representing the function.
ToPdfObject() core.PdfObject
}
// newPdfFunctionFromPdfObject loads a PDF function from obj. A reference is resolved first; the
// resulting object must be a stream, an indirect object wrapping a dictionary, or a bare dictionary.
//
// In PDF a function object is a stream or a dictionary depending on its FunctionType:
//   - Stream: Type 0, Type 4
//   - Dictionary: Type 2, Type 3
//
// Any other object kind, a missing/non-integer FunctionType, or a FunctionType that does not match
// the container kind yields an error.
func newPdfFunctionFromPdfObject(obj core.PdfObject) (PdfFunction, error) {
	obj = core.ResolveReference(obj)

	switch t := obj.(type) {
	case *core.PdfObjectStream:
		ftype, ok := t.PdfObjectDictionary.Get("FunctionType").(*core.PdfObjectInteger)
		if !ok {
			common.Log.Error("FunctionType number missing")
			return nil, errors.New("invalid parameter or missing")
		}
		switch *ftype {
		case 0:
			return newPdfFunctionType0FromStream(t)
		case 4:
			return newPdfFunctionType4FromStream(t)
		}
		return nil, errors.New("invalid function type")

	case *core.PdfIndirectObject:
		// The indirect object is only the (tracked) container; the function itself is the
		// dictionary it holds.
		dict, ok := t.PdfObject.(*core.PdfObjectDictionary)
		if !ok {
			common.Log.Error("Function Indirect object not containing dictionary")
			return nil, errors.New("invalid parameter or missing")
		}
		ftype, ok := dict.Get("FunctionType").(*core.PdfObjectInteger)
		if !ok {
			common.Log.Error("FunctionType number missing")
			return nil, errors.New("invalid parameter or missing")
		}
		switch *ftype {
		case 2:
			return newPdfFunctionType2FromPdfObject(t)
		case 3:
			return newPdfFunctionType3FromPdfObject(t)
		}
		return nil, errors.New("invalid function type")

	case *core.PdfObjectDictionary:
		ftype, ok := t.Get("FunctionType").(*core.PdfObjectInteger)
		if !ok {
			common.Log.Error("FunctionType number missing")
			return nil, errors.New("invalid parameter or missing")
		}
		switch *ftype {
		case 2:
			return newPdfFunctionType2FromPdfObject(t)
		case 3:
			return newPdfFunctionType3FromPdfObject(t)
		}
		return nil, errors.New("invalid function type")
	}

	common.Log.Debug("Function Type error: %#v", obj)
	return nil, errors.New("type error")
}
// interpolate linearly maps x from the interval [xmin, xmax] onto [ymin, ymax] (the Interpolate
// helper from the PDF manual). No clamping is performed, so x outside the interval extrapolates.
// When the source interval is (nearly) empty, ymin is returned to avoid dividing by zero.
func interpolate(x, xmin, xmax, ymin, ymax float64) float64 {
	span := xmax - xmin
	if math.Abs(span) < 0.000001 {
		return ymin
	}
	return ymin + (x-xmin)*(ymax-ymin)/span
}
// PdfFunctionType0 uses a sequence of sample values (contained in a stream) to provide an approximation
// for functions whose domains and ranges are bounded. The samples are organized as an m-dimensional
// table in which each entry has n components.
type PdfFunctionType0 struct {
Domain []float64 // required; 2*m length; where m is the number of input values
Range []float64 // required (type 0); 2*n length; where n is the number of output values
// NumInputs is m; the loader sets it to half the length of Domain.
NumInputs int
// NumOutputs is n; the loader sets it to half the length of Range.
NumOutputs int
// Size holds the number of samples along each input dimension (one entry per input).
Size []int
// BitsPerSample is the bit width of one sample; the loader accepts 1, 2, 4, 8, 12, 16, 24 and 32.
BitsPerSample int
Order int // Values 1 or 3 (linear or cubic spline interpolation)
// Encode maps input values into sample table coordinates (2*m numbers). Optional; when nil,
// Evaluate uses [0 Size[i]-1] for each input.
Encode []float64
// Decode maps sample values into the output range (2*n numbers). Optional; when nil, Evaluate
// uses Range.
Decode []float64
// rawData is the decoded stream content holding the packed samples.
rawData []byte
// data holds the samples unpacked from rawData; filled in lazily by processSamples.
data []uint32
// container is the stream object the function was loaded from, reused by ToPdfObject.
container *core.PdfObjectStream
}
// newPdfFunctionType0FromStream constructs a sampled (type 0) function from a stream object
// (typically loaded from a PDF file).
//
// Domain, Range, Size and BitsPerSample are required; Order, Encode and Decode are optional.
// Domain and Range must have even length, Size must have one entry per input, BitsPerSample must
// be one of 1, 2, 4, 8, 12, 16, 24, 32 and Order (default 1) must be 1 or 3. The stream is decoded
// and kept as the raw sample data. The stream itself is remembered as the function's container.
func newPdfFunctionType0FromStream(stream *core.PdfObjectStream) (*PdfFunctionType0, error) {
	dict := stream.PdfObjectDictionary
	fn := &PdfFunctionType0{container: stream}

	// Domain: two numbers per input.
	domainArr, ok := core.TraceToDirectObject(dict.Get("Domain")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Domain not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	if n := domainArr.Len(); n < 0 || n%2 != 0 {
		common.Log.Error("Domain invalid")
		return nil, errors.New("invalid domain range")
	}
	fn.NumInputs = domainArr.Len() / 2
	domain, err := domainArr.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fn.Domain = domain

	// Range: two numbers per output.
	rangeArr, ok := core.TraceToDirectObject(dict.Get("Range")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Range not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	if n := rangeArr.Len(); n < 0 || n%2 != 0 {
		return nil, errors.New("invalid range")
	}
	fn.NumOutputs = rangeArr.Len() / 2
	outRange, err := rangeArr.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fn.Range = outRange

	// Size: number of samples in each input dimension.
	sizeArr, ok := core.TraceToDirectObject(dict.Get("Size")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Size not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	sizes, err := sizeArr.ToIntegerArray()
	if err != nil {
		return nil, err
	}
	if len(sizes) != fn.NumInputs {
		common.Log.Error("Table size not matching number of inputs")
		return nil, errors.New("range check")
	}
	fn.Size = sizes

	// BitsPerSample: only a fixed set of widths is allowed.
	bps, ok := core.TraceToDirectObject(dict.Get("BitsPerSample")).(*core.PdfObjectInteger)
	if !ok {
		common.Log.Error("BitsPerSample not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	switch *bps {
	case 1, 2, 4, 8, 12, 16, 24, 32:
		fn.BitsPerSample = int(*bps)
	default:
		common.Log.Error("Bits per sample outside range (%d)", *bps)
		return nil, errors.New("range check")
	}

	// Order: optional, defaults to 1.
	fn.Order = 1
	if order, ok := core.TraceToDirectObject(dict.Get("Order")).(*core.PdfObjectInteger); ok {
		if *order != 1 && *order != 3 {
			common.Log.Error("Invalid order (%d)", *order)
			return nil, errors.New("range check")
		}
		fn.Order = int(*order)
	}

	// Encode: optional 2*m array specifying the linear mapping of input values into the domain
	// of the function's sample table.
	if encArr, ok := core.TraceToDirectObject(dict.Get("Encode")).(*core.PdfObjectArray); ok {
		vals, err := encArr.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fn.Encode = vals
	}

	// Decode: optional.
	if decArr, ok := core.TraceToDirectObject(dict.Get("Decode")).(*core.PdfObjectArray); ok {
		vals, err := decArr.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fn.Decode = vals
	}

	// The decoded stream content is the packed sample table.
	raw, err := core.DecodeStream(stream)
	if err != nil {
		return nil, err
	}
	fn.rawData = raw

	return fn, nil
}
// ToPdfObject returns the PDF representation of the function: a stream whose dictionary carries
// FunctionType, Domain, Range, Size, BitsPerSample, Order (only when it is not 1), Encode and
// Decode (only when set) and Length, and whose content is the raw, unencoded sample data.
//
// The container the function was loaded from is reused (and its dictionary and content replaced);
// if there is none, a new stream object is created and remembered.
func (f *PdfFunctionType0) ToPdfObject() core.PdfObject {
	if f.container == nil {
		f.container = &core.PdfObjectStream{}
	}

	dict := core.MakeDict()
	dict.Set("FunctionType", core.MakeInteger(0))

	// Domain (required).
	domainArray := &core.PdfObjectArray{}
	for _, val := range f.Domain {
		domainArray.Append(core.MakeFloat(val))
	}
	dict.Set("Domain", domainArray)

	// Range (required).
	rangeArray := &core.PdfObjectArray{}
	for _, val := range f.Range {
		rangeArray.Append(core.MakeFloat(val))
	}
	dict.Set("Range", rangeArray)

	// Size (required).
	sizeArray := &core.PdfObjectArray{}
	for _, val := range f.Size {
		sizeArray.Append(core.MakeInteger(int64(val)))
	}
	dict.Set("Size", sizeArray)

	dict.Set("BitsPerSample", core.MakeInteger(int64(f.BitsPerSample)))

	// Order is omitted when it has the default value.
	if f.Order != 1 {
		dict.Set("Order", core.MakeInteger(int64(f.Order)))
	}

	// Encode (optional). Must be written back when set, otherwise the reloaded function would
	// fall back to the default input mapping and evaluate differently.
	if f.Encode != nil {
		encodeArray := &core.PdfObjectArray{}
		for _, val := range f.Encode {
			encodeArray.Append(core.MakeFloat(val))
		}
		dict.Set("Encode", encodeArray)
	}

	// Decode (optional). Same reasoning as for Encode: dropping it would silently change the
	// output mapping to Range.
	if f.Decode != nil {
		decodeArray := &core.PdfObjectArray{}
		for _, val := range f.Decode {
			decodeArray.Append(core.MakeFloat(val))
		}
		dict.Set("Decode", decodeArray)
	}

	// TODO: Encode the stream data (filters).
	// Either here, or automatically later on when writing out.
	dict.Set("Length", core.MakeInteger(int64(len(f.rawData))))
	f.container.Stream = f.rawData
	f.container.PdfObjectDictionary = dict

	return f.container
}
// Evaluate runs the function on the passed in slice and returns the results.
//
// x must hold exactly NumInputs values. Each input is clipped to its Domain interval, mapped into
// sample table coordinates through Encode (default [0 Size[i]-1]) and rounded to the nearest
// sample: only nearest neighbour lookup is implemented, Order is not consulted. Each sample
// component is then mapped through Decode (default Range) and clipped to Range.
//
// An error is returned when the number of inputs does not match, when there are no inputs, or
// when Domain, Size, Encode, Range or Decode are too short for the declared number of inputs and
// outputs. If the sample table is too short for the computed position, the affected outputs are
// skipped (a warning is logged) and fewer than NumOutputs values are returned.
func (f *PdfFunctionType0) Evaluate(x []float64) ([]float64, error) {
	if len(x) != f.NumInputs {
		common.Log.Error("Number of inputs not matching what is needed")
		return nil, errors.New("range check error")
	}

	if f.data == nil {
		// Process the samples if not already done.
		if err := f.processSamples(); err != nil {
			return nil, err
		}
	}

	// Fall back to default Encode/Decode params if not set.
	encode := f.Encode
	if encode == nil {
		encode = make([]float64, 0, 2*len(f.Size))
		for _, size := range f.Size {
			encode = append(encode, 0, float64(size-1))
		}
	}
	decode := f.Decode
	if decode == nil {
		decode = f.Range
	}

	// Validate the parameter lengths up front so that inconsistent data results in an error
	// rather than an index out of range panic below.
	if f.NumInputs < 1 || len(f.Domain) < 2*f.NumInputs || len(f.Size) < f.NumInputs ||
		len(encode) < 2*f.NumInputs {
		common.Log.Error("Domain, Size or Encode not matching number of inputs")
		return nil, errors.New("range check error")
	}
	if len(f.Range) < 2*f.NumOutputs || len(decode) < 2*f.NumOutputs {
		common.Log.Error("Range or Decode not matching number of outputs")
		return nil, errors.New("range check error")
	}

	// Nearest neighbour lookup: determine the sample index along each input dimension.
	indices := make([]int, 0, len(x))
	for i, xi := range x {
		// See section 7.10.2 Type 0 (Sampled) Functions (pp. 93-94 PDF32000_2008).
		xip := math.Min(math.Max(xi, f.Domain[2*i]), f.Domain[2*i+1])
		ei := interpolate(xip, f.Domain[2*i], f.Domain[2*i+1], encode[2*i], encode[2*i+1])
		eip := math.Min(math.Max(ei, 0), float64(f.Size[i]-1))

		// eip is a real valued coordinate into the sample table. Proper interpolation between
		// the surrounding samples (linear, cubic spline) is not implemented; the nearest sample
		// is used instead.
		index := int(math.Floor(eip + 0.5))
		// Clamp to the valid index range. The upper bound is applied first so that an empty
		// dimension (Size[i] == 0) still ends up at index 0 rather than -1.
		if index >= f.Size[i] {
			index = f.Size[i] - 1
		}
		if index < 0 {
			index = 0
		}
		indices = append(indices, index)
	}

	// Flatten the per-dimension indices into an offset into the sample table. The first
	// dimension varies fastest, so the stride of dimension i is the product of the preceding sizes.
	m, stride := 0, 1
	for i, index := range indices {
		m += index * stride
		stride *= f.Size[i]
	}
	m *= f.NumOutputs

	// Largest value a sample can take: 2^BitsPerSample - 1. This value (not 2^BitsPerSample)
	// maps onto the upper Decode bound.
	maxSample := math.Pow(2, float64(f.BitsPerSample)) - 1

	// Output values.
	var outputs []float64
	for j := 0; j < f.NumOutputs; j++ {
		rjIdx := m + j
		if rjIdx >= len(f.data) {
			common.Log.Debug("WARN: not enough input samples to determine output values. Output may be incorrect.")
			continue
		}
		rj := f.data[rjIdx]
		rjp := interpolate(float64(rj), 0, maxSample, decode[2*j], decode[2*j+1])
		yj := math.Min(math.Max(rjp, f.Range[2*j]), f.Range[2*j+1])
		outputs = append(outputs, yj)
	}

	return outputs, nil
}
// processSamples converts the raw stream data into the sample table by handing it, together with
// BitsPerSample, to sampling.ResampleBytes. The maximum supported BitsPerSample is 32, so the
// resulting samples are stored as uint32 values. This is somewhat wasteful for a small
// BitsPerSample, but these tables are presumably not huge at any rate.
// It currently always returns nil.
func (f *PdfFunctionType0) processSamples() error {
	f.data = sampling.ResampleBytes(f.rawData, f.BitsPerSample)
	return nil
}
// PdfFunctionType2 defines an exponential interpolation of one input value and n
// output values:
// f(x) = y_0, ..., y_(n-1)
// y_j = C0_j + x^N * (C1_j - C0_j); for 0 <= j < n
// When N=1 ; linear interpolation between C0 and C1.
type PdfFunctionType2 struct {
// Domain is required when loading; it must have an even number of entries.
Domain []float64
// Range is optional; when present it must have an even number of entries.
Range []float64
// C0 holds the function result for x = 0. Optional; Evaluate uses [0.0] when nil.
C0 []float64
// C1 holds the function result for x = 1. Optional; Evaluate uses [1.0] when nil.
C1 []float64
// N is the interpolation exponent (required when loading).
N float64
// container is the indirect object the function was loaded from, if any; reused by ToPdfObject.
container *core.PdfIndirectObject
}
// newPdfFunctionType2FromPdfObject loads an exponential interpolation (type 2) function from obj.
//
// obj can be either an indirect object or a dictionary. If indirect, it must be holding a
// dictionary, i.e. acting as a container; when converting back to a PDF object the same container
// is used.
//
// Domain and N are required; Range, C0 and C1 are optional. C0 and C1 must describe the same
// number of outputs, where a missing C0 counts as [0.0] and a missing C1 as [1.0] (the defaults
// Evaluate applies). Missing entries are left nil on the returned function.
func newPdfFunctionType2FromPdfObject(obj core.PdfObject) (*PdfFunctionType2, error) {
	fun := &PdfFunctionType2{}

	var dict *core.PdfObjectDictionary
	switch t := obj.(type) {
	case *core.PdfIndirectObject:
		d, ok := t.PdfObject.(*core.PdfObjectDictionary)
		if !ok {
			return nil, errors.New("type check error")
		}
		fun.container = t
		dict = d
	case *core.PdfObjectDictionary:
		dict = t
	default:
		return nil, errors.New("type check error")
	}

	common.Log.Trace("FUNC2: %s", dict.String())

	// Domain (required).
	array, has := core.TraceToDirectObject(dict.Get("Domain")).(*core.PdfObjectArray)
	if !has {
		common.Log.Error("Domain not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	if array.Len()%2 != 0 {
		common.Log.Error("Domain range invalid")
		return nil, errors.New("invalid domain range")
	}
	domain, err := array.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fun.Domain = domain

	// Range (optional).
	array, has = core.TraceToDirectObject(dict.Get("Range")).(*core.PdfObjectArray)
	if has {
		if array.Len()%2 != 0 {
			return nil, errors.New("invalid range")
		}
		rang, err := array.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fun.Range = rang
	}

	// C0 (optional).
	array, has = core.TraceToDirectObject(dict.Get("C0")).(*core.PdfObjectArray)
	if has {
		c0, err := array.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fun.C0 = c0
	}

	// C1 (optional).
	array, has = core.TraceToDirectObject(dict.Get("C1")).(*core.PdfObjectArray)
	if has {
		c1, err := array.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fun.C1 = c1
	}

	// Compare the effective lengths: an absent C0/C1 stands for a single default value, so a
	// dictionary that only specifies one of them (with one entry) is valid and must not be rejected.
	numC0, numC1 := len(fun.C0), len(fun.C1)
	if fun.C0 == nil {
		numC0 = 1
	}
	if fun.C1 == nil {
		numC1 = 1
	}
	if numC0 != numC1 {
		common.Log.Error("C0 and C1 not matching")
		return nil, core.ErrRangeError
	}

	// Exponent (required).
	N, err := core.GetNumberAsFloat(core.TraceToDirectObject(dict.Get("N")))
	if err != nil {
		common.Log.Error("N missing or invalid, dict: %s", dict.String())
		return nil, err
	}
	fun.N = N

	return fun, nil
}
// ToPdfObject returns the PDF representation of the function: a dictionary with FunctionType,
// Domain, N and, when set, Range, C0 and C1. If the function was loaded from an indirect object,
// that container is updated to hold the new dictionary and returned instead.
func (f *PdfFunctionType2) ToPdfObject() core.PdfObject {
	// floats converts a slice of numbers into a PDF array of reals.
	floats := func(vals []float64) *core.PdfObjectArray {
		arr := &core.PdfObjectArray{}
		for _, v := range vals {
			arr.Append(core.MakeFloat(v))
		}
		return arr
	}

	dict := core.MakeDict()
	dict.Set("FunctionType", core.MakeInteger(2))

	// Domain is always written.
	dict.Set("Domain", floats(f.Domain))

	// Range, C0 and C1 are only written when set.
	if f.Range != nil {
		dict.Set("Range", floats(f.Range))
	}
	if f.C0 != nil {
		dict.Set("C0", floats(f.C0))
	}
	if f.C1 != nil {
		dict.Set("C1", floats(f.C1))
	}

	// Exponent.
	dict.Set("N", core.MakeFloat(f.N))

	// Without a container the bare dictionary is the result.
	if f.container == nil {
		return dict
	}
	f.container.PdfObject = dict
	return f.container
}
// Evaluate runs the function on the passed in slice and returns the results.
//
// x must hold exactly one value. For every output j the result is
// C0[j] + x^N * (C1[j] - C0[j]), where C0 defaults to [0.0] and C1 to [1.0] when not set.
// An error is returned when the number of inputs is not one or when C0 and C1 (after applying
// the defaults) differ in length. The input is not clipped to Domain and the outputs are not
// clipped to Range.
func (f *PdfFunctionType2) Evaluate(x []float64) ([]float64, error) {
	if len(x) != 1 {
		common.Log.Error("Only one input allowed")
		return nil, errors.New("range check")
	}

	// Prepare: apply the defaults for missing coefficients.
	c0 := []float64{0.0}
	if f.C0 != nil {
		c0 = f.C0
	}
	c1 := []float64{1.0}
	if f.C1 != nil {
		c1 = f.C1
	}

	// The fields are exported and can be set by hand; guard against mismatched lengths so that
	// indexing c1 below cannot go out of range.
	if len(c0) != len(c1) {
		common.Log.Error("C0 and C1 not matching")
		return nil, errors.New("range check")
	}

	// x^N is the same for every output, compute it once.
	xn := math.Pow(x[0], f.N)

	y := make([]float64, 0, len(c0))
	for i := range c0 {
		y = append(y, c0[i]+xn*(c1[i]-c0[i]))
	}

	return y, nil
}
// PdfFunctionType3 defines stitching of the subdomains of several 1-input functions to produce
// a single new 1-input function.
type PdfFunctionType3 struct {
// Domain is required when loading and must have exactly two entries.
Domain []float64
// Range is optional; when present it must have an even number of entries.
Range []float64
Functions []PdfFunction // the k functions being stitched together
Bounds []float64 // k-1 numbers; defines the intervals where each function applies
Encode []float64 // Array of 2k numbers (two per function).
// container is the indirect object the function was loaded from, if any; reused by ToPdfObject.
container *core.PdfIndirectObject
}
// Evaluate runs the function on the passed in slice and returns the results.
//
// x must hold exactly one value. The value is clipped to Domain, the subfunction whose
// subdomain contains it is selected through Bounds (subdomain i covers
// [Bounds[i-1], Bounds[i]), with Domain[0] and Domain[1] as the outermost limits and the last
// subdomain including Domain[1]), and the value is mapped linearly from that subdomain onto
// [Encode[2i], Encode[2i+1]] before being passed to the subfunction. If Range is set, the
// subfunction's outputs are clipped to it.
//
// An error is returned when the number of inputs is not one, when Functions, Domain, Bounds and
// Encode are not consistent with each other, or when the selected subfunction fails.
func (f *PdfFunctionType3) Evaluate(x []float64) ([]float64, error) {
	if len(x) != 1 {
		common.Log.Error("Only one input allowed")
		return nil, errors.New("range check")
	}

	// k functions need a 2 element domain, k-1 bounds and 2k encode values.
	k := len(f.Functions)
	if k == 0 || len(f.Domain) < 2 || len(f.Bounds) != k-1 || len(f.Encode) < 2*k {
		common.Log.Error("Domain, Bounds or Encode not matching number of functions (%d)", k)
		return nil, errors.New("range check")
	}

	// Clip the input to the domain.
	xc := math.Min(math.Max(x[0], f.Domain[0]), f.Domain[1])

	// Determine which function to use: the first subdomain whose upper bound lies above xc.
	// Values at or beyond the last bound fall into the last subdomain.
	idx := 0
	for idx < len(f.Bounds) && xc >= f.Bounds[idx] {
		idx++
	}

	// Limits of the selected subdomain.
	lo, hi := f.Domain[0], f.Domain[1]
	if idx > 0 {
		lo = f.Bounds[idx-1]
	}
	if idx < len(f.Bounds) {
		hi = f.Bounds[idx]
	}

	// Encode: linear mapping of [lo, hi] onto [Encode[2i], Encode[2i+1]]. A (nearly) empty
	// subdomain maps to the lower encode value, which avoids a division by zero.
	encLo, encHi := f.Encode[2*idx], f.Encode[2*idx+1]
	xe := encLo
	if span := hi - lo; math.Abs(span) >= 0.000001 {
		xe = encLo + (xc-lo)*(encHi-encLo)/span
	}

	y, err := f.Functions[idx].Evaluate([]float64{xe})
	if err != nil {
		return nil, err
	}

	// Clip the outputs to Range, for as many outputs as Range has intervals.
	for j := 0; j < len(y) && 2*j+1 < len(f.Range); j++ {
		y[j] = math.Min(math.Max(y[j], f.Range[2*j]), f.Range[2*j+1])
	}

	return y, nil
}
// newPdfFunctionType3FromPdfObject loads a stitching (type 3) function from obj, which must be a
// dictionary or an indirect object holding one. In the latter case the indirect object is kept as
// the container used when converting back to a PDF object.
//
// Domain (exactly two numbers), Functions, Bounds and Encode are required; Range is optional.
// Every entry of Functions is loaded through newPdfFunctionFromPdfObject. For k functions there
// must be k-1 bounds and 2k encode values.
func newPdfFunctionType3FromPdfObject(obj core.PdfObject) (*PdfFunctionType3, error) {
	fn := &PdfFunctionType3{}

	var dict *core.PdfObjectDictionary
	switch t := obj.(type) {
	case *core.PdfIndirectObject:
		inner, ok := t.PdfObject.(*core.PdfObjectDictionary)
		if !ok {
			return nil, errors.New("type check error")
		}
		fn.container = t
		dict = inner
	case *core.PdfObjectDictionary:
		dict = t
	default:
		return nil, errors.New("type check error")
	}

	// Domain: a single interval.
	domainArr, ok := core.TraceToDirectObject(dict.Get("Domain")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Domain not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	if domainArr.Len() != 2 {
		common.Log.Error("Domain invalid")
		return nil, errors.New("invalid domain range")
	}
	domain, err := domainArr.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fn.Domain = domain

	// Range: optional.
	if rangeArr, ok := core.TraceToDirectObject(dict.Get("Range")).(*core.PdfObjectArray); ok {
		if n := rangeArr.Len(); n < 0 || n%2 != 0 {
			return nil, errors.New("invalid range")
		}
		vals, err := rangeArr.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fn.Range = vals
	}

	// Functions: the subfunctions being stitched.
	funcsArr, ok := core.TraceToDirectObject(dict.Get("Functions")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Functions not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	fn.Functions = []PdfFunction{}
	for _, elem := range funcsArr.Elements() {
		sub, err := newPdfFunctionFromPdfObject(elem)
		if err != nil {
			return nil, err
		}
		fn.Functions = append(fn.Functions, sub)
	}

	// Bounds: one fewer than the number of functions.
	boundsArr, ok := core.TraceToDirectObject(dict.Get("Bounds")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Bounds not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	bounds, err := boundsArr.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fn.Bounds = bounds
	if len(fn.Functions)-1 != len(fn.Bounds) {
		common.Log.Error("Bounds (%d) and num functions (%d) not matching", len(fn.Bounds), len(fn.Functions))
		return nil, errors.New("range check")
	}

	// Encode: two values per function.
	encodeArr, ok := core.TraceToDirectObject(dict.Get("Encode")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Encode not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	encode, err := encodeArr.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fn.Encode = encode
	if 2*len(fn.Functions) != len(fn.Encode) {
		common.Log.Error("Len encode (%d) and num functions (%d) not matching up", len(fn.Encode), len(fn.Functions))
		return nil, errors.New("range check")
	}

	return fn, nil
}
// ToPdfObject returns the PDF representation of the function: a dictionary with FunctionType,
// Domain and, when set, Range, Functions, Bounds and Encode. Subfunctions are serialised through
// their own ToPdfObject. If the function was loaded from an indirect object, that container is
// updated to hold the new dictionary and returned instead.
func (f *PdfFunctionType3) ToPdfObject() core.PdfObject {
	// floats converts a slice of numbers into a PDF array of reals.
	floats := func(vals []float64) *core.PdfObjectArray {
		arr := &core.PdfObjectArray{}
		for _, v := range vals {
			arr.Append(core.MakeFloat(v))
		}
		return arr
	}

	dict := core.MakeDict()
	dict.Set("FunctionType", core.MakeInteger(3))

	// Domain is always written.
	dict.Set("Domain", floats(f.Domain))

	// The remaining entries are only written when set.
	if f.Range != nil {
		dict.Set("Range", floats(f.Range))
	}
	if f.Functions != nil {
		subs := &core.PdfObjectArray{}
		for _, sub := range f.Functions {
			subs.Append(sub.ToPdfObject())
		}
		dict.Set("Functions", subs)
	}
	if f.Bounds != nil {
		dict.Set("Bounds", floats(f.Bounds))
	}
	if f.Encode != nil {
		dict.Set("Encode", floats(f.Encode))
	}

	// Without a container the bare dictionary is the result.
	if f.container == nil {
		return dict
	}
	f.container.PdfObject = dict
	return f.container
}
// PdfFunctionType4 is a PostScript calculator function.
type PdfFunctionType4 struct {
// Domain is required when loading; it must have an even number of entries.
Domain []float64
// Range is optional when loading; when present it must have an even number of entries.
Range []float64
// Program is the PostScript program run by Evaluate; when loading it is parsed from the
// decoded stream content.
Program *ps.PSProgram
// executor runs Program; it is created on the first call to Evaluate and then reused.
executor *ps.PSExecutor
// decodedData is the decoded stream content (the program source); ToPdfObject derives it from
// Program when it is nil.
decodedData []byte
// container is the stream object the function was loaded from, reused by ToPdfObject.
container *core.PdfObjectStream
}
// Evaluate runs the function. Input is [x1 x2 x3].
//
// The inputs are handed to the PostScript executor as reals, in order; whatever the program
// leaves behind ([y1 ... yM]) is converted to floats and returned. The executor is created from
// Program on first use and kept for later calls. Errors from execution or from the conversion
// are returned as is.
func (f *PdfFunctionType4) Evaluate(xVec []float64) ([]float64, error) {
	if f.executor == nil {
		f.executor = ps.NewPSExecutor(f.Program)
	}

	var stack []ps.PSObject
	for i := 0; i < len(xVec); i++ {
		stack = append(stack, ps.MakeReal(xVec[i]))
	}

	results, err := f.executor.Execute(stack)
	if err != nil {
		return nil, err
	}

	// After execution the outputs are on the stack [y1 ... yM]; convert them to floats.
	floats, err := ps.PSObjectArrayToFloat64Array(results)
	if err != nil {
		return nil, err
	}
	return floats, nil
}
// newPdfFunctionType4FromStream loads a PostScript calculator (type 4) function from a PDF stream
// object. Domain is required and Range is optional; both must have an even number of entries.
// The stream content is decoded and parsed as a PostScript program. The stream itself is
// remembered as the function's container.
func newPdfFunctionType4FromStream(stream *core.PdfObjectStream) (*PdfFunctionType4, error) {
	dict := stream.PdfObjectDictionary
	fn := &PdfFunctionType4{container: stream}

	// Domain (required).
	domainArr, ok := core.TraceToDirectObject(dict.Get("Domain")).(*core.PdfObjectArray)
	if !ok {
		common.Log.Error("Domain not specified")
		return nil, errors.New("required attribute missing or invalid")
	}
	if domainArr.Len()%2 != 0 {
		common.Log.Error("Domain invalid")
		return nil, errors.New("invalid domain range")
	}
	domain, err := domainArr.ToFloat64Array()
	if err != nil {
		return nil, err
	}
	fn.Domain = domain

	// Range (optional).
	if rangeArr, ok := core.TraceToDirectObject(dict.Get("Range")).(*core.PdfObjectArray); ok {
		if n := rangeArr.Len(); n < 0 || n%2 != 0 {
			return nil, errors.New("invalid range")
		}
		vals, err := rangeArr.ToFloat64Array()
		if err != nil {
			return nil, err
		}
		fn.Range = vals
	}

	// Program: decode the stream and parse the PostScript code it contains.
	source, err := core.DecodeStream(stream)
	if err != nil {
		return nil, err
	}
	fn.decodedData = source

	program, err := ps.NewPSParser(source).Parse()
	if err != nil {
		return nil, err
	}
	fn.Program = program

	return fn, nil
}
// ToPdfObject returns the PDF representation of the function: a stream whose dictionary carries
// FunctionType, Domain, Range and Length, and whose content is the unencoded program source.
//
// The container the function was loaded from is reused (and its dictionary and content replaced);
// if there is none, a new stream object is created and remembered. For functions that were
// created rather than parsed, the source is generated from Program on the first call.
func (f *PdfFunctionType4) ToPdfObject() core.PdfObject {
	if f.container == nil {
		f.container = &core.PdfObjectStream{}
	}
	stream := f.container

	// floats converts a slice of numbers into a PDF array of reals.
	floats := func(vals []float64) *core.PdfObjectArray {
		arr := &core.PdfObjectArray{}
		for _, v := range vals {
			arr.Append(core.MakeFloat(v))
		}
		return arr
	}

	dict := core.MakeDict()
	dict.Set("FunctionType", core.MakeInteger(4))

	// Domain and Range are always written, even when empty.
	dict.Set("Domain", floats(f.Domain))
	dict.Set("Range", floats(f.Range))

	// Created (not parsed) functions have no source yet: derive it from the program.
	if f.decodedData == nil && f.Program != nil {
		f.decodedData = []byte(f.Program.String())
	}

	// TODO: Encode.
	// Either here, or automatically later on when writing out.
	dict.Set("Length", core.MakeInteger(int64(len(f.decodedData))))

	stream.Stream = f.decodedData
	stream.PdfObjectDictionary = dict

	return stream
}