mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00

* Track runes in IdentityEncoder (for subsetting), track decoded runes * Working with the identity encoder in font_composite.go * Add GetFilterArray to multi encoder. Add comments. * Add NewFromContents constructor to extractor only requiring contents and resources * golint fixes * Optimizer compress streams - improved detection of raw streams * Optimize - CleanContentStream optimizer that removes redundant operands * WIP Optimize - clean fonts Will support both font file reduction and subsetting. (WIP) * Optimize - image processing - try combined DCT and Flate * Update options.go * Update optimizer.go * Create utils.go for optimize with common methods needed for optimization * Optimizer - add font subsetting method Covers XObject Forms, annotations etc. Uses extractor package to extract text marks covering what fonts and glyphs are used. Package truetype used for subsetting. * Add some comments * Fix cmap parsing rune conversion * Error checking for extractor. Add some comments. * Update Jenkinsfile * Update modules
109 lines
2.9 KiB
Go
109 lines
2.9 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package optimize
|
|
|
|
import (
|
|
"github.com/unidoc/unipdf/v3/core"
|
|
)
|
|
|
|
// New creates a optimizers chain from options.
|
|
func New(options Options) *Chain {
|
|
chain := new(Chain)
|
|
if options.CleanFonts || options.SubsetFonts {
|
|
chain.Append(&CleanFonts{Subset: options.SubsetFonts})
|
|
}
|
|
if options.CleanContentstream {
|
|
chain.Append(new(CleanContentstream))
|
|
}
|
|
if options.ImageUpperPPI > 0 {
|
|
imageOptimizer := new(ImagePPI)
|
|
imageOptimizer.ImageUpperPPI = options.ImageUpperPPI
|
|
chain.Append(imageOptimizer)
|
|
}
|
|
if options.ImageQuality > 0 {
|
|
imageOptimizer := new(Image)
|
|
imageOptimizer.ImageQuality = options.ImageQuality
|
|
chain.Append(imageOptimizer)
|
|
}
|
|
if options.CombineDuplicateDirectObjects {
|
|
chain.Append(new(CombineDuplicateDirectObjects))
|
|
}
|
|
if options.CombineDuplicateStreams {
|
|
chain.Append(new(CombineDuplicateStreams))
|
|
}
|
|
if options.CombineIdenticalIndirectObjects {
|
|
chain.Append(new(CombineIdenticalIndirectObjects))
|
|
}
|
|
if options.UseObjectStreams {
|
|
chain.Append(new(ObjectStreams))
|
|
}
|
|
if options.CompressStreams {
|
|
chain.Append(new(CompressStreams))
|
|
}
|
|
return chain
|
|
}
|
|
|
|
// replaceObjectsInPlace replaces objects. objTo will be modified by the process.
|
|
func replaceObjectsInPlace(objects []core.PdfObject, objTo map[core.PdfObject]core.PdfObject) {
|
|
if objTo == nil || len(objTo) == 0 {
|
|
return
|
|
}
|
|
for i, obj := range objects {
|
|
if to, found := objTo[obj]; found {
|
|
objects[i] = to
|
|
continue
|
|
}
|
|
objTo[obj] = obj
|
|
switch t := obj.(type) {
|
|
case *core.PdfObjectArray:
|
|
values := make([]core.PdfObject, t.Len())
|
|
copy(values, t.Elements())
|
|
replaceObjectsInPlace(values, objTo)
|
|
for i, obj := range values {
|
|
t.Set(i, obj)
|
|
}
|
|
case *core.PdfObjectStreams:
|
|
replaceObjectsInPlace(t.Elements(), objTo)
|
|
case *core.PdfObjectStream:
|
|
values := []core.PdfObject{t.PdfObjectDictionary}
|
|
replaceObjectsInPlace(values, objTo)
|
|
t.PdfObjectDictionary = values[0].(*core.PdfObjectDictionary)
|
|
case *core.PdfObjectDictionary:
|
|
keys := t.Keys()
|
|
values := make([]core.PdfObject, len(keys))
|
|
for i, key := range keys {
|
|
values[i] = t.Get(key)
|
|
}
|
|
replaceObjectsInPlace(values, objTo)
|
|
for i, key := range keys {
|
|
t.Set(key, values[i])
|
|
}
|
|
case *core.PdfIndirectObject:
|
|
values := []core.PdfObject{t.PdfObject}
|
|
replaceObjectsInPlace(values, objTo)
|
|
t.PdfObject = values[0]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update all the object numbers prior to get hash of objects.
|
|
func updateObjectNumbers(objects []core.PdfObject) {
|
|
// Update numbers
|
|
for idx, obj := range objects {
|
|
switch o := obj.(type) {
|
|
case *core.PdfIndirectObject:
|
|
o.ObjectNumber = int64(idx + 1)
|
|
o.GenerationNumber = 0
|
|
case *core.PdfObjectStream:
|
|
o.ObjectNumber = int64(idx + 1)
|
|
o.GenerationNumber = 0
|
|
case *core.PdfObjectStreams:
|
|
o.ObjectNumber = int64(idx + 1)
|
|
o.GenerationNumber = 0
|
|
}
|
|
}
|
|
}
|