unipdf/model/optimize/compress_streams.go
Gunnsteinn Hall 11f692bc3a
Font subsetting and font optimization improvements (#362)
* Track runes in IdentityEncoder (for subsetting), track decoded runes

* Working with the identity encoder in font_composite.go

* Add GetFilterArray to multi encoder.  Add comments.

* Add NewFromContents constructor to extractor only requiring contents and resources

* golint fixes

* Optimizer compress streams - improved detection of raw streams

* Optimize - CleanContentStream optimizer that removes redundant operands

* WIP Optimize - clean fonts

Will support both font file reduction and subsetting. (WIP)

* Optimize - image processing - try combined DCT and Flate

* Update options.go

* Update optimizer.go

* Create utils.go for optimize with common methods needed for optimization

* Optimizer - add font subsetting method

Covers XObject Forms, annotations etc.  Uses extractor package to extract text marks covering what fonts and glyphs are used.  Package truetype used for subsetting.

* Add some comments

* Fix cmap parsing rune conversion

* Error checking for extractor.  Add some comments.

* Update Jenkinsfile

* Update modules
2020-06-16 21:19:10 +00:00

53 lines
1.5 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package optimize
import (
"github.com/unidoc/unipdf/v3/core"
)
// CompressStreams is the optimizer that Flate-compresses streams which do
// not yet have a filter applied. The type has no fields, so its zero value
// is ready to use.
// It implements interface model.Optimizer.
type CompressStreams struct{}
// Optimize Flate-compresses every stream object in objects that has no
// filter applied yet, keeping the compressed form only when it is smaller.
//
// The returned slice is a shallow copy of objects: it holds the same object
// values, so streams that get compressed are modified in place and the change
// is visible through both slices.
//
// If encoding a stream fails, the copied slice is returned together with the
// encoder's error; streams handled before the failure stay compressed.
func (c *CompressStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
	optimizedObjects = make([]core.PdfObject, len(objects))
	copy(optimizedObjects, objects)

	for i := range objects {
		stream, ok := core.GetStream(objects[i])
		if !ok {
			// Only stream objects are candidates for compression.
			continue
		}
		streamDict := stream.PdfObjectDictionary

		// A stream counts as already encoded when its Filter entry is a name
		// or a non-empty array; an absent Filter or an empty array does not.
		// TODO: Try filter combinations, and ignoring inefficient filters.
		if filter := streamDict.Get("Filter"); filter != nil {
			if _, isName := core.GetName(filter); isName {
				continue
			}
			if filters, isArray := core.GetArray(filter); isArray && filters.Len() > 0 {
				continue
			}
		}

		// Flate: most mainstream compressor and probably most robust.
		flate := core.NewFlateEncoder()
		compressed, encodeErr := flate.EncodeBytes(stream.Stream)
		if encodeErr != nil {
			return optimizedObjects, encodeErr
		}

		// The extra dictionary entries are part of the cost, so add their
		// serialized size before comparing against the raw stream size.
		flateDict := flate.MakeStreamDict()
		compressedCost := len(compressed) + len(flateDict.WriteString())
		if compressedCost >= len(stream.Stream) {
			// No gain; leave the stream untouched.
			continue
		}

		stream.Stream = compressed
		streamDict.Merge(flateDict)
		streamDict.Set("Length", core.MakeInteger(int64(len(stream.Stream))))
	}

	return optimizedObjects, nil
}