2018-09-29 17:22:53 +03:00
|
|
|
/*
|
|
|
|
* This file is subject to the terms and conditions defined in
|
|
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package optimize
|
|
|
|
|
|
|
|
import (
|
|
|
|
"crypto/md5"
|
|
|
|
|
2019-05-16 23:44:51 +03:00
|
|
|
"github.com/unidoc/unipdf/v3/core"
|
2018-09-29 17:22:53 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// CombineDuplicateStreams is an optimizer that merges stream objects which
// hash to the same value, so that only one copy of each is retained.
// It implements interface model.Optimizer.
//
// The type carries no state; its zero value is ready to use.
type CombineDuplicateStreams struct{}
|
|
|
|
|
|
|
|
// Optimize optimizes PDF objects to decrease PDF size.
|
|
|
|
func (dup *CombineDuplicateStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
|
|
|
replaceTable := make(map[core.PdfObject]core.PdfObject)
|
|
|
|
toDelete := make(map[core.PdfObject]struct{})
|
|
|
|
streamsByHash := make(map[string][]*core.PdfObjectStream)
|
|
|
|
for _, obj := range objects {
|
|
|
|
if stream, isStreamObj := obj.(*core.PdfObjectStream); isStreamObj {
|
|
|
|
hasher := md5.New()
|
|
|
|
hasher.Write([]byte(stream.Stream))
|
|
|
|
hash := string(hasher.Sum(nil))
|
|
|
|
streamsByHash[hash] = append(streamsByHash[hash], stream)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for _, streams := range streamsByHash {
|
|
|
|
if len(streams) < 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
firstStream := streams[0]
|
|
|
|
for i := 1; i < len(streams); i++ {
|
|
|
|
stream := streams[i]
|
|
|
|
replaceTable[stream] = firstStream
|
|
|
|
toDelete[stream] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete))
|
|
|
|
for _, obj := range objects {
|
|
|
|
if _, found := toDelete[obj]; found {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
optimizedObjects = append(optimizedObjects, obj)
|
|
|
|
}
|
|
|
|
replaceObjectsInPlace(optimizedObjects, replaceTable)
|
|
|
|
return optimizedObjects, nil
|
|
|
|
}
|