mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Implemented : PDFs optimization
This commit is contained in:
parent
6a6a387faa
commit
ea5dba8e0d
@ -74,6 +74,13 @@ type PdfObjectStream struct {
|
||||
Stream []byte
|
||||
}
|
||||
|
||||
// PdfObjectStreams represents the primitive PDF object streams.
|
||||
// 7.5.7 Object Streams (page 45).
|
||||
type PdfObjectStreams struct {
|
||||
PdfObjectReference
|
||||
vec []PdfObject
|
||||
}
|
||||
|
||||
// MakeDict creates and returns an empty PdfObjectDictionary.
|
||||
func MakeDict() *PdfObjectDictionary {
|
||||
d := &PdfObjectDictionary{}
|
||||
@ -203,6 +210,16 @@ func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error
|
||||
return stream, nil
|
||||
}
|
||||
|
||||
// MakeObjectStreams creates an PdfObjectStreams from a list of PdfObjects.
|
||||
func MakeObjectStreams(objects ...PdfObject) *PdfObjectStreams {
|
||||
streams := &PdfObjectStreams{}
|
||||
streams.vec = []PdfObject{}
|
||||
for _, obj := range objects {
|
||||
streams.vec = append(streams.vec, obj)
|
||||
}
|
||||
return streams
|
||||
}
|
||||
|
||||
func (bool *PdfObjectBool) String() string {
|
||||
if *bool {
|
||||
return "true"
|
||||
@ -848,3 +865,53 @@ func GetStream(obj PdfObject) (stream *PdfObjectStream, found bool) {
|
||||
stream, found = obj.(*PdfObjectStream)
|
||||
return stream, found
|
||||
}
|
||||
|
||||
// GetObjectStreams returns the *PdfObjectStreams represented by the PdfObject. On type mismatch the found bool flag is
|
||||
// false and a nil pointer is returned.
|
||||
func GetObjectStreams(obj PdfObject) (objStream *PdfObjectStreams, found bool) {
|
||||
objStream, found = obj.(*PdfObjectStreams)
|
||||
return objStream, found
|
||||
}
|
||||
|
||||
// Append appends PdfObject(s) to the streams.
|
||||
func (streams *PdfObjectStreams) Append(objects ...PdfObject) {
|
||||
if streams == nil {
|
||||
common.Log.Debug("Warn - Attempt to append to a nil streams")
|
||||
return
|
||||
}
|
||||
if streams.vec == nil {
|
||||
streams.vec = []PdfObject{}
|
||||
}
|
||||
|
||||
for _, obj := range objects {
|
||||
streams.vec = append(streams.vec, obj)
|
||||
}
|
||||
}
|
||||
|
||||
// Elements returns a slice of the PdfObject elements in the array.
|
||||
// Preferred over accessing the array directly as type may be changed in future major versions (v3).
|
||||
func (streams *PdfObjectStreams) Elements() []PdfObject {
|
||||
if streams == nil {
|
||||
return nil
|
||||
}
|
||||
return streams.vec
|
||||
}
|
||||
|
||||
// String returns a string describing `streams`.
|
||||
func (streams *PdfObjectStreams) String() string {
|
||||
return fmt.Sprintf("Object stream %d", streams.ObjectNumber)
|
||||
}
|
||||
|
||||
// Len returns the number of elements in the streams.
|
||||
func (streams *PdfObjectStreams) Len() int {
|
||||
if streams == nil {
|
||||
return 0
|
||||
}
|
||||
return len(streams.vec)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (streams *PdfObjectStreams) DefaultWriteString() string {
|
||||
outStr := fmt.Sprintf("%d 0 R", (*streams).ObjectNumber)
|
||||
return outStr
|
||||
}
|
||||
|
@ -44,6 +44,8 @@ type Creator struct {
|
||||
|
||||
// Forms.
|
||||
acroForm *model.PdfAcroForm
|
||||
|
||||
optimizer model.Optimizer
|
||||
}
|
||||
|
||||
// SetForms adds an Acroform to a PDF file. Sets the specified form for writing.
|
||||
@ -101,6 +103,16 @@ func New() *Creator {
|
||||
return c
|
||||
}
|
||||
|
||||
// SetOptimizer sets the optimizer to optimize PDF before writing.
|
||||
func (c *Creator) SetOptimizer(optimizer model.Optimizer) {
|
||||
c.optimizer = optimizer
|
||||
}
|
||||
|
||||
// GetOptimizer returns current PDF optimizer.
|
||||
func (c *Creator) GetOptimizer() model.Optimizer {
|
||||
return c.optimizer
|
||||
}
|
||||
|
||||
// SetPageMargins sets the page margins: left, right, top, bottom.
|
||||
// The default page margins are 10% of document width.
|
||||
func (c *Creator) SetPageMargins(left, right, top, bottom float64) {
|
||||
@ -459,13 +471,15 @@ func (c *Creator) Draw(d Drawable) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write output of creator to io.WriteSeeker interface.
|
||||
func (c *Creator) Write(ws io.WriteSeeker) error {
|
||||
// Write output of creator to io.Writer interface.
|
||||
func (c *Creator) Write(ws io.Writer) error {
|
||||
if !c.finalized {
|
||||
c.finalize()
|
||||
}
|
||||
|
||||
pdfWriter := model.NewPdfWriter()
|
||||
pdfWriter.SetOptimizer(c.optimizer)
|
||||
|
||||
// Form fields.
|
||||
if c.acroForm != nil {
|
||||
err := pdfWriter.SetForms(c.acroForm)
|
||||
|
@ -14,6 +14,7 @@ import (
|
||||
goimage "image"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/boombuler/barcode"
|
||||
@ -22,6 +23,7 @@ import (
|
||||
"github.com/unidoc/unidoc/pdf/contentstream/draw"
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
"github.com/unidoc/unidoc/pdf/model"
|
||||
"github.com/unidoc/unidoc/pdf/model/optimize"
|
||||
"github.com/unidoc/unidoc/pdf/model/textencoding"
|
||||
)
|
||||
|
||||
@ -2133,3 +2135,822 @@ func TestEncrypting1(t *testing.T) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// TestOptimizeCombineDuplicateStreams tests optimizing PDFs to reduce output file size.
|
||||
func TestOptimizeCombineDuplicateStreams(t *testing.T) {
|
||||
c := createPdf4Optimization(t)
|
||||
|
||||
err := c.WriteToFile("/tmp/7_combine_duplicate_streams_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c = createPdf4Optimization(t)
|
||||
|
||||
c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateStreams: true}))
|
||||
|
||||
err = c.WriteToFile("/tmp/7_combine_duplicate_streams_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/7_combine_duplicate_streams_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/7_combine_duplicate_streams_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestOptimizeImageQuality tests optimizing PDFs to reduce output file size.
|
||||
func TestOptimizeImageQuality(t *testing.T) {
|
||||
c := New()
|
||||
|
||||
imgDataJpeg, err := ioutil.ReadFile(testImageFile1)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
imgJpeg, err := NewImageFromData(imgDataJpeg)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
// JPEG encoder (DCT) with quality factor 70.
|
||||
encoder := core.NewDCTEncoder()
|
||||
encoder.Quality = 100
|
||||
encoder.Width = int(imgJpeg.Width())
|
||||
encoder.Height = int(imgJpeg.Height())
|
||||
imgJpeg.SetEncoder(encoder)
|
||||
|
||||
imgJpeg.SetPos(250, 350)
|
||||
imgJpeg.Scale(0.25, 0.25)
|
||||
|
||||
err = c.Draw(imgJpeg)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = c.WriteToFile("/tmp/8_image_quality_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.SetOptimizer(optimize.New(optimize.Options{ImageQuality: 20}))
|
||||
|
||||
err = c.WriteToFile("/tmp/8_image_quality_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/8_image_quality_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/8_image_quality_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func createPdf4Optimization(t *testing.T) *Creator {
|
||||
c := New()
|
||||
|
||||
p := NewParagraph("Test text1")
|
||||
// Change to times bold font (default is helvetica).
|
||||
font, err := model.NewStandard14Font(model.CourierBold)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
p.SetFont(font)
|
||||
p.SetPos(15, 15)
|
||||
_ = c.Draw(p)
|
||||
|
||||
imgData, err := ioutil.ReadFile(testImageFile1)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
img, err := NewImageFromData(imgData)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
img.SetPos(0, 100)
|
||||
img.ScaleToWidth(1.0 * c.Width())
|
||||
|
||||
err = c.Draw(img)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
img1, err := NewImageFromData(imgData)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
img1.SetPos(0, 200)
|
||||
img1.ScaleToWidth(1.0 * c.Width())
|
||||
|
||||
err = c.Draw(img1)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
imgData2, err := ioutil.ReadFile(testImageFile1)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
img2, err := NewImageFromData(imgData2)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
img2.SetPos(0, 500)
|
||||
img2.ScaleToWidth(1.0 * c.Width())
|
||||
|
||||
c.NewPage()
|
||||
p = NewParagraph("Test text2")
|
||||
// Change to times bold font (default is helvetica).
|
||||
font, err = model.NewStandard14Font(model.Helvetica)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
p.SetFont(font)
|
||||
p.SetPos(15, 15)
|
||||
_ = c.Draw(p)
|
||||
|
||||
err = c.Draw(img2)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
return nil
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// TestOptimizeUseObjectStreams tests optimizing PDFs to reduce output file size.
|
||||
func TestOptimizeUseObjectStreams(t *testing.T) {
|
||||
c := createPdf4Optimization(t)
|
||||
|
||||
err := c.WriteToFile("/tmp/9_use_object_streams_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c = createPdf4Optimization(t)
|
||||
c.SetOptimizer(optimize.New(optimize.Options{UseObjectStreams: true}))
|
||||
|
||||
err = c.WriteToFile("/tmp/9_use_object_streams_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/9_use_object_streams_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/9_use_object_streams_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestCombineDuplicateDirectObjects tests optimizing PDFs to reduce output file size.
|
||||
func TestCombineDuplicateDirectObjects(t *testing.T) {
|
||||
|
||||
createDoc := func() *Creator {
|
||||
c := New()
|
||||
|
||||
ch1 := c.NewChapter("Introduction")
|
||||
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
|
||||
subchap1.SetMargins(0, 0, 5, 0)
|
||||
|
||||
//subCh1 := NewSubchapter(ch1, "Workflow")
|
||||
|
||||
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " +
|
||||
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
|
||||
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " +
|
||||
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
|
||||
"mollit anim id est laborum.")
|
||||
p.SetTextAlignment(TextAlignmentJustify)
|
||||
p.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 7; j++ {
|
||||
subchap1.Add(p)
|
||||
}
|
||||
|
||||
subchap2 := c.NewSubchapter(ch1, "Mechanism")
|
||||
subchap2.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 15; j++ {
|
||||
subchap2.Add(p)
|
||||
}
|
||||
|
||||
subchap3 := c.NewSubchapter(ch1, "Discussion")
|
||||
subchap3.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 19; j++ {
|
||||
subchap3.Add(p)
|
||||
}
|
||||
|
||||
subchap4 := c.NewSubchapter(ch1, "Conclusion")
|
||||
subchap4.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 23; j++ {
|
||||
subchap4.Add(p)
|
||||
}
|
||||
|
||||
c.Draw(ch1)
|
||||
|
||||
for i := 0; i < 50; i++ {
|
||||
ch2 := c.NewChapter("References")
|
||||
for j := 0; j < 13; j++ {
|
||||
ch2.Add(p)
|
||||
}
|
||||
|
||||
c.Draw(ch2)
|
||||
}
|
||||
|
||||
// Set a function to create the front Page.
|
||||
c.CreateFrontPage(func(args FrontpageFunctionArgs) {
|
||||
p := NewParagraph("Example Report")
|
||||
p.SetWidth(c.Width())
|
||||
p.SetTextAlignment(TextAlignmentCenter)
|
||||
p.SetFontSize(32)
|
||||
p.SetPos(0, 300)
|
||||
c.Draw(p)
|
||||
|
||||
p.SetFontSize(22)
|
||||
p.SetText("Example Report Data Results")
|
||||
p.SetPos(0, 340)
|
||||
c.Draw(p)
|
||||
})
|
||||
|
||||
// Set a function to create the table of contents.
|
||||
c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) {
|
||||
ch := c.NewChapter("Table of contents")
|
||||
ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5))
|
||||
ch.GetHeading().SetFontSize(28)
|
||||
ch.GetHeading().SetMargins(0, 0, 0, 30)
|
||||
|
||||
table := NewTable(2)
|
||||
// Default, equal column sizes (4x0.25)...
|
||||
table.SetColumnWidths(0.9, 0.1)
|
||||
|
||||
for _, entry := range toc.entries {
|
||||
// Col 1. Chapter number, title.
|
||||
var str string
|
||||
if entry.Subchapter == 0 {
|
||||
str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title)
|
||||
} else {
|
||||
str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title)
|
||||
}
|
||||
p := NewParagraph(str)
|
||||
p.SetFontSize(14)
|
||||
cell := table.NewCell()
|
||||
cell.SetContent(p)
|
||||
// Set the paragraph width to the cell width.
|
||||
p.SetWidth(cell.Width(c.Context()))
|
||||
table.SetRowHeight(table.CurRow(), p.Height()*1.2)
|
||||
|
||||
// Col 1. Page number.
|
||||
p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber))
|
||||
p.SetFontSize(14)
|
||||
cell = table.NewCell()
|
||||
cell.SetContent(p)
|
||||
}
|
||||
err := ch.Add(table)
|
||||
if err != nil {
|
||||
fmt.Printf("Error adding table: %v\n", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ch, nil
|
||||
})
|
||||
|
||||
addHeadersAndFooters(c)
|
||||
return c
|
||||
}
|
||||
|
||||
c := createDoc()
|
||||
|
||||
err := c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c = createDoc()
|
||||
c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateDirectObjects: true}))
|
||||
|
||||
err = c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestOptimizeImagePPI tests optimizing PDFs to reduce output file size.
|
||||
func TestOptimizeImagePPI(t *testing.T) {
|
||||
c := New()
|
||||
|
||||
imgDataJpeg, err := ioutil.ReadFile(testImageFile1)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
imgJpeg, err := NewImageFromData(imgDataJpeg)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
// JPEG encoder (DCT) with quality factor 100.
|
||||
encoder := core.NewDCTEncoder()
|
||||
encoder.Quality = 100
|
||||
encoder.Width = int(imgJpeg.Width())
|
||||
encoder.Height = int(imgJpeg.Height())
|
||||
imgJpeg.SetEncoder(encoder)
|
||||
|
||||
imgJpeg.SetPos(250, 350)
|
||||
imgJpeg.Scale(0.25, 0.25)
|
||||
|
||||
err = c.Draw(imgJpeg)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.NewPage()
|
||||
|
||||
imgData, err := ioutil.ReadFile(testImageFile1)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
img, err := NewImageFromData(imgData)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
img.SetPos(0, 100)
|
||||
img.ScaleToWidth(0.1 * c.Width())
|
||||
|
||||
err = c.Draw(img)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
t.FailNow()
|
||||
}
|
||||
|
||||
err = c.Draw(imgJpeg)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = c.WriteToFile("/tmp/11_image_ppi_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.SetOptimizer(optimize.New(optimize.Options{ImageUpperPPI: 144}))
|
||||
|
||||
err = c.WriteToFile("/tmp/11_image_ppi_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/11_image_ppi_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/11_image_ppi_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestCombineIdenticalIndirectObjects tests optimizing PDFs to reduce output file size.
|
||||
func TestCombineIdenticalIndirectObjects(t *testing.T) {
|
||||
c := New()
|
||||
|
||||
ch1 := c.NewChapter("Introduction")
|
||||
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
|
||||
subchap1.SetMargins(0, 0, 5, 0)
|
||||
|
||||
//subCh1 := NewSubchapter(ch1, "Workflow")
|
||||
|
||||
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " +
|
||||
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
|
||||
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " +
|
||||
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
|
||||
"mollit anim id est laborum.")
|
||||
p.SetTextAlignment(TextAlignmentJustify)
|
||||
p.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 5; j++ {
|
||||
subchap1.Add(p)
|
||||
}
|
||||
|
||||
subchap2 := c.NewSubchapter(ch1, "Mechanism")
|
||||
subchap2.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 15; j++ {
|
||||
subchap2.Add(p)
|
||||
}
|
||||
|
||||
subchap3 := c.NewSubchapter(ch1, "Discussion")
|
||||
subchap3.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 19; j++ {
|
||||
subchap3.Add(p)
|
||||
}
|
||||
|
||||
subchap4 := c.NewSubchapter(ch1, "Conclusion")
|
||||
subchap4.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 23; j++ {
|
||||
subchap4.Add(p)
|
||||
}
|
||||
|
||||
c.Draw(ch1)
|
||||
|
||||
for i := 0; i < 50; i++ {
|
||||
ch2 := c.NewChapter("References")
|
||||
for j := 0; j < 13; j++ {
|
||||
ch2.Add(p)
|
||||
}
|
||||
|
||||
c.Draw(ch2)
|
||||
}
|
||||
|
||||
// Set a function to create the front Page.
|
||||
c.CreateFrontPage(func(args FrontpageFunctionArgs) {
|
||||
p := NewParagraph("Example Report")
|
||||
p.SetWidth(c.Width())
|
||||
p.SetTextAlignment(TextAlignmentCenter)
|
||||
p.SetFontSize(32)
|
||||
p.SetPos(0, 300)
|
||||
c.Draw(p)
|
||||
|
||||
p.SetFontSize(22)
|
||||
p.SetText("Example Report Data Results")
|
||||
p.SetPos(0, 340)
|
||||
c.Draw(p)
|
||||
})
|
||||
|
||||
// Set a function to create the table of contents.
|
||||
c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) {
|
||||
ch := c.NewChapter("Table of contents")
|
||||
ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5))
|
||||
ch.GetHeading().SetFontSize(28)
|
||||
ch.GetHeading().SetMargins(0, 0, 0, 30)
|
||||
|
||||
table := NewTable(2)
|
||||
// Default, equal column sizes (4x0.25)...
|
||||
table.SetColumnWidths(0.9, 0.1)
|
||||
|
||||
for _, entry := range toc.entries {
|
||||
// Col 1. Chapter number, title.
|
||||
var str string
|
||||
if entry.Subchapter == 0 {
|
||||
str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title)
|
||||
} else {
|
||||
str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title)
|
||||
}
|
||||
p := NewParagraph(str)
|
||||
p.SetFontSize(14)
|
||||
cell := table.NewCell()
|
||||
cell.SetContent(p)
|
||||
// Set the paragraph width to the cell width.
|
||||
p.SetWidth(cell.Width(c.Context()))
|
||||
table.SetRowHeight(table.CurRow(), p.Height()*1.2)
|
||||
|
||||
// Col 1. Page number.
|
||||
p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber))
|
||||
p.SetFontSize(14)
|
||||
cell = table.NewCell()
|
||||
cell.SetContent(p)
|
||||
}
|
||||
err := ch.Add(table)
|
||||
if err != nil {
|
||||
fmt.Printf("Error adding table: %v\n", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ch, nil
|
||||
})
|
||||
|
||||
addHeadersAndFooters(c)
|
||||
|
||||
err := c.WriteToFile("/tmp/12_identical_indirect_objects_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.SetOptimizer(optimize.New(optimize.Options{CombineIdenticalIndirectObjects: true}))
|
||||
|
||||
err = c.WriteToFile("/tmp/12_identical_indirect_objects_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/12_identical_indirect_objects_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/12_identical_indirect_objects_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestCompressStreams tests optimizing PDFs to reduce output file size.
|
||||
func TestCompressStreams(t *testing.T) {
|
||||
|
||||
createDoc := func() *Creator {
|
||||
c := New()
|
||||
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt" +
|
||||
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
|
||||
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore" +
|
||||
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
|
||||
"mollit anim id est laborum.")
|
||||
|
||||
p.SetMargins(0, 0, 5, 0)
|
||||
c.Draw(p)
|
||||
//c.NewPage()
|
||||
|
||||
page := c.pages[0]
|
||||
page.AddContentStreamByString(`BT
|
||||
/Arial 56 Tf
|
||||
20 600 Td
|
||||
(The multiline example text)Tj
|
||||
/Arial 30 Tf
|
||||
0 30 Td
|
||||
60 TL
|
||||
(example text)'
|
||||
(example text)'
|
||||
(example text)'
|
||||
(example text)'
|
||||
(example text)'
|
||||
(example text)'
|
||||
(example text)'
|
||||
(example text)'
|
||||
ET`)
|
||||
return c
|
||||
}
|
||||
|
||||
c := createDoc()
|
||||
|
||||
err := c.WriteToFile("/tmp/13_compress_streams_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c = createDoc()
|
||||
c.SetOptimizer(optimize.New(optimize.Options{CompressStreams: true}))
|
||||
|
||||
err = c.WriteToFile("/tmp/13_compress_streams_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/13_compress_streams_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/13_compress_streams_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestAllOptimizations tests optimizing PDFs to reduce output file size.
|
||||
func TestAllOptimizations(t *testing.T) {
|
||||
|
||||
createDoc := func() *Creator {
|
||||
c := New()
|
||||
|
||||
ch1 := c.NewChapter("Introduction")
|
||||
subchap1 := c.NewSubchapter(ch1, "The fundamentals")
|
||||
subchap1.SetMargins(0, 0, 5, 0)
|
||||
|
||||
//subCh1 := NewSubchapter(ch1, "Workflow")
|
||||
|
||||
p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " +
|
||||
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " +
|
||||
"aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " +
|
||||
"eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " +
|
||||
"mollit anim id est laborum.")
|
||||
p.SetTextAlignment(TextAlignmentJustify)
|
||||
p.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 7; j++ {
|
||||
subchap1.Add(p)
|
||||
}
|
||||
|
||||
subchap2 := c.NewSubchapter(ch1, "Mechanism")
|
||||
subchap2.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 15; j++ {
|
||||
subchap2.Add(p)
|
||||
}
|
||||
|
||||
subchap3 := c.NewSubchapter(ch1, "Discussion")
|
||||
subchap3.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 19; j++ {
|
||||
subchap3.Add(p)
|
||||
}
|
||||
|
||||
subchap4 := c.NewSubchapter(ch1, "Conclusion")
|
||||
subchap4.SetMargins(0, 0, 5, 0)
|
||||
for j := 0; j < 23; j++ {
|
||||
subchap4.Add(p)
|
||||
}
|
||||
|
||||
c.Draw(ch1)
|
||||
|
||||
for i := 0; i < 50; i++ {
|
||||
ch2 := c.NewChapter("References")
|
||||
for j := 0; j < 13; j++ {
|
||||
ch2.Add(p)
|
||||
}
|
||||
|
||||
c.Draw(ch2)
|
||||
}
|
||||
|
||||
// Set a function to create the front Page.
|
||||
c.CreateFrontPage(func(args FrontpageFunctionArgs) {
|
||||
p := NewParagraph("Example Report")
|
||||
p.SetWidth(c.Width())
|
||||
p.SetTextAlignment(TextAlignmentCenter)
|
||||
p.SetFontSize(32)
|
||||
p.SetPos(0, 300)
|
||||
c.Draw(p)
|
||||
|
||||
p.SetFontSize(22)
|
||||
p.SetText("Example Report Data Results")
|
||||
p.SetPos(0, 340)
|
||||
c.Draw(p)
|
||||
})
|
||||
|
||||
// Set a function to create the table of contents.
|
||||
c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) {
|
||||
ch := c.NewChapter("Table of contents")
|
||||
ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5))
|
||||
ch.GetHeading().SetFontSize(28)
|
||||
ch.GetHeading().SetMargins(0, 0, 0, 30)
|
||||
|
||||
table := NewTable(2)
|
||||
// Default, equal column sizes (4x0.25)...
|
||||
table.SetColumnWidths(0.9, 0.1)
|
||||
|
||||
for _, entry := range toc.entries {
|
||||
// Col 1. Chapter number, title.
|
||||
var str string
|
||||
if entry.Subchapter == 0 {
|
||||
str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title)
|
||||
} else {
|
||||
str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title)
|
||||
}
|
||||
p := NewParagraph(str)
|
||||
p.SetFontSize(14)
|
||||
cell := table.NewCell()
|
||||
cell.SetContent(p)
|
||||
// Set the paragraph width to the cell width.
|
||||
p.SetWidth(cell.Width(c.Context()))
|
||||
table.SetRowHeight(table.CurRow(), p.Height()*1.2)
|
||||
|
||||
// Col 1. Page number.
|
||||
p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber))
|
||||
p.SetFontSize(14)
|
||||
cell = table.NewCell()
|
||||
cell.SetContent(p)
|
||||
}
|
||||
err := ch.Add(table)
|
||||
if err != nil {
|
||||
fmt.Printf("Error adding table: %v\n", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return ch, nil
|
||||
})
|
||||
|
||||
addHeadersAndFooters(c)
|
||||
return c
|
||||
}
|
||||
|
||||
c := createDoc()
|
||||
|
||||
err := c.WriteToFile("/tmp/14_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
c = createDoc()
|
||||
c.SetOptimizer(optimize.New(optimize.Options{
|
||||
CombineDuplicateDirectObjects: true,
|
||||
CombineIdenticalIndirectObjects: true,
|
||||
ImageUpperPPI: 50.0,
|
||||
UseObjectStreams: true,
|
||||
ImageQuality: 50,
|
||||
CombineDuplicateStreams: true,
|
||||
CompressStreams: true,
|
||||
}))
|
||||
|
||||
err = c.WriteToFile("/tmp/14_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat("/tmp/14_not_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
fileInfoOptimized, err := os.Stat("/tmp/14_optimized.pdf")
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
if fileInfoOptimized.Size() >= fileInfo.Size() {
|
||||
t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size())
|
||||
}
|
||||
}
|
||||
|
@ -151,9 +151,9 @@ func NewPdfColorspaceFromPdfObject(obj PdfObject) (PdfColorspace, error) {
|
||||
return nil, errors.New("Type error")
|
||||
}
|
||||
|
||||
// determineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and
|
||||
// DetermineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and
|
||||
// an error on failure. If the colorspace was not found, will return an empty string.
|
||||
func determineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) {
|
||||
func DetermineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) {
|
||||
var csName *PdfObjectName
|
||||
var csArray *PdfObjectArray
|
||||
|
||||
@ -2179,7 +2179,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS
|
||||
obj = array.Get(1)
|
||||
|
||||
// Base cs cannot be another /Indexed or /Pattern space.
|
||||
baseName, err := determineColorspaceNameFromPdfObject(obj)
|
||||
baseName, err := DetermineColorspaceNameFromPdfObject(obj)
|
||||
if baseName == "Indexed" || baseName == "Pattern" {
|
||||
common.Log.Debug("Error: Indexed colorspace cannot have Indexed/Pattern CS as base (%v)", baseName)
|
||||
return nil, ErrRangeError
|
||||
|
34
pdf/model/optimize/chain.go
Normal file
34
pdf/model/optimize/chain.go
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
"github.com/unidoc/unidoc/pdf/model"
|
||||
)
|
||||
|
||||
// Chain allows to use sequence of optimizers.
|
||||
// It implements interface model.Optimizer.
|
||||
type Chain struct {
|
||||
optimizers []model.Optimizer
|
||||
}
|
||||
|
||||
// Append appends optimizers to the chain.
|
||||
func (c *Chain) Append(optimizers ...model.Optimizer) {
|
||||
c.optimizers = append(c.optimizers, optimizers...)
|
||||
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (c *Chain) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
optimizedObjects = objects
|
||||
for _, optimizer := range c.optimizers {
|
||||
optimizedObjects, err = optimizer.Optimize(optimizedObjects)
|
||||
if err != nil {
|
||||
return optimizedObjects, err
|
||||
}
|
||||
}
|
||||
return optimizedObjects, nil
|
||||
}
|
71
pdf/model/optimize/combine_duplicate_direct_objects.go
Normal file
71
pdf/model/optimize/combine_duplicate_direct_objects.go
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// CombineDuplicateDirectObjects merges direct dictionaries that serialize
// to the same data (compared by hash) into a single shared indirect object.
// It implements interface model.Optimizer.
type CombineDuplicateDirectObjects struct {
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (dup *CombineDuplicateDirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
|
||||
dictsByHash := make(map[string][]*core.PdfObjectDictionary)
|
||||
var processDict func(pDict *core.PdfObjectDictionary)
|
||||
processDict = func(pDict *core.PdfObjectDictionary) {
|
||||
|
||||
for _, key := range pDict.Keys() {
|
||||
obj := pDict.Get(key)
|
||||
if dict, isDictObj := obj.(*core.PdfObjectDictionary); isDictObj {
|
||||
hasher := md5.New()
|
||||
hasher.Write([]byte(dict.DefaultWriteString()))
|
||||
|
||||
hash := string(hasher.Sum(nil))
|
||||
dictsByHash[hash] = append(dictsByHash[hash], dict)
|
||||
processDict(dict)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, obj := range objects {
|
||||
ind, isIndirectObj := obj.(*core.PdfIndirectObject)
|
||||
if !isIndirectObj {
|
||||
continue
|
||||
}
|
||||
if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj {
|
||||
processDict(dict)
|
||||
}
|
||||
}
|
||||
|
||||
indirects := make([]core.PdfObject, 0, len(dictsByHash))
|
||||
replaceTable := make(map[core.PdfObject]core.PdfObject)
|
||||
|
||||
for _, dicts := range dictsByHash {
|
||||
if len(dicts) < 2 {
|
||||
continue
|
||||
}
|
||||
dict := core.MakeDict()
|
||||
dict.Merge(dicts[0])
|
||||
ind := core.MakeIndirectObject(dict)
|
||||
indirects = append(indirects, ind)
|
||||
for i := 0; i < len(dicts); i++ {
|
||||
dict := dicts[i]
|
||||
replaceTable[dict] = ind
|
||||
}
|
||||
}
|
||||
|
||||
optimizedObjects = make([]core.PdfObject, len(objects))
|
||||
copy(optimizedObjects, objects)
|
||||
optimizedObjects = append(indirects, optimizedObjects...)
|
||||
replaceObjectsInPlace(optimizedObjects, replaceTable)
|
||||
return optimizedObjects, nil
|
||||
}
|
53
pdf/model/optimize/combine_duplicate_streams.go
Normal file
53
pdf/model/optimize/combine_duplicate_streams.go
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// CombineDuplicateStreams merges duplicated streams, detected by hash,
// so that only one copy is kept.
// It implements interface model.Optimizer.
type CombineDuplicateStreams struct {
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (dup *CombineDuplicateStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
replaceTable := make(map[core.PdfObject]core.PdfObject)
|
||||
toDelete := make(map[core.PdfObject]struct{})
|
||||
streamsByHash := make(map[string][]*core.PdfObjectStream)
|
||||
for _, obj := range objects {
|
||||
if stream, isStreamObj := obj.(*core.PdfObjectStream); isStreamObj {
|
||||
hasher := md5.New()
|
||||
hasher.Write([]byte(stream.Stream))
|
||||
hash := string(hasher.Sum(nil))
|
||||
streamsByHash[hash] = append(streamsByHash[hash], stream)
|
||||
}
|
||||
}
|
||||
for _, streams := range streamsByHash {
|
||||
if len(streams) < 2 {
|
||||
continue
|
||||
}
|
||||
firstStream := streams[0]
|
||||
for i := 1; i < len(streams); i++ {
|
||||
stream := streams[i]
|
||||
replaceTable[stream] = firstStream
|
||||
toDelete[stream] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete))
|
||||
for _, obj := range objects {
|
||||
if _, found := toDelete[obj]; found {
|
||||
continue
|
||||
}
|
||||
optimizedObjects = append(optimizedObjects, obj)
|
||||
}
|
||||
replaceObjectsInPlace(optimizedObjects, replaceTable)
|
||||
return optimizedObjects, nil
|
||||
}
|
64
pdf/model/optimize/combine_identical_indirect_objects.go
Normal file
64
pdf/model/optimize/combine_identical_indirect_objects.go
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// CombineIdenticalIndirectObjects merges indirect objects whose dictionaries
// are identical, keeping a single instance.
// It implements interface model.Optimizer.
type CombineIdenticalIndirectObjects struct {
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (c *CombineIdenticalIndirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
replaceTable := make(map[core.PdfObject]core.PdfObject)
|
||||
toDelete := make(map[core.PdfObject]struct{})
|
||||
|
||||
indWithDictByHash := make(map[string][]*core.PdfIndirectObject)
|
||||
|
||||
for _, obj := range objects {
|
||||
ind, isIndirectObj := obj.(*core.PdfIndirectObject)
|
||||
if !isIndirectObj {
|
||||
continue
|
||||
}
|
||||
if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj {
|
||||
if name, isName := dict.Get("Type").(*core.PdfObjectName); isName && *name == "Page" {
|
||||
continue
|
||||
}
|
||||
hasher := md5.New()
|
||||
hasher.Write([]byte(dict.DefaultWriteString()))
|
||||
|
||||
hash := string(hasher.Sum(nil))
|
||||
indWithDictByHash[hash] = append(indWithDictByHash[hash], ind)
|
||||
}
|
||||
}
|
||||
|
||||
for _, dicts := range indWithDictByHash {
|
||||
if len(dicts) < 2 {
|
||||
continue
|
||||
}
|
||||
firstDict := dicts[0]
|
||||
for i := 1; i < len(dicts); i++ {
|
||||
dict := dicts[i]
|
||||
replaceTable[dict] = firstDict
|
||||
toDelete[dict] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete))
|
||||
for _, obj := range objects {
|
||||
if _, found := toDelete[obj]; found {
|
||||
continue
|
||||
}
|
||||
optimizedObjects = append(optimizedObjects, obj)
|
||||
}
|
||||
replaceObjectsInPlace(optimizedObjects, replaceTable)
|
||||
return optimizedObjects, nil
|
||||
}
|
45
pdf/model/optimize/compress_streams.go
Normal file
45
pdf/model/optimize/compress_streams.go
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// CompressStreams compresses streams that do not have a filter yet.
// It implements interface model.Optimizer.
type CompressStreams struct {
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (c *CompressStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
optimizedObjects = make([]core.PdfObject, len(objects))
|
||||
copy(optimizedObjects, objects)
|
||||
for _, obj := range objects {
|
||||
stream, isStreamObj := core.GetStream(obj)
|
||||
if !isStreamObj {
|
||||
continue
|
||||
}
|
||||
if _, found := core.GetName(stream.PdfObjectDictionary.Get("Filter")); found {
|
||||
continue
|
||||
}
|
||||
encoder := core.NewLZWEncoder()
|
||||
encoder.EarlyChange = 0
|
||||
var data []byte
|
||||
data, err = encoder.EncodeBytes(stream.Stream)
|
||||
if err != nil {
|
||||
return optimizedObjects, err
|
||||
}
|
||||
dict := encoder.MakeStreamDict()
|
||||
// compare compressed and uncompressed sizes
|
||||
if len(data)+len(dict.DefaultWriteString()) < len(stream.Stream) {
|
||||
stream.Stream = data
|
||||
stream.PdfObjectDictionary.Merge(dict)
|
||||
stream.PdfObjectDictionary.Set("Length", core.MakeInteger(int64(len(stream.Stream))))
|
||||
}
|
||||
}
|
||||
return optimizedObjects, nil
|
||||
}
|
138
pdf/model/optimize/image.go
Normal file
138
pdf/model/optimize/image.go
Normal file
@ -0,0 +1,138 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
"github.com/unidoc/unidoc/pdf/model"
|
||||
)
|
||||
|
||||
// Image optimizes images by rewriting them in JPEG (DCT) format with the
// quality given by ImageQuality.
// TODO(a5i): Add support for inline images.
// It implements interface model.Optimizer.
type Image struct {
	// ImageQuality is the quality passed to the DCT encoder. Values <= 0
	// disable the optimizer.
	ImageQuality int
}
|
||||
|
||||
// imageInfo is information about an image.
|
||||
type imageInfo struct {
|
||||
ColorSpace core.PdfObjectName
|
||||
BitsPerComponent int
|
||||
ColorComponents int
|
||||
Width int
|
||||
Height int
|
||||
Stream *core.PdfObjectStream
|
||||
PPI float64
|
||||
}
|
||||
|
||||
// findImages returns images from objects.
|
||||
func findImages(objects []core.PdfObject) []*imageInfo {
|
||||
subTypeKey := core.PdfObjectName("Subtype")
|
||||
streamProcessed := make(map[*core.PdfObjectStream]struct{})
|
||||
var err error
|
||||
var images []*imageInfo
|
||||
for _, obj := range objects {
|
||||
stream, ok := core.GetStream(obj)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, found := streamProcessed[stream]; found {
|
||||
continue
|
||||
}
|
||||
streamProcessed[stream] = struct{}{}
|
||||
subTypeValue := stream.PdfObjectDictionary.Get(subTypeKey)
|
||||
subType, ok := core.GetName(subTypeValue)
|
||||
if !ok || string(*subType) != "Image" {
|
||||
continue
|
||||
}
|
||||
img := &imageInfo{BitsPerComponent: 8, Stream: stream}
|
||||
if img.ColorSpace, err = model.DetermineColorspaceNameFromPdfObject(stream.PdfObjectDictionary.Get("ColorSpace")); err != nil {
|
||||
common.Log.Error("Error determine color space %s", err)
|
||||
continue
|
||||
}
|
||||
if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("BitsPerComponent")); ok {
|
||||
img.BitsPerComponent = val
|
||||
}
|
||||
if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Width")); ok {
|
||||
img.Width = val
|
||||
}
|
||||
if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Height")); ok {
|
||||
img.Height = val
|
||||
}
|
||||
|
||||
switch img.ColorSpace {
|
||||
case "DeviceRGB":
|
||||
img.ColorComponents = 3
|
||||
case "DeviceGray":
|
||||
img.ColorComponents = 1
|
||||
default:
|
||||
common.Log.Warning("Optimization is not supported for color space %s", img.ColorSpace)
|
||||
continue
|
||||
}
|
||||
images = append(images, img)
|
||||
}
|
||||
return images
|
||||
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (i *Image) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
if i.ImageQuality <= 0 {
|
||||
return objects, nil
|
||||
}
|
||||
images := findImages(objects)
|
||||
if len(images) == 0 {
|
||||
return objects, nil
|
||||
}
|
||||
|
||||
replaceTable := make(map[core.PdfObject]core.PdfObject)
|
||||
imageMasks := make(map[core.PdfObject]struct{})
|
||||
for _, img := range images {
|
||||
obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))
|
||||
imageMasks[obj] = struct{}{}
|
||||
}
|
||||
|
||||
for index, img := range images {
|
||||
stream := img.Stream
|
||||
if _, isMask := imageMasks[stream]; isMask {
|
||||
continue
|
||||
}
|
||||
streamEncoder, err := core.NewEncoderFromStream(stream)
|
||||
if err != nil {
|
||||
common.Log.Warning("Error get encoder for the image stream %s")
|
||||
continue
|
||||
}
|
||||
data, err := streamEncoder.DecodeStream(stream)
|
||||
if err != nil {
|
||||
common.Log.Warning("Error decode the image stream %s")
|
||||
continue
|
||||
}
|
||||
encoder := core.NewDCTEncoder()
|
||||
encoder.ColorComponents = img.ColorComponents
|
||||
encoder.Quality = i.ImageQuality
|
||||
encoder.BitsPerComponent = img.BitsPerComponent
|
||||
encoder.Width = img.Width
|
||||
encoder.Height = img.Height
|
||||
streamData, err := encoder.EncodeBytes(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newStream := &core.PdfObjectStream{Stream: streamData}
|
||||
newStream.PdfObjectReference = stream.PdfObjectReference
|
||||
newStream.PdfObjectDictionary = core.MakeDict()
|
||||
newStream.PdfObjectDictionary.Merge(stream.PdfObjectDictionary)
|
||||
fn := core.PdfObjectName(encoder.GetFilterName())
|
||||
newStream.PdfObjectDictionary.Set(core.PdfObjectName("Filter"), &fn)
|
||||
ln := core.PdfObjectInteger(int64(len(streamData)))
|
||||
newStream.PdfObjectDictionary.Set(core.PdfObjectName("Length"), &ln)
|
||||
replaceTable[stream] = newStream
|
||||
images[index].Stream = newStream
|
||||
}
|
||||
optimizedObjects = make([]core.PdfObject, len(objects))
|
||||
copy(optimizedObjects, objects)
|
||||
replaceObjectsInPlace(optimizedObjects, replaceTable)
|
||||
return optimizedObjects, nil
|
||||
}
|
203
pdf/model/optimize/image_ppi.go
Normal file
203
pdf/model/optimize/image_ppi.go
Normal file
@ -0,0 +1,203 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"image"
|
||||
"math"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/pdf/contentstream"
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
"github.com/unidoc/unidoc/pdf/model"
|
||||
"golang.org/x/image/draw"
|
||||
)
|
||||
|
||||
// ImagePPI optimizes images by scaling them down so that their PPI (pixels
// per inch) is never higher than ImageUpperPPI.
// TODO(a5i): Add support for inline images.
// It implements interface model.Optimizer.
type ImagePPI struct {
	// ImageUpperPPI is the maximum allowed PPI. Values <= 0 disable the
	// optimizer.
	ImageUpperPPI float64
}
|
||||
|
||||
func scaleImage(stream *core.PdfObjectStream, scale float64) error {
|
||||
xImg, err := model.NewXObjectImageFromStream(stream)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
i, err := xImg.ToImage()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
goimg, err := i.ToGoImage()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
newW := int(math.RoundToEven(float64(i.Width) * scale))
|
||||
newH := int(math.RoundToEven(float64(i.Height) * scale))
|
||||
rect := image.Rect(0, 0, newW, newH)
|
||||
var newImage draw.Image
|
||||
switch xImg.ColorSpace.String() {
|
||||
case "DeviceRGB":
|
||||
newImage = image.NewRGBA(rect)
|
||||
case "DeviceGray":
|
||||
newImage = image.NewGray(rect)
|
||||
default:
|
||||
return fmt.Errorf("Optimization is not supported for color space %s", xImg.ColorSpace.String())
|
||||
}
|
||||
draw.CatmullRom.Scale(newImage, newImage.Bounds(), goimg, goimg.Bounds(), draw.Over, &draw.Options{})
|
||||
i, err = model.ImageHandling.NewImageFromGoImage(newImage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
xImg.SetImage(i, xImg.ColorSpace)
|
||||
xImg.ToPdfObject()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (i *ImagePPI) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
if i.ImageUpperPPI <= 0 {
|
||||
return objects, nil
|
||||
}
|
||||
images := findImages(objects)
|
||||
if len(images) == 0 {
|
||||
return objects, nil
|
||||
}
|
||||
imageMasks := make(map[core.PdfObject]struct{})
|
||||
for _, img := range images {
|
||||
obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))
|
||||
imageMasks[obj] = struct{}{}
|
||||
}
|
||||
imageByStream := make(map[*core.PdfObjectStream]*imageInfo)
|
||||
for _, img := range images {
|
||||
imageByStream[img.Stream] = img
|
||||
}
|
||||
var catalog *core.PdfObjectDictionary
|
||||
for _, obj := range objects {
|
||||
if dict, isDict := core.GetDict(obj); catalog == nil && isDict {
|
||||
if tp, ok := core.GetName(dict.Get(core.PdfObjectName("Type"))); ok && *tp == "Catalog" {
|
||||
catalog = dict
|
||||
}
|
||||
}
|
||||
}
|
||||
if catalog == nil {
|
||||
return objects, nil
|
||||
}
|
||||
pages, hasPages := core.GetDict(catalog.Get(core.PdfObjectName("Pages")))
|
||||
if !hasPages {
|
||||
return objects, nil
|
||||
}
|
||||
kids, hasKids := core.GetArray(pages.Get(core.PdfObjectName("Kids")))
|
||||
if !hasKids {
|
||||
return objects, nil
|
||||
}
|
||||
imageByName := make(map[string]*imageInfo)
|
||||
|
||||
for _, pageObj := range kids.Elements() {
|
||||
page, ok := core.GetDict(pageObj)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
contents, hasContents := core.GetArray(page.Get("Contents"))
|
||||
if !hasContents {
|
||||
continue
|
||||
}
|
||||
resources, hasResources := core.GetDict(page.Get("Resources"))
|
||||
if !hasResources {
|
||||
continue
|
||||
}
|
||||
xObject, hasXObject := core.GetDict(resources.Get("XObject"))
|
||||
if !hasXObject {
|
||||
continue
|
||||
}
|
||||
xObjectKeys := xObject.Keys()
|
||||
for _, key := range xObjectKeys {
|
||||
if stream, isStream := core.GetStream(xObject.Get(key)); isStream {
|
||||
if img, found := imageByStream[stream]; found {
|
||||
imageByName[string(key)] = img
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, obj := range contents.Elements() {
|
||||
if stream, isStream := core.GetStream(obj); isStream {
|
||||
streamEncoder, err := core.NewEncoderFromStream(stream)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
data, err := streamEncoder.DecodeStream(stream)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p := contentstream.NewContentStreamParser(string(data))
|
||||
operations, err := p.Parse()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
scaleX, scaleY := 1.0, 1.0
|
||||
for _, operation := range *operations {
|
||||
if operation.Operand == "Q" {
|
||||
scaleX, scaleY = 1.0, 1.0
|
||||
}
|
||||
if operation.Operand == "cm" && len(operation.Params) == 6 {
|
||||
if sx, ok := core.GetFloatVal(operation.Params[0]); ok {
|
||||
scaleX = scaleX * sx
|
||||
}
|
||||
if sy, ok := core.GetFloatVal(operation.Params[3]); ok {
|
||||
scaleY = scaleY * sy
|
||||
}
|
||||
if sx, ok := core.GetIntVal(operation.Params[0]); ok {
|
||||
scaleX = scaleX * float64(sx)
|
||||
}
|
||||
if sy, ok := core.GetIntVal(operation.Params[3]); ok {
|
||||
scaleY = scaleY * float64(sy)
|
||||
}
|
||||
}
|
||||
if operation.Operand == "Do" && len(operation.Params) == 1 {
|
||||
name, ok := core.GetName(operation.Params[0])
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if img, found := imageByName[string(*name)]; found {
|
||||
wInch, hInch := scaleX/72.0, scaleY/72.0
|
||||
xPPI, yPPI := float64(img.Width)/wInch, float64(img.Height)/hInch
|
||||
if wInch == 0 || hInch == 0 {
|
||||
xPPI = 72.0
|
||||
yPPI = 72.0
|
||||
}
|
||||
img.PPI = math.Max(img.PPI, xPPI)
|
||||
img.PPI = math.Max(img.PPI, yPPI)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, img := range images {
|
||||
if _, isMask := imageMasks[img.Stream]; isMask {
|
||||
continue
|
||||
}
|
||||
if img.PPI <= i.ImageUpperPPI {
|
||||
continue
|
||||
}
|
||||
scale := i.ImageUpperPPI / img.PPI
|
||||
if err := scaleImage(img.Stream, scale); err != nil {
|
||||
common.Log.Debug("Error scale image keep original image: %s", err)
|
||||
} else {
|
||||
if mask, hasMask := core.GetStream(img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))); hasMask {
|
||||
if err := scaleImage(mask, scale); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return objects, nil
|
||||
}
|
40
pdf/model/optimize/object_streams.go
Normal file
40
pdf/model/optimize/object_streams.go
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// ObjectStreams groups PDF objects into an object stream.
// It implements interface model.Optimizer.
type ObjectStreams struct {
}
|
||||
|
||||
// Optimize optimizes PDF objects to decrease PDF size.
|
||||
func (o *ObjectStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) {
|
||||
objStream := &core.PdfObjectStreams{}
|
||||
skippedObjects := make([]core.PdfObject, 0, len(objects))
|
||||
for _, obj := range objects {
|
||||
if io, isIndirectObj := obj.(*core.PdfIndirectObject); isIndirectObj && io.GenerationNumber == 0 {
|
||||
objStream.Append(obj)
|
||||
} else {
|
||||
skippedObjects = append(skippedObjects, obj)
|
||||
}
|
||||
}
|
||||
if objStream.Len() == 0 {
|
||||
return skippedObjects, nil
|
||||
}
|
||||
|
||||
optimizedObjects = make([]core.PdfObject, 0, len(skippedObjects)+objStream.Len()+1)
|
||||
if objStream.Len() > 1 {
|
||||
optimizedObjects = append(optimizedObjects, objStream)
|
||||
}
|
||||
optimizedObjects = append(optimizedObjects, objStream.Elements()...)
|
||||
optimizedObjects = append(optimizedObjects, skippedObjects...)
|
||||
|
||||
return optimizedObjects, nil
|
||||
}
|
84
pdf/model/optimize/optimizer.go
Normal file
84
pdf/model/optimize/optimizer.go
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// New creates a optimizers chain from options.
|
||||
func New(options Options) *Chain {
|
||||
chain := new(Chain)
|
||||
if options.ImageUpperPPI > 0 {
|
||||
imageOptimizer := new(ImagePPI)
|
||||
imageOptimizer.ImageUpperPPI = options.ImageUpperPPI
|
||||
chain.Append(imageOptimizer)
|
||||
}
|
||||
if options.ImageQuality > 0 {
|
||||
imageOptimizer := new(Image)
|
||||
imageOptimizer.ImageQuality = options.ImageQuality
|
||||
chain.Append(imageOptimizer)
|
||||
}
|
||||
if options.CombineDuplicateDirectObjects {
|
||||
chain.Append(new(CombineDuplicateDirectObjects))
|
||||
}
|
||||
if options.CombineDuplicateStreams {
|
||||
chain.Append(new(CombineDuplicateStreams))
|
||||
}
|
||||
if options.CombineIdenticalIndirectObjects {
|
||||
chain.Append(new(CombineIdenticalIndirectObjects))
|
||||
}
|
||||
if options.UseObjectStreams {
|
||||
chain.Append(new(ObjectStreams))
|
||||
}
|
||||
if options.CompressStreams {
|
||||
chain.Append(new(CompressStreams))
|
||||
}
|
||||
return chain
|
||||
}
|
||||
|
||||
// replaceObjectsInPlace replaces objects. objTo will be modified by the process.
|
||||
func replaceObjectsInPlace(objects []core.PdfObject, objTo map[core.PdfObject]core.PdfObject) {
|
||||
if objTo == nil || len(objTo) == 0 {
|
||||
return
|
||||
}
|
||||
for i, obj := range objects {
|
||||
if to, found := objTo[obj]; found {
|
||||
objects[i] = to
|
||||
continue
|
||||
}
|
||||
objTo[obj] = obj
|
||||
switch t := obj.(type) {
|
||||
case *core.PdfObjectArray:
|
||||
values := make([]core.PdfObject, t.Len())
|
||||
copy(values, t.Elements())
|
||||
replaceObjectsInPlace(values, objTo)
|
||||
for i, obj := range values {
|
||||
t.Set(i, obj)
|
||||
}
|
||||
case *core.PdfObjectStreams:
|
||||
replaceObjectsInPlace(t.Elements(), objTo)
|
||||
case *core.PdfObjectStream:
|
||||
values := []core.PdfObject{t.PdfObjectDictionary}
|
||||
replaceObjectsInPlace(values, objTo)
|
||||
t.PdfObjectDictionary = values[0].(*core.PdfObjectDictionary)
|
||||
case *core.PdfObjectDictionary:
|
||||
keys := t.Keys()
|
||||
values := make([]core.PdfObject, len(keys))
|
||||
for i, key := range keys {
|
||||
values[i] = t.Get(key)
|
||||
}
|
||||
replaceObjectsInPlace(values, objTo)
|
||||
for i, key := range keys {
|
||||
t.Set(key, values[i])
|
||||
}
|
||||
case *core.PdfIndirectObject:
|
||||
values := []core.PdfObject{t.PdfObject}
|
||||
replaceObjectsInPlace(values, objTo)
|
||||
t.PdfObject = values[0]
|
||||
}
|
||||
}
|
||||
}
|
17
pdf/model/optimize/options.go
Normal file
17
pdf/model/optimize/options.go
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package optimize
|
||||
|
||||
// Options describes PDF optimization parameters. Each field enables one
// optimizer of the chain built by New.
type Options struct {
	// CombineDuplicateStreams enables merging of duplicated streams.
	CombineDuplicateStreams bool
	// CombineDuplicateDirectObjects enables merging of duplicated direct
	// dictionaries.
	CombineDuplicateDirectObjects bool
	// ImageUpperPPI, when positive, limits the PPI of images.
	ImageUpperPPI float64
	// ImageQuality, when positive, re-encodes images as JPEG with this quality.
	ImageQuality int
	// UseObjectStreams enables grouping of objects into object streams.
	UseObjectStreams bool
	// CombineIdenticalIndirectObjects enables merging of identical indirect
	// objects.
	CombineIdenticalIndirectObjects bool
	// CompressStreams enables compression of streams that have no filter.
	CompressStreams bool
}
|
18
pdf/model/optimizer.go
Normal file
18
pdf/model/optimizer.go
Normal file
@ -0,0 +1,18 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package model
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// Optimizer is the interface that performs optimization of PDF object structure for output writing.
|
||||
//
|
||||
// Optimize receives a slice of input `objects`, performs optimization, including removing, replacing objects and
|
||||
// output the optimized slice of objects.
|
||||
type Optimizer interface {
|
||||
Optimize(objects []core.PdfObject) ([]core.PdfObject, error)
|
||||
}
|
@ -10,8 +10,10 @@ package model
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@ -25,6 +27,16 @@ import (
|
||||
"github.com/unidoc/unidoc/pdf/model/fonts"
|
||||
)
|
||||
|
||||
// crossReference is one cross-reference entry recorded for a written object.
// Which fields are meaningful depends on Type.
// NOTE(review): the entry types presumably follow the PDF cross-reference
// stream entry types; confirm against the writer.
type crossReference struct {
	Type int

	// Fields used by type 1 entries.
	Offset     int64
	Generation int64 // also used by type 0 entries

	// Fields used by type 2 entries.
	ObjectNumber int // also used by type 0 entries
	Index        int
}
|
||||
|
||||
var pdfCreator = ""
|
||||
|
||||
func getPdfProducer() string {
|
||||
@ -79,6 +91,9 @@ type PdfWriter struct {
|
||||
|
||||
// Forms.
|
||||
acroForm *PdfAcroForm
|
||||
|
||||
optimizer Optimizer
|
||||
crossReferenceMap map[int]crossReference
|
||||
}
|
||||
|
||||
// NewPdfWriter initializes a new PdfWriter.
|
||||
@ -132,6 +147,108 @@ func NewPdfWriter() PdfWriter {
|
||||
return w
|
||||
}
|
||||
|
||||
// copyObject creates deep copy of the Pdf object and
|
||||
// fills objectToObjectCopyMap to replace the old object to the copy of object if needed.
|
||||
// Parameter objectToObjectCopyMap is needed to replace object references to its copies.
|
||||
// Because many objects can contain references to another objects like pages to images.
|
||||
func copyObject(obj PdfObject, objectToObjectCopyMap map[PdfObject]PdfObject) PdfObject {
|
||||
if newObj, ok := objectToObjectCopyMap[obj]; ok {
|
||||
return newObj
|
||||
}
|
||||
|
||||
switch t := obj.(type) {
|
||||
case *PdfObjectArray:
|
||||
newObj := &PdfObjectArray{}
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, val := range t.Elements() {
|
||||
newObj.Append(copyObject(val, objectToObjectCopyMap))
|
||||
}
|
||||
return newObj
|
||||
case *PdfObjectStreams:
|
||||
newObj := &PdfObjectStreams{PdfObjectReference: t.PdfObjectReference}
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, val := range t.Elements() {
|
||||
newObj.Append(copyObject(val, objectToObjectCopyMap))
|
||||
}
|
||||
return newObj
|
||||
case *PdfObjectStream:
|
||||
newObj := &PdfObjectStream{
|
||||
Stream: t.Stream,
|
||||
PdfObjectReference: t.PdfObjectReference,
|
||||
}
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
newObj.PdfObjectDictionary = copyObject(t.PdfObjectDictionary, objectToObjectCopyMap).(*PdfObjectDictionary)
|
||||
return newObj
|
||||
case *PdfObjectDictionary:
|
||||
newObj := MakeDict()
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, key := range t.Keys() {
|
||||
val := t.Get(key)
|
||||
newObj.Set(key, copyObject(val, objectToObjectCopyMap))
|
||||
}
|
||||
return newObj
|
||||
case *PdfIndirectObject:
|
||||
newObj := &PdfIndirectObject{
|
||||
PdfObjectReference: t.PdfObjectReference,
|
||||
}
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
newObj.PdfObject = copyObject(t.PdfObject, objectToObjectCopyMap)
|
||||
return newObj
|
||||
case *PdfObjectString:
|
||||
newObj := &PdfObjectString{}
|
||||
*newObj = *t
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
return newObj
|
||||
case *PdfObjectName:
|
||||
newObj := PdfObjectName(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
case *PdfObjectNull:
|
||||
newObj := PdfObjectNull{}
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
case *PdfObjectInteger:
|
||||
newObj := PdfObjectInteger(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
case *PdfObjectReference:
|
||||
newObj := PdfObjectReference(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
case *PdfObjectFloat:
|
||||
newObj := PdfObjectFloat(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
case *PdfObjectBool:
|
||||
newObj := PdfObjectBool(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
default:
|
||||
common.Log.Info("TODO(a5i): implement copyObject for %+v", obj)
|
||||
}
|
||||
// return other objects as is
|
||||
return obj
|
||||
}
|
||||
|
||||
// copyObjects makes objects copy and set as working.
|
||||
func (this *PdfWriter) copyObjects() {
|
||||
objectToObjectCopyMap := make(map[PdfObject]PdfObject)
|
||||
objects := make([]PdfObject, len(this.objects))
|
||||
objectsMap := make(map[PdfObject]bool)
|
||||
for i, obj := range this.objects {
|
||||
newObject := copyObject(obj, objectToObjectCopyMap)
|
||||
objects[i] = newObject
|
||||
if this.objectsMap[obj] {
|
||||
objectsMap[newObject] = true
|
||||
}
|
||||
}
|
||||
|
||||
this.objects = objects
|
||||
this.objectsMap = objectsMap
|
||||
this.infoObj = copyObject(this.infoObj, objectToObjectCopyMap).(*PdfIndirectObject)
|
||||
this.root = copyObject(this.root, objectToObjectCopyMap).(*PdfIndirectObject)
|
||||
}
|
||||
|
||||
// Set the PDF version of the output file.
|
||||
func (this *PdfWriter) SetVersion(majorVersion, minorVersion int) {
|
||||
this.majorVersion = majorVersion
|
||||
@ -152,6 +269,16 @@ func (this *PdfWriter) SetOCProperties(ocProperties PdfObject) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetOptimizer sets the optimizer to optimize PDF before writing.
|
||||
func (this *PdfWriter) SetOptimizer(optimizer Optimizer) {
|
||||
this.optimizer = optimizer
|
||||
}
|
||||
|
||||
// GetOptimizer returns current PDF optimizer.
|
||||
func (this *PdfWriter) GetOptimizer() Optimizer {
|
||||
return this.optimizer
|
||||
}
|
||||
|
||||
func (this *PdfWriter) hasObject(obj PdfObject) bool {
|
||||
// Check if already added.
|
||||
for _, o := range this.objects {
|
||||
@ -438,6 +565,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
|
||||
common.Log.Trace("Write obj #%d\n", num)
|
||||
|
||||
if pobj, isIndirect := obj.(*PdfIndirectObject); isIndirect {
|
||||
this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber}
|
||||
outStr := fmt.Sprintf("%d 0 obj\n", num)
|
||||
outStr += pobj.PdfObject.DefaultWriteString()
|
||||
outStr += "\nendobj\n"
|
||||
@ -448,6 +576,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
|
||||
// XXX/TODO: Add a default encoder if Filter not specified?
|
||||
// Still need to make sure is encrypted.
|
||||
if pobj, isStream := obj.(*PdfObjectStream); isStream {
|
||||
this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber}
|
||||
outStr := fmt.Sprintf("%d 0 obj\n", num)
|
||||
outStr += pobj.PdfObjectDictionary.DefaultWriteString()
|
||||
outStr += "\nstream\n"
|
||||
@ -457,6 +586,46 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
|
||||
return
|
||||
}
|
||||
|
||||
if ostreams, isObjStreams := obj.(*PdfObjectStreams); isObjStreams {
|
||||
this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: ostreams.GenerationNumber}
|
||||
outStr := fmt.Sprintf("%d 0 obj\n", num)
|
||||
var offsets []string
|
||||
var objData string
|
||||
var offset int64
|
||||
|
||||
for index, obj := range ostreams.Elements() {
|
||||
io, isIndirect := obj.(*PdfIndirectObject)
|
||||
if !isIndirect {
|
||||
common.Log.Error("Object streams N %d contains non indirect pdf object %v", num, obj)
|
||||
}
|
||||
data := io.PdfObject.DefaultWriteString() + " "
|
||||
objData = objData + data
|
||||
offsets = append(offsets, fmt.Sprintf("%d %d", io.ObjectNumber, offset))
|
||||
this.crossReferenceMap[int(io.ObjectNumber)] = crossReference{Type: 2, ObjectNumber: num, Index: index}
|
||||
offset = offset + int64(len([]byte(data)))
|
||||
}
|
||||
offsetsStr := strings.Join(offsets, " ") + " "
|
||||
encoder := NewFlateEncoder()
|
||||
//encoder := NewRawEncoder()
|
||||
dict := encoder.MakeStreamDict()
|
||||
dict.Set(PdfObjectName("Type"), MakeName("ObjStm"))
|
||||
n := int64(ostreams.Len())
|
||||
dict.Set(PdfObjectName("N"), MakeInteger(n))
|
||||
first := int64(len(offsetsStr))
|
||||
dict.Set(PdfObjectName("First"), MakeInteger(first))
|
||||
|
||||
data, _ := encoder.EncodeBytes([]byte(offsetsStr + objData))
|
||||
length := int64(len(data))
|
||||
|
||||
dict.Set(PdfObjectName("Length"), MakeInteger(length))
|
||||
outStr += dict.DefaultWriteString()
|
||||
outStr += "\nstream\n"
|
||||
this.writeString(outStr)
|
||||
this.writeBytes(data)
|
||||
this.writeString("\nendstream\nendobj\n")
|
||||
return
|
||||
}
|
||||
|
||||
this.writer.WriteString(obj.DefaultWriteString())
|
||||
}
|
||||
|
||||
@ -472,6 +641,10 @@ func (this *PdfWriter) updateObjectNumbers() {
|
||||
so.ObjectNumber = int64(idx + 1)
|
||||
so.GenerationNumber = 0
|
||||
}
|
||||
if so, isObjectStreams := obj.(*PdfObjectStreams); isObjectStreams {
|
||||
so.ObjectNumber = int64(idx + 1)
|
||||
so.GenerationNumber = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -686,24 +859,53 @@ func (this *PdfWriter) Write(writer io.Writer) error {
|
||||
}
|
||||
// Set version in the catalog.
|
||||
this.catalog.Set("Version", MakeName(fmt.Sprintf("%d.%d", this.majorVersion, this.minorVersion)))
|
||||
this.copyObjects()
|
||||
|
||||
if this.optimizer != nil {
|
||||
var err error
|
||||
this.objects, err = this.optimizer.Optimize(this.objects)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
w := bufio.NewWriter(writer)
|
||||
this.writer = w
|
||||
this.writePos = 0
|
||||
useCrossReferenceStream := this.majorVersion > 1 || (this.majorVersion == 1 && this.minorVersion > 4)
|
||||
objectsInObjectStreams := make(map[PdfObject]bool)
|
||||
if !useCrossReferenceStream {
|
||||
for _, obj := range this.objects {
|
||||
if objStm, isObjectStreams := obj.(*PdfObjectStreams); isObjectStreams {
|
||||
useCrossReferenceStream = true
|
||||
for _, obj := range objStm.Elements() {
|
||||
objectsInObjectStreams[obj] = true
|
||||
if io, isIndirectObj := obj.(*PdfIndirectObject); isIndirectObj {
|
||||
objectsInObjectStreams[io.PdfObject] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if useCrossReferenceStream && this.majorVersion == 1 && this.minorVersion < 5 {
|
||||
this.minorVersion = 5
|
||||
}
|
||||
|
||||
this.writeString(fmt.Sprintf("%%PDF-%d.%d\n", this.majorVersion, this.minorVersion))
|
||||
this.writeString("%âãÏÓ\n")
|
||||
|
||||
this.updateObjectNumbers()
|
||||
|
||||
offsets := []int64{}
|
||||
|
||||
// Write objects
|
||||
common.Log.Trace("Writing %d obj", len(this.objects))
|
||||
this.crossReferenceMap = make(map[int]crossReference)
|
||||
this.crossReferenceMap[0] = crossReference{Type: 0, ObjectNumber: 0, Generation: 0xFFFF}
|
||||
for idx, obj := range this.objects {
|
||||
if skip := objectsInObjectStreams[obj]; skip {
|
||||
continue
|
||||
}
|
||||
common.Log.Trace("Writing %d", idx)
|
||||
offset := this.writePos
|
||||
offsets = append(offsets, offset)
|
||||
|
||||
// Encrypt prior to writing.
|
||||
// Encrypt dictionary should not be encrypted.
|
||||
@ -713,41 +915,90 @@ func (this *PdfWriter) Write(writer io.Writer) error {
|
||||
common.Log.Debug("ERROR: Failed encrypting (%s)", err)
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
this.writeObject(idx+1, obj)
|
||||
}
|
||||
|
||||
xrefOffset := this.writePos
|
||||
|
||||
// Write xref table.
|
||||
this.writeString("xref\r\n")
|
||||
outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.objects)+1)
|
||||
this.writeString(outStr)
|
||||
outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535)
|
||||
this.writeString(outStr)
|
||||
for _, offset := range offsets {
|
||||
outStr = fmt.Sprintf("%.10d %.5d n\r\n", offset, 0)
|
||||
this.writeString(outStr)
|
||||
}
|
||||
if useCrossReferenceStream {
|
||||
|
||||
crossObjNumber := len(this.crossReferenceMap)
|
||||
this.crossReferenceMap[crossObjNumber] = crossReference{Type: 1, ObjectNumber: crossObjNumber, Offset: xrefOffset}
|
||||
crossReferenceData := bytes.NewBuffer(nil)
|
||||
|
||||
for idx := 0; idx < len(this.crossReferenceMap); idx++ {
|
||||
ref := this.crossReferenceMap[idx]
|
||||
switch ref.Type {
|
||||
case 0:
|
||||
binary.Write(crossReferenceData, binary.BigEndian, byte(0))
|
||||
binary.Write(crossReferenceData, binary.BigEndian, uint32(0))
|
||||
binary.Write(crossReferenceData, binary.BigEndian, uint16(0xFFFF))
|
||||
case 1:
|
||||
binary.Write(crossReferenceData, binary.BigEndian, byte(1))
|
||||
binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.Offset))
|
||||
binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Generation))
|
||||
case 2:
|
||||
binary.Write(crossReferenceData, binary.BigEndian, byte(2))
|
||||
binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.ObjectNumber))
|
||||
binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Index))
|
||||
}
|
||||
}
|
||||
crossReferenceStream, err := MakeStream(crossReferenceData.Bytes(), NewFlateEncoder())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
crossReferenceStream.ObjectNumber = int64(crossObjNumber)
|
||||
crossReferenceStream.PdfObjectDictionary.Set("Type", MakeName("XRef"))
|
||||
crossReferenceStream.PdfObjectDictionary.Set("W", MakeArray(MakeInteger(1), MakeInteger(4), MakeInteger(2)))
|
||||
crossReferenceStream.PdfObjectDictionary.Set("Index", MakeArray(MakeInteger(0), MakeInteger(crossReferenceStream.ObjectNumber+1)))
|
||||
crossReferenceStream.PdfObjectDictionary.Set("Size", MakeInteger(crossReferenceStream.ObjectNumber+1))
|
||||
crossReferenceStream.PdfObjectDictionary.Set("Info", this.infoObj)
|
||||
crossReferenceStream.PdfObjectDictionary.Set("Root", this.root)
|
||||
// If encrypted!
|
||||
if this.crypter != nil {
|
||||
crossReferenceStream.Set("Encrypt", this.encryptObj)
|
||||
crossReferenceStream.Set("ID", this.ids)
|
||||
common.Log.Trace("Ids: %s", this.ids)
|
||||
}
|
||||
|
||||
this.writeObject(int(crossReferenceStream.ObjectNumber), crossReferenceStream)
|
||||
|
||||
} else {
|
||||
this.writeString("xref\r\n")
|
||||
outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.crossReferenceMap))
|
||||
this.writeString(outStr)
|
||||
for idx := 0; idx < len(this.crossReferenceMap); idx++ {
|
||||
ref := this.crossReferenceMap[idx]
|
||||
switch ref.Type {
|
||||
case 0:
|
||||
outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535)
|
||||
this.writeString(outStr)
|
||||
case 1:
|
||||
outStr = fmt.Sprintf("%.10d %.5d n\r\n", ref.Offset, 0)
|
||||
this.writeString(outStr)
|
||||
}
|
||||
}
|
||||
|
||||
// Generate & write trailer
|
||||
trailer := MakeDict()
|
||||
trailer.Set("Info", this.infoObj)
|
||||
trailer.Set("Root", this.root)
|
||||
trailer.Set("Size", MakeInteger(int64(len(this.objects)+1)))
|
||||
// If encrypted!
|
||||
if this.crypter != nil {
|
||||
trailer.Set("Encrypt", this.encryptObj)
|
||||
trailer.Set("ID", this.ids)
|
||||
common.Log.Trace("Ids: %s", this.ids)
|
||||
}
|
||||
this.writeString("trailer\n")
|
||||
this.writeString(trailer.DefaultWriteString())
|
||||
this.writeString("\n")
|
||||
|
||||
// Generate & write trailer
|
||||
trailer := MakeDict()
|
||||
trailer.Set("Info", this.infoObj)
|
||||
trailer.Set("Root", this.root)
|
||||
trailer.Set("Size", MakeInteger(int64(len(this.objects)+1)))
|
||||
// If encrypted!
|
||||
if this.crypter != nil {
|
||||
trailer.Set("Encrypt", this.encryptObj)
|
||||
trailer.Set("ID", this.ids)
|
||||
common.Log.Trace("Ids: %s", this.ids)
|
||||
}
|
||||
this.writeString("trailer\n")
|
||||
this.writeString(trailer.DefaultWriteString())
|
||||
this.writeString("\n")
|
||||
|
||||
// Make offset reference.
|
||||
outStr = fmt.Sprintf("startxref\n%d\n", xrefOffset)
|
||||
outStr := fmt.Sprintf("startxref\n%d\n", xrefOffset)
|
||||
this.writeString(outStr)
|
||||
this.writeString("%%EOF\n")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user